2 # pep8.py - Check Python source code formatting, according to PEP 8
3 # Copyright (C) 2006 Johann C. Rocholl <johann@rocholl.net>
5 # Permission is hereby granted, free of charge, to any person
6 # obtaining a copy of this software and associated documentation files
7 # (the "Software"), to deal in the Software without restriction,
8 # including without limitation the rights to use, copy, modify, merge,
9 # publish, distribute, sublicense, and/or sell copies of the Software,
10 # and to permit persons to whom the Software is furnished to do so,
11 # subject to the following conditions:
13 # The above copyright notice and this permission notice shall be
14 # included in all copies or substantial portions of the Software.
16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
20 # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21 # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26 Check Python source code formatting, according to PEP 8:
27 http://www.python.org/dev/peps/pep-0008/
29 For usage and a list of options, try this:
32 This program and its regression test suite live here:
33 http://github.com/jcrocholl/pep8
35 Groups of errors and warnings:
46 You can add checks to this program by writing plugins. Each plugin is
47 a simple function that is called for each line of source code, either
51 - Raw line of text from the input file.
54 - Multi-line statements converted to a single line.
55 - Stripped left and right.
56 - Contents of strings replaced with 'xxx' of same length.
59 The check function requests physical or logical lines by the name of
62 def maximum_line_length(physical_line)
63 def extraneous_whitespace(logical_line)
64 def blank_lines(logical_line, blank_lines, indent_level, line_number)
66 The last example above demonstrates how check plugins can request
67 additional information with extra arguments. All attributes of the
68 Checker object are available. Some examples:
70 lines: a list of the raw lines from the input file
71 tokens: the tokens that contribute to this logical line
72 line_number: line number in the input file
73 blank_lines: blank lines before this one
74 indent_char: first indentation character in this file (' ' or '\t')
75 indent_level: indentation (with tabs expanded to multiples of 8)
76 previous_indent_level: indentation on previous line
77 previous_logical: previous logical line
79 The docstring of each check function shall be the relevant part of
80 text from PEP 8. It is printed if the user enables --show-pep8.
81 Several docstrings contain examples directly from the PEP 8 document.
83 Okay: spam(ham[1], {eggs: 2})
84 E201: spam( ham[1], {eggs: 2})
86 These examples are verified automatically when pep8.py is run with the
87 --doctest option. You can add examples for your own check functions.
88 The format is simple: "Okay" or error/warning code followed by colon
89 and space, the rest of the line is example source code. If you put 'r'
90 before the docstring, you can use \n for newline, \t for tab and \s
# Version string reported by the command-line tool.
__version__ = '0.5.1dev'
104 from optparse
import OptionParser
105 from fnmatch
import fnmatch
109 from sets
import ImmutableSet
as frozenset
# Comma-separated defaults for the --exclude and --ignore options.
DEFAULT_EXCLUDE = '.svn,CVS,.bzr,.hg,.git'
DEFAULT_IGNORE = 'E24'
# Longest physical line accepted by maximum_line_length().
MAX_LINE_LENGTH = 79

# Leading run of spaces and tabs of a physical line.
INDENT_REGEX = re.compile(r'([ \t]*)')
# Old-style "raise Class, value"; group 1 is the comma.
RAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*(,)')
# Self-test example line inside a check docstring: "<code>: <source>".
SELFTEST_REGEX = re.compile(r'(Okay|[EW]\d{3}):\s(.*)')
ERRORCODE_REGEX = re.compile(r'[EW]\d{3}')
# Start of a string literal, optionally with u/r prefixes.
DOCSTRING_REGEX = re.compile(r'u?r?["\']')
# Groups: (operator before, tab-or-two-spaces, operator after).
# Raw string so that \w and \s are not treated as (invalid) str escapes.
WHITESPACE_AROUND_OPERATOR_REGEX = \
    re.compile(r'([^\w\s]*)\s*(\t|  )\s*([^\w\s]*)')
# An opening bracket followed by a space, or a space followed by a closing
# bracket / separator.  Brackets are escaped to avoid "nested set" warnings.
EXTRANEOUS_WHITESPACE_REGEX = re.compile(r'[\[({] | [\]}),;:]')
# A parenthesis, or an '=' with whitespace on one side that is not part of
# a comparison operator.
WHITESPACE_AROUND_NAMED_PARAMETER_REGEX = \
    re.compile(r'[()]|\s=[^=]|[^=!<>]=\s')


# Characters accepted as whitespace after a separator.
WHITESPACE = ' \t'

BINARY_OPERATORS = frozenset(['**=', '*=', '+=', '-=', '!=', '<>',
    '%=', '^=', '&=', '|=', '==', '/=', '//=', '<=', '>=', '<<=', '>>=',
    '%',  '^',  '&',  '|',  '=',  '/',  '//',  '<',  '>',  '<<'])
# Operators that may be unary or binary depending on context.
UNARY_OPERATORS = frozenset(['>>', '**', '*', '+', '-'])
OPERATORS = BINARY_OPERATORS | UNARY_OPERATORS
# Token types left out when a logical line is assembled.
SKIP_TOKENS = frozenset([tokenize.COMMENT, tokenize.NL, tokenize.INDENT,
                         tokenize.DEDENT, tokenize.NEWLINE])
# Keywords after which a following '-', '+', '*' ... is a unary operator.
E225NOT_KEYWORDS = (frozenset(keyword.kwlist + ['print']) -
                    frozenset(['False', 'None', 'True']))
# Counter keys that survive reset_counters().
BENCHMARK_KEYS = ('directories', 'files', 'logical lines', 'physical lines')

# Global option object read by the checks and the framework.
# NOTE(review): presumably assigned by the option-parsing code, which is
# not visible in this part of the file -- confirm.
options = None
145 ##############################################################################
146 # Plugins (check functions) for physical lines
147 ##############################################################################
# Physical-line check.  Returns (offset, message) for the first indentation
# character that differs from indent_char, or None when the indentation is
# consistent.  The docstring is runtime data (PEP 8 text plus self-test
# examples), so explanatory notes live in comments.
def tabs_or_spaces(physical_line, indent_char):
    r"""
    Never mix tabs and spaces.

    The most popular way of indenting Python is with spaces only.  The
    second-most popular way is with tabs only.  Code indented with a mixture
    of tabs and spaces should be converted to using spaces exclusively.  When
    invoking the Python command line interpreter with the -t option, it issues
    warnings about code that illegally mixes tabs and spaces.  When using -tt
    these warnings become errors.  These options are highly recommended!

    Okay: if a == 0:\n        a = 1\n        b = 1
    E101: if a == 0:\n        a = 1\n\tb = 1
    """
    indent = INDENT_REGEX.match(physical_line).group(1)
    for offset, char in enumerate(indent):
        if char != indent_char:
            return offset, "E101 indentation contains mixed spaces and tabs"
# Physical-line check.  Returns (offset of the first tab in the indentation,
# message), or None when the indentation contains no tab.
def tabs_obsolete(physical_line):
    r"""
    For new projects, spaces-only are strongly recommended over tabs.  Most
    editors have features that make this easy to do.

    Okay: if True:\n    return
    W191: if True:\n\treturn
    """
    indent = INDENT_REGEX.match(physical_line).group(1)
    if '\t' in indent:
        return indent.index('\t'), "W191 indentation contains tabs"
# Physical-line check.  Returns (offset, message) when whitespace follows the
# last visible character (W291) or when the line consists of whitespace only
# (W293); returns None otherwise.
def trailing_whitespace(physical_line):
    r"""
    JCR: Trailing whitespace is superfluous.
    FBM: Except when it occurs as part of a blank line (i.e. the line is
         nothing but whitespace). According to Python docs[1] a line with only
         whitespace is considered a blank line, and is to be ignored. However,
         matching a blank line to its indentation level avoids mistakenly
         terminating a multi-line statement (e.g. class declaration) when
         pasting code into the standard Python interpreter.

         [1] http://docs.python.org/reference/lexical_analysis.html#blank-lines

    The warning returned varies on whether the line itself is blank, for easier
    filtering for those who want to indent their blank lines.

    Okay: spam(1)
    W291: spam(1)\s
    W293: class Foo(object):\n    \n    bang = 12
    """
    # Line terminators and a trailing form feed are not reported.
    physical_line = physical_line.rstrip('\n')    # chr(10), newline
    physical_line = physical_line.rstrip('\r')    # chr(13), carriage return
    physical_line = physical_line.rstrip('\x0c')  # chr(12), form feed, ^L
    stripped = physical_line.rstrip()
    if physical_line != stripped:
        if stripped:
            return len(stripped), "W291 trailing whitespace"
        else:
            return 0, "W293 blank line contains whitespace"
# Physical-line check.  `lines` is the list of all lines of the file and
# `line_number` the 1-based number of `physical_line`; reports W391 only
# when the blank line is the last line of the file.
def trailing_blank_lines(physical_line, lines, line_number):
    r"""
    JCR: Trailing blank lines are superfluous.

    Okay: spam(1)
    W391: spam(1)\n
    """
    if physical_line.strip() == '' and line_number == len(lines):
        return 0, "W391 blank line at end of file"
# Physical-line check.  A line that is unchanged by rstrip() has no line
# terminator (and no other trailing whitespace); report W292 at its end.
def missing_newline(physical_line):
    """
    JCR: The last line should have a newline.
    """
    if physical_line.rstrip() == physical_line:
        return len(physical_line), "W292 no newline at end of file"
# Physical-line check.  Returns (MAX_LINE_LENGTH, message) when the line,
# ignoring trailing whitespace, is longer than MAX_LINE_LENGTH characters.
def maximum_line_length(physical_line):
    """
    Limit all lines to a maximum of 79 characters.

    There are still many devices around that are limited to 80 character
    lines; plus, limiting windows to 80 characters makes it possible to have
    several windows side-by-side.  The default wrapping on such devices looks
    ugly.  Therefore, please limit all lines to a maximum of 79 characters.
    For flowing long blocks of text (docstrings or comments), limiting the
    length to 72 characters is recommended.
    """
    line = physical_line.rstrip()
    length = len(line)
    if length > MAX_LINE_LENGTH:
        try:
            # The line could contain multi-byte characters
            if not hasattr(line, 'decode'):   # Python 3
                line = line.encode('latin-1')
            length = len(line.decode('utf-8'))
        except UnicodeError:
            # Covers both a failed decode and (on Python 3) a failed
            # latin-1 encode; fall back to the plain character count.
            pass
        if length > MAX_LINE_LENGTH:
            return MAX_LINE_LENGTH, "E501 line too long (%d characters)" % length
257 ##############################################################################
258 # Plugins (check functions) for logical lines
259 ##############################################################################
# Logical-line check.  `blank_lines` and `blank_lines_before_comment` are
# the blank-line counts collected by the Checker before this logical line;
# the larger of the two is what gets judged.  Returns (0, message) or None.
def blank_lines(logical_line, blank_lines, indent_level, line_number,
                previous_logical, previous_indent_level,
                blank_lines_before_comment):
    r"""
    Separate top-level function and class definitions with two blank lines.

    Method definitions inside a class are separated by a single blank line.

    Extra blank lines may be used (sparingly) to separate groups of related
    functions.  Blank lines may be omitted between a bunch of related
    one-liners (e.g. a set of dummy implementations).

    Use blank lines in functions, sparingly, to indicate logical sections.

    Okay: def a():\n    pass\n\n\ndef b():\n    pass
    Okay: def a():\n    pass\n\n\n# Foo\n# Bar\n\ndef b():\n    pass

    E301: class Foo:\n    b = 0\n    def bar():\n        pass
    E302: def a():\n    pass\n\ndef b(n):\n    pass
    E303: def a():\n    pass\n\n\n\ndef b(n):\n    pass
    E303: def a():\n\n\n\n    pass
    E304: @decorator\n\ndef a():\n    pass
    """
    if line_number == 1:
        return  # Don't expect blank lines before the first line
    max_blank_lines = max(blank_lines, blank_lines_before_comment)
    if previous_logical.startswith('@'):
        if max_blank_lines:
            return 0, "E304 blank lines found after function decorator"
    elif max_blank_lines > 2 or (indent_level and max_blank_lines == 2):
        return 0, "E303 too many blank lines (%d)" % max_blank_lines
    elif (logical_line.startswith('def ') or
          logical_line.startswith('class ') or
          logical_line.startswith('@')):
        if indent_level:
            # Nested definition: one blank line, unless it opens a block or
            # directly follows a docstring.
            if not (max_blank_lines or previous_indent_level < indent_level or
                    DOCSTRING_REGEX.match(previous_logical)):
                return 0, "E301 expected 1 blank line, found 0"
        elif max_blank_lines != 2:
            return 0, "E302 expected 2 blank lines, found %d" % max_blank_lines
# Logical-line check.  Returns (offset, message) for the first space found
# right after an opening bracket (E201), right before a closing bracket
# (E202) or right before a comma, semicolon or colon (E203).
def extraneous_whitespace(logical_line):
    """
    Avoid extraneous whitespace in the following situations:

    - Immediately inside parentheses, brackets or braces.

    - Immediately before a comma, semicolon, or colon.

    Okay: spam(ham[1], {eggs: 2})
    E201: spam( ham[1], {eggs: 2})
    E201: spam(ham[ 1], {eggs: 2})
    E201: spam(ham[1], { eggs: 2})
    E202: spam(ham[1], {eggs: 2} )
    E202: spam(ham[1 ], {eggs: 2})
    E202: spam(ham[1], {eggs: 2 })

    E203: if x == 4: print x, y; x, y = y , x
    E203: if x == 4: print x, y ; x, y = y, x
    E203: if x == 4 : print x, y; x, y = y, x
    """
    line = logical_line
    for match in EXTRANEOUS_WHITESPACE_REGEX.finditer(line):
        text = match.group()
        char = text.strip()
        found = match.start()
        if text == char + ' ' and char in '([{':
            return found + 1, "E201 whitespace after '%s'" % char
        # A space that follows a comma is the separator's own space.
        if text == ' ' + char and line[found - 1] != ',':
            if char in '}])':
                return found, "E202 whitespace before '%s'" % char
            if char in ',;:':
                return found, "E203 whitespace before '%s'" % char
# Logical-line check.  Returns (index, message) for the first comma,
# semicolon or colon that is not followed by a WHITESPACE character.
def missing_whitespace(logical_line):
    """
    JCR: Each comma, semicolon or colon should be followed by whitespace.

    Okay: [a, b]
    Okay: (3,)
    Okay: a[1:4]
    Okay: a[:4]
    Okay: a[1:]
    Okay: a[1:4:2]
    E231: ['a','b']
    E231: foo(bar,baz)
    """
    line = logical_line
    # The last character is never inspected: nothing can follow it.
    for index in range(len(line) - 1):
        char = line[index]
        if char in ',;:' and line[index + 1] not in WHITESPACE:
            before = line[:index]
            if char == ':' and before.count('[') > before.count(']'):
                continue  # Slice syntax, no space required
            if char == ',' and line[index + 1] == ')':
                continue  # Allow tuple with only one element: (3,)
            return index, "E231 missing whitespace after '%s'" % char
# Logical-line check.  Compares the indentation level of this logical line
# with the previous one.  Returns (0, message) or None.
def indentation(logical_line, previous_logical, indent_char,
                indent_level, previous_indent_level):
    r"""
    Use 4 spaces per indentation level.

    For really old code that you don't want to mess up, you can continue to
    use 8-space tabs.

    Okay: a = 1
    Okay: if a == 0:\n    a = 1
    E111:   a = 1

    Okay: for item in items:\n    pass
    E112: for item in items:\npass

    Okay: a = 1\nb = 2
    E113: a = 1\n    b = 2
    """
    if indent_char == ' ' and indent_level % 4:
        return 0, "E111 indentation is not a multiple of four"
    # A trailing colon on the previous logical line opens a block.
    indent_expect = previous_logical.endswith(':')
    if indent_expect and indent_level <= previous_indent_level:
        return 0, "E112 expected an indented block"
    if indent_level > previous_indent_level and not indent_expect:
        return 0, "E113 unexpected indentation"
# Logical-line check working on the token list of the logical line.
# Returns (end position of the previous token, message) when '(' or '['
# is separated from a preceding name or closing bracket; the offset is a
# (row, column) tuple taken from the token.
def whitespace_before_parameters(logical_line, tokens):
    """
    Avoid extraneous whitespace in the following situations:

    - Immediately before the open parenthesis that starts the argument
      list of a function call.

    - Immediately before the open parenthesis that starts an indexing or
      slicing.

    Okay: spam(1)
    E211: spam (1)

    Okay: dict['key'] = list[index]
    E211: dict ['key'] = list[index]
    E211: dict['key'] = list [index]
    """
    prev_type = tokens[0][0]
    prev_text = tokens[0][1]
    prev_end = tokens[0][3]
    for index in range(1, len(tokens)):
        token_type, text, start, end, line = tokens[index]
        if (token_type == tokenize.OP and
            text in '([' and
            start != prev_end and
            (prev_type == tokenize.NAME or prev_text in '}])') and
            # Syntax "class A (B):" is allowed, but avoid it
            (index < 2 or tokens[index - 2][1] != 'class') and
            # Allow "return (a.foo for a in range(5))"
            (not keyword.iskeyword(prev_text))):
            return prev_end, "E211 whitespace before '%s'" % text
        prev_type = token_type
        prev_text = text
        prev_end = end
# Logical-line check.  Looks for a tab or a double space next to an operator
# and returns (offset of that whitespace, message), or None.
def whitespace_around_operator(logical_line):
    r"""
    Avoid extraneous whitespace in the following situations:

    - More than one space around an assignment (or other) operator to
      align it with another.

    Okay: a = 12 + 3
    E221: a = 4  + 5
    E222: a = 4 +  5
    E223: a = 4\t+ 5
    E224: a = 4 +\t5
    """
    for match in WHITESPACE_AROUND_OPERATOR_REGEX.finditer(logical_line):
        before, whitespace, after = match.groups()
        tab = whitespace == '\t'
        offset = match.start(2)
        # Whitespace after an operator takes precedence over whitespace
        # before one.
        if before in OPERATORS:
            if tab:
                return offset, "E224 tab after operator"
            return offset, "E222 multiple spaces after operator"
        elif after in OPERATORS:
            if tab:
                return offset, "E223 tab before operator"
            return offset, "E221 multiple spaces before operator"
# Logical-line check working on the token list.  Walks the tokens with a
# small state machine: once an operator that needs surrounding space has
# been seen (need_space), the next token must not start where the operator
# ended.  Returns (end position of the previous token, message) or None.
def missing_whitespace_around_operator(logical_line, tokens):
    r"""
    - Always surround these binary operators with a single space on
      either side: assignment (=), augmented assignment (+=, -= etc.),
      comparisons (==, <, >, !=, <>, <=, >=, in, not in, is, is not),
      Booleans (and, or, not).

    - Use spaces around arithmetic operators.

    Okay: i = i + 1
    Okay: submitted += 1
    Okay: x = x * 2 - 1
    Okay: hypot2 = x * x + y * y
    Okay: c = (a + b) * (a - b)
    Okay: foo(bar, key='word', *args, **kwargs)
    Okay: baz(**kwargs)
    Okay: negative = -1
    Okay: spam(-1)
    Okay: alpha[:-i]
    Okay: if not -5 < x < +5:\n    pass
    Okay: lambda *args, **kw: (args, kw)

    E225: i=i+1
    E225: submitted +=1
    E225: x = x*2 - 1
    E225: hypot2 = x*x + y*y
    E225: c = (a+b) * (a-b)
    E225: c = alpha -4
    E225: z = x **y
    """
    parens = 0
    need_space = False
    prev_type = tokenize.OP
    prev_text = prev_end = None
    for token_type, text, start, end, line in tokens:
        if token_type in (tokenize.NL, tokenize.NEWLINE, tokenize.ERRORTOKEN):
            # ERRORTOKEN is triggered by backticks in Python 3000
            continue
        if text in ('(', 'lambda'):
            parens += 1
        elif text == ')':
            parens -= 1
        if need_space:
            if start != prev_end:
                need_space = False
            elif text == '>' and prev_text == '<':
                # Tolerate the "<>" operator, even if running Python 3
                pass
            else:
                return prev_end, "E225 missing whitespace around operator"
        elif token_type == tokenize.OP and prev_end is not None:
            if text == '=' and parens:
                # Allow keyword args or defaults: foo(bar=None).
                pass
            elif text in BINARY_OPERATORS:
                need_space = True
            elif text in UNARY_OPERATORS:
                # Allow unary operators: -123, -x, +1.
                # Allow argument unpacking: foo(*args, **kwargs).
                if prev_type == tokenize.OP:
                    if prev_text in '}])':
                        need_space = True
                elif prev_type == tokenize.NAME:
                    if prev_text not in E225NOT_KEYWORDS:
                        need_space = True
                else:
                    need_space = True
            if need_space and start == prev_end:
                return prev_end, "E225 missing whitespace around operator"
        prev_type = token_type
        prev_text = text
        prev_end = end
# Logical-line check.  For each separator in ',;:' (in that order) reports
# the first occurrence followed by two spaces (E241) or a tab (E242).
# The returned offset points at the character after the separator.
def whitespace_around_comma(logical_line):
    r"""
    Avoid extraneous whitespace in the following situations:

    - More than one space around an assignment (or other) operator to
      align it with another.

    JCR: This should also be applied around comma etc.
    Note: these checks are disabled by default

    Okay: a = (1, 2)
    E241: a = (1,  2)
    E242: a = (1,\t2)
    """
    line = logical_line
    for separator in ',;:':
        found = line.find(separator + '  ')
        if found > -1:
            return found + 1, "E241 multiple spaces after '%s'" % separator
        found = line.find(separator + '\t')
        if found > -1:
            return found + 1, "E242 tab after '%s'" % separator
# Logical-line check.  Tracks parenthesis depth while scanning; a
# three-character match (an '=' with surrounding context) found inside
# parentheses is reported.  Returns (offset, message) or None.
def whitespace_around_named_parameter_equals(logical_line):
    """
    Don't use spaces around the '=' sign when used to indicate a
    keyword argument or a default parameter value.

    Okay: def complex(real, imag=0.0):
    Okay: return magic(r=real, i=imag)
    Okay: boolean(a == b)
    Okay: boolean(a != b)
    Okay: boolean(a <= b)
    Okay: boolean(a >= b)

    E251: def complex(real, imag = 0.0):
    E251: return magic(r = real, i = imag)
    """
    parens = 0
    for match in WHITESPACE_AROUND_NAMED_PARAMETER_REGEX.finditer(
            logical_line):
        text = match.group()
        if parens and len(text) == 3:
            issue = "E251 no spaces around keyword / parameter equals"
            return match.start(), issue
        if text == '(':
            parens += 1
        elif text == ')':
            parens -= 1
# Logical-line check working on the token list.  Only comments that share
# a line with code are judged.  Returns ((row, column), message) or None.
def whitespace_before_inline_comment(logical_line, tokens):
    """
    Separate inline comments by at least two spaces.

    An inline comment is a comment on the same line as a statement.  Inline
    comments should be separated by at least two spaces from the statement.
    They should start with a # and a single space.

    Okay: x = x + 1  # Increment x
    Okay: x = x + 1    # Increment x
    E261: x = x + 1 # Increment x
    E262: x = x + 1  #Increment x
    E262: x = x + 1  #  Increment x
    """
    prev_end = (0, 0)
    for token_type, text, start, end, line in tokens:
        if token_type == tokenize.NL:
            continue
        if token_type == tokenize.COMMENT:
            if not line[:start[1]].strip():
                continue  # Comment on a line by itself
            if prev_end[0] == start[0] and start[1] < prev_end[1] + 2:
                return (prev_end,
                        "E261 at least two spaces before inline comment")
            # Exactly one space must follow the '#'.
            if text.startswith('#  ') or not text.startswith('# '):
                return start, "E262 inline comment should start with '# '"
        else:
            prev_end = end
# Logical-line check.  Returns (offset of the first comma, message) for an
# "import a, b" statement; "from x import a, b" is not affected.
def imports_on_separate_lines(logical_line):
    r"""
    Imports should usually be on separate lines.

    Okay: import os\nimport sys
    E401: import sys, os

    Okay: from subprocess import Popen, PIPE
    Okay: from myclas import MyClass
    Okay: from foo.bar.yourclass import YourClass
    Okay: import myclass
    Okay: import foo.bar.yourclass
    """
    line = logical_line
    if line.startswith('import '):
        found = line.find(',')
        if found > -1:
            return found, "E401 multiple imports on one line"
# Logical-line check.  Reports the first ':' that is followed by more text
# and is not part of a dict literal, a slice or a lambda (E701), otherwise
# the first ';' (E702).  Returns (offset, message) or None.
def compound_statements(logical_line):
    r"""
    Compound statements (multiple statements on the same line) are
    generally discouraged.

    While sometimes it's okay to put an if/for/while with a small body
    on the same line, never do this for multi-clause statements. Also
    avoid folding such long lines!

    Okay: if foo == 'blah':\n    do_blah_thing()
    Okay: do_one()
    Okay: do_two()
    Okay: do_three()

    E701: if foo == 'blah': do_blah_thing()
    E701: for x in lst: total += x
    E701: while t < 10: t = delay()
    E701: if foo == 'blah': do_blah_thing()
    E701: else: do_non_blah_thing()
    E701: try: something()
    E701: finally: cleanup()
    E701: if foo == 'blah': one(); two(); three()

    E702: do_one(); do_two(); do_three()
    """
    line = logical_line
    found = line.find(':')
    if -1 < found < len(line) - 1:
        before = line[:found]
        if (before.count('{') <= before.count('}') and  # {'a': 1} (dict)
            before.count('[') <= before.count(']') and  # [1:2] (slice)
            not re.search(r'\blambda\b', before)):      # lambda x: x
            return found, "E701 multiple statements on one line (colon)"
    found = line.find(';')
    if -1 < found:
        return found, "E702 multiple statements on one line (semicolon)"
# Logical-line check.  Returns (offset of '.has_key(', message) or None.
def python_3000_has_key(logical_line):
    """
    The {}.has_key() method will be removed in the future version of
    Python. Use the 'in' operation instead, like:
    d = {"a": 1, "b": 2}
    if "b" in d:
        print d["b"]
    """
    pos = logical_line.find('.has_key(')
    if pos > -1:
        return pos, "W601 .has_key() is deprecated, use 'in'"
# Logical-line check.  Returns (offset of the comma, message) when the line
# starts with the old "raise Class, value" form, or None.
def python_3000_raise_comma(logical_line):
    """
    When raising an exception, use "raise ValueError('message')"
    instead of the older form "raise ValueError, 'message'".

    The paren-using form is preferred because when the exception arguments
    are long or include string formatting, you don't need to use line
    continuation characters thanks to the containing parentheses.  The older
    form will be removed in Python 3000.
    """
    match = RAISE_COMMA_REGEX.match(logical_line)
    if match:
        return match.start(1), "W602 deprecated form of raising exception"
# Logical-line check.  Returns (offset of '<>', message) or None.
def python_3000_not_equal(logical_line):
    """
    != can also be written <>, but this is an obsolete usage kept for
    backwards compatibility only.  New code should always use !=.
    The older syntax is removed in Python 3000.
    """
    pos = logical_line.find('<>')
    if pos > -1:
        return pos, "W603 '<>' is deprecated, use '!='"
# Logical-line check.  Returns (offset of the first backtick, message)
# or None.
def python_3000_backticks(logical_line):
    """
    Backticks are removed in Python 3000.
    Use repr() instead.
    """
    pos = logical_line.find('`')
    if pos > -1:
        return pos, "W604 backticks are deprecated, use 'repr()'"
715 ##############################################################################
717 ##############################################################################
# readlines(filename) returns the lines of a file as a list of strings,
# line endings included.  Which definition is used depends on whether str
# and bytes are the same type.  The file is closed explicitly (try/finally
# keeps the code valid on interpreters without the "with" statement).
if '' == ''.encode():
    # Python 2: implicit encoding.
    def readlines(filename):
        """Return the list of lines read from filename."""
        handle = open(filename)
        try:
            return handle.readlines()
        finally:
            handle.close()
else:
    # Python 3: decode to latin-1.
    # This function is lazy, it does not read the encoding declaration.
    # XXX: use tokenize.detect_encoding()
    def readlines(filename):
        """Return the list of lines read from filename, decoded as latin-1."""
        handle = open(filename, encoding='latin-1')
        try:
            return handle.readlines()
        finally:
            handle.close()
def expand_indent(line):
    """
    Return the amount of indentation.
    Tabs are expanded to the next multiple of 8.

    Counting stops at the first character that is neither a space nor a tab.

    >>> expand_indent('    ')
    4
    >>> expand_indent('\\t')
    8
    >>> expand_indent('    \\t')
    8
    >>> expand_indent('       \\t')
    8
    >>> expand_indent('        \\t')
    16
    """
    result = 0
    for char in line:
        if char == '\t':
            # Jump to the next tab stop.
            result = result // 8 * 8 + 8
        elif char == ' ':
            result += 1
        else:
            break
    return result
def mute_string(text):
    """
    Replace contents with 'xxx' to prevent syntax matching.

    The prefix, the quotes and the overall length are preserved.

    >>> mute_string('"abc"')
    '"xxx"'
    >>> mute_string("'''abc'''")
    "'''xxx'''"
    >>> mute_string("r'abc'")
    "r'xxx'"
    """
    start = 1
    end = len(text) - 1
    # String modifiers (e.g. u or r)
    if text.endswith('"'):
        start += text.index('"')
    elif text.endswith("'"):
        start += text.index("'")
    # Triple quotes
    if text.endswith('"""') or text.endswith("'''"):
        start += 2
        end -= 2
    return text[:start] + 'x' * (end - start) + text[end:]
def message(text):
    """Print a message."""
    print(text)
791 ##############################################################################
792 # Framework to run all checks
793 ##############################################################################
def find_checks(argument_name):
    """
    Find all globally visible functions where the first argument name
    starts with argument_name.

    A function is kept when its docstring mentions no error code at all, or
    when at least one of the codes it mentions is not ignored.  Returns a
    sorted list of (name, function, argument names) tuples.
    """
    # inspect.getargspec is gone from recent Python 3 releases; fall back
    # to it only where getfullargspec does not exist.
    getargs = getattr(inspect, 'getfullargspec', None) or inspect.getargspec
    checks = []
    for name, function in list(globals().items()):
        if not inspect.isfunction(function):
            continue
        args = getargs(function)[0]
        if args and args[0].startswith(argument_name):
            codes = ERRORCODE_REGEX.findall(inspect.getdoc(function) or '')
            for code in codes or ['']:
                if not code or not ignore_code(code):
                    checks.append((name, function, args))
                    break
    checks.sort()
    return checks
class Checker(object):
    """
    Load a Python source file, tokenize it, check coding style.

    The attributes set by check_all() and the methods below (line_number,
    indent_char, indent_level, previous_logical, blank_lines, tokens, ...)
    are what check functions receive: run_check() looks up each argument
    name of a check as an attribute of this object.
    """

    def __init__(self, filename, lines=None):
        """
        Remember the input.  With filename None the given lines are checked
        and reported as 'stdin'; otherwise the file is read unless lines
        are supplied explicitly.
        """
        self.filename = filename
        if filename is None:
            self.filename = 'stdin'
            self.lines = lines or []
        elif lines is None:
            self.lines = readlines(filename)
        else:
            self.lines = lines
        options.counters['physical lines'] += len(self.lines)

    def readline(self):
        """
        Get the next line from the input buffer.

        Returns '' once all lines have been consumed.
        """
        self.line_number += 1
        if self.line_number > len(self.lines):
            return ''
        return self.lines[self.line_number - 1]

    def readline_check_physical(self):
        """
        Check and return the next physical line. This method can be
        used to feed tokenize.generate_tokens.
        """
        line = self.readline()
        if line:
            self.check_physical(line)
        return line

    def run_check(self, check, argument_names):
        """
        Run a check plugin.

        Each name in argument_names is resolved as an attribute of this
        Checker and passed positionally to check.
        """
        arguments = [getattr(self, name) for name in argument_names]
        return check(*arguments)

    def check_physical(self, line):
        """
        Run all physical checks on a raw input line.
        """
        self.physical_line = line
        # The first indented line decides the file's indentation character.
        if self.indent_char is None and len(line) and line[0] in ' \t':
            self.indent_char = line[0]
        for name, check, argument_names in options.physical_checks:
            result = self.run_check(check, argument_names)
            if result is not None:
                offset, text = result
                self.report_error(self.line_number, offset, text, check)

    def build_tokens_line(self):
        """
        Build a logical line from tokens.

        Sets self.logical_line and self.mapping, a list of
        (offset in the logical line, token) pairs used to translate
        offsets back to positions in the input.
        """
        self.mapping = []
        logical = []
        length = 0
        previous = None
        for token in self.tokens:
            token_type, text = token[0:2]
            if token_type in SKIP_TOKENS:
                continue
            if token_type == tokenize.STRING:
                text = mute_string(text)
            if previous:
                end_line, end = previous[3]
                start_line, start = token[2]
                if end_line != start_line:  # different row
                    prev_text = self.lines[end_line - 1][end - 1]
                    if prev_text == ',' or (prev_text not in '{[('
                                            and text not in '}])'):
                        logical.append(' ')
                        length += 1
                elif end != start:  # different column
                    fill = self.lines[end_line - 1][end:start]
                    logical.append(fill)
                    length += len(fill)
            self.mapping.append((length, token))
            logical.append(text)
            length += len(text)
            previous = token
        self.logical_line = ''.join(logical)
        assert self.logical_line.lstrip() == self.logical_line
        assert self.logical_line.rstrip() == self.logical_line

    def check_logical(self):
        """
        Build a line from tokens and run all logical checks on it.
        """
        options.counters['logical lines'] += 1
        self.build_tokens_line()
        first_line = self.lines[self.mapping[0][1][2][0] - 1]
        indent = first_line[:self.mapping[0][1][2][1]]
        self.previous_indent_level = self.indent_level
        self.indent_level = expand_indent(indent)
        if options.verbose >= 2:
            print(self.logical_line[:80].rstrip())
        for name, check, argument_names in options.logical_checks:
            if options.verbose >= 4:
                print('   ' + name)
            result = self.run_check(check, argument_names)
            if result is not None:
                offset, text = result
                if isinstance(offset, tuple):
                    # The check already returned a (row, column) position.
                    original_number, original_offset = offset
                else:
                    # Map the logical offset back through the last token
                    # that starts at or before it.
                    for token_offset, token in self.mapping:
                        if offset >= token_offset:
                            original_number = token[2][0]
                            original_offset = (token[2][1]
                                               + offset - token_offset)
                self.report_error(original_number, original_offset,
                                  text, check)
        self.previous_logical = self.logical_line

    def check_all(self, expected=None, line_offset=0):
        """
        Run all checks on the input file.

        expected lists codes that are counted but not reported; line_offset
        is added to reported line numbers.  Returns the number of reported
        errors.
        """
        self.expected = expected or ()
        self.line_offset = line_offset
        self.line_number = 0
        self.file_errors = 0
        self.indent_char = None
        self.indent_level = 0
        self.previous_logical = ''
        self.blank_lines = 0
        self.blank_lines_before_comment = 0
        self.tokens = []
        parens = 0
        for token in tokenize.generate_tokens(self.readline_check_physical):
            if options.verbose >= 3:
                if token[2][0] == token[3][0]:
                    pos = '[%s:%s]' % (token[2][1] or '', token[3][1])
                else:
                    pos = 'l.%s' % token[3][0]
                print('l.%s\t%s\t%s\t%r' %
                    (token[2][0], pos, tokenize.tok_name[token[0]], token[1]))
            self.tokens.append(token)
            token_type, text = token[0:2]
            if token_type == tokenize.OP and text in '([{':
                parens += 1
            if token_type == tokenize.OP and text in '}])':
                parens -= 1
            if token_type == tokenize.NEWLINE and not parens:
                self.check_logical()
                self.blank_lines = 0
                self.blank_lines_before_comment = 0
                self.tokens = []
            if token_type == tokenize.NL and not parens:
                if len(self.tokens) <= 1:
                    # The physical line contains only this token.
                    self.blank_lines += 1
                self.tokens = []
            if token_type == tokenize.COMMENT:
                source_line = token[4]
                token_start = token[2][1]
                if source_line[:token_start].strip() == '':
                    self.blank_lines_before_comment = max(self.blank_lines,
                        self.blank_lines_before_comment)
                    self.blank_lines = 0
                if text.endswith('\n') and not parens:
                    # The comment also ends a physical line.  This works around
                    # Python < 2.6 behaviour, which does not generate NL after
                    # a comment which is on a line by itself.
                    self.tokens = []
        return self.file_errors

    def report_error(self, line_number, offset, text, check):
        """
        Report an error, according to options.

        text starts with the four-character code; the rest (after one
        separator character) is stored as the message for that code.
        """
        code = text[:4]
        if ignore_code(code):
            return
        if options.quiet == 1 and not self.file_errors:
            message(self.filename)
        if code in options.counters:
            options.counters[code] += 1
        else:
            options.counters[code] = 1
            options.messages[code] = text[5:]
        if options.quiet or code in self.expected:
            # Don't care about expected errors or warnings
            return
        self.file_errors += 1
        if options.counters[code] == 1 or options.repeat:
            message("%s:%s:%d: %s" %
                    (self.filename, self.line_offset + line_number,
                     offset + 1, text))
            if options.show_source:
                line = self.lines[line_number - 1]
                message(line.rstrip())
                message(' ' * offset + '^')
            if options.show_pep8:
                message(check.__doc__.lstrip('\n').rstrip())
def input_file(filename):
    """
    Run all checks on a Python source file.

    Returns the number of errors reported by Checker.check_all().
    """
    if options.verbose:
        message('checking ' + filename)
    errors = Checker(filename).check_all()
    return errors
def input_dir(dirname, runner=None):
    """
    Check all Python source files in this directory and all subdirectories.

    runner is called with the path of every matching file; it defaults to
    input_file.  Excluded directories are not descended into.
    """
    dirname = dirname.rstrip('/')
    if excluded(dirname):
        return
    if runner is None:
        runner = input_file
    for root, dirs, files in os.walk(dirname):
        if options.verbose:
            message('directory ' + root)
        options.counters['directories'] += 1
        dirs.sort()
        # Prune in place so os.walk skips excluded directories.  Rebuilding
        # the list avoids removing entries while iterating over it, which
        # would skip the entry following each removed one.
        dirs[:] = [subdir for subdir in dirs if not excluded(subdir)]
        files.sort()
        for filename in files:
            if filename_match(filename) and not excluded(filename):
                options.counters['files'] += 1
                runner(os.path.join(root, filename))
def excluded(filename):
    """
    Check if options.exclude contains a pattern that matches filename.

    Only the base name of filename is matched.
    """
    basename = os.path.basename(filename)
    for pattern in options.exclude:
        if fnmatch(basename, pattern):
            return True
    return False
def filename_match(filename):
    """
    Check if options.filename contains a pattern that matches filename.
    If options.filename is unspecified, this always returns True.
    """
    if not options.filename:
        return True
    for pattern in options.filename:
        if fnmatch(filename, pattern):
            return True
    return False
def ignore_code(code):
    """
    Check if options.ignore contains a prefix of the error code.
    If options.select contains a prefix of the error code, do not ignore it.
    """
    # Selection is tested first, so it wins over an ignore entry.
    for select in options.select:
        if code.startswith(select):
            return False
    for ignore in options.ignore:
        if code.startswith(ignore):
            return True
    return False
def reset_counters():
    """Drop all per-code counters and messages, keeping the benchmark keys."""
    # Iterate over a copy of the keys because entries are deleted.
    for key in list(options.counters.keys()):
        if key not in BENCHMARK_KEYS:
            del options.counters[key]
    options.messages = {}
def get_error_statistics():
    """Get error statistics."""
    return get_statistics("E")
def get_warning_statistics():
    """Get warning statistics."""
    return get_statistics("W")
def get_statistics(prefix=''):
    """
    Get statistics for message codes that start with the prefix.

    prefix='' matches all errors and warnings
    prefix='E' matches all errors
    prefix='W' matches all warnings
    prefix='E4' matches all errors that have to do with imports

    Returns a list of "count code message" strings sorted by code.
    """
    stats = []
    keys = list(options.messages.keys())
    keys.sort()
    for key in keys:
        if key.startswith(prefix):
            stats.append('%-7s %s %s' %
                         (options.counters[key], key, options.messages[key]))
    return stats
def get_count(prefix=''):
    """Return the total count of errors and warnings."""
    keys = list(options.messages.keys())
    count = 0
    for key in keys:
        if key.startswith(prefix):
            count += options.counters[key]
    return count
def print_statistics(prefix=''):
    """Print overall statistics (number of errors and warnings)."""
    for line in get_statistics(prefix):
        print(line)
def print_benchmark(elapsed):
    """
    Print benchmark numbers.

    elapsed is the run time in seconds.
    """
    print('%-7.2f %s' % (elapsed, 'seconds elapsed'))
    for key in BENCHMARK_KEYS:
        total = options.counters[key]
        # A very short run on a coarse clock can report zero elapsed time.
        if elapsed:
            rate = total / elapsed
        else:
            rate = 0
        print('%-7d %s per second (%d total)' % (rate, key, total))
def run_tests(filename):
    """
    Run all the tests from a file.

    A test file can provide many tests.  Each test starts with a declaration.
    This declaration is a single line starting with '#:'.
    It declares codes of expected failures, separated by spaces or 'Okay'
    if no failure is expected.
    If the file does not contain such declaration, it should pass all tests.
    If the declaration is empty, following lines are not checked, until next
    declaration.

    Examples:

     * Only E224 and W701 are expected:         #: E224 W701
     * Following example is conform:            #: Okay
     * Don't check these lines:                 #:
    """
    # The extra empty declaration flushes the last test case.
    lines = readlines(filename) + ['#:\n']
    line_offset = 0
    codes = ['Okay']
    testcase = []
    for index, line in enumerate(lines):
        if not line.startswith('#:'):
            if codes:
                # Collect the lines of the test case
                testcase.append(line)
            continue
        if codes and index > 0:
            label = '%s:%s:1' % (filename, line_offset + 1)
            codes = [c for c in codes if c != 'Okay']
            # Run the checker
            errors = Checker(filename, testcase).check_all(codes, line_offset)
            # Check if the expected errors were found
            for code in codes:
                if not options.counters.get(code):
                    errors += 1
                    message('%s: error %s not found' % (label, code))
            if options.verbose and not errors:
                message('%s: passed (%s)' % (label, ' '.join(codes)))
            # Keep showing errors for multiple tests
            reset_counters()
        # output the real line numbers
        line_offset = index
        # configure the expected errors
        codes = line.split()[1:]
        # empty the test case buffer
        del testcase[:]
def selftest():
    """
    Test all check functions with test cases in docstrings.

    Every docstring line of a registered check that matches
    SELFTEST_REGEX provides an expected code and a source snippet.  In
    the snippet a literal backslash-n separates the lines, backslash-t
    stands for a tab and backslash-s for a space.  The snippet is run
    through a Checker and the counters are compared with the expected
    code; 'Okay' means that no code may be counted.

    Each failure is printed.  A summary is printed as well when
    options.verbose is set.
    """
    count_passed = 0
    count_failed = 0
    checks = options.physical_checks + options.logical_checks
    for name, check, argument_names in checks:
        # A check without a docstring (a bare plugin, or any check under
        # python -OO) has no test cases; it must not crash the self test.
        docstring = check.__doc__ or ''
        for line in docstring.splitlines():
            match = SELFTEST_REGEX.match(line.lstrip())
            if match is None:
                continue
            code, source = match.groups()
            checker = Checker(None)
            for part in source.split(r'\n'):
                part = part.replace(r'\t', '\t')
                part = part.replace(r'\s', ' ')
                checker.lines.append(part + '\n')
            # Same level as -qq; note that the setting is not restored
            options.quiet = 2
            checker.check_all()
            error = None
            if code == 'Okay':
                if len(options.counters) > len(BENCHMARK_KEYS):
                    codes = [key for key in options.counters.keys()
                             if key not in BENCHMARK_KEYS]
                    error = "incorrectly found %s" % ', '.join(codes)
            elif not options.counters.get(code):
                error = "failed to find %s" % code
            # Reset the counters
            reset_counters()
            if not error:
                count_passed += 1
                continue
            count_failed += 1
            if len(checker.lines) == 1:
                print("pep8.py: %s: %s" %
                      (error, checker.lines[0].rstrip()))
            else:
                print("pep8.py: %s:" % error)
                for source_line in checker.lines:
                    print(source_line.rstrip())
    if options.verbose:
        print("%d passed and %d failed." % (count_passed, count_failed))
        if count_failed:
            print("Test failed.")
        else:
            print("Test passed.")
def process_options(arglist=None):
    """
    Parse the options given via arglist or via the command line.

    The parsed values are post-processed: the comma separated options
    (exclude, filename, select, ignore) are turned into lists, the
    check functions are looked up and the counters are initialised.

    Returns the (options, args) pair, which is also stored in the
    module globals of the same names.
    """
    global options, args
    parser = OptionParser(version=__version__,
                          usage="%prog [options] input ...")
    add = parser.add_option
    add('-v', '--verbose', default=0, action='count',
        help="print status messages, or debug with -vv")
    add('-q', '--quiet', default=0, action='count',
        help="report only file names, or nothing with -qq")
    add('-r', '--repeat', action='store_true',
        help="show all occurrences of the same error")
    add('--exclude', metavar='patterns', default=DEFAULT_EXCLUDE,
        help="exclude files or directories which match these "
             "comma separated patterns (default: %s)" %
             DEFAULT_EXCLUDE)
    add('--filename', metavar='patterns', default='*.py',
        help="when parsing directories, only check filenames "
             "matching these comma separated patterns (default: "
             "*.py)")
    add('--select', metavar='errors', default='',
        help="select errors and warnings (e.g. E,W6)")
    add('--ignore', metavar='errors', default='',
        help="skip errors and warnings (e.g. E4,W)")
    add('--show-source', action='store_true',
        help="show source code for each error")
    add('--show-pep8', action='store_true',
        help="show text of PEP 8 for each error")
    add('--statistics', action='store_true',
        help="count errors and warnings")
    add('--count', action='store_true',
        help="print total number of errors and warnings "
             "to standard error and set exit code to 1 if "
             "total is not null")
    add('--benchmark', action='store_true',
        help="measure processing speed")
    add('--testsuite', metavar='dir',
        help="run regression tests from dir")
    add('--doctest', action='store_true',
        help="run doctest on myself")

    options, args = parser.parse_args(arglist)
    if options.testsuite:
        # The test suite directory is checked like any other input
        args.append(options.testsuite)
    if not (args or options.doctest):
        parser.error('input not specified')
    options.prog = os.path.basename(sys.argv[0])
    options.exclude = [pattern.rstrip('/')
                       for pattern in options.exclude.split(',')]
    if options.filename:
        options.filename = options.filename.split(',')

    if options.select:
        selected = options.select.split(',')
    else:
        selected = []
    options.select = selected

    if options.ignore:
        ignored = options.ignore.split(',')
    elif selected:
        # Ignore all checks which are not explicitly selected:
        # the empty string is a prefix of every code
        ignored = ['']
    elif options.testsuite or options.doctest:
        # For doctest and testsuite, all checks are required
        ignored = []
    else:
        # The default choice: ignore controversial checks
        ignored = DEFAULT_IGNORE.split(',')
    options.ignore = ignored

    options.physical_checks = find_checks('physical_line')
    options.logical_checks = find_checks('logical_line')
    options.counters = dict.fromkeys(BENCHMARK_KEYS, 0)
    options.messages = {}
    return options, args
def _main():
    """
    Parse options and run checks on Python source.

    Directories are handed to input_dir; any other path that is not
    excluded is counted as a file and handed to the runner.  Statistics
    and benchmark numbers are printed on request.  When any error or
    warning was counted the process exits with status 1, after writing
    the total to standard error if options.count is set.
    """
    options, args = process_options()
    if options.doctest:
        import doctest
        doctest.testmod(verbose=options.verbose)
        selftest()
    if options.testsuite:
        runner = run_tests
    else:
        runner = input_file
    began = time.time()
    for path in args:
        if os.path.isdir(path):
            input_dir(path, runner=runner)
            continue
        if excluded(path):
            continue
        options.counters['files'] += 1
        runner(path)
    elapsed = time.time() - began
    if options.statistics:
        print_statistics()
    if options.benchmark:
        print_benchmark(elapsed)
    total = get_count()
    if not total:
        return
    if options.count:
        sys.stderr.write(str(total) + '\n')
    sys.exit(1)
1359 if __name__
== '__main__':