lib/pep8/pep8.py

   1 #!/usr/bin/python
   2 # pep8.py - Check Python source code formatting, according to PEP 8
   3 # Copyright (C) 2006 Johann C. Rocholl <johann@rocholl.net>
   4 #
   5 # Permission is hereby granted, free of charge, to any person
   6 # obtaining a copy of this software and associated documentation files
   7 # (the "Software"), to deal in the Software without restriction,
   8 # including without limitation the rights to use, copy, modify, merge,
   9 # publish, distribute, sublicense, and/or sell copies of the Software,
  10 # and to permit persons to whom the Software is furnished to do so,
  11 # subject to the following conditions:
  12 #
  13 # The above copyright notice and this permission notice shall be
  14 # included in all copies or substantial portions of the Software.
  15 #
  16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  17 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  18 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  19 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  20 # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  21 # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  22 # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  23 # SOFTWARE.
  24
  25 """
  26 Check Python source code formatting, according to PEP 8:
  27 http://www.python.org/dev/peps/pep-0008/
  28
  29 For usage and a list of options, try this:
  30 $ python pep8.py -h
  31
  32 This program and its regression test suite live here:
  33 http://github.com/jcrocholl/pep8
  34
  35 Groups of errors and warnings:
  36 E errors
  37 W warnings
  38 100 indentation
  39 200 whitespace
  40 300 blank lines
  41 400 imports
  42 500 line length
  43 600 deprecation
  44 700 statements
  45
  46 You can add checks to this program by writing plugins. Each plugin is
  47 a simple function that is called for each line of source code, either
  48 physical or logical.
  49
  50 Physical line:
  51 - Raw line of text from the input file.
  52
  53 Logical line:
  54 - Multi-line statements converted to a single line.
  55 - Stripped left and right.
  56 - Contents of strings replaced with 'xxx' of same length.
  57 - Comments removed.
  58
  59 The check function requests physical or logical lines by the name of
  60 the first argument:
  61
  62 def maximum_line_length(physical_line)
  63 def extraneous_whitespace(logical_line)
  64 def blank_lines(logical_line, blank_lines, indent_level, line_number)
  65
  66 The last example above demonstrates how check plugins can request
  67 additional information with extra arguments. All attributes of the
  68 Checker object are available. Some examples:
  69
  70 lines: a list of the raw lines from the input file
  71 tokens: the tokens that contribute to this logical line
  72 line_number: line number in the input file
  73 blank_lines: blank lines before this one
  74 indent_char: first indentation character in this file (' ' or '\t')
  75 indent_level: indentation (with tabs expanded to multiples of 8)
  76 previous_indent_level: indentation on previous line
  77 previous_logical: previous logical line
  78
  79 The docstring of each check function shall be the relevant part of
  80 text from PEP 8. It is printed if the user enables --show-pep8.
  81 Several docstrings contain examples directly from the PEP 8 document.
  82
  83 Okay: spam(ham[1], {eggs: 2})
  84 E201: spam( ham[1], {eggs: 2})
  85
  86 These examples are verified automatically when pep8.py is run with the
  87 --doctest option. You can add examples for your own check functions.
  88 The format is simple: "Okay" or error/warning code followed by colon
  89 and space, the rest of the line is example source code. If you put 'r'
  90 before the docstring, you can use \n for newline, \t for tab and \s
  91 for space.
  92
  93 """
  94
  95 __version__ = '0.5.1dev'
  96
  97 import os
  98 import sys
  99 import re
 100 import time
 101 import inspect
 102 import keyword
 103 import tokenize
 104 from optparse import OptionParser
 105 from fnmatch import fnmatch
 106 try:
 107     frozenset
 108 except NameError:
 109     from sets import ImmutableSet as frozenset
 110
 111
 112 DEFAULT_EXCLUDE = '.svn,CVS,.bzr,.hg,.git'
 113 DEFAULT_IGNORE = 'E24'
 114 MAX_LINE_LENGTH = 79
 115
 116 INDENT_REGEX = re.compile(r'([ \t]*)')
 117 RAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*(,)')
 118 SELFTEST_REGEX = re.compile(r'(Okay|[EW]\d{3}):\s(.*)')
 119 ERRORCODE_REGEX = re.compile(r'[EW]\d{3}')
 120 DOCSTRING_REGEX = re.compile(r'u?r?["\']')
 121 WHITESPACE_AROUND_OPERATOR_REGEX = \
 122     re.compile('([^\w\s]*)\s*(\t|  )\s*([^\w\s]*)')
 123 EXTRANEOUS_WHITESPACE_REGEX = re.compile(r'[[({] | []}),;:]')
 124 WHITESPACE_AROUND_NAMED_PARAMETER_REGEX = \
 125     re.compile(r'[()]|\s=[^=]|[^=!<>]=\s')
 126
 127
 128 WHITESPACE = ' \t'
 129
 130 BINARY_OPERATORS = frozenset(['**=', '*=', '+=', '-=', '!=', '<>',
 131     '%=', '^=', '&=', '|=', '==', '/=', '//=', '<=', '>=', '<<=', '>>=',
 132     '%',  '^',  '&',  '|',  '=',  '/',  '//',  '<',  '>',  '<<'])
 133 UNARY_OPERATORS = frozenset(['>>', '**', '*', '+', '-'])
 134 OPERATORS = BINARY_OPERATORS | UNARY_OPERATORS
 135 SKIP_TOKENS = frozenset([tokenize.COMMENT, tokenize.NL, tokenize.INDENT,
 136                          tokenize.DEDENT, tokenize.NEWLINE])
 137 E225NOT_KEYWORDS = (frozenset(keyword.kwlist + ['print']) -
 138                     frozenset(['False', 'None', 'True']))
 139 BENCHMARK_KEYS = ('directories', 'files', 'logical lines', 'physical lines')
 140
 141 options = None
 142 args = None
 143
 144
 145 ##############################################################################
 146 # Plugins (check functions) for physical lines
 147 ##############################################################################
 148
 149
 150 def tabs_or_spaces(physical_line, indent_char):
 151     r"""
 152     Never mix tabs and spaces.
 153
 154     The most popular way of indenting Python is with spaces only.  The
 155     second-most popular way is with tabs only.  Code indented with a mixture
 156     of tabs and spaces should be converted to using spaces exclusively.  When
 157     invoking the Python command line interpreter with the -t option, it issues
 158     warnings about code that illegally mixes tabs and spaces.  When using -tt
 159     these warnings become errors.  These options are highly recommended!
 160
 161     Okay: if a == 0:\n        a = 1\n        b = 1
 162     E101: if a == 0:\n        a = 1\n\tb = 1
 163     """
 164     indent = INDENT_REGEX.match(physical_line).group(1)
 165     for offset, char in enumerate(indent):
 166         if char != indent_char:
 167             return offset, "E101 indentation contains mixed spaces and tabs"
 168
 169
 170 def tabs_obsolete(physical_line):
 171     r"""
 172     For new projects, spaces-only are strongly recommended over tabs.  Most
 173     editors have features that make this easy to do.
 174
 175     Okay: if True:\n    return
 176     W191: if True:\n\treturn
 177     """
 178     indent = INDENT_REGEX.match(physical_line).group(1)
 179     if indent.count('\t'):
 180         return indent.index('\t'), "W191 indentation contains tabs"
 181
 182
 183 def trailing_whitespace(physical_line):
 184     r"""
 185     JCR: Trailing whitespace is superfluous.
 186     FBM: Except when it occurs as part of a blank line (i.e. the line is
 187          nothing but whitespace). According to Python docs[1] a line with only
 188          whitespace is considered a blank line, and is to be ignored. However,
 189          matching a blank line to its indentation level avoids mistakenly
 190          terminating a multi-line statement (e.g. class declaration) when
 191          pasting code into the standard Python interpreter.
 192
 193          [1] http://docs.python.org/reference/lexical_analysis.html#blank-lines
 194
 195     The warning returned varies on whether the line itself is blank, for easier
 196     filtering for those who want to indent their blank lines.
 197
 198     Okay: spam(1)
 199     W291: spam(1)\s
 200     W293: class Foo(object):\n    \n    bang = 12
 201     """
 202     physical_line = physical_line.rstrip('\n')    # chr(10), newline
 203     physical_line = physical_line.rstrip('\r')    # chr(13), carriage return
 204     physical_line = physical_line.rstrip('\x0c')  # chr(12), form feed, ^L
 205     stripped = physical_line.rstrip()
 206     if physical_line != stripped:
 207         if stripped:
 208             return len(stripped), "W291 trailing whitespace"
 209         else:
 210             return 0, "W293 blank line contains whitespace"
 211
 212
 213 def trailing_blank_lines(physical_line, lines, line_number):
 214     r"""
 215     JCR: Trailing blank lines are superfluous.
 216
 217     Okay: spam(1)
 218     W391: spam(1)\n
 219     """
 220     if physical_line.strip() == '' and line_number == len(lines):
 221         return 0, "W391 blank line at end of file"
 222
 223
 224 def missing_newline(physical_line):
 225     """
 226     JCR: The last line should have a newline.
 227     """
 228     if physical_line.rstrip() == physical_line:
 229         return len(physical_line), "W292 no newline at end of file"
 230
 231
 232 def maximum_line_length(physical_line):
 233     """
 234     Limit all lines to a maximum of 79 characters.
 235
 236     There are still many devices around that are limited to 80 character
 237     lines; plus, limiting windows to 80 characters makes it possible to have
 238     several windows side-by-side.  The default wrapping on such devices looks
 239     ugly.  Therefore, please limit all lines to a maximum of 79 characters.
 240     For flowing long blocks of text (docstrings or comments), limiting the
 241     length to 72 characters is recommended.
 242     """
 243     line = physical_line.rstrip()
 244     length = len(line)
 245     if length > MAX_LINE_LENGTH:
 246         try:
 247             # The line could contain multi-byte characters
 248             if not hasattr(line, 'decode'):   # Python 3
 249                 line = line.encode('latin-1')
 250             length = len(line.decode('utf-8'))
 251         except UnicodeDecodeError:
 252             pass
 253     if length > MAX_LINE_LENGTH:
 254         return MAX_LINE_LENGTH, "E501 line too long (%d characters)" % length
 255
 256
 257 ##############################################################################
 258 # Plugins (check functions) for logical lines
 259 ##############################################################################
 260
 261
 262 def blank_lines(logical_line, blank_lines, indent_level, line_number,
 263                 previous_logical, previous_indent_level,
 264                 blank_lines_before_comment):
 265     r"""
 266     Separate top-level function and class definitions with two blank lines.
 267
 268     Method definitions inside a class are separated by a single blank line.
 269
 270     Extra blank lines may be used (sparingly) to separate groups of related
 271     functions.  Blank lines may be omitted between a bunch of related
 272     one-liners (e.g. a set of dummy implementations).
 273
 274     Use blank lines in functions, sparingly, to indicate logical sections.
 275
 276     Okay: def a():\n    pass\n\n\ndef b():\n    pass
 277     Okay: def a():\n    pass\n\n\n# Foo\n# Bar\n\ndef b():\n    pass
 278
 279     E301: class Foo:\n    b = 0\n    def bar():\n        pass
 280     E302: def a():\n    pass\n\ndef b(n):\n    pass
 281     E303: def a():\n    pass\n\n\n\ndef b(n):\n    pass
 282     E303: def a():\n\n\n\n    pass
 283     E304: @decorator\n\ndef a():\n    pass
 284     """
 285     if line_number == 1:
 286         return  # Don't expect blank lines before the first line
 287     max_blank_lines = max(blank_lines, blank_lines_before_comment)
 288     if previous_logical.startswith('@'):
 289         if max_blank_lines:
 290             return 0, "E304 blank lines found after function decorator"
 291     elif max_blank_lines > 2 or (indent_level and max_blank_lines == 2):
 292         return 0, "E303 too many blank lines (%d)" % max_blank_lines
 293     elif (logical_line.startswith('def ') or
 294           logical_line.startswith('class ') or
 295           logical_line.startswith('@')):
 296         if indent_level:
 297             if not (max_blank_lines or previous_indent_level < indent_level or
 298                     DOCSTRING_REGEX.match(previous_logical)):
 299                 return 0, "E301 expected 1 blank line, found 0"
 300         elif max_blank_lines != 2:
 301             return 0, "E302 expected 2 blank lines, found %d" % max_blank_lines
 302
 303
 304 def extraneous_whitespace(logical_line):
 305     """
 306     Avoid extraneous whitespace in the following situations:
 307
 308     - Immediately inside parentheses, brackets or braces.
 309
 310     - Immediately before a comma, semicolon, or colon.
 311
 312     Okay: spam(ham[1], {eggs: 2})
 313     E201: spam( ham[1], {eggs: 2})
 314     E201: spam(ham[ 1], {eggs: 2})
 315     E201: spam(ham[1], { eggs: 2})
 316     E202: spam(ham[1], {eggs: 2} )
 317     E202: spam(ham[1 ], {eggs: 2})
 318     E202: spam(ham[1], {eggs: 2 })
 319
 320     E203: if x == 4: print x, y; x, y = y , x
 321     E203: if x == 4: print x, y ; x, y = y, x
 322     E203: if x == 4 : print x, y; x, y = y, x
 323     """
 324     line = logical_line
 325     for match in EXTRANEOUS_WHITESPACE_REGEX.finditer(line):
 326         text = match.group()
 327         char = text.strip()
 328         found = match.start()
 329         if text == char + ' ' and char in '([{':
 330             return found + 1, "E201 whitespace after '%s'" % char
 331         if text == ' ' + char and line[found - 1] != ',':
 332             if char in '}])':
 333                 return found, "E202 whitespace before '%s'" % char
 334             if char in ',;:':
 335                 return found, "E203 whitespace before '%s'" % char
 336
 337
 338 def missing_whitespace(logical_line):
 339     """
 340     JCR: Each comma, semicolon or colon should be followed by whitespace.
 341
 342     Okay: [a, b]
 343     Okay: (3,)
 344     Okay: a[1:4]
 345     Okay: a[:4]
 346     Okay: a[1:]
 347     Okay: a[1:4:2]
 348     E231: ['a','b']
 349     E231: foo(bar,baz)
 350     """
 351     line = logical_line
 352     for index in range(len(line) - 1):
 353         char = line[index]
 354         if char in ',;:' and line[index + 1] not in WHITESPACE:
 355             before = line[:index]
 356             if char == ':' and before.count('[') > before.count(']'):
 357                 continue  # Slice syntax, no space required
 358             if char == ',' and line[index + 1] == ')':
 359                 continue  # Allow tuple with only one element: (3,)
 360             return index, "E231 missing whitespace after '%s'" % char
 361
 362
 363 def indentation(logical_line, previous_logical, indent_char,
 364                 indent_level, previous_indent_level):
 365     r"""
 366     Use 4 spaces per indentation level.
 367
 368     For really old code that you don't want to mess up, you can continue to
 369     use 8-space tabs.
 370
 371     Okay: a = 1
 372     Okay: if a == 0:\n    a = 1
 373     E111:   a = 1
 374
 375     Okay: for item in items:\n    pass
 376     E112: for item in items:\npass
 377
 378     Okay: a = 1\nb = 2
 379     E113: a = 1\n    b = 2
 380     """
 381     if indent_char == ' ' and indent_level % 4:
 382         return 0, "E111 indentation is not a multiple of four"
 383     indent_expect = previous_logical.endswith(':')
 384     if indent_expect and indent_level <= previous_indent_level:
 385         return 0, "E112 expected an indented block"
 386     if indent_level > previous_indent_level and not indent_expect:
 387         return 0, "E113 unexpected indentation"
 388
 389
 390 def whitespace_before_parameters(logical_line, tokens):
 391     """
 392     Avoid extraneous whitespace in the following situations:
 393
 394     - Immediately before the open parenthesis that starts the argument
 395       list of a function call.
 396
 397     - Immediately before the open parenthesis that starts an indexing or
 398       slicing.
 399
 400     Okay: spam(1)
 401     E211: spam (1)
 402
 403     Okay: dict['key'] = list[index]
 404     E211: dict ['key'] = list[index]
 405     E211: dict['key'] = list [index]
 406     """
 407     prev_type = tokens[0][0]
 408     prev_text = tokens[0][1]
 409     prev_end = tokens[0][3]
 410     for index in range(1, len(tokens)):
 411         token_type, text, start, end, line = tokens[index]
 412         if (token_type == tokenize.OP and
 413             text in '([' and
 414             start != prev_end and
 415             (prev_type == tokenize.NAME or prev_text in '}])') and
 416             # Syntax "class A (B):" is allowed, but avoid it
 417             (index < 2 or tokens[index - 2][1] != 'class') and
 418             # Allow "return (a.foo for a in range(5))"
 419             (not keyword.iskeyword(prev_text))):
 420             return prev_end, "E211 whitespace before '%s'" % text
 421         prev_type = token_type
 422         prev_text = text
 423         prev_end = end
 424
 425
 426 def whitespace_around_operator(logical_line):
 427     """
 428     Avoid extraneous whitespace in the following situations:
 429
 430     - More than one space around an assignment (or other) operator to
 431       align it with another.
 432
 433     Okay: a = 12 + 3
 434     E221: a = 4  + 5
 435     E222: a = 4 +  5
 436     E223: a = 4\t+ 5
 437     E224: a = 4 +\t5
 438     """
 439     for match in WHITESPACE_AROUND_OPERATOR_REGEX.finditer(logical_line):
 440         before, whitespace, after = match.groups()
 441         tab = whitespace == '\t'
 442         offset = match.start(2)
 443         if before in OPERATORS:
 444             return offset, (tab and "E224 tab after operator" or
 445                             "E222 multiple spaces after operator")
 446         elif after in OPERATORS:
 447             return offset, (tab and "E223 tab before operator" or
 448                             "E221 multiple spaces before operator")
 449
 450
def missing_whitespace_around_operator(logical_line, tokens):
    r"""
    - Always surround these binary operators with a single space on
      either side: assignment (=), augmented assignment (+=, -= etc.),
      comparisons (==, <, >, !=, <>, <=, >=, in, not in, is, is not),
      Booleans (and, or, not).

    - Use spaces around arithmetic operators.

    Okay: i = i + 1
    Okay: submitted += 1
    Okay: x = x * 2 - 1
    Okay: hypot2 = x * x + y * y
    Okay: c = (a + b) * (a - b)
    Okay: foo(bar, key='word', *args, **kwargs)
    Okay: baz(**kwargs)
    Okay: negative = -1
    Okay: spam(-1)
    Okay: alpha[:-i]
    Okay: if not -5 < x < +5:\n    pass
    Okay: lambda *args, **kw: (args, kw)

    E225: i=i+1
    E225: submitted +=1
    E225: x = x*2 - 1
    E225: hypot2 = x*x + y*y
    E225: c = (a+b) * (a-b)
    E225: c = alpha -4
    E225: z = x **y
    """
    # Returns (position, message) for the first operator that lacks
    # whitespace on either side, or None when the whole line passes.
    #
    # parens: incremented for '(' and for the 'lambda' keyword, decremented
    #     for ')'.  While it is non-zero a bare '=' is not checked.
    # need_space: True while the operator seen in the previous iteration
    #     still has to be followed by whitespace.
    # prev_type, prev_text, prev_end: type, text and end position of the
    #     previously processed token.  prev_end stays None until a token has
    #     been processed, so an operator in first position is never checked.
    parens = 0
    need_space = False
    prev_type = tokenize.OP
    prev_text = prev_end = None
    for token_type, text, start, end, line in tokens:
        if token_type in (tokenize.NL, tokenize.NEWLINE, tokenize.ERRORTOKEN):
            # ERRORTOKEN is triggered by backticks in Python 3000
            continue
        if text in ('(', 'lambda'):
            parens += 1
        elif text == ')':
            parens -= 1
        if need_space:
            # The previous token was an operator that passed the "space
            # before" test; now verify that there is a gap after it.
            if start != prev_end:
                need_space = False
            elif text == '>' and prev_text == '<':
                # Tolerate the "<>" operator, even if running Python 3
                pass
            else:
                return prev_end, "E225 missing whitespace around operator"
        elif token_type == tokenize.OP and prev_end is not None:
            if text == '=' and parens:
                # Allow keyword args or defaults: foo(bar=None).
                pass
            elif text in BINARY_OPERATORS:
                need_space = True
            elif text in UNARY_OPERATORS:
                # Allow unary operators: -123, -x, +1.
                # Allow argument unpacking: foo(*args, **kwargs).
                # The operator is treated as binary only when it follows a
                # closing bracket, a NAME outside E225NOT_KEYWORDS, or a
                # token that is neither an operator nor a NAME.
                if prev_type == tokenize.OP:
                    if prev_text in '}])':
                        need_space = True
                elif prev_type == tokenize.NAME:
                    if prev_text not in E225NOT_KEYWORDS:
                        need_space = True
                else:
                    need_space = True
            # Missing whitespace *before* the operator is reported at once;
            # whitespace after it is checked in the next iteration.
            if need_space and start == prev_end:
                return prev_end, "E225 missing whitespace around operator"
        prev_type = token_type
        prev_text = text
        prev_end = end
 523
 524
 525 def whitespace_around_comma(logical_line):
 526     """
 527     Avoid extraneous whitespace in the following situations:
 528
 529     - More than one space around an assignment (or other) operator to
 530       align it with another.
 531
 532     JCR: This should also be applied around comma etc.
 533     Note: these checks are disabled by default
 534
 535     Okay: a = (1, 2)
 536     E241: a = (1,  2)
 537     E242: a = (1,\t2)
 538     """
 539     line = logical_line
 540     for separator in ',;:':
 541         found = line.find(separator + '  ')
 542         if found > -1:
 543             return found + 1, "E241 multiple spaces after '%s'" % separator
 544         found = line.find(separator + '\t')
 545         if found > -1:
 546             return found + 1, "E242 tab after '%s'" % separator
 547
 548
 549 def whitespace_around_named_parameter_equals(logical_line):
 550     """
 551     Don't use spaces around the '=' sign when used to indicate a
 552     keyword argument or a default parameter value.
 553
 554     Okay: def complex(real, imag=0.0):
 555     Okay: return magic(r=real, i=imag)
 556     Okay: boolean(a == b)
 557     Okay: boolean(a != b)
 558     Okay: boolean(a <= b)
 559     Okay: boolean(a >= b)
 560
 561     E251: def complex(real, imag = 0.0):
 562     E251: return magic(r = real, i = imag)
 563     """
 564     parens = 0
 565     for match in WHITESPACE_AROUND_NAMED_PARAMETER_REGEX.finditer(
 566             logical_line):
 567         text = match.group()
 568         if parens and len(text) == 3:
 569             issue = "E251 no spaces around keyword / parameter equals"
 570             return match.start(), issue
 571         if text == '(':
 572             parens += 1
 573         elif text == ')':
 574             parens -= 1
 575
 576
 577 def whitespace_before_inline_comment(logical_line, tokens):
 578     """
 579     Separate inline comments by at least two spaces.
 580
 581     An inline comment is a comment on the same line as a statement.  Inline
 582     comments should be separated by at least two spaces from the statement.
 583     They should start with a # and a single space.
 584
 585     Okay: x = x + 1  # Increment x
 586     Okay: x = x + 1    # Increment x
 587     E261: x = x + 1 # Increment x
 588     E262: x = x + 1  #Increment x
 589     E262: x = x + 1  #  Increment x
 590     """
 591     prev_end = (0, 0)
 592     for token_type, text, start, end, line in tokens:
 593         if token_type == tokenize.NL:
 594             continue
 595         if token_type == tokenize.COMMENT:
 596             if not line[:start[1]].strip():
 597                 continue
 598             if prev_end[0] == start[0] and start[1] < prev_end[1] + 2:
 599                 return (prev_end,
 600                         "E261 at least two spaces before inline comment")
 601             if (len(text) > 1 and text.startswith('#  ')
 602                            or not text.startswith('# ')):
 603                 return start, "E262 inline comment should start with '# '"
 604         else:
 605             prev_end = end
 606
 607
 608 def imports_on_separate_lines(logical_line):
 609     r"""
 610     Imports should usually be on separate lines.
 611
 612     Okay: import os\nimport sys
 613     E401: import sys, os
 614
 615     Okay: from subprocess import Popen, PIPE
 616     Okay: from myclas import MyClass
 617     Okay: from foo.bar.yourclass import YourClass
 618     Okay: import myclass
 619     Okay: import foo.bar.yourclass
 620     """
 621     line = logical_line
 622     if line.startswith('import '):
 623         found = line.find(',')
 624         if found > -1:
 625             return found, "E401 multiple imports on one line"
 626
 627
 628 def compound_statements(logical_line):
 629     r"""
 630     Compound statements (multiple statements on the same line) are
 631     generally discouraged.
 632
 633     While sometimes it's okay to put an if/for/while with a small body
 634     on the same line, never do this for multi-clause statements. Also
 635     avoid folding such long lines!
 636
 637     Okay: if foo == 'blah':\n    do_blah_thing()
 638     Okay: do_one()
 639     Okay: do_two()
 640     Okay: do_three()
 641
 642     E701: if foo == 'blah': do_blah_thing()
 643     E701: for x in lst: total += x
 644     E701: while t < 10: t = delay()
 645     E701: if foo == 'blah': do_blah_thing()
 646     E701: else: do_non_blah_thing()
 647     E701: try: something()
 648     E701: finally: cleanup()
 649     E701: if foo == 'blah': one(); two(); three()
 650
 651     E702: do_one(); do_two(); do_three()
 652     """
 653     line = logical_line
 654     found = line.find(':')
 655     if -1 < found < len(line) - 1:
 656         before = line[:found]
 657         if (before.count('{') <= before.count('}') and  # {'a': 1} (dict)
 658             before.count('[') <= before.count(']') and  # [1:2] (slice)
 659             not re.search(r'\blambda\b', before)):      # lambda x: x
 660             return found, "E701 multiple statements on one line (colon)"
 661     found = line.find(';')
 662     if -1 < found:
 663         return found, "E702 multiple statements on one line (semicolon)"
 664
 665
 666 def python_3000_has_key(logical_line):
 667     """
 668     The {}.has_key() method will be removed in the future version of
 669     Python. Use the 'in' operation instead, like:
 670     d = {"a": 1, "b": 2}
 671     if "b" in d:
 672         print d["b"]
 673     """
 674     pos = logical_line.find('.has_key(')
 675     if pos > -1:
 676         return pos, "W601 .has_key() is deprecated, use 'in'"
 677
 678
 679 def python_3000_raise_comma(logical_line):
 680     """
 681     When raising an exception, use "raise ValueError('message')"
 682     instead of the older form "raise ValueError, 'message'".
 683
 684     The paren-using form is preferred because when the exception arguments
 685     are long or include string formatting, you don't need to use line
 686     continuation characters thanks to the containing parentheses.  The older
 687     form will be removed in Python 3000.
 688     """
 689     match = RAISE_COMMA_REGEX.match(logical_line)
 690     if match:
 691         return match.start(1), "W602 deprecated form of raising exception"
 692
 693
 694 def python_3000_not_equal(logical_line):
 695     """
 696     != can also be written <>, but this is an obsolete usage kept for
 697     backwards compatibility only. New code should always use !=.
 698     The older syntax is removed in Python 3000.
 699     """
 700     pos = logical_line.find('<>')
 701     if pos > -1:
 702         return pos, "W603 '<>' is deprecated, use '!='"
 703
 704
 705 def python_3000_backticks(logical_line):
 706     """
 707     Backticks are removed in Python 3000.
 708     Use repr() instead.
 709     """
 710     pos = logical_line.find('`')
 711     if pos > -1:
 712         return pos, "W604 backticks are deprecated, use 'repr()'"
 713
 714
 715 ##############################################################################
 716 # Helper functions
 717 ##############################################################################
 718
 719
 720 if '' == ''.encode():
 721     # Python 2: implicit encoding.
 722     def readlines(filename):
 723         return open(filename).readlines()
 724 else:
 725     # Python 3: decode to latin-1.
 726     # This function is lazy, it does not read the encoding declaration.
 727     # XXX: use tokenize.detect_encoding()
 728     def readlines(filename):
 729         return open(filename, encoding='latin-1').readlines()
 730
 731
 732 def expand_indent(line):
 733     """
 734     Return the amount of indentation.
 735     Tabs are expanded to the next multiple of 8.
 736
 737     >>> expand_indent('    ')
 738     4
 739     >>> expand_indent('\\t')
 740     8
 741     >>> expand_indent('    \\t')
 742     8
 743     >>> expand_indent('       \\t')
 744     8
 745     >>> expand_indent('        \\t')
 746     16
 747     """
 748     result = 0
 749     for char in line:
 750         if char == '\t':
 751             result = result // 8 * 8 + 8
 752         elif char == ' ':
 753             result += 1
 754         else:
 755             break
 756     return result
 757
 758
 759 def mute_string(text):
 760     """
 761     Replace contents with 'xxx' to prevent syntax matching.
 762
 763     >>> mute_string('"abc"')
 764     '"xxx"'
 765     >>> mute_string("'''abc'''")
 766     "'''xxx'''"
 767     >>> mute_string("r'abc'")
 768     "r'xxx'"
 769     """
 770     start = 1
 771     end = len(text) - 1
 772     # String modifiers (e.g. u or r)
 773     if text.endswith('"'):
 774         start += text.index('"')
 775     elif text.endswith("'"):
 776         start += text.index("'")
 777     # Triple quotes
 778     if text.endswith('"""') or text.endswith("'''"):
 779         start += 2
 780         end -= 2
 781     return text[:start] + 'x' * (end - start) + text[end:]
 782
 783
 784 def message(text):
 785     """Print a message."""
 786     # print >> sys.stderr, options.prog + ': ' + text
 787     # print >> sys.stderr, text
 788     print(text)
 789
 790
 791 ##############################################################################
 792 # Framework to run all checks
 793 ##############################################################################
 794
 795
 796 def find_checks(argument_name):
 797     """
 798     Find all globally visible functions where the first argument name
 799     starts with argument_name.
 800     """
 801     checks = []
 802     for name, function in globals().items():
 803         if not inspect.isfunction(function):
 804             continue
 805         args = inspect.getargspec(function)[0]
 806         if args and args[0].startswith(argument_name):
 807             codes = ERRORCODE_REGEX.findall(inspect.getdoc(function) or '')
 808             for code in codes or ['']:
 809                 if not code or not ignore_code(code):
 810                     checks.append((name, function, args))
 811                     break
 812     checks.sort()
 813     return checks
 814
 815
 816 class Checker(object):
 817     """
 818     Load a Python source file, tokenize it, check coding style.
 819     """
 820
 821     def __init__(self, filename, lines=None):
 822         self.filename = filename
 823         if filename is None:
 824             self.filename = 'stdin'
 825             self.lines = lines or []
 826         elif lines is None:
 827             self.lines = readlines(filename)
 828         else:
 829             self.lines = lines
 830         options.counters['physical lines'] += len(self.lines)
 831
 832     def readline(self):
 833         """
 834         Get the next line from the input buffer.
 835         """
 836         self.line_number += 1
 837         if self.line_number > len(self.lines):
 838             return ''
 839         return self.lines[self.line_number - 1]
 840
 841     def readline_check_physical(self):
 842         """
 843         Check and return the next physical line. This method can be
 844         used to feed tokenize.generate_tokens.
 845         """
 846         line = self.readline()
 847         if line:
 848             self.check_physical(line)
 849         return line
 850
 851     def run_check(self, check, argument_names):
 852         """
 853         Run a check plugin.
 854         """
 855         arguments = []
 856         for name in argument_names:
 857             arguments.append(getattr(self, name))
 858         return check(*arguments)
 859
 860     def check_physical(self, line):
 861         """
 862         Run all physical checks on a raw input line.
 863         """
 864         self.physical_line = line
 865         if self.indent_char is None and len(line) and line[0] in ' \t':
 866             self.indent_char = line[0]
 867         for name, check, argument_names in options.physical_checks:
 868             result = self.run_check(check, argument_names)
 869             if result is not None:
 870                 offset, text = result
 871                 self.report_error(self.line_number, offset, text, check)
 872
 873     def build_tokens_line(self):
 874         """
 875         Build a logical line from tokens.
 876         """
 877         self.mapping = []
 878         logical = []
 879         length = 0
 880         previous = None
 881         for token in self.tokens:
 882             token_type, text = token[0:2]
 883             if token_type in SKIP_TOKENS:
 884                 continue
 885             if token_type == tokenize.STRING:
 886                 text = mute_string(text)
 887             if previous:
 888                 end_line, end = previous[3]
 889                 start_line, start = token[2]
 890                 if end_line != start_line:  # different row
 891                     prev_text = self.lines[end_line - 1][end - 1]
 892                     if prev_text == ',' or (prev_text not in '{[('
 893                                             and text not in '}])'):
 894                         logical.append(' ')
 895                         length += 1
 896                 elif end != start:  # different column
 897                     fill = self.lines[end_line - 1][end:start]
 898                     logical.append(fill)
 899                     length += len(fill)
 900             self.mapping.append((length, token))
 901             logical.append(text)
 902             length += len(text)
 903             previous = token
 904         self.logical_line = ''.join(logical)
 905         assert self.logical_line.lstrip() == self.logical_line
 906         assert self.logical_line.rstrip() == self.logical_line
 907
 908     def check_logical(self):
 909         """
 910         Build a line from tokens and run all logical checks on it.
 911         """
 912         options.counters['logical lines'] += 1
 913         self.build_tokens_line()
 914         first_line = self.lines[self.mapping[0][1][2][0] - 1]
 915         indent = first_line[:self.mapping[0][1][2][1]]
 916         self.previous_indent_level = self.indent_level
 917         self.indent_level = expand_indent(indent)
 918         if options.verbose >= 2:
 919             print(self.logical_line[:80].rstrip())
 920         for name, check, argument_names in options.logical_checks:
 921             if options.verbose >= 4:
 922                 print('   ' + name)
 923             result = self.run_check(check, argument_names)
 924             if result is not None:
 925                 offset, text = result
 926                 if isinstance(offset, tuple):
 927                     original_number, original_offset = offset
 928                 else:
 929                     for token_offset, token in self.mapping:
 930                         if offset >= token_offset:
 931                             original_number = token[2][0]
 932                             original_offset = (token[2][1]
 933                                                + offset - token_offset)
 934                 self.report_error(original_number, original_offset,
 935                                   text, check)
 936         self.previous_logical = self.logical_line
 937
 938     def check_all(self, expected=None, line_offset=0):
 939         """
 940         Run all checks on the input file.
 941         """
 942         self.expected = expected or ()
 943         self.line_offset = line_offset
 944         self.line_number = 0
 945         self.file_errors = 0
 946         self.indent_char = None
 947         self.indent_level = 0
 948         self.previous_logical = ''
 949         self.blank_lines = 0
 950         self.blank_lines_before_comment = 0
 951         self.tokens = []
 952         parens = 0
 953         for token in tokenize.generate_tokens(self.readline_check_physical):
 954             if options.verbose >= 3:
 955                 if token[2][0] == token[3][0]:
 956                     pos = '[%s:%s]' % (token[2][1] or '', token[3][1])
 957                 else:
 958                     pos = 'l.%s' % token[3][0]
 959                 print('l.%s\t%s\t%s\t%r' %
 960                     (token[2][0], pos, tokenize.tok_name[token[0]], token[1]))
 961             self.tokens.append(token)
 962             token_type, text = token[0:2]
 963             if token_type == tokenize.OP and text in '([{':
 964                 parens += 1
 965             if token_type == tokenize.OP and text in '}])':
 966                 parens -= 1
 967             if token_type == tokenize.NEWLINE and not parens:
 968                 self.check_logical()
 969                 self.blank_lines = 0
 970                 self.blank_lines_before_comment = 0
 971                 self.tokens = []
 972             if token_type == tokenize.NL and not parens:
 973                 if len(self.tokens) <= 1:
 974                     # The physical line contains only this token.
 975                     self.blank_lines += 1
 976                 self.tokens = []
 977             if token_type == tokenize.COMMENT:
 978                 source_line = token[4]
 979                 token_start = token[2][1]
 980                 if source_line[:token_start].strip() == '':
 981                     self.blank_lines_before_comment = max(self.blank_lines,
 982                         self.blank_lines_before_comment)
 983                     self.blank_lines = 0
 984                 if text.endswith('\n') and not parens:
 985                     # The comment also ends a physical line.  This works around
 986                     # Python < 2.6 behaviour, which does not generate NL after
 987                     # a comment which is on a line by itself.
 988                     self.tokens = []
 989         return self.file_errors
 990
 991     def report_error(self, line_number, offset, text, check):
 992         """
 993         Report an error, according to options.
 994         """
 995         code = text[:4]
 996         if ignore_code(code):
 997             return
 998         if options.quiet == 1 and not self.file_errors:
 999             message(self.filename)
1000         if code in options.counters:
1001             options.counters[code] += 1
1002         else:
1003             options.counters[code] = 1
1004             options.messages[code] = text[5:]
1005         if options.quiet or code in self.expected:
1006             # Don't care about expected errors or warnings
1007             return
1008         self.file_errors += 1
1009         if options.counters[code] == 1 or options.repeat:
1010             message("%s:%s:%d: %s" %
1011                     (self.filename, self.line_offset + line_number,
1012                      offset + 1, text))
1013             if options.show_source:
1014                 line = self.lines[line_number - 1]
1015                 message(line.rstrip())
1016                 message(' ' * offset + '^')
1017             if options.show_pep8:
1018                 message(check.__doc__.lstrip('\n').rstrip())
1019
1020
def input_file(filename):
    """
    Run all checks on a Python source file.

    Return the number of errors reported for the file, as returned by
    Checker.check_all().  The value used to be computed and then dropped.
    """
    if options.verbose:
        message('checking ' + filename)
    return Checker(filename).check_all()
1028
1029
def input_dir(dirname, runner=None):
    """
    Check all Python source files in this directory and all subdirectories.

    dirname is the directory to walk; trailing slashes are ignored.
    runner is called with the path of every file that matches
    filename_match() and is not excluded; it defaults to input_file.
    Nothing happens when dirname itself is excluded.
    """
    # Keep a bare '/' as it is: stripping it would leave an empty path.
    dirname = dirname.rstrip('/') or dirname
    if excluded(dirname):
        return
    if runner is None:
        runner = input_file
    for root, dirs, files in os.walk(dirname):
        if options.verbose:
            message('directory ' + root)
        options.counters['directories'] += 1
        dirs.sort()
        # Prune excluded subdirectories in place so that os.walk does not
        # descend into them.  The list is rebuilt instead of calling
        # remove() while iterating, which skipped the entry following each
        # removed one.
        dirs[:] = [subdir for subdir in dirs if not excluded(subdir)]
        files.sort()
        for filename in files:
            if filename_match(filename) and not excluded(filename):
                options.counters['files'] += 1
                runner(os.path.join(root, filename))
1052
1053
def excluded(filename):
    """
    Tell whether the base name of filename matches one of the patterns
    in options.exclude.

    Return True on the first matching pattern and None otherwise.
    """
    name = os.path.basename(filename)
    for pattern in options.exclude:
        if not fnmatch(name, pattern):
            continue
        return True
    return None
1063
1064
def filename_match(filename):
    """
    Tell whether filename matches one of the patterns in options.filename.

    Return True when no patterns are configured or when a pattern
    matches, and None otherwise.
    """
    patterns = options.filename
    if not patterns:
        return True
    if any(fnmatch(filename, pattern) for pattern in patterns):
        return True
    return None
1075
1076
def ignore_code(code):
    """
    Decide whether an error code is to be ignored.

    Return False when an entry of options.select is a prefix of the code;
    otherwise return True when an entry of options.ignore is a prefix of
    it.  When neither list matches the result is None.
    """
    if any(code.startswith(prefix) for prefix in options.select):
        return False
    if any(code.startswith(prefix) for prefix in options.ignore):
        return True
    return None
1088
1089
def reset_counters():
    """
    Forget all counted error codes and their messages.

    Only the entries of options.counters named in BENCHMARK_KEYS are
    kept; options.messages is replaced by an empty dictionary.
    """
    counters = options.counters
    stale = [key for key in counters if key not in BENCHMARK_KEYS]
    for key in stale:
        del counters[key]
    options.messages = {}
1095
1096
def get_error_statistics():
    """Return the statistics lines of all codes starting with E."""
    return get_statistics(prefix="E")
1100
1101
def get_warning_statistics():
    """Return the statistics lines of all codes starting with W."""
    return get_statistics(prefix="W")
1105
1106
def get_statistics(prefix=''):
    """
    Return one formatted line per message code starting with prefix.

    Each line holds the count, the code and the message text; the lines
    are ordered by code.

    prefix='' matches all errors and warnings
    prefix='E' matches all errors
    prefix='W' matches all warnings
    prefix='E4' matches all errors that have to do with imports
    """
    return ['%-7s %s %s' % (options.counters[code], code,
                            options.messages[code])
            for code in sorted(options.messages.keys())
            if code.startswith(prefix)]
1124
1125
def get_count(prefix=''):
    """
    Return the sum of the counters of all message codes that start with
    prefix (by default: all errors and warnings).
    """
    return sum(options.counters[code]
               for code in list(options.messages.keys())
               if code.startswith(prefix))
1134
1135
def print_statistics(prefix=''):
    """Print every line returned by get_statistics(prefix)."""
    for entry in get_statistics(prefix):
        print(entry)
1140
1141
def print_benchmark(elapsed):
    """
    Print the elapsed time, followed by the rate per second and the total
    of every counter named in BENCHMARK_KEYS.
    """
    print('%-7.2f %s' % (elapsed, 'seconds elapsed'))
    for key in BENCHMARK_KEYS:
        total = options.counters[key]
        rate = total / elapsed
        print('%-7d %s per second (%d total)' % (rate, key, total))
1151
1152
def run_tests(filename):
    """
    Run every test case contained in a test file.

    A test file can provide many tests.  Each test starts with a
    declaration, which is a single line starting with '#:'.  The
    declaration lists the codes of the expected failures, separated by
    spaces, or 'Okay' if no failure is expected.
    Lines before the first declaration are expected to pass all tests.
    If the declaration is empty, the following lines are not checked,
    until the next declaration.

    Examples:

     * Only E224 and W701 are expected:         #: E224 W701
     * The following example conforms:          #: Okay
     * Don't check these lines:                 #:
    """
    # The appended empty declaration makes the loop run the last test case.
    source = readlines(filename) + ['#:\n']
    line_offset = 0
    codes = ['Okay']
    testcase = []
    for index, line in enumerate(source):
        if line.startswith('#:'):
            if codes and index > 0:
                label = '%s:%s:1' % (filename, line_offset + 1)
                expected = [code for code in codes if code != 'Okay']
                # Run the checker on the lines collected so far
                checker = Checker(filename, testcase)
                errors = checker.check_all(expected, line_offset)
                # Every expected code must have been counted
                missing = [code for code in expected
                           if not options.counters.get(code)]
                for code in missing:
                    message('%s: error %s not found' % (label, code))
                errors += len(missing)
                if options.verbose and not errors:
                    message('%s: passed (%s)' % (label, ' '.join(expected)))
                # Keep showing errors for multiple tests
                reset_counters()
            # Messages of the next test case use the real line numbers
            line_offset = index
            # The declaration configures the expected errors
            codes = line.split()[1:]
            # Start over with an empty test case buffer
            del testcase[:]
        elif codes:
            # Collect the lines of the test case
            testcase.append(line)
1201
1202
def selftest():
    """
    Test all check functions with test cases in docstrings.

    Every docstring line matching SELFTEST_REGEX yields an expected code
    ('Okay' when nothing should be found) and a source snippet, in which
    the two-character sequences backslash-n, backslash-t and backslash-s
    stand for a line break, a tab and a space.  The snippet is checked
    with a fresh Checker and the counters are compared to the expectation.

    Failures are printed; a summary is printed when options.verbose is
    set.  options.quiet is forced to 2 while the snippets are checked and
    restored afterwards, so that checks run later are not silenced.
    """
    count_passed = 0
    count_failed = 0
    checks = options.physical_checks + options.logical_checks
    saved_quiet = options.quiet
    try:
        for name, check, argument_names in checks:
            # A check without docstring simply provides no test cases.
            for line in (check.__doc__ or '').splitlines():
                line = line.lstrip()
                match = SELFTEST_REGEX.match(line)
                if match is None:
                    continue
                code, source = match.groups()
                checker = Checker(None)
                for part in source.split(r'\n'):
                    part = part.replace(r'\t', '\t')
                    part = part.replace(r'\s', ' ')
                    checker.lines.append(part + '\n')
                # Count the errors without printing them.
                options.quiet = 2
                checker.check_all()
                error = None
                if code == 'Okay':
                    # Any key besides the benchmark keys is a found code.
                    if len(options.counters) > len(BENCHMARK_KEYS):
                        codes = [key for key in options.counters.keys()
                                 if key not in BENCHMARK_KEYS]
                        error = "incorrectly found %s" % ', '.join(codes)
                elif not options.counters.get(code):
                    error = "failed to find %s" % code
                # Reset the counters
                reset_counters()
                if not error:
                    count_passed += 1
                else:
                    count_failed += 1
                    if len(checker.lines) == 1:
                        print("pep8.py: %s: %s" %
                              (error, checker.lines[0].rstrip()))
                    else:
                        print("pep8.py: %s:" % error)
                        for source_line in checker.lines:
                            print(source_line.rstrip())
    finally:
        options.quiet = saved_quiet
    if options.verbose:
        print("%d passed and %d failed." % (count_passed, count_failed))
        if count_failed:
            print("Test failed.")
        else:
            print("Test passed.")
1251
1252
def process_options(arglist=None):
    """
    Parse the options given in arglist, or on the command line when
    arglist is None.

    The result is stored in the module-level names options and args and
    returned as an (options, args) pair.  The comma separated option
    values are split into lists, and the check functions, counters and
    messages are attached to options.
    """
    global options, args
    parser = OptionParser(version=__version__,
                          usage="%prog [options] input ...")
    add = parser.add_option
    add('-v', '--verbose', default=0, action='count',
        help="print status messages, or debug with -vv")
    add('-q', '--quiet', default=0, action='count',
        help="report only file names, or nothing with -qq")
    add('-r', '--repeat', action='store_true',
        help="show all occurrences of the same error")
    exclude_help = ("exclude files or directories which match these "
                    "comma separated patterns (default: %s)")
    add('--exclude', metavar='patterns', default=DEFAULT_EXCLUDE,
        help=exclude_help % DEFAULT_EXCLUDE)
    add('--filename', metavar='patterns', default='*.py',
        help="when parsing directories, only check filenames "
             "matching these comma separated patterns (default: "
             "*.py)")
    add('--select', metavar='errors', default='',
        help="select errors and warnings (e.g. E,W6)")
    add('--ignore', metavar='errors', default='',
        help="skip errors and warnings (e.g. E4,W)")
    add('--show-source', action='store_true',
        help="show source code for each error")
    add('--show-pep8', action='store_true',
        help="show text of PEP 8 for each error")
    add('--statistics', action='store_true',
        help="count errors and warnings")
    add('--count', action='store_true',
        help="print total number of errors and warnings "
             "to standard error and set exit code to 1 if "
             "total is not null")
    add('--benchmark', action='store_true',
        help="measure processing speed")
    add('--testsuite', metavar='dir',
        help="run regression tests from dir")
    add('--doctest', action='store_true',
        help="run doctest on myself")
    options, args = parser.parse_args(arglist)
    # The test suite directory is checked like any other input.
    if options.testsuite:
        args.append(options.testsuite)
    if not args and not options.doctest:
        parser.error('input not specified')
    options.prog = os.path.basename(sys.argv[0])
    options.exclude = [pattern.rstrip('/')
                       for pattern in options.exclude.split(',')]
    if options.filename:
        options.filename = options.filename.split(',')
    selected = []
    if options.select:
        selected = options.select.split(',')
    options.select = selected
    if options.ignore:
        ignored = options.ignore.split(',')
    elif selected:
        # Ignore all checks which are not explicitly selected
        ignored = ['']
    elif options.testsuite or options.doctest:
        # For doctest and testsuite, all checks are required
        ignored = []
    else:
        # The default choice: ignore controversial checks
        ignored = DEFAULT_IGNORE.split(',')
    options.ignore = ignored
    # find_checks consults options.select and options.ignore through
    # ignore_code, so both have to be final at this point.
    options.physical_checks = find_checks('physical_line')
    options.logical_checks = find_checks('logical_line')
    options.counters = dict.fromkeys(BENCHMARK_KEYS, 0)
    options.messages = {}
    return options, args
1325
1326
def _main():
    """
    Command line entry point: parse the options, run the requested self
    tests, check every input path and print the requested summaries.

    Exits with status 1 when any error or warning was counted; with
    --count the total is written to standard error first.
    """
    options, args = process_options()
    if options.doctest:
        import doctest
        doctest.testmod(verbose=options.verbose)
        selftest()
    runner = input_file
    if options.testsuite:
        runner = run_tests
    started = time.time()
    for path in args:
        if os.path.isdir(path):
            input_dir(path, runner=runner)
            continue
        if excluded(path):
            continue
        options.counters['files'] += 1
        runner(path)
    elapsed = time.time() - started
    if options.statistics:
        print_statistics()
    if options.benchmark:
        print_benchmark(elapsed)
    count = get_count()
    if not count:
        return
    if options.count:
        sys.stderr.write(str(count) + '\n')
    sys.exit(1)
1357
1358
# Run the command line interface only when executed as a script, not when
# the module is imported.
if __name__ == '__main__':
    _main()