lib/pep8/pep8.py

   1 #!/usr/bin/env python
   2 # pep8.py - Check Python source code formatting, according to PEP 8
   3 # Copyright (C) 2006-2009 Johann C. Rocholl <johann@rocholl.net>
   4 # Copyright (C) 2009-2014 Florent Xicluna <florent.xicluna@gmail.com>
   5 #
   6 # Permission is hereby granted, free of charge, to any person
   7 # obtaining a copy of this software and associated documentation files
   8 # (the "Software"), to deal in the Software without restriction,
   9 # including without limitation the rights to use, copy, modify, merge,
  10 # publish, distribute, sublicense, and/or sell copies of the Software,
  11 # and to permit persons to whom the Software is furnished to do so,
  12 # subject to the following conditions:
  13 #
  14 # The above copyright notice and this permission notice shall be
  15 # included in all copies or substantial portions of the Software.
  16 #
  17 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  18 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  19 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  20 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  21 # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  22 # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  23 # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  24 # SOFTWARE.
  25
  26 r"""
  27 Check Python source code formatting, according to PEP 8.
  28
  29 For usage and a list of options, try this:
  30 $ python pep8.py -h
  31
  32 This program and its regression test suite live here:
  33 http://github.com/jcrocholl/pep8
  34
  35 Groups of errors and warnings:
  36 E errors
  37 W warnings
  38 100 indentation
  39 200 whitespace
  40 300 blank lines
  41 400 imports
  42 500 line length
  43 600 deprecation
  44 700 statements
  45 900 syntax error
  46 """
  47 from __future__ import with_statement
  48
  49 __version__ = '1.6.0a0'
  50
  51 import os
  52 import sys
  53 import re
  54 import time
  55 import inspect
  56 import keyword
  57 import tokenize
  58 from optparse import OptionParser
  59 from fnmatch import fnmatch
  60 try:
  61     from configparser import RawConfigParser
  62     from io import TextIOWrapper
  63 except ImportError:
  64     from ConfigParser import RawConfigParser
  65
  66 DEFAULT_EXCLUDE = '.svn,CVS,.bzr,.hg,.git,__pycache__'
  67 DEFAULT_IGNORE = 'E123,E226,E24,E704'
  68 if sys.platform == 'win32':
  69     DEFAULT_CONFIG = os.path.expanduser(r'~\.pep8')
  70 else:
  71     DEFAULT_CONFIG = os.path.join(os.getenv('XDG_CONFIG_HOME') or
  72                                   os.path.expanduser('~/.config'), 'pep8')
  73 PROJECT_CONFIG = ('setup.cfg', 'tox.ini', '.pep8')
  74 TESTSUITE_PATH = os.path.join(os.path.dirname(__file__), 'testsuite')
  75 MAX_LINE_LENGTH = 79
  76 REPORT_FORMAT = {
  77     'default': '%(path)s:%(row)d:%(col)d: %(code)s %(text)s',
  78     'pylint': '%(path)s:%(row)d: [%(code)s] %(text)s',
  79 }
  80
  81 PyCF_ONLY_AST = 1024
  82 SINGLETONS = frozenset(['False', 'None', 'True'])
  83 KEYWORDS = frozenset(keyword.kwlist + ['print']) - SINGLETONS
  84 UNARY_OPERATORS = frozenset(['>>', '**', '*', '+', '-'])
  85 ARITHMETIC_OP = frozenset(['**', '*', '/', '//', '+', '-'])
  86 WS_OPTIONAL_OPERATORS = ARITHMETIC_OP.union(['^', '&', '|', '<<', '>>', '%'])
  87 WS_NEEDED_OPERATORS = frozenset([
  88     '**=', '*=', '/=', '//=', '+=', '-=', '!=', '<>', '<', '>',
  89     '%=', '^=', '&=', '|=', '==', '<=', '>=', '<<=', '>>=', '='])
  90 WHITESPACE = frozenset(' \t')
  91 NEWLINE = frozenset([tokenize.NL, tokenize.NEWLINE])
  92 SKIP_TOKENS = NEWLINE.union([tokenize.INDENT, tokenize.DEDENT])
  93 # ERRORTOKEN is triggered by backticks in Python 3
  94 SKIP_COMMENTS = SKIP_TOKENS.union([tokenize.COMMENT, tokenize.ERRORTOKEN])
  95 BENCHMARK_KEYS = ['directories', 'files', 'logical lines', 'physical lines']
  96
  97 INDENT_REGEX = re.compile(r'([ \t]*)')
  98 RAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*,')
  99 RERAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*,.*,\s*\w+\s*$')
 100 ERRORCODE_REGEX = re.compile(r'\b[A-Z]\d{3}\b')
 101 DOCSTRING_REGEX = re.compile(r'u?r?["\']')
 102 EXTRANEOUS_WHITESPACE_REGEX = re.compile(r'[[({] | []}),;:]')
 103 WHITESPACE_AFTER_COMMA_REGEX = re.compile(r'[,;:]\s*(?:  |\t)')
 104 COMPARE_SINGLETON_REGEX = re.compile(r'([=!]=)\s*(None|False|True)')
 105 COMPARE_NEGATIVE_REGEX = re.compile(r'\b(not)\s+[^[({ ]+\s+(in|is)\s')
 106 COMPARE_TYPE_REGEX = re.compile(r'(?:[=!]=|is(?:\s+not)?)\s*type(?:s.\w+Type'
 107                                 r'|\s*\(\s*([^)]*[^ )])\s*\))')
 108 KEYWORD_REGEX = re.compile(r'(\s*)\b(?:%s)\b(\s*)' % r'|'.join(KEYWORDS))
 109 OPERATOR_REGEX = re.compile(r'(?:[^,\s])(\s*)(?:[-+*/|!<=>%&^]+)(\s*)')
 110 LAMBDA_REGEX = re.compile(r'\blambda\b')
 111 HUNK_REGEX = re.compile(r'^@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@.*$')
 112
 113 # Work around Python < 2.6 behaviour, which does not generate NL after
 114 # a comment which is on a line by itself.
 115 COMMENT_WITH_NL = tokenize.generate_tokens(['#\n'].pop).send(None)[1] == '#\n'
 116
 117
 118 ##############################################################################
 119 # Plugins (check functions) for physical lines
 120 ##############################################################################
 121
 122
 123 def tabs_or_spaces(physical_line, indent_char):
 124     r"""Never mix tabs and spaces.
 125
 126     The most popular way of indenting Python is with spaces only.  The
 127     second-most popular way is with tabs only.  Code indented with a mixture
 128     of tabs and spaces should be converted to using spaces exclusively.  When
 129     invoking the Python command line interpreter with the -t option, it issues
 130     warnings about code that illegally mixes tabs and spaces.  When using -tt
 131     these warnings become errors.  These options are highly recommended!
 132
 133     Okay: if a == 0:\n        a = 1\n        b = 1
 134     E101: if a == 0:\n        a = 1\n\tb = 1
 135     """
 136     indent = INDENT_REGEX.match(physical_line).group(1)
 137     for offset, char in enumerate(indent):
 138         if char != indent_char:
 139             return offset, "E101 indentation contains mixed spaces and tabs"
 140
 141
 142 def tabs_obsolete(physical_line):
 143     r"""For new projects, spaces-only are strongly recommended over tabs.
 144
 145     Okay: if True:\n    return
 146     W191: if True:\n\treturn
 147     """
 148     indent = INDENT_REGEX.match(physical_line).group(1)
 149     if '\t' in indent:
 150         return indent.index('\t'), "W191 indentation contains tabs"
 151
 152
 153 def trailing_whitespace(physical_line):
 154     r"""Trailing whitespace is superfluous.
 155
 156     The warning returned varies on whether the line itself is blank, for easier
 157     filtering for those who want to indent their blank lines.
 158
 159     Okay: spam(1)\n#
 160     W291: spam(1) \n#
 161     W293: class Foo(object):\n    \n    bang = 12
 162     """
 163     physical_line = physical_line.rstrip('\n')    # chr(10), newline
 164     physical_line = physical_line.rstrip('\r')    # chr(13), carriage return
 165     physical_line = physical_line.rstrip('\x0c')  # chr(12), form feed, ^L
 166     stripped = physical_line.rstrip(' \t\v')
 167     if physical_line != stripped:
 168         if stripped:
 169             return len(stripped), "W291 trailing whitespace"
 170         else:
 171             return 0, "W293 blank line contains whitespace"
 172
 173
 174 def trailing_blank_lines(physical_line, lines, line_number, total_lines):
 175     r"""Trailing blank lines are superfluous.
 176
 177     Okay: spam(1)
 178     W391: spam(1)\n
 179
 180     However the last line should end with a new line (warning W292).
 181     """
 182     if line_number == total_lines:
 183         stripped_last_line = physical_line.rstrip()
 184         if not stripped_last_line:
 185             return 0, "W391 blank line at end of file"
 186         if stripped_last_line == physical_line:
 187             return len(physical_line), "W292 no newline at end of file"
 188
 189
 190 def maximum_line_length(physical_line, max_line_length, multiline):
 191     r"""Limit all lines to a maximum of 79 characters.
 192
 193     There are still many devices around that are limited to 80 character
 194     lines; plus, limiting windows to 80 characters makes it possible to have
 195     several windows side-by-side.  The default wrapping on such devices looks
 196     ugly.  Therefore, please limit all lines to a maximum of 79 characters.
 197     For flowing long blocks of text (docstrings or comments), limiting the
 198     length to 72 characters is recommended.
 199
 200     Reports error E501.
 201     """
 202     line = physical_line.rstrip()
 203     length = len(line)
 204     if length > max_line_length and not noqa(line):
 205         # Special case for long URLs in multi-line docstrings or comments,
 206         # but still report the error when the 72 first chars are whitespaces.
 207         chunks = line.split()
 208         if ((len(chunks) == 1 and multiline) or
 209             (len(chunks) == 2 and chunks[0] == '#')) and \
 210                 len(line) - len(chunks[-1]) < max_line_length - 7:
 211             return
 212         if hasattr(line, 'decode'):   # Python 2
 213             # The line could contain multi-byte characters
 214             try:
 215                 length = len(line.decode('utf-8'))
 216             except UnicodeError:
 217                 pass
 218         if length > max_line_length:
 219             return (max_line_length, "E501 line too long "
 220                     "(%d > %d characters)" % (length, max_line_length))
 221
 222
 223 ##############################################################################
 224 # Plugins (check functions) for logical lines
 225 ##############################################################################
 226
 227
 228 def blank_lines(logical_line, blank_lines, indent_level, line_number,
 229                 blank_before, previous_logical, previous_indent_level):
 230     r"""Separate top-level function and class definitions with two blank lines.
 231
 232     Method definitions inside a class are separated by a single blank line.
 233
 234     Extra blank lines may be used (sparingly) to separate groups of related
 235     functions.  Blank lines may be omitted between a bunch of related
 236     one-liners (e.g. a set of dummy implementations).
 237
 238     Use blank lines in functions, sparingly, to indicate logical sections.
 239
 240     Okay: def a():\n    pass\n\n\ndef b():\n    pass
 241     Okay: def a():\n    pass\n\n\n# Foo\n# Bar\n\ndef b():\n    pass
 242
 243     E301: class Foo:\n    b = 0\n    def bar():\n        pass
 244     E302: def a():\n    pass\n\ndef b(n):\n    pass
 245     E303: def a():\n    pass\n\n\n\ndef b(n):\n    pass
 246     E303: def a():\n\n\n\n    pass
 247     E304: @decorator\n\ndef a():\n    pass
 248     """
 249     if line_number < 3 and not previous_logical:
 250         return  # Don't expect blank lines before the first line
 251     if previous_logical.startswith('@'):
 252         if blank_lines:
 253             yield 0, "E304 blank lines found after function decorator"
 254     elif blank_lines > 2 or (indent_level and blank_lines == 2):
 255         yield 0, "E303 too many blank lines (%d)" % blank_lines
 256     elif logical_line.startswith(('def ', 'class ', '@')):
 257         if indent_level:
 258             if not (blank_before or previous_indent_level < indent_level or
 259                     DOCSTRING_REGEX.match(previous_logical)):
 260                 yield 0, "E301 expected 1 blank line, found 0"
 261         elif blank_before != 2:
 262             yield 0, "E302 expected 2 blank lines, found %d" % blank_before
 263
 264
 265 def extraneous_whitespace(logical_line):
 266     r"""Avoid extraneous whitespace.
 267
 268     Avoid extraneous whitespace in these situations:
 269     - Immediately inside parentheses, brackets or braces.
 270     - Immediately before a comma, semicolon, or colon.
 271
 272     Okay: spam(ham[1], {eggs: 2})
 273     E201: spam( ham[1], {eggs: 2})
 274     E201: spam(ham[ 1], {eggs: 2})
 275     E201: spam(ham[1], { eggs: 2})
 276     E202: spam(ham[1], {eggs: 2} )
 277     E202: spam(ham[1 ], {eggs: 2})
 278     E202: spam(ham[1], {eggs: 2 })
 279
 280     E203: if x == 4: print x, y; x, y = y , x
 281     E203: if x == 4: print x, y ; x, y = y, x
 282     E203: if x == 4 : print x, y; x, y = y, x
 283     """
 284     line = logical_line
 285     for match in EXTRANEOUS_WHITESPACE_REGEX.finditer(line):
 286         text = match.group()
 287         char = text.strip()
 288         found = match.start()
 289         if text == char + ' ':
 290             # assert char in '([{'
 291             yield found + 1, "E201 whitespace after '%s'" % char
 292         elif line[found - 1] != ',':
 293             code = ('E202' if char in '}])' else 'E203')  # if char in ',;:'
 294             yield found, "%s whitespace before '%s'" % (code, char)
 295
 296
 297 def whitespace_around_keywords(logical_line):
 298     r"""Avoid extraneous whitespace around keywords.
 299
 300     Okay: True and False
 301     E271: True and  False
 302     E272: True  and False
 303     E273: True and\tFalse
 304     E274: True\tand False
 305     """
 306     for match in KEYWORD_REGEX.finditer(logical_line):
 307         before, after = match.groups()
 308
 309         if '\t' in before:
 310             yield match.start(1), "E274 tab before keyword"
 311         elif len(before) > 1:
 312             yield match.start(1), "E272 multiple spaces before keyword"
 313
 314         if '\t' in after:
 315             yield match.start(2), "E273 tab after keyword"
 316         elif len(after) > 1:
 317             yield match.start(2), "E271 multiple spaces after keyword"
 318
 319
 320 def missing_whitespace(logical_line):
 321     r"""Each comma, semicolon or colon should be followed by whitespace.
 322
 323     Okay: [a, b]
 324     Okay: (3,)
 325     Okay: a[1:4]
 326     Okay: a[:4]
 327     Okay: a[1:]
 328     Okay: a[1:4:2]
 329     E231: ['a','b']
 330     E231: foo(bar,baz)
 331     E231: [{'a':'b'}]
 332     """
 333     line = logical_line
 334     for index in range(len(line) - 1):
 335         char = line[index]
 336         if char in ',;:' and line[index + 1] not in WHITESPACE:
 337             before = line[:index]
 338             if char == ':' and before.count('[') > before.count(']') and \
 339                     before.rfind('{') < before.rfind('['):
 340                 continue  # Slice syntax, no space required
 341             if char == ',' and line[index + 1] == ')':
 342                 continue  # Allow tuple with only one element: (3,)
 343             yield index, "E231 missing whitespace after '%s'" % char
 344
 345
 346 def indentation(logical_line, previous_logical, indent_char,
 347                 indent_level, previous_indent_level):
 348     r"""Use 4 spaces per indentation level.
 349
 350     For really old code that you don't want to mess up, you can continue to
 351     use 8-space tabs.
 352
 353     Okay: a = 1
 354     Okay: if a == 0:\n    a = 1
 355     E111:   a = 1
 356     E114:   # a = 1
 357
 358     Okay: for item in items:\n    pass
 359     E112: for item in items:\npass
 360     E115: for item in items:\n# Hi\n    pass
 361
 362     Okay: a = 1\nb = 2
 363     E113: a = 1\n    b = 2
 364     E116: a = 1\n    # b = 2
 365     """
 366     c = 0 if logical_line else 3
 367     tmpl = "E11%d %s" if logical_line else "E11%d %s (comment)"
 368     if indent_level % 4:
 369         yield 0, tmpl % (1 + c, "indentation is not a multiple of four")
 370     indent_expect = previous_logical.endswith(':')
 371     if indent_expect and indent_level <= previous_indent_level:
 372         yield 0, tmpl % (2 + c, "expected an indented block")
 373     elif not indent_expect and indent_level > previous_indent_level:
 374         yield 0, tmpl % (3 + c, "unexpected indentation")
 375
 376
 377 def continued_indentation(logical_line, tokens, indent_level, hang_closing,
 378                           indent_char, noqa, verbose):
 379     r"""Continuation lines indentation.
 380
 381     Continuation lines should align wrapped elements either vertically
 382     using Python's implicit line joining inside parentheses, brackets
 383     and braces, or using a hanging indent.
 384
 385     When using a hanging indent these considerations should be applied:
 386     - there should be no arguments on the first line, and
 387     - further indentation should be used to clearly distinguish itself as a
 388       continuation line.
 389
 390     Okay: a = (\n)
 391     E123: a = (\n    )
 392
 393     Okay: a = (\n    42)
 394     E121: a = (\n   42)
 395     E122: a = (\n42)
 396     E123: a = (\n    42\n    )
 397     E124: a = (24,\n     42\n)
 398     E125: if (\n    b):\n    pass
 399     E126: a = (\n        42)
 400     E127: a = (24,\n      42)
 401     E128: a = (24,\n    42)
 402     E129: if (a or\n    b):\n    pass
 403     E131: a = (\n    42\n 24)
 404     """
 405     first_row = tokens[0][2][0]
 406     nrows = 1 + tokens[-1][2][0] - first_row
 407     if noqa or nrows == 1:
 408         return
 409
 410     # indent_next tells us whether the next block is indented; assuming
 411     # that it is indented by 4 spaces, then we should not allow 4-space
 412     # indents on the final continuation line; in turn, some other
 413     # indents are allowed to have an extra 4 spaces.
 414     indent_next = logical_line.endswith(':')
 415
 416     row = depth = 0
 417     valid_hangs = (4,) if indent_char != '\t' else (4, 8)
 418     # remember how many brackets were opened on each line
 419     parens = [0] * nrows
 420     # relative indents of physical lines
 421     rel_indent = [0] * nrows
 422     # for each depth, collect a list of opening rows
 423     open_rows = [[0]]
 424     # for each depth, memorize the hanging indentation
 425     hangs = [None]
 426     # visual indents
 427     indent_chances = {}
 428     last_indent = tokens[0][2]
 429     visual_indent = None
 430     # for each depth, memorize the visual indent column
 431     indent = [last_indent[1]]
 432     if verbose >= 3:
 433         print(">>> " + tokens[0][4].rstrip())
 434
 435     for token_type, text, start, end, line in tokens:
 436
 437         newline = row < start[0] - first_row
 438         if newline:
 439             row = start[0] - first_row
 440             newline = not last_token_multiline and token_type not in NEWLINE
 441
 442         if newline:
 443             # this is the beginning of a continuation line.
 444             last_indent = start
 445             if verbose >= 3:
 446                 print("... " + line.rstrip())
 447
 448             # record the initial indent.
 449             rel_indent[row] = expand_indent(line) - indent_level
 450
 451             # identify closing bracket
 452             close_bracket = (token_type == tokenize.OP and text in ']})')
 453
 454             # is the indent relative to an opening bracket line?
 455             for open_row in reversed(open_rows[depth]):
 456                 hang = rel_indent[row] - rel_indent[open_row]
 457                 hanging_indent = hang in valid_hangs
 458                 if hanging_indent:
 459                     break
 460             if hangs[depth]:
 461                 hanging_indent = (hang == hangs[depth])
 462             # is there any chance of visual indent?
 463             visual_indent = (not close_bracket and hang > 0 and
 464                              indent_chances.get(start[1]))
 465
 466             if close_bracket and indent[depth]:
 467                 # closing bracket for visual indent
 468                 if start[1] != indent[depth]:
 469                     yield (start, "E124 closing bracket does not match "
 470                            "visual indentation")
 471             elif close_bracket and not hang:
 472                 # closing bracket matches indentation of opening bracket's line
 473                 if hang_closing:
 474                     yield start, "E133 closing bracket is missing indentation"
 475             elif indent[depth] and start[1] < indent[depth]:
 476                 if visual_indent is not True:
 477                     # visual indent is broken
 478                     yield (start, "E128 continuation line "
 479                            "under-indented for visual indent")
 480             elif hanging_indent or (indent_next and rel_indent[row] == 8):
 481                 # hanging indent is verified
 482                 if close_bracket and not hang_closing:
 483                     yield (start, "E123 closing bracket does not match "
 484                            "indentation of opening bracket's line")
 485                 hangs[depth] = hang
 486             elif visual_indent is True:
 487                 # visual indent is verified
 488                 indent[depth] = start[1]
 489             elif visual_indent in (text, str):
 490                 # ignore token lined up with matching one from a previous line
 491                 pass
 492             else:
 493                 # indent is broken
 494                 if hang <= 0:
 495                     error = "E122", "missing indentation or outdented"
 496                 elif indent[depth]:
 497                     error = "E127", "over-indented for visual indent"
 498                 elif not close_bracket and hangs[depth]:
 499                     error = "E131", "unaligned for hanging indent"
 500                 else:
 501                     hangs[depth] = hang
 502                     if hang > 4:
 503                         error = "E126", "over-indented for hanging indent"
 504                     else:
 505                         error = "E121", "under-indented for hanging indent"
 506                 yield start, "%s continuation line %s" % error
 507
 508         # look for visual indenting
 509         if (parens[row] and token_type not in (tokenize.NL, tokenize.COMMENT)
 510                 and not indent[depth]):
 511             indent[depth] = start[1]
 512             indent_chances[start[1]] = True
 513             if verbose >= 4:
 514                 print("bracket depth %s indent to %s" % (depth, start[1]))
 515         # deal with implicit string concatenation
 516         elif (token_type in (tokenize.STRING, tokenize.COMMENT) or
 517               text in ('u', 'ur', 'b', 'br')):
 518             indent_chances[start[1]] = str
 519         # special case for the "if" statement because len("if (") == 4
 520         elif not indent_chances and not row and not depth and text == 'if':
 521             indent_chances[end[1] + 1] = True
 522         elif text == ':' and line[end[1]:].isspace():
 523             open_rows[depth].append(row)
 524
 525         # keep track of bracket depth
 526         if token_type == tokenize.OP:
 527             if text in '([{':
 528                 depth += 1
 529                 indent.append(0)
 530                 hangs.append(None)
 531                 if len(open_rows) == depth:
 532                     open_rows.append([])
 533                 open_rows[depth].append(row)
 534                 parens[row] += 1
 535                 if verbose >= 4:
 536                     print("bracket depth %s seen, col %s, visual min = %s" %
 537                           (depth, start[1], indent[depth]))
 538             elif text in ')]}' and depth > 0:
 539                 # parent indents should not be more than this one
 540                 prev_indent = indent.pop() or last_indent[1]
 541                 hangs.pop()
 542                 for d in range(depth):
 543                     if indent[d] > prev_indent:
 544                         indent[d] = 0
 545                 for ind in list(indent_chances):
 546                     if ind >= prev_indent:
 547                         del indent_chances[ind]
 548                 del open_rows[depth + 1:]
 549                 depth -= 1
 550                 if depth:
 551                     indent_chances[indent[depth]] = True
 552                 for idx in range(row, -1, -1):
 553                     if parens[idx]:
 554                         parens[idx] -= 1
 555                         break
 556             assert len(indent) == depth + 1
 557             if start[1] not in indent_chances:
 558                 # allow to line up tokens
 559                 indent_chances[start[1]] = text
 560
 561         last_token_multiline = (start[0] != end[0])
 562         if last_token_multiline:
 563             rel_indent[end[0] - first_row] = rel_indent[row]
 564
 565     if indent_next and expand_indent(line) == indent_level + 4:
 566         pos = (start[0], indent[0] + 4)
 567         if visual_indent:
 568             code = "E129 visually indented line"
 569         else:
 570             code = "E125 continuation line"
 571         yield pos, "%s with same indent as next logical line" % code
 572
 573
 574 def whitespace_before_parameters(logical_line, tokens):
 575     r"""Avoid extraneous whitespace.
 576
 577     Avoid extraneous whitespace in the following situations:
 578     - before the open parenthesis that starts the argument list of a
 579       function call.
 580     - before the open parenthesis that starts an indexing or slicing.
 581
 582     Okay: spam(1)
 583     E211: spam (1)
 584
 585     Okay: dict['key'] = list[index]
 586     E211: dict ['key'] = list[index]
 587     E211: dict['key'] = list [index]
 588     """
 589     prev_type, prev_text, __, prev_end, __ = tokens[0]
 590     for index in range(1, len(tokens)):
 591         token_type, text, start, end, __ = tokens[index]
 592         if (token_type == tokenize.OP and
 593             text in '([' and
 594             start != prev_end and
 595             (prev_type == tokenize.NAME or prev_text in '}])') and
 596             # Syntax "class A (B):" is allowed, but avoid it
 597             (index < 2 or tokens[index - 2][1] != 'class') and
 598                 # Allow "return (a.foo for a in range(5))"
 599                 not keyword.iskeyword(prev_text)):
 600             yield prev_end, "E211 whitespace before '%s'" % text
 601         prev_type = token_type
 602         prev_text = text
 603         prev_end = end
 604
 605
 606 def whitespace_around_operator(logical_line):
 607     r"""Avoid extraneous whitespace around an operator.
 608
 609     Okay: a = 12 + 3
 610     E221: a = 4  + 5
 611     E222: a = 4 +  5
 612     E223: a = 4\t+ 5
 613     E224: a = 4 +\t5
 614     """
 615     for match in OPERATOR_REGEX.finditer(logical_line):
 616         before, after = match.groups()
 617
 618         if '\t' in before:
 619             yield match.start(1), "E223 tab before operator"
 620         elif len(before) > 1:
 621             yield match.start(1), "E221 multiple spaces before operator"
 622
 623         if '\t' in after:
 624             yield match.start(2), "E224 tab after operator"
 625         elif len(after) > 1:
 626             yield match.start(2), "E222 multiple spaces after operator"
 627
 628
 629 def missing_whitespace_around_operator(logical_line, tokens):
 630     r"""Surround operators with a single space on either side.
 631
 632     - Always surround these binary operators with a single space on
 633       either side: assignment (=), augmented assignment (+=, -= etc.),
 634       comparisons (==, <, >, !=, <=, >=, in, not in, is, is not),
 635       Booleans (and, or, not).
 636
 637     - If operators with different priorities are used, consider adding
 638       whitespace around the operators with the lowest priorities.
 639
 640     Okay: i = i + 1
 641     Okay: submitted += 1
 642     Okay: x = x * 2 - 1
 643     Okay: hypot2 = x * x + y * y
 644     Okay: c = (a + b) * (a - b)
 645     Okay: foo(bar, key='word', *args, **kwargs)
 646     Okay: alpha[:-i]
 647
 648     E225: i=i+1
 649     E225: submitted +=1
 650     E225: x = x /2 - 1
 651     E225: z = x **y
 652     E226: c = (a+b) * (a-b)
 653     E226: hypot2 = x*x + y*y
 654     E227: c = a|b
 655     E228: msg = fmt%(errno, errmsg)
 656     """
 657     parens = 0
 658     need_space = False
 659     prev_type = tokenize.OP
 660     prev_text = prev_end = None
 661     for token_type, text, start, end, line in tokens:
 662         if token_type in SKIP_COMMENTS:
 663             continue
 664         if text in ('(', 'lambda'):
 665             parens += 1
 666         elif text == ')':
 667             parens -= 1
 668         if need_space:
 669             if start != prev_end:
 670                 # Found a (probably) needed space
 671                 if need_space is not True and not need_space[1]:
 672                     yield (need_space[0],
 673                            "E225 missing whitespace around operator")
 674                 need_space = False
 675             elif text == '>' and prev_text in ('<', '-'):
 676                 # Tolerate the "<>" operator, even if running Python 3
 677                 # Deal with Python 3's annotated return value "->"
 678                 pass
 679             else:
 680                 if need_space is True or need_space[1]:
 681                     # A needed trailing space was not found
 682                     yield prev_end, "E225 missing whitespace around operator"
 683                 else:
 684                     code, optype = 'E226', 'arithmetic'
 685                     if prev_text == '%':
 686                         code, optype = 'E228', 'modulo'
 687                     elif prev_text not in ARITHMETIC_OP:
 688                         code, optype = 'E227', 'bitwise or shift'
 689                     yield (need_space[0], "%s missing whitespace "
 690                            "around %s operator" % (code, optype))
 691                 need_space = False
 692         elif token_type == tokenize.OP and prev_end is not None:
 693             if text == '=' and parens:
 694                 # Allow keyword args or defaults: foo(bar=None).
 695                 pass
 696             elif text in WS_NEEDED_OPERATORS:
 697                 need_space = True
 698             elif text in UNARY_OPERATORS:
 699                 # Check if the operator is being used as a binary operator
 700                 # Allow unary operators: -123, -x, +1.
 701                 # Allow argument unpacking: foo(*args, **kwargs).
 702                 if (prev_text in '}])' if prev_type == tokenize.OP
 703                         else prev_text not in KEYWORDS):
 704                     need_space = None
 705             elif text in WS_OPTIONAL_OPERATORS:
 706                 need_space = None
 707
 708             if need_space is None:
 709                 # Surrounding space is optional, but ensure that
 710                 # trailing space matches opening space
 711                 need_space = (prev_end, start != prev_end)
 712             elif need_space and start == prev_end:
 713                 # A needed opening space was not found
 714                 yield prev_end, "E225 missing whitespace around operator"
 715                 need_space = False
 716         prev_type = token_type
 717         prev_text = text
 718         prev_end = end
 719
 720
 721 def whitespace_around_comma(logical_line):
 722     r"""Avoid extraneous whitespace after a comma or a colon.
 723
 724     Note: these checks are disabled by default
 725
 726     Okay: a = (1, 2)
 727     E241: a = (1,  2)
 728     E242: a = (1,\t2)
 729     """
 730     line = logical_line
 731     for m in WHITESPACE_AFTER_COMMA_REGEX.finditer(line):
 732         found = m.start() + 1
 733         if '\t' in m.group():
 734             yield found, "E242 tab after '%s'" % m.group()[0]
 735         else:
 736             yield found, "E241 multiple spaces after '%s'" % m.group()[0]
 737
 738
 739 def whitespace_around_named_parameter_equals(logical_line, tokens):
 740     r"""Don't use spaces around the '=' sign in function arguments.
 741
 742     Don't use spaces around the '=' sign when used to indicate a
 743     keyword argument or a default parameter value.
 744
 745     Okay: def complex(real, imag=0.0):
 746     Okay: return magic(r=real, i=imag)
 747     Okay: boolean(a == b)
 748     Okay: boolean(a != b)
 749     Okay: boolean(a <= b)
 750     Okay: boolean(a >= b)
 751
 752     E251: def complex(real, imag = 0.0):
 753     E251: return magic(r = real, i = imag)
 754     """
 755     parens = 0
 756     no_space = False
 757     prev_end = None
 758     message = "E251 unexpected spaces around keyword / parameter equals"
 759     for token_type, text, start, end, line in tokens:
 760         if token_type == tokenize.NL:
 761             continue
 762         if no_space:
 763             no_space = False
 764             if start != prev_end:
 765                 yield (prev_end, message)
 766         elif token_type == tokenize.OP:
 767             if text == '(':
 768                 parens += 1
 769             elif text == ')':
 770                 parens -= 1
 771             elif parens and text == '=':
 772                 no_space = True
 773                 if start != prev_end:
 774                     yield (prev_end, message)
 775         prev_end = end
 776
 777
 778 def whitespace_before_comment(logical_line, tokens):
 779     r"""Separate inline comments by at least two spaces.
 780
 781     An inline comment is a comment on the same line as a statement.  Inline
 782     comments should be separated by at least two spaces from the statement.
 783     They should start with a # and a single space.
 784
 785     Each line of a block comment starts with a # and a single space
 786     (unless it is indented text inside the comment).
 787
 788     Okay: x = x + 1  # Increment x
 789     Okay: x = x + 1    # Increment x
 790     Okay: # Block comment
 791     E261: x = x + 1 # Increment x
 792     E262: x = x + 1  #Increment x
 793     E262: x = x + 1  #  Increment x
 794     E265: #Block comment
 795     E266: ### Block comment
 796     """
 797     prev_end = (0, 0)
 798     for token_type, text, start, end, line in tokens:
 799         if token_type == tokenize.COMMENT:
 800             inline_comment = line[:start[1]].strip()
 801             if inline_comment:
 802                 if prev_end[0] == start[0] and start[1] < prev_end[1] + 2:
 803                     yield (prev_end,
 804                            "E261 at least two spaces before inline comment")
 805             symbol, sp, comment = text.partition(' ')
 806             bad_prefix = symbol not in '#:' and (symbol.lstrip('#')[:1] or '#')
 807             if inline_comment:
 808                 if bad_prefix or comment[:1] in WHITESPACE:
 809                     yield start, "E262 inline comment should start with '# '"
 810             elif bad_prefix and (bad_prefix != '!' or start[0] > 1):
 811                 if bad_prefix != '#':
 812                     yield start, "E265 block comment should start with '# '"
 813                 elif comment:
 814                     yield start, "E266 too many leading '#' for block comment"
 815         elif token_type != tokenize.NL:
 816             prev_end = end
 817
 818
 819 def imports_on_separate_lines(logical_line):
 820     r"""Imports should usually be on separate lines.
 821
 822     Okay: import os\nimport sys
 823     E401: import sys, os
 824
 825     Okay: from subprocess import Popen, PIPE
 826     Okay: from myclas import MyClass
 827     Okay: from foo.bar.yourclass import YourClass
 828     Okay: import myclass
 829     Okay: import foo.bar.yourclass
 830     """
 831     line = logical_line
 832     if line.startswith('import '):
 833         found = line.find(',')
 834         if -1 < found and ';' not in line[:found]:
 835             yield found, "E401 multiple imports on one line"
 836
 837
 838 def compound_statements(logical_line):
 839     r"""Compound statements (on the same line) are generally discouraged.
 840
 841     While sometimes it's okay to put an if/for/while with a small body
 842     on the same line, never do this for multi-clause statements.
 843     Also avoid folding such long lines!
 844
 845     Always use a def statement instead of an assignment statement that
 846     binds a lambda expression directly to a name.
 847
 848     Okay: if foo == 'blah':\n    do_blah_thing()
 849     Okay: do_one()
 850     Okay: do_two()
 851     Okay: do_three()
 852
 853     E701: if foo == 'blah': do_blah_thing()
 854     E701: for x in lst: total += x
 855     E701: while t < 10: t = delay()
 856     E701: if foo == 'blah': do_blah_thing()
 857     E701: else: do_non_blah_thing()
 858     E701: try: something()
 859     E701: finally: cleanup()
 860     E701: if foo == 'blah': one(); two(); three()
 861     E702: do_one(); do_two(); do_three()
 862     E703: do_four();  # useless semicolon
 863     E704: def f(x): return 2*x
 864     E731: f = lambda x: 2*x
 865     """
 866     line = logical_line
 867     last_char = len(line) - 1
 868     found = line.find(':')
 869     while -1 < found < last_char:
 870         before = line[:found]
 871         if ((before.count('{') <= before.count('}') and   # {'a': 1} (dict)
 872              before.count('[') <= before.count(']') and   # [1:2] (slice)
 873              before.count('(') <= before.count(')'))):    # (annotation)
 874             if LAMBDA_REGEX.search(before):
 875                 yield 0, "E731 do not assign a lambda expression, use a def"
 876                 break
 877             if before.startswith('def '):
 878                 yield 0, "E704 multiple statements on one line (def)"
 879             else:
 880                 yield found, "E701 multiple statements on one line (colon)"
 881         found = line.find(':', found + 1)
 882     found = line.find(';')
 883     while -1 < found:
 884         if found < last_char:
 885             yield found, "E702 multiple statements on one line (semicolon)"
 886         else:
 887             yield found, "E703 statement ends with a semicolon"
 888         found = line.find(';', found + 1)
 889
 890
 891 def explicit_line_join(logical_line, tokens):
 892     r"""Avoid explicit line join between brackets.
 893
 894     The preferred way of wrapping long lines is by using Python's implied line
 895     continuation inside parentheses, brackets and braces.  Long lines can be
 896     broken over multiple lines by wrapping expressions in parentheses.  These
 897     should be used in preference to using a backslash for line continuation.
 898
 899     E502: aaa = [123, \\n       123]
 900     E502: aaa = ("bbb " \\n       "ccc")
 901
 902     Okay: aaa = [123,\n       123]
 903     Okay: aaa = ("bbb "\n       "ccc")
 904     Okay: aaa = "bbb " \\n    "ccc"
 905     """
 906     prev_start = prev_end = parens = 0
 907     for token_type, text, start, end, line in tokens:
 908         if start[0] != prev_start and parens and backslash:
 909             yield backslash, "E502 the backslash is redundant between brackets"
 910         if end[0] != prev_end:
 911             if line.rstrip('\r\n').endswith('\\'):
 912                 backslash = (end[0], len(line.splitlines()[-1]) - 1)
 913             else:
 914                 backslash = None
 915             prev_start = prev_end = end[0]
 916         else:
 917             prev_start = start[0]
 918         if token_type == tokenize.OP:
 919             if text in '([{':
 920                 parens += 1
 921             elif text in ')]}':
 922                 parens -= 1
 923
 924
 925 def comparison_to_singleton(logical_line, noqa):
 926     r"""Comparison to singletons should use "is" or "is not".
 927
 928     Comparisons to singletons like None should always be done
 929     with "is" or "is not", never the equality operators.
 930
 931     Okay: if arg is not None:
 932     E711: if arg != None:
 933     E712: if arg == True:
 934
 935     Also, beware of writing if x when you really mean if x is not None --
 936     e.g. when testing whether a variable or argument that defaults to None was
 937     set to some other value.  The other value might have a type (such as a
 938     container) that could be false in a boolean context!
 939     """
 940     match = not noqa and COMPARE_SINGLETON_REGEX.search(logical_line)
 941     if match:
 942         same = (match.group(1) == '==')
 943         singleton = match.group(2)
 944         msg = "'if cond is %s:'" % (('' if same else 'not ') + singleton)
 945         if singleton in ('None',):
 946             code = 'E711'
 947         else:
 948             code = 'E712'
 949             nonzero = ((singleton == 'True' and same) or
 950                        (singleton == 'False' and not same))
 951             msg += " or 'if %scond:'" % ('' if nonzero else 'not ')
 952         yield match.start(1), ("%s comparison to %s should be %s" %
 953                                (code, singleton, msg))
 954
 955
 956 def comparison_negative(logical_line):
 957     r"""Negative comparison should be done using "not in" and "is not".
 958
 959     Okay: if x not in y:\n    pass
 960     Okay: assert (X in Y or X is Z)
 961     Okay: if not (X in Y):\n    pass
 962     Okay: zz = x is not y
 963     E713: Z = not X in Y
 964     E713: if not X.B in Y:\n    pass
 965     E714: if not X is Y:\n    pass
 966     E714: Z = not X.B is Y
 967     """
 968     match = COMPARE_NEGATIVE_REGEX.search(logical_line)
 969     if match:
 970         pos = match.start(1)
 971         if match.group(2) == 'in':
 972             yield pos, "E713 test for membership should be 'not in'"
 973         else:
 974             yield pos, "E714 test for object identity should be 'is not'"
 975
 976
 977 def comparison_type(logical_line):
 978     r"""Object type comparisons should always use isinstance().
 979
 980     Do not compare types directly.
 981
 982     Okay: if isinstance(obj, int):
 983     E721: if type(obj) is type(1):
 984
 985     When checking if an object is a string, keep in mind that it might be a
 986     unicode string too! In Python 2.3, str and unicode have a common base
 987     class, basestring, so you can do:
 988
 989     Okay: if isinstance(obj, basestring):
 990     Okay: if type(a1) is type(b1):
 991     """
 992     match = COMPARE_TYPE_REGEX.search(logical_line)
 993     if match:
 994         inst = match.group(1)
 995         if inst and isidentifier(inst) and inst not in SINGLETONS:
 996             return  # Allow comparison for types which are not obvious
 997         yield match.start(), "E721 do not compare types, use 'isinstance()'"
 998
 999
1000 def python_3000_has_key(logical_line, noqa):
1001     r"""The {}.has_key() method is removed in Python 3: use the 'in' operator.
1002
1003     Okay: if "alph" in d:\n    print d["alph"]
1004     W601: assert d.has_key('alph')
1005     """
1006     pos = logical_line.find('.has_key(')
1007     if pos > -1 and not noqa:
1008         yield pos, "W601 .has_key() is deprecated, use 'in'"
1009
1010
1011 def python_3000_raise_comma(logical_line):
1012     r"""When raising an exception, use "raise ValueError('message')".
1013
1014     The older form is removed in Python 3.
1015
1016     Okay: raise DummyError("Message")
1017     W602: raise DummyError, "Message"
1018     """
1019     match = RAISE_COMMA_REGEX.match(logical_line)
1020     if match and not RERAISE_COMMA_REGEX.match(logical_line):
1021         yield match.end() - 1, "W602 deprecated form of raising exception"
1022
1023
1024 def python_3000_not_equal(logical_line):
1025     r"""New code should always use != instead of <>.
1026
1027     The older syntax is removed in Python 3.
1028
1029     Okay: if a != 'no':
1030     W603: if a <> 'no':
1031     """
1032     pos = logical_line.find('<>')
1033     if pos > -1:
1034         yield pos, "W603 '<>' is deprecated, use '!='"
1035
1036
1037 def python_3000_backticks(logical_line):
1038     r"""Backticks are removed in Python 3: use repr() instead.
1039
1040     Okay: val = repr(1 + 2)
1041     W604: val = `1 + 2`
1042     """
1043     pos = logical_line.find('`')
1044     if pos > -1:
1045         yield pos, "W604 backticks are deprecated, use 'repr()'"
1046
1047
1048 ##############################################################################
1049 # Helper functions
1050 ##############################################################################
1051
1052
1053 if '' == ''.encode():
1054     # Python 2: implicit encoding.
1055     def readlines(filename):
1056         """Read the source code."""
1057         with open(filename, 'rU') as f:
1058             return f.readlines()
1059     isidentifier = re.compile(r'[a-zA-Z_]\w*').match
1060     stdin_get_value = sys.stdin.read
1061 else:
1062     # Python 3
1063     def readlines(filename):
1064         """Read the source code."""
1065         try:
1066             with open(filename, 'rb') as f:
1067                 (coding, lines) = tokenize.detect_encoding(f.readline)
1068                 f = TextIOWrapper(f, coding, line_buffering=True)
1069                 return [l.decode(coding) for l in lines] + f.readlines()
1070         except (LookupError, SyntaxError, UnicodeError):
1071             # Fall back if file encoding is improperly declared
1072             with open(filename, encoding='latin-1') as f:
1073                 return f.readlines()
1074     isidentifier = str.isidentifier
1075
1076     def stdin_get_value():
1077         return TextIOWrapper(sys.stdin.buffer, errors='ignore').read()
1078 noqa = re.compile(r'# no(?:qa|pep8)\b', re.I).search
1079
1080
1081 def expand_indent(line):
1082     r"""Return the amount of indentation.
1083
1084     Tabs are expanded to the next multiple of 8.
1085
1086     >>> expand_indent('    ')
1087     4
1088     >>> expand_indent('\t')
1089     8
1090     >>> expand_indent('       \t')
1091     8
1092     >>> expand_indent('        \t')
1093     16
1094     """
1095     if '\t' not in line:
1096         return len(line) - len(line.lstrip())
1097     result = 0
1098     for char in line:
1099         if char == '\t':
1100             result = result // 8 * 8 + 8
1101         elif char == ' ':
1102             result += 1
1103         else:
1104             break
1105     return result
1106
1107
1108 def mute_string(text):
1109     """Replace contents with 'xxx' to prevent syntax matching.
1110
1111     >>> mute_string('"abc"')
1112     '"xxx"'
1113     >>> mute_string("'''abc'''")
1114     "'''xxx'''"
1115     >>> mute_string("r'abc'")
1116     "r'xxx'"
1117     """
1118     # String modifiers (e.g. u or r)
1119     start = text.index(text[-1]) + 1
1120     end = len(text) - 1
1121     # Triple quotes
1122     if text[-3:] in ('"""', "'''"):
1123         start += 2
1124         end -= 2
1125     return text[:start] + 'x' * (end - start) + text[end:]
1126
1127
1128 def parse_udiff(diff, patterns=None, parent='.'):
1129     """Return a dictionary of matching lines."""
1130     # For each file of the diff, the entry key is the filename,
1131     # and the value is a set of row numbers to consider.
1132     rv = {}
1133     path = nrows = None
1134     for line in diff.splitlines():
1135         if nrows:
1136             if line[:1] != '-':
1137                 nrows -= 1
1138             continue
1139         if line[:3] == '@@ ':
1140             hunk_match = HUNK_REGEX.match(line)
1141             (row, nrows) = [int(g or '1') for g in hunk_match.groups()]
1142             rv[path].update(range(row, row + nrows))
1143         elif line[:3] == '+++':
1144             path = line[4:].split('\t', 1)[0]
1145             if path[:2] == 'b/':
1146                 path = path[2:]
1147             rv[path] = set()
1148     return dict([(os.path.join(parent, path), rows)
1149                  for (path, rows) in rv.items()
1150                  if rows and filename_match(path, patterns)])
1151
1152
1153 def normalize_paths(value, parent=os.curdir):
1154     """Parse a comma-separated list of paths.
1155
1156     Return a list of absolute paths.
1157     """
1158     if not value or isinstance(value, list):
1159         return value
1160     paths = []
1161     for path in value.split(','):
1162         if '/' in path:
1163             path = os.path.abspath(os.path.join(parent, path))
1164         paths.append(path.rstrip('/'))
1165     return paths
1166
1167
1168 def filename_match(filename, patterns, default=True):
1169     """Check if patterns contains a pattern that matches filename.
1170
1171     If patterns is unspecified, this always returns True.
1172     """
1173     if not patterns:
1174         return default
1175     return any(fnmatch(filename, pattern) for pattern in patterns)
1176
1177
1178 if COMMENT_WITH_NL:
1179     def _is_eol_token(token):
1180         return (token[0] in NEWLINE or
1181                 (token[0] == tokenize.COMMENT and token[1] == token[4]))
1182 else:
1183     def _is_eol_token(token):
1184         return token[0] in NEWLINE
1185
1186
1187 ##############################################################################
1188 # Framework to run all checks
1189 ##############################################################################
1190
1191
1192 _checks = {'physical_line': {}, 'logical_line': {}, 'tree': {}}
1193
1194
1195 def register_check(check, codes=None):
1196     """Register a new check object."""
1197     def _add_check(check, kind, codes, args):
1198         if check in _checks[kind]:
1199             _checks[kind][check][0].extend(codes or [])
1200         else:
1201             _checks[kind][check] = (codes or [''], args)
1202     if inspect.isfunction(check):
1203         args = inspect.getargspec(check)[0]
1204         if args and args[0] in ('physical_line', 'logical_line'):
1205             if codes is None:
1206                 codes = ERRORCODE_REGEX.findall(check.__doc__ or '')
1207             _add_check(check, args[0], codes, args)
1208     elif inspect.isclass(check):
1209         if inspect.getargspec(check.__init__)[0][:2] == ['self', 'tree']:
1210             _add_check(check, 'tree', codes, None)
1211
1212
1213 def init_checks_registry():
1214     """Register all globally visible functions.
1215
1216     The first argument name is either 'physical_line' or 'logical_line'.
1217     """
1218     mod = inspect.getmodule(register_check)
1219     for (name, function) in inspect.getmembers(mod, inspect.isfunction):
1220         register_check(function)
1221 init_checks_registry()
1222
1223
1224 class Checker(object):
1225     """Load a Python source file, tokenize it, check coding style."""
1226
1227     def __init__(self, filename=None, lines=None,
1228                  options=None, report=None, **kwargs):
1229         if options is None:
1230             options = StyleGuide(kwargs).options
1231         else:
1232             assert not kwargs
1233         self._io_error = None
1234         self._physical_checks = options.physical_checks
1235         self._logical_checks = options.logical_checks
1236         self._ast_checks = options.ast_checks
1237         self.max_line_length = options.max_line_length
1238         self.multiline = False  # in a multiline string?
1239         self.hang_closing = options.hang_closing
1240         self.verbose = options.verbose
1241         self.filename = filename
1242         if filename is None:
1243             self.filename = 'stdin'
1244             self.lines = lines or []
1245         elif filename == '-':
1246             self.filename = 'stdin'
1247             self.lines = stdin_get_value().splitlines(True)
1248         elif lines is None:
1249             try:
1250                 self.lines = readlines(filename)
1251             except IOError:
1252                 (exc_type, exc) = sys.exc_info()[:2]
1253                 self._io_error = '%s: %s' % (exc_type.__name__, exc)
1254                 self.lines = []
1255         else:
1256             self.lines = lines
1257         if self.lines:
1258             ord0 = ord(self.lines[0][0])
1259             if ord0 in (0xef, 0xfeff):  # Strip the UTF-8 BOM
1260                 if ord0 == 0xfeff:
1261                     self.lines[0] = self.lines[0][1:]
1262                 elif self.lines[0][:3] == '\xef\xbb\xbf':
1263                     self.lines[0] = self.lines[0][3:]
1264         self.report = report or options.report
1265         self.report_error = self.report.error
1266
1267     def report_invalid_syntax(self):
1268         """Check if the syntax is valid."""
1269         (exc_type, exc) = sys.exc_info()[:2]
1270         if len(exc.args) > 1:
1271             offset = exc.args[1]
1272             if len(offset) > 2:
1273                 offset = offset[1:3]
1274         else:
1275             offset = (1, 0)
1276         self.report_error(offset[0], offset[1] or 0,
1277                           'E901 %s: %s' % (exc_type.__name__, exc.args[0]),
1278                           self.report_invalid_syntax)
1279
1280     def readline(self):
1281         """Get the next line from the input buffer."""
1282         if self.line_number >= self.total_lines:
1283             return ''
1284         line = self.lines[self.line_number]
1285         self.line_number += 1
1286         if self.indent_char is None and line[:1] in WHITESPACE:
1287             self.indent_char = line[0]
1288         return line
1289
1290     def run_check(self, check, argument_names):
1291         """Run a check plugin."""
1292         arguments = []
1293         for name in argument_names:
1294             arguments.append(getattr(self, name))
1295         return check(*arguments)
1296
1297     def check_physical(self, line):
1298         """Run all physical checks on a raw input line."""
1299         self.physical_line = line
1300         for name, check, argument_names in self._physical_checks:
1301             result = self.run_check(check, argument_names)
1302             if result is not None:
1303                 (offset, text) = result
1304                 self.report_error(self.line_number, offset, text, check)
1305                 if text[:4] == 'E101':
1306                     self.indent_char = line[0]
1307
1308     def build_tokens_line(self):
1309         """Build a logical line from tokens."""
1310         logical = []
1311         comments = []
1312         length = 0
1313         prev_row = prev_col = mapping = None
1314         for token_type, text, start, end, line in self.tokens:
1315             if token_type in SKIP_TOKENS:
1316                 continue
1317             if not mapping:
1318                 mapping = [(0, start)]
1319             if token_type == tokenize.COMMENT:
1320                 comments.append(text)
1321                 continue
1322             if token_type == tokenize.STRING:
1323                 text = mute_string(text)
1324             if prev_row:
1325                 (start_row, start_col) = start
1326                 if prev_row != start_row:    # different row
1327                     prev_text = self.lines[prev_row - 1][prev_col - 1]
1328                     if prev_text == ',' or (prev_text not in '{[('
1329                                             and text not in '}])'):
1330                         text = ' ' + text
1331                 elif prev_col != start_col:  # different column
1332                     text = line[prev_col:start_col] + text
1333             logical.append(text)
1334             length += len(text)
1335             mapping.append((length, end))
1336             (prev_row, prev_col) = end
1337         self.logical_line = ''.join(logical)
1338         self.noqa = comments and noqa(''.join(comments))
1339         return mapping
1340
1341     def check_logical(self):
1342         """Build a line from tokens and run all logical checks on it."""
1343         self.report.increment_logical_line()
1344         mapping = self.build_tokens_line()
1345         (start_row, start_col) = mapping[0][1]
1346         start_line = self.lines[start_row - 1]
1347         self.indent_level = expand_indent(start_line[:start_col])
1348         if self.blank_before < self.blank_lines:
1349             self.blank_before = self.blank_lines
1350         if self.verbose >= 2:
1351             print(self.logical_line[:80].rstrip())
1352         for name, check, argument_names in self._logical_checks:
1353             if self.verbose >= 4:
1354                 print('   ' + name)
1355             for offset, text in self.run_check(check, argument_names) or ():
1356                 if not isinstance(offset, tuple):
1357                     for token_offset, pos in mapping:
1358                         if offset <= token_offset:
1359                             break
1360                     offset = (pos[0], pos[1] + offset - token_offset)
1361                 self.report_error(offset[0], offset[1], text, check)
1362         if self.logical_line:
1363             self.previous_indent_level = self.indent_level
1364             self.previous_logical = self.logical_line
1365         self.blank_lines = 0
1366         self.tokens = []
1367
1368     def check_ast(self):
1369         """Build the file's AST and run all AST checks."""
1370         try:
1371             tree = compile(''.join(self.lines), '', 'exec', PyCF_ONLY_AST)
1372         except (SyntaxError, TypeError):
1373             return self.report_invalid_syntax()
1374         for name, cls, __ in self._ast_checks:
1375             checker = cls(tree, self.filename)
1376             for lineno, offset, text, check in checker.run():
1377                 if not self.lines or not noqa(self.lines[lineno - 1]):
1378                     self.report_error(lineno, offset, text, check)
1379
1380     def generate_tokens(self):
1381         """Tokenize the file, run physical line checks and yield tokens."""
1382         if self._io_error:
1383             self.report_error(1, 0, 'E902 %s' % self._io_error, readlines)
1384         tokengen = tokenize.generate_tokens(self.readline)
1385         try:
1386             for token in tokengen:
1387                 if token[2][0] > self.total_lines:
1388                     return
1389                 self.maybe_check_physical(token)
1390                 yield token
1391         except (SyntaxError, tokenize.TokenError):
1392             self.report_invalid_syntax()
1393
1394     def maybe_check_physical(self, token):
1395         """If appropriate (based on token), check current physical line(s)."""
1396         # Called after every token, but act only on end of line.
1397         if _is_eol_token(token):
1398             # Obviously, a newline token ends a single physical line.
1399             self.check_physical(token[4])
1400         elif token[0] == tokenize.STRING and '\n' in token[1]:
1401             # Less obviously, a string that contains newlines is a
1402             # multiline string, either triple-quoted or with internal
1403             # newlines backslash-escaped. Check every physical line in the
1404             # string *except* for the last one: its newline is outside of
1405             # the multiline string, so we consider it a regular physical
1406             # line, and will check it like any other physical line.
1407             #
1408             # Subtleties:
1409             # - we don't *completely* ignore the last line; if it contains
1410             #   the magical "# noqa" comment, we disable all physical
1411             #   checks for the entire multiline string
1412             # - have to wind self.line_number back because initially it
1413             #   points to the last line of the string, and we want
1414             #   check_physical() to give accurate feedback
1415             if noqa(token[4]):
1416                 return
1417             self.multiline = True
1418             self.line_number = token[2][0]
1419             for line in token[1].split('\n')[:-1]:
1420                 self.check_physical(line + '\n')
1421                 self.line_number += 1
1422             self.multiline = False
1423
1424     def check_all(self, expected=None, line_offset=0):
1425         """Run all checks on the input file."""
1426         self.report.init_file(self.filename, self.lines, expected, line_offset)
1427         self.total_lines = len(self.lines)
1428         if self._ast_checks:
1429             self.check_ast()
1430         self.line_number = 0
1431         self.indent_char = None
1432         self.indent_level = self.previous_indent_level = 0
1433         self.previous_logical = ''
1434         self.tokens = []
1435         self.blank_lines = self.blank_before = 0
1436         parens = 0
1437         for token in self.generate_tokens():
1438             self.tokens.append(token)
1439             token_type, text = token[0:2]
1440             if self.verbose >= 3:
1441                 if token[2][0] == token[3][0]:
1442                     pos = '[%s:%s]' % (token[2][1] or '', token[3][1])
1443                 else:
1444                     pos = 'l.%s' % token[3][0]
1445                 print('l.%s\t%s\t%s\t%r' %
1446                       (token[2][0], pos, tokenize.tok_name[token[0]], text))
1447             if token_type == tokenize.OP:
1448                 if text in '([{':
1449                     parens += 1
1450                 elif text in '}])':
1451                     parens -= 1
1452             elif not parens:
1453                 if token_type in NEWLINE:
1454                     if token_type == tokenize.NEWLINE:
1455                         self.check_logical()
1456                         self.blank_before = 0
1457                     elif len(self.tokens) == 1:
1458                         # The physical line contains only this token.
1459                         self.blank_lines += 1
1460                         del self.tokens[0]
1461                     else:
1462                         self.check_logical()
1463                 elif COMMENT_WITH_NL and token_type == tokenize.COMMENT:
1464                     if len(self.tokens) == 1:
1465                         # The comment also ends a physical line
1466                         token = list(token)
1467                         token[1] = text.rstrip('\r\n')
1468                         token[3] = (token[2][0], token[2][1] + len(token[1]))
1469                         self.tokens = [tuple(token)]
1470                         self.check_logical()
1471         if self.tokens:
1472             self.check_physical(self.lines[-1])
1473             self.check_logical()
1474         return self.report.get_file_results()
1475
1476
1477 class BaseReport(object):
1478     """Collect the results of the checks."""
1479
1480     print_filename = False
1481
1482     def __init__(self, options):
1483         self._benchmark_keys = options.benchmark_keys
1484         self._ignore_code = options.ignore_code
1485         # Results
1486         self.elapsed = 0
1487         self.total_errors = 0
1488         self.counters = dict.fromkeys(self._benchmark_keys, 0)
1489         self.messages = {}
1490
1491     def start(self):
1492         """Start the timer."""
1493         self._start_time = time.time()
1494
1495     def stop(self):
1496         """Stop the timer."""
1497         self.elapsed = time.time() - self._start_time
1498
1499     def init_file(self, filename, lines, expected, line_offset):
1500         """Signal a new file."""
1501         self.filename = filename
1502         self.lines = lines
1503         self.expected = expected or ()
1504         self.line_offset = line_offset
1505         self.file_errors = 0
1506         self.counters['files'] += 1
1507         self.counters['physical lines'] += len(lines)
1508
1509     def increment_logical_line(self):
1510         """Signal a new logical line."""
1511         self.counters['logical lines'] += 1
1512
1513     def error(self, line_number, offset, text, check):
1514         """Report an error, according to options."""
1515         code = text[:4]
1516         if self._ignore_code(code):
1517             return
1518         if code in self.counters:
1519             self.counters[code] += 1
1520         else:
1521             self.counters[code] = 1
1522             self.messages[code] = text[5:]
1523         # Don't care about expected errors or warnings
1524         if code in self.expected:
1525             return
1526         if self.print_filename and not self.file_errors:
1527             print(self.filename)
1528         self.file_errors += 1
1529         self.total_errors += 1
1530         return code
1531
1532     def get_file_results(self):
1533         """Return the count of errors and warnings for this file."""
1534         return self.file_errors
1535
1536     def get_count(self, prefix=''):
1537         """Return the total count of errors and warnings."""
1538         return sum([self.counters[key]
1539                     for key in self.messages if key.startswith(prefix)])
1540
1541     def get_statistics(self, prefix=''):
1542         """Get statistics for message codes that start with the prefix.
1543
1544         prefix='' matches all errors and warnings
1545         prefix='E' matches all errors
1546         prefix='W' matches all warnings
1547         prefix='E4' matches all errors that have to do with imports
1548         """
1549         return ['%-7s %s %s' % (self.counters[key], key, self.messages[key])
1550                 for key in sorted(self.messages) if key.startswith(prefix)]
1551
1552     def print_statistics(self, prefix=''):
1553         """Print overall statistics (number of errors and warnings)."""
1554         for line in self.get_statistics(prefix):
1555             print(line)
1556
1557     def print_benchmark(self):
1558         """Print benchmark numbers."""
1559         print('%-7.2f %s' % (self.elapsed, 'seconds elapsed'))
1560         if self.elapsed:
1561             for key in self._benchmark_keys:
1562                 print('%-7d %s per second (%d total)' %
1563                       (self.counters[key] / self.elapsed, key,
1564                        self.counters[key]))
1565
1566
1567 class FileReport(BaseReport):
1568     """Collect the results of the checks and print only the filenames."""
1569     print_filename = True
1570
1571
1572 class StandardReport(BaseReport):
1573     """Collect and print the results of the checks."""
1574
1575     def __init__(self, options):
1576         super(StandardReport, self).__init__(options)
1577         self._fmt = REPORT_FORMAT.get(options.format.lower(),
1578                                       options.format)
1579         self._repeat = options.repeat
1580         self._show_source = options.show_source
1581         self._show_pep8 = options.show_pep8
1582
1583     def init_file(self, filename, lines, expected, line_offset):
1584         """Signal a new file."""
1585         self._deferred_print = []
1586         return super(StandardReport, self).init_file(
1587             filename, lines, expected, line_offset)
1588
1589     def error(self, line_number, offset, text, check):
1590         """Report an error, according to options."""
1591         code = super(StandardReport, self).error(line_number, offset,
1592                                                  text, check)
1593         if code and (self.counters[code] == 1 or self._repeat):
1594             self._deferred_print.append(
1595                 (line_number, offset, code, text[5:], check.__doc__))
1596         return code
1597
1598     def get_file_results(self):
1599         """Print the result and return the overall count for this file."""
1600         self._deferred_print.sort()
1601         for line_number, offset, code, text, doc in self._deferred_print:
1602             print(self._fmt % {
1603                 'path': self.filename,
1604                 'row': self.line_offset + line_number, 'col': offset + 1,
1605                 'code': code, 'text': text,
1606             })
1607             if self._show_source:
1608                 if line_number > len(self.lines):
1609                     line = ''
1610                 else:
1611                     line = self.lines[line_number - 1]
1612                 print(line.rstrip())
1613                 print(re.sub(r'\S', ' ', line[:offset]) + '^')
1614             if self._show_pep8 and doc:
1615                 print('    ' + doc.strip())
1616         return self.file_errors
1617
1618
1619 class DiffReport(StandardReport):
1620     """Collect and print the results for the changed lines only."""
1621
1622     def __init__(self, options):
1623         super(DiffReport, self).__init__(options)
1624         self._selected = options.selected_lines
1625
1626     def error(self, line_number, offset, text, check):
1627         if line_number not in self._selected[self.filename]:
1628             return
1629         return super(DiffReport, self).error(line_number, offset, text, check)
1630
1631
1632 class StyleGuide(object):
1633     """Initialize a PEP-8 instance with few options."""
1634
1635     def __init__(self, *args, **kwargs):
1636         # build options from the command line
1637         self.checker_class = kwargs.pop('checker_class', Checker)
1638         parse_argv = kwargs.pop('parse_argv', False)
1639         config_file = kwargs.pop('config_file', None)
1640         parser = kwargs.pop('parser', None)
1641         # build options from dict
1642         options_dict = dict(*args, **kwargs)
1643         arglist = None if parse_argv else options_dict.get('paths', None)
1644         options, self.paths = process_options(
1645             arglist, parse_argv, config_file, parser)
1646         if options_dict:
1647             options.__dict__.update(options_dict)
1648             if 'paths' in options_dict:
1649                 self.paths = options_dict['paths']
1650
1651         self.runner = self.input_file
1652         self.options = options
1653
1654         if not options.reporter:
1655             options.reporter = BaseReport if options.quiet else StandardReport
1656
1657         options.select = tuple(options.select or ())
1658         if not (options.select or options.ignore or
1659                 options.testsuite or options.doctest) and DEFAULT_IGNORE:
1660             # The default choice: ignore controversial checks
1661             options.ignore = tuple(DEFAULT_IGNORE.split(','))
1662         else:
1663             # Ignore all checks which are not explicitly selected
1664             options.ignore = ('',) if options.select else tuple(options.ignore)
1665         options.benchmark_keys = BENCHMARK_KEYS[:]
1666         options.ignore_code = self.ignore_code
1667         options.physical_checks = self.get_checks('physical_line')
1668         options.logical_checks = self.get_checks('logical_line')
1669         options.ast_checks = self.get_checks('tree')
1670         self.init_report()
1671
1672     def init_report(self, reporter=None):
1673         """Initialize the report instance."""
1674         self.options.report = (reporter or self.options.reporter)(self.options)
1675         return self.options.report
1676
1677     def check_files(self, paths=None):
1678         """Run all checks on the paths."""
1679         if paths is None:
1680             paths = self.paths
1681         report = self.options.report
1682         runner = self.runner
1683         report.start()
1684         try:
1685             for path in paths:
1686                 if os.path.isdir(path):
1687                     self.input_dir(path)
1688                 elif not self.excluded(path):
1689                     runner(path)
1690         except KeyboardInterrupt:
1691             print('... stopped')
1692         report.stop()
1693         return report
1694
1695     def input_file(self, filename, lines=None, expected=None, line_offset=0):
1696         """Run all checks on a Python source file."""
1697         if self.options.verbose:
1698             print('checking %s' % filename)
1699         fchecker = self.checker_class(
1700             filename, lines=lines, options=self.options)
1701         return fchecker.check_all(expected=expected, line_offset=line_offset)
1702
1703     def input_dir(self, dirname):
1704         """Check all files in this directory and all subdirectories."""
1705         dirname = dirname.rstrip('/')
1706         if self.excluded(dirname):
1707             return 0
1708         counters = self.options.report.counters
1709         verbose = self.options.verbose
1710         filepatterns = self.options.filename
1711         runner = self.runner
1712         for root, dirs, files in os.walk(dirname):
1713             if verbose:
1714                 print('directory ' + root)
1715             counters['directories'] += 1
1716             for subdir in sorted(dirs):
1717                 if self.excluded(subdir, root):
1718                     dirs.remove(subdir)
1719             for filename in sorted(files):
1720                 # contain a pattern that matches?
1721                 if ((filename_match(filename, filepatterns) and
1722                      not self.excluded(filename, root))):
1723                     runner(os.path.join(root, filename))
1724
1725     def excluded(self, filename, parent=None):
1726         """Check if the file should be excluded.
1727
1728         Check if 'options.exclude' contains a pattern that matches filename.
1729         """
1730         if not self.options.exclude:
1731             return False
1732         basename = os.path.basename(filename)
1733         if filename_match(basename, self.options.exclude):
1734             return True
1735         if parent:
1736             filename = os.path.join(parent, filename)
1737         filename = os.path.abspath(filename)
1738         return filename_match(filename, self.options.exclude)
1739
1740     def ignore_code(self, code):
1741         """Check if the error code should be ignored.
1742
1743         If 'options.select' contains a prefix of the error code,
1744         return False.  Else, if 'options.ignore' contains a prefix of
1745         the error code, return True.
1746         """
1747         if len(code) < 4 and any(s.startswith(code)
1748                                  for s in self.options.select):
1749             return False
1750         return (code.startswith(self.options.ignore) and
1751                 not code.startswith(self.options.select))
1752
1753     def get_checks(self, argument_name):
1754         """Get all the checks for this category.
1755
1756         Find all globally visible functions where the first argument name
1757         starts with argument_name and which contain selected tests.
1758         """
1759         checks = []
1760         for check, attrs in _checks[argument_name].items():
1761             (codes, args) = attrs
1762             if any(not (code and self.ignore_code(code)) for code in codes):
1763                 checks.append((check.__name__, check, args))
1764         return sorted(checks)
1765
1766
1767 def get_parser(prog='pep8', version=__version__):
1768     parser = OptionParser(prog=prog, version=version,
1769                           usage="%prog [options] input ...")
1770     parser.config_options = [
1771         'exclude', 'filename', 'select', 'ignore', 'max-line-length',
1772         'hang-closing', 'count', 'format', 'quiet', 'show-pep8',
1773         'show-source', 'statistics', 'verbose']
1774     parser.add_option('-v', '--verbose', default=0, action='count',
1775                       help="print status messages, or debug with -vv")
1776     parser.add_option('-q', '--quiet', default=0, action='count',
1777                       help="report only file names, or nothing with -qq")
1778     parser.add_option('-r', '--repeat', default=True, action='store_true',
1779                       help="(obsolete) show all occurrences of the same error")
1780     parser.add_option('--first', action='store_false', dest='repeat',
1781                       help="show first occurrence of each error")
1782     parser.add_option('--exclude', metavar='patterns', default=DEFAULT_EXCLUDE,
1783                       help="exclude files or directories which match these "
1784                            "comma separated patterns (default: %default)")
1785     parser.add_option('--filename', metavar='patterns', default='*.py',
1786                       help="when parsing directories, only check filenames "
1787                            "matching these comma separated patterns "
1788                            "(default: %default)")
1789     parser.add_option('--select', metavar='errors', default='',
1790                       help="select errors and warnings (e.g. E,W6)")
1791     parser.add_option('--ignore', metavar='errors', default='',
1792                       help="skip errors and warnings (e.g. E4,W)")
1793     parser.add_option('--show-source', action='store_true',
1794                       help="show source code for each error")
1795     parser.add_option('--show-pep8', action='store_true',
1796                       help="show text of PEP 8 for each error "
1797                            "(implies --first)")
1798     parser.add_option('--statistics', action='store_true',
1799                       help="count errors and warnings")
1800     parser.add_option('--count', action='store_true',
1801                       help="print total number of errors and warnings "
1802                            "to standard error and set exit code to 1 if "
1803                            "total is not null")
1804     parser.add_option('--max-line-length', type='int', metavar='n',
1805                       default=MAX_LINE_LENGTH,
1806                       help="set maximum allowed line length "
1807                            "(default: %default)")
1808     parser.add_option('--hang-closing', action='store_true',
1809                       help="hang closing bracket instead of matching "
1810                            "indentation of opening bracket's line")
1811     parser.add_option('--format', metavar='format', default='default',
1812                       help="set the error format [default|pylint|<custom>]")
1813     parser.add_option('--diff', action='store_true',
1814                       help="report only lines changed according to the "
1815                            "unified diff received on STDIN")
1816     group = parser.add_option_group("Testing Options")
1817     if os.path.exists(TESTSUITE_PATH):
1818         group.add_option('--testsuite', metavar='dir',
1819                          help="run regression tests from dir")
1820         group.add_option('--doctest', action='store_true',
1821                          help="run doctest on myself")
1822     group.add_option('--benchmark', action='store_true',
1823                      help="measure processing speed")
1824     return parser
1825
1826
1827 def read_config(options, args, arglist, parser):
1828     """Read both user configuration and local configuration."""
1829     config = RawConfigParser()
1830
1831     user_conf = options.config
1832     if user_conf and os.path.isfile(user_conf):
1833         if options.verbose:
1834             print('user configuration: %s' % user_conf)
1835         config.read(user_conf)
1836
1837     local_dir = os.curdir
1838     parent = tail = args and os.path.abspath(os.path.commonprefix(args))
1839     while tail:
1840         if config.read([os.path.join(parent, fn) for fn in PROJECT_CONFIG]):
1841             local_dir = parent
1842             if options.verbose:
1843                 print('local configuration: in %s' % parent)
1844             break
1845         (parent, tail) = os.path.split(parent)
1846
1847     pep8_section = parser.prog
1848     if config.has_section(pep8_section):
1849         option_list = dict([(o.dest, o.type or o.action)
1850                             for o in parser.option_list])
1851
1852         # First, read the default values
1853         (new_options, __) = parser.parse_args([])
1854
1855         # Second, parse the configuration
1856         for opt in config.options(pep8_section):
1857             if opt.replace('_', '-') not in parser.config_options:
1858                 print("  unknown option '%s' ignored" % opt)
1859                 continue
1860             if options.verbose > 1:
1861                 print("  %s = %s" % (opt, config.get(pep8_section, opt)))
1862             normalized_opt = opt.replace('-', '_')
1863             opt_type = option_list[normalized_opt]
1864             if opt_type in ('int', 'count'):
1865                 value = config.getint(pep8_section, opt)
1866             elif opt_type == 'string':
1867                 value = config.get(pep8_section, opt)
1868                 if normalized_opt == 'exclude':
1869                     value = normalize_paths(value, local_dir)
1870             else:
1871                 assert opt_type in ('store_true', 'store_false')
1872                 value = config.getboolean(pep8_section, opt)
1873             setattr(new_options, normalized_opt, value)
1874
1875         # Third, overwrite with the command-line options
1876         (options, __) = parser.parse_args(arglist, values=new_options)
1877     options.doctest = options.testsuite = False
1878     return options
1879
1880
1881 def process_options(arglist=None, parse_argv=False, config_file=None,
1882                     parser=None):
1883     """Process options passed either via arglist or via command line args."""
1884     if not parser:
1885         parser = get_parser()
1886     if not parser.has_option('--config'):
1887         if config_file is True:
1888             config_file = DEFAULT_CONFIG
1889         group = parser.add_option_group("Configuration", description=(
1890             "The project options are read from the [%s] section of the "
1891             "tox.ini file or the setup.cfg file located in any parent folder "
1892             "of the path(s) being processed.  Allowed options are: %s." %
1893             (parser.prog, ', '.join(parser.config_options))))
1894         group.add_option('--config', metavar='path', default=config_file,
1895                          help="user config file location (default: %default)")
1896     # Don't read the command line if the module is used as a library.
1897     if not arglist and not parse_argv:
1898         arglist = []
1899     # If parse_argv is True and arglist is None, arguments are
1900     # parsed from the command line (sys.argv)
1901     (options, args) = parser.parse_args(arglist)
1902     options.reporter = None
1903
1904     if options.ensure_value('testsuite', False):
1905         args.append(options.testsuite)
1906     elif not options.ensure_value('doctest', False):
1907         if parse_argv and not args:
1908             if options.diff or any(os.path.exists(name)
1909                                    for name in PROJECT_CONFIG):
1910                 args = ['.']
1911             else:
1912                 parser.error('input not specified')
1913         options = read_config(options, args, arglist, parser)
1914         options.reporter = parse_argv and options.quiet == 1 and FileReport
1915
1916     options.filename = options.filename and options.filename.split(',')
1917     options.exclude = normalize_paths(options.exclude)
1918     options.select = options.select and options.select.split(',')
1919     options.ignore = options.ignore and options.ignore.split(',')
1920
1921     if options.diff:
1922         options.reporter = DiffReport
1923         stdin = stdin_get_value()
1924         options.selected_lines = parse_udiff(stdin, options.filename, args[0])
1925         args = sorted(options.selected_lines)
1926
1927     return options, args
1928
1929
1930 def _main():
1931     """Parse options and run checks on Python source."""
1932     import signal
1933
1934     # Handle "Broken pipe" gracefully
1935     try:
1936         signal.signal(signal.SIGPIPE, lambda signum, frame: sys.exit(1))
1937     except AttributeError:
1938         pass    # not supported on Windows
1939
1940     pep8style = StyleGuide(parse_argv=True, config_file=True)
1941     options = pep8style.options
1942     if options.doctest or options.testsuite:
1943         from testsuite.support import run_tests
1944         report = run_tests(pep8style)
1945     else:
1946         report = pep8style.check_files()
1947     if options.statistics:
1948         report.print_statistics()
1949     if options.benchmark:
1950         report.print_benchmark()
1951     if options.testsuite and not options.quiet:
1952         report.print_results()
1953     if report.total_errors:
1954         if options.count:
1955             sys.stderr.write(str(report.total_errors) + '\n')
1956         sys.exit(1)
1957
1958 if __name__ == '__main__':
1959     _main()