tools/cpplint.py

   1 #!/usr/bin/python
   2 #
   3 # Copyright (c) 2009 Google Inc. All rights reserved.
   4 #
   5 # Redistribution and use in source and binary forms, with or without
   6 # modification, are permitted provided that the following conditions are
   7 # met:
   8 #
   9 #    * Redistributions of source code must retain the above copyright
  10 # notice, this list of conditions and the following disclaimer.
  11 #    * Redistributions in binary form must reproduce the above
  12 # copyright notice, this list of conditions and the following disclaimer
  13 # in the documentation and/or other materials provided with the
  14 # distribution.
  15 #    * Neither the name of Google Inc. nor the names of its
  16 # contributors may be used to endorse or promote products derived from
  17 # this software without specific prior written permission.
  18 #
  19 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  20 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  21 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  22 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  23 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  25 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  26 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  27 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  28 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  29 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  30
  31 """Does google-lint on c++ files.
  32
  33 The goal of this script is to identify places in the code that *may*
  34 be in non-compliance with google style.  It does not attempt to fix
up these problems -- the point is to educate.  It also does not
attempt to find all problems, or to ensure that everything it does
find is legitimately a problem.
  38
  39 In particular, we can get very confused by /* and // inside strings!
  40 We do a small hack, which is to ignore //'s with "'s after them on the
  41 same line, but it is far from perfect (in either direction).
  42 """
  43
  44 import codecs
  45 import copy
  46 import getopt
  47 import math  # for log
  48 import os
  49 import re
  50 import sre_compile
  51 import string
  52 import sys
  53 import unicodedata
  54
  55
  56 _USAGE = """
  57 Syntax: cpplint.py [--verbose=#] [--output=vs7] [--filter=-x,+y,...]
  58                    [--counting=total|toplevel|detailed] [--root=subdir]
  59                    [--linelength=digits]
  60         <file> [file] ...
  61
  62   The style guidelines this tries to follow are those in
  63     http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml
  64
  65   Every problem is given a confidence score from 1-5, with 5 meaning we are
  66   certain of the problem, and 1 meaning it could be a legitimate construct.
  67   This will miss some errors, and is not a substitute for a code review.
  68
  69   To suppress false-positive errors of a certain category, add a
  70   'NOLINT(category)' comment to the line.  NOLINT or NOLINT(*)
  71   suppresses errors of all categories on that line.
  72
  73   The files passed in will be linted; at least one file must be provided.
  74   Default linted extensions are .cc, .cpp, .cu, .cuh and .h.  Change the
  75   extensions with the --extensions flag.
  76
  77   Flags:
  78
  79     output=vs7
  80       By default, the output is formatted to ease emacs parsing.  Visual Studio
  81       compatible output (vs7) may also be used.  Other formats are unsupported.
  82
  83     verbose=#
  84       Specify a number 0-5 to restrict errors to certain verbosity levels.
  85
  86     filter=-x,+y,...
  87       Specify a comma-separated list of category-filters to apply: only
  88       error messages whose category names pass the filters will be printed.
  89       (Category names are printed with the message and look like
  90       "[whitespace/indent]".)  Filters are evaluated left to right.
  91       "-FOO" and "FOO" means "do not print categories that start with FOO".
  92       "+FOO" means "do print categories that start with FOO".
  93
  94       Examples: --filter=-whitespace,+whitespace/braces
  95                 --filter=whitespace,runtime/printf,+runtime/printf_format
  96                 --filter=-,+build/include_what_you_use
  97
  98       To see a list of all the categories used in cpplint, pass no arg:
  99          --filter=
 100
 101     counting=total|toplevel|detailed
 102       The total number of errors found is always printed. If
 103       'toplevel' is provided, then the count of errors in each of
 104       the top-level categories like 'build' and 'whitespace' will
 105       also be printed. If 'detailed' is provided, then a count
 106       is provided for each category like 'build/class'.
 107
 108     root=subdir
 109       The root directory used for deriving header guard CPP variable.
 110       By default, the header guard CPP variable is calculated as the relative
 111       path to the directory that contains .git, .hg, or .svn.  When this flag
 112       is specified, the relative path is calculated from the specified
 113       directory. If the specified directory does not exist, this flag is
 114       ignored.
 115
 116       Examples:
 117         Assuing that src/.git exists, the header guard CPP variables for
 118         src/chrome/browser/ui/browser.h are:
 119
 120         No flag => CHROME_BROWSER_UI_BROWSER_H_
 121         --root=chrome => BROWSER_UI_BROWSER_H_
 122         --root=chrome/browser => UI_BROWSER_H_
 123
 124     linelength=digits
 125       This is the allowed line length for the project. The default value is
 126       80 characters.
 127
 128       Examples:
 129         --linelength=120
 130
 131     extensions=extension,extension,...
 132       The allowed file extensions that cpplint will check
 133
 134       Examples:
 135         --extensions=hpp,cpp
 136 """
 137
# Every error message we print is tagged with one of the categories below.
# The list is explicit so that `cpplint --filter=` can print all of them,
# and NOLINT(category) comments are validated against it.
# If you add a new error message with a new category, add it to the list
# here!  cpplint_unittest.py should tell you if you forget to do this.
_ERROR_CATEGORIES = [
  'build/class',
  'build/deprecated',
  'build/endif_comment',
  'build/explicit_make_pair',
  'build/forward_decl',
  'build/header_guard',
  'build/include',
  'build/include_alpha',
  'build/include_order',
  'build/include_what_you_use',
  'build/namespaces',
  'build/printf_format',
  'build/storage_class',
  'legal/copyright',
  'readability/alt_tokens',
  'readability/braces',
  'readability/casting',
  'readability/check',
  'readability/constructors',
  'readability/fn_size',
  'readability/function',
  'readability/multiline_comment',
  'readability/multiline_string',
  'readability/namespace',
  'readability/nolint',
  'readability/nul',
  'readability/streams',
  'readability/todo',
  'readability/utf8',
  'runtime/arrays',
  'runtime/casting',
  'runtime/explicit',
  'runtime/int',
  'runtime/init',
  'runtime/invalid_increment',
  'runtime/member_string_references',
  'runtime/memset',
  'runtime/operator',
  'runtime/printf',
  'runtime/printf_format',
  'runtime/references',
  'runtime/sizeof',
  'runtime/string',
  'runtime/threadsafe_fn',
  'runtime/vlog',
  'whitespace/blank_line',
  'whitespace/braces',
  'whitespace/comma',
  'whitespace/comments',
  'whitespace/empty_conditional_body',
  'whitespace/empty_loop_body',
  'whitespace/end_of_line',
  'whitespace/ending_newline',
  'whitespace/forcolon',
  'whitespace/indent',
  'whitespace/line_length',
  'whitespace/newline',
  'whitespace/operators',
  'whitespace/parens',
  'whitespace/semicolon',
  'whitespace/tab',
  'whitespace/todo'
  ]
 206
# The default state of the category filter. This is overridden by the
# --filter= flag. By default all errors are on, so only add here categories
# that should be off by default (i.e., categories that must be enabled by the
# --filter= flags).
# All entries here should start with a '-' or '+', as in the --filter= flag.
_DEFAULT_FILTERS = ['-build/include_alpha']
 212
 213 # We used to check for high-bit characters, but after much discussion we
 214 # decided those were OK, as long as they were in UTF-8 and didn't represent
 215 # hard-coded international strings, which belong in a separate i18n file.
 216
 217
# C++ headers, grouped as: legacy pre-standard names, the C++ library
# headers, and the C++ headers that wrap C library facilities.
_CPP_HEADERS = frozenset([
    # Legacy
    'algobase.h',
    'algo.h',
    'alloc.h',
    'builtinbuf.h',
    'bvector.h',
    'complex.h',
    'defalloc.h',
    'deque.h',
    'editbuf.h',
    'fstream.h',
    'function.h',
    'hash_map',
    'hash_map.h',
    'hash_set',
    'hash_set.h',
    'hashtable.h',
    'heap.h',
    'indstream.h',
    'iomanip.h',
    'iostream.h',
    'istream.h',
    'iterator.h',
    'list.h',
    'map.h',
    'multimap.h',
    'multiset.h',
    'ostream.h',
    'pair.h',
    'parsestream.h',
    'pfstream.h',
    'procbuf.h',
    'pthread_alloc',
    'pthread_alloc.h',
    'rope',
    'rope.h',
    'ropeimpl.h',
    'set.h',
    'slist',
    'slist.h',
    'stack.h',
    'stdiostream.h',
    'stl_alloc.h',
    'stl_relops.h',
    'streambuf.h',
    'stream.h',
    'strfile.h',
    'strstream.h',
    'tempbuf.h',
    'tree.h',
    'type_traits.h',
    'vector.h',
    # 17.6.1.2 C++ library headers
    'algorithm',
    'array',
    'atomic',
    'bitset',
    'chrono',
    'codecvt',
    'complex',
    'condition_variable',
    'deque',
    'exception',
    'forward_list',
    'fstream',
    'functional',
    'future',
    'initializer_list',
    'iomanip',
    'ios',
    'iosfwd',
    'iostream',
    'istream',
    'iterator',
    'limits',
    'list',
    'locale',
    'map',
    'memory',
    'mutex',
    'new',
    'numeric',
    'ostream',
    'queue',
    'random',
    'ratio',
    'regex',
    'set',
    'sstream',
    'stack',
    'stdexcept',
    'streambuf',
    'string',
    'strstream',
    'system_error',
    'thread',
    'tuple',
    'typeindex',
    'typeinfo',
    'type_traits',
    'unordered_map',
    'unordered_set',
    'utility',
    'valarray',
    'vector',
    # 17.6.1.2 C++ headers for C library facilities
    'cassert',
    'ccomplex',
    'cctype',
    'cerrno',
    'cfenv',
    'cfloat',
    'cinttypes',
    'ciso646',
    'climits',
    'clocale',
    'cmath',
    'csetjmp',
    'csignal',
    'cstdalign',
    'cstdarg',
    'cstdbool',
    'cstddef',
    'cstdint',
    'cstdio',
    'cstdlib',
    'cstring',
    'ctgmath',
    'ctime',
    'cuchar',
    'cwchar',
    'cwctype',
    ])
 353
 354 # Assertion macros.  These are defined in base/logging.h and
 355 # testing/base/gunit.h.  Note that the _M versions need to come first
 356 # for substring matching to work.
 357 _CHECK_MACROS = [
 358     'DCHECK', 'CHECK',
 359     'EXPECT_TRUE_M', 'EXPECT_TRUE',
 360     'ASSERT_TRUE_M', 'ASSERT_TRUE',
 361     'EXPECT_FALSE_M', 'EXPECT_FALSE',
 362     'ASSERT_FALSE_M', 'ASSERT_FALSE',
 363     ]
 364
 365 # Replacement macros for CHECK/DCHECK/EXPECT_TRUE/EXPECT_FALSE
 366 _CHECK_REPLACEMENT = dict([(m, {}) for m in _CHECK_MACROS])
 367
 368 for op, replacement in [('==', 'EQ'), ('!=', 'NE'),
 369                         ('>=', 'GE'), ('>', 'GT'),
 370                         ('<=', 'LE'), ('<', 'LT')]:
 371   _CHECK_REPLACEMENT['DCHECK'][op] = 'DCHECK_%s' % replacement
 372   _CHECK_REPLACEMENT['CHECK'][op] = 'CHECK_%s' % replacement
 373   _CHECK_REPLACEMENT['EXPECT_TRUE'][op] = 'EXPECT_%s' % replacement
 374   _CHECK_REPLACEMENT['ASSERT_TRUE'][op] = 'ASSERT_%s' % replacement
 375   _CHECK_REPLACEMENT['EXPECT_TRUE_M'][op] = 'EXPECT_%s_M' % replacement
 376   _CHECK_REPLACEMENT['ASSERT_TRUE_M'][op] = 'ASSERT_%s_M' % replacement
 377
 378 for op, inv_replacement in [('==', 'NE'), ('!=', 'EQ'),
 379                             ('>=', 'LT'), ('>', 'LE'),
 380                             ('<=', 'GT'), ('<', 'GE')]:
 381   _CHECK_REPLACEMENT['EXPECT_FALSE'][op] = 'EXPECT_%s' % inv_replacement
 382   _CHECK_REPLACEMENT['ASSERT_FALSE'][op] = 'ASSERT_%s' % inv_replacement
 383   _CHECK_REPLACEMENT['EXPECT_FALSE_M'][op] = 'EXPECT_%s_M' % inv_replacement
 384   _CHECK_REPLACEMENT['ASSERT_FALSE_M'][op] = 'ASSERT_%s_M' % inv_replacement
 385
 386 # Alternative tokens and their replacements.  For full list, see section 2.5
 387 # Alternative tokens [lex.digraph] in the C++ standard.
 388 #
 389 # Digraphs (such as '%:') are not included here since it's a mess to
 390 # match those on a word boundary.
 391 _ALT_TOKEN_REPLACEMENT = {
 392     'and': '&&',
 393     'bitor': '|',
 394     'or': '||',
 395     'xor': '^',
 396     'compl': '~',
 397     'bitand': '&',
 398     'and_eq': '&=',
 399     'or_eq': '|=',
 400     'xor_eq': '^=',
 401     'not': '!',
 402     'not_eq': '!='
 403     }
 404
 405 # Compile regular expression that matches all the above keywords.  The "[ =()]"
 406 # bit is meant to avoid matching these keywords outside of boolean expressions.
 407 #
 408 # False positives include C-style multi-line comments and multi-line strings
 409 # but those have always been troublesome for cpplint.
 410 _ALT_TOKEN_REPLACEMENT_PATTERN = re.compile(
 411     r'[ =()](' + ('|'.join(_ALT_TOKEN_REPLACEMENT.keys())) + r')(?=[ (]|$)')
 412
 413
# These constants define types of headers for use with
# _IncludeState.CheckNextIncludeOrder().
_C_SYS_HEADER = 1
_CPP_SYS_HEADER = 2
_LIKELY_MY_HEADER = 3
_POSSIBLE_MY_HEADER = 4
_OTHER_HEADER = 5

# These constants define the current inline assembly state
_NO_ASM = 0       # Outside of inline assembly block
_INSIDE_ASM = 1   # Inside inline assembly block
_END_ASM = 2      # Last line of inline assembly block
_BLOCK_ASM = 3    # The whole block is an inline assembly block

# Match start of assembly blocks: an asm keyword variant at the start of the
# line, an optional volatile qualifier (captured), then an opening '{' or '('.
_MATCH_ASM = re.compile(r'^\s*(?:asm|_asm|__asm|__asm__)'
                        r'(?:\s+(volatile|__volatile__))?'
                        r'\s*[{(]')
 432
 433
# Maps a regular expression pattern string to its compiled form.  Filled
# lazily by Match, ReplaceAll and Search.
_regexp_compile_cache = {}

# Finds occurrences of NOLINT or NOLINT(...).
_RE_SUPPRESSION = re.compile(r'\bNOLINT\b(\([^)]*\))?')

# {str, set(int)}: a map from error categories to sets of linenumbers
# on which those errors are expected and should be suppressed.  The key
# None holds the lines on which every category is suppressed.
_error_suppressions = {}

# The root directory used for deriving header guard CPP variable.
# This is set by --root flag.
_root = None

# The allowed line length of files.
# This is set by --linelength flag.
_line_length = 80

# The allowed extensions for file names
# This is set by --extensions flag.
_valid_extensions = set(['cc', 'h', 'cpp', 'cu', 'cuh'])
 454
 455 def ParseNolintSuppressions(filename, raw_line, linenum, error):
 456   """Updates the global list of error-suppressions.
 457
 458   Parses any NOLINT comments on the current line, updating the global
 459   error_suppressions store.  Reports an error if the NOLINT comment
 460   was malformed.
 461
 462   Args:
 463     filename: str, the name of the input file.
 464     raw_line: str, the line of input text, with comments.
 465     linenum: int, the number of the current line.
 466     error: function, an error handler.
 467   """
 468   # FIXME(adonovan): "NOLINT(" is misparsed as NOLINT(*).
 469   matched = _RE_SUPPRESSION.search(raw_line)
 470   if matched:
 471     category = matched.group(1)
 472     if category in (None, '(*)'):  # => "suppress all"
 473       _error_suppressions.setdefault(None, set()).add(linenum)
 474     else:
 475       if category.startswith('(') and category.endswith(')'):
 476         category = category[1:-1]
 477         if category in _ERROR_CATEGORIES:
 478           _error_suppressions.setdefault(category, set()).add(linenum)
 479         else:
 480           error(filename, linenum, 'readability/nolint', 5,
 481                 'Unknown NOLINT error category: %s' % category)
 482
 483
def ResetNolintSuppressions():
  """Resets the set of NOLINT suppressions to empty.

  Clears the global _error_suppressions map in place.
  """
  _error_suppressions.clear()
 487
 488
 489 def IsErrorSuppressedByNolint(category, linenum):
 490   """Returns true if the specified error category is suppressed on this line.
 491
 492   Consults the global error_suppressions map populated by
 493   ParseNolintSuppressions/ResetNolintSuppressions.
 494
 495   Args:
 496     category: str, the category of the error.
 497     linenum: int, the current line number.
 498   Returns:
 499     bool, True iff the error should be suppressed due to a NOLINT comment.
 500   """
 501   return (linenum in _error_suppressions.get(category, set()) or
 502           linenum in _error_suppressions.get(None, set()))
 503
 504 def Match(pattern, s):
 505   """Matches the string with the pattern, caching the compiled regexp."""
 506   # The regexp compilation caching is inlined in both Match and Search for
 507   # performance reasons; factoring it out into a separate function turns out
 508   # to be noticeably expensive.
 509   if pattern not in _regexp_compile_cache:
 510     _regexp_compile_cache[pattern] = sre_compile.compile(pattern)
 511   return _regexp_compile_cache[pattern].match(s)
 512
 513
 514 def ReplaceAll(pattern, rep, s):
 515   """Replaces instances of pattern in a string with a replacement.
 516
 517   The compiled regex is kept in a cache shared by Match and Search.
 518
 519   Args:
 520     pattern: regex pattern
 521     rep: replacement text
 522     s: search string
 523
 524   Returns:
 525     string with replacements made (or original string if no replacements)
 526   """
 527   if pattern not in _regexp_compile_cache:
 528     _regexp_compile_cache[pattern] = sre_compile.compile(pattern)
 529   return _regexp_compile_cache[pattern].sub(rep, s)
 530
 531
 532 def Search(pattern, s):
 533   """Searches the string for the pattern, caching the compiled regexp."""
 534   if pattern not in _regexp_compile_cache:
 535     _regexp_compile_cache[pattern] = sre_compile.compile(pattern)
 536   return _regexp_compile_cache[pattern].search(s)
 537
 538
 539 class _IncludeState(dict):
 540   """Tracks line numbers for includes, and the order in which includes appear.
 541
 542   As a dict, an _IncludeState object serves as a mapping between include
 543   filename and line number on which that file was included.
 544
 545   Call CheckNextIncludeOrder() once for each header in the file, passing
 546   in the type constants defined above. Calls in an illegal order will
 547   raise an _IncludeError with an appropriate error message.
 548
 549   """
 550   # self._section will move monotonically through this set. If it ever
 551   # needs to move backwards, CheckNextIncludeOrder will raise an error.
 552   _INITIAL_SECTION = 0
 553   _MY_H_SECTION = 1
 554   _C_SECTION = 2
 555   _CPP_SECTION = 3
 556   _OTHER_H_SECTION = 4
 557
 558   _TYPE_NAMES = {
 559       _C_SYS_HEADER: 'C system header',
 560       _CPP_SYS_HEADER: 'C++ system header',
 561       _LIKELY_MY_HEADER: 'header this file implements',
 562       _POSSIBLE_MY_HEADER: 'header this file may implement',
 563       _OTHER_HEADER: 'other header',
 564       }
 565   _SECTION_NAMES = {
 566       _INITIAL_SECTION: "... nothing. (This can't be an error.)",
 567       _MY_H_SECTION: 'a header this file implements',
 568       _C_SECTION: 'C system header',
 569       _CPP_SECTION: 'C++ system header',
 570       _OTHER_H_SECTION: 'other header',
 571       }
 572
 573   def __init__(self):
 574     dict.__init__(self)
 575     self.ResetSection()
 576
 577   def ResetSection(self):
 578     # The name of the current section.
 579     self._section = self._INITIAL_SECTION
 580     # The path of last found header.
 581     self._last_header = ''
 582
 583   def SetLastHeader(self, header_path):
 584     self._last_header = header_path
 585
 586   def CanonicalizeAlphabeticalOrder(self, header_path):
 587     """Returns a path canonicalized for alphabetical comparison.
 588
 589     - replaces "-" with "_" so they both cmp the same.
 590     - removes '-inl' since we don't require them to be after the main header.
 591     - lowercase everything, just in case.
 592
 593     Args:
 594       header_path: Path to be canonicalized.
 595
 596     Returns:
 597       Canonicalized path.
 598     """
 599     return header_path.replace('-inl.h', '.h').replace('-', '_').lower()
 600
 601   def IsInAlphabeticalOrder(self, clean_lines, linenum, header_path):
 602     """Check if a header is in alphabetical order with the previous header.
 603
 604     Args:
 605       clean_lines: A CleansedLines instance containing the file.
 606       linenum: The number of the line to check.
 607       header_path: Canonicalized header to be checked.
 608
 609     Returns:
 610       Returns true if the header is in alphabetical order.
 611     """
 612     # If previous section is different from current section, _last_header will
 613     # be reset to empty string, so it's always less than current header.
 614     #
 615     # If previous line was a blank line, assume that the headers are
 616     # intentionally sorted the way they are.
 617     if (self._last_header > header_path and
 618         not Match(r'^\s*$', clean_lines.elided[linenum - 1])):
 619       return False
 620     return True
 621
 622   def CheckNextIncludeOrder(self, header_type):
 623     """Returns a non-empty error message if the next header is out of order.
 624
 625     This function also updates the internal state to be ready to check
 626     the next include.
 627
 628     Args:
 629       header_type: One of the _XXX_HEADER constants defined above.
 630
 631     Returns:
 632       The empty string if the header is in the right order, or an
 633       error message describing what's wrong.
 634
 635     """
 636     error_message = ('Found %s after %s' %
 637                      (self._TYPE_NAMES[header_type],
 638                       self._SECTION_NAMES[self._section]))
 639
 640     last_section = self._section
 641
 642     if header_type == _C_SYS_HEADER:
 643       if self._section <= self._C_SECTION:
 644         self._section = self._C_SECTION
 645       else:
 646         self._last_header = ''
 647         return error_message
 648     elif header_type == _CPP_SYS_HEADER:
 649       if self._section <= self._CPP_SECTION:
 650         self._section = self._CPP_SECTION
 651       else:
 652         self._last_header = ''
 653         return error_message
 654     elif header_type == _LIKELY_MY_HEADER:
 655       if self._section <= self._MY_H_SECTION:
 656         self._section = self._MY_H_SECTION
 657       else:
 658         self._section = self._OTHER_H_SECTION
 659     elif header_type == _POSSIBLE_MY_HEADER:
 660       if self._section <= self._MY_H_SECTION:
 661         self._section = self._MY_H_SECTION
 662       else:
 663         # This will always be the fallback because we're not sure
 664         # enough that the header is associated with this file.
 665         self._section = self._OTHER_H_SECTION
 666     else:
 667       assert header_type == _OTHER_HEADER
 668       self._section = self._OTHER_H_SECTION
 669
 670     if last_section != self._section:
 671       self._last_header = ''
 672
 673     return ''
 674
 675
 676 class _CppLintState(object):
 677   """Maintains module-wide state.."""
 678
 679   def __init__(self):
 680     self.verbose_level = 1  # global setting.
 681     self.error_count = 0    # global count of reported errors
 682     # filters to apply when emitting error messages
 683     self.filters = _DEFAULT_FILTERS[:]
 684     self.counting = 'total'  # In what way are we counting errors?
 685     self.errors_by_category = {}  # string to int dict storing error counts
 686
 687     # output format:
 688     # "emacs" - format that emacs can parse (default)
 689     # "vs7" - format that Microsoft Visual Studio 7 can parse
 690     self.output_format = 'emacs'
 691
 692   def SetOutputFormat(self, output_format):
 693     """Sets the output format for errors."""
 694     self.output_format = output_format
 695
 696   def SetVerboseLevel(self, level):
 697     """Sets the module's verbosity, and returns the previous setting."""
 698     last_verbose_level = self.verbose_level
 699     self.verbose_level = level
 700     return last_verbose_level
 701
 702   def SetCountingStyle(self, counting_style):
 703     """Sets the module's counting options."""
 704     self.counting = counting_style
 705
 706   def SetFilters(self, filters):
 707     """Sets the error-message filters.
 708
 709     These filters are applied when deciding whether to emit a given
 710     error message.
 711
 712     Args:
 713       filters: A string of comma-separated filters (eg "+whitespace/indent").
 714                Each filter should start with + or -; else we die.
 715
 716     Raises:
 717       ValueError: The comma-separated filters did not all start with '+' or '-'.
 718                   E.g. "-,+whitespace,-whitespace/indent,whitespace/badfilter"
 719     """
 720     # Default filters always have less priority than the flag ones.
 721     self.filters = _DEFAULT_FILTERS[:]
 722     for filt in filters.split(','):
 723       clean_filt = filt.strip()
 724       if clean_filt:
 725         self.filters.append(clean_filt)
 726     for filt in self.filters:
 727       if not (filt.startswith('+') or filt.startswith('-')):
 728         raise ValueError('Every filter in --filters must start with + or -'
 729                          ' (%s does not)' % filt)
 730
 731   def ResetErrorCounts(self):
 732     """Sets the module's error statistic back to zero."""
 733     self.error_count = 0
 734     self.errors_by_category = {}
 735
 736   def IncrementErrorCount(self, category):
 737     """Bumps the module's error statistic."""
 738     self.error_count += 1
 739     if self.counting in ('toplevel', 'detailed'):
 740       if self.counting != 'detailed':
 741         category = category.split('/')[0]
 742       if category not in self.errors_by_category:
 743         self.errors_by_category[category] = 0
 744       self.errors_by_category[category] += 1
 745
 746   def PrintErrorCounts(self):
 747     """Print a summary of errors by category, and the total."""
 748     for category, count in self.errors_by_category.iteritems():
 749       sys.stderr.write('Category \'%s\' errors found: %d\n' %
 750                        (category, count))
 751     sys.stderr.write('Total errors found: %d\n' % self.error_count)
 752
# Module-wide singleton holding the lint settings and error counts; the
# module-level helper functions that follow read and update it.
_cpplint_state = _CppLintState()
 754
 755
def _OutputFormat():
  """Gets the module's output format.

  Returns:
    The output_format attribute of the shared _cpplint_state object.
  """
  return _cpplint_state.output_format
 759
 760
def _SetOutputFormat(output_format):
  """Sets the module's output format.

  Args:
    output_format: The format to store on the shared _cpplint_state object.
  """
  _cpplint_state.SetOutputFormat(output_format)
 764
 765
def _VerboseLevel():
  """Returns the module's verbosity setting.

  Returns:
    The verbose_level attribute of the shared _cpplint_state object.
  """
  return _cpplint_state.verbose_level
 769
 770
def _SetVerboseLevel(level):
  """Sets the module's verbosity, and returns the previous setting.

  Args:
    level: The new verbosity level.

  Returns:
    The verbosity level that was in effect before this call.
  """
  return _cpplint_state.SetVerboseLevel(level)
 774
 775
def _SetCountingStyle(level):
  """Sets the module's counting options.

  Args:
    level: The counting style, forwarded unchanged to
           _cpplint_state.SetCountingStyle().
  """
  _cpplint_state.SetCountingStyle(level)
 779
 780
def _Filters():
  """Returns the module's list of output filters, as a list.

  The list returned is the one stored on the shared _cpplint_state object,
  not a copy.
  """
  return _cpplint_state.filters
 784
 785
def _SetFilters(filters):
  """Sets the module's error-message filters.

  These filters are applied when deciding whether to emit a given
  error message.

  Args:
    filters: A string of comma-separated filters (eg "+whitespace/indent").
             Each filter should start with + or -; else we die.
  """
  _cpplint_state.SetFilters(filters)
 797
 798
 799 class _FunctionState(object):
 800   """Tracks current function name and the number of lines in its body."""
 801
 802   _NORMAL_TRIGGER = 250  # for --v=0, 500 for --v=1, etc.
 803   _TEST_TRIGGER = 400    # about 50% more than _NORMAL_TRIGGER.
 804
 805   def __init__(self):
 806     self.in_a_function = False
 807     self.lines_in_function = 0
 808     self.current_function = ''
 809
 810   def Begin(self, function_name):
 811     """Start analyzing function body.
 812
 813     Args:
 814       function_name: The name of the function being tracked.
 815     """
 816     self.in_a_function = True
 817     self.lines_in_function = 0
 818     self.current_function = function_name
 819
 820   def Count(self):
 821     """Count line in current function body."""
 822     if self.in_a_function:
 823       self.lines_in_function += 1
 824
 825   def Check(self, error, filename, linenum):
 826     """Report if too many lines in function body.
 827
 828     Args:
 829       error: The function to call with any errors found.
 830       filename: The name of the current file.
 831       linenum: The number of the line to check.
 832     """
 833     if Match(r'T(EST|est)', self.current_function):
 834       base_trigger = self._TEST_TRIGGER
 835     else:
 836       base_trigger = self._NORMAL_TRIGGER
 837     trigger = base_trigger * 2**_VerboseLevel()
 838
 839     if self.lines_in_function > trigger:
 840       error_level = int(math.log(self.lines_in_function / base_trigger, 2))
 841       # 50 => 0, 100 => 1, 200 => 2, 400 => 3, 800 => 4, 1600 => 5, ...
 842       if error_level > 5:
 843         error_level = 5
 844       error(filename, linenum, 'readability/fn_size', error_level,
 845             'Small and focused functions are preferred:'
 846             ' %s has %d non-comment lines'
 847             ' (error triggered by exceeding %d lines).'  % (
 848                 self.current_function, self.lines_in_function, trigger))
 849
 850   def End(self):
 851     """Stop analyzing function body."""
 852     self.in_a_function = False
 853
 854
class _IncludeError(Exception):
  """Indicates a problem with the include order in a file."""
  # NOTE(review): _IncludeState.CheckNextIncludeOrder reports ordering
  # problems by returning a message rather than raising; confirm whether
  # this exception is still used anywhere.
  pass
 858
 859
 860 class FileInfo:
 861   """Provides utility functions for filenames.
 862
 863   FileInfo provides easy access to the components of a file's path
 864   relative to the project root.
 865   """
 866
  def __init__(self, filename):
    """Remembers the filename that the other methods operate on.

    Args:
      filename: Path of the file; FullName() makes it absolute.
    """
    self._filename = filename
 869
 870   def FullName(self):
 871     """Make Windows paths like Unix."""
 872     return os.path.abspath(self._filename).replace('\\', '/')
 873
 874   def RepositoryName(self):
 875     """FullName after removing the local path to the repository.
 876
 877     If we have a real absolute path name here we can try to do something smart:
 878     detecting the root of the checkout and truncating /path/to/checkout from
 879     the name so that we get header guards that don't include things like
 880     "C:\Documents and Settings\..." or "/home/username/..." in them and thus
 881     people on different computers who have checked the source out to different
 882     locations won't see bogus errors.
 883     """
 884     fullname = self.FullName()
 885
 886     if os.path.exists(fullname):
 887       project_dir = os.path.dirname(fullname)
 888
 889       if os.path.exists(os.path.join(project_dir, ".svn")):
 890         # If there's a .svn file in the current directory, we recursively look
 891         # up the directory tree for the top of the SVN checkout
 892         root_dir = project_dir
 893         one_up_dir = os.path.dirname(root_dir)
 894         while os.path.exists(os.path.join(one_up_dir, ".svn")):
 895           root_dir = os.path.dirname(root_dir)
 896           one_up_dir = os.path.dirname(one_up_dir)
 897
 898         prefix = os.path.commonprefix([root_dir, project_dir])
 899         return fullname[len(prefix) + 1:]
 900
 901       # Not SVN <= 1.6? Try to find a git, hg, or svn top level directory by
 902       # searching up from the current path.
 903       root_dir = os.path.dirname(fullname)
 904       while (root_dir != os.path.dirname(root_dir) and
 905              not os.path.exists(os.path.join(root_dir, ".git")) and
 906              not os.path.exists(os.path.join(root_dir, ".hg")) and
 907              not os.path.exists(os.path.join(root_dir, ".svn"))):
 908         root_dir = os.path.dirname(root_dir)
 909
 910       if (os.path.exists(os.path.join(root_dir, ".git")) or
 911           os.path.exists(os.path.join(root_dir, ".hg")) or
 912           os.path.exists(os.path.join(root_dir, ".svn"))):
 913         prefix = os.path.commonprefix([root_dir, project_dir])
 914         return fullname[len(prefix) + 1:]
 915
 916     # Don't know what to do; header guard warnings may be wrong...
 917     return fullname
 918
 919   def Split(self):
 920     """Splits the file into the directory, basename, and extension.
 921
 922     For 'chrome/browser/browser.cc', Split() would
 923     return ('chrome/browser', 'browser', '.cc')
 924
 925     Returns:
 926       A tuple of (directory, basename, extension).
 927     """
 928
 929     googlename = self.RepositoryName()
 930     project, rest = os.path.split(googlename)
 931     return (project,) + os.path.splitext(rest)
 932
 933   def BaseName(self):
 934     """File base name - text after the final slash, before the final period."""
 935     return self.Split()[1]
 936
 937   def Extension(self):
 938     """File extension - text following the final period."""
 939     return self.Split()[2]
 940
 941   def NoExtension(self):
 942     """File has no source file extension."""
 943     return '/'.join(self.Split()[0:2])
 944
 945   def IsSource(self):
 946     """File has a source file extension."""
 947     return self.Extension()[1:] in ('c', 'cc', 'cpp', 'cxx')
 948
 949
 950 def _ShouldPrintError(category, confidence, linenum):
 951   """If confidence >= verbose, category passes filter and is not suppressed."""
 952
 953   # There are three ways we might decide not to print an error message:
 954   # a "NOLINT(category)" comment appears in the source,
 955   # the verbosity level isn't high enough, or the filters filter it out.
 956   if IsErrorSuppressedByNolint(category, linenum):
 957     return False
 958   if confidence < _cpplint_state.verbose_level:
 959     return False
 960
 961   is_filtered = False
 962   for one_filter in _Filters():
 963     if one_filter.startswith('-'):
 964       if category.startswith(one_filter[1:]):
 965         is_filtered = True
 966     elif one_filter.startswith('+'):
 967       if category.startswith(one_filter[1:]):
 968         is_filtered = False
 969     else:
 970       assert False  # should have been checked for in SetFilter.
 971   if is_filtered:
 972     return False
 973
 974   return True
 975
 976
 977 def Error(filename, linenum, category, confidence, message):
 978   """Logs the fact we've found a lint error.
 979
 980   We log where the error was found, and also our confidence in the error,
 981   that is, how certain we are this is a legitimate style regression, and
 982   not a misidentification or a use that's sometimes justified.
 983
 984   False positives can be suppressed by the use of
 985   "cpplint(category)"  comments on the offending line.  These are
 986   parsed into _error_suppressions.
 987
 988   Args:
 989     filename: The name of the file containing the error.
 990     linenum: The number of the line containing the error.
 991     category: A string used to describe the "category" this bug
 992       falls under: "whitespace", say, or "runtime".  Categories
 993       may have a hierarchy separated by slashes: "whitespace/indent".
 994     confidence: A number from 1-5 representing a confidence score for
 995       the error, with 5 meaning that we are certain of the problem,
 996       and 1 meaning that it could be a legitimate construct.
 997     message: The error message.
 998   """
 999   if _ShouldPrintError(category, confidence, linenum):
1000     _cpplint_state.IncrementErrorCount(category)
1001     if _cpplint_state.output_format == 'vs7':
1002       sys.stderr.write('%s(%s):  %s  [%s] [%d]\n' % (
1003           filename, linenum, message, category, confidence))
1004     elif _cpplint_state.output_format == 'eclipse':
1005       sys.stderr.write('%s:%s: warning: %s  [%s] [%d]\n' % (
1006           filename, linenum, message, category, confidence))
1007     else:
1008       sys.stderr.write('%s:%s:  %s  [%s] [%d]\n' % (
1009           filename, linenum, message, category, confidence))
1010
1011
# Matches standard C++ escape sequences per 2.13.2.3 of the C++ standard:
# a backslash followed by a simple escape character, a run of digits, or
# 'x' and a run of hex digits.
_RE_PATTERN_CLEANSE_LINE_ESCAPES = re.compile(
    r'\\([abfnrtv?"\\\']|\d+|x[0-9a-fA-F]+)')
# Matches double-quoted strings.  Escape codes should already have been
# removed with _RE_PATTERN_CLEANSE_LINE_ESCAPES.
_RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES = re.compile(r'"[^"]*"')
# Matches single-character literals.  Escape codes should already have been
# removed with _RE_PATTERN_CLEANSE_LINE_ESCAPES.
_RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES = re.compile(r"'.'")
# Matches C-style /* ... */ comments that open and close on the same line.
# This RE is a little more complicated than one might expect, because it
# also decides which neighbouring whitespace is removed with the comment,
# so that comments inside statements are handled better.
# The current rule is: we only clear spaces on both sides when the comment
# is at the end of the line.  Otherwise we try to remove spaces on the right
# side; if that doesn't work we remove them on the left side, but only if
# there's a non-word character on the right.
_RE_PATTERN_CLEANSE_LINE_C_COMMENTS = re.compile(
    r"""(\s*/\*.*\*/\s*$|
            /\*.*\*/\s+|
         \s+/\*.*\*/(?=\W)|
            /\*.*\*/)""", re.VERBOSE)
1032
1033
def IsCppString(line):
  """Tells whether the text right after 'line' would be inside a string.

  This function does not consider single-line nor multi-line comments.

  Args:
    line: is a partial line of code starting from the 0..n.

  Returns:
    True, if next character appended to 'line' is inside a
    string constant.
  """
  # Neutralise escaped backslashes first so that \\" is not read as \".
  text = line.replace(r'\\', 'XX')

  all_quotes = text.count('"')
  escaped_quotes = text.count(r'\"')
  char_literal_quotes = text.count("'\"'")

  # An odd number of real string delimiters means a string is still open.
  delimiters = all_quotes - escaped_quotes - char_literal_quotes
  return delimiters % 2 == 1
1049
1050
def CleanseRawStrings(raw_lines):
  """Removes C++11 raw strings from lines.

    Before:
      static const char kData[] = R"(
          multi-line string
          )";

    After:
      static const char kData[] = ""
          (replaced by blank line)
          "";

  Every raw string that starts on a line is collapsed, so
      a = R"(x)"; b = R"(y)";
  becomes
      a = ""; b = "";
  This also covers a raw string that starts on the same line on which
  an earlier multi-line raw string ends.

  Args:
    raw_lines: list of raw lines.

  Returns:
    list of lines with C++11 raw strings replaced by empty strings.
  """
  # Beginning of a raw string; see 2.14.15 [lex.string] for syntax.  The
  # leading group is non-greedy so the FIRST raw string on the line is
  # found; the loop below then deals with any that follow it.
  raw_string_start = re.compile(
      r'^(.*?)\b(?:R|u8R|uR|UR|LR)"([^\s\\()]*)\((.*)$')
  leading_whitespace = re.compile(r'^(\s*)\S')

  delimiter = None
  lines_without_raw_strings = []
  for line in raw_lines:
    if delimiter is not None:
      # Inside a raw string, look for the end
      end = line.find(delimiter)
      if end >= 0:
        # Found the end of the string, match leading space for this
        # line and resume copying the original lines, and also insert
        # a "" on the last line.
        leading_space = leading_whitespace.match(line)
        line = leading_space.group(1) + '""' + line[end + len(delimiter):]
        delimiter = None
      else:
        # Haven't found the end yet, append a blank line.
        line = ''

    # Collapse raw strings that start on this line.  Each pass makes the
    # line strictly shorter or opens a multi-line raw string, so the loop
    # always terminates.
    while delimiter is None:
      matched = raw_string_start.match(line)
      if not matched:
        break
      delimiter = ')' + matched.group(2) + '"'

      remainder = matched.group(3)
      end = remainder.find(delimiter)
      if end >= 0:
        # Raw string ended on same line
        line = (matched.group(1) + '""' +
                remainder[end + len(delimiter):])
        delimiter = None
      else:
        # Start of a multi-line raw string
        line = matched.group(1) + '""'

    lines_without_raw_strings.append(line)

  # TODO(unknown): if delimiter is not None here, we might want to
  # emit a warning for unterminated string.
  return lines_without_raw_strings
1110
1111
def FindNextMultiLineCommentStart(lines, lineix):
  """Find the beginning marker for a multiline comment.

  Args:
    lines: The lines to scan.
    lineix: Index of the first line to examine.

  Returns:
    Index of the first line at or after lineix that starts with '/*'
    (ignoring surrounding whitespace) and does not close the comment on
    that same line, or len(lines) if there is none.
  """
  for index in range(lineix, len(lines)):
    stripped = lines[index].strip()
    # Skip comments that are already closed on their opening line.
    if stripped.startswith('/*') and '*/' not in stripped[2:]:
      return index
  return len(lines)
1121
1122
def FindNextMultiLineCommentEnd(lines, lineix):
  """We are inside a comment, find the end marker.

  Args:
    lines: The lines to scan.
    lineix: Index of the first line to examine.

  Returns:
    Index of the first line at or after lineix that ends with '*/'
    (ignoring surrounding whitespace), or len(lines) if there is none.
  """
  for index in range(lineix, len(lines)):
    if lines[index].strip().endswith('*/'):
      return index
  return len(lines)
1130
1131
def RemoveMultiLineCommentsFromRange(lines, begin, end):
  """Clears a range of lines for multi-line comments.

  Args:
    lines: The list of lines, modified in place.
    begin: Index of the first line to overwrite.
    end: Index one past the last line to overwrite.
  """
  # Having // dummy comments makes the lines non-empty, so we will not get
  # unnecessary blank line warnings later in the code.
  index = begin
  while index < end:
    lines[index] = '// dummy'
    index += 1
1138
1139
def RemoveMultiLineComments(filename, lines, error):
  """Removes multiline (c-style) comments from lines.

  Args:
    filename: The name of the current file.
    lines: The list of lines, modified in place.
    error: The function to call with any errors found.
  """
  cursor = 0
  while cursor < len(lines):
    first = FindNextMultiLineCommentStart(lines, cursor)
    if first >= len(lines):
      break  # No further multi-line comment.
    last = FindNextMultiLineCommentEnd(lines, first)
    if last >= len(lines):
      error(filename, first + 1, 'readability/multiline_comment', 5,
            'Could not find end of multi-line comment')
      break
    RemoveMultiLineCommentsFromRange(lines, first, last + 1)
    cursor = last + 1
1154
1155
def CleanseComments(line):
  """Removes //-comments and single-line C-style /* */ comments.

  Args:
    line: A line of C++ source.

  Returns:
    The line with single-line comments removed.
  """
  slash_index = line.find('//')
  if slash_index >= 0:
    # Leave the '//' alone when it sits inside a string constant.
    if not IsCppString(line[:slash_index]):
      line = line[:slash_index].rstrip()
  # get rid of /* ... */
  return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line)
1170
1171
class CleansedLines(object):
  """Holds several copies of all lines with different preprocessing applied.

  1) elided member contains lines without strings and comments,
  2) lines member contains lines without comments,
  3) lines_without_raw_strings member contains the lines with C++11 raw
     strings collapsed, and
  4) raw_lines member contains all the lines without processing.
  All these members are of <type 'list'>, and of the same length.
  """

  def __init__(self, lines):
    self.raw_lines = lines
    self.num_lines = len(lines)
    self.lines_without_raw_strings = CleanseRawStrings(lines)
    self.lines = []
    self.elided = []
    for raw_free_line in self.lines_without_raw_strings:
      self.lines.append(CleanseComments(raw_free_line))
      collapsed = self._CollapseStrings(raw_free_line)
      self.elided.append(CleanseComments(collapsed))

  def NumLines(self):
    """Returns the number of lines represented."""
    return self.num_lines

  @staticmethod
  def _CollapseStrings(elided):
    """Collapses strings and chars on a line to simple "" or '' blocks.

    We nix strings first so we're not fooled by text like '"http://"'

    Args:
      elided: The line being processed.

    Returns:
      The line with collapsed strings.
    """
    # Lines matching the include pattern are returned untouched.
    if _RE_PATTERN_INCLUDE.match(elided):
      return elided

    # Remove escaped characters first to make quote/single quote collapsing
    # basic.  Things that look like escaped characters shouldn't occur
    # outside of strings and chars.
    collapsed = _RE_PATTERN_CLEANSE_LINE_ESCAPES.sub('', elided)
    collapsed = _RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES.sub("''", collapsed)
    collapsed = _RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES.sub('""', collapsed)
    return collapsed
1217
1218
def FindEndOfExpressionInLine(line, startpos, depth, startchar, endchar):
  """Find the position just after the matching endchar.

  Args:
    line: a CleansedLines line.
    startpos: start searching at this position.
    depth: nesting level at startpos.
    startchar: expression opening character.
    endchar: expression closing character.

  Returns:
    On finding matching endchar: (index just after matching endchar, 0)
    Otherwise: (-1, new depth at end of this line)
  """
  # range() rather than xrange() so this runs on Python 2 and Python 3.
  for i in range(startpos, len(line)):
    char = line[i]
    if char == startchar:
      depth += 1
    elif char == endchar:
      depth -= 1
      if depth == 0:
        return (i + 1, 0)
  return (-1, depth)
1241
1242
def CloseExpression(clean_lines, linenum, pos):
  """If input points to ( or { or [ or <, finds the position that closes it.

  If lines[linenum][pos] points to a '(' or '{' or '[' or '<', finds the
  linenum/pos that correspond to the closing of the expression.

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    pos: A position on the line.

  Returns:
    A tuple (line, linenum, pos) pointer *past* the closing brace, or
    (line, len(lines), -1) if we never find a close.  Note we ignore
    strings and comments when matching; and the line we return is the
    'cleansed' line at linenum.
  """
  line = clean_lines.elided[linenum]
  startchar = line[pos]
  if startchar not in '({[<':
    return (line, clean_lines.NumLines(), -1)
  endchar = {'(': ')', '[': ']', '{': '}', '<': '>'}[startchar]

  # The first line is scanned from pos, every later line from column 0,
  # carrying the nesting depth across lines.
  search_from = pos
  num_open = 0
  while True:
    (end_pos, num_open) = FindEndOfExpressionInLine(
        line, search_from, num_open, startchar, endchar)
    if end_pos > -1:
      return (line, linenum, end_pos)
    if linenum >= clean_lines.NumLines() - 1:
      # Did not find endchar before end of file, give up
      return (line, clean_lines.NumLines(), -1)
    linenum += 1
    line = clean_lines.elided[linenum]
    search_from = 0
1287
1288
def FindStartOfExpressionInLine(line, endpos, depth, startchar, endchar):
  """Find position at the matching startchar.

  This is almost the reverse of FindEndOfExpressionInLine, but note
  that the input position and returned position differs by 1.

  Args:
    line: a CleansedLines line.
    endpos: start searching at this position.
    depth: nesting level at endpos.
    startchar: expression opening character.
    endchar: expression closing character.

  Returns:
    On finding matching startchar: (index at matching startchar, 0)
    Otherwise: (-1, new depth at beginning of this line)
  """
  # range() rather than xrange() so this runs on Python 2 and Python 3.
  for i in range(endpos, -1, -1):
    char = line[i]
    if char == endchar:
      depth += 1
    elif char == startchar:
      depth -= 1
      if depth == 0:
        return (i, 0)
  return (-1, depth)
1314
1315
def ReverseCloseExpression(clean_lines, linenum, pos):
  """If input points to ) or } or ] or >, finds the position that opens it.

  If lines[linenum][pos] points to a ')' or '}' or ']' or '>', finds the
  linenum/pos that correspond to the opening of the expression.

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    pos: A position on the line.

  Returns:
    A tuple (line, linenum, pos) pointer *at* the opening brace, or
    (line, 0, -1) if we never find the matching opening brace.  Note
    we ignore strings and comments when matching; and the line we
    return is the 'cleansed' line at linenum.
  """
  line = clean_lines.elided[linenum]
  endchar = line[pos]
  if endchar not in ')}]>':
    return (line, 0, -1)
  startchar = {')': '(', ']': '[', '}': '{', '>': '<'}[endchar]

  # The last line is scanned backward from pos, every earlier line from
  # its final column, carrying the nesting depth across lines.
  search_from = pos
  num_open = 0
  while True:
    (start_pos, num_open) = FindStartOfExpressionInLine(
        line, search_from, num_open, startchar, endchar)
    if start_pos > -1:
      return (line, linenum, start_pos)
    if linenum <= 0:
      # Did not find startchar before beginning of file, give up
      return (line, 0, -1)
    linenum -= 1
    line = clean_lines.elided[linenum]
    search_from = len(line) - 1
1359
1360
def CheckForCopyright(filename, lines, error):
  """Logs an error if no Copyright message appears at the top of the file.

  Args:
    filename: The name of the current file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """
  # We'll say it should occur by line 10. Don't forget there's a
  # dummy line at the front, so the candidates are lines[1] .. lines[10].
  # A slice is used instead of xrange() so this also runs on Python 3.
  candidates = lines[1:11]
  if not any(re.search(r'Copyright', line, re.I) for line in candidates):
    error(filename, 0, 'legal/copyright', 5,
          'No copyright message found.  '
          'You should have a line: "Copyright [year] <Copyright Owner>"')
1372
1373
def GetHeaderGuardCPPVariable(filename):
  """Returns the CPP variable that should be used as a header guard.

  The variable is built from the file's repository-relative path (with
  the module-level _root prefix removed when _root is set) by replacing
  '-', '.', '/' and whitespace with '_', upper-casing, and appending '_'.

  Args:
    filename: The name of a C++ header file.

  Returns:
    The CPP variable that should be used as a header guard in the
    named file.

  """

  # Restores original filename in case that cpplint is invoked from Emacs's
  # flymake.
  filename = re.sub(r'_flymake\.h$', '.h', filename)
  filename = re.sub(r'/\.flymake/([^/]*)$', r'/\1', filename)

  fileinfo = FileInfo(filename)
  file_path_from_root = fileinfo.RepositoryName()
  if _root:
    # Compare as plain text rather than splicing _root into a regex:
    # regex metacharacters in _root must match literally, and on Windows
    # os.sep is a backslash, which would leave the pattern ending in a
    # dangling escape.  The repository path always uses '/' separators,
    # so _root is normalised to the same form before comparing.
    root_prefix = _root.replace(os.sep, '/').rstrip('/') + '/'
    if file_path_from_root.startswith(root_prefix):
      file_path_from_root = file_path_from_root[len(root_prefix):]
  return re.sub(r'[-./\s]', '_', file_path_from_root).upper() + '_'
1396
1397
def CheckForHeaderGuard(filename, lines, error):
  """Checks that the file contains a header guard.

  Logs an error if no #ifndef header guard is present.  For other
  headers, checks that the full pathname is used.

  Args:
    filename: The name of the C++ header file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """

  cppvar = GetHeaderGuardCPPVariable(filename)

  ifndef = define = endif = None
  ifndef_linenum = endif_linenum = 0
  for linenum, line in enumerate(lines):
    tokens = line.split()
    if len(tokens) >= 2:
      directive, argument = tokens[0], tokens[1]
      # Only the first #ifndef and the first #define are remembered.
      if directive == '#ifndef' and not ifndef:
        ifndef, ifndef_linenum = argument, linenum
      if directive == '#define' and not define:
        define = argument
    # The last line starting with #endif wins; the whole line is kept.
    if line.startswith('#endif'):
      endif, endif_linenum = line, linenum

  if not ifndef:
    error(filename, 0, 'build/header_guard', 5,
          'No #ifndef header guard found, suggested CPP variable is: %s' %
          cppvar)
    return

  if not define:
    error(filename, 0, 'build/header_guard', 5,
          'No #define header guard found, suggested CPP variable is: %s' %
          cppvar)
    return

  # The guard should be PATH_FILE_H_, but we also allow PATH_FILE_H__
  # for backward compatibility (reported at level 0 instead of 5).
  if ifndef != cppvar:
    error_level = 0 if ifndef == cppvar + '_' else 5

    ParseNolintSuppressions(filename, lines[ifndef_linenum], ifndef_linenum,
                            error)
    error(filename, ifndef_linenum, 'build/header_guard', error_level,
          '#ifndef header guard has wrong style, please use: %s' % cppvar)

  if define != ifndef:
    error(filename, 0, 'build/header_guard', 5,
          '#ifndef and #define don\'t match, suggested CPP variable is: %s' %
          cppvar)
    return

  if endif != ('#endif  // %s' % cppvar):
    legacy_endif = '#endif  // %s' % (cppvar + '_')
    error_level = 0 if endif == legacy_endif else 5

    ParseNolintSuppressions(filename, lines[endif_linenum], endif_linenum,
                            error)
    error(filename, endif_linenum, 'build/header_guard', error_level,
          '#endif line should be "#endif  // %s"' % cppvar)
1471
1472
def CheckForBadCharacters(filename, lines, error):
  """Logs an error for each line containing bad characters.

  Two kinds of bad characters:

  1. Unicode replacement characters: These indicate that either the file
  contained invalid UTF-8 (likely) or Unicode replacement characters (which
  it shouldn't).  Note that it's possible for this to throw off line
  numbering if the invalid UTF-8 occurred adjacent to a newline.

  2. NUL bytes.  These are problematic for some tools.

  Args:
    filename: The name of the current file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """
  # (offending character, error category, message), checked in this order
  # on every line.
  bad_characters = (
      (u'\ufffd', 'readability/utf8',
       'Line contains invalid UTF-8 (or Unicode replacement character).'),
      ('\0', 'readability/nul', 'Line contains NUL byte.'),
  )
  for linenum, line in enumerate(lines):
    for character, category, message in bad_characters:
      if character in line:
        error(filename, linenum, category, 5, message)
1496
1497
def CheckForNewlineAtEOF(filename, lines, error):
  """Logs an error if there is no newline char at the end of the file.

  Args:
    filename: The name of the current file.
    lines: An array of strings, each representing a line of the file.
    error: The function to call with any errors found.
  """

  # The array lines() was created by adding two newlines to the
  # original file (go figure), then splitting on \n.
  # To verify that the file ends in \n, we just have to make sure the
  # last-but-two element of lines() exists and is empty.
  line_count = len(lines)
  ends_with_newline = line_count >= 3 and not lines[-2]
  if not ends_with_newline:
    error(filename, line_count - 2, 'whitespace/ending_newline', 5,
          'Could not find a newline character at the end of the file.')
1514
1515
def CheckForMultilineCommentsAndStrings(filename, clean_lines, linenum, error):
  """Logs an error if we see /* ... */ or "..." that extend past one line.

  /* ... */ comments are legit inside macros, for one line.
  Otherwise, we prefer // comments, so it's ok to warn about the
  other.  Likewise, it's ok for strings to extend across multiple
  lines, as long as a line continuation character (backslash)
  terminates each line. Although not currently prohibited by the C++
  style guide, it's ugly and unnecessary. We don't do well with either
  in this lint program, so we warn about both.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  # Remove all \\ (escaped backslashes) from the line. They are OK, and the
  # second (escaped) slash may trigger later \" detection erroneously.
  line = clean_lines.elided[linenum].replace('\\\\', '')

  # More comment openers than closers: a comment runs past this line.
  comment_opens = line.count('/*')
  comment_closes = line.count('*/')
  if comment_opens > comment_closes:
    error(filename, linenum, 'readability/multiline_comment', 5,
          'Complex multi-line /*...*/-style comment found. '
          'Lint may give bogus warnings.  '
          'Consider replacing these with //-style comments, '
          'with #if 0...#endif, '
          'or with more clearly structured multi-line comments.')

  # An odd number of unescaped quotes: a string runs past this line.
  unescaped_quotes = line.count('"') - line.count('\\"')
  if unescaped_quotes % 2 != 0:
    error(filename, linenum, 'readability/multiline_string', 5,
          'Multi-line string ("...") found.  This lint script doesn\'t '
          'do well with such strings, and may give bogus warnings.  '
          'Use C++11 raw strings or concatenation instead.')
1552
1553
# Pairs of (thread-unsafe call prefix, suggested replacement prefix) used
# by CheckPosixThreading.
threading_list = (
    ('asctime(', 'asctime_r('),
    ('ctime(', 'ctime_r('),
    ('getgrgid(', 'getgrgid_r('),
    ('getgrnam(', 'getgrnam_r('),
    ('getlogin(', 'getlogin_r('),
    ('getpwnam(', 'getpwnam_r('),
    ('getpwuid(', 'getpwuid_r('),
    ('gmtime(', 'gmtime_r('),
    ('localtime(', 'localtime_r('),
    ('rand(', 'rand_r('),
    ('strtok(', 'strtok_r('),
    ('ttyname(', 'ttyname_r('),
    )


def CheckPosixThreading(filename, clean_lines, linenum, error):
  """Checks for calls to thread-unsafe functions.

  Much code has been originally written without consideration of
  multi-threading. Also, engineers are relying on their old experience;
  they have learned posix before threading extensions were added. These
  tests guide the engineers to use thread-safe functions (when using
  posix directly).

  Every occurrence of a function name on the line is examined, so a call
  is still reported when an earlier look-alike (e.g. my_rand() before
  rand()) appears on the same line.  Each function is reported at most
  once per line.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]
  for single_thread_function, multithread_safe_function in threading_list:
    ix = line.find(single_thread_function)
    while ix >= 0:
      # A match only counts as a call to the bare function when it is not
      # the tail of a longer identifier or a member access (x.f, x->f).
      # Comparisons made explicit for clarity --
      # pylint: disable=g-explicit-bool-comparison
      if ix == 0 or (not line[ix - 1].isalnum() and
                     line[ix - 1] not in ('_', '.', '>')):
        error(filename, linenum, 'runtime/threadsafe_fn', 2,
              'Consider using ' + multithread_safe_function +
              '...) instead of ' + single_thread_function +
              '...) for improved thread safety.')
        break
      ix = line.find(single_thread_function, ix + 1)
1595
1596
def CheckVlogArguments(filename, clean_lines, linenum, error):
  """Checks that VLOG() is only used for defining a logging level.

  For example, VLOG(2) is correct. VLOG(INFO), VLOG(WARNING), VLOG(ERROR), and
  VLOG(FATAL) are not.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  symbolic_level = Search(r'\bVLOG\((INFO|ERROR|WARNING|DFATAL|FATAL)\)',
                          clean_lines.elided[linenum])
  if not symbolic_level:
    return
  error(filename, linenum, 'runtime/vlog', 5,
        'VLOG() should be used with numeric verbosity level.  '
        'Use LOG() if you want symbolic severity levels.')
1614
1615
# Matches a statement of the form *count++; or *count--; at the start of a
# line, which moves the pointer instead of changing the pointed-to value.
_RE_PATTERN_INVALID_INCREMENT = re.compile(
    r'^\s*\*\w+(\+\+|--);')


def CheckInvalidIncrement(filename, clean_lines, linenum, error):
  """Checks for invalid increment *count++.

  For example following function:
  void increment_counter(int* count) {
    *count++;
  }
  is invalid, because it effectively does count++, moving pointer, and should
  be replaced with ++*count, (*count)++ or *count += 1.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  if not _RE_PATTERN_INVALID_INCREMENT.match(clean_lines.elided[linenum]):
    return
  error(filename, linenum, 'runtime/invalid_increment', 5,
        'Changing pointer instead of value (or unused value of operator*).')
1642
1643
class _BlockInfo(object):
  """Stores information about a generic block of code."""

  def __init__(self, seen_open_brace):
    self.seen_open_brace = seen_open_brace
    self.open_parentheses = 0
    self.inline_asm = _NO_ASM

  def CheckBegin(self, filename, clean_lines, linenum, error):
    """Run checks that applies to text up to the opening brace.

    This is mostly for checking the text after the class identifier
    and the "{", usually where the base class is specified.  For other
    blocks, there isn't much to check, so we always pass.  This base
    implementation does nothing.

    Args:
      filename: The name of the current file.
      clean_lines: A CleansedLines instance containing the file.
      linenum: The number of the line to check.
      error: The function to call with any errors found.
    """

  def CheckEnd(self, filename, clean_lines, linenum, error):
    """Run checks that applies to text after the closing brace.

    This is mostly used for checking end of namespace comments.  This
    base implementation does nothing.

    Args:
      filename: The name of the current file.
      clean_lines: A CleansedLines instance containing the file.
      linenum: The number of the line to check.
      error: The function to call with any errors found.
    """
1679
1680
class _ClassInfo(_BlockInfo):
  """Nesting-stack entry describing one class or struct declaration."""

  def __init__(self, name, class_or_struct, clean_lines, linenum):
    """Records where the declaration starts and its default access level.

    Args:
      name: The name of the class or struct.
      class_or_struct: The keyword used in the declaration; 'struct' selects
        struct semantics, anything else is treated as a class.
      clean_lines: A CleansedLines instance containing the file.
      linenum: The number of the line on which the declaration starts.
    """
    _BlockInfo.__init__(self, False)
    self.name = name
    self.starting_linenum = linenum
    self.is_derived = False

    # Members default to public in a struct and to private in a class.
    self.is_struct = (class_or_struct == 'struct')
    if self.is_struct:
      self.access = 'public'
    else:
      self.access = 'private'

    # Width of the run of spaces that starts the declaration line.  The raw
    # line is consulted (not the elided one) to account for leading comments.
    self.class_indent = 0
    leading_spaces = Match(r'^( *)\S', clean_lines.raw_lines[linenum])
    if leading_spaces:
      self.class_indent = len(leading_spaces.group(1))

    # Estimate where the class ends: the first line, starting from the
    # declaration, at which "{" and "}" balance out.  Input such as
    #   class A {
    #   } *x = { ...
    # throws the estimate off, but it is good enough for CheckSectionSpacing.
    # last_line stays 0 when the braces never balance.
    self.last_line = 0
    brace_balance = 0
    for index in range(linenum, clean_lines.NumLines()):
      text = clean_lines.elided[index]
      brace_balance += text.count('{')
      brace_balance -= text.count('}')
      if brace_balance == 0:
        self.last_line = index
        break

  def CheckBegin(self, filename, clean_lines, linenum, error):
    """Marks the class as derived when the class head holds a bare ':'.

    A ':' that is part of a '::' qualifier does not count.

    Args:
      filename: The name of the current file.
      clean_lines: A CleansedLines instance containing the file.
      linenum: The number of the line to check.
      error: The function to call with any errors found.
    """
    if Search('(^|[^:]):($|[^:])', clean_lines.elided[linenum]):
      self.is_derived = True

  def CheckEnd(self, filename, clean_lines, linenum, error):
    """Checks that the closing brace lines up with the start of the class.

    Only a closing brace preceded by nothing but spaces is examined, which
    means single-line class definitions are never reported.

    Args:
      filename: The name of the current file.
      clean_lines: A CleansedLines instance containing the file.
      linenum: The number of the line to check.
      error: The function to call with any errors found.
    """
    closing = Match(r'^( *)\}', clean_lines.elided[linenum])
    if not closing or len(closing.group(1)) == self.class_indent:
      return

    if self.is_struct:
      keyword = 'struct '
    else:
      keyword = 'class '
    parent = keyword + self.name
    error(filename, linenum, 'whitespace/indent', 3,
          'Closing brace should be aligned with beginning of %s' % parent)
1735
1736
class _NamespaceInfo(_BlockInfo):
  """Nesting-stack entry describing one namespace."""

  def __init__(self, name, linenum):
    """Records the namespace name and the line it was opened on.

    Args:
      name: The name of the namespace; a falsy value (anonymous namespace)
        is stored as ''.
      linenum: The number of the line on which the namespace starts.
    """
    _BlockInfo.__init__(self, False)
    self.starting_linenum = linenum
    self.name = name or ''

  def CheckEnd(self, filename, clean_lines, linenum, error):
    """Checks the comment that terminates the namespace.

    Args:
      filename: The name of the current file.
      clean_lines: A CleansedLines instance containing the file.
      linenum: The number of the line holding the closing brace.
      error: The function to call with any errors found.
    """
    closing_line = clean_lines.raw_lines[linenum]

    # A namespace that closes fewer than 10 lines after it opened is not
    # required to carry a terminating comment.  If it carries one anyway,
    # the comment still has to be correct, so only bail out when no
    # namespace comment is present at all.
    #
    # TODO(unknown): Ideally large namespaces are always checked and short
    # ones are checked when they hold something nontrivial (more than
    # forward declarations).  Nothing decides what "nontrivial" means yet,
    # so size alone triggers the check, which works most of the time.
    spans_few_lines = linenum - self.starting_linenum < 10
    if spans_few_lines and not Match(r'};*\s*(//|/\*).*\bnamespace\b',
                                     closing_line):
      return

    # Accepted terminators:
    # - C style "/* */" comments, so that namespaces closed inside
    #   preprocessor macros can be cpplint clean;
    # - text such as "// end of namespace <name>." with a trailing period.
    # Nothing else is accepted; being more lenient risks false negatives
    # when the existing comment is a substring of the expected namespace.
    if self.name:
      # Named namespace
      wanted = (r'};*\s*(//|/\*).*\bnamespace\s+' + re.escape(self.name) +
                r'[\*/\.\\\s]*$')
      complaint = ('Namespace should be terminated with "// namespace %s"' %
                   self.name)
    else:
      # Anonymous namespace
      wanted = r'};*\s*(//|/\*).*\bnamespace[\*/\.\\\s]*$'
      complaint = 'Namespace should be terminated with "// namespace"'

    if not Match(wanted, closing_line):
      error(filename, linenum, 'readability/namespace', 5, complaint)
1789
1790
class _PreprocessorInfo(object):
  """Nesting-stack checkpoints kept for one #if ... #endif group.

  Attributes:
    stack_before_if: The entire nesting stack as it was before the #if.
    stack_before_else: The entire nesting stack up to the #else; empty until
      an #else or #elif is recorded.
    seen_else: Whether an #else or #elif has already been seen.
  """

  def __init__(self, stack_before_if):
    self.seen_else = False
    self.stack_before_else = []
    self.stack_before_if = stack_before_if
1803
1804
class _NestingState(object):
  """Holds states related to parsing braces.

  Attributes:
    stack: Stack of block objects, innermost block last.  An object is
      pushed when a namespace/class declaration or a "{" is seen and popped
      on the matching "}" (or earlier, for forward declarations).
    pp_stack: Stack of _PreprocessorInfo objects, one per open #if group.
  """

  def __init__(self):
    # Stack for tracking all braces.  An object is pushed whenever we
    # see a "{", and popped when we see a "}".  Only 3 types of
    # objects are possible:
    # - _ClassInfo: a class or struct.
    # - _NamespaceInfo: a namespace.
    # - _BlockInfo: some other type of block.
    self.stack = []

    # Stack of _PreprocessorInfo objects.
    self.pp_stack = []

  def SeenOpenBrace(self):
    """Check if we have seen the opening brace for the innermost block.

    Returns:
      True if we have seen the opening brace (or if there is no open block
      at all), False if the innermost block is still expecting an opening
      brace.
    """
    return (not self.stack) or self.stack[-1].seen_open_brace

  def InNamespaceBody(self):
    """Check if we are currently one level inside a namespace body.

    Returns:
      True if top of the stack is a namespace block, False otherwise
      (including when the stack is empty).
    """
    # bool() keeps the documented True/False contract: a bare
    # "self.stack and ..." would hand back the empty list itself.
    return bool(self.stack) and isinstance(self.stack[-1], _NamespaceInfo)

  def UpdatePreprocessor(self, line):
    """Update preprocessor stack.

    We need to handle preprocessors due to classes like this:
      #ifdef SWIG
      struct ResultDetailsPageElementExtensionPoint {
      #else
      struct ResultDetailsPageElementExtensionPoint : public Extension {
      #endif

    We make the following assumptions (good enough for most files):
    - Preprocessor condition evaluates to true from #if up to first
      #else/#elif/#endif.

    - Preprocessor condition evaluates to false from #else/#elif up
      to #endif.  We still perform lint checks on these lines, but
      these do not affect nesting stack.

    Args:
      line: current line to check.
    """
    if Match(r'^\s*#\s*(if|ifdef|ifndef)\b', line):
      # Beginning of #if block, save the nesting stack here.  The saved
      # stack will allow us to restore the parsing state in the #else case.
      self.pp_stack.append(_PreprocessorInfo(copy.deepcopy(self.stack)))
    elif Match(r'^\s*#\s*(else|elif)\b', line):
      # Beginning of #else block
      if self.pp_stack:
        if not self.pp_stack[-1].seen_else:
          # This is the first #else or #elif block.  Remember the
          # whole nesting stack up to this point.  This is what we
          # keep after the #endif.
          self.pp_stack[-1].seen_else = True
          self.pp_stack[-1].stack_before_else = copy.deepcopy(self.stack)

        # Restore the stack to how it was before the #if
        self.stack = copy.deepcopy(self.pp_stack[-1].stack_before_if)
      else:
        # TODO(unknown): unexpected #else, issue warning?
        pass
    elif Match(r'^\s*#\s*endif\b', line):
      # End of #if or #else blocks.
      if self.pp_stack:
        # If we saw an #else, we will need to restore the nesting
        # stack to its former state before the #else, otherwise we
        # will just continue from where we left off.
        if self.pp_stack[-1].seen_else:
          # No copy is needed here: the _PreprocessorInfo holding this
          # list is dropped right below, so we take over its reference.
          self.stack = self.pp_stack[-1].stack_before_else
        # Drop the corresponding #if
        self.pp_stack.pop()
      else:
        # TODO(unknown): unexpected #endif, issue warning?
        pass

  def Update(self, filename, clean_lines, linenum, error):
    """Update nesting state with current line.

    The line is processed in this order: preprocessor directives,
    parenthesis and inline assembly tracking for the innermost block,
    namespace declarations, a class/struct declaration, access specifiers,
    and finally the remaining braces, semicolons and closing parentheses.

    Args:
      filename: The name of the current file.
      clean_lines: A CleansedLines instance containing the file.
      linenum: The number of the line to check.
      error: The function to call with any errors found.
    """
    line = clean_lines.elided[linenum]

    # Update pp_stack first
    self.UpdatePreprocessor(line)

    # Count parentheses.  This is to avoid adding struct arguments to
    # the nesting stack.
    if self.stack:
      inner_block = self.stack[-1]
      depth_change = line.count('(') - line.count(')')
      inner_block.open_parentheses += depth_change

      # Also check if we are starting or ending an inline assembly block.
      if inner_block.inline_asm in (_NO_ASM, _END_ASM):
        if (depth_change != 0 and
            inner_block.open_parentheses == 1 and
            _MATCH_ASM.match(line)):
          # Enter assembly block
          inner_block.inline_asm = _INSIDE_ASM
        else:
          # Not entering assembly block.  If previous line was _END_ASM,
          # we will now shift to _NO_ASM state.
          inner_block.inline_asm = _NO_ASM
      elif (inner_block.inline_asm == _INSIDE_ASM and
            inner_block.open_parentheses == 0):
        # Exit assembly block
        inner_block.inline_asm = _END_ASM

    # Consume namespace declaration at the beginning of the line.  Do
    # this in a loop so that we catch same line declarations like this:
    #   namespace proto2 { namespace bridge { class MessageSet; } }
    while True:
      # Match start of namespace.  The "\b\s*" below catches namespace
      # declarations even if it weren't followed by a whitespace, this
      # is so that we don't confuse our namespace checker.  The
      # missing spaces will be flagged by CheckSpacing.
      namespace_decl_match = Match(r'^\s*namespace\b\s*([:\w]+)?(.*)$', line)
      if not namespace_decl_match:
        break

      new_namespace = _NamespaceInfo(namespace_decl_match.group(1), linenum)
      self.stack.append(new_namespace)

      # Continue with whatever follows the namespace name; if its "{" is
      # on this line too, consume it as well.
      line = namespace_decl_match.group(2)
      if line.find('{') != -1:
        new_namespace.seen_open_brace = True
        line = line[line.find('{') + 1:]

    # Look for a class declaration in whatever is left of the line
    # after parsing namespaces.  The regexp accounts for decorated classes
    # such as in:
    #   class LOCKABLE API Object {
    #   };
    #
    # Templates with class arguments may confuse the parser, for example:
    #   template <class T
    #             class Comparator = less<T>,
    #             class Vector = vector<T> >
    #   class HeapQueue {
    #
    # Because this parser has no nesting state about templates, by the
    # time it saw "class Comparator", it may think that it's a new class.
    # Nested templates have a similar problem:
    #   template <
    #       typename ExportedType,
    #       typename TupleType,
    #       template <typename, typename> class ImplTemplate>
    #
    # To avoid these cases, we ignore classes that are followed by '=' or '>'
    class_decl_match = Match(
        r'\s*(template\s*<[\w\s<>,:]*>\s*)?'
        r'(class|struct)\s+([A-Z_]+\s+)*(\w+(?:::\w+)*)'
        r'(([^=>]|<[^<>]*>|<[^<>]*<[^<>]*>\s*>)*)$', line)
    if (class_decl_match and
        (not self.stack or self.stack[-1].open_parentheses == 0)):
      # Group 4 is the class name, group 2 the class/struct keyword and
      # group 5 the rest of the line after the name.
      self.stack.append(_ClassInfo(
          class_decl_match.group(4), class_decl_match.group(2),
          clean_lines, linenum))
      line = class_decl_match.group(5)

    # If we have not yet seen the opening brace for the innermost block,
    # run checks here.
    if not self.SeenOpenBrace():
      self.stack[-1].CheckBegin(filename, clean_lines, linenum, error)

    # Update access control if we are inside a class/struct
    if self.stack and isinstance(self.stack[-1], _ClassInfo):
      classinfo = self.stack[-1]
      access_match = Match(
          r'^(.*)\b(public|private|protected|signals)(\s+(?:slots\s*)?)?'
          r':(?:[^:]|$)',
          line)
      if access_match:
        classinfo.access = access_match.group(2)

        # Check that access keywords are indented +1 space.  Skip this
        # check if the keywords are not preceded by whitespaces.
        indent = access_match.group(1)
        if (len(indent) != classinfo.class_indent + 1 and
            Match(r'^\s*$', indent)):
          if classinfo.is_struct:
            parent = 'struct ' + classinfo.name
          else:
            parent = 'class ' + classinfo.name
          slots = ''
          if access_match.group(3):
            slots = access_match.group(3)
          error(filename, linenum, 'whitespace/indent', 3,
                '%s%s: should be indented +1 space inside %s' % (
                    access_match.group(2), slots, parent))

    # Consume braces or semicolons from what's left of the line
    while True:
      # Match first brace, semicolon, or closed parenthesis.
      matched = Match(r'^[^{;)}]*([{;)}])(.*)$', line)
      if not matched:
        break

      token = matched.group(1)
      if token == '{':
        # If namespace or class hasn't seen a opening brace yet, mark
        # namespace/class head as complete.  Push a new block onto the
        # stack otherwise.
        if not self.SeenOpenBrace():
          self.stack[-1].seen_open_brace = True
        else:
          self.stack.append(_BlockInfo(True))
          if _MATCH_ASM.match(line):
            self.stack[-1].inline_asm = _BLOCK_ASM
      elif token == ';' or token == ')':
        # If we haven't seen an opening brace yet, but we already saw
        # a semicolon, this is probably a forward declaration.  Pop
        # the stack for these.
        #
        # Similarly, if we haven't seen an opening brace yet, but we
        # already saw a closing parenthesis, then these are probably
        # function arguments with extra "class" or "struct" keywords.
        # Also pop these stack for these.
        if not self.SeenOpenBrace():
          self.stack.pop()
      else:  # token == '}'
        # Perform end of block checks and pop the stack.
        if self.stack:
          self.stack[-1].CheckEnd(filename, clean_lines, linenum, error)
          self.stack.pop()
      line = matched.group(2)

  def InnermostClass(self):
    """Get class info on the top of the stack.

    Returns:
      The innermost _ClassInfo object on the stack if we are inside a
      class, or None otherwise.
    """
    # Walk from the innermost block outwards.
    for block in reversed(self.stack):
      if isinstance(block, _ClassInfo):
        return block
    return None

  def CheckCompletedBlocks(self, filename, error):
    """Checks that all classes and namespaces have been completely parsed.

    Call this when all lines in a file have been processed.  Every class
    or namespace still left on the stack is reported at the line it
    started on.

    Args:
      filename: The name of the current file.
      error: The function to call with any errors found.
    """
    # Note: This test can result in false positives if #ifdef constructs
    # get in the way of brace matching. See the testBuildClass test in
    # cpplint_unittest.py for an example of this.
    for obj in self.stack:
      if isinstance(obj, _ClassInfo):
        error(filename, obj.starting_linenum, 'build/class', 5,
              'Failed to find complete declaration of class %s' %
              obj.name)
      elif isinstance(obj, _NamespaceInfo):
        error(filename, obj.starting_linenum, 'build/namespaces', 5,
              'Failed to find complete declaration of namespace %s' %
              obj.name)
2081
2082
def CheckForNonStandardConstructs(filename, clean_lines, linenum,
                                  nesting_state, error):
  r"""Flags constructs that gcc-2 tolerated but standard C++ does not.

  Warning about these in lint is one way to ease the transition to new
  compilers.  The following are reported:
  - storage class not first (e.g. "const static" instead of "static const").
  - "%qd" in printf-type functions ("%lld" is the replacement).
  - "%1$d" style formats in printf-type functions.
  - "\%" and similar undefined character escape sequences.
  - uncommented text after #endif.
  - inner-style forward declarations.
  - the >? and <? operators, along with their >?= and <?= cousins.

  Since it is convenient to do so here, two style checks ride along:
  "const string&" members, and single-argument constructors that are not
  marked explicit.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    nesting_state: A _NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: The function to call with any errors found.  It is invoked here
           as error(filename, linenum, category, level, message).
  """

  # First pass: comments are gone from this form of the line, but string
  # literals are still in place.
  with_strings = clean_lines.lines[linenum]

  if Search(r'printf\s*\(.*".*%[-+ ]?\d*q', with_strings):
    error(filename, linenum, 'runtime/printf_format', 3,
          '%q in format strings is deprecated.  Use %ll instead.')

  if Search(r'printf\s*\(.*".*%\d+\$', with_strings):
    error(filename, linenum, 'runtime/printf_format', 2,
          '%N$ formats are unconventional.  Try rewriting to avoid them.')

  # Escaped backslashes are dropped first so that they cannot be mistaken
  # for the start of an undefined escape.
  without_double_backslash = with_strings.replace('\\\\', '')
  if Search(r'("|\').*\\(%|\[|\(|{)', without_double_backslash):
    error(filename, linenum, 'build/printf_format', 3,
          '%, [, (, and { are undefined character escapes.  Unescape them.')

  # Second pass: both comments and strings removed.
  code = clean_lines.elided[linenum]

  if Search(r'\b(const|volatile|void|char|short|int|long'
            r'|float|double|signed|unsigned'
            r'|schar|u?int8|u?int16|u?int32|u?int64)'
            r'\s+(register|static|extern|typedef)\b',
            code):
    error(filename, linenum, 'build/storage_class', 5,
          'Storage class (static, extern, typedef, etc) should be first.')

  if Match(r'\s*#\s*endif\s*[^/\s]+', code):
    error(filename, linenum, 'build/endif_comment', 5,
          'Uncommented text after #endif is non-standard.  Use a comment.')

  if Match(r'\s*class\s+(\w+\s*::\s*)+\w+\s*;', code):
    error(filename, linenum, 'build/forward_decl', 5,
          'Inner-style forward declarations are invalid.  Remove this line.')

  if Search(r'(\w+|[+-]?\d+(\.\d*)?)\s*(<|>)\?=?\s*(\w+|[+-]?\d+)(\.\d*)?',
            code):
    error(filename, linenum, 'build/deprecated', 3,
          '>? and <? (max and min) operators are non-standard and deprecated.')

  # TODO(unknown): Could this be widened safely to arbitrary references
  # without too many false positives?  A first attempt produced 5 warnings
  # for mostly benign code in the regtest, hence the restriction to string.
  # The regexp of that attempt, for reference:
  # type_name = r'\w+((\s*::\s*\w+)|(\s*<\s*\w+?\s*>))?'
  # r'\s*const\s*' + type_name + '\s*&\s*\w+\s*;'
  if Search(r'^\s*const\s*string\s*&\s*\w+\s*;', code):
    error(filename, linenum, 'runtime/member_string_references', 2,
          'const string& members are dangerous. It is much better to use '
          'alternatives, such as pointers or simple constants.')

  # The remaining check only makes sense inside a class whose head has been
  # completed, i.e. whose opening brace has been seen.
  classinfo = nesting_state.InnermostClass()
  if not classinfo or not classinfo.seen_open_brace:
    return

  # The class may have been declared with namespace or classname qualifiers;
  # its constructor is written without them.
  escaped_name = re.escape(classinfo.name.split('::')[-1])

  # A single-argument constructor that is not marked explicit is valid C++
  # but against style.  "(void)" and constructors taking a reference to the
  # class itself are left alone.
  constructor = Match(r'\s+(?:inline\s+)?%s\s*\(([^,()]+)\)' % escaped_name,
                      code)
  if not constructor:
    return
  sole_argument = constructor.group(1)
  if sole_argument == 'void':
    return
  if Match(r'(const\s+)?%s(\s+const)?\s*(?:<\w+>\s*)?&' % escaped_name,
           sole_argument.strip()):
    return
  error(filename, linenum, 'runtime/explicit', 5,
        'Single-argument constructors should be marked explicit.')
2188
2189
def CheckSpacingForFunctionCall(filename, line, linenum, error):
  """Checks the spacing around the parentheses of function calls.

  Args:
    filename: The name of the current file.
    line: The text of the line to check.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """

  # Function calls often sit inside if/for/while/switch expressions, which
  # follow their own, more liberal conventions.  When the line holds such a
  # construct, only the text inside its parentheses is held to the stricter
  # function call rules; otherwise the whole line is examined.
  candidate = line
  control_flow_patterns = (
      r'\bif\s*\((.*)\)\s*{',
      r'\bfor\s*\((.*)\)\s*{',
      r'\bwhile\s*\((.*)\)\s*[{;]',
      r'\bswitch\s*\((.*)\)\s*{',
  )
  for control_pattern in control_flow_patterns:
    condition = Search(control_pattern, line)
    if condition:
      candidate = condition.group(1)
      break

  # Nothing is checked when the text still holds a control structure or one
  # of the listed keywords.
  if Search(r'\b(if|for|while|switch|return|new|delete|catch|sizeof)\b',
            candidate):
    return
  # Pointers and references to functions and arrays are too tricky, so they
  # are recognized in a very simple way and skipped as well:
  #   " (something)(maybe-something)" or
  #   " (something)(maybe-something," or
  #   " (something)[something]"
  # The contents of [] are assumed to be short enough never to wrap.
  if Search(r' \([^)]+\)\([^)]*(\)|,$)', candidate):
    return
  if Search(r' \([^)]+\)\[[^\]]+\]', candidate):
    return

  # There should never be a space immediately inside the parens, as in
  # "f( 3, 4 )".  Nested parens such as "( (a+b) + c )" are an exception.
  if Search(r'\w\s*\(\s(?!\s*\\$)', candidate):   # a ( used for a fn call
    error(filename, linenum, 'whitespace/parens', 4,
          'Extra space after ( in function call')
  elif Search(r'\(\s+(?!(\s*\\)|\()', candidate):
    error(filename, linenum, 'whitespace/parens', 2,
          'Extra space after (')

  # Likewise there should be no space before a ( that opens an argument
  # list.  It is taken to be one when the character before the whitespace
  # is legal in a function name (alnum + _) and the line is not starting a
  # macro or a typedef.
  if (Search(r'\w\s+\(', candidate) and
      not Search(r'#\s*define|typedef', candidate) and
      not Search(r'\w\s+\((\w+::)*\*\w+\)\(', candidate)):
    error(filename, linenum, 'whitespace/parens', 4,
          'Extra space before ( in function call')

  # A ) followed only by a newline, or by a { and a newline, is assumed to
  # belong to a control statement (if/while/etc) and is not reported.
  if Search(r'[^)]\s+\)\s*[^{\s]', candidate):
    # When only whitespace precedes the closing parenthesis, a more
    # descriptive message is given.
    if Search(r'^\s+\)', candidate):
      complaint = 'Closing ) should be moved to the previous line'
    else:
      complaint = 'Extra space before )'
    error(filename, linenum, 'whitespace/parens', 2, complaint)
2256
2257
def IsBlankLine(line):
  """Tells whether the given line is blank.

  A line counts as blank when it is empty (or otherwise falsy) or when it
  is made up of whitespace characters only.

  Args:
    line: A line of a string.

  Returns:
    True, if the given line is blank.
  """
  if not line:
    return True
  return line.isspace()
2271
2272
def CheckForFunctionLengths(filename, clean_lines, linenum,
                            function_state, error):
  """Reports for long function bodies.

  For an overview why this is done, see:
  http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Write_Short_Functions

  Uses a simplistic algorithm assuming other style guidelines
  (especially spacing) are followed.
  Only checks unindented functions, so class members are unchecked.
  Trivial bodies are unchecked, so constructors with huge initializer lists
  may be missed.
  Blank/comment lines are not counted so as to avoid encouraging the removal
  of vertical space and comments just to get through a lint check.

  Each call handles one line, which is classified as one of:
  - the start of a function: function_state.Begin() is called once the
    line holding the opening brace is found;
  - a lone "}" in the first column: function_state.Check() and End();
  - any other non-blank line: function_state.Count().

  NOTE(review): NOLINT on the last line of a function is meant to disable
  this check, but nothing in this function looks at NOLINT; presumably the
  suppression happens in function_state.Check or the error callback --
  confirm.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    function_state: Current function name and lines in body so far.
    error: The function to call with any errors found.
  """
  lines = clean_lines.lines
  line = lines[linenum]
  joined_line = ''

  starting_func = False
  regexp = r'(\w(\w|::|\*|\&|\s)*)\('  # decls * & space::name( ...
  match_result = Match(regexp, line)
  if match_result:
    # If the name is all caps and underscores, figure it's a macro and
    # ignore it, unless it's TEST or TEST_F.
    function_name = match_result.group(1).split()[-1]
    if (function_name in ('TEST', 'TEST_F') or
        not Match(r'[A-Z_]+$', function_name)):
      starting_func = True

  if starting_func:
    body_found = False
    # Scan forward from the current line until the declaration either ends
    # (";" or "}") or opens a body ("{").
    for start_linenum in xrange(linenum, clean_lines.NumLines()):
      start_line = lines[start_linenum]
      joined_line += ' ' + start_line.lstrip()
      if Search(r'(;|})', start_line):  # Declarations and trivial functions
        body_found = True
        break                              # ... ignore
      elif Search(r'{', start_line):
        body_found = True
        function = Search(r'((\w|:)*)\(', line).group(1)
        if Match(r'TEST', function):    # Handle TEST... macros
          # For TEST macros the parameters are part of the reported name.
          parameter_regexp = Search(r'(\(.*\))', joined_line)
          if parameter_regexp:             # Ignore bad syntax
            function += parameter_regexp.group(1)
        else:
          function += '()'
        function_state.Begin(function)
        break
    if not body_found:
      # No body for the function (or evidence of a non-function) was found.
      error(filename, linenum, 'readability/fn_size', 5,
            'Lint failed to find start of function body.')
  elif Match(r'^\}\s*$', line):  # function end
    function_state.Check(error, filename, linenum)
    function_state.End()
  elif not Match(r'^\s*$', line):
    function_state.Count()  # Count non-blank/non-comment lines.
2341
2342
# Recognizes a "//" comment whose text begins with TODO.  Groups:
#   1: the whitespace between "//" and "TODO";
#   2: the optional "(username)" part, parentheses included;
#   3: the optional whitespace character (or end of string) that follows
#      the optional ":".
_RE_PATTERN_TODO = re.compile(
    r'^//(\s*)TODO(\(.+?\))?:?(\s|$)?')


def CheckComment(comment, filename, linenum, error):
  """Checks a comment for common mistakes in the way a TODO is written.

  Comments that do not start with a TODO are left alone.

  Args:
    comment: The text of the comment from the line in question.
    filename: The name of the current file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  todo = _RE_PATTERN_TODO.match(comment)
  if not todo:
    return

  # Exactly one space before TODO is right; none at all is handled
  # elsewhere.
  spaces_before_todo = todo.group(1)
  if len(spaces_before_todo) > 1:
    error(filename, linenum, 'whitespace/todo', 2,
          'Too many spaces before TODO')

  owner = todo.group(2)
  if not owner:
    error(filename, linenum, 'readability/todo', 2,
          'Missing username in TODO; it should look like '
          '"// TODO(my_username): Stuff."')

  # What follows must be a single space or the end of the comment.  The
  # group is None when neither whitespace nor the end comes next, and that
  # is reported as well.
  after_todo = todo.group(3)
  if after_todo not in (' ', ''):
    error(filename, linenum, 'whitespace/todo', 2,
          'TODO(my_username) should be followed by a space')
2374
def CheckAccess(filename, clean_lines, linenum, nesting_state, error):
  """Checks that DISALLOW* macros are used in the private: section.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    nesting_state: A _NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: The function to call with any errors found.
  """
  # The elided line has comments and strings taken out.
  macro = Match((r'\s*(DISALLOW_COPY_AND_ASSIGN|'
                 r'DISALLOW_EVIL_CONSTRUCTORS|'
                 r'DISALLOW_IMPLICIT_CONSTRUCTORS)'),
                clean_lines.elided[linenum])
  if not macro:
    return

  # A DISALLOW* macro found outside a class declaration (or inside a
  # function, when it should have been part of the class declaration) is
  # not reported: it probably resulted in a compiler error already.
  blocks = nesting_state.stack
  if not blocks:
    return
  innermost = blocks[-1]
  if not isinstance(innermost, _ClassInfo):
    return

  if innermost.access != 'private':
    error(filename, linenum, 'readability/constructors', 3,
          '%s must be in the private: section' % macro.group(1))
2404
2405
def FindNextMatchingAngleBracket(clean_lines, linenum, init_suffix):
  """Decide whether a '<' looks like the start of a template argument list.

  Scans forward from the text after the '<', continuing onto following
  lines when needed, and tracks nested '<', '(' and '[' on a stack.

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: Current line number.
    init_suffix: Remainder of the current line after the initial <.

  Returns:
    True if a matching bracket exists (or is assumed to exist), False if
    the '<' is most likely a less-than operator.
  """
  remaining = init_suffix
  open_brackets = ['<']
  while True:
    # Find the next operator that can tell us whether < is used as an
    # opening bracket or as a less-than operator.  Only the latter case
    # should end up producing a warning.
    #
    # Other operators are intentionally not inspected: terminating early
    # on something like "a<b+c" would give false positives for default
    # arguments and other template expressions.
    found = Search(r'^[^<>(),;\[\]]*([<>(),;\[\]])(.*)$', remaining)
    if not found:
      # Nothing interesting left on this line; move on to the next one.
      linenum += 1
      if linenum >= len(clean_lines.elided):
        # Ran out of lines without a verdict.  Most likely the input was
        # incomplete, otherwise a semicolon would have ended the scan.
        return True
      remaining = clean_lines.elided[linenum]
      continue

    token = found.group(1)
    remaining = found.group(2)

    # Any opener nests one level deeper, whatever is currently open.
    if token in ('<', '(', '['):
      open_brackets.append(token)
      continue

    if open_brackets[-1] != '<':
      # Inside () or []: only a closing paren/bracket matters.  No attempt
      # is made to pair ( with ) or [ with ]; a mismatch would have been a
      # syntax error anyway.
      if token in (')', ']'):
        open_brackets.pop()
      continue

    # Innermost open bracket is '<', so a closing '>' is expected.
    if token == '>':
      open_brackets.pop()
      if not open_brackets:
        return True
      continue

    # A comma right after the bracket is most likely a template argument
    # separator; the closing '>' is probably a few lines further down, so
    # answer optimistically.  Any other operator means no match.
    return token == ','
2473
2474
def FindPreviousMatchingAngleBracket(clean_lines, linenum, init_prefix):
  """Decide whether a '>' looks like the end of a template argument list.

  Scans backward from the text before the '>', continuing onto earlier
  lines when needed, and tracks nested '>', ')' and ']' on a stack.

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: Current line number.
    init_prefix: Part of the current line before the initial >.

  Returns:
    True if a matching bracket exists (or is assumed to exist), False
    otherwise.
  """
  remaining = init_prefix
  pending = ['>']
  while True:
    # Locate the last operator in the text still to be scanned.
    found = Search(r'^(.*)([<>(),;\[\]])[^<>(),;\[\]]*$', remaining)
    if not found:
      # Nothing interesting left on this line; step to the previous one.
      linenum -= 1
      if linenum < 0:
        # Exhausted all earlier lines without finding the opening '<'.
        return False
      remaining = clean_lines.elided[linenum]
      continue

    remaining = found.group(1)
    token = found.group(2)

    # Walking backwards, every closer nests one level deeper.
    if token in ('>', ')', ']'):
      pending.append(token)
      continue

    if pending[-1] != '>':
      # Inside ) or ]: only an opening paren/bracket matters.
      if token in ('(', '['):
        pending.pop()
      continue

    # Innermost pending bracket is '>', so an opening '<' is expected.
    if token == '<':
      pending.pop()
      if not pending:
        return True
      continue

    # A comma before the bracket is most likely a template argument
    # separator; the opening '<' is probably there further back, so answer
    # optimistically.  Any other operator means no match.
    return token == ','
2530
2531
def CheckSpacing(filename, clean_lines, linenum, nesting_state, error):
  """Checks for the correctness of various spacing issues in the code.

  Things we check for: spaces around operators, spaces after
  if/for/while/switch, no spaces around parens in function calls, two
  spaces between code and comment, don't start a block with a blank
  line, don't end a function with a blank line, don't add a blank line
  after public/protected/private, don't have too many blank lines in a row.

  The blank-line and comment checks run on the lines with raw strings
  removed but comments kept; every later check runs on the elided line
  (comments and strings removed).

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    nesting_state: A _NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: The function to call with any errors found.
  """

  # Don't use "elided" lines here, otherwise we can't check commented lines.
  # Don't want to use "raw" either, because we don't want to check inside C++11
  # raw strings.
  raw = clean_lines.lines_without_raw_strings
  line = raw[linenum]

  # Before nixing comments, check if the line is blank for no good
  # reason.  This includes the first line after a block is opened, and
  # blank lines at the end of a function (ie, right before a line like '}').
  #
  # Skip all the blank line checks if we are immediately inside a
  # namespace body.  In other words, don't issue blank line warnings
  # for this block:
  #   namespace {
  #
  #   }
  #
  # A warning about missing end of namespace comments will be issued instead.
  if IsBlankLine(line) and not nesting_state.InNamespaceBody():
    elided = clean_lines.elided
    prev_line = elided[linenum - 1]
    prevbrace = prev_line.rfind('{')
    # TODO(unknown): Don't complain if line before blank line, and line after,
    #                both start with alnums and are indented the same amount.
    #                This ignores whitespace at the start of a namespace block
    #                because those are not usually indented.
    if prevbrace != -1 and prev_line[prevbrace:].find('}') == -1:
      # OK, we have a blank line at the start of a code block.  Before we
      # complain, we check if it is an exception to the rule: The previous
      # non-empty line has the parameters of a function header that are indented
      # 4 spaces (because they did not fit in a 80 column line when placed on
      # the same line as the function name).  We also check for the case where
      # the previous line is indented 6 spaces, which may happen when the
      # initializers of a constructor do not fit into a 80 column line.
      exception = False
      if Match(r' {6}\w', prev_line):  # Initializer list?
        # We are looking for the opening column of initializer list, which
        # should be indented 4 spaces to cause 6 space indentation afterwards.
        search_position = linenum-2
        while (search_position >= 0
               and Match(r' {6}\w', elided[search_position])):
          search_position -= 1
        exception = (search_position >= 0
                     and elided[search_position][:5] == '    :')
      else:
        # Search for the function arguments or an initializer list.  We use a
        # simple heuristic here: If the line is indented 4 spaces; and we have a
        # closing paren, without the opening paren, followed by an opening brace
        # or colon (for initializer lists) we assume that it is the last line of
        # a function header.  If we have a colon indented 4 spaces, it is an
        # initializer list.
        exception = (Match(r' {4}\w[^\(]*\)\s*(const\s*)?(\{\s*$|:)',
                           prev_line)
                     or Match(r' {4}:', prev_line))

      if not exception:
        error(filename, linenum, 'whitespace/blank_line', 2,
              'Redundant blank line at the start of a code block '
              'should be deleted.')
    # Ignore blank lines at the end of a block in a long if-else
    # chain, like this:
    #   if (condition1) {
    #     // Something followed by a blank line
    #
    #   } else if (condition2) {
    #     // Something else
    #   }
    if linenum + 1 < clean_lines.NumLines():
      next_line = raw[linenum + 1]
      if (next_line
          and Match(r'\s*}', next_line)
          and next_line.find('} else ') == -1):
        error(filename, linenum, 'whitespace/blank_line', 3,
              'Redundant blank line at the end of a code block '
              'should be deleted.')

    matched = Match(r'\s*(public|protected|private):', prev_line)
    if matched:
      error(filename, linenum, 'whitespace/blank_line', 3,
            'Do not leave a blank line after "%s:"' % matched.group(1))

  # Next, we complain if there's a comment too near the text
  commentpos = line.find('//')
  if commentpos != -1:
    # Check if the // may be in quotes.  If so, ignore it
    # Comparisons made explicit for clarity -- pylint: disable=g-explicit-bool-comparison
    if (line.count('"', 0, commentpos) -
        line.count('\\"', 0, commentpos)) % 2 == 0:   # not in quotes
      # Allow one space for new scopes, two spaces otherwise:
      if (not Match(r'^\s*{ //', line) and
          ((commentpos >= 1 and
            line[commentpos-1] not in string.whitespace) or
           (commentpos >= 2 and
            line[commentpos-2] not in string.whitespace))):
        error(filename, linenum, 'whitespace/comments', 2,
              'At least two spaces is best between code and comments')
      # There should always be a space between the // and the comment
      commentend = commentpos + 2
      if commentend < len(line) and not line[commentend] == ' ':
        # but some lines are exceptions -- e.g. if they're big
        # comment delimiters like:
        # //----------------------------------------------------------
        # or are an empty C++ style Doxygen comment, like:
        # ///
        # or C++ style Doxygen comments placed after the variable:
        # ///<  Header comment
        # //!<  Header comment
        # or they begin with multiple slashes followed by a space:
        # //////// Header comment
        match = (Search(r'[=/-]{4,}\s*$', line[commentend:]) or
                 Search(r'^/$', line[commentend:]) or
                 Search(r'^!< ', line[commentend:]) or
                 Search(r'^/< ', line[commentend:]) or
                 Search(r'^/+ ', line[commentend:]))
        if not match:
          error(filename, linenum, 'whitespace/comments', 4,
                'Should have a space between // and comment')
      CheckComment(line[commentpos:], filename, linenum, error)

  line = clean_lines.elided[linenum]  # get rid of comments and strings

  # Don't try to do spacing checks for operator methods.
  # Both arguments are raw strings so the backslash reaches re.sub
  # explicitly rather than through an unrecognized string escape.
  line = re.sub(r'operator(==|!=|<|<<|<=|>=|>>|>)\(', r'operator\(', line)

  # We allow no-spaces around = within an if: "if ( (a=Foo()) == 0 )".
  # Otherwise not.  Note we only check for non-spaces on *both* sides;
  # sometimes people put non-spaces on one side when aligning ='s among
  # many lines (not that this is behavior that I approve of...)
  if Search(r'[\w.]=[\w.]', line) and not Search(r'\b(if|while) ', line):
    error(filename, linenum, 'whitespace/operators', 4,
          'Missing spaces around =')

  # It's ok not to have spaces around binary operators like + - * /, but if
  # there's too little whitespace, we get concerned.  It's hard to tell,
  # though, so we punt on this one for now.  TODO.

  # You should always have whitespace around binary operators.
  #
  # Check <= and >= first to avoid false positives with < and >, then
  # check non-include lines for spacing around < and >.
  match = Search(r'[^<>=!\s](==|!=|<=|>=)[^<>=!\s]', line)
  if match:
    error(filename, linenum, 'whitespace/operators', 3,
          'Missing spaces around %s' % match.group(1))
  # We allow no-spaces around << when used like this: 10<<20, but
  # not otherwise (particularly, not when used as streams)
  # Also ignore using ns::operator<<;
  match = Search(r'(operator|\S)(?:L|UL|ULL|l|ul|ull)?<<(\S)', line)
  if (match and
      not (match.group(1).isdigit() and match.group(2).isdigit()) and
      not (match.group(1) == 'operator' and match.group(2) == ';')):
    error(filename, linenum, 'whitespace/operators', 3,
          'Missing spaces around <<')
  elif not Match(r'#.*include', line):
    # Avoid false positives on ->
    reduced_line = line.replace('->', '')

    # Look for < that is not surrounded by spaces.  This is only
    # triggered if both sides are missing spaces, even though
    # technically we should flag if at least one side is missing a
    # space.  This is done to avoid some false positives with shifts.
    match = Search(r'[^\s<]<([^\s=<].*)', reduced_line)
    if (match and
        not FindNextMatchingAngleBracket(clean_lines, linenum, match.group(1))):
      error(filename, linenum, 'whitespace/operators', 3,
            'Missing spaces around <')

    # Look for > that is not surrounded by spaces.  Similar to the
    # above, we only trigger if both sides are missing spaces to avoid
    # false positives with shifts.
    match = Search(r'^(.*[^\s>])>[^\s=>]', reduced_line)
    if (match and
        not FindPreviousMatchingAngleBracket(clean_lines, linenum,
                                             match.group(1))):
      error(filename, linenum, 'whitespace/operators', 3,
            'Missing spaces around >')

  # We allow no-spaces around >> for almost anything.  This is because
  # C++11 allows ">>" to close nested templates, which accounts for
  # most cases when ">>" is not followed by a space.
  #
  # We still warn on ">>" followed by alpha character, because that is
  # likely due to ">>" being used for right shifts, e.g.:
  #   value >> alpha
  #
  # When ">>" is used to close templates, the alphanumeric letter that
  # follows would be part of an identifier, and there should still be
  # a space separating the template type and the identifier.
  #   type<type<type>> alpha
  match = Search(r'>>[a-zA-Z_]', line)
  if match:
    error(filename, linenum, 'whitespace/operators', 3,
          'Missing spaces around >>')

  # There shouldn't be space around unary operators
  match = Search(r'(!\s|~\s|[\s]--[\s;]|[\s]\+\+[\s;])', line)
  if match:
    error(filename, linenum, 'whitespace/operators', 4,
          'Extra space for operator %s' % match.group(1))

  # A pet peeve of mine: no spaces after an if, while, switch, or for
  match = Search(r' (if\(|for\(|while\(|switch\()', line)
  if match:
    error(filename, linenum, 'whitespace/parens', 5,
          'Missing space before ( in %s' % match.group(1))

  # For if/for/while/switch, the left and right parens should be
  # consistent about how many spaces are inside the parens, and
  # there should either be zero or one spaces inside the parens.
  # We don't want: "if ( foo)" or "if ( foo   )".
  # Exception: "for ( ; foo; bar)" and "for (foo; bar; )" are allowed.
  match = Search(r'\b(if|for|while|switch)\s*'
                 r'\(([ ]*)(.).*[^ ]+([ ]*)\)\s*{\s*$',
                 line)
  if match:
    if len(match.group(2)) != len(match.group(4)):
      if not (match.group(3) == ';' and
              len(match.group(2)) == 1 + len(match.group(4)) or
              not match.group(2) and Search(r'\bfor\s*\(.*; \)', line)):
        error(filename, linenum, 'whitespace/parens', 5,
              'Mismatching spaces inside () in %s' % match.group(1))
    if len(match.group(2)) not in [0, 1]:
      error(filename, linenum, 'whitespace/parens', 5,
            'Should have zero or one spaces inside ( and ) in %s' %
            match.group(1))

  # You should always have a space after a comma (either as fn arg or operator)
  #
  # This does not apply when the non-space character following the
  # comma is another comma, since the only time when that happens is
  # for empty macro arguments.
  #
  # We run this check in two passes: first pass on elided lines to
  # verify that lines contain missing whitespaces, second pass on raw
  # lines to confirm that those missing whitespaces are not due to
  # elided comments.
  if Search(r',[^,\s]', line) and Search(r',[^,\s]', raw[linenum]):
    error(filename, linenum, 'whitespace/comma', 3,
          'Missing space after ,')

  # You should always have a space after a semicolon
  # except for few corner cases
  # TODO(unknown): clarify if 'if (1) { return 1;}' requires one more
  # space after ;
  if Search(r';[^\s};\\)/]', line):
    error(filename, linenum, 'whitespace/semicolon', 3,
          'Missing space after ;')

  # Next we will look for issues with function calls.
  CheckSpacingForFunctionCall(filename, line, linenum, error)

  # Except after an opening paren, or after another opening brace (in case of
  # an initializer list, for instance), you should have spaces before your
  # braces. And since you should never have braces at the beginning of a line,
  # this is an easy test.
  match = Match(r'^(.*[^ ({]){', line)
  if match:
    # Try a bit harder to check for brace initialization.  This
    # happens in one of the following forms:
    #   Constructor() : initializer_list_{} { ... }
    #   Constructor{}.MemberFunction()
    #   Type variable{};
    #   FunctionCall(type{}, ...);
    #   LastArgument(..., type{});
    #   LOG(INFO) << type{} << " ...";
    #   map_of_type[{...}] = ...;
    #
    # We check for the character following the closing brace, and
    # silence the warning if it's one of those listed above, i.e.
    # "{.;,)<]".
    #
    # To account for nested initializer list, we allow any number of
    # closing braces up to one of "{.;,)<]".  We can't simply silence the
    # warning on first sight of closing brace, because that would
    # cause false negatives for things that are not initializer lists.
    #   Silence this:         But not this:
    #     Outer{                if (...) {
    #       Inner{...}            if (...){  // Missing space before {
    #     };                    }
    #
    # There is a false negative with this approach if people inserted
    # spurious semicolons, e.g. "if (cond){};", but we will catch the
    # spurious semicolon with a separate check.
    (endline, endlinenum, endpos) = CloseExpression(
        clean_lines, linenum, len(match.group(1)))
    trailing_text = ''
    if endpos > -1:
      trailing_text = endline[endpos:]
    # Also look at up to two lines after the one holding the closing brace.
    for offset in xrange(endlinenum + 1,
                         min(endlinenum + 3, clean_lines.NumLines() - 1)):
      trailing_text += clean_lines.elided[offset]
    if not Match(r'^[\s}]*[{.;,)<\]]', trailing_text):
      error(filename, linenum, 'whitespace/braces', 5,
            'Missing space before {')

  # Make sure '} else {' has spaces.
  if Search(r'}else', line):
    error(filename, linenum, 'whitespace/braces', 5,
          'Missing space before else')

  # You shouldn't have spaces before your brackets, except maybe after
  # 'delete []' or 'new char * []'.
  if Search(r'\w\s+\[', line) and not Search(r'delete\s+\[', line):
    error(filename, linenum, 'whitespace/braces', 5,
          'Extra space before [')

  # You shouldn't have a space before a semicolon at the end of the line.
  # There's a special case for "for" since the style guide allows space before
  # the semicolon there.
  if Search(r':\s*;\s*$', line):
    error(filename, linenum, 'whitespace/semicolon', 5,
          'Semicolon defining empty statement. Use {} instead.')
  elif Search(r'^\s*;\s*$', line):
    error(filename, linenum, 'whitespace/semicolon', 5,
          'Line contains only semicolon. If this should be an empty statement, '
          'use {} instead.')
  elif (Search(r'\s+;\s*$', line) and
        not Search(r'\bfor\b', line)):
    error(filename, linenum, 'whitespace/semicolon', 5,
          'Extra space before last semicolon. If this should be an empty '
          'statement, use {} instead.')

  # In range-based for, we wanted spaces before and after the colon, but
  # not around "::" tokens that might appear.
  # Raw strings keep "\(" a regex escape instead of an unrecognized
  # string-literal escape.
  if (Search(r'for *\(.*[^:]:[^: ]', line) or
      Search(r'for *\(.*[^: ]:[^:]', line)):
    error(filename, linenum, 'whitespace/forcolon', 2,
          'Missing space around colon in range-based for loop')
2878
2879
def CheckSectionSpacing(filename, clean_lines, class_info, linenum, error):
  """Checks that protected/private/public sections follow a blank line.

  Currently the only thing checked here is blank line before an access
  specifier inside a class that is not "small".

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    class_info: A _ClassInfo objects.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  # Small classes are exempt, where small means 25 lines or less: that is
  # the usual height of a terminal, and a class that does not fit on one
  # screen cannot really be called small.  If the end of the class was
  # never found, last_line is zero and this same test skips the check.
  class_span = class_info.last_line - class_info.starting_linenum
  if class_span <= 24:
    return

  # The first line of the class is exempt too, which covers one-liners like
  #   class Foo { public: ... };
  if linenum <= class_info.starting_linenum:
    return

  section = Match(r'\s*(public|protected|private):', clean_lines.lines[linenum])
  if not section:
    return

  # No warning when the preceding line is blank, mentions "class" or
  # "struct" (start of the class, or a forward-declared inner class that is
  # semantically private but public for implementation reasons), or ends
  # with a backslash (common when classes are defined inside C macros).
  preceding = clean_lines.lines[linenum - 1]
  if (IsBlankLine(preceding) or
      Search(r'\b(class|struct)\b', preceding) or
      Search(r'\\$', preceding)):
    return

  # Try a bit harder to find the end of the class head, to account for
  # multi-line base-specifier lists, e.g.:
  #   class Derived
  #       : public Base {
  # The head ends at the first line from the class start that ends in '{';
  # if there is none, the class's starting line is used.
  end_class_head = next(
      (candidate
       for candidate in range(class_info.starting_linenum, linenum)
       if Search(r'\{\s*$', clean_lines.lines[candidate])),
      class_info.starting_linenum)

  if end_class_head < linenum - 1:
    error(filename, linenum, 'whitespace/blank_line', 3,
          '"%s:" should be preceded by a blank line' % section.group(1))
2933
2934
def GetPreviousNonBlankLine(clean_lines, linenum):
  """Find the closest non-blank elided line above the given line.

  Args:
    clean_lines: A CleansedLines instance containing the file contents.
    linenum: The number of the line to check.

  Returns:
    A (text, line number) tuple for the last non-blank line before the
    current line.  If every earlier line is blank, or there are no earlier
    lines, the tuple is ('', -1).
  """
  # Walk upwards from the line just above linenum to the top of the file.
  for candidate in range(linenum - 1, -1, -1):
    text = clean_lines.elided[candidate]
    if not IsBlankLine(text):
      return (text, candidate)
  return ('', -1)
2956
2957
2958 def CheckBraces(filename, clean_lines, linenum, error):
2959   """Looks for misplaced braces (e.g. at the end of line).
2960
2961   Args:
2962     filename: The name of the current file.
2963     clean_lines: A CleansedLines instance containing the file.
2964     linenum: The number of the line to check.
2965     error: The function to call with any errors found.
2966   """
2967
2968   line = clean_lines.elided[linenum]        # get rid of comments and strings
2969
2970   if Match(r'\s*{\s*$', line):
2971     # We allow an open brace to start a line in the case where someone is using
2972     # braces in a block to explicitly create a new scope, which is commonly used
2973     # to control the lifetime of stack-allocated variables.  Braces are also
2974     # used for brace initializers inside function calls.  We don't detect this
2975     # perfectly: we just don't complain if the last non-whitespace character on
2976     # the previous non-blank line is ',', ';', ':', '(', '{', or '}', or if the
2977     # previous line starts a preprocessor block.
2978     prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
2979     if (not Search(r'[,;:}{(]\s*$', prevline) and
2980         not Match(r'\s*#', prevline)):
2981       error(filename, linenum, 'whitespace/braces', 4,
2982             '{ should almost always be at the end of the previous line')
2983
2984   # An else clause should be on the same line as the preceding closing brace.
2985   if Match(r'\s*else\s*', line):
2986     prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
2987     if Match(r'\s*}\s*$', prevline):
2988       error(filename, linenum, 'whitespace/newline', 4,
2989             'An else should appear on the same line as the preceding }')
2990
2991   # If braces come on one side of an else, they should be on both.
2992   # However, we have to worry about "else if" that spans multiple lines!
2993   if Search(r'}\s*else[^{]*$', line) or Match(r'[^}]*else\s*{', line):
2994     if Search(r'}\s*else if([^{]*)$', line):       # could be multi-line if
2995       # find the ( after the if
2996       pos = line.find('else if')
2997       pos = line.find('(', pos)
2998       if pos > 0:
2999         (endline, _, endpos) = CloseExpression(clean_lines, linenum, pos)
3000         if endline[endpos:].find('{') == -1:    # must be brace after if
3001           error(filename, linenum, 'readability/braces', 5,
3002                 'If an else has a brace on one side, it should have it on both')
3003     else:            # common case: else not followed by a multi-line if
3004       error(filename, linenum, 'readability/braces', 5,
3005             'If an else has a brace on one side, it should have it on both')
3006
3007   # Likewise, an else should never have the else clause on the same line
3008   if Search(r'\belse [^\s{]', line) and not Search(r'\belse if\b', line):
3009     error(filename, linenum, 'whitespace/newline', 4,
3010           'Else clause should never be on same line as else (use 2 lines)')
3011
3012   # In the same way, a do/while should never be on one line
3013   if Match(r'\s*do [^\s{]', line):
3014     error(filename, linenum, 'whitespace/newline', 4,
3015           'do/while clauses should not be on a single line')
3016
3017   # Block bodies should not be followed by a semicolon.  Due to C++11
3018   # brace initialization, there are more places where semicolons are
3019   # required than not, so we use a whitelist approach to check these
3020   # rather than a blacklist.  These are the places where "};" should
3021   # be replaced by just "}":
3022   # 1. Some flavor of block following closing parenthesis:
3023   #    for (;;) {};
3024   #    while (...) {};
3025   #    switch (...) {};
3026   #    Function(...) {};
3027   #    if (...) {};
3028   #    if (...) else if (...) {};
3029   #
3030   # 2. else block:
3031   #    if (...) else {};
3032   #
3033   # 3. const member function:
3034   #    Function(...) const {};
3035   #
3036   # 4. Block following some statement:
3037   #    x = 42;
3038   #    {};
3039   #
3040   # 5. Block at the beginning of a function:
3041   #    Function(...) {
3042   #      {};
3043   #    }
3044   #
3045   #    Note that naively checking for the preceding "{" will also match
3046   #    braces inside multi-dimensional arrays, but this is fine since
3047   #    that expression will not contain semicolons.
3048   #
3049   # 6. Block following another block:
3050   #    while (true) {}
3051   #    {};
3052   #
3053   # 7. End of namespaces:
3054   #    namespace {};
3055   #
3056   #    These semicolons seems far more common than other kinds of
3057   #    redundant semicolons, possibly due to people converting classes
3058   #    to namespaces.  For now we do not warn for this case.
3059   #
3060   # Try matching case 1 first.
3061   match = Match(r'^(.*\)\s*)\{', line)
3062   if match:
3063     # Matched closing parenthesis (case 1).  Check the token before the
3064     # matching opening parenthesis, and don't warn if it looks like a
3065     # macro.  This avoids these false positives:
3066     #  - macro that defines a base class
3067     #  - multi-line macro that defines a base class
3068     #  - macro that defines the whole class-head
3069     #
3070     # But we still issue warnings for macros that we know are safe to
3071     # warn, specifically:
3072     #  - TEST, TEST_F, TEST_P, MATCHER, MATCHER_P
3073     #  - TYPED_TEST
3074     #  - INTERFACE_DEF
3075     #  - EXCLUSIVE_LOCKS_REQUIRED, SHARED_LOCKS_REQUIRED, LOCKS_EXCLUDED:
3076     #
3077     # We implement a whitelist of safe macros instead of a blacklist of
3078     # unsafe macros, even though the latter appears less frequently in
3079     # google code and would have been easier to implement.  This is because
3080     # the downside for getting the whitelist wrong means some extra
3081     # semicolons, while the downside for getting the blacklist wrong
3082     # would result in compile errors.
3083     #
3084     # In addition to macros, we also don't want to warn on compound
3085     # literals.
3086     closing_brace_pos = match.group(1).rfind(')')
3087     opening_parenthesis = ReverseCloseExpression(
3088         clean_lines, linenum, closing_brace_pos)
3089     if opening_parenthesis[2] > -1:
3090       line_prefix = opening_parenthesis[0][0:opening_parenthesis[2]]
3091       macro = Search(r'\b([A-Z_]+)\s*$', line_prefix)
3092       if ((macro and
3093            macro.group(1) not in (
3094                'TEST', 'TEST_F', 'MATCHER', 'MATCHER_P', 'TYPED_TEST',
3095                'EXCLUSIVE_LOCKS_REQUIRED', 'SHARED_LOCKS_REQUIRED',
3096                'LOCKS_EXCLUDED', 'INTERFACE_DEF')) or
3097           Search(r'\s+=\s*$', line_prefix)):
3098         match = None
3099
3100   else:
3101     # Try matching cases 2-3.
3102     match = Match(r'^(.*(?:else|\)\s*const)\s*)\{', line)
3103     if not match:
3104       # Try matching cases 4-6.  These are always matched on separate lines.
3105       #
3106       # Note that we can't simply concatenate the previous line to the
3107       # current line and do a single match, otherwise we may output
3108       # duplicate warnings for the blank line case:
3109       #   if (cond) {
3110       #     // blank line
3111       #   }
3112       prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
3113       if prevline and Search(r'[;{}]\s*$', prevline):
3114         match = Match(r'^(\s*)\{', line)
3115
3116   # Check matching closing brace
3117   if match:
3118     (endline, endlinenum, endpos) = CloseExpression(
3119         clean_lines, linenum, len(match.group(1)))
3120     if endpos > -1 and Match(r'^\s*;', endline[endpos:]):
3121       # Current {} pair is eligible for semicolon check, and we have found
3122       # the redundant semicolon, output warning here.
3123       #
3124       # Note: because we are scanning forward for opening braces, and
3125       # outputting warnings for the matching closing brace, if there are
3126       # nested blocks with trailing semicolons, we will get the error
3127       # messages in reversed order.
3128       error(filename, endlinenum, 'readability/braces', 4,
3129             "You don't need a ; after a }")
3130
3131
def CheckEmptyBlockBody(filename, clean_lines, linenum, error):
  """Flags loops and conditionals whose entire body is a lone semicolon.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]

  # The keyword must be the first thing on the line (leading whitespace
  # aside).  That restriction also skips most do-while loops, because the
  # "while" of a do-while normally follows a closing brace.  "if" is checked
  # too, since an empty conditional body is most likely a mistake.
  keyword_match = Match(r'\s*(for|while|if)\s*\(', line)
  if not keyword_match:
    return

  # Locate the parenthesis that closes the condition expression.
  (end_line, end_linenum, end_pos) = CloseExpression(
      clean_lines, linenum, line.find('('))

  # Only a semicolon placed immediately after the condition is reported.
  # Whitespace or a newline before the semicolon is left to the separate
  # check for semicolons preceded by whitespace.
  if end_pos < 0 or not Match(r';', end_line[end_pos:]):
    return

  if keyword_match.group(1) == 'if':
    category = 'whitespace/empty_conditional_body'
    message = 'Empty conditional bodies should use {}'
  else:
    category = 'whitespace/empty_loop_body'
    message = 'Empty loop bodies should use {} or continue'
  error(filename, end_linenum, category, 5, message)
3165
3166
def CheckCheck(filename, clean_lines, linenum, error):
  """Suggests CHECK_EQ-style macros for CHECK/EXPECT on a comparison.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  elided = clean_lines.elided
  current_line = elided[linenum]

  # Pick the first known macro that is really invoked on this line.  The
  # regular expression guards against other macros that merely contain the
  # CHECK/EXPECT name as a substring.
  check_macro = None
  start_pos = -1
  for candidate in _CHECK_MACROS:
    if candidate not in current_line:
      continue
    invocation = Match(r'^(.*\b' + candidate + r'\s*)\(', current_line)
    if invocation:
      check_macro = candidate
      start_pos = len(invocation.group(1))
      break
  if check_macro is None:
    # Nothing to do if the line has no CHECK or EXPECT invocation.
    return

  # Match parentheses to find where the macro argument ends.
  (last_line, end_line, end_pos) = CloseExpression(
      clean_lines, linenum, start_pos)
  if end_pos < 0:
    return

  # Collect the text between the macro's parentheses, which may span
  # several lines.
  if end_line == linenum:
    expression = current_line[start_pos + 1:end_pos - 1]
  else:
    pieces = [current_line[start_pos + 1:]]
    pieces.extend(elided[linenum + 1:end_line])
    pieces.append(last_line[0:end_pos - 1])
    expression = ''.join(pieces)

  # Split the expression at its top-level relational operator.  Taking
  # parentheses into account avoids false positives for inputs such as
  # "CHECK((a < 4) == b)", which cannot be replaced by CHECK_LE.
  lhs = ''
  rhs = ''
  relational_op = None
  while expression:
    token_match = Match(r'^\s*(<<|<<=|>>|>>=|->\*|->|&&|\|\||'
                        r'==|!=|>=|>|<=|<|\()(.*)$', expression)
    if not token_match:
      # Unparenthesized operand.  Consume as many plain characters as
      # possible in one go; otherwise fall back to a single non-space
      # character (plus any whitespace in front of it).
      operand = Match(r'^([^-=!<>()&|]+)(.*)$', expression)
      if not operand:
        operand = Match(r'^(\s*\S)(.*)$', expression)
      if not operand:
        break
      lhs += operand.group(1)
      expression = operand.group(2)
      continue

    token = token_match.group(1)
    remainder = token_match.group(2)
    if token == '(':
      # Parenthesized operand: copy it to lhs as a single unit.
      (end, _) = FindEndOfExpressionInLine(remainder, 0, 1, '(', ')')
      if end < 0:
        return  # Unmatched parenthesis
      lhs += '(' + remainder[0:end]
      expression = remainder[end:]
    elif token in ('&&', '||'):
      # More than one term, e.g. CHECK(42 < a && a < b).  No single
      # CHECK_xx macro can replace that, so give up.
      return
    elif token in ('<<', '<<=', '>>', '>>=', '->*', '->'):
      # Operators that only look relational belong to the operand.
      lhs += token
      expression = remainder
    else:
      # Found the relational operator; the rest is the right operand.
      relational_op = token
      rhs = remainder
      break

  # All three parts of the comparison are required.
  if not lhs or not relational_op or not rhs:
    return

  # lhs cannot contain && or || (the loop bails out on them), but rhs was
  # taken verbatim and still has to be checked.
  if '&&' in rhs or '||' in rhs:
    return

  # Require a constant literal on at least one side, so that we do not
  # suggest replacements for unprintable operands such as
  # CHECK(variable != iterator).
  #
  # The pattern accepts decimal integers, hex integers, strings and
  # characters (in that order).
  lhs = lhs.strip()
  rhs = rhs.strip()
  match_constant = r'^([-+]?(\d+|0[xX][0-9a-fA-F]+)[lLuU]{0,3}|".*"|\'.*\')$'
  if Match(match_constant, lhs) or Match(match_constant, rhs):
    # The message deliberately shows "a" and "b" instead of the real
    # operands: long operands would make the message unreadable.
    error(filename, linenum, 'readability/check', 2,
          'Consider using %s instead of %s(a %s b)' % (
              _CHECK_REPLACEMENT[check_macro][relational_op],
              check_macro, relational_op))
3292
3293
def CheckAltTokens(filename, clean_lines, linenum, error):
  """Reports alternative operator keywords found in boolean expressions.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]

  # Preprocessor lines are never checked.
  if Match(r'^\s*#', line):
    return

  # Last-ditch attempt to stay out of multi-line comments.  It only helps
  # when the comment starts or ends on this very line, but that covers most
  # false positives and gives people who put multi-line comments inside
  # preprocessor macros a way to silence the warning.
  #
  # TODO(unknown): remove this once cpplint has better support for
  # multi-line comments.
  if '/*' in line or '*/' in line:
    return

  for token_match in _ALT_TOKEN_REPLACEMENT_PATTERN.finditer(line):
    alt_token = token_match.group(1)
    error(filename, linenum, 'readability/alt_tokens', 2,
          'Use operator %s instead of %s' % (
              _ALT_TOKEN_REPLACEMENT[alt_token], alt_token))
3324
3325
def GetLineWidth(line):
  """Determines the width of the line in column positions.

  Args:
    line: A string, which may be a Unicode string.

  Returns:
    The width of the line in column positions, accounting for Unicode
    combining characters and wide characters.  For anything that is not a
    text (Unicode) string, this is simply len(line).
  """
  # Python 2 names its text type "unicode".  That name does not exist on
  # Python 3, where "str" is the text type, so fall back to it rather than
  # failing with a NameError.
  try:
    text_type = unicode
  except NameError:
    text_type = str

  if not isinstance(line, text_type):
    return len(line)

  width = 0
  # Normalize first so that a base character followed by a combining mark
  # is composed into a single code point where a composed form exists.
  for uc in unicodedata.normalize('NFC', line):
    if unicodedata.east_asian_width(uc) in ('W', 'F'):
      # Wide and fullwidth characters take up two columns.
      width += 2
    elif not unicodedata.combining(uc):
      # Combining characters that remain occupy no column of their own.
      width += 1
  return width
3346
3347
def CheckStyle(filename, clean_lines, linenum, file_extension, nesting_state,
               error):
  """Checks rules from the 'C++ style rules' section of cppguide.html.

  Naming and comment style are mostly out of reach for a line-based tool,
  so this concentrates on what can be detected: tabs, odd indentation,
  trailing whitespace, line length and multiple statements per line.  It
  then hands the line to the more specific style checkers.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    file_extension: The extension (without the dot) of the filename.
    nesting_state: A _NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: The function to call with any errors found.
  """

  # The "elided" lines would hide commented-out text from these checks, and
  # the fully raw lines would expose the contents of C++11 raw strings, so
  # use the variant that only has raw strings removed.
  line = clean_lines.lines_without_raw_strings[linenum]
  cleansed_line = clean_lines.elided[linenum]

  if '\t' in line:
    error(filename, linenum, 'whitespace/tab', 1,
          'Tab found; better to use spaces')

  # An indent of exactly one or three spaces cannot be reconciled with
  # 2-space indentation.
  # NOTE: these are the conditions rob pike used for his tests.  The check
  # below is less sophisticated, but it may be worth catching up:
  # RLENGTH==initial_spaces
  # if(RLENGTH > 20) complain = 0;
  # if(match($0, " +(error|private|public|protected):")) complain = 0;
  # if(match(prev, "&& *$")) complain = 0;
  # if(match(prev, "\\|\\| *$")) complain = 0;
  # if(match(prev, "[\",=><] *$")) complain = 0;
  # if(match($0, " <<")) complain = 0;
  # if(match(prev, " +for \\(")) complain = 0;
  # if(prevodd && match(prevprev, " +for \\(")) complain = 0;
  leading_spaces = len(line) - len(line.lstrip(' '))
  if line[-1:].isspace():
    error(filename, linenum, 'whitespace/end_of_line', 4,
          'Line ends in whitespace.  Consider deleting these extra spaces.')
  # A lone "label:" line (e.g. a section label) may have an odd indent.
  elif (leading_spaces in (1, 3) and
        not Match(r'\s*\w+\s*:\s*$', cleansed_line)):
    error(filename, linenum, 'whitespace/indent', 3,
          'Weird number of spaces at line-start.  '
          'Are you using a 2-space indent?')

  # Work out whether this line is part of the header guard.
  is_header_guard = False
  if file_extension == 'h':
    cppvar = GetHeaderGuardCPPVariable(filename)
    is_header_guard = line.startswith((
        '#ifndef %s' % cppvar,
        '#define %s' % cppvar,
        '#endif  // %s' % cppvar))

  # Some lines are allowed to be long:
  #  - #include lines and header guards, which cannot be split cleanly;
  #  - comments ending in a URL, which could be split but would then be
  #    harder to cut&paste;
  #  - the "$Id:...$" comment, whose length is not the developer's fault.
  exempt_from_length_check = (
      line.startswith('#include') or is_header_guard or
      Match(r'^\s*//.*http(s?)://\S*$', line) or
      Match(r'^// \$Id:.*#[0-9]+ \$$', line))
  if not exempt_from_length_check:
    width = GetLineWidth(line)
    hard_limit = int((_line_length * 1.25))
    if width > hard_limit:
      error(filename, linenum, 'whitespace/line_length', 4,
            'Lines should very rarely be longer than %i characters' %
            hard_limit)
    elif width > _line_length:
      error(filename, linenum, 'whitespace/line_length', 2,
            'Lines should be <= %i characters long' % _line_length)

  # More than one statement on a line.  "for" headers legitimately contain
  # two semicolons, so any line mentioning "for" is skipped outright.
  if cleansed_line.count(';') > 1 and 'for' not in cleansed_line:
    previous_line = GetPreviousNonBlankLine(clean_lines, linenum)[0]
    # A "for" header may run over two lines: the previous line opens it
    # without having reached a semicolon yet.
    continues_for_header = ('for' in previous_line and
                            ';' not in previous_line)
    # A switch case that fits on a single line may hold several commands.
    one_line_case = (('case ' in cleansed_line or
                      'default:' in cleansed_line) and
                     'break;' in cleansed_line)
    if not continues_for_header and not one_line_case:
      error(filename, linenum, 'whitespace/newline', 0,
            'More than one command on the same line')

  # Hand over to the more specific style checks.
  CheckBraces(filename, clean_lines, linenum, error)
  CheckEmptyBlockBody(filename, clean_lines, linenum, error)
  CheckAccess(filename, clean_lines, linenum, nesting_state, error)
  CheckSpacing(filename, clean_lines, linenum, nesting_state, error)
  CheckCheck(filename, clean_lines, linenum, error)
  CheckAltTokens(filename, clean_lines, linenum, error)
  innermost_class = nesting_state.InnermostClass()
  if innermost_class:
    CheckSectionSpacing(filename, clean_lines, innermost_class, linenum,
                        error)
3453
3454
# Matches a quoted #include of a ".h" file whose path contains no '/', i.e.
# a header named without its directory (for example '#include "bar.h"').
_RE_PATTERN_INCLUDE_NEW_STYLE = re.compile(r'#include +"[^/]+\.h"')
# Matches a whole #include line.  Group 1 is the opening delimiter ('<' or
# '"') and group 2 is the included path.
_RE_PATTERN_INCLUDE = re.compile(r'^\s*#\s*include\s*([<"])([^>"]*)[>"].*$')
# Matches the first component of a filename delimited by -s and _s. That is:
#  _RE_FIRST_COMPONENT.match('foo').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo.cc').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo-bar_baz.cc').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo_bar-baz.cc').group(0) == 'foo'
_RE_FIRST_COMPONENT = re.compile(r'^[^-_.]+')
3463
3464
3465 def _DropCommonSuffixes(filename):
3466   """Drops common suffixes like _test.cc or -inl.h from filename.
3467
3468   For example:
3469     >>> _DropCommonSuffixes('foo/foo-inl.h')
3470     'foo/foo'
3471     >>> _DropCommonSuffixes('foo/bar/foo.cc')
3472     'foo/bar/foo'
3473     >>> _DropCommonSuffixes('foo/foo_internal.h')
3474     'foo/foo'
3475     >>> _DropCommonSuffixes('foo/foo_unusualinternal.h')
3476     'foo/foo_unusualinternal'
3477
3478   Args:
3479     filename: The input filename.
3480
3481   Returns:
3482     The filename with the common suffix removed.
3483   """
3484   for suffix in ('test.cc', 'regtest.cc', 'unittest.cc',
3485                  'inl.h', 'impl.h', 'internal.h'):
3486     if (filename.endswith(suffix) and len(filename) > len(suffix) and
3487         filename[-len(suffix) - 1] in ('-', '_')):
3488       return filename[:-len(suffix) - 1]
3489   return os.path.splitext(filename)[0]
3490
3491
3492 def _IsTestFilename(filename):
3493   """Determines if the given filename has a suffix that identifies it as a test.
3494
3495   Args:
3496     filename: The input filename.
3497
3498   Returns:
3499     True if 'filename' looks like a test, False otherwise.
3500   """
3501   if (filename.endswith('_test.cc') or
3502       filename.endswith('_unittest.cc') or
3503       filename.endswith('_regtest.cc')):
3504     return True
3505   else:
3506     return False
3507
3508
def _ClassifyInclude(fileinfo, include, is_system):
  """Decides which category of header an #include refers to.

  Args:
    fileinfo: The current file cpplint is running over. A FileInfo instance.
    include: The path to a #included file.
    is_system: True if the #include used <> rather than "".

  Returns:
    One of the _XXX_HEADER constants.

  For example:
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'stdio.h', True)
    _C_SYS_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'string', True)
    _CPP_SYS_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/foo.h', False)
    _LIKELY_MY_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo_unknown_extension.cc'),
    ...                  'bar/foo_other_ext.h', False)
    _POSSIBLE_MY_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/bar.h', False)
    _OTHER_HEADER
  """
  # Angle-bracket includes are system headers; _CPP_HEADERS tells the C++
  # ones apart from the C ones.
  if is_system:
    if include in _CPP_HEADERS:
      return _CPP_SYS_HEADER
    return _C_SYS_HEADER

  # Compare the two paths with their common suffixes dropped.  When the
  # base names agree and the include sits in the target's own directory
  # (or in the sibling "public" directory), the target most likely owns it.
  target_path = _DropCommonSuffixes(fileinfo.RepositoryName())
  target_dir, target_base = os.path.split(target_path)
  include_dir, include_base = os.path.split(_DropCommonSuffixes(include))
  if target_base == include_base:
    public_dir = os.path.normpath(target_dir + '/../public')
    if include_dir in (target_dir, public_dir):
      return _LIKELY_MY_HEADER

  # Sharing only the first component of the base name means the target may
  # be implementing the include.  Such a header is allowed to come first,
  # but we never complain if it does not.
  target_head = _RE_FIRST_COMPONENT.match(target_base)
  include_head = _RE_FIRST_COMPONENT.match(include_base)
  if (target_head and include_head and
      target_head.group(0) == include_head.group(0)):
    return _POSSIBLE_MY_HEADER

  return _OTHER_HEADER
3566
3567
3568
def CheckIncludeLine(filename, clean_lines, linenum, include_state, error):
  """Applies the checks that are specific to #include lines.

  Strings on #include lines are NOT removed from elided line, to make
  certain tasks easier. However, to prevent false positives, checks
  applicable to #include lines in CheckLanguage must be put here.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    include_state: An _IncludeState instance in which the headers are inserted.
    error: The function to call with any errors found.
  """
  current_file = FileInfo(filename)
  line = clean_lines.lines[linenum]

  # Headers should be named with their directory ("foo/bar.h"), not as a
  # bare "bar.h".
  if _RE_PATTERN_INCLUDE_NEW_STYLE.search(line):
    error(filename, linenum, 'build/include', 4,
          'Include the directory when naming .h files')

  # Everything below needs a parsed #include.  The pattern is anchored at
  # the start of the line, so one search serves all remaining checks.
  include_match = _RE_PATTERN_INCLUDE.search(line)
  if not include_match:
    return
  include = include_match.group(2)
  is_system = (include_match.group(1) == '<')

  if include in include_state:
    # Including a file twice is occasionally fine, but in general it is
    # not.
    error(filename, linenum, 'build/include', 4,
          '"%s" already included at %s:%s' %
          (include, filename, include_state[include]))
  else:
    include_state[include] = linenum

    # Headers are expected in this order:
    # 1) for foo.cc, foo.h  (preferred location)
    # 2) c system files
    # 3) cpp system files
    # 4) for foo.cc, foo.h  (deprecated location)
    # 5) other google headers
    #
    # Each include is classified as one of those 5 types.  include_state
    # remembers the highest type seen so far and complains when a lower
    # type shows up afterwards.
    header_type = _ClassifyInclude(current_file, include, is_system)
    order_problem = include_state.CheckNextIncludeOrder(header_type)
    if order_problem:
      error(filename, linenum, 'build/include_order', 4,
            '%s. Should be: %s.h, c system, c++ system, other.' %
            (order_problem, current_file.BaseName()))

    canonical_include = include_state.CanonicalizeAlphabeticalOrder(include)
    in_order = include_state.IsInAlphabeticalOrder(
        clean_lines, linenum, canonical_include)
    if not in_order:
      error(filename, linenum, 'build/include_alpha', 4,
            'Include "%s" not in alphabetical order' % include)
    include_state.SetLastHeader(canonical_include)

  # Look for any of the stream classes that are part of standard C++.
  # Many unit tests use cout, so test files are exempt.
  if (Match(r'(f|ind|io|i|o|parse|pf|stdio|str|)?stream$', include) and
      not _IsTestFilename(filename)):
    error(filename, linenum, 'readability/streams', 3,
          'Streams are highly discouraged.')
3639
3640
3641 def _GetTextInside(text, start_pattern):
3642   r"""Retrieves all the text between matching open and close parentheses.
3643
3644   Given a string of lines and a regular expression string, retrieve all the text
3645   following the expression and between opening punctuation symbols like
3646   (, [, or {, and the matching close-punctuation symbol. This properly nested
3647   occurrences of the punctuations, so for the text like
3648     printf(a(), b(c()));
3649   a call to _GetTextInside(text, r'printf\(') will return 'a(), b(c())'.
3650   start_pattern must match string having an open punctuation symbol at the end.
3651
3652   Args:
3653     text: The lines to extract text. Its comments and strings must be elided.
3654            It can be single line and can span multiple lines.
3655     start_pattern: The regexp string indicating where to start extracting
3656                    the text.
3657   Returns:
3658     The extracted text.
3659     None if either the opening string or ending punctuation could not be found.
3660   """
3661   # TODO(sugawarayu): Audit cpplint.py to see what places could be profitably
3662   # rewritten to use _GetTextInside (and use inferior regexp matching today).
3663
3664   # Give opening punctuations to get the matching close-punctuations.
3665   matching_punctuation = {'(': ')', '{': '}', '[': ']'}
3666   closing_punctuation = set(matching_punctuation.itervalues())
3667
3668   # Find the position to start extracting text.
3669   match = re.search(start_pattern, text, re.M)
3670   if not match:  # start_pattern not found in text.
3671     return None
3672   start_position = match.end(0)
3673
3674   assert start_position > 0, (
3675       'start_pattern must ends with an opening punctuation.')
3676   assert text[start_position - 1] in matching_punctuation, (
3677       'start_pattern must ends with an opening punctuation.')
3678   # Stack of closing punctuations we expect to have in text after position.
3679   punctuation_stack = [matching_punctuation[text[start_position - 1]]]
3680   position = start_position
3681   while punctuation_stack and position < len(text):
3682     if text[position] == punctuation_stack[-1]:
3683       punctuation_stack.pop()
3684     elif text[position] in closing_punctuation:
3685       # A closing punctuation without matching opening punctuations.
3686       return None
3687     elif text[position] in matching_punctuation:
3688       punctuation_stack.append(matching_punctuation[text[position]])
3689     position += 1
3690   if punctuation_stack:
3691     # Opening punctuations left without matching close-punctuations.
3692     return None
3693   # punctuations match.
3694   return text[start_position:position - 1]
3695
3696
# Patterns for matching call-by-reference parameters.
#
# Supports nested templates up to 2 levels deep using this messy pattern:
#   < (?: < (?: < [^<>]*
#               >
#           |   [^<>] )*
#         >
#     |   [^<>] )*
#   >
_RE_PATTERN_IDENT = r'[_a-zA-Z]\w*'  # =~ [[:alpha:]][[:alnum:]]*
# A type: an optional leading 'const', an optional elaborated-type keyword
# (typename/class/struct/union/enum), then a run of word characters,
# template argument lists and '::' separators.
_RE_PATTERN_TYPE = (
    r'(?:const\s+)?(?:typename\s+|class\s+|struct\s+|union\s+|enum\s+)?'
    r'(?:\w|'
    r'\s*<(?:<(?:<[^<>]*>|[^<>])*>|[^<>])*>|'
    r'::)+')
# A call-by-reference parameter ends with '& identifier'.
# Group 1 captures the type together with '& identifier'.  An optional
# default value ('= ...') may follow, and the parameter must be terminated
# by ',' or ')'.
_RE_PATTERN_REF_PARAM = re.compile(
    r'(' + _RE_PATTERN_TYPE + r'(?:\s*(?:\bconst\b|[*]))*\s*'
    r'&\s*' + _RE_PATTERN_IDENT + r')\s*(?:=[^,()]+)?[,)]')
# A call-by-const-reference parameter either ends with 'const& identifier'
# or looks like 'const type& identifier' when 'type' is atomic.
# Unlike _RE_PATTERN_REF_PARAM, this is kept as a pattern string rather
# than a compiled regular expression.
_RE_PATTERN_CONST_REF_PARAM = (
    r'(?:.*\s*\bconst\s*&\s*' + _RE_PATTERN_IDENT +
    r'|const\s+' + _RE_PATTERN_TYPE + r'\s*&\s*' + _RE_PATTERN_IDENT + r')')
3721
3722
def CheckLanguage(filename, clean_lines, linenum, file_extension,
                  include_state, nesting_state, error):
  """Checks rules from the 'C++ language rules' section of cppguide.html.

  Some of these rules are hard to test (function overloading, using
  uint32 inappropriately), but we do the best we can.

  Every finding is reported through the error callback; nothing is
  returned.  A line that is an #include directive is handed to
  CheckIncludeLine and receives none of the other checks below.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    file_extension: The extension (without the dot) of the filename.
    include_state: An _IncludeState instance in which the headers are inserted.
    nesting_state: A _NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.  It is
                   not consulted by this function.
    error: The function to call with any errors found.
  """
  # If the line is empty or consists of entirely a comment, no need to
  # check it.
  line = clean_lines.elided[linenum]
  if not line:
    return

  match = _RE_PATTERN_INCLUDE.search(line)
  if match:
    CheckIncludeLine(filename, clean_lines, linenum, include_state, error)
    return

  # Reset include state across preprocessor directives.  This is meant
  # to silence warnings for conditional includes.
  if Match(r'^\s*#\s*(?:ifdef|elif|else|endif)\b', line):
    include_state.ResetSection()

  # TODO(unknown): figure out if they're using default arguments in fn proto.

  # Check to see if they're using a conversion function cast.
  # I just try to capture the most common basic types, though there are more.
  # Parameterless conversion functions, such as bool(), are allowed as they are
  # probably a member operator declaration or default constructor.
  match = Search(
      r'(\bnew\s+)?\b'  # Grab 'new' operator, if it's there
      r'(int|float|double|bool|char|int32|uint32|int64|uint64)'
      r'(\([^)].*)', line)
  if match:
    matched_new = match.group(1)
    matched_type = match.group(2)
    matched_funcptr = match.group(3)

    # gMock methods are defined using some variant of MOCK_METHODx(name, type)
    # where type may be float(), int(string), etc.  Without context they are
    # virtually indistinguishable from int(x) casts. Likewise, gMock's
    # MockCallback takes a template parameter of the form return_type(arg_type),
    # which looks much like the cast we're trying to detect.
    #
    # std::function<> wrapper has a similar problem.
    #
    # Return types for function pointers also look like casts if they
    # don't have an extra space.
    if (matched_new is None and  # If new operator, then this isn't a cast
        not (Match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(', line) or
             Search(r'\bMockCallback<.*>', line) or
             Search(r'\bstd::function<.*>', line)) and
        not (matched_funcptr and
             Match(r'\((?:[^() ]+::\s*\*\s*)?[^() ]+\)\s*\(',
                   matched_funcptr))):
      # Try a bit harder to catch gmock lines: the only place where
      # something looks like an old-style cast is where we declare the
      # return type of the mocked method, and the only time when we
      # are missing context is if MOCK_METHOD was split across
      # multiple lines.  The missing MOCK_METHOD is usually one or two
      # lines back, so scan back one or two lines.
      #
      # It's not possible for gmock macros to appear in the first 2
      # lines, since the class head + section name takes up 2 lines.
      if (linenum < 2 or
          not (Match(r'^\s*MOCK_(?:CONST_)?METHOD\d+(?:_T)?\((?:\S+,)?\s*$',
                     clean_lines.elided[linenum - 1]) or
               Match(r'^\s*MOCK_(?:CONST_)?METHOD\d+(?:_T)?\(\s*$',
                     clean_lines.elided[linenum - 2]))):
        error(filename, linenum, 'readability/casting', 4,
              'Using deprecated casting style.  '
              'Use static_cast<%s>(...) instead' %
              matched_type)

  CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
                  'static_cast',
                  r'\((int|float|double|bool|char|u?int(16|32|64))\)', error)

  # This doesn't catch all cases. Consider (const char * const)"hello".
  #
  # (char *) "foo" should always be a const_cast (reinterpret_cast won't
  # compile).
  if CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
                     'const_cast', r'\((char\s?\*+\s?)\)\s*"', error):
    pass
  else:
    # Check pointer casts for other than string constants
    CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
                    'reinterpret_cast', r'\((\w+\s?\*+\s?)\)', error)

  # In addition, we look for people taking the address of a cast.  This
  # is dangerous -- casts can assign to temporaries, so the pointer doesn't
  # point where you think.
  match = Search(
      r'(?:&\(([^)]+)\)[\w(])|'
      r'(?:&(static|dynamic|down|reinterpret)_cast\b)', line)
  if match and match.group(1) != '*':
    error(filename, linenum, 'runtime/casting', 4,
          ('Are you taking an address of a cast?  '
           'This is dangerous: could be a temp var.  '
           'Take the address before doing the cast, rather than after'))

  # Check for people declaring static/global STL strings at the top level.
  # This is dangerous because the C++ language does not guarantee that
  # globals with constructors are initialized before the first access.
  match = Match(
      r'((?:|static +)(?:|const +))string +([a-zA-Z0-9_:]+)\b(.*)',
      line)
  # Make sure it's not a function.
  # Function template specialization looks like: "string foo<Type>(...".
  # Class template definitions look like: "string Foo<Type>::Method(...".
  #
  # Also ignore things that look like operators.  These are matched separately
  # because operator names cross non-word boundaries.  If we change the pattern
  # above, we would decrease the accuracy of matching identifiers.
  if (match and
      not Search(r'\boperator\W', line) and
      not Match(r'\s*(<.*>)?(::[a-zA-Z0-9_]+)?\s*\(([^"]|$)', match.group(3))):
    error(filename, linenum, 'runtime/string', 4,
          'For a static/global string constant, use a C style string instead: '
          '"%schar %s[]".' %
          (match.group(1), match.group(2)))

  if Search(r'\b([A-Za-z0-9_]*_)\(\1\)', line):
    error(filename, linenum, 'runtime/init', 4,
          'You seem to be initializing a member variable with itself.')

  # Header-only checks that are not implemented yet:
  # TODO(unknown): check that 1-arg constructors are explicit.
  #                How to tell it's a constructor?
  #                (handled in CheckForNonStandardConstructs for now)
  # TODO(unknown): check that classes have DISALLOW_EVIL_CONSTRUCTORS
  #                (level 1 error)

  # Check if people are using the verboten C basic types.  The only exception
  # we regularly allow is "unsigned short port" for port.
  if Search(r'\bshort port\b', line):
    if not Search(r'\bunsigned short port\b', line):
      error(filename, linenum, 'runtime/int', 4,
            'Use "unsigned short" for ports, not "short"')
  else:
    match = Search(r'\b(short|long(?! +double)|long long)\b', line)
    if match:
      error(filename, linenum, 'runtime/int', 4,
            'Use int16/int64/etc, rather than the C type %s' % match.group(1))

  # When snprintf is used, the second argument shouldn't be a literal.
  match = Search(r'snprintf\s*\(([^,]*),\s*([0-9]*)\s*,', line)
  if match and match.group(2) != '0':
    # If 2nd arg is zero, snprintf is used to calculate size.
    error(filename, linenum, 'runtime/printf', 3,
          'If you can, use sizeof(%s) instead of %s as the 2nd arg '
          'to snprintf.' % (match.group(1), match.group(2)))

  # Check if some verboten C functions are being used.
  if Search(r'\bsprintf\b', line):
    error(filename, linenum, 'runtime/printf', 5,
          'Never use sprintf.  Use snprintf instead.')
  match = Search(r'\b(strcpy|strcat)\b', line)
  if match:
    error(filename, linenum, 'runtime/printf', 4,
          'Almost always, snprintf is better than %s' % match.group(1))

  # Check if some verboten operator overloading is going on
  # TODO(unknown): catch out-of-line unary operator&:
  #   class X {};
  #   int operator&(const X& x) { return 42; }  // unary operator&
  # The trick is it's hard to tell apart from binary operator&:
  #   class Y { int operator&(const Y& x) { return 23; } }; // binary operator&
  if Search(r'\boperator\s*&\s*\(\s*\)', line):
    error(filename, linenum, 'runtime/operator', 4,
          'Unary operator& is dangerous.  Do not use it.')

  # Check for suspicious usage of "if" like
  # } if (a == b) {
  if Search(r'\}\s*if\s*\(', line):
    error(filename, linenum, 'readability/braces', 4,
          'Did you mean "else if"? If not, start a new line for "if".')

  # Check for potential format string bugs like printf(foo).
  # We constrain the pattern not to pick things like DocidForPrintf(foo).
  # Not perfect but it can catch printf(foo.c_str()) and printf(foo->c_str())
  # TODO(sugawarayu): Catch the following case. Need to change the calling
  # convention of the whole function to process multiple line to handle it.
  #   printf(
  #       boy_this_is_a_really_long_variable_that_cannot_fit_on_the_prev_line);
  printf_args = _GetTextInside(line, r'(?i)\b(string)?printf\s*\(')
  if printf_args:
    match = Match(r'([\w.\->()]+)$', printf_args)
    if match and match.group(1) != '__VA_ARGS__':
      function_name = re.search(r'\b((?:string)?printf)\s*\(',
                                line, re.I).group(1)
      error(filename, linenum, 'runtime/printf', 4,
            'Potential format string bug. Do %s("%%s", %s) instead.'
            % (function_name, match.group(1)))

  # Check for potential memset bugs like memset(buf, sizeof(buf), 0).
  # NOTE(review): the alternation below is not grouped, so '^' binds only to
  # the first alternative and '$' only to the last one; any second argument
  # that merely starts with a digit is therefore accepted.  Left unchanged
  # here -- confirm the intended strictness before tightening it.
  match = Search(r'memset\s*\(([^,]*),\s*([^,]*),\s*0\s*\)', line)
  if match and not Match(r"^''|-?[0-9]+|0x[0-9A-Fa-f]$", match.group(2)):
    error(filename, linenum, 'runtime/memset', 4,
          'Did you mean "memset(%s, 0, %s)"?'
          % (match.group(1), match.group(2)))

  if Search(r'\busing namespace\b', line):
    error(filename, linenum, 'build/namespaces', 5,
          'Do not use namespace using-directives.  '
          'Use using-declarations instead.')

  # Detect variable-length arrays.
  match = Match(r'\s*(.+::)?(\w+) [a-z]\w*\[(.+)];', line)
  if (match and match.group(2) not in ('return', 'delete') and
      ']' not in match.group(3)):
    # Split the size using space and arithmetic operators as delimiters.
    # If any of the resulting tokens are not compile time constants then
    # report the error.  ('>>' must be its own alternative: a trailing ']'
    # in the pattern would make it match only the literal text '>>]'.)
    tokens = re.split(r'\s|\+|\-|\*|\/|<<|>>', match.group(3))
    is_const = True
    skip_next = False
    for tok in tokens:
      if skip_next:
        skip_next = False
        continue

      if Search(r'sizeof\(.+\)', tok): continue
      if Search(r'arraysize\(\w+\)', tok): continue

      tok = tok.lstrip('(')
      tok = tok.rstrip(')')
      if not tok: continue
      if Match(r'\d+', tok): continue
      if Match(r'0[xX][0-9a-fA-F]+', tok): continue
      if Match(r'k[A-Z0-9]\w*', tok): continue
      if Match(r'(.+::)?k[A-Z0-9]\w*', tok): continue
      if Match(r'(.+::)?[A-Z][A-Z0-9_]*', tok): continue
      # A catch all for tricky sizeof cases, including 'sizeof expression',
      # 'sizeof(*type)', 'sizeof(const type)', 'sizeof(struct StructName)'
      # requires skipping the next token because we split on ' ' and '*'.
      if tok.startswith('sizeof'):
        skip_next = True
        continue
      is_const = False
      break
    if not is_const:
      error(filename, linenum, 'runtime/arrays', 1,
            'Do not use variable-length arrays.  Use an appropriately named '
            "('k' followed by CamelCase) compile-time constant for the size.")

  # If DISALLOW_EVIL_CONSTRUCTORS, DISALLOW_COPY_AND_ASSIGN, or
  # DISALLOW_IMPLICIT_CONSTRUCTORS is present, then it should be the last thing
  # in the class declaration.
  match = Match(
      (r'\s*'
       r'(DISALLOW_(EVIL_CONSTRUCTORS|COPY_AND_ASSIGN|IMPLICIT_CONSTRUCTORS))'
       r'\(.*\);$'),
      line)
  if match and linenum + 1 < clean_lines.NumLines():
    next_line = clean_lines.elided[linenum + 1]
    # We allow some, but not all, declarations of variables to be present
    # in the statement that defines the class.  The [\w\*,\s]* fragment of
    # the regular expression below allows users to declare instances of
    # the class or pointers to instances, but not less common types such
    # as function pointers or arrays.  It's a tradeoff between allowing
    # reasonable code and avoiding trying to parse more C++ using regexps.
    if not Search(r'^\s*}[\w\*,\s]*;', next_line):
      error(filename, linenum, 'readability/constructors', 3,
            match.group(1) + ' should be the last thing in the class')

  # Check for use of unnamed namespaces in header files.  Registration
  # macros are typically OK, so we allow use of "namespace {" on lines
  # that end with backslashes.  (line is known to be non-empty here because
  # of the early return at the top.)
  if (file_extension == 'h'
      and Search(r'\bnamespace\s*{', line)
      and line[-1] != '\\'):
    error(filename, linenum, 'build/namespaces', 4,
          'Do not use unnamed namespaces in header files.  See '
          'http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Namespaces'
          ' for more information.')
4022
def CheckForNonConstReference(filename, clean_lines, linenum,
                              nesting_state, error):
  """Check for non-const references.

  Separate from CheckLanguage since it scans backwards from current
  line, instead of scanning forward.

  Findings are reported through the error callback under the
  'runtime/references' category; nothing is returned.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    nesting_state: A _NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: The function to call with any errors found.
  """
  # Do nothing if there is no '&' on current line.
  line = clean_lines.elided[linenum]
  if '&' not in line:
    return

  # Long type names may be broken across multiple lines, usually in one
  # of these forms:
  #   LongType
  #       ::LongTypeContinued &identifier
  #   LongType::
  #       LongTypeContinued &identifier
  #   LongType<
  #       ...>::LongTypeContinued &identifier
  #
  # If we detected a type split across two lines, join the previous
  # line to current line so that we can match const references
  # accordingly.
  #
  # Note that this only scans back one line, since scanning back
  # arbitrary number of lines would be expensive.  If you have a type
  # that spans more than 2 lines, please use a typedef.
  if linenum > 1:
    previous = None
    if Match(r'\s*::(?:[\w<>]|::)+\s*&\s*\S', line):
      # previous_line\n + ::current_line
      previous = Search(r'\b((?:const\s*)?(?:[\w<>]|::)+[\w<>])\s*$',
                        clean_lines.elided[linenum - 1])
    elif Match(r'\s*[a-zA-Z_]([\w<>]|::)+\s*&\s*\S', line):
      # previous_line::\n + current_line
      previous = Search(r'\b((?:const\s*)?(?:[\w<>]|::)+::)\s*$',
                        clean_lines.elided[linenum - 1])
    if previous:
      line = previous.group(1) + line.lstrip()
    else:
      # Check for templated parameter that is split across multiple lines
      endpos = line.rfind('>')
      if endpos > -1:
        (_, startline, startpos) = ReverseCloseExpression(
            clean_lines, linenum, endpos)
        if startpos > -1 and startline < linenum:
          # Found the matching < on an earlier line, collect all
          # pieces up to current line.
          line = ''.join(clean_lines.elided[i].strip()
                         for i in xrange(startline, linenum + 1))

  # Check for non-const references in function parameters.  A single '&' may
  # be found in the following places:
  #   inside expression: binary & for bitwise AND
  #   inside expression: unary & for taking the address of something
  #   inside declarators: reference parameter
  # We will exclude the first two cases by checking that we are not inside a
  # function body, including one that was just introduced by a trailing '{'.
  # TODO(unknown): Doesn't account for preprocessor directives.
  # TODO(unknown): Doesn't account for 'catch(Exception& e)' [rare].
  declaration_scopes = (_ClassInfo, _NamespaceInfo)
  check_params = False
  if not nesting_state.stack:
    check_params = True  # top level
  elif isinstance(nesting_state.stack[-1], declaration_scopes):
    check_params = True  # within class or namespace
  elif Match(r'.*{\s*$', line):
    if (len(nesting_state.stack) == 1 or
        isinstance(nesting_state.stack[-2], declaration_scopes)):
      check_params = True  # just opened global/class/namespace block
  # We allow non-const references in a few standard places, like functions
  # called "swap()" or iostream operators like "<<" or ">>".  Do not check
  # those function parameters.
  #
  # We also accept & in static_assert, which looks like a function but
  # it's actually a declaration expression.
  #
  # The optional template-argument list after swap is '<' + word characters
  # and colons + '>' (e.g. swap<Foo>, swap<ns::Foo>).
  whitelisted_functions = (r'(?:[sS]wap(?:<[\w:]+>)?|'
                           r'operator\s*[<>][<>]|'
                           r'static_assert|COMPILE_ASSERT'
                           r')\s*\(')
  if Search(whitelisted_functions, line):
    check_params = False
  elif not Search(r'\S+\([^)]*$', line):
    # Don't see a whitelisted function on this line.  Actually we
    # didn't see any function name on this line, so this is likely a
    # multi-line parameter list.  Try a bit harder to catch this case.
    for i in xrange(2):
      if (linenum > i and
          Search(whitelisted_functions, clean_lines.elided[linenum - i - 1])):
        check_params = False
        break

  if check_params:
    decls = ReplaceAll(r'{[^}]*}', ' ', line)  # exclude function body
    for parameter in re.findall(_RE_PATTERN_REF_PARAM, decls):
      if not Match(_RE_PATTERN_CONST_REF_PARAM, parameter):
        error(filename, linenum, 'runtime/references', 2,
              'Is this a non-const reference? '
              'If so, make const or use a pointer: ' +
              ReplaceAll(' *<', '<', parameter))
4134
4135
def CheckCStyleCast(filename, linenum, line, raw_line, cast_type, pattern,
                    error):
  """Looks for a C-style cast matching pattern and reports what it finds.

  Depending on the surrounding text, a match is reported as a sizeof(type)
  use, as an unnamed function parameter, as a genuine C-style cast, or is
  silently ignored.

  Args:
    filename: The name of the current file.
    linenum: The number of the line to check.
    line: The line of code to check.
    raw_line: The raw line of code to check, with comments.
    cast_type: The string for the C++ cast to recommend.  This is either
      reinterpret_cast, static_cast, or const_cast, depending.
    pattern: The regular expression used to find C-style casts.  Group 1 is
      taken to be the type name that directly follows the opening
      parenthesis.
    error: The function to call with any errors found.

  Returns:
    True if an error was emitted.
    False otherwise.
  """
  cast = Search(pattern, line)
  if not cast:
    return False

  # Everything that precedes the '(' opening the matched "(type)".
  before_paren = line[0:cast.start(1) - 1]

  # e.g., sizeof(int)
  if Match(r'.*sizeof\s*$', before_paren):
    error(filename, linenum, 'runtime/sizeof', 1,
          'Using sizeof(type).  Use sizeof(varname) instead if possible')
    return True

  # operator++(int) and operator--(int)
  if (before_paren.endswith(' operator++') or
      before_paren.endswith(' operator--')):
    return False

  # A lone unnamed function argument looks just like an old-style cast.
  # When the text after "(type)" suggests a declaration, warn about the
  # unnamed argument (where appropriate) instead of about a cast.
  #
  # The style guide wants every parameter named, so these deserve a warning:
  #   Function(int);
  #   Function(int) {
  #   ConstMember(int) const;
  #   ConstMember(int) const {
  #   ExceptionMember(int) throw (...);
  #   ExceptionMember(int) throw (...) {
  #   PureVirtual(int) = 0;
  #
  # These are function-like too, but names are commonly left out to reduce
  # clutter, so they are let through:
  #   (FunctionPointer)(int);
  #   (FunctionPointer)(int) = value;
  #   Function((function_pointer_arg)(int))
  #   <TemplateArgument(int)>;
  #   <(FunctionPointerTemplateArgument)(int)>;
  after_cast = line[cast.end(0):]
  if not Match(r'^\s*(?:;|const\b|throw\b|=|>|\{|\))', after_cast):
    # Nothing declaration-like follows, so this is an actual cast.
    error(filename, linenum, 'readability/casting', 4,
          'Using C-style cast.  Use %s<%s>(...) instead' %
          (cast_type, cast.group(1)))
    return True

  # From here on the match looks like an unnamed parameter.

  # Any kind of template argument is fine.
  if Match(r'^\s*>', after_cast):
    return False

  # Assignments to function pointers are fine, but pure virtual functions
  # ("= 0") keep their unnamed-parameter warning.  Assigning "0" to a
  # function pointer gets caught as well; "nullptr" or "NULL" are the
  # preferred spellings there anyway.
  assigned = Match(r'^\s=\s*(\S+)\s*;', after_cast)
  if assigned and assigned.group(1) != '0':
    return False

  # Function pointer declarations are fine; they are recognized by a ')'
  # right before the "(type)" text.
  if Match(r'.*\)\s*$', line[0:cast.start(0)]):
    return False

  # A parameter named through a block comment is fine, e.g.:
  #  Function(int /*unused_param*/);
  if '/*' in raw_line:
    return False

  # Every filter passed: report the unnamed parameter.
  error(filename, linenum, 'readability/function', 3,
        'All parameters should be named in a function')
  return True
4230
4231
# Pairs of (header, names of the templates that header provides).
_HEADERS_CONTAINING_TEMPLATES = (
    ('<deque>', ('deque',)),
    ('<functional>', (
        'unary_function', 'binary_function',
        'plus', 'minus', 'multiplies', 'divides', 'modulus',
        'negate',
        'equal_to', 'not_equal_to', 'greater', 'less',
        'greater_equal', 'less_equal',
        'logical_and', 'logical_or', 'logical_not',
        'unary_negate', 'not1', 'binary_negate', 'not2',
        'bind1st', 'bind2nd',
        'pointer_to_unary_function',
        'pointer_to_binary_function',
        'ptr_fun',
        'mem_fun_t', 'mem_fun', 'mem_fun1_t', 'mem_fun1_ref_t',
        'mem_fun_ref_t',
        'const_mem_fun_t', 'const_mem_fun1_t',
        'const_mem_fun_ref_t', 'const_mem_fun1_ref_t',
        'mem_fun_ref',
    )),
    ('<limits>', ('numeric_limits',)),
    ('<list>', ('list',)),
    ('<map>', ('map', 'multimap')),
    ('<memory>', ('allocator',)),
    ('<queue>', ('queue', 'priority_queue')),
    ('<set>', ('set', 'multiset')),
    ('<stack>', ('stack',)),
    ('<string>', ('char_traits', 'basic_string')),
    ('<utility>', ('pair',)),
    ('<vector>', ('vector',)),

    # gcc extensions.
    # Note: std::hash is their hash, ::hash is our hash
    ('<hash_map>', ('hash_map', 'hash_multimap')),
    ('<hash_set>', ('hash_set', 'hash_multiset')),
    ('<slist>', ('slist',)),
)

# 'string' as a whole word; it is not a template, so it gets its own pattern.
_RE_PATTERN_STRING = re.compile(r'\bstring\b')

# (compiled pattern, function name, header) triples for <algorithm>
# functions.  Each pattern matches max<type>(..., ...) and max(..., ...),
# but not foo->max, foo.max or type::max().
_re_pattern_algorithm_header = []
for _template in ('copy', 'max', 'min', 'min_element', 'sort', 'swap',
                  'transform'):
  _re_pattern_algorithm_header.append((
      re.compile(r'[^>.]\b%s(<.*?>)?\([^\)]' % _template),
      _template,
      '<algorithm>'))

# (compiled pattern, 'name<>', header) triples, one for every template listed
# in _HEADERS_CONTAINING_TEMPLATES.
_re_pattern_templates = []
for _header, _templates in _HEADERS_CONTAINING_TEMPLATES:
  for _template in _templates:
    _re_pattern_templates.append((
        re.compile(r'(\<|\b)%s\s*\<' % _template),
        '%s<>' % _template,
        _header))
4288
4289
def FilesBelongToSameModule(filename_cc, filename_h):
  """Decide whether a .cc file and a header belong to the same module.

  A 'module' here means the following:
  foo.h, foo-inl.h, foo.cc, foo_test.cc and foo_unittest.cc belong to the
  same 'module' if they are in the same directory.
  some/path/public/xyzzy and some/path/internal/xyzzy are also treated as
  the same module.

  When filename_cc carries a longer path than filename_h, for example
  '/absolute/path/to/base/sysinfo.cc' including 'base/sysinfo.h', the extra
  leading part ('/absolute/path/to/') is returned as well so that the caller
  can use it to open the header.  The real include paths are not available
  in this context, hence the guesswork.

  Known bugs: tools/base/bar.cc and base/bar.h belong to the same module
  according to this implementation. Because of this, this function gives
  some false positives. This should be sufficiently rare in practice.

  Args:
    filename_cc: is the path for the .cc file
    filename_h: is the path for the header path

  Returns:
    Tuple with a bool and a string:
    bool: True if filename_cc and filename_h belong to the same module.
    string: the additional prefix needed to open the header file.
  """
  if not (filename_cc.endswith('.cc') and filename_h.endswith('.h')):
    return (False, '')

  # Reduce the .cc path to its module name: drop the extension, then at
  # most one test suffix, then the public/internal directory level.
  module_cc = filename_cc[:-len('.cc')]
  for test_suffix in ('_unittest', '_test'):
    if module_cc.endswith(test_suffix):
      module_cc = module_cc[:-len(test_suffix)]
      break
  module_cc = module_cc.replace('/public/', '/').replace('/internal/', '/')

  # Same reduction for the header, whose only optional suffix is '-inl'.
  module_h = filename_h[:-len('.h')]
  if module_h.endswith('-inl'):
    module_h = module_h[:-len('-inl')]
  module_h = module_h.replace('/public/', '/').replace('/internal/', '/')

  if not module_cc.endswith(module_h):
    return False, ''
  # Whatever precedes the shared tail is the prefix needed to open the header.
  return True, module_cc[:-len(module_h)]
4343
4344
def UpdateIncludeState(filename, include_state, io=codecs):
  """Fill up the include_state with new includes found from the file.

  Every #include found in the file is recorded under its header name;
  names that are already present in include_state keep their old value.

  Args:
    filename: the name of the header to read.
    include_state: an _IncludeState instance in which the headers are inserted.
    io: The io factory to use to read the file. Provided for testability.

  Returns:
    True if the file could be opened and was scanned (even when it holds no
    #include at all). False if opening it raised IOError.
  """
  try:
    headerfile = io.open(filename, 'r', 'utf8', 'replace')
  except IOError:
    return False
  try:
    for index, line in enumerate(headerfile):
      clean_line = CleanseComments(line)
      match = _RE_PATTERN_INCLUDE.search(clean_line)
      if match:
        include = match.group(2)
        # The value formatting is cute, but not really used right now.
        # What matters here is that the key is in include_state.
        # Line numbers in the value are 1-based.
        include_state.setdefault(include, '%s:%d' % (filename, index + 1))
  finally:
    # Release the file handle.  An injected io factory may hand back any
    # iterable of lines, which need not have a close() method.
    close = getattr(headerfile, 'close', None)
    if close is not None:
      close()
  return True
4372
4373
def CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error,
                              io=codecs):
  """Reports for missing stl includes.

  This function will output warnings to make sure you are including the headers
  necessary for the stl containers and functions that you use. We only give one
  reason to include a header. For example, if you use both equal_to<> and
  less<> in a .h file, only one (the latter in the file) of these will be
  reported as a reason to include the <functional>.

  The caller's include_state is not modified: the function works on a copy.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    include_state: An _IncludeState instance.
    error: The function to call with any errors found.
    io: The IO factory to use to read the header file. Provided for unittest
        injection.
  """
  required = {}  # A map of header name to linenumber and the template entity.
                 # Example of required: { '<functional>': (1219, 'less<>') }

  for linenum in xrange(clean_lines.NumLines()):
    line = clean_lines.elided[linenum]
    if not line or line[0] == '#':
      continue

    # String is special -- it is a non-templatized type in STL.
    matched = _RE_PATTERN_STRING.search(line)
    if matched:
      # Don't warn about strings in non-STL namespaces:
      # (We check only the first match per line; good enough.)
      prefix = line[:matched.start()]
      if prefix.endswith('std::') or not prefix.endswith('::'):
        required['<string>'] = (linenum, 'string')

    for pattern, template, header in _re_pattern_algorithm_header:
      if pattern.search(line):
        required[header] = (linenum, template)

    # The following check is just a speed up, no semantics are changed:
    # every template pattern needs a '<', so lines without one are skipped.
    if '<' not in line:
      continue

    for pattern, template, header in _re_pattern_templates:
      if pattern.search(line):
        required[header] = (linenum, template)

  # The policy is that if you #include something in foo.h you don't need to
  # include it again in foo.cc. Here, we will look at possible includes.
  # Let's copy the include_state so it is only messed up within this function.
  include_state = include_state.copy()

  # Did we find the header for this file (if any) and successfully load it?
  header_found = False

  # Use the absolute path so that matching works properly.
  abs_filename = FileInfo(filename).FullName()

  # For Emacs's flymake.
  # If cpplint is invoked from Emacs's flymake, a temporary file is generated
  # by flymake and that file name might end with '_flymake.cc'. In that case,
  # restore original file name here so that the corresponding header file can be
  # found.
  # e.g. If the file name is 'foo_flymake.cc', we should search for 'foo.h'
  # instead of 'foo_flymake.h'
  abs_filename = re.sub(r'_flymake\.cc$', '.cc', abs_filename)

  # include_state is modified during iteration (UpdateIncludeState inserts
  # into it), so we iterate over an explicit snapshot of the keys.
  for header in list(include_state.keys()):
    (same_module, common_path) = FilesBelongToSameModule(abs_filename, header)
    fullpath = common_path + header
    if same_module and UpdateIncludeState(fullpath, include_state, io):
      header_found = True

  # If we can't find the header file for a .cc, assume it's because we don't
  # know where to look. In that case we'll give up as we're not sure they
  # didn't include it in the .h file.
  # TODO(unknown): Do a better job of finding .h files so we are confident that
  # not having the .h file means there isn't one.
  if filename.endswith('.cc') and not header_found:
    return

  # All the lines have been processed, report the errors found.
  for required_header_unstripped, (required_linenum, template) in (
      required.items()):
    if required_header_unstripped.strip('<>"') not in include_state:
      error(filename, required_linenum,
            'build/include_what_you_use', 4,
            'Add #include ' + required_header_unstripped + ' for ' + template)
4465
4466
# Matches make_pair immediately followed by an explicit template argument
# list, e.g. "make_pair<int, int>(" or "make_pair <T>(".
_RE_PATTERN_EXPLICIT_MAKEPAIR = re.compile(r'\bmake_pair\s*<')


def CheckMakePairUsesDeduction(filename, clean_lines, linenum, error):
  """Reports make_pair calls that spell out their template arguments.

  G++ 4.6 in C++0x mode fails badly if make_pair's template arguments are
  specified explicitly, and such use isn't intended in any case, so the
  arguments should be left to template deduction.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  # Only the elided form of the line is inspected.
  if _RE_PATTERN_EXPLICIT_MAKEPAIR.search(clean_lines.elided[linenum]) is None:
    return

  confidence = 4  # 4 = high confidence
  message = ('For C++11-compatibility, omit template arguments from make_pair'
             ' OR use pair directly OR if appropriate, construct a pair'
             ' directly')
  error(filename, linenum, 'build/explicit_make_pair', confidence, message)
4489
4490
def ProcessLine(filename, file_extension, clean_lines, line,
                include_state, function_state, nesting_state, error,
                extra_check_functions=None):
  """Processes a single line in the file.

  Runs the per-line checks in a fixed order. Suppression parsing and the
  nesting-state update always run; every other check is skipped when the
  innermost nesting block reports an inline_asm state other than _NO_ASM.

  Args:
    filename: Filename of the file that is being processed.
    file_extension: The extension (dot not included) of the file.
    clean_lines: A CleansedLines instance containing the file; its raw_lines
                 attribute is read here for NOLINT parsing.
    line: Number of line being processed.
    include_state: An _IncludeState instance in which the headers are inserted.
    function_state: A _FunctionState instance which counts function lines, etc.
    nesting_state: A _NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: A callable to which errors are reported, which takes 5 arguments:
           filename, line number, category, confidence level, and message
    extra_check_functions: An optional iterable of additional check functions
                           that will be run on each source line. Each function
                           takes 4 arguments: filename, clean_lines, line,
                           error. None (the default) means no extra checks.
  """
  raw_lines = clean_lines.raw_lines
  ParseNolintSuppressions(filename, raw_lines[line], line, error)
  nesting_state.Update(filename, clean_lines, line, error)
  # No further checks are run while the innermost block is flagged as
  # something other than _NO_ASM (presumably inline assembly).
  if nesting_state.stack and nesting_state.stack[-1].inline_asm != _NO_ASM:
    return
  CheckForFunctionLengths(filename, clean_lines, line, function_state, error)
  CheckForMultilineCommentsAndStrings(filename, clean_lines, line, error)
  CheckStyle(filename, clean_lines, line, file_extension, nesting_state, error)
  CheckLanguage(filename, clean_lines, line, file_extension, include_state,
                nesting_state, error)
  CheckForNonConstReference(filename, clean_lines, line, nesting_state, error)
  CheckForNonStandardConstructs(filename, clean_lines, line,
                                nesting_state, error)
  CheckVlogArguments(filename, clean_lines, line, error)
  CheckPosixThreading(filename, clean_lines, line, error)
  CheckInvalidIncrement(filename, clean_lines, line, error)
  CheckMakePairUsesDeduction(filename, clean_lines, line, error)
  # The default is None rather than a shared mutable list; treat it as empty.
  for check_fn in extra_check_functions or ():
    check_fn(filename, clean_lines, line, error)
4531
def ProcessFileData(filename, file_extension, lines, error,
                    extra_check_functions=None):
  """Performs lint checks and reports any errors to the given error function.

  Args:
    filename: Filename of the file that is being processed.
    file_extension: The extension (dot not included) of the file.
    lines: An array of strings, each representing a line of the file, with the
           last element being empty if the file is terminated with a newline.
    error: A callable to which errors are reported, which takes 5 arguments:
           filename, line number, category, confidence level, and message
    extra_check_functions: An optional array of additional check functions
                           that will be run on each source line. Each function
                           takes 4 arguments: filename, clean_lines, line,
                           error. None (the default) means no extra checks.
  """
  # Use a fresh list per call instead of a shared mutable default argument.
  if extra_check_functions is None:
    extra_check_functions = []

  # A new list is built here, so the caller's list object is not extended.
  lines = (['// marker so line numbers and indices both start at 1'] + lines +
           ['// marker so line numbers end in a known way'])

  include_state = _IncludeState()
  function_state = _FunctionState()
  nesting_state = _NestingState()

  ResetNolintSuppressions()

  CheckForCopyright(filename, lines, error)

  if file_extension == 'h':
    CheckForHeaderGuard(filename, lines, error)

  # The return value is ignored; presumably this edits `lines` in place
  # before the cleansed view is built from it.
  RemoveMultiLineComments(filename, lines, error)
  clean_lines = CleansedLines(lines)
  for line in xrange(clean_lines.NumLines()):
    ProcessLine(filename, file_extension, clean_lines, line,
                include_state, function_state, nesting_state, error,
                extra_check_functions)
  nesting_state.CheckCompletedBlocks(filename, error)

  CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error)

  # We check here rather than inside ProcessLine so that we see raw
  # lines rather than "cleaned" lines.
  CheckForBadCharacters(filename, lines, error)

  CheckForNewlineAtEOF(filename, lines, error)
4576
def ProcessFile(filename, vlevel, extra_check_functions=None):
  """Does google-lint on a single file.

  Reads the file (or stdin when filename is '-'), strips trailing carriage
  returns, and runs ProcessFileData on it unless the extension is not in
  _valid_extensions. Unreadable files are reported on stderr and skipped.

  Args:
    filename: The name of the file to parse.

    vlevel: The level of errors to report.  Every error of confidence
    >= verbose_level will be reported.  0 is a good default.

    extra_check_functions: An optional array of additional check functions
                           that will be run on each source line. Each function
                           takes 4 arguments: filename, clean_lines, line,
                           error. None (the default) means no extra checks.
  """
  # Use a fresh list per call instead of a shared mutable default argument.
  if extra_check_functions is None:
    extra_check_functions = []

  _SetVerboseLevel(vlevel)

  try:
    # Support the UNIX convention of using "-" for stdin.  Note that
    # we are not opening the file with universal newline support
    # (which codecs doesn't support anyway), so the resulting lines do
    # contain trailing '\r' characters if we are reading a file that
    # has CRLF endings.
    # If after the split a trailing '\r' is present, it is removed
    # below. If it is not expected to be present (i.e. os.linesep !=
    # '\r\n' as in Windows), a warning is issued below if this file
    # is processed.

    if filename == '-':
      lines = codecs.StreamReaderWriter(sys.stdin,
                                        codecs.getreader('utf8'),
                                        codecs.getwriter('utf8'),
                                        'replace').read().split('\n')
    else:
      # The context manager closes the handle even if reading fails.
      with codecs.open(filename, 'r', 'utf8', 'replace') as source_file:
        lines = source_file.read().split('\n')

  except IOError:
    sys.stderr.write(
        "Skipping input '%s': Can't open for reading\n" % filename)
    return

  # Remove trailing '\r'.
  carriage_return_found = False
  for linenum, text in enumerate(lines):
    if text.endswith('\r'):
      lines[linenum] = text.rstrip('\r')
      carriage_return_found = True

  # Note, if no dot is found, this will give the entire filename as the ext.
  file_extension = filename[filename.rfind('.') + 1:]

  # When reading from stdin, the extension is unknown, so no cpplint tests
  # should rely on the extension.
  if filename != '-' and file_extension not in _valid_extensions:
    sys.stderr.write('Ignoring %s; not a valid file name '
                     '(%s)\n' % (filename, ', '.join(_valid_extensions)))
  else:
    ProcessFileData(filename, file_extension, lines, Error,
                    extra_check_functions)
    if carriage_return_found and os.linesep != '\r\n':
      # Use 0 for linenum since outputting only one error for potentially
      # several lines.
      Error(filename, 0, 'whitespace/newline', 1,
            'One or more unexpected \\r (^M) found; '
            'better to use only a \\n')

  sys.stderr.write('Done processing %s\n' % filename)
4643
4644
def PrintUsage(message):
  """Writes the usage text to stderr and terminates the program.

  Args:
    message: The optional error message. When it is truthy the process exits
             through sys.exit with a 'FATAL ERROR' string built from it;
             otherwise the process exits with status 1.
  """
  sys.stderr.write(_USAGE)
  if not message:
    sys.exit(1)
  sys.exit('\nFATAL ERROR: ' + message)
4656
4657
def PrintCategories():
  """Writes every error category to stderr, one per line, then exits with 0.

  These are the categories used to filter messages via --filter.
  """
  listing = ['  %s\n' % category for category in _ERROR_CATEGORIES]
  sys.stderr.write(''.join(listing))
  sys.exit(0)
4665
4666
def ParseArguments(args):
  """Parses the command line arguments.

  As side-effects this sets the output format, verbosity level, filters and
  counting style, and may rebind the module globals _root, _line_length and
  _valid_extensions. Invalid input ends the program through PrintUsage.

  Args:
    args: The command line arguments, without the program name.

  Returns:
    The list of filenames to lint.
  """
  # Declared once up front; a global statement applies to the whole function
  # regardless of where it appears.
  global _root
  global _line_length
  global _valid_extensions

  try:
    (opts, filenames) = getopt.getopt(args, '', ['help', 'output=', 'verbose=',
                                                 'counting=',
                                                 'filter=',
                                                 'root=',
                                                 'linelength=',
                                                 'extensions='])
  except getopt.GetoptError:
    PrintUsage('Invalid arguments.')

  verbosity = _VerboseLevel()
  output_format = _OutputFormat()
  filters = ''
  counting_style = ''

  for (opt, val) in opts:
    if opt == '--help':
      PrintUsage(None)
    elif opt == '--output':
      if val not in ('emacs', 'vs7', 'eclipse'):
        PrintUsage('The only allowed output formats are emacs, vs7 and eclipse.')
      output_format = val
    elif opt == '--verbose':
      # Report a bad value as a usage error, matching --linelength, instead
      # of letting the ValueError escape as a traceback.
      try:
        verbosity = int(val)
      except ValueError:
        PrintUsage('Verbosity level must be an integer.')
    elif opt == '--filter':
      filters = val
      # An empty --filter= is the request to list the available categories.
      if not filters:
        PrintCategories()
    elif opt == '--counting':
      if val not in ('total', 'toplevel', 'detailed'):
        PrintUsage('Valid counting options are total, toplevel, and detailed')
      counting_style = val
    elif opt == '--root':
      _root = val
    elif opt == '--linelength':
      try:
        _line_length = int(val)
      except ValueError:
        PrintUsage('Line length must be digits.')
    elif opt == '--extensions':
      # Splitting a string on ',' cannot raise ValueError, so no handler is
      # needed here.
      _valid_extensions = set(val.split(','))

  if not filenames:
    PrintUsage('No files were specified.')

  _SetOutputFormat(output_format)
  _SetVerboseLevel(verbosity)
  _SetFilters(filters)
  _SetCountingStyle(counting_style)

  return filenames
4735
4736
def main():
  """Command-line entry point: lints each file named in sys.argv[1:].

  Ends by calling sys.exit with True when the recorded error count is
  positive and False otherwise.
  """
  filenames = ParseArguments(sys.argv[1:])

  # Wrap stderr so that output is written with replacement characters;
  # printing something that contains non-ASCII characters must not kill us.
  utf8_reader = codecs.getreader('utf8')
  utf8_writer = codecs.getwriter('utf8')
  sys.stderr = codecs.StreamReaderWriter(sys.stderr, utf8_reader, utf8_writer,
                                         'replace')

  _cpplint_state.ResetErrorCounts()
  for filename in filenames:
    ProcessFile(filename, _cpplint_state.verbose_level)
  _cpplint_state.PrintErrorCounts()

  found_errors = _cpplint_state.error_count > 0
  sys.exit(found_errors)


# Run the linter only when this file is executed as a script.
if __name__ == '__main__':
  main()