doc/helpers/reindent.py

   1 #!/usr/bin/python
   2 # -*- encoding: utf-8; py-indent-offset: 4 -*-
   3 # +------------------------------------------------------------------+
   4 # |             ____ _               _        __  __ _  __           |
   5 # |            / ___| |__   ___  ___| | __   |  \/  | |/ /           |
   6 # |           | |   | '_ \ / _ \/ __| |/ /   | |\/| | ' /            |
   7 # |           | |___| | | |  __/ (__|   <    | |  | | . \            |
   8 # |            \____|_| |_|\___|\___|_|\_\___|_|  |_|_|\_\           |
   9 # |                                                                  |
  10 # | Copyright Mathias Kettner 2014             mk@mathias-kettner.de |
  11 # +------------------------------------------------------------------+
  12 #
  13 # This file is part of Check_MK.
  14 # The official homepage is at http://mathias-kettner.de/check_mk.
  15 #
  16 # check_mk is free software;  you can redistribute it and/or modify it
  17 # under the  terms of the  GNU General Public License  as published by
  18 # the Free Software Foundation in version 2.  check_mk is  distributed
  19 # in the hope that it will be useful, but WITHOUT ANY WARRANTY;  with-
  20 # out even the implied warranty of  MERCHANTABILITY  or  FITNESS FOR A
  21 # PARTICULAR PURPOSE. See the  GNU General Public License for more de-
  22 # tails. You should have  received  a copy of the  GNU  General Public
  23 # License along with GNU Make; see the file  COPYING.  If  not,  write
  24 # to the Free Software Foundation, Inc., 51 Franklin St,  Fifth Floor,
  25 # Boston, MA 02110-1301 USA.
  26
  27 # Released to the public domain, by Tim Peters, 03 October 2000.
  28 """reindent [-d][-r][-v] [ path ... ]
  29
  30 -d (--dryrun)   Dry run.   Analyze, but don't make any changes to, files.
  31 -r (--recurse)  Recurse.   Search for all .py files in subdirectories too.
  32 -n (--nobackup) No backup. Does not make a ".bak" file before reindenting.
  33 -v (--verbose)  Verbose.   Print informative msgs; else no output.
  34 -h (--help)     Help.      Print this usage information and exit.
  35
  36 Change Python (.py) files to use 4-space indents and no hard tab characters.
  37 Also trim excess spaces and tabs from ends of lines, and remove empty lines
  38 at the end of files.  Also ensure the last line ends with a newline.
  39
  40 If no paths are given on the command line, reindent operates as a filter,
  41 reading a single source file from standard input and writing the transformed
  42 source to standard output.  In this case, the -d, -r and -v flags are
  43 ignored.
  44
  45 You can pass one or more file and/or directory paths.  When a directory
  46 path, all .py files within the directory will be examined, and, if the -r
  47 option is given, likewise recursively for subdirectories.
  48
  49 If output is not to standard output, reindent overwrites files in place,
  50 renaming the originals with a .bak extension.  If it finds nothing to
  51 change, the file is left alone.  If reindent does change a file, the changed
  52 file is a fixed-point for future runs (i.e., running reindent on the
  53 resulting .py file won't change it again).
  54
  55 The hard part of reindenting is figuring out what to do with comment
  56 lines.  So long as the input files get a clean bill of health from
  57 tabnanny.py, reindent should do a good job.
  58
  59 The backup file is a copy of the one that is being reindented. The ".bak"
file is generated with shutil.copyfile(), but some corner cases regarding
user/group and permissions could leave the backup file more readable than
  62 you'd prefer. You can always use the --nobackup option to prevent this.
  63 """
  64
  65 __version__ = "1"
  66
  67 import tokenize
  68 import os, shutil
  69 import sys
  70
# Command-line option state.  main() updates these from the parsed flags
# and check() reads them.
verbose = 0  # bumped once per -v/--verbose; non-zero enables progress messages
recurse = 0  # bumped once per -r/--recurse; non-zero makes check() descend into subdirectories
dryrun = 0  # bumped once per -d/--dryrun; non-zero means files are analysed but never rewritten
makebackup = True  # set to False by -n/--nobackup; when True a ".bak" copy is made before rewriting
  75
  76
  77 def usage(msg=None):
  78     if msg is not None:
  79         print >> sys.stderr, msg
  80     print >> sys.stderr, __doc__
  81
  82
  83 def errprint(*args):
  84     sep = ""
  85     for arg in args:
  86         sys.stderr.write(sep + str(arg))
  87         sep = " "
  88     sys.stderr.write("\n")
  89
  90
  91 def main():
  92     import getopt
  93     global verbose, recurse, dryrun, makebackup
  94     try:
  95         opts, args = getopt.getopt(sys.argv[1:], "drnvh",
  96                                    ["dryrun", "recurse", "nobackup", "verbose", "help"])
  97     except getopt.error as msg:
  98         usage(msg)
  99         return
 100     for o, a in opts:
 101         if o in ('-d', '--dryrun'):
 102             dryrun += 1
 103         elif o in ('-r', '--recurse'):
 104             recurse += 1
 105         elif o in ('-n', '--nobackup'):
 106             makebackup = False
 107         elif o in ('-v', '--verbose'):
 108             verbose += 1
 109         elif o in ('-h', '--help'):
 110             usage()
 111             return
 112     if not args:
 113         r = Reindenter(sys.stdin)
 114         r.run()
 115         r.write(sys.stdout)
 116         return
 117     for arg in args:
 118         check(arg)
 119
 120
 121 def check(file):
 122     if os.path.isdir(file) and not os.path.islink(file):
 123         if verbose:
 124             print "listing directory", file
 125         names = os.listdir(file)
 126         for name in names:
 127             fullname = os.path.join(file, name)
 128             if ((recurse and os.path.isdir(fullname) and not os.path.islink(fullname) and
 129                  not os.path.split(fullname)[1].startswith(".")) or name.lower().endswith(".py")):
 130                 check(fullname)
 131         return
 132
 133     if verbose:
 134         print "checking", file, "...",
 135     try:
 136         f = open(file)
 137     except IOError as msg:
 138         errprint("%s: I/O Error: %s" % (file, str(msg)))
 139         return
 140
 141     r = Reindenter(f)
 142     f.close()
 143     if r.run():
 144         if verbose:
 145             print "changed."
 146             if dryrun:
 147                 print "But this is a dry run, so leaving it alone."
 148         if not dryrun:
 149             bak = file + ".bak"
 150             if makebackup:
 151                 shutil.copyfile(file, bak)
 152                 if verbose:
 153                     print "backed up", file, "to", bak
 154             f = open(file, "w")
 155             r.write(f)
 156             f.close()
 157             if verbose:
 158                 print "wrote new", file
 159         return True
 160     else:
 161         if verbose:
 162             print "unchanged."
 163         return False
 164
 165
 166 def _rstrip(line, JUNK='\n \t'):
 167     """Return line stripped of trailing spaces, tabs, newlines.
 168
 169     Note that line.rstrip() instead also strips sundry control characters,
 170     but at least one known Emacs user expects to keep junk like that, not
 171     mentioning Barry by name or anything <wink>.
 172     """
 173
 174     i = len(line)
 175     while i > 0 and line[i - 1] in JUNK:
 176         i -= 1
 177     return line[:i]
 178
 179
 180 class Reindenter:
 181     def __init__(self, f):
 182         self.find_stmt = 1  # next token begins a fresh stmt?
 183         self.level = 0  # current indent level
 184
 185         # Raw file lines.
 186         self.raw = f.readlines()
 187
 188         # File lines, rstripped & tab-expanded.  Dummy at start is so
 189         # that we can use tokenize's 1-based line numbering easily.
 190         # Note that a line is all-blank iff it's "\n".
 191         self.lines = [_rstrip(line).expandtabs() + "\n" for line in self.raw]
 192         self.lines.insert(0, None)
 193         self.index = 1  # index into self.lines of next line
 194
 195         # List of (lineno, indentlevel) pairs, one for each stmt and
 196         # comment line.  indentlevel is -1 for comment lines, as a
 197         # signal that tokenize doesn't know what to do about them;
 198         # indeed, they're our headache!
 199         self.stats = []
 200
 201     def run(self):
 202         tokenize.tokenize(self.getline, self.tokeneater)
 203         # Remove trailing empty lines.
 204         lines = self.lines
 205         while lines and lines[-1] == "\n":
 206             lines.pop()
 207         # Sentinel.
 208         stats = self.stats
 209         stats.append((len(lines), 0))
 210         # Map count of leading spaces to # we want.
 211         have2want = {}
 212         # Program after transformation.
 213         after = self.after = []
 214         # Copy over initial empty lines -- there's nothing to do until
 215         # we see a line with *something* on it.
 216         i = stats[0][0]
 217         after.extend(lines[1:i])
 218         for i in range(len(stats) - 1):
 219             thisstmt, thislevel = stats[i]
 220             nextstmt = stats[i + 1][0]
 221             have = getlspace(lines[thisstmt])
 222             want = thislevel * 4
 223             if want < 0:
 224                 # A comment line.
 225                 if have:
 226                     # An indented comment line.  If we saw the same
 227                     # indentation before, reuse what it most recently
 228                     # mapped to.
 229                     want = have2want.get(have, -1)
 230                     if want < 0:
 231                         # Then it probably belongs to the next real stmt.
 232                         for j in xrange(i + 1, len(stats) - 1):
 233                             jline, jlevel = stats[j]
 234                             if jlevel >= 0:
 235                                 if have == getlspace(lines[jline]):
 236                                     want = jlevel * 4
 237                                 break
 238                     if want < 0:  # Maybe it's a hanging
 239                         # comment like this one,
 240                         # in which case we should shift it like its base
 241                         # line got shifted.
 242                         for j in xrange(i - 1, -1, -1):
 243                             jline, jlevel = stats[j]
 244                             if jlevel >= 0:
 245                                 want = have + getlspace(after[jline-1]) - \
 246                                        getlspace(lines[jline])
 247                                 break
 248                     if want < 0:
 249                         # Still no luck -- leave it alone.
 250                         want = have
 251                 else:
 252                     want = 0
 253             assert want >= 0
 254             have2want[have] = want
 255             diff = want - have
 256             if diff == 0 or have == 0:
 257                 after.extend(lines[thisstmt:nextstmt])
 258             else:
 259                 for line in lines[thisstmt:nextstmt]:
 260                     if diff > 0:
 261                         if line == "\n":
 262                             after.append(line)
 263                         else:
 264                             after.append(" " * diff + line)
 265                     else:
 266                         remove = min(getlspace(line), -diff)
 267                         after.append(line[remove:])
 268         return self.raw != self.after
 269
 270     def write(self, f):
 271         f.writelines(self.after)
 272
 273     # Line-getter for tokenize.
 274     def getline(self):
 275         if self.index >= len(self.lines):
 276             line = ""
 277         else:
 278             line = self.lines[self.index]
 279             self.index += 1
 280         return line
 281
 282     # Line-eater for tokenize.
 283     def tokeneater(self,
 284                    type,
 285                    token,
 286                    row_col,
 287                    end,
 288                    line,
 289                    INDENT=tokenize.INDENT,
 290                    DEDENT=tokenize.DEDENT,
 291                    NEWLINE=tokenize.NEWLINE,
 292                    COMMENT=tokenize.COMMENT,
 293                    NL=tokenize.NL):
 294
 295         sline, _scol = row_col
 296         if type == NEWLINE:
 297             # A program statement, or ENDMARKER, will eventually follow,
 298             # after some (possibly empty) run of tokens of the form
 299             #     (NL | COMMENT)* (INDENT | DEDENT+)?
 300             self.find_stmt = 1
 301
 302         elif type == INDENT:
 303             self.find_stmt = 1
 304             self.level += 1
 305
 306         elif type == DEDENT:
 307             self.find_stmt = 1
 308             self.level -= 1
 309
 310         elif type == COMMENT:
 311             if self.find_stmt:
 312                 self.stats.append((sline, -1))
 313                 # but we're still looking for a new stmt, so leave
 314                 # find_stmt alone
 315
 316         elif type == NL:
 317             pass
 318
 319         elif self.find_stmt:
 320             # This is the first "real token" following a NEWLINE, so it
 321             # must be the first token of the next program statement, or an
 322             # ENDMARKER.
 323             self.find_stmt = 0
 324             if line:  # not endmarker
 325                 self.stats.append((sline, self.level))
 326
 327
 328 # Count number of leading blanks.
 329 def getlspace(line):
 330     i, n = 0, len(line)
 331     while i < n and line[i] == " ":
 332         i += 1
 333     return i
 334
 335
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()