Tools/scripts/reindent.py

   1 #! /usr/bin/env python
   2
   3 # Released to the public domain, by Tim Peters, 03 October 2000.
   4
   5 """reindent [-d][-r][-n][-v][-h] [ path ... ]
   6
   7 -d (--dryrun)   Dry run.   Analyze, but don't make any changes to, files.
   8 -r (--recurse)  Recurse.   Search for all .py files in subdirectories too.
   9 -n (--nobackup) No backup. Does not make a ".bak" file before reindenting.
  10 -v (--verbose)  Verbose.   Print informative msgs; else no output.
  11 -h (--help)     Help.      Print this usage information and exit.
  12
  13 Change Python (.py) files to use 4-space indents and no hard tab characters.
  14 Also trim excess spaces and tabs from ends of lines, and remove empty lines
  15 at the end of files.  Also ensure the last line ends with a newline.
  16
  17 If no paths are given on the command line, reindent operates as a filter,
  18 reading a single source file from standard input and writing the transformed
  19 source to standard output.  In this case, the -d, -r and -v flags are
  20 ignored.
  21
  22 You can pass one or more file and/or directory paths.  When a directory
  23 path, all .py files within the directory will be examined, and, if the -r
  24 option is given, likewise recursively for subdirectories.
  25
  26 If output is not to standard output, reindent overwrites files in place,
  27 after copying each original to a .bak file (unless -n).  If it finds nothing to
  28 change, the file is left alone.  If reindent does change a file, the changed
  29 file is a fixed-point for future runs (i.e., running reindent on the
  30 resulting .py file won't change it again).
  31
  32 The hard part of reindenting is figuring out what to do with comment
  33 lines.  So long as the input files get a clean bill of health from
  34 tabnanny.py, reindent should do a good job.
  35
  36 The backup file is a copy of the one that is being reindented. The ".bak"
  37 file is generated with shutil.copyfile(), but some corner cases regarding
  38 user/group and permissions could leave the backup file more readable than
  39 you'd prefer. You can always use the --nobackup option to prevent this.
  40 """
  41
# Version string for this script.
__version__ = "1"
  43
  44 import tokenize
  45 import os, shutil
  46 import sys
  47
# Module-level option state.  main() updates these from the command line
# (via its "global" statement) and check() reads them.
verbose    = 0      # bumped once per -v / --verbose
recurse    = 0      # bumped once per -r / --recurse
dryrun     = 0      # bumped once per -d / --dryrun
makebackup = True   # set to False by -n / --nobackup
  52
  53 def usage(msg=None):
  54     if msg is not None:
  55         print >> sys.stderr, msg
  56     print >> sys.stderr, __doc__
  57
  58 def errprint(*args):
  59     sep = ""
  60     for arg in args:
  61         sys.stderr.write(sep + str(arg))
  62         sep = " "
  63     sys.stderr.write("\n")
  64
  65 def main():
  66     import getopt
  67     global verbose, recurse, dryrun, makebackup
  68     try:
  69         opts, args = getopt.getopt(sys.argv[1:], "drnvh",
  70                         ["dryrun", "recurse", "nobackup", "verbose", "help"])
  71     except getopt.error, msg:
  72         usage(msg)
  73         return
  74     for o, a in opts:
  75         if o in ('-d', '--dryrun'):
  76             dryrun += 1
  77         elif o in ('-r', '--recurse'):
  78             recurse += 1
  79         elif o in ('-n', '--nobackup'):
  80             makebackup = False
  81         elif o in ('-v', '--verbose'):
  82             verbose += 1
  83         elif o in ('-h', '--help'):
  84             usage()
  85             return
  86     if not args:
  87         r = Reindenter(sys.stdin)
  88         r.run()
  89         r.write(sys.stdout)
  90         return
  91     for arg in args:
  92         check(arg)
  93
  94 def check(file):
  95     if os.path.isdir(file) and not os.path.islink(file):
  96         if verbose:
  97             print "listing directory", file
  98         names = os.listdir(file)
  99         for name in names:
 100             fullname = os.path.join(file, name)
 101             if ((recurse and os.path.isdir(fullname) and
 102                  not os.path.islink(fullname) and
 103                  not os.path.split(fullname)[1].startswith("."))
 104                 or name.lower().endswith(".py")):
 105                 check(fullname)
 106         return
 107
 108     if verbose:
 109         print "checking", file, "...",
 110     try:
 111         f = open(file)
 112     except IOError, msg:
 113         errprint("%s: I/O Error: %s" % (file, str(msg)))
 114         return
 115
 116     r = Reindenter(f)
 117     f.close()
 118     if r.run():
 119         if verbose:
 120             print "changed."
 121             if dryrun:
 122                 print "But this is a dry run, so leaving it alone."
 123         if not dryrun:
 124             bak = file + ".bak"
 125             if makebackup:
 126                 shutil.copyfile(file, bak)
 127                 if verbose:
 128                     print "backed up", file, "to", bak
 129             f = open(file, "w")
 130             r.write(f)
 131             f.close()
 132             if verbose:
 133                 print "wrote new", file
 134         return True
 135     else:
 136         if verbose:
 137             print "unchanged."
 138         return False
 139
 140 def _rstrip(line, JUNK='\n \t'):
 141     """Return line stripped of trailing spaces, tabs, newlines.
 142
 143     Note that line.rstrip() instead also strips sundry control characters,
 144     but at least one known Emacs user expects to keep junk like that, not
 145     mentioning Barry by name or anything <wink>.
 146     """
 147
 148     i = len(line)
 149     while i > 0 and line[i-1] in JUNK:
 150         i -= 1
 151     return line[:i]
 152
class Reindenter:
    """Rewrite the indentation of one Python source to 4 spaces per level.

    Typical use: construct from a file-like object, call run(), then call
    write() with an output file to emit the transformed lines.
    """

    def __init__(self, f):
        """Read all lines from f and build the normalized line list.

        f only needs a readlines() method.  Nothing is tokenized here;
        that happens in run().
        """
        self.find_stmt = 1  # next token begins a fresh stmt?
        self.level = 0      # current indent level

        # Raw file lines.
        self.raw = f.readlines()

        # File lines, rstripped & tab-expanded.  Dummy at start is so
        # that we can use tokenize's 1-based line numbering easily.
        # Note that a line is all-blank iff it's "\n".
        self.lines = [_rstrip(line).expandtabs() + "\n"
                      for line in self.raw]
        self.lines.insert(0, None)
        self.index = 1  # index into self.lines of next line

        # List of (lineno, indentlevel) pairs, one for each stmt and
        # comment line.  indentlevel is -1 for comment lines, as a
        # signal that tokenize doesn't know what to do about them;
        # indeed, they're our headache!
        self.stats = []

    def run(self):
        """Compute the reindented program and report whether it changed.

        Tokenizes the normalized lines (filling self.stats through
        tokeneater), then builds self.after, the list of output lines.
        Each statement or comment line recorded in self.stats, together
        with the lines up to the next recorded one, is shifted by the
        same amount.

        Returns true iff self.after differs from the raw input lines.
        """
        tokenize.tokenize(self.getline, self.tokeneater)
        # Remove trailing empty lines.
        lines = self.lines
        while lines and lines[-1] == "\n":
            lines.pop()
        # Sentinel.
        stats = self.stats
        stats.append((len(lines), 0))
        # Map count of leading spaces to # we want.
        have2want = {}
        # Program after transformation.
        after = self.after = []
        # Copy over initial empty lines -- there's nothing to do until
        # we see a line with *something* on it.
        i = stats[0][0]
        after.extend(lines[1:i])
        for i in range(len(stats)-1):
            # lines[thisstmt:nextstmt] is the chunk owned by this entry.
            thisstmt, thislevel = stats[i]
            nextstmt = stats[i+1][0]
            have = getlspace(lines[thisstmt])
            want = thislevel * 4
            if want < 0:
                # A comment line.
                if have:
                    # An indented comment line.  If we saw the same
                    # indentation before, reuse what it most recently
                    # mapped to.
                    want = have2want.get(have, -1)
                    if want < 0:
                        # Then it probably belongs to the next real stmt.
                        for j in xrange(i+1, len(stats)-1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                if have == getlspace(lines[jline]):
                                    want = jlevel * 4
                                break
                    if want < 0:           # Maybe it's a hanging
                                           # comment like this one,
                        # in which case we should shift it like its base
                        # line got shifted.
                        for j in xrange(i-1, -1, -1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                # after[] has no dummy entry at index 0,
                                # hence the jline-1.
                                want = have + getlspace(after[jline-1]) - \
                                       getlspace(lines[jline])
                                break
                    if want < 0:
                        # Still no luck -- leave it alone.
                        want = have
                else:
                    want = 0
            assert want >= 0
            have2want[have] = want
            diff = want - have
            if diff == 0 or have == 0:
                # Copy the chunk through unchanged.
                after.extend(lines[thisstmt:nextstmt])
            else:
                for line in lines[thisstmt:nextstmt]:
                    if diff > 0:
                        # Indent more, but keep all-blank lines blank.
                        if line == "\n":
                            after.append(line)
                        else:
                            after.append(" " * diff + line)
                    else:
                        # Indent less, never removing more than the
                        # leading spaces the line actually has.
                        remove = min(getlspace(line), -diff)
                        after.append(line[remove:])
        return self.raw != self.after

    def write(self, f):
        """Write the transformed lines to f.

        self.after is only created by run(), so run() must be called first.
        """
        f.writelines(self.after)

    # Line-getter for tokenize.
    def getline(self):
        """Return the next normalized line, or "" once all are consumed."""
        if self.index >= len(self.lines):
            line = ""
        else:
            line = self.lines[self.index]
            self.index += 1
        return line

    # Line-eater for tokenize.
    def tokeneater(self, type, token, (sline, scol), end, line,
                   INDENT=tokenize.INDENT,
                   DEDENT=tokenize.DEDENT,
                   NEWLINE=tokenize.NEWLINE,
                   COMMENT=tokenize.COMMENT,
                   NL=tokenize.NL):
        """Token callback: track the indent level and record line starts.

        Appends (line number, level) to self.stats for the first token of
        each statement, and (line number, -1) for each comment seen while
        a new statement is still being looked for.  The keyword defaults
        just bind the tokenize constants as local names.
        """

        if type == NEWLINE:
            # A program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            self.find_stmt = 1

        elif type == INDENT:
            self.find_stmt = 1
            self.level += 1

        elif type == DEDENT:
            self.find_stmt = 1
            self.level -= 1

        elif type == COMMENT:
            if self.find_stmt:
                self.stats.append((sline, -1))
                # but we're still looking for a new stmt, so leave
                # find_stmt alone

        elif type == NL:
            pass

        elif self.find_stmt:
            # This is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER.
            self.find_stmt = 0
            if line:   # not endmarker
                self.stats.append((sline, self.level))
 295
 296 # Count number of leading blanks.
 297 def getlspace(line):
 298     i, n = 0, len(line)
 299     while i < n and line[i] == " ":
 300         i += 1
 301     return i
 302
# Run the command-line interface only when executed as a script, not
# when the module is imported.
if __name__ == '__main__':
    main()