3 # Released to the public domain, by Tim Peters, 03 October 2000.
5 """reindent [-d][-r][-v] [ path ... ]
7 -d (--dryrun) Dry run. Analyze, but don't make any changes to, files.
8 -r (--recurse) Recurse. Search for all .py files in subdirectories too.
9 -n (--nobackup) No backup. Does not make a ".bak" file before reindenting.
10 -v (--verbose) Verbose. Print informative msgs; else no output.
11 -h (--help) Help. Print this usage information and exit.
13 Change Python (.py) files to use 4-space indents and no hard tab characters.
14 Also trim excess spaces and tabs from ends of lines, and remove empty lines
15 at the end of files. Also ensure the last line ends with a newline.
17 If no paths are given on the command line, reindent operates as a filter,
18 reading a single source file from standard input and writing the transformed
19 source to standard output. In this case, the -d, -r and -v flags are ignored.
22 You can pass one or more file and/or directory paths. When a path is a
23 directory, all .py files within that directory will be examined, and, if the -r
24 option is given, likewise recursively for subdirectories.
26 If output is not to standard output, reindent overwrites files in place,
27 renaming the originals with a .bak extension. If it finds nothing to
28 change, the file is left alone. If reindent does change a file, the changed
29 file is a fixed-point for future runs (i.e., running reindent on the
30 resulting .py file won't change it again).
32 The hard part of reindenting is figuring out what to do with comment
33 lines. So long as the input files get a clean bill of health from
34 tabnanny.py, reindent should do a good job.
36 The backup file is a copy of the one that is being reindented. The ".bak"
37 file is generated with shutil.copy(), but some corner cases regarding
38 user/group and permissions could leave the backup file more readable than
39 you'd prefer. You can always use the --nobackup option to prevent this.
55 print >> sys
.stderr
, msg
56 print >> sys
.stderr
, __doc__
61 sys
.stderr
.write(sep
+ str(arg
))
63 sys
.stderr
.write("\n")
67 global verbose
, recurse
, dryrun
, makebackup
69 opts
, args
= getopt
.getopt(sys
.argv
[1:], "drnvh",
70 ["dryrun", "recurse", "nobackup", "verbose", "help"])
71 except getopt
.error
, msg
:
75 if o
in ('-d', '--dryrun'):
77 elif o
in ('-r', '--recurse'):
79 elif o
in ('-n', '--nobackup'):
81 elif o
in ('-v', '--verbose'):
83 elif o
in ('-h', '--help'):
87 r
= Reindenter(sys
.stdin
)
95 if os
.path
.isdir(file) and not os
.path
.islink(file):
97 print "listing directory", file
98 names
= os
.listdir(file)
100 fullname
= os
.path
.join(file, name
)
101 if ((recurse
and os
.path
.isdir(fullname
) and
102 not os
.path
.islink(fullname
) and
103 not os
.path
.split(fullname
)[1].startswith("."))
104 or name
.lower().endswith(".py")):
109 print "checking", file, "...",
113 errprint("%s: I/O Error: %s" % (file, str(msg
)))
122 print "But this is a dry run, so leaving it alone."
126 shutil
.copyfile(file, bak
)
128 print "backed up", file, "to", bak
133 print "wrote new", file
140 def _rstrip(line
, JUNK
='\n \t'):
141 """Return line stripped of trailing spaces, tabs, newlines.
143 Note that line.rstrip() instead also strips sundry control characters,
144 but at least one known Emacs user expects to keep junk like that, not
145 mentioning Barry by name or anything <wink>.
149 while i
> 0 and line
[i
-1] in JUNK
:
155 def __init__(self
, f
):
156 self
.find_stmt
= 1 # next token begins a fresh stmt?
157 self
.level
= 0 # current indent level
160 self
.raw
= f
.readlines()
162 # File lines, rstripped & tab-expanded. Dummy at start is so
163 # that we can use tokenize's 1-based line numbering easily.
164 # Note that a line is all-blank iff it's "\n".
165 self
.lines
= [_rstrip(line
).expandtabs() + "\n"
166 for line
in self
.raw
]
167 self
.lines
.insert(0, None)
168 self
.index
= 1 # index into self.lines of next line
170 # List of (lineno, indentlevel) pairs, one for each stmt and
171 # comment line. indentlevel is -1 for comment lines, as a
172 # signal that tokenize doesn't know what to do about them;
173 # indeed, they're our headache!
177 tokenize
.tokenize(self
.getline
, self
.tokeneater
)
178 # Remove trailing empty lines.
180 while lines
and lines
[-1] == "\n":
184 stats
.append((len(lines
), 0))
185 # Map count of leading spaces to # we want.
187 # Program after transformation.
188 after
= self
.after
= []
189 # Copy over initial empty lines -- there's nothing to do until
190 # we see a line with *something* on it.
192 after
.extend(lines
[1:i
])
193 for i
in range(len(stats
)-1):
194 thisstmt
, thislevel
= stats
[i
]
195 nextstmt
= stats
[i
+1][0]
196 have
= getlspace(lines
[thisstmt
])
201 # An indented comment line. If we saw the same
202 # indentation before, reuse what it most recently
204 want
= have2want
.get(have
, -1)
206 # Then it probably belongs to the next real stmt.
207 for j
in xrange(i
+1, len(stats
)-1):
208 jline
, jlevel
= stats
[j
]
210 if have
== getlspace(lines
[jline
]):
213 if want
< 0: # Maybe it's a hanging
214 # comment like this one,
215 # in which case we should shift it like its base
217 for j
in xrange(i
-1, -1, -1):
218 jline
, jlevel
= stats
[j
]
220 want
= have
+ getlspace(after
[jline
-1]) - \
221 getlspace(lines
[jline
])
224 # Still no luck -- leave it alone.
229 have2want
[have
] = want
231 if diff
== 0 or have
== 0:
232 after
.extend(lines
[thisstmt
:nextstmt
])
234 for line
in lines
[thisstmt
:nextstmt
]:
239 after
.append(" " * diff
+ line
)
241 remove
= min(getlspace(line
), -diff
)
242 after
.append(line
[remove
:])
243 return self
.raw
!= self
.after
246 f
.writelines(self
.after
)
248 # Line-getter for tokenize.
250 if self
.index
>= len(self
.lines
):
253 line
= self
.lines
[self
.index
]
257 # Line-eater for tokenize.
258 def tokeneater(self
, type, token
, (sline
, scol
), end
, line
,
259 INDENT
=tokenize
.INDENT
,
260 DEDENT
=tokenize
.DEDENT
,
261 NEWLINE
=tokenize
.NEWLINE
,
262 COMMENT
=tokenize
.COMMENT
,
266 # A program statement, or ENDMARKER, will eventually follow,
267 # after some (possibly empty) run of tokens of the form
268 # (NL | COMMENT)* (INDENT | DEDENT+)?
279 elif type == COMMENT
:
281 self
.stats
.append((sline
, -1))
282 # but we're still looking for a new stmt, so leave
289 # This is the first "real token" following a NEWLINE, so it
290 # must be the first token of the next program statement, or an
293 if line
: # not endmarker
294 self
.stats
.append((sline
, self
.level
))
296 # Count number of leading blanks.
299 while i
< n
and line
[i
] == " ":
303 if __name__
== '__main__':