Added LoggerAdapter class, changed copyright dates, made check for extra parameter...
[python.git] / Tools / scripts / reindent.py
blob5ac98c7f7bef2249b4aad432d7941da3f0435eb0
#! /usr/bin/env python

# Released to the public domain, by Tim Peters, 03 October 2000.

"""reindent [-d][-r][-v] [ path ... ]

-d (--dryrun)   Dry run.   Analyze, but don't make any changes to, files.
-r (--recurse)  Recurse.   Search for all .py files in subdirectories too.
-v (--verbose)  Verbose.   Print informative msgs; else no output.
-h (--help)     Help.      Print this usage information and exit.

Change Python (.py) files to use 4-space indents and no hard tab characters.
Also trim excess spaces and tabs from ends of lines, and remove empty lines
at the end of files.  Also ensure the last line ends with a newline.

If no paths are given on the command line, reindent operates as a filter,
reading a single source file from standard input and writing the transformed
source to standard output.  In this case, the -d, -r and -v flags are
ignored.

You can pass one or more file and/or directory paths.  When a directory
path, all .py files within the directory will be examined, and, if the -r
option is given, likewise recursively for subdirectories.

If output is not to standard output, reindent overwrites files in place,
renaming the originals with a .bak extension.  If it finds nothing to
change, the file is left alone.  If reindent does change a file, the changed
file is a fixed-point for future runs (i.e., running reindent on the
resulting .py file won't change it again).

The hard part of reindenting is figuring out what to do with comment
lines.  So long as the input files get a clean bill of health from
tabnanny.py, reindent should do a good job.
"""
__version__ = "1"

import tokenize
import os
import sys

# Command-line flags.  main() increments these once per occurrence of the
# matching option; check() reads them.
verbose = 0   # -v / --verbose: print progress messages
recurse = 0   # -r / --recurse: descend into subdirectories
dryrun = 0    # -d / --dryrun: analyze only, never rewrite files
def usage(msg=None):
    """Write an optional message, then the module docstring, to stderr.

    msg -- text (or an exception object) to show before the usage text.
           It is formatted with %s; nothing is written for it when None.
    """
    # sys.stderr.write() replaces the "print >> sys.stderr" statement so
    # the function parses under both Python 2 and Python 3.
    if msg is not None:
        sys.stderr.write("%s\n" % (msg,))
    sys.stderr.write("%s\n" % (__doc__,))
def errprint(*args):
    """Write the str() of each argument to stderr, space-separated.

    A single newline terminates the output; with no arguments only the
    newline is written.
    """
    sys.stderr.write(" ".join([str(arg) for arg in args]) + "\n")
def main():
    """Parse the command line and reindent the requested sources.

    Options update the module-level verbose/recurse/dryrun counters.
    With no path arguments the script acts as a filter: standard input is
    reindented and written to standard output.  Otherwise every path is
    handed to check().  An unknown option prints the error followed by
    the usage text; -h/--help prints the usage text.  Both return without
    processing anything.
    """
    import getopt
    global verbose, recurse, dryrun
    try:
        opts, args = getopt.getopt(sys.argv[1:], "drvh",
                                   ["dryrun", "recurse", "verbose", "help"])
    except getopt.error as msg:
        # "except X as name" parses on Python 2.6+ and Python 3; the old
        # "except X, name" spelling is Python 2 only.
        usage(msg)
        return
    for o, a in opts:
        if o in ('-d', '--dryrun'):
            dryrun += 1
        elif o in ('-r', '--recurse'):
            recurse += 1
        elif o in ('-v', '--verbose'):
            verbose += 1
        elif o in ('-h', '--help'):
            usage()
            return
    if not args:
        # Filter mode: stdin -> stdout, flags are not consulted.
        r = Reindenter(sys.stdin)
        r.run()
        r.write(sys.stdout)
        return
    for arg in args:
        check(arg)
def check(file):
    """Reindent one file, or the .py files found in one directory.

    file -- path to examine.  For a directory that is not a symlink,
            every entry whose name ends in ".py" (case-insensitively) is
            checked, and, when the module-level `recurse` flag is set,
            non-symlink subdirectories are descended into as well.

    For a regular file, the content is run through Reindenter.  If that
    reports a change and `dryrun` is not set, the original is renamed to
    file + ".bak" (replacing any existing backup) and the new text is
    written to `file`.  Progress messages are printed only when `verbose`
    is set.  A file that cannot be opened is reported through errprint()
    and skipped.
    """
    if os.path.isdir(file) and not os.path.islink(file):
        if verbose:
            print("listing directory %s" % file)
        for name in os.listdir(file):
            fullname = os.path.join(file, name)
            if ((recurse and os.path.isdir(fullname) and
                 not os.path.islink(fullname))
                    or name.lower().endswith(".py")):
                check(fullname)
        return

    if verbose:
        # No newline: the verdict ("changed." / "unchanged.") is printed
        # later on the same line.
        sys.stdout.write("checking %s ... " % file)
    try:
        f = open(file)
    except IOError as msg:
        errprint("%s: I/O Error: %s" % (file, str(msg)))
        return

    # Close the input even if reading it raises.
    with f:
        r = Reindenter(f)

    if r.run():
        if verbose:
            print("changed.")
            if dryrun:
                print("But this is a dry run, so leaving it alone.")
        if not dryrun:
            bak = file + ".bak"
            if os.path.exists(bak):
                os.remove(bak)
            os.rename(file, bak)
            if verbose:
                print("renamed %s to %s" % (file, bak))
            # Close the output even if writing it raises.
            with open(file, "w") as out:
                r.write(out)
            if verbose:
                print("wrote new %s" % file)
    else:
        if verbose:
            print("unchanged.")
def _rstrip(line, JUNK='\n \t'):
    """Return line stripped of trailing spaces, tabs, newlines.

    Note that line.rstrip() instead also strips sundry control characters,
    but at least one known Emacs user expects to keep junk like that, not
    mentioning Barry by name or anything <wink>.

    line -- the string to trim.
    JUNK -- string of the characters that count as trailing junk.
    """
    # Passing the character set explicitly restricts stripping to JUNK,
    # so other control characters (e.g. form feeds) are kept.
    return line.rstrip(JUNK)
class Reindenter:
    """Rewrite the lines of one source file to use 4-space indent levels.

    Typical use: construct with an open file, call run() to compute the
    transformed program (it returns whether anything changed), then call
    write() to emit the result.
    """

    def __init__(self, f):
        """Read every line from file object f and prepare the work lists."""
        self.find_stmt = 1  # next token begins a fresh stmt?
        self.level = 0      # current indent level

        # Raw file lines.
        self.raw = f.readlines()

        # File lines, rstripped & tab-expanded.  Dummy at start is so
        # that we can use tokenize's 1-based line numbering easily.
        # Note that a line is all-blank iff it's "\n".
        self.lines = [_rstrip(line).expandtabs() + "\n"
                      for line in self.raw]
        self.lines.insert(0, None)
        self.index = 1  # index into self.lines of next line

        # List of (lineno, indentlevel) pairs, one for each stmt and
        # comment line.  indentlevel is -1 for comment lines, as a
        # signal that tokenize doesn't know what to do about them;
        # indeed, they're our headache!
        self.stats = []

    def run(self):
        """Compute the reindented program into self.after.

        Returns a true value when the result differs from the raw input
        lines, i.e. when the file would be changed.
        """
        # generate_tokens() exists in both Python 2 and Python 3; the
        # callback form tokenize.tokenize(readline, tokeneater) is
        # Python 2 only.  Each token is a 5-item sequence.
        for tok in tokenize.generate_tokens(self.getline):
            self.tokeneater(*tok)
        # Remove trailing empty lines.
        lines = self.lines
        while lines and lines[-1] == "\n":
            lines.pop()
        # Sentinel.
        stats = self.stats
        stats.append((len(lines), 0))
        # Map count of leading spaces to # we want.
        have2want = {}
        # Program after transformation.
        after = self.after = []
        # Copy over initial empty lines -- there's nothing to do until
        # we see a line with *something* on it.
        i = stats[0][0]
        after.extend(lines[1:i])
        for i in range(len(stats) - 1):
            thisstmt, thislevel = stats[i]
            nextstmt = stats[i + 1][0]
            have = getlspace(lines[thisstmt])
            want = thislevel * 4
            if want < 0:
                # A comment line.
                if have:
                    # An indented comment line.  If we saw the same
                    # indentation before, reuse what it most recently
                    # mapped to.
                    want = have2want.get(have, -1)
                    if want < 0:
                        # Then it probably belongs to the next real stmt.
                        for j in range(i + 1, len(stats) - 1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                if have == getlspace(lines[jline]):
                                    want = jlevel * 4
                                break
                    if want < 0:           # Maybe it's a hanging
                                           # comment like this one,
                        # in which case we should shift it like its base
                        # line got shifted.
                        for j in range(i - 1, -1, -1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                want = have + (getlspace(after[jline - 1]) -
                                               getlspace(lines[jline]))
                                break
                    if want < 0:
                        # Still no luck -- leave it alone.
                        want = have
                else:
                    want = 0
            assert want >= 0
            have2want[have] = want
            diff = want - have
            if diff == 0 or have == 0:
                after.extend(lines[thisstmt:nextstmt])
            else:
                for line in lines[thisstmt:nextstmt]:
                    if diff > 0:
                        # Never add indentation to an all-blank line.
                        if line == "\n":
                            after.append(line)
                        else:
                            after.append(" " * diff + line)
                    else:
                        # Remove at most the leading spaces the line has.
                        remove = min(getlspace(line), -diff)
                        after.append(line[remove:])
        return self.raw != self.after

    def write(self, f):
        """Write the transformed lines computed by run() to file object f."""
        f.writelines(self.after)

    # Line-getter for tokenize.
    def getline(self):
        """Return the next prepared line, or "" once all are consumed."""
        if self.index >= len(self.lines):
            line = ""
        else:
            line = self.lines[self.index]
            self.index += 1
        return line

    # Line-eater for tokenize.
    def tokeneater(self, type, token, slinecol, end, line,
                   INDENT=tokenize.INDENT,
                   DEDENT=tokenize.DEDENT,
                   NEWLINE=tokenize.NEWLINE,
                   COMMENT=tokenize.COMMENT,
                   NL=tokenize.NL):
        """Consume one token, recording statement and comment starts.

        type, token, slinecol, end, line -- the five items of a tokenize
        token; slinecol is the (row, column) pair where the token starts.
        The remaining parameters only bind token-type constants as
        defaults.  Appends (lineno, level) to self.stats for the first
        token of each statement, and (lineno, -1) for each comment seen
        while a new statement is still being looked for.
        """
        # A plain parameter replaces the tuple parameter "(sline, scol)",
        # which Python 3 no longer accepts; only the row is needed.
        sline = slinecol[0]

        if type == NEWLINE:
            # A program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            self.find_stmt = 1

        elif type == INDENT:
            self.find_stmt = 1
            self.level += 1

        elif type == DEDENT:
            self.find_stmt = 1
            self.level -= 1

        elif type == COMMENT:
            if self.find_stmt:
                self.stats.append((sline, -1))
                # but we're still looking for a new stmt, so leave
                # find_stmt alone

        elif type == NL:
            pass

        elif self.find_stmt:
            # This is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER.
            self.find_stmt = 0
            if line:   # not endmarker
                self.stats.append((sline, self.level))
def getlspace(line):
    """Return the number of leading blanks (space characters) in line.

    Only ' ' is counted; a leading tab or any other character ends the
    count.
    """
    return len(line) - len(line.lstrip(" "))
# Run the command-line interface only when executed as a script.
if __name__ == '__main__':
    main()