Remove obsolete warning filters in regrtest.py (from issue #7092 -- patch by
[python.git] / Tools / scripts / reindent.py
blob3e9affb9c511b924f117244ee21a11db417eb7b7
#! /usr/bin/env python

# Released to the public domain, by Tim Peters, 03 October 2000.

"""reindent [-d][-r][-v] [ path ... ]

-d (--dryrun)   Dry run.   Analyze, but don't make any changes to, files.
-r (--recurse)  Recurse.   Search for all .py files in subdirectories too.
-n (--nobackup) No backup. Does not make a ".bak" file before reindenting.
-v (--verbose)  Verbose.   Print informative msgs; else no output.
-h (--help)     Help.      Print this usage information and exit.

Change Python (.py) files to use 4-space indents and no hard tab characters.
Also trim excess spaces and tabs from ends of lines, and remove empty lines
at the end of files.  Also ensure the last line ends with a newline.

If no paths are given on the command line, reindent operates as a filter,
reading a single source file from standard input and writing the transformed
source to standard output.  In this case, the -d, -r and -v flags are
ignored.

You can pass one or more file and/or directory paths.  When a path names a
directory, all .py files within the directory will be examined, and, if the
-r option is given, likewise recursively for subdirectories.

If output is not to standard output, reindent overwrites files in place,
renaming the originals with a .bak extension.  If it finds nothing to
change, the file is left alone.  If reindent does change a file, the changed
file is a fixed-point for future runs (i.e., running reindent on the
resulting .py file won't change it again).

The hard part of reindenting is figuring out what to do with comment
lines.  So long as the input files get a clean bill of health from
tabnanny.py, reindent should do a good job.

The backup file is a copy of the one that is being reindented.  The ".bak"
file is generated with shutil.copyfile(), but some corner cases regarding
user/group and permissions could leave the backup file more readable than
you'd prefer.  You can always use the --nobackup option to prevent this.
"""
__version__ = "1"

import tokenize
import os
import shutil
import sys

# Command-line option state.  main() updates these through a ``global``
# statement and check() reads them.
verbose = 0        # incremented once per -v/--verbose
recurse = 0        # incremented once per -r/--recurse
dryrun = 0         # incremented once per -d/--dryrun
makebackup = True  # set to False by -n/--nobackup
def usage(msg=None):
    """Write the usage text (the module docstring) to standard error.

    If *msg* is not None it is written first, on a line of its own.

    The text is emitted with sys.stderr.write() rather than the
    ``print >> sys.stderr`` statement so that the function runs unchanged
    on both Python 2 and Python 3.
    """
    if msg is not None:
        sys.stderr.write("%s\n" % (msg,))
    sys.stderr.write("%s\n" % (__doc__,))
def errprint(*args):
    """Write *args* to standard error, space-separated, ending with a newline.

    Each argument is converted with str().  Calling errprint() with no
    arguments writes just the newline.
    """
    sys.stderr.write(" ".join([str(arg) for arg in args]) + "\n")
def main():
    """Parse the command line, then reindent stdin or each named path.

    Recognised options update the module-level ``verbose``, ``recurse``,
    ``dryrun`` and ``makebackup`` settings.  An unrecognised option prints
    the getopt error followed by the usage text; -h/--help prints the usage
    text alone.  In both cases no files are processed.

    With no path arguments the script acts as a filter: standard input is
    reindented and written to standard output.  Otherwise every argument is
    handed to check().
    """
    import getopt
    global verbose, recurse, dryrun, makebackup
    try:
        opts, args = getopt.getopt(
            sys.argv[1:], "drnvh",
            ["dryrun", "recurse", "nobackup", "verbose", "help"])
    except getopt.error as msg:
        # "except ... as ..." is accepted by Python 2.6+ and Python 3; the
        # older comma form is a SyntaxError on Python 3.
        usage(msg)
        return
    for opt, _value in opts:
        if opt in ('-d', '--dryrun'):
            dryrun += 1
        elif opt in ('-r', '--recurse'):
            recurse += 1
        elif opt in ('-n', '--nobackup'):
            makebackup = False
        elif opt in ('-v', '--verbose'):
            verbose += 1
        elif opt in ('-h', '--help'):
            usage()
            return
    if not args:
        # Filter mode: stdin -> stdout.
        r = Reindenter(sys.stdin)
        r.run()
        r.write(sys.stdout)
        return
    for arg in args:
        check(arg)
def check(file):
    """Reindent *file*, or every eligible entry below it if it is a directory.

    For a directory that is not a symbolic link, each entry whose name ends
    in ".py" (case-insensitively) is checked.  When the module-level
    ``recurse`` flag is set, so is each sub-directory that is neither a
    symbolic link nor has a name starting with ".".  Nothing is returned
    for a directory.

    For anything else the path is opened and reindented.  Returns True if
    reindenting changes the contents, False if it does not, and None if the
    file cannot be opened (the error is reported on standard error).  When
    the contents change and ``dryrun`` is not set, the file is rewritten in
    place, after first being copied to ``file + ".bak"`` if ``makebackup``
    is set.

    Progress messages are written to standard output when ``verbose`` is
    set.  They use sys.stdout.write() instead of the print statement so the
    function runs on both Python 2 and Python 3.
    """
    out = sys.stdout.write

    if os.path.isdir(file) and not os.path.islink(file):
        if verbose:
            out("listing directory %s\n" % (file,))
        for name in os.listdir(file):
            fullname = os.path.join(file, name)
            wanted_subdir = (recurse and os.path.isdir(fullname) and
                             not os.path.islink(fullname) and
                             not os.path.split(fullname)[1].startswith("."))
            if wanted_subdir or name.lower().endswith(".py"):
                check(fullname)
        return

    if verbose:
        # No newline yet: the verdict written below completes this line.
        out("checking %s ... " % (file,))
    try:
        f = open(file)
    except IOError as msg:
        errprint("%s: I/O Error: %s" % (file, str(msg)))
        return

    # Reindenter reads the whole file in its constructor; close the handle
    # even if that read fails.
    try:
        r = Reindenter(f)
    finally:
        f.close()

    if not r.run():
        if verbose:
            out("unchanged.\n")
        return False

    if verbose:
        out("changed.\n")
        if dryrun:
            out("But this is a dry run, so leaving it alone.\n")
    if not dryrun:
        bak = file + ".bak"
        if makebackup:
            shutil.copyfile(file, bak)
            if verbose:
                out("backed up %s to %s\n" % (file, bak))
        f = open(file, "w")
        try:
            r.write(f)
        finally:
            f.close()
        if verbose:
            out("wrote new %s\n" % (file,))
    return True
def _rstrip(line, JUNK='\n \t'):
    """Return line stripped of trailing spaces, tabs, newlines.

    Only the characters in JUNK (a string) are removed.  A bare
    line.rstrip() would also strip sundry control characters such as form
    feeds, which some users expect to be kept, so the set of characters to
    strip is passed explicitly.
    """
    return line.rstrip(JUNK)
class Reindenter:
    """Reindent the Python source read from one file-like object.

    Typical use: ``r = Reindenter(f)``, then ``r.run()`` (true if the text
    changed), then ``r.write(out)`` to emit the transformed lines.

    The nesting of the statements in run() was reconstructed from the
    logic and the in-line comments; the original indentation was not
    available.
    """

    def __init__(self, f):
        """Read every line from *f* (anything with readlines())."""
        self.find_stmt = 1  # next token begins a fresh stmt?
        self.level = 0      # current indent level

        # Raw file lines.
        self.raw = f.readlines()

        # File lines, rstripped & tab-expanded.  Dummy at start is so
        # that we can use tokenize's 1-based line numbering easily.
        # Note that a line is all-blank iff it's "\n".
        self.lines = [_rstrip(line).expandtabs() + "\n"
                      for line in self.raw]
        self.lines.insert(0, None)
        self.index = 1  # index into self.lines of next line

        # List of (lineno, indentlevel) pairs, one for each stmt and
        # comment line.  indentlevel is -1 for comment lines, as a
        # signal that tokenize doesn't know what to do about them;
        # indeed, they're our headache!
        self.stats = []

    def run(self):
        """Compute the reindented text into self.after.

        Returns True if the result differs from the raw input lines,
        False otherwise.
        """
        # generate_tokens() is available on both Python 2 and Python 3,
        # unlike the callback form tokenize.tokenize(readline, eater).
        for token_info in tokenize.generate_tokens(self.getline):
            self.tokeneater(*token_info)
        # Remove trailing empty lines.
        lines = self.lines
        while lines and lines[-1] == "\n":
            lines.pop()
        # Sentinel.
        stats = self.stats
        stats.append((len(lines), 0))
        # Map count of leading spaces to # we want.
        have2want = {}
        # Program after transformation.
        after = self.after = []
        # Copy over initial empty lines -- there's nothing to do until
        # we see a line with *something* on it.
        i = stats[0][0]
        after.extend(lines[1:i])
        for i in range(len(stats) - 1):
            thisstmt, thislevel = stats[i]
            nextstmt = stats[i + 1][0]
            have = getlspace(lines[thisstmt])
            want = thislevel * 4
            if want < 0:
                # A comment line.
                if have:
                    # An indented comment line.  If we saw the same
                    # indentation before, reuse what it most recently
                    # mapped to.
                    want = have2want.get(have, -1)
                    if want < 0:
                        # Then it probably belongs to the next real stmt.
                        for j in range(i + 1, len(stats) - 1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                if have == getlspace(lines[jline]):
                                    want = jlevel * 4
                                # Only the first real stmt is consulted.
                                break
                    if want < 0:
                        # Maybe it's a hanging comment, in which case we
                        # should shift it like its base line got shifted.
                        for j in range(i - 1, -1, -1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                # after[] is 0-based, lines[] is 1-based.
                                want = (have + getlspace(after[jline - 1]) -
                                        getlspace(lines[jline]))
                                break
                    if want < 0:
                        # Still no luck -- leave it alone.
                        want = have
                else:
                    want = 0
            assert want >= 0
            have2want[have] = want
            diff = want - have
            if diff == 0 or have == 0:
                after.extend(lines[thisstmt:nextstmt])
            else:
                for line in lines[thisstmt:nextstmt]:
                    if diff > 0:
                        if line == "\n":
                            # Keep blank lines free of trailing spaces.
                            after.append(line)
                        else:
                            after.append(" " * diff + line)
                    else:
                        remove = min(getlspace(line), -diff)
                        after.append(line[remove:])
        return self.raw != self.after

    def write(self, f):
        """Write the lines computed by run() to the file-like object *f*."""
        f.writelines(self.after)

    # Line-getter for tokenize.
    def getline(self):
        """Return the next cleaned-up line, or "" once all are consumed."""
        if self.index >= len(self.lines):
            line = ""
        else:
            line = self.lines[self.index]
            self.index += 1
        return line

    # Line-eater for tokenize.
    def tokeneater(self, type, token, slinecol, end, line,
                   INDENT=tokenize.INDENT,
                   DEDENT=tokenize.DEDENT,
                   NEWLINE=tokenize.NEWLINE,
                   COMMENT=tokenize.COMMENT,
                   NL=tokenize.NL):
        """Record one token: track nesting and note stmt/comment lines.

        *slinecol* is the token's (row, column) start position.  It is
        unpacked in the body because tuple parameters in a ``def`` are
        Python 2-only syntax.
        """
        sline, scol = slinecol

        if type == NEWLINE:
            # A program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            self.find_stmt = 1

        elif type == INDENT:
            self.find_stmt = 1
            self.level += 1

        elif type == DEDENT:
            self.find_stmt = 1
            self.level -= 1

        elif type == COMMENT:
            if self.find_stmt:
                self.stats.append((sline, -1))
                # but we're still looking for a new stmt, so leave
                # find_stmt alone

        elif type == NL:
            pass

        elif self.find_stmt:
            # This is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER.
            self.find_stmt = 0
            if line:   # not endmarker
                self.stats.append((sline, self.level))
# Count number of leading blanks.
def getlspace(line):
    """Return the number of space characters at the start of *line*.

    Only " " is counted; a leading tab or newline ends the run.
    """
    return len(line) - len(line.lstrip(" "))
# Run the command-line interface only when executed as a script.
if __name__ == '__main__':
    main()