# Misc. changes, including documenting the ability to specify a class attribute in...
# [python.git] / Lib / fileinput.py
# blob 27ccc3bfedfd7a5b2584258b86228f4df2f89c78
"""Helper class to quickly write a loop over all standard input files.

Typical use is:

    import fileinput
    for line in fileinput.input():
        process(line)

This iterates over the lines of all files listed in sys.argv[1:],
defaulting to sys.stdin if the list is empty. If a filename is '-' it
is also replaced by sys.stdin. To specify an alternative list of
filenames, pass it as the argument to input(). A single file name is
also allowed.

Functions filename(), lineno() return the filename and cumulative line
number of the line that has just been read; filelineno() returns its
line number in the current file; isfirstline() returns true iff the
line just read is the first line of its file; isstdin() returns true
iff the line was read from sys.stdin. Function nextfile() closes the
current file so that the next iteration will read the first line from
the next file (if any); lines not read from the file will not count
towards the cumulative line count; the filename is not changed until
after the first line of the next file has been read. Function close()
closes the sequence.

Before any lines have been read, filename() returns None and both line
numbers are zero; nextfile() has no effect. After all lines have been
read, filename() and the line number functions return the values
pertaining to the last line read; nextfile() has no effect.

All files are opened in text mode. If an I/O error occurs during
opening or reading a file, the IOError exception is raised.

If sys.stdin is used more than once, the second and further use will
return no lines, except perhaps for interactive use, or if it has been
explicitly reset (e.g. using sys.stdin.seek(0)).

Empty files are opened and immediately closed; the only time their
presence in the list of filenames is noticeable at all is when the
last file opened is empty.

It is possible that the last line of a file doesn't end in a newline
character; otherwise lines are returned including the trailing
newline.

Class FileInput is the implementation; its methods filename(),
lineno(), filelineno(), isfirstline(), isstdin(), nextfile() and close()
correspond to the functions in the module. In addition it has a
readline() method which returns the next input line, and a
__getitem__() method which implements the sequence behavior. The
sequence must be accessed in strictly sequential order; sequence
access and readline() cannot be mixed.

Optional in-place filtering: if the keyword argument inplace=1 is
passed to input() or to the FileInput constructor, the file is moved
to a backup file and standard output is directed to the input file.
This makes it possible to write a filter that rewrites its input file
in place. If the keyword argument backup=".<some extension>" is also
given, it specifies the extension for the backup file, and the backup
file remains around; by default, the extension is ".bak" and it is
deleted when the output file is closed. In-place filtering is
disabled when standard input is read. XXX The current implementation
does not work for MS-DOS 8+3 filesystems.

Performance: this module is unfortunately one of the slower ways of
processing large numbers of input lines. Nevertheless, a significant
speed-up has been obtained by using readlines(bufsize) instead of
readline(). A new keyword argument, bufsize=N, is present on the
input() function and the FileInput() class to override the default
buffer size.

XXX Possible additions:

- optional getopt argument processing
- specify open mode ('r' or 'rb')
- fileno()
- isatty()
- read(), read(size), even readlines()

"""

import os
import sys

# Names exported by ``from fileinput import *``.
__all__ = ["input", "close", "nextfile", "filename", "lineno", "filelineno",
           "isfirstline", "isstdin", "FileInput"]

# The FileInput instance shared by the module-level functions; assigned by
# input() and reset to None by close().
_state = None

# Size hint handed to readlines() when the caller passes bufsize=0.
DEFAULT_BUFSIZE = 8*1024

def input(files=None, inplace=0, backup="", bufsize=0):
    """input([files[, inplace[, backup[, bufsize]]]])

    Create an instance of the FileInput class. The instance will be used
    as global state for the functions of this module, and is also returned
    to use during iteration. The parameters to this function will be passed
    along to the constructor of the FileInput class.

    Raises RuntimeError if the current global instance still has a file
    open (i.e. a previous input() sequence is still active).
    """
    global _state
    if _state and _state._file:
        raise RuntimeError("input() already active")
    _state = FileInput(files, inplace, backup, bufsize)
    return _state

def close():
    """Close the sequence.

    The module-level state is cleared before the instance's close() is
    called, so a new input() may be started afterwards.  Does nothing if
    there is no active instance.
    """
    global _state
    state = _state
    _state = None
    if state:
        state.close()

def nextfile():
    """
    Close the current file so that the next iteration will read the first
    line from the next file (if any); lines not read from the file will
    not count towards the cumulative line count. The filename is not
    changed until after the first line of the next file has been read.
    Before the first line has been read, this function has no effect;
    it cannot be used to skip the first file. After the last line of the
    last file has been read, this function has no effect.

    Raises RuntimeError if there is no active input().
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.nextfile()

def filename():
    """
    Return the name of the file currently being read.
    Before the first line has been read, returns None.

    Raises RuntimeError if there is no active input().
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.filename()

def lineno():
    """
    Return the cumulative line number of the line that has just been read.
    Before the first line has been read, returns 0. After the last line
    of the last file has been read, returns the line number of that line.

    Raises RuntimeError if there is no active input().
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.lineno()

def filelineno():
    """
    Return the line number in the current file. Before the first line
    has been read, returns 0. After the last line of the last file has
    been read, returns the line number of that line within the file.

    Raises RuntimeError if there is no active input().
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.filelineno()

def isfirstline():
    """
    Returns true if the line just read is the first line of its file,
    otherwise returns false.

    Raises RuntimeError if there is no active input().
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.isfirstline()

def isstdin():
    """
    Returns true if the last line was read from sys.stdin,
    otherwise returns false.

    Raises RuntimeError if there is no active input().
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.isstdin()

class FileInput:
    """class FileInput([files[, inplace[, backup[, bufsize]]]])

    Class FileInput is the implementation of the module; its methods
    filename(), lineno(), filelineno(), isfirstline(), isstdin(), nextfile()
    and close() correspond to the functions of the same name in the module.
    In addition it has a readline() method which returns the next
    input line, and a __getitem__() method which implements the
    sequence behavior. The sequence must be accessed in strictly
    sequential order; random access and readline() cannot be mixed.
    """

    def __init__(self, files=None, inplace=0, backup="", bufsize=0):
        """Set up the list of pending files; nothing is opened yet.

        files   -- a single file name, a sequence of names, or None to use
                   sys.argv[1:]; an empty list means ('-',), i.e. stdin.
        inplace -- if true, each file is renamed to a backup and sys.stdout
                   is redirected to a fresh file under the original name.
        backup  -- extension for the backup file; if empty, os.extsep+"bak"
                   is used and the backup is removed when the file is closed.
        bufsize -- size hint for readlines(); 0 selects DEFAULT_BUFSIZE.
        """
        # Bookkeeping is initialised before `files` is normalised so that
        # __del__ -> close() finds every attribute it needs even when
        # tuple(files) below raises.
        self._inplace = inplace
        self._backup = backup
        self._bufsize = bufsize or DEFAULT_BUFSIZE
        self._savestdout = None
        self._output = None
        self._filename = None
        self._lineno = 0
        self._filelineno = 0
        self._file = None
        self._isstdin = False
        self._backupfilename = None
        self._buffer = []
        self._bufindex = 0

        if isinstance(files, str):
            files = (files,)
        else:
            if files is None:
                files = sys.argv[1:]
            if not files:
                files = ('-',)
            else:
                files = tuple(files)
        self._files = files

    def __del__(self):
        self.close()

    def close(self):
        """Close the current file and discard all pending file names."""
        self.nextfile()
        self._files = ()

    def __iter__(self):
        return self

    def next(self):
        """Return the next line; raise StopIteration at end of input."""
        # Fast path: serve straight from the buffer without going through
        # readline().
        try:
            line = self._buffer[self._bufindex]
        except IndexError:
            pass
        else:
            self._bufindex += 1
            self._lineno += 1
            self._filelineno += 1
            return line
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    # Python 3 spelling of the iterator protocol; next() is kept as the
    # primary name for existing callers.
    __next__ = next

    def __getitem__(self, i):
        """Sequence access; i must equal the number of lines read so far.

        Raises RuntimeError for any other index and IndexError once the
        input is exhausted.
        """
        if i != self._lineno:
            raise RuntimeError("accessing lines out of order")
        try:
            return self.next()
        except StopIteration:
            raise IndexError("end of input reached")

    def nextfile(self):
        """Close the current file (and in-place output) and reset the buffer.

        sys.stdout is restored first if it had been redirected.  The
        try/finally nesting ensures the input file is still closed, the
        temporary backup still removed and the buffer still reset even if
        closing an earlier resource raises.
        """
        savestdout = self._savestdout
        self._savestdout = None
        if savestdout:
            sys.stdout = savestdout

        output = self._output
        self._output = None
        try:
            if output:
                output.close()
        finally:
            infile = self._file
            self._file = None
            try:
                # sys.stdin is never closed by this class.
                if infile and not self._isstdin:
                    infile.close()
            finally:
                backupfilename = self._backupfilename
                self._backupfilename = None
                # The backup is only kept when the caller asked for an
                # explicit backup extension.
                if backupfilename and not self._backup:
                    try:
                        os.unlink(backupfilename)
                    except OSError:
                        pass

                self._isstdin = False
                self._buffer = []
                self._bufindex = 0

    def readline(self):
        """Return the next input line, or "" when all files are exhausted.

        Lines are served from a buffer filled with readlines(bufsize);
        when it runs dry the buffer is refilled, moving on to the next
        file as needed.  Opening or reading a file may raise IOError.
        """
        # A loop rather than recursion: a file that yields no lines just
        # causes another pass, so a long run of empty files cannot exhaust
        # the interpreter's recursion limit.
        while True:
            try:
                line = self._buffer[self._bufindex]
            except IndexError:
                pass
            else:
                self._bufindex += 1
                self._lineno += 1
                self._filelineno += 1
                return line
            if not self._file:
                if not self._files:
                    return ""
                self._open_next()
            self._buffer = self._file.readlines(self._bufsize)
            self._bufindex = 0
            if not self._buffer:
                self.nextfile()

    def _open_next(self):
        """Take the next name off the pending list and open it for reading."""
        self._filename = self._files[0]
        self._files = self._files[1:]
        self._filelineno = 0
        self._file = None
        self._isstdin = False
        self._backupfilename = None
        if self._filename == '-':
            self._filename = '<stdin>'
            self._file = sys.stdin
            self._isstdin = True
        elif self._inplace:
            self._backupfilename = (
                self._filename + (self._backup or os.extsep + "bak"))
            # Remove a stale backup, if any, so the rename below can succeed.
            try:
                os.unlink(self._backupfilename)
            except OSError:
                pass
            # The next few lines may raise IOError
            os.rename(self._filename, self._backupfilename)
            self._file = open(self._backupfilename, "r")
            try:
                perm = os.fstat(self._file.fileno()).st_mode
            except OSError:
                self._output = open(self._filename, "w")
            else:
                # Create the replacement file with the mode bits read from
                # the backup.
                fd = os.open(self._filename,
                             os.O_CREAT | os.O_WRONLY | os.O_TRUNC,
                             perm)
                self._output = os.fdopen(fd, "w")
                try:
                    if hasattr(os, 'chmod'):
                        os.chmod(self._filename, perm)
                except OSError:
                    pass
            self._savestdout = sys.stdout
            sys.stdout = self._output
        else:
            # This may raise IOError
            self._file = open(self._filename, "r")

    def filename(self):
        """Return the name of the file most recently opened (None before any)."""
        return self._filename

    def lineno(self):
        """Return the cumulative count of lines returned so far."""
        return self._lineno

    def filelineno(self):
        """Return the count of lines returned from the current file."""
        return self._filelineno

    def isfirstline(self):
        """Return true if the last line returned was the first of its file."""
        return self._filelineno == 1

    def isstdin(self):
        """Return true if the current file is sys.stdin."""
        return self._isstdin

def _test():
    """Command-line self-test: echo every input line with its counters.

    Options parsed from sys.argv[1:]: -i turns on in-place mode and
    -b EXT sets the backup extension; remaining arguments are the files.
    Each line is printed as "lineno: filename[filelineno]* text", where
    the "*" marks the first line of a file; a final summary line repeats
    the counters after the last line.
    """
    import getopt
    inplace = 0
    backup = ""
    opts, args = getopt.getopt(sys.argv[1:], "ib:")
    for o, a in opts:
        if o == '-i':
            inplace = 1
        if o == '-b':
            backup = a
    for line in input(args, inplace=inplace, backup=backup):
        # Strip one trailing newline and then one trailing carriage return.
        if line[-1:] == '\n':
            line = line[:-1]
        if line[-1:] == '\r':
            line = line[:-1]
        marker = ""
        if isfirstline():
            marker = "*"
        # Single-argument print(...) behaves the same as a statement and as
        # a function call.
        print("%d: %s[%d]%s %s" % (lineno(), filename(), filelineno(),
                                   marker, line))
    print("%d: %s[%d]" % (lineno(), filename(), filelineno()))

# Run the self-test when executed as a script.
if __name__ == '__main__':
    _test()