Merged revisions 82952,82954 via svnmerge from
[python/dscho.git] / Lib / fileinput.py
blob90a600bf11801c7ad2e1e8b58b31827538833486
1 """Helper class to quickly write a loop over all standard input files.
3 Typical use is:
5 import fileinput
6 for line in fileinput.input():
7 process(line)
9 This iterates over the lines of all files listed in sys.argv[1:],
10 defaulting to sys.stdin if the list is empty. If a filename is '-' it
11 is also replaced by sys.stdin. To specify an alternative list of
12 filenames, pass it as the argument to input(). A single file name is
13 also allowed.
15 Functions filename(), lineno() return the filename and cumulative line
16 number of the line that has just been read; filelineno() returns its
17 line number in the current file; isfirstline() returns true iff the
18 line just read is the first line of its file; isstdin() returns true
19 iff the line was read from sys.stdin. Function nextfile() closes the
20 current file so that the next iteration will read the first line from
21 the next file (if any); lines not read from the file will not count
22 towards the cumulative line count; the filename is not changed until
23 after the first line of the next file has been read. Function close()
24 closes the sequence.
26 Before any lines have been read, filename() returns None and both line
27 numbers are zero; nextfile() has no effect. After all lines have been
28 read, filename() and the line number functions return the values
29 pertaining to the last line read; nextfile() has no effect.
31 All files are opened in text mode by default; you can override this by
32 setting the mode parameter to input() or FileInput.__init__().
33 If an I/O error occurs during opening or reading a file, the IOError
34 exception is raised.
36 If sys.stdin is used more than once, the second and further use will
37 return no lines, except perhaps for interactive use, or if it has been
38 explicitly reset (e.g. using sys.stdin.seek(0)).
40 Empty files are opened and immediately closed; the only time their
41 presence in the list of filenames is noticeable at all is when the
42 last file opened is empty.
44 It is possible that the last line of a file doesn't end in a newline
45 character; otherwise lines are returned including the trailing
46 newline.
48 Class FileInput is the implementation; its methods filename(),
49 lineno(), filelineno(), isfirstline(), isstdin(), nextfile() and close()
50 correspond to the functions in the module. In addition it has a
51 readline() method which returns the next input line, and a
52 __getitem__() method which implements the sequence behavior. The
53 sequence must be accessed in strictly sequential order; sequence
54 access and readline() cannot be mixed.
56 Optional in-place filtering: if the keyword argument inplace=1 is
57 passed to input() or to the FileInput constructor, the file is moved
58 to a backup file and standard output is directed to the input file.
59 This makes it possible to write a filter that rewrites its input file
60 in place. If the keyword argument backup=".<some extension>" is also
61 given, it specifies the extension for the backup file, and the backup
62 file remains around; by default, the extension is ".bak" and it is
63 deleted when the output file is closed. In-place filtering is
64 disabled when standard input is read. XXX The current implementation
65 does not work for MS-DOS 8+3 filesystems.
67 Performance: this module is unfortunately one of the slower ways of
68 processing large numbers of input lines. Nevertheless, a significant
69 speed-up has been obtained by using readlines(bufsize) instead of
70 readline(). A new keyword argument, bufsize=N, is present on the
71 input() function and the FileInput() class to override the default
72 buffer size.
74 XXX Possible additions:
76 - optional getopt argument processing
77 - isatty()
78 - read(), read(size), even readlines()
80 """
82 import sys, os
# Public API of the module.  fileno(), hook_compressed() and hook_encoded()
# are public, documented-by-name helpers defined below, so they are exported
# alongside the other module-level functions.
__all__ = ["input", "close", "nextfile", "filename", "lineno", "filelineno",
           "fileno", "isfirstline", "isstdin", "FileInput",
           "hook_compressed", "hook_encoded"]

# The FileInput instance created by the most recent input() call; it is the
# shared state behind the module-level convenience functions.
_state = None

# Fallback size hint handed to readlines() when the caller passes bufsize=0.
DEFAULT_BUFSIZE = 8*1024
def input(files=None, inplace=False, backup="", bufsize=0,
          mode="r", openhook=None):
    """Create the module-wide FileInput instance and return it.

    All arguments are forwarded unchanged to the FileInput constructor.
    The new instance becomes the global state used by the other functions
    of this module and is also returned so it can be iterated directly.

    Raises RuntimeError if a previous input() is still reading a file.
    """
    global _state
    previous = _state
    if previous and previous._file:
        raise RuntimeError("input() already active")
    reader = FileInput(files, inplace, backup, bufsize, mode, openhook)
    _state = reader
    return reader
def close():
    """Close the sequence and forget the module-wide FileInput instance."""
    global _state
    # Detach the global first so the module is reset even if close() raises.
    active, _state = _state, None
    if active:
        active.close()
def nextfile():
    """Close the current file so iteration continues with the next one.

    Lines left unread in the current file do not count towards the
    cumulative line count.  The reported filename only changes once the
    first line of the next file has been read.  Calling this before the
    first line has been read, or after the last line of the last file,
    has no effect; it cannot be used to skip the first file.

    Raises RuntimeError if there is no active input().
    """
    active = _state
    if active:
        return active.nextfile()
    raise RuntimeError("no active input()")
def filename():
    """Return the name of the file currently being read.

    Returns None before the first line has been read.
    Raises RuntimeError if there is no active input().
    """
    active = _state
    if active:
        return active.filename()
    raise RuntimeError("no active input()")
def lineno():
    """Return the cumulative line number of the line just read.

    Returns 0 before the first line has been read.  After the last line
    of the last file has been read, returns the number of that line.
    Raises RuntimeError if there is no active input().
    """
    active = _state
    if active:
        return active.lineno()
    raise RuntimeError("no active input()")
def filelineno():
    """Return the line number within the current file.

    Returns 0 before the first line has been read.  After the last line
    of the last file has been read, returns that line's number within
    its file.
    Raises RuntimeError if there is no active input().
    """
    active = _state
    if active:
        return active.filelineno()
    raise RuntimeError("no active input()")
def fileno():
    """Return the file descriptor number of the current file.

    Returns -1 when no file is currently open.
    Raises RuntimeError if there is no active input().
    """
    active = _state
    if active:
        return active.fileno()
    raise RuntimeError("no active input()")
def isfirstline():
    """Return true if the line just read is the first line of its file.

    Otherwise return false.
    Raises RuntimeError if there is no active input().
    """
    active = _state
    if active:
        return active.isfirstline()
    raise RuntimeError("no active input()")
def isstdin():
    """Return true if the last line was read from sys.stdin.

    Otherwise return false.
    Raises RuntimeError if there is no active input().
    """
    active = _state
    if active:
        return active.isstdin()
    raise RuntimeError("no active input()")
class FileInput:
    """class FileInput([files[, inplace[, backup[, bufsize[, mode[, openhook]]]]]])

    Class FileInput is the implementation of the module; its methods
    filename(), lineno(), filelineno(), isfirstline(), isstdin(), fileno(),
    nextfile() and close() correspond to the functions of the same name
    in the module.
    In addition it has a readline() method which returns the next
    input line, and a __getitem__() method which implements the
    sequence behavior.  The sequence must be accessed in strictly
    sequential order; random access and readline() cannot be mixed.

    An instance can also be used as a context manager; close() is called
    when the with-block is left.
    """

    def __init__(self, files=None, inplace=False, backup="", bufsize=0,
                 mode="r", openhook=None):
        """Set up the reader; no file is opened until the first line is read.

        files    -- a single filename, an iterable of filenames, or None to
                    use sys.argv[1:]; an empty list means standard input
                    ('-').
        inplace  -- if true, redirect sys.stdout into each input file while
                    it is being read from a backup copy.
        backup   -- backup extension for inplace mode; when empty, ".bak" is
                    used and the backup is removed when the file is closed.
        bufsize  -- size hint for readlines(); 0 selects DEFAULT_BUFSIZE.
        mode     -- one of 'r', 'rU', 'U' or 'rb'.
        openhook -- optional callable (filename, mode) -> file object; it
                    cannot be combined with inplace.

        Raises ValueError for an invalid mode or openhook.
        """
        # Initialise everything nextfile() touches first, so that __del__
        # can run close() safely even if argument processing below raises.
        self._savestdout = None
        self._output = None
        self._filename = None
        self._lineno = 0
        self._filelineno = 0
        self._file = None
        self._isstdin = False
        self._backupfilename = None
        self._buffer = []
        self._bufindex = 0
        self._files = ()
        self._backup = backup

        if isinstance(files, str):
            files = (files,)
        else:
            if files is None:
                files = sys.argv[1:]
            if not files:
                files = ('-',)
            else:
                files = tuple(files)
        self._files = files
        self._inplace = inplace
        self._bufsize = bufsize or DEFAULT_BUFSIZE
        # restrict mode argument to reading modes
        if mode not in ('r', 'rU', 'U', 'rb'):
            raise ValueError("FileInput opening mode must be one of "
                             "'r', 'rU', 'U' and 'rb'")
        self._mode = mode
        if inplace and openhook:
            raise ValueError("FileInput cannot use an opening hook in inplace mode")
        elif openhook and not hasattr(openhook, '__call__'):
            raise ValueError("FileInput openhook must be callable")
        self._openhook = openhook

    def __del__(self):
        self.close()

    def close(self):
        """Close the current file and discard the remaining filenames."""
        self.nextfile()
        self._files = ()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def __iter__(self):
        return self

    def __next__(self):
        # Fast path: serve the next line straight from the buffer.
        try:
            line = self._buffer[self._bufindex]
        except IndexError:
            pass
        else:
            self._bufindex += 1
            self._lineno += 1
            self._filelineno += 1
            return line
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def __getitem__(self, i):
        """Sequence access; i must equal the number of lines read so far."""
        if i != self._lineno:
            raise RuntimeError("accessing lines out of order")
        try:
            return self.__next__()
        except StopIteration:
            raise IndexError("end of input reached")

    def nextfile(self):
        """Close the current file and undo any inplace redirection.

        Every cleanup step runs even if an earlier one raises: sys.stdout
        is restored first, then the output file and the input file are
        closed, and finally a temporary backup file is removed.
        """
        saved_stdout = self._savestdout
        self._savestdout = None
        if saved_stdout:
            sys.stdout = saved_stdout

        output = self._output
        self._output = None
        try:
            if output:
                output.close()
        finally:
            current = self._file
            self._file = None
            try:
                # sys.stdin is borrowed, not owned, so it is never closed.
                if current and not self._isstdin:
                    current.close()
            finally:
                backupfilename = self._backupfilename
                self._backupfilename = None
                # The backup is only kept when an explicit extension
                # was requested.
                if backupfilename and not self._backup:
                    try:
                        os.unlink(backupfilename)
                    except OSError:
                        pass

                self._isstdin = False
                self._buffer = []
                self._bufindex = 0

    def readline(self):
        """Return the next input line, or an empty string at end of input.

        In 'rb' mode the end-of-input marker is an empty bytes object.
        Opening or reading a file may raise IOError.
        """
        # Loop (rather than recurse) over exhausted or empty files so that
        # a long run of empty inputs cannot exceed the recursion limit.
        while True:
            try:
                line = self._buffer[self._bufindex]
            except IndexError:
                pass
            else:
                self._bufindex += 1
                self._lineno += 1
                self._filelineno += 1
                return line
            if not self._file:
                if not self._files:
                    return b"" if 'b' in self._mode else ""
                self._open_next()
            self._buffer = self._file.readlines(self._bufsize)
            self._bufindex = 0
            if not self._buffer:
                self.nextfile()

    def _open_next(self):
        # Pop the next filename and open it, setting up inplace redirection.
        self._filename = self._files[0]
        self._files = self._files[1:]
        self._filelineno = 0
        self._file = None
        self._isstdin = False
        self._backupfilename = None
        if self._filename == '-':
            self._filename = '<stdin>'
            self._file = sys.stdin
            self._isstdin = True
            return
        if not self._inplace:
            # This may raise IOError
            if self._openhook:
                self._file = self._openhook(self._filename, self._mode)
            else:
                self._file = open(self._filename, self._mode)
            return

        self._backupfilename = (
            self._filename + (self._backup or ".bak"))
        # Remove a stale backup so the rename below cannot collide with it.
        try:
            os.unlink(self._backupfilename)
        except OSError:
            pass
        # The next few lines may raise IOError
        os.rename(self._filename, self._backupfilename)
        self._file = open(self._backupfilename, self._mode)
        try:
            perm = os.fstat(self._file.fileno()).st_mode
        except OSError:
            self._output = open(self._filename, "w")
        else:
            # Recreate the file with the permission bits of the original.
            mode = os.O_CREAT | os.O_WRONLY | os.O_TRUNC
            if hasattr(os, 'O_BINARY'):
                mode |= os.O_BINARY

            fd = os.open(self._filename, mode, perm)
            self._output = os.fdopen(fd, "w")
            try:
                if hasattr(os, 'chmod'):
                    os.chmod(self._filename, perm)
            except OSError:
                pass
        self._savestdout = sys.stdout
        sys.stdout = self._output

    def filename(self):
        """Return the name of the file most recently opened, or None."""
        return self._filename

    def lineno(self):
        """Return the cumulative number of lines read so far."""
        return self._lineno

    def filelineno(self):
        """Return the number of lines read from the current file."""
        return self._filelineno

    def fileno(self):
        """Return the current file's descriptor, or -1 if none is open."""
        if self._file:
            try:
                return self._file.fileno()
            except ValueError:
                return -1
        else:
            return -1

    def isfirstline(self):
        """Return true if the line just read is the first of its file."""
        return self._filelineno == 1

    def isstdin(self):
        """Return true if the current file is sys.stdin."""
        return self._isstdin
def hook_compressed(filename, mode):
    """Open hook that picks an opener from the filename extension.

    Files ending in '.gz' are opened with gzip.open and files ending in
    '.bz2' with bz2.BZ2File; anything else goes through the builtin open().
    The compression modules are imported only when they are needed.
    """
    suffix = os.path.splitext(filename)[1]
    if suffix == '.gz':
        import gzip
        opener = gzip.open
    elif suffix == '.bz2':
        import bz2
        opener = bz2.BZ2File
    else:
        opener = open
    return opener(filename, mode)
def hook_encoded(encoding):
    """Return an open hook that opens each file with the given encoding.

    The returned callable takes (filename, mode) and opens the file via
    codecs.open using *encoding*.
    """
    import codecs

    def openhook(filename, mode):
        stream = codecs.open(filename, mode, encoding)
        return stream

    return openhook
def _test():
    """Command-line self-test: print every input line with its counters.

    Options: -i enables inplace mode, -b EXT sets the backup extension.
    Each line is printed with the cumulative line number, filename and
    per-file line number; first lines of a file are marked with '*'.
    A summary of the final counters is printed at the end.
    """
    import getopt
    inplace = False
    backup = False
    opts, args = getopt.getopt(sys.argv[1:], "ib:")
    for flag, value in opts:
        if flag == '-i':
            inplace = True
        if flag == '-b':
            backup = value
    for line in input(args, inplace=inplace, backup=backup):
        # Strip one trailing newline and then one trailing carriage return.
        if line.endswith('\n'):
            line = line[:-1]
        if line.endswith('\r'):
            line = line[:-1]
        marker = "*" if isfirstline() else ""
        print("%d: %s[%d]%s %s" % (lineno(), filename(), filelineno(),
                                   marker, line))
    print("%d: %s[%d]" % (lineno(), filename(), filelineno()))
# Run the self-test only when the module is executed as a script.
if "__main__" == __name__:
    _test()