Issue #7632: Fix a serious wrong output bug for string -> float conversion.
[python.git] / Lib / _pyio.py
blob49fbe19f32659a81d9b61233d73cc9a6ae4a62cf
1 """
2 Python implementation of the io module.
3 """
5 from __future__ import print_function
6 from __future__ import unicode_literals
8 import os
9 import abc
10 import codecs
11 import warnings
# Import thread (the low-level module) instead of threading to reduce startup cost
13 try:
14 from thread import allocate_lock as Lock
15 except ImportError:
16 from dummy_thread import allocate_lock as Lock
18 import io
19 from io import __all__
20 from io import SEEK_SET, SEEK_CUR, SEEK_END
# Python 2: a module-level __metaclass__ turns every class statement in this
# module that lists no bases (e.g. "class IOBase:") into a new-style class.
__metaclass__ = type

# Fallback buffer size, in bytes, for the buffered classes in this module.
# open() uses st_blksize whenever we can and falls back on this value.
DEFAULT_BUFFER_SIZE = 8 * 1024  # bytes
27 # NOTE: Base classes defined here are registered with the "official" ABCs
28 # defined in io.py. We don't use real inheritance though, because we don't
29 # want to inherit the C implementations.
class BlockingIOError(IOError):

    """Raised when an operation on a non-blocking stream would block.

    The characters_written attribute carries the count supplied by the
    raiser (0 when not given).
    """

    def __init__(self, errno, strerror, characters_written=0):
        # super(IOError, self) skips IOError's own __init__ and dispatches
        # to the next class in the MRO with errno and strerror only.
        super(IOError, self).__init__(errno, strerror)
        integer_types = (int, long)
        if isinstance(characters_written, integer_types):
            self.characters_written = characters_written
        else:
            raise TypeError("characters_written must be a integer")
def open(file, mode="r", buffering=None,
         encoding=None, errors=None,
         newline=None, closefd=True):

    r"""Open file and return a stream.  Raise IOError upon failure.

    file is either a text or byte string giving the name (and the path
    if the file isn't in the current working directory) of the file to
    be opened or an integer file descriptor of the file to be
    wrapped. (If a file descriptor is given, it is closed when the
    returned I/O object is closed, unless closefd is set to False.)

    mode is an optional string that specifies the mode in which the file
    is opened. It defaults to 'r' which means open for reading in text
    mode.  Other common values are 'w' for writing (truncating the file if
    it already exists), and 'a' for appending (which on some Unix systems,
    means that all writes append to the end of the file regardless of the
    current seek position). In text mode, if encoding is not specified the
    encoding used is platform dependent. (For reading and writing raw
    bytes use binary mode and leave encoding unspecified.) The available
    modes are:

    ========= ===============================================================
    Character Meaning
    --------- ---------------------------------------------------------------
    'r'       open for reading (default)
    'w'       open for writing, truncating the file first
    'a'       open for writing, appending to the end of the file if it exists
    'b'       binary mode
    't'       text mode (default)
    '+'       open a disk file for updating (reading and writing)
    'U'       universal newline mode (for backwards compatibility; unneeded
              for new code)
    ========= ===============================================================

    The default mode is 'rt' (open for reading text). For binary random
    access, the mode 'w+b' opens and truncates the file to 0 bytes, while
    'r+b' opens the file without truncation.

    Python distinguishes between files opened in binary and text modes,
    even when the underlying operating system doesn't. Files opened in
    binary mode (appending 'b' to the mode argument) return contents as
    bytes objects without any decoding. In text mode (the default, or when
    't' is appended to the mode argument), the contents of the file are
    returned as strings, the bytes having been first decoded using a
    platform-dependent encoding or using the specified encoding if given.

    buffering is an optional integer used to set the buffering policy.
    Pass 0 to switch buffering off (only allowed in binary mode), 1 to select
    line buffering (only usable in text mode), and an integer > 1 to indicate
    the size of a fixed-size chunk buffer.  When no buffering argument is
    given, the default buffering policy works as follows:

    * Binary files are buffered in fixed-size chunks; the size of the buffer
      is chosen using a heuristic trying to determine the underlying device's
      "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
      On many systems, the buffer will typically be 4096 or 8192 bytes long.

    * "Interactive" text files (files for which isatty() returns True)
      use line buffering.  Other text files use the policy described above
      for binary files.

    encoding is the name of the encoding used to decode or encode the
    file. This should only be used in text mode. The default encoding is
    platform dependent, but any encoding supported by Python can be
    passed.  See the codecs module for the list of supported encodings.

    errors is an optional string that specifies how encoding errors are to
    be handled---this argument should not be used in binary mode. Pass
    'strict' to raise a ValueError exception if there is an encoding error
    (the default of None has the same effect), or pass 'ignore' to ignore
    errors. (Note that ignoring encoding errors can lead to data loss.)
    See the documentation for codecs.register for a list of the permitted
    encoding error strings.

    newline controls how universal newlines works (it only applies to text
    mode). It can be None, '', '\n', '\r', and '\r\n'.  It works as
    follows:

    * On input, if newline is None, universal newlines mode is
      enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
      these are translated into '\n' before being returned to the
      caller. If it is '', universal newline mode is enabled, but line
      endings are returned to the caller untranslated. If it has any of
      the other legal values, input lines are only terminated by the given
      string, and the line ending is returned to the caller untranslated.

    * On output, if newline is None, any '\n' characters written are
      translated to the system default line separator, os.linesep. If
      newline is '', no translation takes place. If newline is any of the
      other legal values, any '\n' characters written are translated to
      the given string.

    If closefd is False, the underlying file descriptor will be kept open
    when the file is closed. This does not work when a file name is given
    and must be True in that case.

    open() returns a file object whose type depends on the mode, and
    through which the standard file operations such as reading and writing
    are performed. When open() is used to open a file in a text mode ('w',
    'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
    a file in a binary mode, the returned class varies: in read binary
    mode, it returns a BufferedReader; in write binary and append binary
    modes, it returns a BufferedWriter, and in read/write mode, it returns
    a BufferedRandom.

    It is also possible to use a string or bytearray as a file for both
    reading and writing. For strings StringIO can be used like a file
    opened in a text mode, and for bytes a BytesIO can be used like a file
    opened in a binary mode.

    Raises TypeError for arguments of the wrong type and ValueError for
    an invalid mode, an invalid buffering value, or text-only arguments
    combined with binary mode.
    """
    # Argument type checks.  The offending value is wrapped in a 1-tuple so
    # that a tuple argument is reported instead of being unpacked by "%".
    if not isinstance(file, (basestring, int, long)):
        raise TypeError("invalid file: %r" % (file,))
    if not isinstance(mode, basestring):
        raise TypeError("invalid mode: %r" % (mode,))
    if buffering is not None and not isinstance(buffering, (int, long)):
        raise TypeError("invalid buffering: %r" % (buffering,))
    if encoding is not None and not isinstance(encoding, basestring):
        raise TypeError("invalid encoding: %r" % (encoding,))
    if errors is not None and not isinstance(errors, basestring):
        raise TypeError("invalid errors: %r" % (errors,))

    # Mode parsing: unknown or repeated flag characters are rejected.
    modes = set(mode)
    if modes - set("arwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes:
        if writing or appending:
            raise ValueError("can't use U and writing mode at once")
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and errors is not None:
        raise ValueError("binary mode doesn't take an errors argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")

    raw = FileIO(file,
                 ("r" if reading else "") +
                 ("w" if writing else "") +
                 ("a" if appending else "") +
                 ("+" if updating else ""),
                 closefd)
    # From here on the raw file is open: close it again if any later step
    # fails, so that an error does not leave it dangling.
    try:
        if buffering is None:
            buffering = -1
        line_buffering = False
        if buffering == 1 or buffering < 0 and raw.isatty():
            buffering = -1
            line_buffering = True
        if buffering < 0:
            buffering = DEFAULT_BUFFER_SIZE
            try:
                bs = os.fstat(raw.fileno()).st_blksize
            except (os.error, AttributeError):
                pass
            else:
                if bs > 1:
                    buffering = bs
        if buffering < 0:
            raise ValueError("invalid buffering size")
        if buffering == 0:
            if binary:
                return raw
            raise ValueError("can't have unbuffered text I/O")
        if updating:
            buffer = BufferedRandom(raw, buffering)
        elif writing or appending:
            buffer = BufferedWriter(raw, buffering)
        elif reading:
            buffer = BufferedReader(raw, buffering)
        else:
            raise ValueError("unknown mode: %r" % mode)
        if binary:
            return buffer
        wrapper = TextIOWrapper(buffer, encoding, errors, newline,
                                line_buffering)
        wrapper.mode = mode
        return wrapper
    except BaseException:
        raw.close()
        raise
class DocDescriptor:
    """Descriptor that builds the text used for builtins.open.__doc__.

    Reading it yields the open() signature line followed by open's own
    docstring.
    """
    def __get__(self, obj, typ):
        signature = (
            "open(file, mode='r', buffering=None, encoding=None, "
            "errors=None, newline=None, closefd=True)\n\n")
        return signature + open.__doc__
class OpenWrapper:
    """Wrapper for builtins.open

    Calling the class simply forwards to open() and returns its result;
    because it is a class and not a function, it won't become a bound
    method when stored as a class variable (as dbm.dumb does).

    See initstdio() in Python/pythonrun.c.
    """
    # Replaces the static docstring above with the computed open() docs.
    __doc__ = DocDescriptor()

    def __new__(cls, *args, **kwargs):
        stream = open(*args, **kwargs)
        return stream
class UnsupportedOperation(ValueError, IOError):
    """Raised when a stream is asked to do something it does not support."""
class IOBase:

    """The abstract base class for all I/O classes, acting on streams of
    bytes. There is no public constructor.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default implementations
    represent a file that cannot be read, written or seeked.

    Even though IOBase does not declare read, readinto, or write because
    their signatures will vary, implementations and clients should
    consider those methods part of the interface. Also, implementations
    may raise a IOError when operations they do not support are called.

    The basic type used for binary data read from or written to a file is
    bytes. bytearrays are accepted too, and in some cases (such as
    readinto) needed. Text I/O classes work with str data.

    Note that calling any method (even inquiries) on a closed stream is
    undefined. Implementations may raise IOError in this case.

    IOBase (and its subclasses) support the iterator protocol, meaning
    that an IOBase object can be iterated over yielding the lines in a
    stream.

    IOBase also supports the :keyword:`with` statement. In this example,
    fp is closed after the suite of the with statement is complete:

    with open('spam.txt', 'w') as fp:
        fp.write('Spam and eggs!')
    """

    # Must come after the docstring: a string literal is only picked up as
    # the class docstring when it is the first statement of the body.
    __metaclass__ = abc.ABCMeta

    ### Internal ###

    def _unsupported(self, name):
        """Internal: raise an exception for unsupported operations."""
        raise UnsupportedOperation("%s.%s() not supported" %
                                   (self.__class__.__name__, name))

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Change stream position.

        Change the stream position to byte offset pos. pos is
        interpreted relative to the position indicated by whence.  Values
        for whence are:

        * 0 -- start of stream (the default); offset should be zero or positive
        * 1 -- current stream position; offset may be negative
        * 2 -- end of stream; offset is usually negative

        Return the new absolute position.
        """
        self._unsupported("seek")

    def tell(self):
        """Return current stream position."""
        return self.seek(0, 1)

    def truncate(self, pos=None):
        """Truncate file to pos bytes.

        pos defaults to the current IO position as reported by tell().
        Return the new size.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self):
        """Flush write buffers, if applicable.

        This is not implemented for read-only and non-blocking streams.
        """
        # XXX Should this return the number of bytes written???

    # Name-mangled to _IOBase__closed, so subclasses cannot clobber it by
    # accident; it is read back through the closed property.
    __closed = False

    def close(self):
        """Flush and close the IO object.

        This method has no effect if the file is already closed.
        """
        if not self.__closed:
            try:
                self.flush()
            except IOError:
                pass  # If flush() fails, just give up
            self.__closed = True

    def __del__(self):
        """Destructor.  Calls close()."""
        # The try/except block is in case this is called at program
        # exit time, when it's possible that globals have already been
        # deleted, and then the close() call might fail.  Since
        # there's nothing we can do about such failures and they annoy
        # the end users, we suppress the traceback.
        try:
            self.close()
        except:
            pass

    ### Inquiries ###

    def seekable(self):
        """Return whether object supports random access.

        If False, seek(), tell() and truncate() will raise IOError.
        This method may need to do a test seek().
        """
        return False

    def _checkSeekable(self, msg=None):
        """Internal: raise an IOError if file is not seekable
        """
        if not self.seekable():
            raise IOError("File or stream is not seekable."
                          if msg is None else msg)

    def readable(self):
        """Return whether object was opened for reading.

        If False, read() will raise IOError.
        """
        return False

    def _checkReadable(self, msg=None):
        """Internal: raise an IOError if file is not readable
        """
        if not self.readable():
            raise IOError("File or stream is not readable."
                          if msg is None else msg)

    def writable(self):
        """Return whether object was opened for writing.

        If False, write() and truncate() will raise IOError.
        """
        return False

    def _checkWritable(self, msg=None):
        """Internal: raise an IOError if file is not writable
        """
        if not self.writable():
            raise IOError("File or stream is not writable."
                          if msg is None else msg)

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    def _checkClosed(self, msg=None):
        """Internal: raise an ValueError if file is closed
        """
        if self.closed:
            raise ValueError("I/O operation on closed file."
                             if msg is None else msg)

    ### Context manager ###

    def __enter__(self):
        """Context management protocol.  Returns self."""
        self._checkClosed()
        return self

    def __exit__(self, *args):
        """Context management protocol.  Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self):
        """Returns underlying file descriptor if one exists.

        An IOError is raised if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self):
        """Return whether this is an 'interactive' stream.

        Return False if it can't be determined.
        """
        self._checkClosed()
        return False

    ### Readline[s] and writelines ###

    def readline(self, limit=-1):
        r"""Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.

        The line terminator is always b'\n' for binary files; for text
        files, the newlines argument to open can be used to select the line
        terminator(s) recognized.
        """
        # For backwards compatibility, a (slowish) readline().
        if hasattr(self, "peek"):
            # With peek() available, look ahead for the newline so that a
            # whole line (or the whole peeked chunk) is read in one call.
            def nreadahead():
                readahead = self.peek(1)
                if not readahead:
                    return 1
                n = (readahead.find(b"\n") + 1) or len(readahead)
                if limit >= 0:
                    n = min(n, limit)
                return n
        else:
            # Without peek() the only safe step is one byte at a time.
            def nreadahead():
                return 1
        if limit is None:
            limit = -1
        elif not isinstance(limit, (int, long)):
            raise TypeError("limit must be an integer")
        res = bytearray()
        while limit < 0 or len(res) < limit:
            b = self.read(nreadahead())
            if not b:
                break
            res += b
            if res.endswith(b"\n"):
                break
        return bytes(res)

    def __iter__(self):
        self._checkClosed()
        return self

    def next(self):
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def readlines(self, hint=None):
        """Return a list of lines from the stream.

        hint can be specified to control the number of lines read: no more
        lines will be read if the total size (in bytes/characters) of all
        lines so far exceeds hint.
        """
        if hint is not None and not isinstance(hint, (int, long)):
            raise TypeError("integer or None expected")
        if hint is None or hint <= 0:
            return list(self)
        n = 0
        lines = []
        for line in self:
            lines.append(line)
            n += len(line)
            if n >= hint:
                break
        return lines

    def writelines(self, lines):
        """Write each item of lines to the stream with write()."""
        self._checkClosed()
        for line in lines:
            self.write(line)
# Register the pure-Python class as a virtual subclass of the "official"
# ABC from io.py (no real inheritance, see the NOTE at the top of the file).
io.IOBase.register(IOBase)
class RawIOBase(IOBase):

    """Base class for raw binary I/O."""

    # The read() method is implemented by calling readinto(); derived
    # classes that want to support read() only need to implement
    # readinto() as a primitive operation.  In general, readinto() can be
    # more efficient than read().

    # (It would be tempting to also provide an implementation of
    # readinto() in terms of read(), in case the latter is a more suitable
    # primitive operation, but that would lead to nasty recursion in case
    # a subclass doesn't implement either.)

    def read(self, n=-1):
        """Read and return up to n bytes.

        If n is None or negative, read until EOF via readall().

        Returns an empty bytes object on EOF, or None if the object is
        set not to block and has no data to read.
        """
        if n is None:
            n = -1
        if n < 0:
            return self.readall()
        b = bytearray(n.__index__())
        n = self.readinto(b)
        if n is None:
            # readinto() signals "would block" with None; pass that on
            # instead of slicing with it, which would yield b"" (EOF).
            return None
        del b[n:]
        return bytes(b)

    def readall(self):
        """Read until EOF, using multiple read() call.

        Returns the bytes read.  When nothing at all was read, returns
        whatever the last read() returned: b"" on EOF, or None if the
        stream would block.
        """
        res = bytearray()
        while True:
            data = self.read(DEFAULT_BUFFER_SIZE)
            if not data:
                break
            res += data
        if res:
            return bytes(res)
        # b"" or None
        return data

    def readinto(self, b):
        """Read up to len(b) bytes into b.

        Returns number of bytes read (0 for EOF), or None if the object
        is set not to block and has no data to read.
        """
        self._unsupported("readinto")

    def write(self, b):
        """Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than len(b).
        """
        self._unsupported("write")
# Register the pure-Python class with the "official" ABC from io.py, then
# register the C FileIO as a virtual subclass of the pure-Python RawIOBase
# (FileIO is the raw stream that open() instantiates).
io.RawIOBase.register(RawIOBase)
from _io import FileIO
RawIOBase.register(FileIO)
class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    Compared with RawIOBase, read() here may be called without a size
    argument, and there is no default read() built on top of readinto().

    read(), readinto() and write() may also raise BlockingIOError when
    the underlying raw stream is non-blocking and not ready; unlike the
    raw versions they never return None.

    Implementations are expected to wrap a RawIOBase object, not to
    inherit from one.
    """

    def read(self, n=None):
        """Read and return up to n bytes.

        With n omitted, None or negative, everything up to EOF is read
        and returned.

        With a positive n and a raw stream that is not 'interactive',
        several raw reads may be made to reach the requested count
        (unless EOF comes first).  For interactive raw streams (XXX and
        for pipes?) at most one raw read is made, so a short result does
        not mean EOF is near.

        Returns an empty bytes array on EOF.

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        self._unsupported("read")

    def read1(self, n=None):
        """Read up to n bytes using at most one read() system call."""
        self._unsupported("read1")

    def readinto(self, b):
        """Read up to len(b) bytes into b.

        As with read(), several reads may be issued to the underlying
        raw stream unless it is 'interactive'.

        Returns the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        # XXX This ought to work with anything that supports the buffer API
        data = self.read(len(b))
        count = len(data)
        try:
            b[:count] = data
        except TypeError as err:
            # array.array rejects a bytes slice assignment; retry with the
            # data converted to an array, and re-raise for anything else.
            import array
            if isinstance(b, array.array):
                b[:count] = array.array(b'b', data)
            else:
                raise err
        return count

    def write(self, b):
        """Write the given buffer to the IO stream.

        Return the number of bytes written, which is never less than
        len(b).

        Raises BlockingIOError if the buffer is full and the
        underlying raw stream cannot accept more data at the moment.
        """
        self._unsupported("write")

    def detach(self):
        """
        Separate the underlying raw stream from the buffer and return it.

        Once the raw stream has been detached, the buffer is in an
        unusable state.
        """
        self._unsupported("detach")
# Register the pure-Python class as a virtual subclass of the "official"
# ABC from io.py (no real inheritance, see the NOTE at the top of the file).
io.BufferedIOBase.register(BufferedIOBase)
class _BufferedIOMixin(BufferedIOBase):

    """A mixin implementation of BufferedIOBase with an underlying raw stream.

    This passes most requests on to the underlying raw stream.  It
    does *not* provide implementations of read(), readinto() or
    write().
    """

    def __init__(self, raw):
        # raw is the wrapped stream; detach() sets it to None.
        self.raw = raw

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Seek the raw stream; raise IOError if it reports a negative
        position.  Returns the new position."""
        new_position = self.raw.seek(pos, whence)
        if new_position < 0:
            raise IOError("seek() returned an invalid position")
        return new_position

    def tell(self):
        """Return the raw stream's position; raise IOError if it is
        negative."""
        pos = self.raw.tell()
        if pos < 0:
            raise IOError("tell() returned an invalid position")
        return pos

    def truncate(self, pos=None):
        """Truncate the raw stream at pos (default: the current position)
        and return what the raw truncate() returns."""
        # Flush the stream.  We're mixing buffered I/O with lower-level I/O,
        # and a flush may be necessary to synch both views of the current
        # file state.
        self.flush()

        if pos is None:
            pos = self.tell()
        # XXX: Should seek() be used, instead of passing the position
        # XXX  directly to truncate?
        return self.raw.truncate(pos)

    ### Flush and close ###

    def flush(self):
        """Flush the raw stream."""
        self.raw.flush()

    def close(self):
        """Flush and close the raw stream.

        Does nothing if the raw stream has been detached or is already
        closed.
        """
        # Test for a detached stream first: the closed property reads
        # self.raw.closed and would raise AttributeError on None.
        if self.raw is not None and not self.closed:
            try:
                self.flush()
            except IOError:
                pass  # If flush() fails, just give up
            self.raw.close()

    def detach(self):
        """Flush, then disconnect and return the raw stream.

        Raises ValueError if the raw stream was already detached.
        """
        if self.raw is None:
            raise ValueError("raw stream already detached")
        self.flush()
        raw = self.raw
        self.raw = None
        return raw

    ### Inquiries ###

    def seekable(self):
        return self.raw.seekable()

    def readable(self):
        return self.raw.readable()

    def writable(self):
        return self.raw.writable()

    @property
    def closed(self):
        return self.raw.closed

    @property
    def name(self):
        return self.raw.name

    @property
    def mode(self):
        return self.raw.mode

    def __repr__(self):
        clsname = self.__class__.__name__
        try:
            name = self.name
        except AttributeError:
            # The raw stream has no name attribute.
            return "<_pyio.{0}>".format(clsname)
        else:
            return "<_pyio.{0} name={1!r}>".format(clsname, name)

    ### Lower-level APIs ###

    def fileno(self):
        return self.raw.fileno()

    def isatty(self):
        return self.raw.isatty()
class BytesIO(BufferedIOBase):

    """In-memory buffered stream backed by a bytearray."""

    def __init__(self, initial_bytes=None):
        storage = bytearray()
        if initial_bytes is not None:
            storage.extend(initial_bytes)
        self._pos = 0
        self._buffer = storage

    def __getstate__(self):
        """Return a copy of the instance dict; ValueError if closed."""
        if self.closed:
            raise ValueError("__getstate__ on closed file")
        return self.__dict__.copy()

    def getvalue(self):
        """Return the whole buffer contents as bytes.
        """
        if self.closed:
            raise ValueError("getvalue on closed file")
        return bytes(self._buffer)

    def read(self, n=None):
        """Read up to n bytes from the current position (all remaining
        bytes when n is None or negative)."""
        if self.closed:
            raise ValueError("read from closed file")
        if n is None:
            n = -1
        if not isinstance(n, (int, long)):
            raise TypeError("integer argument expected, got {0!r}".format(
                type(n)))
        total = len(self._buffer)
        if n < 0:
            n = total
        start = self._pos
        if total <= start:
            # Positioned at or past the end: nothing left to return.
            return b""
        stop = min(total, start + n)
        self._pos = stop
        return bytes(self._buffer[start:stop])

    def read1(self, n):
        """Identical to read().
        """
        return self.read(n)

    def write(self, b):
        """Write b at the current position and return len(b)."""
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, unicode):
            raise TypeError("can't write unicode to binary stream")
        count = len(b)
        if count == 0:
            return 0
        start = self._pos
        gap = start - len(self._buffer)
        if gap > 0:
            # The position is past the end of the data: fill the hole
            # with null bytes before writing.
            self._buffer += b'\x00' * gap
        self._buffer[start:start + count] = b
        self._pos += count
        return count

    def seek(self, pos, whence=0):
        """Move the position and return it.

        whence 0 is absolute (negative pos raises ValueError); 1 is
        relative to the current position and 2 to the end of the buffer,
        both clamped at 0.
        """
        if self.closed:
            raise ValueError("seek on closed file")
        try:
            pos = pos.__index__()
        except AttributeError as err:
            raise TypeError("an integer is required")
        if whence == 0:
            if pos < 0:
                raise ValueError("negative seek position %r" % (pos,))
            target = pos
        elif whence == 1:
            target = max(0, self._pos + pos)
        elif whence == 2:
            target = max(0, len(self._buffer) + pos)
        else:
            raise ValueError("invalid whence value")
        self._pos = target
        return target

    def tell(self):
        """Return the current position."""
        if self.closed:
            raise ValueError("tell on closed file")
        return self._pos

    def truncate(self, pos=None):
        """Cut the buffer at pos (default: current position), seek there
        and return the new position."""
        if self.closed:
            raise ValueError("truncate on closed file")
        if pos is None:
            pos = self._pos
        elif pos < 0:
            raise ValueError("negative truncate position %r" % (pos,))
        del self._buffer[pos:]
        return self.seek(pos)

    def readable(self):
        return True

    def writable(self):
        return True

    def seekable(self):
        return True
class BufferedReader(_BufferedIOMixin):

    """BufferedReader(raw[, buffer_size])

    A buffer for a readable, sequential BaseRawIO object.

    The constructor wraps the given readable raw stream with a buffer of
    buffer_size bytes; DEFAULT_BUFFER_SIZE is used when buffer_size is
    omitted.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.
        """
        if not raw.readable():
            raise IOError('"raw" argument must be readable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._reset_read_buf()
        self._read_lock = Lock()

    def _reset_read_buf(self):
        # Drop any buffered data and rewind the in-buffer cursor.
        self._read_buf = b""
        self._read_pos = 0

    def read(self, n=None):
        """Read n bytes.

        Exactly n bytes are returned unless the raw stream hits EOF or
        the call would block in non-blocking mode.  A negative n means
        read until EOF or until read() would block.
        """
        if n is not None and n < -1:
            raise ValueError("invalid number of bytes to read")
        with self._read_lock:
            return self._read_unlocked(n)

    def _read_unlocked(self, n=None):
        # Both b"" (EOF) and None (would block) end a read loop; whichever
        # one was seen is what gets returned when no data was collected.
        nodata_val = b""
        empty_values = (b"", None)
        buffered = self._read_buf
        start = self._read_pos

        # Unbounded read: hand back the unread tail of the buffer plus
        # everything the raw stream yields.
        if n is None or n == -1:
            self._reset_read_buf()
            pieces = [buffered[start:]]  # Leave out the consumed bytes.
            while True:
                # Keep going until EOF or until read() would block.
                piece = self.raw.read()
                if piece in empty_values:
                    nodata_val = piece
                    break
                pieces.append(piece)
            return b"".join(pieces) or nodata_val

        # Bounded read: never return more than n bytes.
        available = len(buffered) - start
        if n <= available:
            # Everything requested is already buffered.
            self._read_pos = start + n
            return buffered[start:start + n]
        # Otherwise pull from the raw stream until n bytes are on hand,
        # EOF is hit, or read() would block.
        pieces = [buffered[start:]]
        request = max(self.buffer_size, n)
        while available < n:
            piece = self.raw.read(request)
            if piece in empty_values:
                nodata_val = piece
                break
            available += len(piece)
            pieces.append(piece)
        # available falls short of n only after EOF or a would-block.
        n = min(n, available)
        joined = b"".join(pieces)
        self._read_buf = joined[n:]  # Keep the surplus for the next call.
        self._read_pos = 0
        if joined:
            return joined[:n]
        return nodata_val

    def peek(self, n=0):
        """Returns buffered bytes without advancing the position.

        n is the minimum number of bytes wanted; at most one raw read is
        made to get there, and the result never exceeds self.buffer_size.
        """
        with self._read_lock:
            return self._peek_unlocked(n)

    def _peek_unlocked(self, n=0):
        wanted = min(n, self.buffer_size)
        buffered = len(self._read_buf) - self._read_pos
        if buffered < wanted or buffered <= 0:
            fresh = self.raw.read(self.buffer_size - buffered)
            if fresh:
                self._read_buf = self._read_buf[self._read_pos:] + fresh
                self._read_pos = 0
        return self._read_buf[self._read_pos:]

    def read1(self, n):
        """Reads up to n bytes, with at most one read() system call."""
        # If anything is buffered, only buffered bytes are returned;
        # otherwise a single raw read is made.
        if n < 0:
            raise ValueError("number of bytes to read must be positive")
        if n == 0:
            return b""
        with self._read_lock:
            self._peek_unlocked(1)
            buffered = len(self._read_buf) - self._read_pos
            return self._read_unlocked(min(n, buffered))

    def tell(self):
        # The raw position runs ahead by the amount of unread buffered data.
        raw_position = _BufferedIOMixin.tell(self)
        return raw_position - len(self._read_buf) + self._read_pos

    def seek(self, pos, whence=0):
        if not (0 <= whence <= 2):
            raise ValueError("invalid whence value")
        with self._read_lock:
            if whence == 1:
                # A relative seek is relative to the logical position, so
                # compensate for the unread bytes still in the buffer.
                pos -= len(self._read_buf) - self._read_pos
            new_position = _BufferedIOMixin.seek(self, pos, whence)
            self._reset_read_buf()
            return new_position
1016 class BufferedWriter(_BufferedIOMixin):
1018 """A buffer for a writeable sequential RawIO object.
1020 The constructor creates a BufferedWriter for the given writeable raw
1021 stream. If the buffer_size is not given, it defaults to
1022 DEFAULT_BUFFER_SIZE.
1025 _warning_stack_offset = 2
def __init__(self, raw,
             buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
    """Wrap the writable raw stream with a write buffer of buffer_size.

    Raises IOError if raw is not writable and ValueError if buffer_size
    is not positive.  max_buffer_size is accepted only to emit a
    DeprecationWarning when it is given.
    """
    can_write = raw.writable()
    if not can_write:
        raise IOError('"raw" argument must be writable.')

    _BufferedIOMixin.__init__(self, raw)
    if buffer_size <= 0:
        raise ValueError("invalid buffer size")
    if max_buffer_size is not None:
        warnings.warn("max_buffer_size is deprecated", DeprecationWarning,
                      self._warning_stack_offset)
    self._write_lock = Lock()
    self._write_buf = bytearray()
    self.buffer_size = buffer_size
def write(self, b):
    """Append b to the write buffer, flushing when it exceeds buffer_size.

    Returns the number of bytes accepted into the buffer.

    Raises ValueError if the stream is closed, TypeError if b is
    unicode, and BlockingIOError (carrying the number of bytes accepted)
    when a flush blocks and the buffer cannot hold the data.
    """
    if self.closed:
        raise ValueError("write to closed file")
    if isinstance(b, unicode):
        raise TypeError("can't write unicode to binary stream")
    with self._write_lock:
        # XXX we can implement some more tricks to try and avoid
        # partial writes
        if len(self._write_buf) > self.buffer_size:
            # We're full, so let's pre-flush the buffer
            try:
                self._flush_unlocked()
            except BlockingIOError as e:
                # We can't accept anything else.
                # XXX Why not just let the exception pass through?
                raise BlockingIOError(e.errno, e.strerror, 0)
        # Measure the growth of the buffer rather than len(b), so that
        # "written" counts what extend() actually appended.
        before = len(self._write_buf)
        self._write_buf.extend(b)
        written = len(self._write_buf) - before
        if len(self._write_buf) > self.buffer_size:
            try:
                self._flush_unlocked()
            except BlockingIOError as e:
                if len(self._write_buf) > self.buffer_size:
                    # We've hit the buffer_size. We have to accept a partial
                    # write and cut back our buffer.
                    overage = len(self._write_buf) - self.buffer_size
                    written -= overage
                    self._write_buf = self._write_buf[:self.buffer_size]
                    raise BlockingIOError(e.errno, e.strerror, written)
        return written
1074 def truncate(self, pos=None):
1075 with self._write_lock:
1076 self._flush_unlocked()
1077 if pos is None:
1078 pos = self.raw.tell()
1079 return self.raw.truncate(pos)
1081 def flush(self):
1082 with self._write_lock:
1083 self._flush_unlocked()
1085 def _flush_unlocked(self):
1086 if self.closed:
1087 raise ValueError("flush of closed file")
1088 written = 0
1089 try:
1090 while self._write_buf:
1091 n = self.raw.write(self._write_buf)
1092 if n > len(self._write_buf) or n < 0:
1093 raise IOError("write() returned incorrect number of bytes")
1094 del self._write_buf[:n]
1095 written += n
1096 except BlockingIOError as e:
1097 n = e.characters_written
1098 del self._write_buf[:n]
1099 written += n
1100 raise BlockingIOError(e.errno, e.strerror, written)
1102 def tell(self):
1103 return _BufferedIOMixin.tell(self) + len(self._write_buf)
1105 def seek(self, pos, whence=0):
1106 if not (0 <= whence <= 2):
1107 raise ValueError("invalid whence")
1108 with self._write_lock:
1109 self._flush_unlocked()
1110 return _BufferedIOMixin.seek(self, pos, whence)
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together to
    form a sequential IO object that can read and write. This is typically
    used with a socket or two-way pipe.

    reader and writer are RawIOBase objects that are readable and
    writeable respectively. If the buffer_size is omitted it defaults to
    DEFAULT_BUFFER_SIZE.
    """

    # XXX The usefulness of this (compared to having two separate IO
    # objects) is questionable.

    def __init__(self, reader, writer,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Constructor.

        The arguments are two RawIO instances.  Raises IOError if reader
        is not readable or writer is not writable.  max_buffer_size is
        deprecated; passing it emits a DeprecationWarning.
        """
        if max_buffer_size is not None:
            warnings.warn("max_buffer_size is deprecated", DeprecationWarning, 2)

        if not reader.readable():
            raise IOError('"reader" argument must be readable.')

        if not writer.writable():
            raise IOError('"writer" argument must be writable.')

        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size)

    def read(self, n=None):
        """Read from the reader side; n=None means read until EOF."""
        if n is None:
            n = -1
        return self.reader.read(n)

    def readinto(self, b):
        """Delegate readinto() to the reader side."""
        return self.reader.readinto(b)

    def write(self, b):
        """Delegate write() to the writer side."""
        return self.writer.write(b)

    def peek(self, n=0):
        """Delegate peek() to the reader side."""
        return self.reader.peek(n)

    def read1(self, n):
        """Delegate read1() to the reader side."""
        return self.reader.read1(n)

    def readable(self):
        """Report whether the reader side is readable."""
        return self.reader.readable()

    def writable(self):
        """Report whether the writer side is writable."""
        return self.writer.writable()

    def flush(self):
        """Flush the writer side."""
        return self.writer.flush()

    def close(self):
        """Close both halves of the pair.

        The writer is closed first.  The reader is closed even if closing
        the writer raises, so a failing flush on the write side cannot
        leak the read side; the writer's exception still propagates.
        """
        try:
            self.writer.close()
        finally:
            self.reader.close()

    def isatty(self):
        """Return True if either side reports being a tty."""
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        """Closed state of the pair, as reported by the writer side."""
        return self.writer.closed
class BufferedRandom(BufferedWriter, BufferedReader):

    """Buffered reader and writer over a single seekable raw stream.

    The constructor takes the raw stream as its first argument and sets
    up both a read buffer and a write buffer on it.  If buffer_size is
    omitted it defaults to DEFAULT_BUFFER_SIZE.
    """

    # One frame deeper than BufferedWriter: the deprecation warning is
    # issued from BufferedWriter.__init__, called from our __init__.
    _warning_stack_offset = 3

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Initialise both buffer halves; raw must pass _checkSeekable()."""
        raw._checkSeekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)

    def seek(self, pos, whence=0):
        """Flush writes, drop read-ahead and seek the raw stream.

        Raises ValueError for a whence outside 0..2 and IOError if the
        raw stream reports a negative position.
        """
        if whence < 0 or whence > 2:
            raise ValueError("invalid whence")
        self.flush()
        if self._read_buf:
            # Undo read ahead: step the raw stream back over the bytes
            # that were buffered but never handed to the caller.
            with self._read_lock:
                rewind = self._read_pos - len(self._read_buf)
                self.raw.seek(rewind, 1)
        # First do the raw seek, then empty the read buffer, so that
        # if the raw seek fails, we don't lose buffered data forever.
        new_pos = self.raw.seek(pos, whence)
        with self._read_lock:
            self._reset_read_buf()
        if new_pos < 0:
            raise IOError("seek() returned invalid position")
        return new_pos

    def tell(self):
        """Return the logical position, honouring whichever buffer is in use."""
        if self._write_buf:
            return BufferedWriter.tell(self)
        return BufferedReader.tell(self)

    def truncate(self, pos=None):
        """Truncate at pos, or at the current position when pos is None."""
        if pos is None:
            pos = self.tell()
        # Use seek to flush the read buffer.
        self.seek(pos)
        return BufferedWriter.truncate(self)

    def read(self, n=None):
        """Flush pending writes, then read; n=None means read until EOF."""
        if n is None:
            n = -1
        self.flush()
        return BufferedReader.read(self, n)

    def readinto(self, b):
        """Flush pending writes, then delegate to BufferedReader.readinto()."""
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, n=0):
        """Flush pending writes, then delegate to BufferedReader.peek()."""
        self.flush()
        return BufferedReader.peek(self, n)

    def read1(self, n):
        """Flush pending writes, then delegate to BufferedReader.read1()."""
        self.flush()
        return BufferedReader.read1(self, n)

    def write(self, b):
        """Discard read-ahead, then delegate to BufferedWriter.write()."""
        if self._read_buf:
            # Undo readahead so the write lands at the caller's position.
            with self._read_lock:
                rewind = self._read_pos - len(self._read_buf)
                self.raw.seek(rewind, 1)
                self._reset_read_buf()
        return BufferedWriter.write(self, b)
class TextIOBase(IOBase):

    """Base class for text I/O.

    This class provides a character and line based interface to stream
    I/O. There is no readinto method because Python's character strings
    are immutable. There is no public constructor.

    Every I/O method here simply calls self._unsupported(<name>) (defined
    outside this class); concrete subclasses are expected to override
    the operations they support.
    """

    def read(self, n=-1):
        """Read at most n characters from stream.

        Read from underlying buffer until we have n characters or we hit EOF.
        If n is negative or omitted, read until EOF.

        This base implementation only reports the operation as unsupported.
        """
        self._unsupported("read")

    def write(self, s):
        """Write string s to stream.

        This base implementation only reports the operation as unsupported.
        """
        self._unsupported("write")

    def truncate(self, pos=None):
        """Truncate size to pos.

        This base implementation only reports the operation as unsupported.
        """
        self._unsupported("truncate")

    def readline(self):
        """Read until newline or EOF.

        Returns an empty string if EOF is hit immediately.

        This base implementation only reports the operation as unsupported.
        """
        self._unsupported("readline")

    def detach(self):
        """
        Separate the underlying buffer from the TextIOBase and return it.

        After the underlying buffer has been detached, the TextIO is in an
        unusable state.

        This base implementation only reports the operation as unsupported.
        """
        self._unsupported("detach")

    @property
    def encoding(self):
        """Name of the stream's encoding; None here. Subclasses should override."""
        return None

    @property
    def newlines(self):
        """Line endings translated so far.

        Only line endings translated during reading are considered.

        Always None here. Subclasses should override.
        """
        return None

    @property
    def errors(self):
        """Error setting of the decoder or encoder.

        Always None here. Subclasses should override."""
        return None

# Register with the official ABC in io.py instead of inheriting from it.
io.TextIOBase.register(TextIOBase)
class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
    r"""Incremental decoder used for universal-newlines reading.

    Wraps another incremental decoder (or None, in which case input is
    passed through undecoded).  With translate=True, \r\n and \r are
    turned into \n.  The kinds of newline seen are recorded and exposed
    through the newlines property.  Even with translate=False, a trailing
    \r is held back until the next call so that a \r\n pair is always
    returned in one piece.
    """

    # Bit flags accumulated in self.seennl.
    _LF = 1
    _CR = 2
    _CRLF = 4

    def __init__(self, decoder, translate, errors='strict'):
        codecs.IncrementalDecoder.__init__(self, errors=errors)
        self.translate = translate
        self.decoder = decoder
        self.seennl = 0
        self.pendingcr = False

    def decode(self, input, final=False):
        """Decode input and return the text with newline handling applied."""
        # Decode the input, then re-attach a \r held back by the previous
        # call, if any.
        if self.decoder is None:
            text = input
        else:
            text = self.decoder.decode(input, final=final)
        if self.pendingcr and (text or final):
            text = "\r" + text
            self.pendingcr = False

        # Hold back a trailing \r even when not translating, so that
        # readline() is sure to receive \r\n in a single pass.
        if not final and text.endswith("\r"):
            text = text[:-1]
            self.pendingcr = True

        # Record which kinds of newline occur in this piece of text.
        n_crlf = text.count('\r\n')
        n_cr = text.count('\r') - n_crlf
        n_lf = text.count('\n') - n_crlf
        if n_lf:
            self.seennl |= self._LF
        if n_cr:
            self.seennl |= self._CR
        if n_crlf:
            self.seennl |= self._CRLF

        if self.translate:
            if n_crlf:
                text = text.replace("\r\n", "\n")
            if n_cr:
                text = text.replace("\r", "\n")

        return text

    def getstate(self):
        """Return (buffer, flags); bit 0 of flags is the pending-\\r marker."""
        if self.decoder is None:
            pending, flags = b"", 0
        else:
            pending, flags = self.decoder.getstate()
        # Shift the wrapped decoder's flags up to make room for our bit.
        flags <<= 1
        if self.pendingcr:
            flags |= 1
        return pending, flags

    def setstate(self, state):
        """Restore a state previously produced by getstate()."""
        pending, flags = state
        self.pendingcr = bool(flags & 1)
        if self.decoder is not None:
            self.decoder.setstate((pending, flags >> 1))

    def reset(self):
        """Forget recorded newlines and any pending \\r; reset the wrapped decoder."""
        self.seennl = 0
        self.pendingcr = False
        if self.decoder is not None:
            self.decoder.reset()

    @property
    def newlines(self):
        """Newline kinds seen so far: None, a string, or a tuple of strings."""
        # Indexed by the _LF | _CR | _CRLF bit mask held in seennl.
        by_mask = (
            None,
            "\n",
            "\r",
            ("\r", "\n"),
            "\r\n",
            ("\n", "\r\n"),
            ("\r", "\r\n"),
            ("\r", "\n", "\r\n"),
        )
        return by_mask[self.seennl]
class TextIOWrapper(TextIOBase):

    r"""Character and line based layer over a BufferedIOBase object, buffer.

    encoding gives the name of the encoding that the stream will be
    decoded or encoded with. It defaults to locale.getpreferredencoding.

    errors determines the strictness of encoding and decoding (see the
    codecs.register) and defaults to "strict".

    newline can be None, '', '\n', '\r', or '\r\n'.  It controls the
    handling of line endings. If it is None, universal newlines is
    enabled.  With this enabled, on input, the lines endings '\n', '\r',
    or '\r\n' are translated to '\n' before being returned to the
    caller. Conversely, on output, '\n' is translated to the system
    default line separator, os.linesep. If newline is any other of its
    legal values, that newline becomes the newline when the file is read
    and it is returned untranslated. On output, '\n' is converted to the
    newline.

    If line_buffering is True, a call to flush is implied when a call to
    write contains a newline character.
    """

    # Number of bytes requested from the buffer per _read_chunk() call.
    _CHUNK_SIZE = 2048

    def __init__(self, buffer, encoding=None, errors=None, newline=None,
                 line_buffering=False):
        """Wrap *buffer*; see the class docstring for the arguments.

        Raises TypeError or ValueError for an illegal newline, and
        ValueError for a non-string encoding or errors value.
        """
        if newline is not None and not isinstance(newline, basestring):
            raise TypeError("illegal newline type: %r" % (type(newline),))
        if newline not in (None, "", "\n", "\r", "\r\n"):
            raise ValueError("illegal newline value: %r" % (newline,))
        if encoding is None:
            try:
                encoding = os.device_encoding(buffer.fileno())
            except (AttributeError, UnsupportedOperation):
                pass
            if encoding is None:
                try:
                    import locale
                except ImportError:
                    # Importing locale may fail if Python is being built
                    encoding = "ascii"
                else:
                    encoding = locale.getpreferredencoding()

        if not isinstance(encoding, basestring):
            raise ValueError("invalid encoding: %r" % encoding)

        if errors is None:
            errors = "strict"
        else:
            if not isinstance(errors, basestring):
                raise ValueError("invalid errors: %r" % errors)

        self.buffer = buffer
        self._line_buffering = line_buffering
        self._encoding = encoding
        self._errors = errors
        self._readuniversal = not newline
        self._readtranslate = newline is None
        self._readnl = newline
        self._writetranslate = newline != ''
        self._writenl = newline or os.linesep
        self._encoder = None
        self._decoder = None
        self._decoded_chars = ''  # buffer for text returned from decoder
        self._decoded_chars_used = 0  # offset into _decoded_chars for read()
        self._snapshot = None  # info for reconstructing decoder state
        self._seekable = self._telling = self.buffer.seekable()

        if self._seekable and self.writable():
            position = self.buffer.tell()
            if position != 0:
                # Not at the start of the stream: put the encoder in
                # state 0 (see seek(), which does the same for nonzero
                # cookies as part of BOM handling).
                try:
                    self._get_encoder().setstate(0)
                except LookupError:
                    # Sometimes the encoder doesn't exist
                    pass

    # self._snapshot is either None, or a tuple (dec_flags, next_input)
    # where dec_flags is the second (integer) item of the decoder state
    # and next_input is the chunk of input bytes that comes next after the
    # snapshot point.  We use this to reconstruct decoder states in tell().

    # Naming convention:
    #   - "bytes_..." for integer variables that count input bytes
    #   - "chars_..." for integer variables that count decoded characters

    def __repr__(self):
        try:
            name = self.name
        except AttributeError:
            return "<_pyio.TextIOWrapper encoding='{0}'>".format(self.encoding)
        else:
            return "<_pyio.TextIOWrapper name={0!r} encoding='{1}'>".format(
                name, self.encoding)

    @property
    def encoding(self):
        """Name of the encoding chosen in the constructor."""
        return self._encoding

    @property
    def errors(self):
        """Error-handling setting chosen in the constructor."""
        return self._errors

    @property
    def line_buffering(self):
        """The line_buffering flag given to the constructor."""
        return self._line_buffering

    def seekable(self):
        """Return whether the underlying buffer was seekable at construction."""
        return self._seekable

    def readable(self):
        return self.buffer.readable()

    def writable(self):
        return self.buffer.writable()

    def flush(self):
        """Flush the underlying buffer and re-enable tell() if seekable."""
        self.buffer.flush()
        self._telling = self._seekable

    def close(self):
        """Flush (best effort) and close the underlying buffer.

        Does nothing when the buffer has been detached.
        """
        if self.buffer is not None:
            try:
                self.flush()
            except IOError:
                pass  # If flush() fails, just give up
            self.buffer.close()

    @property
    def closed(self):
        return self.buffer.closed

    @property
    def name(self):
        return self.buffer.name

    def fileno(self):
        return self.buffer.fileno()

    def isatty(self):
        return self.buffer.isatty()

    def write(self, s):
        """Encode s and write it to the underlying buffer.

        Returns len(s) as measured before any newline translation.
        Raises ValueError on a closed file and TypeError if s is not a
        unicode string.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if not isinstance(s, unicode):
            raise TypeError("can't write %s to text stream" %
                            s.__class__.__name__)
        length = len(s)
        haslf = (self._writetranslate or self._line_buffering) and "\n" in s
        if haslf and self._writetranslate and self._writenl != "\n":
            s = s.replace("\n", self._writenl)
        encoder = self._encoder or self._get_encoder()
        # XXX What if we were just reading?
        b = encoder.encode(s)
        self.buffer.write(b)
        if self._line_buffering and (haslf or "\r" in s):
            self.flush()
        # Writing invalidates any snapshot taken for tell().
        self._snapshot = None
        if self._decoder:
            self._decoder.reset()
        return length

    def _get_encoder(self):
        """Create, store and return the incremental encoder."""
        make_encoder = codecs.getincrementalencoder(self._encoding)
        self._encoder = make_encoder(self._errors)
        return self._encoder

    def _get_decoder(self):
        """Create, store and return the incremental decoder.

        In universal-newlines mode the decoder is wrapped in an
        IncrementalNewlineDecoder.
        """
        make_decoder = codecs.getincrementaldecoder(self._encoding)
        decoder = make_decoder(self._errors)
        if self._readuniversal:
            decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
        self._decoder = decoder
        return decoder

    # The following three methods implement an ADT for _decoded_chars.
    # Text returned from the decoder is buffered here until the client
    # requests it by calling our read() or readline() method.
    def _set_decoded_chars(self, chars):
        """Set the _decoded_chars buffer."""
        self._decoded_chars = chars
        self._decoded_chars_used = 0

    def _get_decoded_chars(self, n=None):
        """Advance into the _decoded_chars buffer."""
        offset = self._decoded_chars_used
        if n is None:
            chars = self._decoded_chars[offset:]
        else:
            chars = self._decoded_chars[offset:offset + n]
        self._decoded_chars_used += len(chars)
        return chars

    def _rewind_decoded_chars(self, n):
        """Rewind the _decoded_chars buffer."""
        if self._decoded_chars_used < n:
            raise AssertionError("rewind decoded_chars out of bounds")
        self._decoded_chars_used -= n

    def _read_chunk(self):
        """
        Read and decode the next chunk of data from the BufferedReader.
        """

        # The return value is True unless EOF was reached.  The decoded
        # string is placed in self._decoded_chars (replacing its previous
        # value).  The entire input chunk is sent to the decoder, though
        # some of it may remain buffered in the decoder, yet to be
        # converted.

        if self._decoder is None:
            raise ValueError("no decoder")

        if self._telling:
            # To prepare for tell(), we need to snapshot a point in the
            # file where the decoder's input buffer is empty.

            dec_buffer, dec_flags = self._decoder.getstate()
            # Given this, we know there was a valid snapshot point
            # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).

        # Read a chunk, decode it, and put the result in self._decoded_chars.
        input_chunk = self.buffer.read1(self._CHUNK_SIZE)
        eof = not input_chunk
        self._set_decoded_chars(self._decoder.decode(input_chunk, eof))

        if self._telling:
            # At the snapshot point, len(dec_buffer) bytes before the read,
            # the next input to be decoded is dec_buffer + input_chunk.
            self._snapshot = (dec_flags, dec_buffer + input_chunk)

        return not eof

    def _pack_cookie(self, position, dec_flags=0,
                     bytes_to_feed=0, need_eof=0, chars_to_skip=0):
        # The meaning of a tell() cookie is: seek to position, set the
        # decoder flags to dec_flags, read bytes_to_feed bytes, feed them
        # into the decoder with need_eof as the EOF flag, then skip
        # chars_to_skip characters of the decoded result.  For most simple
        # decoders, tell() will often just give a byte offset in the file.
        return (position | (dec_flags<<64) | (bytes_to_feed<<128) |
                (chars_to_skip<<192) | bool(need_eof)<<256)

    def _unpack_cookie(self, bigint):
        # Inverse of _pack_cookie(): peel off successive 64-bit fields.
        rest, position = divmod(bigint, 1<<64)
        rest, dec_flags = divmod(rest, 1<<64)
        rest, bytes_to_feed = divmod(rest, 1<<64)
        need_eof, chars_to_skip = divmod(rest, 1<<64)
        return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip

    def tell(self):
        """Return an opaque cookie describing the current text position.

        Raises IOError if the stream is not seekable, if telling has been
        disabled by next(), or if the position cannot be reconstructed.
        """
        if not self._seekable:
            raise IOError("underlying stream is not seekable")
        if not self._telling:
            raise IOError("telling position disabled by next() call")
        self.flush()
        position = self.buffer.tell()
        decoder = self._decoder
        if decoder is None or self._snapshot is None:
            if self._decoded_chars:
                # This should never happen.
                raise AssertionError("pending decoded text")
            return position

        # Skip backward to the snapshot point (see _read_chunk).
        dec_flags, next_input = self._snapshot
        position -= len(next_input)

        # How many decoded characters have been used up since the snapshot?
        chars_to_skip = self._decoded_chars_used
        if chars_to_skip == 0:
            # We haven't moved from the snapshot point.
            return self._pack_cookie(position, dec_flags)

        # Starting from the snapshot position, we will walk the decoder
        # forward until it gives us enough decoded characters.
        saved_state = decoder.getstate()
        try:
            # Note our initial start point.
            decoder.setstate((b'', dec_flags))
            start_pos = position
            start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
            need_eof = 0

            # Feed the decoder one byte at a time.  As we go, note the
            # nearest "safe start point" before the current location
            # (a point where the decoder has nothing buffered, so seek()
            # can safely start from there and advance to this location).
            for next_byte in next_input:
                bytes_fed += 1
                chars_decoded += len(decoder.decode(next_byte))
                dec_buffer, dec_flags = decoder.getstate()
                if not dec_buffer and chars_decoded <= chars_to_skip:
                    # Decoder buffer is empty, so this is a safe start point.
                    start_pos += bytes_fed
                    chars_to_skip -= chars_decoded
                    start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
                if chars_decoded >= chars_to_skip:
                    break
            else:
                # We didn't get enough decoded data; signal EOF to get more.
                chars_decoded += len(decoder.decode(b'', final=True))
                need_eof = 1
                if chars_decoded < chars_to_skip:
                    raise IOError("can't reconstruct logical file position")

            # The returned cookie corresponds to the last safe start point.
            return self._pack_cookie(
                start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
        finally:
            # Walking the decoder must not disturb the live decoding state.
            decoder.setstate(saved_state)

    def truncate(self, pos=None):
        """Truncate the underlying buffer at pos (default: current position)."""
        self.flush()
        if pos is None:
            pos = self.tell()
        self.seek(pos)
        return self.buffer.truncate()

    def detach(self):
        """Flush, disconnect and return the underlying buffer.

        Raises ValueError if the buffer has already been detached.
        """
        if self.buffer is None:
            raise ValueError("buffer is already detached")
        self.flush()
        buffer = self.buffer
        self.buffer = None
        return buffer

    def seek(self, cookie, whence=0):
        """Move to the position described by cookie.

        With whence=0, cookie must be a value returned by tell().  With
        whence=1 or whence=2 only a zero offset is accepted.  Raises
        ValueError on a closed file, an invalid whence or a negative
        cookie, and IOError for an unseekable stream, a nonzero relative
        offset, or a position that cannot be restored.
        """
        if self.closed:
            raise ValueError("seek on closed file")
        if not self._seekable:
            raise IOError("underlying stream is not seekable")
        if whence == 1: # seek relative to current position
            if cookie != 0:
                raise IOError("can't do nonzero cur-relative seeks")
            # Seeking to the current position should attempt to
            # sync the underlying buffer with the current position.
            whence = 0
            cookie = self.tell()
        if whence == 2: # seek relative to end of file
            if cookie != 0:
                raise IOError("can't do nonzero end-relative seeks")
            self.flush()
            position = self.buffer.seek(0, 2)
            self._set_decoded_chars('')
            self._snapshot = None
            if self._decoder:
                self._decoder.reset()
            return position
        if whence != 0:
            raise ValueError("invalid whence (%r, should be 0, 1 or 2)" %
                             (whence,))
        if cookie < 0:
            raise ValueError("negative seek position %r" % (cookie,))
        self.flush()

        # The strategy of seek() is to go back to the safe start point
        # and replay the effect of read(chars_to_skip) from there.
        start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
            self._unpack_cookie(cookie)

        # Seek back to the safe start point.
        self.buffer.seek(start_pos)
        self._set_decoded_chars('')
        self._snapshot = None

        # Restore the decoder to its state from the safe start point.
        if cookie == 0 and self._decoder:
            self._decoder.reset()
        elif self._decoder or dec_flags or chars_to_skip:
            self._decoder = self._decoder or self._get_decoder()
            self._decoder.setstate((b'', dec_flags))
            self._snapshot = (dec_flags, b'')

        if chars_to_skip:
            # Just like _read_chunk, feed the decoder and save a snapshot.
            input_chunk = self.buffer.read(bytes_to_feed)
            self._set_decoded_chars(
                self._decoder.decode(input_chunk, need_eof))
            self._snapshot = (dec_flags, input_chunk)

            # Skip chars_to_skip of the decoded characters.
            if len(self._decoded_chars) < chars_to_skip:
                raise IOError("can't restore logical file position")
            self._decoded_chars_used = chars_to_skip

        # Finally, reset the encoder (merely useful for proper BOM handling)
        try:
            encoder = self._encoder or self._get_encoder()
        except LookupError:
            # Sometimes the encoder doesn't exist
            pass
        else:
            if cookie != 0:
                encoder.setstate(0)
            else:
                encoder.reset()
        return cookie

    def read(self, n=None):
        """Read and return at most n characters; all of them if n is None or negative."""
        self._checkReadable()
        if n is None:
            n = -1
        decoder = self._decoder or self._get_decoder()
        if n < 0:
            # Read everything.
            result = (self._get_decoded_chars() +
                      decoder.decode(self.buffer.read(), final=True))
            self._set_decoded_chars('')
            self._snapshot = None
            return result
        else:
            # Keep reading chunks until we have n characters to return.
            eof = False
            result = self._get_decoded_chars(n)
            while len(result) < n and not eof:
                eof = not self._read_chunk()
                result += self._get_decoded_chars(n - len(result))
            return result

    def next(self):
        """Return the next line; tell() is disabled until iteration ends."""
        self._telling = False
        line = self.readline()
        if not line:
            # Exhausted: drop the snapshot and allow tell() again.
            self._snapshot = None
            self._telling = self._seekable
            raise StopIteration
        return line

    def readline(self, limit=None):
        """Read and return one line, at most limit characters if limit >= 0.

        Returns whatever text remains (possibly empty) at end of file.
        Raises ValueError on a closed file and TypeError if limit is
        neither None nor an integer.
        """
        if self.closed:
            raise ValueError("read from closed file")
        if limit is None:
            limit = -1
        elif not isinstance(limit, (int, long)):
            raise TypeError("limit must be an integer")

        # Grab all the decoded text (we will rewind any extra bits later).
        line = self._get_decoded_chars()

        start = 0
        # Make the decoder if it doesn't already exist.
        if not self._decoder:
            self._get_decoder()

        pos = endpos = None
        while True:
            if self._readtranslate:
                # Newlines are already translated, only search for \n
                pos = line.find('\n', start)
                if pos >= 0:
                    endpos = pos + 1
                    break
                else:
                    start = len(line)

            elif self._readuniversal:
                # Universal newline search. Find any of \r, \r\n, \n
                # The decoder ensures that \r\n are not split in two pieces

                # In C we'd look for these in parallel of course.
                nlpos = line.find("\n", start)
                crpos = line.find("\r", start)
                if crpos == -1:
                    if nlpos == -1:
                        # Nothing found
                        start = len(line)
                    else:
                        # Found \n
                        endpos = nlpos + 1
                        break
                elif nlpos == -1:
                    # Found lone \r
                    endpos = crpos + 1
                    break
                elif nlpos < crpos:
                    # Found \n
                    endpos = nlpos + 1
                    break
                elif nlpos == crpos + 1:
                    # Found \r\n
                    endpos = crpos + 2
                    break
                else:
                    # Found \r
                    endpos = crpos + 1
                    break
            else:
                # non-universal
                pos = line.find(self._readnl)
                if pos >= 0:
                    endpos = pos + len(self._readnl)
                    break

            if limit >= 0 and len(line) >= limit:
                endpos = limit  # reached length limit
                break

            # No line ending seen yet - get more data
            while self._read_chunk():
                if self._decoded_chars:
                    break
            if self._decoded_chars:
                line += self._get_decoded_chars()
            else:
                # end of file
                self._set_decoded_chars('')
                self._snapshot = None
                return line

        if limit >= 0 and endpos > limit:
            endpos = limit  # don't exceed limit

        # Rewind _decoded_chars to just after the line ending we found.
        self._rewind_decoded_chars(len(line) - endpos)
        return line[:endpos]

    @property
    def newlines(self):
        """Newlines reported by the decoder, or None if no decoder exists yet."""
        return self._decoder.newlines if self._decoder else None
class StringIO(TextIOWrapper):
    """In-memory text stream built on a TextIOWrapper over a BytesIO.

    initial_value, if non-empty, becomes the starting contents of the
    stream.  newline has the same meaning as in TextIOWrapper's
    constructor.
    """

    def __init__(self, initial_value="", newline="\n"):
        super(StringIO, self).__init__(BytesIO(),
                                       encoding="utf-8",
                                       errors="strict",
                                       newline=newline)
        # Issue #5645: make universal newlines semantics the same as in the
        # C version, even under Windows.
        if newline is None:
            self._writetranslate = False
        if not initial_value:
            return
        text = initial_value
        if not isinstance(text, unicode):
            text = unicode(text)
        # Load the contents, then rewind to the start of the stream.
        self.write(text)
        self.seek(0)

    def getvalue(self):
        """Return the whole contents of the stream as a decoded string."""
        self.flush()
        raw_bytes = self.buffer.getvalue()
        return raw_bytes.decode(self._encoding, self._errors)

    def __repr__(self):
        # TextIOWrapper's repr mentions the encoding; for StringIO the
        # encoding is an implementation detail, so use the plain object repr.
        return object.__repr__(self)

    @property
    def errors(self):
        """Always None for StringIO."""
        return None

    @property
    def encoding(self):
        """Always None for StringIO."""
        return None

    def detach(self):
        # This doesn't make sense on StringIO.
        self._unsupported("detach")