Lib/_pyio.py

   1 """
   2 Python implementation of the io module.
   3 """
   4
   5 from __future__ import print_function
   6 from __future__ import unicode_literals
   7
   8 import os
   9 import abc
  10 import codecs
  11 import warnings
# Import thread instead of threading to reduce startup cost
  13 try:
  14     from thread import allocate_lock as Lock
  15 except ImportError:
  16     from dummy_thread import allocate_lock as Lock
  17
  18 import io
  19 from io import __all__
  20 from io import SEEK_SET, SEEK_CUR, SEEK_END
  21
# Python 2: a module-level __metaclass__ makes every class statement below
# that has no explicit bases create a new-style class.
__metaclass__ = type

# Fallback buffer size: open() uses st_blksize whenever we can and only
# falls back to this value otherwise.
DEFAULT_BUFFER_SIZE = 8 * 1024  # bytes
  26
  27 # NOTE: Base classes defined here are registered with the "official" ABCs
  28 # defined in io.py. We don't use real inheritance though, because we don't
  29 # want to inherit the C implementations.
  30
  31
  32 class BlockingIOError(IOError):
  33
  34     """Exception raised when I/O would block on a non-blocking I/O stream."""
  35
  36     def __init__(self, errno, strerror, characters_written=0):
  37         super(IOError, self).__init__(errno, strerror)
  38         if not isinstance(characters_written, (int, long)):
  39             raise TypeError("characters_written must be a integer")
  40         self.characters_written = characters_written
  41
  42
  43 def open(file, mode="r", buffering=None,
  44          encoding=None, errors=None,
  45          newline=None, closefd=True):
  46
  47     r"""Open file and return a stream.  Raise IOError upon failure.
  48
  49     file is either a text or byte string giving the name (and the path
  50     if the file isn't in the current working directory) of the file to
  51     be opened or an integer file descriptor of the file to be
  52     wrapped. (If a file descriptor is given, it is closed when the
  53     returned I/O object is closed, unless closefd is set to False.)
  54
  55     mode is an optional string that specifies the mode in which the file
  56     is opened. It defaults to 'r' which means open for reading in text
  57     mode.  Other common values are 'w' for writing (truncating the file if
  58     it already exists), and 'a' for appending (which on some Unix systems,
  59     means that all writes append to the end of the file regardless of the
  60     current seek position). In text mode, if encoding is not specified the
  61     encoding used is platform dependent. (For reading and writing raw
  62     bytes use binary mode and leave encoding unspecified.) The available
  63     modes are:
  64
  65     ========= ===============================================================
  66     Character Meaning
  67     --------- ---------------------------------------------------------------
  68     'r'       open for reading (default)
  69     'w'       open for writing, truncating the file first
  70     'a'       open for writing, appending to the end of the file if it exists
  71     'b'       binary mode
  72     't'       text mode (default)
  73     '+'       open a disk file for updating (reading and writing)
  74     'U'       universal newline mode (for backwards compatibility; unneeded
  75               for new code)
  76     ========= ===============================================================
  77
  78     The default mode is 'rt' (open for reading text). For binary random
  79     access, the mode 'w+b' opens and truncates the file to 0 bytes, while
  80     'r+b' opens the file without truncation.
  81
  82     Python distinguishes between files opened in binary and text modes,
  83     even when the underlying operating system doesn't. Files opened in
  84     binary mode (appending 'b' to the mode argument) return contents as
  85     bytes objects without any decoding. In text mode (the default, or when
  86     't' is appended to the mode argument), the contents of the file are
  87     returned as strings, the bytes having been first decoded using a
  88     platform-dependent encoding or using the specified encoding if given.
  89
  90     buffering is an optional integer used to set the buffering policy. By
  91     default full buffering is on. Pass 0 to switch buffering off (only
  92     allowed in binary mode), 1 to set line buffering, and an integer > 1
  93     for full buffering.
  94
  95     encoding is the name of the encoding used to decode or encode the
  96     file. This should only be used in text mode. The default encoding is
  97     platform dependent, but any encoding supported by Python can be
  98     passed.  See the codecs module for the list of supported encodings.
  99
 100     errors is an optional string that specifies how encoding errors are to
 101     be handled---this argument should not be used in binary mode. Pass
 102     'strict' to raise a ValueError exception if there is an encoding error
 103     (the default of None has the same effect), or pass 'ignore' to ignore
 104     errors. (Note that ignoring encoding errors can lead to data loss.)
 105     See the documentation for codecs.register for a list of the permitted
 106     encoding error strings.
 107
 108     newline controls how universal newlines works (it only applies to text
 109     mode). It can be None, '', '\n', '\r', and '\r\n'.  It works as
 110     follows:
 111
 112     * On input, if newline is None, universal newlines mode is
 113       enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
 114       these are translated into '\n' before being returned to the
 115       caller. If it is '', universal newline mode is enabled, but line
 116       endings are returned to the caller untranslated. If it has any of
 117       the other legal values, input lines are only terminated by the given
 118       string, and the line ending is returned to the caller untranslated.
 119
 120     * On output, if newline is None, any '\n' characters written are
 121       translated to the system default line separator, os.linesep. If
 122       newline is '', no translation takes place. If newline is any of the
 123       other legal values, any '\n' characters written are translated to
 124       the given string.
 125
 126     If closefd is False, the underlying file descriptor will be kept open
 127     when the file is closed. This does not work when a file name is given
 128     and must be True in that case.
 129
 130     open() returns a file object whose type depends on the mode, and
 131     through which the standard file operations such as reading and writing
 132     are performed. When open() is used to open a file in a text mode ('w',
 133     'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
 134     a file in a binary mode, the returned class varies: in read binary
 135     mode, it returns a BufferedReader; in write binary and append binary
 136     modes, it returns a BufferedWriter, and in read/write mode, it returns
 137     a BufferedRandom.
 138
 139     It is also possible to use a string or bytearray as a file for both
 140     reading and writing. For strings StringIO can be used like a file
 141     opened in a text mode, and for bytes a BytesIO can be used like a file
 142     opened in a binary mode.
 143     """
 144     if not isinstance(file, (basestring, int, long)):
 145         raise TypeError("invalid file: %r" % file)
 146     if not isinstance(mode, basestring):
 147         raise TypeError("invalid mode: %r" % mode)
 148     if buffering is not None and not isinstance(buffering, (int, long)):
 149         raise TypeError("invalid buffering: %r" % buffering)
 150     if encoding is not None and not isinstance(encoding, basestring):
 151         raise TypeError("invalid encoding: %r" % encoding)
 152     if errors is not None and not isinstance(errors, basestring):
 153         raise TypeError("invalid errors: %r" % errors)
 154     modes = set(mode)
 155     if modes - set("arwb+tU") or len(mode) > len(modes):
 156         raise ValueError("invalid mode: %r" % mode)
 157     reading = "r" in modes
 158     writing = "w" in modes
 159     appending = "a" in modes
 160     updating = "+" in modes
 161     text = "t" in modes
 162     binary = "b" in modes
 163     if "U" in modes:
 164         if writing or appending:
 165             raise ValueError("can't use U and writing mode at once")
 166         reading = True
 167     if text and binary:
 168         raise ValueError("can't have text and binary mode at once")
 169     if reading + writing + appending > 1:
 170         raise ValueError("can't have read/write/append mode at once")
 171     if not (reading or writing or appending):
 172         raise ValueError("must have exactly one of read/write/append mode")
 173     if binary and encoding is not None:
 174         raise ValueError("binary mode doesn't take an encoding argument")
 175     if binary and errors is not None:
 176         raise ValueError("binary mode doesn't take an errors argument")
 177     if binary and newline is not None:
 178         raise ValueError("binary mode doesn't take a newline argument")
 179     raw = FileIO(file,
 180                  (reading and "r" or "") +
 181                  (writing and "w" or "") +
 182                  (appending and "a" or "") +
 183                  (updating and "+" or ""),
 184                  closefd)
 185     if buffering is None:
 186         buffering = -1
 187     line_buffering = False
 188     if buffering == 1 or buffering < 0 and raw.isatty():
 189         buffering = -1
 190         line_buffering = True
 191     if buffering < 0:
 192         buffering = DEFAULT_BUFFER_SIZE
 193         try:
 194             bs = os.fstat(raw.fileno()).st_blksize
 195         except (os.error, AttributeError):
 196             pass
 197         else:
 198             if bs > 1:
 199                 buffering = bs
 200     if buffering < 0:
 201         raise ValueError("invalid buffering size")
 202     if buffering == 0:
 203         if binary:
 204             return raw
 205         raise ValueError("can't have unbuffered text I/O")
 206     if updating:
 207         buffer = BufferedRandom(raw, buffering)
 208     elif writing or appending:
 209         buffer = BufferedWriter(raw, buffering)
 210     elif reading:
 211         buffer = BufferedReader(raw, buffering)
 212     else:
 213         raise ValueError("unknown mode: %r" % mode)
 214     if binary:
 215         return buffer
 216     text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering)
 217     text.mode = mode
 218     return text
 219
 220
 221 class DocDescriptor:
 222     """Helper for builtins.open.__doc__
 223     """
 224     def __get__(self, obj, typ):
 225         return (
 226             "open(file, mode='r', buffering=None, encoding=None, "
 227                  "errors=None, newline=None, closefd=True)\n\n" +
 228             open.__doc__)
 229
 230 class OpenWrapper:
 231     """Wrapper for builtins.open
 232
 233     Trick so that open won't become a bound method when stored
 234     as a class variable (as dbm.dumb does).
 235
 236     See initstdio() in Python/pythonrun.c.
 237     """
 238     __doc__ = DocDescriptor()
 239
 240     def __new__(cls, *args, **kwargs):
 241         return open(*args, **kwargs)
 242
 243
class UnsupportedOperation(ValueError, IOError):
    """Raised when a stream is asked for an operation it does not support.

    Derives from both ValueError and IOError, so handlers for either
    exception type catch it.
    """
    pass
 246
 247
 248 class IOBase:
 249     __metaclass__ = abc.ABCMeta
 250
 251     """The abstract base class for all I/O classes, acting on streams of
 252     bytes. There is no public constructor.
 253
 254     This class provides dummy implementations for many methods that
 255     derived classes can override selectively; the default implementations
 256     represent a file that cannot be read, written or seeked.
 257
 258     Even though IOBase does not declare read, readinto, or write because
 259     their signatures will vary, implementations and clients should
 260     consider those methods part of the interface. Also, implementations
 261     may raise a IOError when operations they do not support are called.
 262
 263     The basic type used for binary data read from or written to a file is
 264     bytes. bytearrays are accepted too, and in some cases (such as
 265     readinto) needed. Text I/O classes work with str data.
 266
 267     Note that calling any method (even inquiries) on a closed stream is
 268     undefined. Implementations may raise IOError in this case.
 269
 270     IOBase (and its subclasses) support the iterator protocol, meaning
 271     that an IOBase object can be iterated over yielding the lines in a
 272     stream.
 273
 274     IOBase also supports the :keyword:`with` statement. In this example,
 275     fp is closed after the suite of the with statement is complete:
 276
 277     with open('spam.txt', 'r') as fp:
 278         fp.write('Spam and eggs!')
 279     """
 280
 281     ### Internal ###
 282
 283     def _unsupported(self, name):
 284         """Internal: raise an exception for unsupported operations."""
 285         raise UnsupportedOperation("%s.%s() not supported" %
 286                                    (self.__class__.__name__, name))
 287
 288     ### Positioning ###
 289
 290     def seek(self, pos, whence=0):
 291         """Change stream position.
 292
 293         Change the stream position to byte offset offset. offset is
 294         interpreted relative to the position indicated by whence.  Values
 295         for whence are:
 296
 297         * 0 -- start of stream (the default); offset should be zero or positive
 298         * 1 -- current stream position; offset may be negative
 299         * 2 -- end of stream; offset is usually negative
 300
 301         Return the new absolute position.
 302         """
 303         self._unsupported("seek")
 304
 305     def tell(self):
 306         """Return current stream position."""
 307         return self.seek(0, 1)
 308
 309     def truncate(self, pos=None):
 310         """Truncate file to size bytes.
 311
 312         Size defaults to the current IO position as reported by tell().  Return
 313         the new size.
 314         """
 315         self._unsupported("truncate")
 316
 317     ### Flush and close ###
 318
 319     def flush(self):
 320         """Flush write buffers, if applicable.
 321
 322         This is not implemented for read-only and non-blocking streams.
 323         """
 324         # XXX Should this return the number of bytes written???
 325
 326     __closed = False
 327
 328     def close(self):
 329         """Flush and close the IO object.
 330
 331         This method has no effect if the file is already closed.
 332         """
 333         if not self.__closed:
 334             try:
 335                 self.flush()
 336             except IOError:
 337                 pass  # If flush() fails, just give up
 338             self.__closed = True
 339
 340     def __del__(self):
 341         """Destructor.  Calls close()."""
 342         # The try/except block is in case this is called at program
 343         # exit time, when it's possible that globals have already been
 344         # deleted, and then the close() call might fail.  Since
 345         # there's nothing we can do about such failures and they annoy
 346         # the end users, we suppress the traceback.
 347         try:
 348             self.close()
 349         except:
 350             pass
 351
 352     ### Inquiries ###
 353
 354     def seekable(self):
 355         """Return whether object supports random access.
 356
 357         If False, seek(), tell() and truncate() will raise IOError.
 358         This method may need to do a test seek().
 359         """
 360         return False
 361
 362     def _checkSeekable(self, msg=None):
 363         """Internal: raise an IOError if file is not seekable
 364         """
 365         if not self.seekable():
 366             raise IOError("File or stream is not seekable."
 367                           if msg is None else msg)
 368
 369
 370     def readable(self):
 371         """Return whether object was opened for reading.
 372
 373         If False, read() will raise IOError.
 374         """
 375         return False
 376
 377     def _checkReadable(self, msg=None):
 378         """Internal: raise an IOError if file is not readable
 379         """
 380         if not self.readable():
 381             raise IOError("File or stream is not readable."
 382                           if msg is None else msg)
 383
 384     def writable(self):
 385         """Return whether object was opened for writing.
 386
 387         If False, write() and truncate() will raise IOError.
 388         """
 389         return False
 390
 391     def _checkWritable(self, msg=None):
 392         """Internal: raise an IOError if file is not writable
 393         """
 394         if not self.writable():
 395             raise IOError("File or stream is not writable."
 396                           if msg is None else msg)
 397
 398     @property
 399     def closed(self):
 400         """closed: bool.  True iff the file has been closed.
 401
 402         For backwards compatibility, this is a property, not a predicate.
 403         """
 404         return self.__closed
 405
 406     def _checkClosed(self, msg=None):
 407         """Internal: raise an ValueError if file is closed
 408         """
 409         if self.closed:
 410             raise ValueError("I/O operation on closed file."
 411                              if msg is None else msg)
 412
 413     ### Context manager ###
 414
 415     def __enter__(self):
 416         """Context management protocol.  Returns self."""
 417         self._checkClosed()
 418         return self
 419
 420     def __exit__(self, *args):
 421         """Context management protocol.  Calls close()"""
 422         self.close()
 423
 424     ### Lower-level APIs ###
 425
 426     # XXX Should these be present even if unimplemented?
 427
 428     def fileno(self):
 429         """Returns underlying file descriptor if one exists.
 430
 431         An IOError is raised if the IO object does not use a file descriptor.
 432         """
 433         self._unsupported("fileno")
 434
 435     def isatty(self):
 436         """Return whether this is an 'interactive' stream.
 437
 438         Return False if it can't be determined.
 439         """
 440         self._checkClosed()
 441         return False
 442
 443     ### Readline[s] and writelines ###
 444
 445     def readline(self, limit=-1):
 446         r"""Read and return a line from the stream.
 447
 448         If limit is specified, at most limit bytes will be read.
 449
 450         The line terminator is always b'\n' for binary files; for text
 451         files, the newlines argument to open can be used to select the line
 452         terminator(s) recognized.
 453         """
 454         # For backwards compatibility, a (slowish) readline().
 455         if hasattr(self, "peek"):
 456             def nreadahead():
 457                 readahead = self.peek(1)
 458                 if not readahead:
 459                     return 1
 460                 n = (readahead.find(b"\n") + 1) or len(readahead)
 461                 if limit >= 0:
 462                     n = min(n, limit)
 463                 return n
 464         else:
 465             def nreadahead():
 466                 return 1
 467         if limit is None:
 468             limit = -1
 469         elif not isinstance(limit, (int, long)):
 470             raise TypeError("limit must be an integer")
 471         res = bytearray()
 472         while limit < 0 or len(res) < limit:
 473             b = self.read(nreadahead())
 474             if not b:
 475                 break
 476             res += b
 477             if res.endswith(b"\n"):
 478                 break
 479         return bytes(res)
 480
 481     def __iter__(self):
 482         self._checkClosed()
 483         return self
 484
 485     def next(self):
 486         line = self.readline()
 487         if not line:
 488             raise StopIteration
 489         return line
 490
 491     def readlines(self, hint=None):
 492         """Return a list of lines from the stream.
 493
 494         hint can be specified to control the number of lines read: no more
 495         lines will be read if the total size (in bytes/characters) of all
 496         lines so far exceeds hint.
 497         """
 498         if hint is not None and not isinstance(hint, (int, long)):
 499             raise TypeError("integer or None expected")
 500         if hint is None or hint <= 0:
 501             return list(self)
 502         n = 0
 503         lines = []
 504         for line in self:
 505             lines.append(line)
 506             n += len(line)
 507             if n >= hint:
 508                 break
 509         return lines
 510
 511     def writelines(self, lines):
 512         self._checkClosed()
 513         for line in lines:
 514             self.write(line)
 515
# Register the pure-Python class with the "official" io ABC (virtual
# subclassing, so the C implementation is not inherited).
io.IOBase.register(IOBase)
 517
 518
 519 class RawIOBase(IOBase):
 520
 521     """Base class for raw binary I/O."""
 522
 523     # The read() method is implemented by calling readinto(); derived
 524     # classes that want to support read() only need to implement
 525     # readinto() as a primitive operation.  In general, readinto() can be
 526     # more efficient than read().
 527
 528     # (It would be tempting to also provide an implementation of
 529     # readinto() in terms of read(), in case the latter is a more suitable
 530     # primitive operation, but that would lead to nasty recursion in case
 531     # a subclass doesn't implement either.)
 532
 533     def read(self, n=-1):
 534         """Read and return up to n bytes.
 535
 536         Returns an empty bytes object on EOF, or None if the object is
 537         set not to block and has no data to read.
 538         """
 539         if n is None:
 540             n = -1
 541         if n < 0:
 542             return self.readall()
 543         b = bytearray(n.__index__())
 544         n = self.readinto(b)
 545         del b[n:]
 546         return bytes(b)
 547
 548     def readall(self):
 549         """Read until EOF, using multiple read() call."""
 550         res = bytearray()
 551         while True:
 552             data = self.read(DEFAULT_BUFFER_SIZE)
 553             if not data:
 554                 break
 555             res += data
 556         return bytes(res)
 557
 558     def readinto(self, b):
 559         """Read up to len(b) bytes into b.
 560
 561         Returns number of bytes read (0 for EOF), or None if the object
 562         is set not to block as has no data to read.
 563         """
 564         self._unsupported("readinto")
 565
 566     def write(self, b):
 567         """Write the given buffer to the IO stream.
 568
 569         Returns the number of bytes written, which may be less than len(b).
 570         """
 571         self._unsupported("write")
 572
# Register the pure-Python class with the "official" io ABC.
io.RawIOBase.register(RawIOBase)
# FileIO comes from the C _io module; it is registered as a virtual
# subclass of the pure-Python RawIOBase.
from _io import FileIO
RawIOBase.register(FileIO)
 576
 577
 578 class BufferedIOBase(IOBase):
 579
 580     """Base class for buffered IO objects.
 581
 582     The main difference with RawIOBase is that the read() method
 583     supports omitting the size argument, and does not have a default
 584     implementation that defers to readinto().
 585
 586     In addition, read(), readinto() and write() may raise
 587     BlockingIOError if the underlying raw stream is in non-blocking
 588     mode and not ready; unlike their raw counterparts, they will never
 589     return None.
 590
 591     A typical implementation should not inherit from a RawIOBase
 592     implementation, but wrap one.
 593     """
 594
 595     def read(self, n=None):
 596         """Read and return up to n bytes.
 597
 598         If the argument is omitted, None, or negative, reads and
 599         returns all data until EOF.
 600
 601         If the argument is positive, and the underlying raw stream is
 602         not 'interactive', multiple raw reads may be issued to satisfy
 603         the byte count (unless EOF is reached first).  But for
 604         interactive raw streams (XXX and for pipes?), at most one raw
 605         read will be issued, and a short result does not imply that
 606         EOF is imminent.
 607
 608         Returns an empty bytes array on EOF.
 609
 610         Raises BlockingIOError if the underlying raw stream has no
 611         data at the moment.
 612         """
 613         self._unsupported("read")
 614
 615     def read1(self, n=None):
 616         """Read up to n bytes with at most one read() system call."""
 617         self._unsupported("read1")
 618
 619     def readinto(self, b):
 620         """Read up to len(b) bytes into b.
 621
 622         Like read(), this may issue multiple reads to the underlying raw
 623         stream, unless the latter is 'interactive'.
 624
 625         Returns the number of bytes read (0 for EOF).
 626
 627         Raises BlockingIOError if the underlying raw stream has no
 628         data at the moment.
 629         """
 630         # XXX This ought to work with anything that supports the buffer API
 631         data = self.read(len(b))
 632         n = len(data)
 633         try:
 634             b[:n] = data
 635         except TypeError as err:
 636             import array
 637             if not isinstance(b, array.array):
 638                 raise err
 639             b[:n] = array.array(b'b', data)
 640         return n
 641
 642     def write(self, b):
 643         """Write the given buffer to the IO stream.
 644
 645         Return the number of bytes written, which is never less than
 646         len(b).
 647
 648         Raises BlockingIOError if the buffer is full and the
 649         underlying raw stream cannot accept more data at the moment.
 650         """
 651         self._unsupported("write")
 652
 653     def detach(self):
 654         """
 655         Separate the underlying raw stream from the buffer and return it.
 656
 657         After the raw stream has been detached, the buffer is in an unusable
 658         state.
 659         """
 660         self._unsupported("detach")
 661
# Register the pure-Python class with the "official" io ABC.
io.BufferedIOBase.register(BufferedIOBase)
 663
 664
 665 class _BufferedIOMixin(BufferedIOBase):
 666
 667     """A mixin implementation of BufferedIOBase with an underlying raw stream.
 668
 669     This passes most requests on to the underlying raw stream.  It
 670     does *not* provide implementations of read(), readinto() or
 671     write().
 672     """
 673
 674     def __init__(self, raw):
 675         self.raw = raw
 676
 677     ### Positioning ###
 678
 679     def seek(self, pos, whence=0):
 680         new_position = self.raw.seek(pos, whence)
 681         if new_position < 0:
 682             raise IOError("seek() returned an invalid position")
 683         return new_position
 684
 685     def tell(self):
 686         pos = self.raw.tell()
 687         if pos < 0:
 688             raise IOError("tell() returned an invalid position")
 689         return pos
 690
 691     def truncate(self, pos=None):
 692         # Flush the stream.  We're mixing buffered I/O with lower-level I/O,
 693         # and a flush may be necessary to synch both views of the current
 694         # file state.
 695         self.flush()
 696
 697         if pos is None:
 698             pos = self.tell()
 699         # XXX: Should seek() be used, instead of passing the position
 700         # XXX  directly to truncate?
 701         return self.raw.truncate(pos)
 702
 703     ### Flush and close ###
 704
 705     def flush(self):
 706         self.raw.flush()
 707
 708     def close(self):
 709         if not self.closed and self.raw is not None:
 710             try:
 711                 self.flush()
 712             except IOError:
 713                 pass  # If flush() fails, just give up
 714             self.raw.close()
 715
 716     def detach(self):
 717         if self.raw is None:
 718             raise ValueError("raw stream already detached")
 719         self.flush()
 720         raw = self.raw
 721         self.raw = None
 722         return raw
 723
 724     ### Inquiries ###
 725
 726     def seekable(self):
 727         return self.raw.seekable()
 728
 729     def readable(self):
 730         return self.raw.readable()
 731
 732     def writable(self):
 733         return self.raw.writable()
 734
 735     @property
 736     def closed(self):
 737         return self.raw.closed
 738
 739     @property
 740     def name(self):
 741         return self.raw.name
 742
 743     @property
 744     def mode(self):
 745         return self.raw.mode
 746
 747     def __repr__(self):
 748         clsname = self.__class__.__name__
 749         try:
 750             name = self.name
 751         except AttributeError:
 752             return "<_pyio.{0}>".format(clsname)
 753         else:
 754             return "<_pyio.{0} name={1!r}>".format(clsname, name)
 755
 756     ### Lower-level APIs ###
 757
 758     def fileno(self):
 759         return self.raw.fileno()
 760
 761     def isatty(self):
 762         return self.raw.isatty()
 763
 764
 765 class BytesIO(BufferedIOBase):
 766
 767     """Buffered I/O implementation using an in-memory bytes buffer."""
 768
 769     def __init__(self, initial_bytes=None):
 770         buf = bytearray()
 771         if initial_bytes is not None:
 772             buf.extend(initial_bytes)
 773         self._buffer = buf
 774         self._pos = 0
 775
 776     def getvalue(self):
 777         """Return the bytes value (contents) of the buffer
 778         """
 779         if self.closed:
 780             raise ValueError("getvalue on closed file")
 781         return bytes(self._buffer)
 782
 783     def read(self, n=None):
 784         if self.closed:
 785             raise ValueError("read from closed file")
 786         if n is None:
 787             n = -1
 788         if not isinstance(n, (int, long)):
 789             raise TypeError("integer argument expected, got {0!r}".format(
 790                 type(n)))
 791         if n < 0:
 792             n = len(self._buffer)
 793         if len(self._buffer) <= self._pos:
 794             return b""
 795         newpos = min(len(self._buffer), self._pos + n)
 796         b = self._buffer[self._pos : newpos]
 797         self._pos = newpos
 798         return bytes(b)
 799
 800     def read1(self, n):
 801         """This is the same as read.
 802         """
 803         return self.read(n)
 804
 805     def write(self, b):
 806         if self.closed:
 807             raise ValueError("write to closed file")
 808         if isinstance(b, unicode):
 809             raise TypeError("can't write unicode to binary stream")
 810         n = len(b)
 811         if n == 0:
 812             return 0
 813         pos = self._pos
 814         if pos > len(self._buffer):
 815             # Inserts null bytes between the current end of the file
 816             # and the new write position.
 817             padding = b'\x00' * (pos - len(self._buffer))
 818             self._buffer += padding
 819         self._buffer[pos:pos + n] = b
 820         self._pos += n
 821         return n
 822
 823     def seek(self, pos, whence=0):
 824         if self.closed:
 825             raise ValueError("seek on closed file")
 826         try:
 827             pos = pos.__index__()
 828         except AttributeError as err:
 829             raise TypeError("an integer is required")
 830         if whence == 0:
 831             if pos < 0:
 832                 raise ValueError("negative seek position %r" % (pos,))
 833             self._pos = pos
 834         elif whence == 1:
 835             self._pos = max(0, self._pos + pos)
 836         elif whence == 2:
 837             self._pos = max(0, len(self._buffer) + pos)
 838         else:
 839             raise ValueError("invalid whence value")
 840         return self._pos
 841
    def tell(self):
        """Return the current stream position.

        Raises ValueError if the stream is closed.
        """
        if self.closed:
            raise ValueError("tell on closed file")
        return self._pos
 846
 847     def truncate(self, pos=None):
 848         if self.closed:
 849             raise ValueError("truncate on closed file")
 850         if pos is None:
 851             pos = self._pos
 852         elif pos < 0:
 853             raise ValueError("negative truncate position %r" % (pos,))
 854         del self._buffer[pos:]
 855         return self.seek(pos)
 856
    def readable(self):
        """Report read support; this implementation always returns True."""
        return True
 859
    def writable(self):
        """Report write support; this implementation always returns True."""
        return True
 862
    def seekable(self):
        """Report seek support; this implementation always returns True."""
        return True
 865
 866
 867 class BufferedReader(_BufferedIOMixin):
 868
 869     """BufferedReader(raw[, buffer_size])
 870
 871     A buffer for a readable, sequential BaseRawIO object.
 872
 873     The constructor creates a BufferedReader for the given readable raw
 874     stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
 875     is used.
 876     """
 877
 878     def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
 879         """Create a new buffered reader using the given readable raw IO object.
 880         """
 881         if not raw.readable():
 882             raise IOError('"raw" argument must be readable.')
 883
 884         _BufferedIOMixin.__init__(self, raw)
 885         if buffer_size <= 0:
 886             raise ValueError("invalid buffer size")
 887         self.buffer_size = buffer_size
 888         self._reset_read_buf()
 889         self._read_lock = Lock()
 890
 891     def _reset_read_buf(self):
 892         self._read_buf = b""
 893         self._read_pos = 0
 894
 895     def read(self, n=None):
 896         """Read n bytes.
 897
 898         Returns exactly n bytes of data unless the underlying raw IO
 899         stream reaches EOF or if the call would block in non-blocking
 900         mode. If n is negative, read until EOF or until read() would
 901         block.
 902         """
 903         if n is not None and n < -1:
 904             raise ValueError("invalid number of bytes to read")
 905         with self._read_lock:
 906             return self._read_unlocked(n)
 907
 908     def _read_unlocked(self, n=None):
 909         nodata_val = b""
 910         empty_values = (b"", None)
 911         buf = self._read_buf
 912         pos = self._read_pos
 913
 914         # Special case for when the number of bytes to read is unspecified.
 915         if n is None or n == -1:
 916             self._reset_read_buf()
 917             chunks = [buf[pos:]]  # Strip the consumed bytes.
 918             current_size = 0
 919             while True:
 920                 # Read until EOF or until read() would block.
 921                 chunk = self.raw.read()
 922                 if chunk in empty_values:
 923                     nodata_val = chunk
 924                     break
 925                 current_size += len(chunk)
 926                 chunks.append(chunk)
 927             return b"".join(chunks) or nodata_val
 928
 929         # The number of bytes to read is specified, return at most n bytes.
 930         avail = len(buf) - pos  # Length of the available buffered data.
 931         if n <= avail:
 932             # Fast path: the data to read is fully buffered.
 933             self._read_pos += n
 934             return buf[pos:pos+n]
 935         # Slow path: read from the stream until enough bytes are read,
 936         # or until an EOF occurs or until read() would block.
 937         chunks = [buf[pos:]]
 938         wanted = max(self.buffer_size, n)
 939         while avail < n:
 940             chunk = self.raw.read(wanted)
 941             if chunk in empty_values:
 942                 nodata_val = chunk
 943                 break
 944             avail += len(chunk)
 945             chunks.append(chunk)
 946         # n is more then avail only when an EOF occurred or when
 947         # read() would have blocked.
 948         n = min(n, avail)
 949         out = b"".join(chunks)
 950         self._read_buf = out[n:]  # Save the extra data in the buffer.
 951         self._read_pos = 0
 952         return out[:n] if out else nodata_val
 953
 954     def peek(self, n=0):
 955         """Returns buffered bytes without advancing the position.
 956
 957         The argument indicates a desired minimal number of bytes; we
 958         do at most one raw read to satisfy it.  We never return more
 959         than self.buffer_size.
 960         """
 961         with self._read_lock:
 962             return self._peek_unlocked(n)
 963
 964     def _peek_unlocked(self, n=0):
 965         want = min(n, self.buffer_size)
 966         have = len(self._read_buf) - self._read_pos
 967         if have < want or have <= 0:
 968             to_read = self.buffer_size - have
 969             current = self.raw.read(to_read)
 970             if current:
 971                 self._read_buf = self._read_buf[self._read_pos:] + current
 972                 self._read_pos = 0
 973         return self._read_buf[self._read_pos:]
 974
 975     def read1(self, n):
 976         """Reads up to n bytes, with at most one read() system call."""
 977         # Returns up to n bytes.  If at least one byte is buffered, we
 978         # only return buffered bytes.  Otherwise, we do one raw read.
 979         if n < 0:
 980             raise ValueError("number of bytes to read must be positive")
 981         if n == 0:
 982             return b""
 983         with self._read_lock:
 984             self._peek_unlocked(1)
 985             return self._read_unlocked(
 986                 min(n, len(self._read_buf) - self._read_pos))
 987
 988     def tell(self):
 989         return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos
 990
 991     def seek(self, pos, whence=0):
 992         if not (0 <= whence <= 2):
 993             raise ValueError("invalid whence value")
 994         with self._read_lock:
 995             if whence == 1:
 996                 pos -= len(self._read_buf) - self._read_pos
 997             pos = _BufferedIOMixin.seek(self, pos, whence)
 998             self._reset_read_buf()
 999             return pos
1000
class BufferedWriter(_BufferedIOMixin):

    """Write-side buffer for a writeable sequential RawIO object.

    Bytes handed to write() are collected in an internal bytearray and
    passed on to the raw stream once more than buffer_size bytes are
    pending, or when flush(), seek() or truncate() is called.  If
    buffer_size is not given it defaults to DEFAULT_BUFFER_SIZE.
    max_buffer_size is deprecated; passing it only emits a
    DeprecationWarning.
    """

    # Stack level used for the deprecation warning issued by __init__.
    _warning_stack_offset = 2

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Wrap the writable raw stream.

        Raises IOError if raw.writable() is false and ValueError if
        buffer_size is not strictly positive.
        """
        if not raw.writable():
            raise IOError('"raw" argument must be writable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        if max_buffer_size is not None:
            warnings.warn("max_buffer_size is deprecated", DeprecationWarning,
                          self._warning_stack_offset)
        self.buffer_size = buffer_size
        self._write_buf = bytearray()
        self._write_lock = Lock()

    def write(self, b):
        """Buffer b and return the number of bytes accepted.

        Raises ValueError on a closed file, TypeError for unicode input,
        and BlockingIOError (with characters_written set) when the raw
        stream would block and the buffer cannot hold the data.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, unicode):
            raise TypeError("can't write unicode to binary stream")
        with self._write_lock:
            # XXX we can implement some more tricks to try and avoid
            # partial writes
            if len(self._write_buf) > self.buffer_size:
                # Already over the threshold: drain before taking more.
                try:
                    self._flush_unlocked()
                except BlockingIOError as exc:
                    # Nothing from this call has been accepted yet.
                    # XXX Why not just let the exception pass through?
                    raise BlockingIOError(exc.errno, exc.strerror, 0)
            pending_before = len(self._write_buf)
            self._write_buf.extend(b)
            accepted = len(self._write_buf) - pending_before
            if len(self._write_buf) > self.buffer_size:
                try:
                    self._flush_unlocked()
                except BlockingIOError as exc:
                    excess = len(self._write_buf) - self.buffer_size
                    if excess > 0:
                        # Still over buffer_size: report a partial write
                        # and trim the buffer back to the limit.
                        accepted -= excess
                        self._write_buf = self._write_buf[:self.buffer_size]
                        raise BlockingIOError(exc.errno, exc.strerror,
                                              accepted)
            return accepted

    def truncate(self, pos=None):
        """Flush, then truncate the raw stream at pos.

        When pos is omitted the raw stream's position after the flush
        is used.
        """
        with self._write_lock:
            self._flush_unlocked()
            target = self.raw.tell() if pos is None else pos
            return self.raw.truncate(target)

    def flush(self):
        """Write all pending bytes to the raw stream."""
        with self._write_lock:
            self._flush_unlocked()

    def _flush_unlocked(self):
        """Implementation of flush(); the caller must hold _write_lock.

        Raises ValueError on a closed file, IOError if the raw stream
        reports an impossible byte count, and BlockingIOError carrying
        the total number of bytes flushed if the raw stream would block.
        """
        if self.closed:
            raise ValueError("flush of closed file")
        flushed = 0
        try:
            while self._write_buf:
                count = self.raw.write(self._write_buf)
                if count > len(self._write_buf) or count < 0:
                    raise IOError("write() returned incorrect number of bytes")
                del self._write_buf[:count]
                flushed += count
        except BlockingIOError as exc:
            # Account for what the raw stream did take before blocking.
            count = exc.characters_written
            del self._write_buf[:count]
            flushed += count
            raise BlockingIOError(exc.errno, exc.strerror, flushed)

    def tell(self):
        """Return the logical position: raw position plus pending bytes."""
        return _BufferedIOMixin.tell(self) + len(self._write_buf)

    def seek(self, pos, whence=0):
        """Flush pending bytes, then seek the raw stream.

        Raises ValueError if whence is not in the range 0..2.
        """
        if not (0 <= whence <= 2):
            raise ValueError("invalid whence")
        with self._write_lock:
            self._flush_unlocked()
            return _BufferedIOMixin.seek(self, pos, whence)
1096
1097
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together to
    form a sequential IO object that can read and write. This is typically
    used with a socket or two-way pipe.

    reader and writer are RawIOBase objects that are readable and
    writeable respectively. If the buffer_size is omitted it defaults to
    DEFAULT_BUFFER_SIZE.
    """

    # XXX The usefulness of this (compared to having two separate IO
    # objects) is questionable.

    def __init__(self, reader, writer,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Constructor.

        The arguments are two RawIO instances.  reader is wrapped in a
        BufferedReader and writer in a BufferedWriter, both using
        buffer_size.  max_buffer_size is deprecated and only triggers a
        DeprecationWarning.

        Raises IOError if reader is not readable or writer is not
        writable.
        """
        if max_buffer_size is not None:
            warnings.warn("max_buffer_size is deprecated", DeprecationWarning, 2)

        if not reader.readable():
            raise IOError('"reader" argument must be readable.')

        if not writer.writable():
            raise IOError('"writer" argument must be writable.')

        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size)

    def read(self, n=None):
        """Read from the reader half; n=None is passed on as -1."""
        if n is None:
            n = -1
        return self.reader.read(n)

    def readinto(self, b):
        """Forward to the reader's readinto()."""
        return self.reader.readinto(b)

    def write(self, b):
        """Forward to the writer's write()."""
        return self.writer.write(b)

    def peek(self, n=0):
        """Forward to the reader's peek()."""
        return self.reader.peek(n)

    def read1(self, n):
        """Forward to the reader's read1()."""
        return self.reader.read1(n)

    def readable(self):
        """Return the reader's readable() result."""
        return self.reader.readable()

    def writable(self):
        """Return the writer's writable() result."""
        return self.writer.writable()

    def flush(self):
        """Flush the writer half."""
        return self.writer.flush()

    def close(self):
        """Close both halves of the pair.

        The writer is closed first.  The reader is closed even when
        closing the writer raises, so a failure on the write side cannot
        leave the read side open; the writer's exception still
        propagates.
        """
        try:
            self.writer.close()
        finally:
            self.reader.close()

    def isatty(self):
        """Return true if either half reports being a tty."""
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        """The closed state of the writer half."""
        return self.writer.closed
1168
1169
class BufferedRandom(BufferedWriter, BufferedReader):

    """Buffered access to a seekable stream that is both read and written.

    The constructor initialises the reader and the writer machinery on
    the same seekable raw stream, given as the first argument.  If
    buffer_size is omitted it defaults to DEFAULT_BUFFER_SIZE.
    """

    # One frame deeper than BufferedWriter, whose __init__ is called
    # from ours and issues the deprecation warning.
    _warning_stack_offset = 3

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Set up read and write buffering over the seekable raw stream."""
        raw._checkSeekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)

    def seek(self, pos, whence=0):
        """Flush writes, undo read-ahead, then seek the raw stream.

        Returns the new position.  Raises ValueError for a whence
        outside 0..2 and IOError if the raw stream reports a negative
        position.
        """
        if not (0 <= whence <= 2):
            raise ValueError("invalid whence")
        self.flush()
        if self._read_buf:
            # Move the raw stream back over the bytes that were read
            # ahead but not consumed.
            with self._read_lock:
                self.raw.seek(self._read_pos - len(self._read_buf), 1)
        # The raw seek comes before the buffer reset so that a failing
        # seek does not throw the buffered data away.
        new_pos = self.raw.seek(pos, whence)
        with self._read_lock:
            self._reset_read_buf()
        if new_pos < 0:
            raise IOError("seek() returned invalid position")
        return new_pos

    def tell(self):
        """Return the logical position.

        The writer's computation is used while bytes are pending in the
        write buffer, the reader's otherwise.
        """
        side = BufferedWriter if self._write_buf else BufferedReader
        return side.tell(self)

    def truncate(self, pos=None):
        """Truncate at pos, defaulting to the current position."""
        if pos is None:
            pos = self.tell()
        # seek() also discards the read buffer.
        self.seek(pos)
        return BufferedWriter.truncate(self)

    def read(self, n=None):
        """Flush pending writes, then read; n=None is passed on as -1."""
        self.flush()
        return BufferedReader.read(self, -1 if n is None else n)

    def readinto(self, b):
        """Flush pending writes, then read into b."""
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, n=0):
        """Flush pending writes, then peek at the read buffer."""
        self.flush()
        return BufferedReader.peek(self, n)

    def read1(self, n):
        """Flush pending writes, then do a read1()."""
        self.flush()
        return BufferedReader.read1(self, n)

    def write(self, b):
        """Undo any read-ahead, then buffer b for writing."""
        if self._read_buf:
            with self._read_lock:
                # Step the raw stream back over the unread bytes and
                # drop them from the buffer.
                unread = len(self._read_buf) - self._read_pos
                self.raw.seek(-unread, 1)
                self._reset_read_buf()
        return BufferedWriter.write(self, b)
1242
1243
class TextIOBase(IOBase):

    """Base class for text I/O.

    This class provides a character and line based interface to stream
    I/O. There is no readinto method because Python's character strings
    are immutable. There is no public constructor.

    The I/O methods defined here are placeholders: each one only calls
    self._unsupported() with its own name (that helper is defined
    outside this class; presumably it raises -- confirm against IOBase).
    The properties all return None until a subclass overrides them.
    """

    def read(self, n=-1):
        """Read at most n characters from stream.

        Read from underlying buffer until we have n characters or we hit EOF.
        If n is negative or omitted, read until EOF.
        """
        self._unsupported("read")

    def write(self, s):
        """Write string s to stream."""
        self._unsupported("write")

    def truncate(self, pos=None):
        """Truncate size to pos."""
        self._unsupported("truncate")

    def readline(self):
        """Read until newline or EOF.

        Returns an empty string if EOF is hit immediately.
        """
        self._unsupported("readline")

    def detach(self):
        """
        Separate the underlying buffer from the TextIOBase and return it.

        After the underlying buffer has been detached, the TextIO is in an
        unusable state.
        """
        self._unsupported("detach")

    @property
    def encoding(self):
        """Name of the text encoding; None here.

        Subclasses should override.
        """
        return None

    @property
    def newlines(self):
        """Line endings translated so far.

        Only line endings translated during reading are considered.

        Subclasses should override.
        """
        return None

    @property
    def errors(self):
        """Error setting of the decoder or encoder.

        Subclasses should override."""
        return None

# Register with the io module's TextIOBase ABC instead of inheriting
# from it.
io.TextIOBase.register(TextIOBase)
1308
1309
class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
    r"""Incremental decoder that normalises and tracks line endings.

    It wraps another incremental decoder (or None, in which case the
    input is taken as already decoded).  With translate=True, \r\n and
    \r are turned into \n.  With translate=False the text is left alone,
    but a trailing \r is still held back until the next call so that a
    \r\n pair is always delivered in one piece.  The kinds of newline
    seen so far are available through the newlines property.
    """

    # Bit flags accumulated in self.seennl.
    _LF = 1
    _CR = 2
    _CRLF = 4

    def __init__(self, decoder, translate, errors='strict'):
        codecs.IncrementalDecoder.__init__(self, errors=errors)
        self.translate = translate
        self.decoder = decoder
        self.seennl = 0
        self.pendingcr = False

    def decode(self, input, final=False):
        """Decode input and return the resulting text.

        A \r held back by the previous call is put back in front of the
        new output; a \r at the very end of the output is held back in
        turn unless final is true.
        """
        if self.decoder is not None:
            text = self.decoder.decode(input, final=final)
        else:
            text = input

        # Re-attach the \r that was withheld last time.
        if self.pendingcr and (text or final):
            text = "\r" + text
            self.pendingcr = False

        # Withhold a trailing \r, even when not translating, so that a
        # following \n arrives together with it.
        if text.endswith("\r") and not final:
            text = text[:-1]
            self.pendingcr = True

        # Note which newline styles occur in this piece of text.
        n_crlf = text.count('\r\n')
        n_cr = text.count('\r') - n_crlf
        n_lf = text.count('\n') - n_crlf
        seen = 0
        if n_lf:
            seen |= self._LF
        if n_cr:
            seen |= self._CR
        if n_crlf:
            seen |= self._CRLF
        self.seennl |= seen

        if self.translate:
            if n_crlf:
                text = text.replace("\r\n", "\n")
            if n_cr:
                text = text.replace("\r", "\n")

        return text

    def getstate(self):
        """Return (buffer, flag) with pendingcr stored in the lowest bit.

        The wrapped decoder's own flag is shifted left by one bit to
        make room.
        """
        if self.decoder is not None:
            buf, flag = self.decoder.getstate()
        else:
            buf, flag = b"", 0
        flag <<= 1
        if self.pendingcr:
            flag |= 1
        return buf, flag

    def setstate(self, state):
        """Restore a state previously produced by getstate()."""
        buf, flag = state
        self.pendingcr = bool(flag & 1)
        if self.decoder is not None:
            self.decoder.setstate((buf, flag >> 1))

    def reset(self):
        """Forget seen newlines and any pending \\r; reset the decoder."""
        self.seennl = 0
        self.pendingcr = False
        if self.decoder is not None:
            self.decoder.reset()

    @property
    def newlines(self):
        """The newline styles seen so far.

        None if there were none, a single string if there was one
        style, otherwise a tuple of strings.
        """
        # Indexed by the _LF | _CR | _CRLF bit mask in self.seennl.
        by_mask = (
            None,
            "\n",
            "\r",
            ("\r", "\n"),
            "\r\n",
            ("\n", "\r\n"),
            ("\r", "\r\n"),
            ("\r", "\n", "\r\n"),
        )
        return by_mask[self.seennl]
1393
1394
1395 class TextIOWrapper(TextIOBase):
1396
1397     r"""Character and line based layer over a BufferedIOBase object, buffer.
1398
1399     encoding gives the name of the encoding that the stream will be
1400     decoded or encoded with. It defaults to locale.getpreferredencoding.
1401
1402     errors determines the strictness of encoding and decoding (see the
1403     codecs.register) and defaults to "strict".
1404
1405     newline can be None, '', '\n', '\r', or '\r\n'.  It controls the
1406     handling of line endings. If it is None, universal newlines is
1407     enabled.  With this enabled, on input, the lines endings '\n', '\r',
1408     or '\r\n' are translated to '\n' before being returned to the
1409     caller. Conversely, on output, '\n' is translated to the system
    default line separator, os.linesep. If newline is any other of its
1411     legal values, that newline becomes the newline when the file is read
1412     and it is returned untranslated. On output, '\n' is converted to the
1413     newline.
1414
1415     If line_buffering is True, a call to flush is implied when a call to
1416     write contains a newline character.
1417     """
1418
1419     _CHUNK_SIZE = 2048
1420
1421     def __init__(self, buffer, encoding=None, errors=None, newline=None,
1422                  line_buffering=False):
1423         if newline is not None and not isinstance(newline, basestring):
1424             raise TypeError("illegal newline type: %r" % (type(newline),))
1425         if newline not in (None, "", "\n", "\r", "\r\n"):
1426             raise ValueError("illegal newline value: %r" % (newline,))
1427         if encoding is None:
1428             try:
1429                 encoding = os.device_encoding(buffer.fileno())
1430             except (AttributeError, UnsupportedOperation):
1431                 pass
1432             if encoding is None:
1433                 try:
1434                     import locale
1435                 except ImportError:
1436                     # Importing locale may fail if Python is being built
1437                     encoding = "ascii"
1438                 else:
1439                     encoding = locale.getpreferredencoding()
1440
1441         if not isinstance(encoding, basestring):
1442             raise ValueError("invalid encoding: %r" % encoding)
1443
1444         if errors is None:
1445             errors = "strict"
1446         else:
1447             if not isinstance(errors, basestring):
1448                 raise ValueError("invalid errors: %r" % errors)
1449
1450         self.buffer = buffer
1451         self._line_buffering = line_buffering
1452         self._encoding = encoding
1453         self._errors = errors
1454         self._readuniversal = not newline
1455         self._readtranslate = newline is None
1456         self._readnl = newline
1457         self._writetranslate = newline != ''
1458         self._writenl = newline or os.linesep
1459         self._encoder = None
1460         self._decoder = None
1461         self._decoded_chars = ''  # buffer for text returned from decoder
1462         self._decoded_chars_used = 0  # offset into _decoded_chars for read()
1463         self._snapshot = None  # info for reconstructing decoder state
1464         self._seekable = self._telling = self.buffer.seekable()
1465
1466         if self._seekable and self.writable():
1467             position = self.buffer.tell()
1468             if position != 0:
1469                 try:
1470                     self._get_encoder().setstate(0)
1471                 except LookupError:
1472                     # Sometimes the encoder doesn't exist
1473                     pass
1474
1475     # self._snapshot is either None, or a tuple (dec_flags, next_input)
1476     # where dec_flags is the second (integer) item of the decoder state
1477     # and next_input is the chunk of input bytes that comes next after the
1478     # snapshot point.  We use this to reconstruct decoder states in tell().
1479
1480     # Naming convention:
1481     #   - "bytes_..." for integer variables that count input bytes
1482     #   - "chars_..." for integer variables that count decoded characters
1483
1484     def __repr__(self):
1485         try:
1486             name = self.name
1487         except AttributeError:
1488             return "<_pyio.TextIOWrapper encoding='{0}'>".format(self.encoding)
1489         else:
1490             return "<_pyio.TextIOWrapper name={0!r} encoding='{1}'>".format(
1491                 name, self.encoding)
1492
    @property
    def encoding(self):
        """The encoding stored by the constructor in _encoding."""
        return self._encoding
1496
    @property
    def errors(self):
        """The error-handler setting stored by the constructor in _errors."""
        return self._errors
1500
    @property
    def line_buffering(self):
        """The line_buffering value passed to the constructor."""
        return self._line_buffering
1504
    def seekable(self):
        """Return the buffer's seekable() result as cached by __init__."""
        return self._seekable
1507
    def readable(self):
        """Return the underlying buffer's readable() result."""
        return self.buffer.readable()
1510
    def writable(self):
        """Return the underlying buffer's writable() result."""
        return self.buffer.writable()
1513
    def flush(self):
        """Flush the underlying buffer and reset the telling flag.

        After buffer.flush() returns, _telling is set back to the value
        of _seekable.
        """
        self.buffer.flush()
        self._telling = self._seekable
1517
1518     def close(self):
1519         if self.buffer is not None:
1520             try:
1521                 self.flush()
1522             except IOError:
1523                 pass  # If flush() fails, just give up
1524             self.buffer.close()
1525
    @property
    def closed(self):
        """The closed attribute of the underlying buffer."""
        return self.buffer.closed
1529
    @property
    def name(self):
        """The name attribute of the underlying buffer.

        No fallback is provided here: if the buffer has no name, the
        resulting AttributeError propagates to the caller.
        """
        return self.buffer.name
1533
    def fileno(self):
        """Return the underlying buffer's fileno() result."""
        return self.buffer.fileno()
1536
    def isatty(self):
        """Return the underlying buffer's isatty() result."""
        return self.buffer.isatty()
1539
1540     def write(self, s):
1541         if self.closed:
1542             raise ValueError("write to closed file")
1543         if not isinstance(s, unicode):
1544             raise TypeError("can't write %s to text stream" %
1545                             s.__class__.__name__)
1546         length = len(s)
1547         haslf = (self._writetranslate or self._line_buffering) and "\n" in s
1548         if haslf and self._writetranslate and self._writenl != "\n":
1549             s = s.replace("\n", self._writenl)
1550         encoder = self._encoder or self._get_encoder()
1551         # XXX What if we were just reading?
1552         b = encoder.encode(s)
1553         self.buffer.write(b)
1554         if self._line_buffering and (haslf or "\r" in s):
1555             self.flush()
1556         self._snapshot = None
1557         if self._decoder:
1558             self._decoder.reset()
1559         return length
1560
1561     def _get_encoder(self):
1562         make_encoder = codecs.getincrementalencoder(self._encoding)
1563         self._encoder = make_encoder(self._errors)
1564         return self._encoder
1565
1566     def _get_decoder(self):
1567         make_decoder = codecs.getincrementaldecoder(self._encoding)
1568         decoder = make_decoder(self._errors)
1569         if self._readuniversal:
1570             decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
1571         self._decoder = decoder
1572         return decoder
1573
1574     # The following three methods implement an ADT for _decoded_chars.
1575     # Text returned from the decoder is buffered here until the client
1576     # requests it by calling our read() or readline() method.
1577     def _set_decoded_chars(self, chars):
1578         """Set the _decoded_chars buffer."""
1579         self._decoded_chars = chars
1580         self._decoded_chars_used = 0
1581
1582     def _get_decoded_chars(self, n=None):
1583         """Advance into the _decoded_chars buffer."""
1584         offset = self._decoded_chars_used
1585         if n is None:
1586             chars = self._decoded_chars[offset:]
1587         else:
1588             chars = self._decoded_chars[offset:offset + n]
1589         self._decoded_chars_used += len(chars)
1590         return chars
1591
1592     def _rewind_decoded_chars(self, n):
1593         """Rewind the _decoded_chars buffer."""
1594         if self._decoded_chars_used < n:
1595             raise AssertionError("rewind decoded_chars out of bounds")
1596         self._decoded_chars_used -= n
1597
1598     def _read_chunk(self):
1599         """
1600         Read and decode the next chunk of data from the BufferedReader.
1601         """
1602
1603         # The return value is True unless EOF was reached.  The decoded
1604         # string is placed in self._decoded_chars (replacing its previous
1605         # value).  The entire input chunk is sent to the decoder, though
1606         # some of it may remain buffered in the decoder, yet to be
1607         # converted.
1608
1609         if self._decoder is None:
1610             raise ValueError("no decoder")
1611
1612         if self._telling:
1613             # To prepare for tell(), we need to snapshot a point in the
1614             # file where the decoder's input buffer is empty.
1615
1616             dec_buffer, dec_flags = self._decoder.getstate()
1617             # Given this, we know there was a valid snapshot point
1618             # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1619
1620         # Read a chunk, decode it, and put the result in self._decoded_chars.
1621         input_chunk = self.buffer.read1(self._CHUNK_SIZE)
1622         eof = not input_chunk
1623         self._set_decoded_chars(self._decoder.decode(input_chunk, eof))
1624
1625         if self._telling:
1626             # At the snapshot point, len(dec_buffer) bytes before the read,
1627             # the next input to be decoded is dec_buffer + input_chunk.
1628             self._snapshot = (dec_flags, dec_buffer + input_chunk)
1629
1630         return not eof
1631
1632     def _pack_cookie(self, position, dec_flags=0,
1633                            bytes_to_feed=0, need_eof=0, chars_to_skip=0):
1634         # The meaning of a tell() cookie is: seek to position, set the
1635         # decoder flags to dec_flags, read bytes_to_feed bytes, feed them
1636         # into the decoder with need_eof as the EOF flag, then skip
1637         # chars_to_skip characters of the decoded result.  For most simple
1638         # decoders, tell() will often just give a byte offset in the file.
1639         return (position | (dec_flags<<64) | (bytes_to_feed<<128) |
1640                (chars_to_skip<<192) | bool(need_eof)<<256)
1641
1642     def _unpack_cookie(self, bigint):
1643         rest, position = divmod(bigint, 1<<64)
1644         rest, dec_flags = divmod(rest, 1<<64)
1645         rest, bytes_to_feed = divmod(rest, 1<<64)
1646         need_eof, chars_to_skip = divmod(rest, 1<<64)
1647         return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip
1648
    def tell(self):
        """Return an opaque cookie describing the current text position.

        When there is no decoder or no snapshot, the underlying buffer's
        byte position is returned as-is.  Otherwise the result is built
        with _pack_cookie() so that seek() can later restore both the
        byte offset and the decoder state.

        Raises IOError if the stream is not seekable, if telling has been
        disabled by next(), or if the logical position cannot be
        reconstructed from the snapshot; raises AssertionError if decoded
        text is pending without a snapshot.
        """
        if not self._seekable:
            raise IOError("underlying stream is not seekable")
        if not self._telling:
            raise IOError("telling position disabled by next() call")
        # Push out pending writes before asking the buffer where it is.
        self.flush()
        position = self.buffer.tell()
        decoder = self._decoder
        if decoder is None or self._snapshot is None:
            if self._decoded_chars:
                # This should never happen.
                raise AssertionError("pending decoded text")
            return position

        # Skip backward to the snapshot point (see _read_chunk).
        dec_flags, next_input = self._snapshot
        position -= len(next_input)

        # How many decoded characters have been used up since the snapshot?
        chars_to_skip = self._decoded_chars_used
        if chars_to_skip == 0:
            # We haven't moved from the snapshot point.
            return self._pack_cookie(position, dec_flags)

        # Starting from the snapshot position, we will walk the decoder
        # forward until it gives us enough decoded characters.
        # The live decoder is borrowed for this replay, so remember its
        # state; the finally clause below puts it back.
        saved_state = decoder.getstate()
        try:
            # Note our initial start point.
            decoder.setstate((b'', dec_flags))
            start_pos = position
            start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
            need_eof = 0

            # Feed the decoder one byte at a time.  As we go, note the
            # nearest "safe start point" before the current location
            # (a point where the decoder has nothing buffered, so seek()
            # can safely start from there and advance to this location).
            # NOTE(review): this relies on iterating next_input yielding
            # one-byte strings, as a Python 2 str does -- confirm before
            # porting.
            for next_byte in next_input:
                bytes_fed += 1
                chars_decoded += len(decoder.decode(next_byte))
                dec_buffer, dec_flags = decoder.getstate()
                if not dec_buffer and chars_decoded <= chars_to_skip:
                    # Decoder buffer is empty, so this is a safe start point.
                    # Move the start point here and restart the counters
                    # relative to it.
                    start_pos += bytes_fed
                    chars_to_skip -= chars_decoded
                    start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
                if chars_decoded >= chars_to_skip:
                    break
            else:
                # We didn't get enough decoded data; signal EOF to get more.
                chars_decoded += len(decoder.decode(b'', final=True))
                need_eof = 1
                if chars_decoded < chars_to_skip:
                    raise IOError("can't reconstruct logical file position")

            # The returned cookie corresponds to the last safe start point.
            return self._pack_cookie(
                start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
        finally:
            decoder.setstate(saved_state)
1710
1711     def truncate(self, pos=None):
1712         self.flush()
1713         if pos is None:
1714             pos = self.tell()
1715         self.seek(pos)
1716         return self.buffer.truncate()
1717
1718     def detach(self):
1719         if self.buffer is None:
1720             raise ValueError("buffer is already detached")
1721         self.flush()
1722         buffer = self.buffer
1723         self.buffer = None
1724         return buffer
1725
1726     def seek(self, cookie, whence=0):
1727         if self.closed:
1728             raise ValueError("tell on closed file")
1729         if not self._seekable:
1730             raise IOError("underlying stream is not seekable")
1731         if whence == 1: # seek relative to current position
1732             if cookie != 0:
1733                 raise IOError("can't do nonzero cur-relative seeks")
1734             # Seeking to the current position should attempt to
1735             # sync the underlying buffer with the current position.
1736             whence = 0
1737             cookie = self.tell()
1738         if whence == 2: # seek relative to end of file
1739             if cookie != 0:
1740                 raise IOError("can't do nonzero end-relative seeks")
1741             self.flush()
1742             position = self.buffer.seek(0, 2)
1743             self._set_decoded_chars('')
1744             self._snapshot = None
1745             if self._decoder:
1746                 self._decoder.reset()
1747             return position
1748         if whence != 0:
1749             raise ValueError("invalid whence (%r, should be 0, 1 or 2)" %
1750                              (whence,))
1751         if cookie < 0:
1752             raise ValueError("negative seek position %r" % (cookie,))
1753         self.flush()
1754
1755         # The strategy of seek() is to go back to the safe start point
1756         # and replay the effect of read(chars_to_skip) from there.
1757         start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
1758             self._unpack_cookie(cookie)
1759
1760         # Seek back to the safe start point.
1761         self.buffer.seek(start_pos)
1762         self._set_decoded_chars('')
1763         self._snapshot = None
1764
1765         # Restore the decoder to its state from the safe start point.
1766         if cookie == 0 and self._decoder:
1767             self._decoder.reset()
1768         elif self._decoder or dec_flags or chars_to_skip:
1769             self._decoder = self._decoder or self._get_decoder()
1770             self._decoder.setstate((b'', dec_flags))
1771             self._snapshot = (dec_flags, b'')
1772
1773         if chars_to_skip:
1774             # Just like _read_chunk, feed the decoder and save a snapshot.
1775             input_chunk = self.buffer.read(bytes_to_feed)
1776             self._set_decoded_chars(
1777                 self._decoder.decode(input_chunk, need_eof))
1778             self._snapshot = (dec_flags, input_chunk)
1779
1780             # Skip chars_to_skip of the decoded characters.
1781             if len(self._decoded_chars) < chars_to_skip:
1782                 raise IOError("can't restore logical file position")
1783             self._decoded_chars_used = chars_to_skip
1784
1785         # Finally, reset the encoder (merely useful for proper BOM handling)
1786         try:
1787             encoder = self._encoder or self._get_encoder()
1788         except LookupError:
1789             # Sometimes the encoder doesn't exist
1790             pass
1791         else:
1792             if cookie != 0:
1793                 encoder.setstate(0)
1794             else:
1795                 encoder.reset()
1796         return cookie
1797
1798     def read(self, n=None):
1799         self._checkReadable()
1800         if n is None:
1801             n = -1
1802         decoder = self._decoder or self._get_decoder()
1803         if n < 0:
1804             # Read everything.
1805             result = (self._get_decoded_chars() +
1806                       decoder.decode(self.buffer.read(), final=True))
1807             self._set_decoded_chars('')
1808             self._snapshot = None
1809             return result
1810         else:
1811             # Keep reading chunks until we have n characters to return.
1812             eof = False
1813             result = self._get_decoded_chars(n)
1814             while len(result) < n and not eof:
1815                 eof = not self._read_chunk()
1816                 result += self._get_decoded_chars(n - len(result))
1817             return result
1818
1819     def next(self):
1820         self._telling = False
1821         line = self.readline()
1822         if not line:
1823             self._snapshot = None
1824             self._telling = self._seekable
1825             raise StopIteration
1826         return line
1827
1828     def readline(self, limit=None):
1829         if self.closed:
1830             raise ValueError("read from closed file")
1831         if limit is None:
1832             limit = -1
1833         elif not isinstance(limit, (int, long)):
1834             raise TypeError("limit must be an integer")
1835
1836         # Grab all the decoded text (we will rewind any extra bits later).
1837         line = self._get_decoded_chars()
1838
1839         start = 0
1840         # Make the decoder if it doesn't already exist.
1841         if not self._decoder:
1842             self._get_decoder()
1843
1844         pos = endpos = None
1845         while True:
1846             if self._readtranslate:
1847                 # Newlines are already translated, only search for \n
1848                 pos = line.find('\n', start)
1849                 if pos >= 0:
1850                     endpos = pos + 1
1851                     break
1852                 else:
1853                     start = len(line)
1854
1855             elif self._readuniversal:
1856                 # Universal newline search. Find any of \r, \r\n, \n
1857                 # The decoder ensures that \r\n are not split in two pieces
1858
1859                 # In C we'd look for these in parallel of course.
1860                 nlpos = line.find("\n", start)
1861                 crpos = line.find("\r", start)
1862                 if crpos == -1:
1863                     if nlpos == -1:
1864                         # Nothing found
1865                         start = len(line)
1866                     else:
1867                         # Found \n
1868                         endpos = nlpos + 1
1869                         break
1870                 elif nlpos == -1:
1871                     # Found lone \r
1872                     endpos = crpos + 1
1873                     break
1874                 elif nlpos < crpos:
1875                     # Found \n
1876                     endpos = nlpos + 1
1877                     break
1878                 elif nlpos == crpos + 1:
1879                     # Found \r\n
1880                     endpos = crpos + 2
1881                     break
1882                 else:
1883                     # Found \r
1884                     endpos = crpos + 1
1885                     break
1886             else:
1887                 # non-universal
1888                 pos = line.find(self._readnl)
1889                 if pos >= 0:
1890                     endpos = pos + len(self._readnl)
1891                     break
1892
1893             if limit >= 0 and len(line) >= limit:
1894                 endpos = limit  # reached length limit
1895                 break
1896
1897             # No line ending seen yet - get more data'
1898             while self._read_chunk():
1899                 if self._decoded_chars:
1900                     break
1901             if self._decoded_chars:
1902                 line += self._get_decoded_chars()
1903             else:
1904                 # end of file
1905                 self._set_decoded_chars('')
1906                 self._snapshot = None
1907                 return line
1908
1909         if limit >= 0 and endpos > limit:
1910             endpos = limit  # don't exceed limit
1911
1912         # Rewind _decoded_chars to just after the line ending we found.
1913         self._rewind_decoded_chars(len(line) - endpos)
1914         return line[:endpos]
1915
1916     @property
1917     def newlines(self):
1918         return self._decoder.newlines if self._decoder else None
1919
1920
class StringIO(TextIOWrapper):
    """Text I/O implementation using an in-memory buffer.

    The initial_value argument sets the value of object.  The newline
    argument is like the one of TextIOWrapper's constructor.

    The text is stored UTF-8 encoded in a private BytesIO; the encoding
    is an implementation detail and is not exposed (see the encoding and
    errors properties).
    """

    def __init__(self, initial_value="", newline="\n"):
        """Create the stream, optionally pre-filled with initial_value.

        A non-empty initial_value that is not already unicode is
        converted with unicode() before being written; the stream is
        then rewound to the start.
        """
        super(StringIO, self).__init__(BytesIO(),
                                       encoding="utf-8",
                                       errors="strict",
                                       newline=newline)
        # Issue #5645: make universal newlines semantics the same as in the
        # C version, even under Windows.
        if newline is None:
            self._writetranslate = False
        if initial_value:
            if not isinstance(initial_value, unicode):
                initial_value = unicode(initial_value)
            self.write(initial_value)
            self.seek(0)

    def getvalue(self):
        """Return the entire contents of the buffer as text.

        The stored bytes are run through the stream's own decoder rather
        than a bare bytes.decode(), so the newline handling selected at
        construction time (e.g. universal newlines translation with
        newline=None) is applied to the result as well.
        """
        self.flush()
        decoder = self._decoder or self._get_decoder()
        # The decoder is shared with read()/readline(): decode from a
        # clean state, then restore whatever state it had before.
        old_state = decoder.getstate()
        decoder.reset()
        try:
            return decoder.decode(self.buffer.getvalue(), final=True)
        finally:
            decoder.setstate(old_state)

    def __repr__(self):
        # TextIOWrapper tells the encoding in its repr. In StringIO,
        # that's an implementation detail.
        return object.__repr__(self)

    @property
    def errors(self):
        """Always None: error handling is not exposed for StringIO."""
        return None

    @property
    def encoding(self):
        """Always None: the internal encoding is not exposed for StringIO."""
        return None

    def detach(self):
        """Unsupported: reports the operation via self._unsupported()."""
        # This doesn't make sense on StringIO.
        self._unsupported("detach")