Lib/_pyio.py

   1 """
   2 Python implementation of the io module.
   3 """
   4
   5 from __future__ import (print_function, unicode_literals)
   6
   7 import os
   8 import abc
   9 import codecs
  10 import warnings
  11 # Import thread instead of threading to reduce startup cost
  12 try:
  13     from thread import allocate_lock as Lock
  14 except ImportError:
  15     from dummy_thread import allocate_lock as Lock
  16
  17 import io
  18 from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END)
  19
# In Python 2 a module-level __metaclass__ applies to every class statement
# in this module that has no base classes, making those classes new-style.
__metaclass__ = type

# Fallback buffer size; open() prefers the file's st_blksize when available.
DEFAULT_BUFFER_SIZE = 8 * 1024  # bytes
  24
  25 # NOTE: Base classes defined here are registered with the "official" ABCs
  26 # defined in io.py. We don't use real inheritance though, because we don't
  27 # want to inherit the C implementations.
  28
  29
  30 class BlockingIOError(IOError):
  31
  32     """Exception raised when I/O would block on a non-blocking I/O stream."""
  33
  34     def __init__(self, errno, strerror, characters_written=0):
  35         super(IOError, self).__init__(errno, strerror)
  36         if not isinstance(characters_written, (int, long)):
  37             raise TypeError("characters_written must be a integer")
  38         self.characters_written = characters_written
  39
  40
  41 def open(file, mode="r", buffering=-1,
  42          encoding=None, errors=None,
  43          newline=None, closefd=True):
  44
  45     r"""Open file and return a stream.  Raise IOError upon failure.
  46
  47     file is either a text or byte string giving the name (and the path
  48     if the file isn't in the current working directory) of the file to
  49     be opened or an integer file descriptor of the file to be
  50     wrapped. (If a file descriptor is given, it is closed when the
  51     returned I/O object is closed, unless closefd is set to False.)
  52
  53     mode is an optional string that specifies the mode in which the file
  54     is opened. It defaults to 'r' which means open for reading in text
  55     mode.  Other common values are 'w' for writing (truncating the file if
  56     it already exists), and 'a' for appending (which on some Unix systems,
  57     means that all writes append to the end of the file regardless of the
  58     current seek position). In text mode, if encoding is not specified the
  59     encoding used is platform dependent. (For reading and writing raw
  60     bytes use binary mode and leave encoding unspecified.) The available
  61     modes are:
  62
  63     ========= ===============================================================
  64     Character Meaning
  65     --------- ---------------------------------------------------------------
  66     'r'       open for reading (default)
  67     'w'       open for writing, truncating the file first
  68     'a'       open for writing, appending to the end of the file if it exists
  69     'b'       binary mode
  70     't'       text mode (default)
  71     '+'       open a disk file for updating (reading and writing)
  72     'U'       universal newline mode (for backwards compatibility; unneeded
  73               for new code)
  74     ========= ===============================================================
  75
  76     The default mode is 'rt' (open for reading text). For binary random
  77     access, the mode 'w+b' opens and truncates the file to 0 bytes, while
  78     'r+b' opens the file without truncation.
  79
  80     Python distinguishes between files opened in binary and text modes,
  81     even when the underlying operating system doesn't. Files opened in
  82     binary mode (appending 'b' to the mode argument) return contents as
  83     bytes objects without any decoding. In text mode (the default, or when
  84     't' is appended to the mode argument), the contents of the file are
  85     returned as strings, the bytes having been first decoded using a
  86     platform-dependent encoding or using the specified encoding if given.
  87
  88     buffering is an optional integer used to set the buffering policy.
  89     Pass 0 to switch buffering off (only allowed in binary mode), 1 to select
  90     line buffering (only usable in text mode), and an integer > 1 to indicate
  91     the size of a fixed-size chunk buffer.  When no buffering argument is
  92     given, the default buffering policy works as follows:
  93
  94     * Binary files are buffered in fixed-size chunks; the size of the buffer
  95       is chosen using a heuristic trying to determine the underlying device's
  96       "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
  97       On many systems, the buffer will typically be 4096 or 8192 bytes long.
  98
  99     * "Interactive" text files (files for which isatty() returns True)
 100       use line buffering.  Other text files use the policy described above
 101       for binary files.
 102
 103     encoding is the name of the encoding used to decode or encode the
 104     file. This should only be used in text mode. The default encoding is
 105     platform dependent, but any encoding supported by Python can be
 106     passed.  See the codecs module for the list of supported encodings.
 107
 108     errors is an optional string that specifies how encoding errors are to
 109     be handled---this argument should not be used in binary mode. Pass
 110     'strict' to raise a ValueError exception if there is an encoding error
 111     (the default of None has the same effect), or pass 'ignore' to ignore
 112     errors. (Note that ignoring encoding errors can lead to data loss.)
 113     See the documentation for codecs.register for a list of the permitted
 114     encoding error strings.
 115
 116     newline controls how universal newlines works (it only applies to text
 117     mode). It can be None, '', '\n', '\r', and '\r\n'.  It works as
 118     follows:
 119
 120     * On input, if newline is None, universal newlines mode is
 121       enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
 122       these are translated into '\n' before being returned to the
 123       caller. If it is '', universal newline mode is enabled, but line
 124       endings are returned to the caller untranslated. If it has any of
 125       the other legal values, input lines are only terminated by the given
 126       string, and the line ending is returned to the caller untranslated.
 127
 128     * On output, if newline is None, any '\n' characters written are
 129       translated to the system default line separator, os.linesep. If
 130       newline is '', no translation takes place. If newline is any of the
 131       other legal values, any '\n' characters written are translated to
 132       the given string.
 133
 134     If closefd is False, the underlying file descriptor will be kept open
 135     when the file is closed. This does not work when a file name is given
 136     and must be True in that case.
 137
 138     open() returns a file object whose type depends on the mode, and
 139     through which the standard file operations such as reading and writing
 140     are performed. When open() is used to open a file in a text mode ('w',
 141     'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
 142     a file in a binary mode, the returned class varies: in read binary
 143     mode, it returns a BufferedReader; in write binary and append binary
 144     modes, it returns a BufferedWriter, and in read/write mode, it returns
 145     a BufferedRandom.
 146
 147     It is also possible to use a string or bytearray as a file for both
 148     reading and writing. For strings StringIO can be used like a file
 149     opened in a text mode, and for bytes a BytesIO can be used like a file
 150     opened in a binary mode.
 151     """
 152     if not isinstance(file, (basestring, int, long)):
 153         raise TypeError("invalid file: %r" % file)
 154     if not isinstance(mode, basestring):
 155         raise TypeError("invalid mode: %r" % mode)
 156     if not isinstance(buffering, (int, long)):
 157         raise TypeError("invalid buffering: %r" % buffering)
 158     if encoding is not None and not isinstance(encoding, basestring):
 159         raise TypeError("invalid encoding: %r" % encoding)
 160     if errors is not None and not isinstance(errors, basestring):
 161         raise TypeError("invalid errors: %r" % errors)
 162     modes = set(mode)
 163     if modes - set("arwb+tU") or len(mode) > len(modes):
 164         raise ValueError("invalid mode: %r" % mode)
 165     reading = "r" in modes
 166     writing = "w" in modes
 167     appending = "a" in modes
 168     updating = "+" in modes
 169     text = "t" in modes
 170     binary = "b" in modes
 171     if "U" in modes:
 172         if writing or appending:
 173             raise ValueError("can't use U and writing mode at once")
 174         reading = True
 175     if text and binary:
 176         raise ValueError("can't have text and binary mode at once")
 177     if reading + writing + appending > 1:
 178         raise ValueError("can't have read/write/append mode at once")
 179     if not (reading or writing or appending):
 180         raise ValueError("must have exactly one of read/write/append mode")
 181     if binary and encoding is not None:
 182         raise ValueError("binary mode doesn't take an encoding argument")
 183     if binary and errors is not None:
 184         raise ValueError("binary mode doesn't take an errors argument")
 185     if binary and newline is not None:
 186         raise ValueError("binary mode doesn't take a newline argument")
 187     raw = FileIO(file,
 188                  (reading and "r" or "") +
 189                  (writing and "w" or "") +
 190                  (appending and "a" or "") +
 191                  (updating and "+" or ""),
 192                  closefd)
 193     line_buffering = False
 194     if buffering == 1 or buffering < 0 and raw.isatty():
 195         buffering = -1
 196         line_buffering = True
 197     if buffering < 0:
 198         buffering = DEFAULT_BUFFER_SIZE
 199         try:
 200             bs = os.fstat(raw.fileno()).st_blksize
 201         except (os.error, AttributeError):
 202             pass
 203         else:
 204             if bs > 1:
 205                 buffering = bs
 206     if buffering < 0:
 207         raise ValueError("invalid buffering size")
 208     if buffering == 0:
 209         if binary:
 210             return raw
 211         raise ValueError("can't have unbuffered text I/O")
 212     if updating:
 213         buffer = BufferedRandom(raw, buffering)
 214     elif writing or appending:
 215         buffer = BufferedWriter(raw, buffering)
 216     elif reading:
 217         buffer = BufferedReader(raw, buffering)
 218     else:
 219         raise ValueError("unknown mode: %r" % mode)
 220     if binary:
 221         return buffer
 222     text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering)
 223     text.mode = mode
 224     return text
 225
 226
 227 class DocDescriptor:
 228     """Helper for builtins.open.__doc__
 229     """
 230     def __get__(self, obj, typ):
 231         return (
 232             "open(file, mode='r', buffering=None, encoding=None, "
 233                  "errors=None, newline=None, closefd=True)\n\n" +
 234             open.__doc__)
 235
 236 class OpenWrapper:
 237     """Wrapper for builtins.open
 238
 239     Trick so that open won't become a bound method when stored
 240     as a class variable (as dbm.dumb does).
 241
 242     See initstdio() in Python/pythonrun.c.
 243     """
 244     __doc__ = DocDescriptor()
 245
 246     def __new__(cls, *args, **kwargs):
 247         return open(*args, **kwargs)
 248
 249
 250 class UnsupportedOperation(ValueError, IOError):
 251     pass
 252
 253
 254 class IOBase:
 255     __metaclass__ = abc.ABCMeta
 256
 257     """The abstract base class for all I/O classes, acting on streams of
 258     bytes. There is no public constructor.
 259
 260     This class provides dummy implementations for many methods that
 261     derived classes can override selectively; the default implementations
 262     represent a file that cannot be read, written or seeked.
 263
 264     Even though IOBase does not declare read, readinto, or write because
 265     their signatures will vary, implementations and clients should
 266     consider those methods part of the interface. Also, implementations
 267     may raise a IOError when operations they do not support are called.
 268
 269     The basic type used for binary data read from or written to a file is
 270     bytes. bytearrays are accepted too, and in some cases (such as
 271     readinto) needed. Text I/O classes work with str data.
 272
 273     Note that calling any method (even inquiries) on a closed stream is
 274     undefined. Implementations may raise IOError in this case.
 275
 276     IOBase (and its subclasses) support the iterator protocol, meaning
 277     that an IOBase object can be iterated over yielding the lines in a
 278     stream.
 279
 280     IOBase also supports the :keyword:`with` statement. In this example,
 281     fp is closed after the suite of the with statement is complete:
 282
 283     with open('spam.txt', 'r') as fp:
 284         fp.write('Spam and eggs!')
 285     """
 286
 287     ### Internal ###
 288
 289     def _unsupported(self, name):
 290         """Internal: raise an exception for unsupported operations."""
 291         raise UnsupportedOperation("%s.%s() not supported" %
 292                                    (self.__class__.__name__, name))
 293
 294     ### Positioning ###
 295
 296     def seek(self, pos, whence=0):
 297         """Change stream position.
 298
 299         Change the stream position to byte offset offset. offset is
 300         interpreted relative to the position indicated by whence.  Values
 301         for whence are:
 302
 303         * 0 -- start of stream (the default); offset should be zero or positive
 304         * 1 -- current stream position; offset may be negative
 305         * 2 -- end of stream; offset is usually negative
 306
 307         Return the new absolute position.
 308         """
 309         self._unsupported("seek")
 310
 311     def tell(self):
 312         """Return current stream position."""
 313         return self.seek(0, 1)
 314
 315     def truncate(self, pos=None):
 316         """Truncate file to size bytes.
 317
 318         Size defaults to the current IO position as reported by tell().  Return
 319         the new size.
 320         """
 321         self._unsupported("truncate")
 322
 323     ### Flush and close ###
 324
 325     def flush(self):
 326         """Flush write buffers, if applicable.
 327
 328         This is not implemented for read-only and non-blocking streams.
 329         """
 330         # XXX Should this return the number of bytes written???
 331
 332     __closed = False
 333
 334     def close(self):
 335         """Flush and close the IO object.
 336
 337         This method has no effect if the file is already closed.
 338         """
 339         if not self.__closed:
 340             try:
 341                 self.flush()
 342             except IOError:
 343                 pass  # If flush() fails, just give up
 344             self.__closed = True
 345
 346     def __del__(self):
 347         """Destructor.  Calls close()."""
 348         # The try/except block is in case this is called at program
 349         # exit time, when it's possible that globals have already been
 350         # deleted, and then the close() call might fail.  Since
 351         # there's nothing we can do about such failures and they annoy
 352         # the end users, we suppress the traceback.
 353         try:
 354             self.close()
 355         except:
 356             pass
 357
 358     ### Inquiries ###
 359
 360     def seekable(self):
 361         """Return whether object supports random access.
 362
 363         If False, seek(), tell() and truncate() will raise IOError.
 364         This method may need to do a test seek().
 365         """
 366         return False
 367
 368     def _checkSeekable(self, msg=None):
 369         """Internal: raise an IOError if file is not seekable
 370         """
 371         if not self.seekable():
 372             raise IOError("File or stream is not seekable."
 373                           if msg is None else msg)
 374
 375
 376     def readable(self):
 377         """Return whether object was opened for reading.
 378
 379         If False, read() will raise IOError.
 380         """
 381         return False
 382
 383     def _checkReadable(self, msg=None):
 384         """Internal: raise an IOError if file is not readable
 385         """
 386         if not self.readable():
 387             raise IOError("File or stream is not readable."
 388                           if msg is None else msg)
 389
 390     def writable(self):
 391         """Return whether object was opened for writing.
 392
 393         If False, write() and truncate() will raise IOError.
 394         """
 395         return False
 396
 397     def _checkWritable(self, msg=None):
 398         """Internal: raise an IOError if file is not writable
 399         """
 400         if not self.writable():
 401             raise IOError("File or stream is not writable."
 402                           if msg is None else msg)
 403
 404     @property
 405     def closed(self):
 406         """closed: bool.  True iff the file has been closed.
 407
 408         For backwards compatibility, this is a property, not a predicate.
 409         """
 410         return self.__closed
 411
 412     def _checkClosed(self, msg=None):
 413         """Internal: raise an ValueError if file is closed
 414         """
 415         if self.closed:
 416             raise ValueError("I/O operation on closed file."
 417                              if msg is None else msg)
 418
 419     ### Context manager ###
 420
 421     def __enter__(self):
 422         """Context management protocol.  Returns self."""
 423         self._checkClosed()
 424         return self
 425
 426     def __exit__(self, *args):
 427         """Context management protocol.  Calls close()"""
 428         self.close()
 429
 430     ### Lower-level APIs ###
 431
 432     # XXX Should these be present even if unimplemented?
 433
 434     def fileno(self):
 435         """Returns underlying file descriptor if one exists.
 436
 437         An IOError is raised if the IO object does not use a file descriptor.
 438         """
 439         self._unsupported("fileno")
 440
 441     def isatty(self):
 442         """Return whether this is an 'interactive' stream.
 443
 444         Return False if it can't be determined.
 445         """
 446         self._checkClosed()
 447         return False
 448
 449     ### Readline[s] and writelines ###
 450
 451     def readline(self, limit=-1):
 452         r"""Read and return a line from the stream.
 453
 454         If limit is specified, at most limit bytes will be read.
 455
 456         The line terminator is always b'\n' for binary files; for text
 457         files, the newlines argument to open can be used to select the line
 458         terminator(s) recognized.
 459         """
 460         # For backwards compatibility, a (slowish) readline().
 461         if hasattr(self, "peek"):
 462             def nreadahead():
 463                 readahead = self.peek(1)
 464                 if not readahead:
 465                     return 1
 466                 n = (readahead.find(b"\n") + 1) or len(readahead)
 467                 if limit >= 0:
 468                     n = min(n, limit)
 469                 return n
 470         else:
 471             def nreadahead():
 472                 return 1
 473         if limit is None:
 474             limit = -1
 475         elif not isinstance(limit, (int, long)):
 476             raise TypeError("limit must be an integer")
 477         res = bytearray()
 478         while limit < 0 or len(res) < limit:
 479             b = self.read(nreadahead())
 480             if not b:
 481                 break
 482             res += b
 483             if res.endswith(b"\n"):
 484                 break
 485         return bytes(res)
 486
 487     def __iter__(self):
 488         self._checkClosed()
 489         return self
 490
 491     def next(self):
 492         line = self.readline()
 493         if not line:
 494             raise StopIteration
 495         return line
 496
 497     def readlines(self, hint=None):
 498         """Return a list of lines from the stream.
 499
 500         hint can be specified to control the number of lines read: no more
 501         lines will be read if the total size (in bytes/characters) of all
 502         lines so far exceeds hint.
 503         """
 504         if hint is not None and not isinstance(hint, (int, long)):
 505             raise TypeError("integer or None expected")
 506         if hint is None or hint <= 0:
 507             return list(self)
 508         n = 0
 509         lines = []
 510         for line in self:
 511             lines.append(line)
 512             n += len(line)
 513             if n >= hint:
 514                 break
 515         return lines
 516
 517     def writelines(self, lines):
 518         self._checkClosed()
 519         for line in lines:
 520             self.write(line)
 521
# Register the pure-Python class as a virtual subclass of the official ABC,
# so isinstance()/issubclass() checks against io.IOBase accept it without
# inheriting from the C implementation.
io.IOBase.register(IOBase)
 523
 524
 525 class RawIOBase(IOBase):
 526
 527     """Base class for raw binary I/O."""
 528
 529     # The read() method is implemented by calling readinto(); derived
 530     # classes that want to support read() only need to implement
 531     # readinto() as a primitive operation.  In general, readinto() can be
 532     # more efficient than read().
 533
 534     # (It would be tempting to also provide an implementation of
 535     # readinto() in terms of read(), in case the latter is a more suitable
 536     # primitive operation, but that would lead to nasty recursion in case
 537     # a subclass doesn't implement either.)
 538
 539     def read(self, n=-1):
 540         """Read and return up to n bytes.
 541
 542         Returns an empty bytes object on EOF, or None if the object is
 543         set not to block and has no data to read.
 544         """
 545         if n is None:
 546             n = -1
 547         if n < 0:
 548             return self.readall()
 549         b = bytearray(n.__index__())
 550         n = self.readinto(b)
 551         del b[n:]
 552         return bytes(b)
 553
 554     def readall(self):
 555         """Read until EOF, using multiple read() call."""
 556         res = bytearray()
 557         while True:
 558             data = self.read(DEFAULT_BUFFER_SIZE)
 559             if not data:
 560                 break
 561             res += data
 562         return bytes(res)
 563
 564     def readinto(self, b):
 565         """Read up to len(b) bytes into b.
 566
 567         Returns number of bytes read (0 for EOF), or None if the object
 568         is set not to block as has no data to read.
 569         """
 570         self._unsupported("readinto")
 571
 572     def write(self, b):
 573         """Write the given buffer to the IO stream.
 574
 575         Returns the number of bytes written, which may be less than len(b).
 576         """
 577         self._unsupported("write")
 578
# Make the pure-Python class a virtual subclass of the official ABC.
io.RawIOBase.register(RawIOBase)
# FileIO is taken from the C _io module rather than implemented here; it is
# registered so that it counts as a RawIOBase of this module too.
from _io import FileIO
RawIOBase.register(FileIO)
 582
 583
 584 class BufferedIOBase(IOBase):
 585
 586     """Base class for buffered IO objects.
 587
 588     The main difference with RawIOBase is that the read() method
 589     supports omitting the size argument, and does not have a default
 590     implementation that defers to readinto().
 591
 592     In addition, read(), readinto() and write() may raise
 593     BlockingIOError if the underlying raw stream is in non-blocking
 594     mode and not ready; unlike their raw counterparts, they will never
 595     return None.
 596
 597     A typical implementation should not inherit from a RawIOBase
 598     implementation, but wrap one.
 599     """
 600
 601     def read(self, n=None):
 602         """Read and return up to n bytes.
 603
 604         If the argument is omitted, None, or negative, reads and
 605         returns all data until EOF.
 606
 607         If the argument is positive, and the underlying raw stream is
 608         not 'interactive', multiple raw reads may be issued to satisfy
 609         the byte count (unless EOF is reached first).  But for
 610         interactive raw streams (XXX and for pipes?), at most one raw
 611         read will be issued, and a short result does not imply that
 612         EOF is imminent.
 613
 614         Returns an empty bytes array on EOF.
 615
 616         Raises BlockingIOError if the underlying raw stream has no
 617         data at the moment.
 618         """
 619         self._unsupported("read")
 620
 621     def read1(self, n=None):
 622         """Read up to n bytes with at most one read() system call."""
 623         self._unsupported("read1")
 624
 625     def readinto(self, b):
 626         """Read up to len(b) bytes into b.
 627
 628         Like read(), this may issue multiple reads to the underlying raw
 629         stream, unless the latter is 'interactive'.
 630
 631         Returns the number of bytes read (0 for EOF).
 632
 633         Raises BlockingIOError if the underlying raw stream has no
 634         data at the moment.
 635         """
 636         # XXX This ought to work with anything that supports the buffer API
 637         data = self.read(len(b))
 638         n = len(data)
 639         try:
 640             b[:n] = data
 641         except TypeError as err:
 642             import array
 643             if not isinstance(b, array.array):
 644                 raise err
 645             b[:n] = array.array(b'b', data)
 646         return n
 647
 648     def write(self, b):
 649         """Write the given buffer to the IO stream.
 650
 651         Return the number of bytes written, which is never less than
 652         len(b).
 653
 654         Raises BlockingIOError if the buffer is full and the
 655         underlying raw stream cannot accept more data at the moment.
 656         """
 657         self._unsupported("write")
 658
 659     def detach(self):
 660         """
 661         Separate the underlying raw stream from the buffer and return it.
 662
 663         After the raw stream has been detached, the buffer is in an unusable
 664         state.
 665         """
 666         self._unsupported("detach")
 667
# Make the pure-Python class a virtual subclass of the official ABC.
io.BufferedIOBase.register(BufferedIOBase)
 669
 670
 671 class _BufferedIOMixin(BufferedIOBase):
 672
 673     """A mixin implementation of BufferedIOBase with an underlying raw stream.
 674
 675     This passes most requests on to the underlying raw stream.  It
 676     does *not* provide implementations of read(), readinto() or
 677     write().
 678     """
 679
 680     def __init__(self, raw):
 681         self.raw = raw
 682
 683     ### Positioning ###
 684
 685     def seek(self, pos, whence=0):
 686         new_position = self.raw.seek(pos, whence)
 687         if new_position < 0:
 688             raise IOError("seek() returned an invalid position")
 689         return new_position
 690
 691     def tell(self):
 692         pos = self.raw.tell()
 693         if pos < 0:
 694             raise IOError("tell() returned an invalid position")
 695         return pos
 696
 697     def truncate(self, pos=None):
 698         # Flush the stream.  We're mixing buffered I/O with lower-level I/O,
 699         # and a flush may be necessary to synch both views of the current
 700         # file state.
 701         self.flush()
 702
 703         if pos is None:
 704             pos = self.tell()
 705         # XXX: Should seek() be used, instead of passing the position
 706         # XXX  directly to truncate?
 707         return self.raw.truncate(pos)
 708
 709     ### Flush and close ###
 710
 711     def flush(self):
 712         self.raw.flush()
 713
 714     def close(self):
 715         if not self.closed and self.raw is not None:
 716             try:
 717                 self.flush()
 718             except IOError:
 719                 pass  # If flush() fails, just give up
 720             self.raw.close()
 721
 722     def detach(self):
 723         if self.raw is None:
 724             raise ValueError("raw stream already detached")
 725         self.flush()
 726         raw = self.raw
 727         self.raw = None
 728         return raw
 729
 730     ### Inquiries ###
 731
 732     def seekable(self):
 733         return self.raw.seekable()
 734
 735     def readable(self):
 736         return self.raw.readable()
 737
 738     def writable(self):
 739         return self.raw.writable()
 740
 741     @property
 742     def closed(self):
 743         return self.raw.closed
 744
 745     @property
 746     def name(self):
 747         return self.raw.name
 748
 749     @property
 750     def mode(self):
 751         return self.raw.mode
 752
 753     def __repr__(self):
 754         clsname = self.__class__.__name__
 755         try:
 756             name = self.name
 757         except AttributeError:
 758             return "<_pyio.{0}>".format(clsname)
 759         else:
 760             return "<_pyio.{0} name={1!r}>".format(clsname, name)
 761
 762     ### Lower-level APIs ###
 763
 764     def fileno(self):
 765         return self.raw.fileno()
 766
 767     def isatty(self):
 768         return self.raw.isatty()
 769
 770
 771 class BytesIO(BufferedIOBase):
 772
 773     """Buffered I/O implementation using an in-memory bytes buffer."""
 774
 775     def __init__(self, initial_bytes=None):
 776         buf = bytearray()
 777         if initial_bytes is not None:
 778             buf.extend(initial_bytes)
 779         self._buffer = buf
 780         self._pos = 0
 781
 782     def __getstate__(self):
 783         if self.closed:
 784             raise ValueError("__getstate__ on closed file")
 785         return self.__dict__.copy()
 786
 787     def getvalue(self):
 788         """Return the bytes value (contents) of the buffer
 789         """
 790         if self.closed:
 791             raise ValueError("getvalue on closed file")
 792         return bytes(self._buffer)
 793
 794     def read(self, n=None):
 795         if self.closed:
 796             raise ValueError("read from closed file")
 797         if n is None:
 798             n = -1
 799         if not isinstance(n, (int, long)):
 800             raise TypeError("integer argument expected, got {0!r}".format(
 801                 type(n)))
 802         if n < 0:
 803             n = len(self._buffer)
 804         if len(self._buffer) <= self._pos:
 805             return b""
 806         newpos = min(len(self._buffer), self._pos + n)
 807         b = self._buffer[self._pos : newpos]
 808         self._pos = newpos
 809         return bytes(b)
 810
 811     def read1(self, n):
 812         """This is the same as read.
 813         """
 814         return self.read(n)
 815
 816     def write(self, b):
 817         if self.closed:
 818             raise ValueError("write to closed file")
 819         if isinstance(b, unicode):
 820             raise TypeError("can't write unicode to binary stream")
 821         n = len(b)
 822         if n == 0:
 823             return 0
 824         pos = self._pos
 825         if pos > len(self._buffer):
 826             # Inserts null bytes between the current end of the file
 827             # and the new write position.
 828             padding = b'\x00' * (pos - len(self._buffer))
 829             self._buffer += padding
 830         self._buffer[pos:pos + n] = b
 831         self._pos += n
 832         return n
 833
 834     def seek(self, pos, whence=0):
 835         if self.closed:
 836             raise ValueError("seek on closed file")
 837         try:
 838             pos.__index__
 839         except AttributeError:
 840             raise TypeError("an integer is required")
 841         if whence == 0:
 842             if pos < 0:
 843                 raise ValueError("negative seek position %r" % (pos,))
 844             self._pos = pos
 845         elif whence == 1:
 846             self._pos = max(0, self._pos + pos)
 847         elif whence == 2:
 848             self._pos = max(0, len(self._buffer) + pos)
 849         else:
 850             raise ValueError("invalid whence value")
 851         return self._pos
 852
 853     def tell(self):
 854         if self.closed:
 855             raise ValueError("tell on closed file")
 856         return self._pos
 857
 858     def truncate(self, pos=None):
 859         if self.closed:
 860             raise ValueError("truncate on closed file")
 861         if pos is None:
 862             pos = self._pos
 863         else:
 864             try:
 865                 pos.__index__
 866             except AttributeError:
 867                 raise TypeError("an integer is required")
 868             if pos < 0:
 869                 raise ValueError("negative truncate position %r" % (pos,))
 870         del self._buffer[pos:]
 871         return pos
 872
 873     def readable(self):
 874         return True
 875
 876     def writable(self):
 877         return True
 878
 879     def seekable(self):
 880         return True
 881
 882
 883 class BufferedReader(_BufferedIOMixin):
 884
 885     """BufferedReader(raw[, buffer_size])
 886
 887     A buffer for a readable, sequential BaseRawIO object.
 888
 889     The constructor creates a BufferedReader for the given readable raw
 890     stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
 891     is used.
 892     """
 893
 894     def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
 895         """Create a new buffered reader using the given readable raw IO object.
 896         """
 897         if not raw.readable():
 898             raise IOError('"raw" argument must be readable.')
 899
 900         _BufferedIOMixin.__init__(self, raw)
 901         if buffer_size <= 0:
 902             raise ValueError("invalid buffer size")
 903         self.buffer_size = buffer_size
 904         self._reset_read_buf()
 905         self._read_lock = Lock()
 906
 907     def _reset_read_buf(self):
 908         self._read_buf = b""
 909         self._read_pos = 0
 910
 911     def read(self, n=None):
 912         """Read n bytes.
 913
 914         Returns exactly n bytes of data unless the underlying raw IO
 915         stream reaches EOF or if the call would block in non-blocking
 916         mode. If n is negative, read until EOF or until read() would
 917         block.
 918         """
 919         if n is not None and n < -1:
 920             raise ValueError("invalid number of bytes to read")
 921         with self._read_lock:
 922             return self._read_unlocked(n)
 923
 924     def _read_unlocked(self, n=None):
 925         nodata_val = b""
 926         empty_values = (b"", None)
 927         buf = self._read_buf
 928         pos = self._read_pos
 929
 930         # Special case for when the number of bytes to read is unspecified.
 931         if n is None or n == -1:
 932             self._reset_read_buf()
 933             chunks = [buf[pos:]]  # Strip the consumed bytes.
 934             current_size = 0
 935             while True:
 936                 # Read until EOF or until read() would block.
 937                 chunk = self.raw.read()
 938                 if chunk in empty_values:
 939                     nodata_val = chunk
 940                     break
 941                 current_size += len(chunk)
 942                 chunks.append(chunk)
 943             return b"".join(chunks) or nodata_val
 944
 945         # The number of bytes to read is specified, return at most n bytes.
 946         avail = len(buf) - pos  # Length of the available buffered data.
 947         if n <= avail:
 948             # Fast path: the data to read is fully buffered.
 949             self._read_pos += n
 950             return buf[pos:pos+n]
 951         # Slow path: read from the stream until enough bytes are read,
 952         # or until an EOF occurs or until read() would block.
 953         chunks = [buf[pos:]]
 954         wanted = max(self.buffer_size, n)
 955         while avail < n:
 956             chunk = self.raw.read(wanted)
 957             if chunk in empty_values:
 958                 nodata_val = chunk
 959                 break
 960             avail += len(chunk)
 961             chunks.append(chunk)
 962         # n is more then avail only when an EOF occurred or when
 963         # read() would have blocked.
 964         n = min(n, avail)
 965         out = b"".join(chunks)
 966         self._read_buf = out[n:]  # Save the extra data in the buffer.
 967         self._read_pos = 0
 968         return out[:n] if out else nodata_val
 969
 970     def peek(self, n=0):
 971         """Returns buffered bytes without advancing the position.
 972
 973         The argument indicates a desired minimal number of bytes; we
 974         do at most one raw read to satisfy it.  We never return more
 975         than self.buffer_size.
 976         """
 977         with self._read_lock:
 978             return self._peek_unlocked(n)
 979
 980     def _peek_unlocked(self, n=0):
 981         want = min(n, self.buffer_size)
 982         have = len(self._read_buf) - self._read_pos
 983         if have < want or have <= 0:
 984             to_read = self.buffer_size - have
 985             current = self.raw.read(to_read)
 986             if current:
 987                 self._read_buf = self._read_buf[self._read_pos:] + current
 988                 self._read_pos = 0
 989         return self._read_buf[self._read_pos:]
 990
 991     def read1(self, n):
 992         """Reads up to n bytes, with at most one read() system call."""
 993         # Returns up to n bytes.  If at least one byte is buffered, we
 994         # only return buffered bytes.  Otherwise, we do one raw read.
 995         if n < 0:
 996             raise ValueError("number of bytes to read must be positive")
 997         if n == 0:
 998             return b""
 999         with self._read_lock:
1000             self._peek_unlocked(1)
1001             return self._read_unlocked(
1002                 min(n, len(self._read_buf) - self._read_pos))
1003
1004     def tell(self):
1005         return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos
1006
1007     def seek(self, pos, whence=0):
1008         if not (0 <= whence <= 2):
1009             raise ValueError("invalid whence value")
1010         with self._read_lock:
1011             if whence == 1:
1012                 pos -= len(self._read_buf) - self._read_pos
1013             pos = _BufferedIOMixin.seek(self, pos, whence)
1014             self._reset_read_buf()
1015             return pos
1016
class BufferedWriter(_BufferedIOMixin):

    """A buffer for a writeable sequential RawIO object.

    The constructor creates a BufferedWriter for the given writeable raw
    stream. If the buffer_size is not given, it defaults to
    DEFAULT_BUFFER_SIZE.
    """

    # stacklevel handed to warnings.warn() for the max_buffer_size
    # deprecation warning; subclasses override it.
    _warning_stack_offset = 2

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Create a buffered writer on top of the writable raw stream.

        max_buffer_size is deprecated; passing anything other than None
        only triggers a DeprecationWarning.

        Raises IOError if raw reports that it is not writable and
        ValueError if buffer_size is not strictly positive.
        """
        if not raw.writable():
            raise IOError('"raw" argument must be writable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        if max_buffer_size is not None:
            warnings.warn("max_buffer_size is deprecated", DeprecationWarning,
                          self._warning_stack_offset)
        self.buffer_size = buffer_size
        self._write_buf = bytearray()
        self._write_lock = Lock()

    def write(self, b):
        """Append b to the write buffer, flushing when it exceeds buffer_size.

        Returns the number of bytes accepted.  Raises ValueError if the
        stream is closed, TypeError if b is a unicode string, and
        BlockingIOError (with characters_written set) when the raw stream
        would block and only part of b could be accepted.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, unicode):
            raise TypeError("can't write unicode to binary stream")
        with self._write_lock:
            # XXX we can implement some more tricks to try and avoid
            # partial writes
            if len(self._write_buf) > self.buffer_size:
                # We're full, so let's pre-flush the buffer
                try:
                    self._flush_unlocked()
                except BlockingIOError as e:
                    # We can't accept anything else.
                    # XXX Why not just let the exception pass through?
                    raise BlockingIOError(e.errno, e.strerror, 0)
            before = len(self._write_buf)
            self._write_buf.extend(b)
            written = len(self._write_buf) - before
            if len(self._write_buf) > self.buffer_size:
                try:
                    self._flush_unlocked()
                except BlockingIOError as e:
                    if len(self._write_buf) > self.buffer_size:
                        # We've hit the buffer_size. We have to accept a partial
                        # write and cut back our buffer.
                        overage = len(self._write_buf) - self.buffer_size
                        written -= overage
                        self._write_buf = self._write_buf[:self.buffer_size]
                        raise BlockingIOError(e.errno, e.strerror, written)
            return written

    def truncate(self, pos=None):
        """Flush pending data, then truncate the raw stream at pos.

        When pos is None the raw stream's current position is used.
        """
        with self._write_lock:
            self._flush_unlocked()
            if pos is None:
                pos = self.raw.tell()
            return self.raw.truncate(pos)

    def flush(self):
        """Write all buffered data to the raw stream."""
        with self._write_lock:
            self._flush_unlocked()

    def _flush_unlocked(self):
        """Implementation of flush(); the caller must hold self._write_lock.

        Raises ValueError if the stream is closed, IOError if the raw
        stream reports an impossible byte count, and BlockingIOError
        (carrying the number of bytes flushed so far) if the raw stream
        would block.
        """
        if self.closed:
            raise ValueError("flush of closed file")
        written = 0
        try:
            while self._write_buf:
                n = self.raw.write(self._write_buf)
                if n is None:
                    # A non-blocking raw stream returns None when nothing
                    # could be written.  Report that as "would block"
                    # rather than letting None reach the range check below.
                    import errno
                    raise BlockingIOError(
                        errno.EAGAIN,
                        "write could not complete without blocking", 0)
                if n > len(self._write_buf) or n < 0:
                    raise IOError("write() returned incorrect number of bytes")
                del self._write_buf[:n]
                written += n
        except BlockingIOError as e:
            # Account for whatever the raw stream did manage to write
            # before re-raising with the running total.
            n = e.characters_written
            del self._write_buf[:n]
            written += n
            raise BlockingIOError(e.errno, e.strerror, written)

    def tell(self):
        """Return the logical position: the raw position plus the bytes
        still waiting in the write buffer."""
        return _BufferedIOMixin.tell(self) + len(self._write_buf)

    def seek(self, pos, whence=0):
        """Flush pending data and then seek the underlying stream.

        Raises ValueError if whence is not 0, 1 or 2.
        """
        if not (0 <= whence <= 2):
            raise ValueError("invalid whence")
        with self._write_lock:
            self._flush_unlocked()
            return _BufferedIOMixin.seek(self, pos, whence)
1112
1113
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together to
    form a sequential IO object that can read and write. This is typically
    used with a socket or two-way pipe.

    reader and writer are RawIOBase objects that are readable and
    writeable respectively. If the buffer_size is omitted it defaults to
    DEFAULT_BUFFER_SIZE.
    """

    # XXX The usefulness of this (compared to having two separate IO
    # objects) is questionable.

    def __init__(self, reader, writer,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Constructor.

        The arguments are two RawIO instances.  max_buffer_size is
        deprecated; passing anything other than None only triggers a
        DeprecationWarning.

        Raises IOError if reader is not readable or writer is not writable.
        """
        if max_buffer_size is not None:
            warnings.warn("max_buffer_size is deprecated", DeprecationWarning, 2)

        if not reader.readable():
            raise IOError('"reader" argument must be readable.')

        if not writer.writable():
            raise IOError('"writer" argument must be writable.')

        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size)

    def read(self, n=None):
        """Read from the reader side; None is treated as -1 (read all)."""
        if n is None:
            n = -1
        return self.reader.read(n)

    def readinto(self, b):
        """Delegate to the reader's readinto()."""
        return self.reader.readinto(b)

    def write(self, b):
        """Delegate to the writer's write()."""
        return self.writer.write(b)

    def peek(self, n=0):
        """Delegate to the reader's peek()."""
        return self.reader.peek(n)

    def read1(self, n):
        """Delegate to the reader's read1()."""
        return self.reader.read1(n)

    def readable(self):
        """Return whether the reader side is readable."""
        return self.reader.readable()

    def writable(self):
        """Return whether the writer side is writable."""
        return self.writer.writable()

    def flush(self):
        """Flush the writer side."""
        return self.writer.flush()

    def close(self):
        """Close both halves of the pair.

        The reader is closed even if closing the writer raises, so a
        failure on the writer side cannot leave the reader open.  The
        writer's exception still propagates.
        """
        try:
            self.writer.close()
        finally:
            self.reader.close()

    def isatty(self):
        """Return True if either side is connected to a tty."""
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        """Closed state of the pair, as reported by the writer side."""
        return self.writer.closed
1184
1185
class BufferedRandom(BufferedWriter, BufferedReader):

    """Buffered access to a seekable raw stream, for reading and writing.

    The constructor takes the seekable raw stream as its first argument
    and sets up both the reader and the writer state on it.  buffer_size
    defaults to DEFAULT_BUFFER_SIZE when omitted.
    """

    _warning_stack_offset = 3

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Check that raw is seekable, then initialise both base classes."""
        raw._checkSeekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)

    def seek(self, pos, whence=0):
        """Flush pending writes, undo read-ahead and seek the raw stream.

        Raises ValueError for a whence outside 0..2 and IOError if the raw
        stream reports a negative position.
        """
        if not 0 <= whence <= 2:
            raise ValueError("invalid whence")
        self.flush()
        if self._read_buf:
            # Put the raw position back to where the caller believes it is.
            with self._read_lock:
                rewind = self._read_pos - len(self._read_buf)
                self.raw.seek(rewind, 1)
        # Seek the raw stream before clearing the read buffer: if the raw
        # seek fails, the buffered data is not lost.
        new_pos = self.raw.seek(pos, whence)
        with self._read_lock:
            self._reset_read_buf()
        if new_pos < 0:
            raise IOError("seek() returned invalid position")
        return new_pos

    def tell(self):
        """Return the logical position, using the writer's view while
        unflushed data is pending and the reader's view otherwise."""
        side = BufferedWriter if self._write_buf else BufferedReader
        return side.tell(self)

    def truncate(self, pos=None):
        """Truncate at pos (default: the current logical position)."""
        target = self.tell() if pos is None else pos
        # Use seek to flush the read buffer.
        return BufferedWriter.truncate(self, target)

    def read(self, n=None):
        """Flush pending writes, then read; None means read everything."""
        count = -1 if n is None else n
        self.flush()
        return BufferedReader.read(self, count)

    def readinto(self, b):
        """Flush pending writes, then read into b."""
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, n=0):
        """Flush pending writes, then peek at the read buffer."""
        self.flush()
        return BufferedReader.peek(self, n)

    def read1(self, n):
        """Flush pending writes, then do a read1()."""
        self.flush()
        return BufferedReader.read1(self, n)

    def write(self, b):
        """Undo any read-ahead, then write through the writer side."""
        if self._read_buf:
            with self._read_lock:
                rewind = self._read_pos - len(self._read_buf)
                self.raw.seek(rewind, 1)
                self._reset_read_buf()
        return BufferedWriter.write(self, b)
1257
1258
class TextIOBase(IOBase):

    """Base class for text I/O.

    Provides a character- and line-oriented interface to stream I/O.
    There is no readinto method because Python's character strings are
    immutable, and there is no public constructor.
    """

    def read(self, n=-1):
        """Read at most n characters from the stream.

        Reads from the underlying buffer until n characters are available
        or EOF is hit; a negative or omitted n means read until EOF.
        This base implementation only reports the operation as unsupported.
        """
        self._unsupported("read")

    def write(self, s):
        """Write the string s to the stream (unsupported in the base class)."""
        self._unsupported("write")

    def truncate(self, pos=None):
        """Truncate the size to pos (unsupported in the base class)."""
        self._unsupported("truncate")

    def readline(self):
        """Read until a newline or EOF.

        An empty string is returned if EOF is hit immediately.  This base
        implementation only reports the operation as unsupported.
        """
        self._unsupported("readline")

    def detach(self):
        """
        Separate the underlying buffer from the TextIOBase and return it.

        Once the buffer has been detached the TextIO object is unusable.
        This base implementation only reports the operation as unsupported.
        """
        self._unsupported("detach")

    @property
    def encoding(self):
        """Name of the encoding; None here, subclasses should override."""
        return None

    @property
    def newlines(self):
        """Line endings translated so far.

        Only line endings translated while reading are considered.
        None here; subclasses should override.
        """
        return None

    @property
    def errors(self):
        """Error setting of the decoder or encoder.

        None here; subclasses should override."""
        return None
1321
# Register the pure-Python class with the io module's TextIOBase ABC (no
# real inheritance, so the C implementation is not inherited).
io.TextIOBase.register(TextIOBase)
1323
1324
class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
    r"""Incremental decoder used for reading in universal newlines mode.

    Wraps another incremental decoder (or None, in which case the input
    is passed through as already-decoded text) and, when translate is
    true, converts \r\n and \r into \n.  It records which kinds of line
    ending have been seen.  With translate=False it still guarantees that
    a \r\n pair is never split across two results.
    """

    # Bit flags accumulated in self.seennl.
    _LF = 1
    _CR = 2
    _CRLF = 4

    def __init__(self, decoder, translate, errors='strict'):
        codecs.IncrementalDecoder.__init__(self, errors=errors)
        self.decoder = decoder
        self.translate = translate
        self.pendingcr = False
        self.seennl = 0

    def decode(self, input, final=False):
        """Decode input, handling a trailing \\r carried between calls."""
        inner = self.decoder
        if inner is not None:
            text = inner.decode(input, final=final)
        else:
            text = input

        # Re-attach the \r held back by the previous call, if any.
        if self.pendingcr and (text or final):
            text = "\r" + text
            self.pendingcr = False

        # Hold back a trailing \r even when not translating, so that
        # readline() is sure to receive \r\n in a single piece.
        if text.endswith("\r") and not final:
            text = text[:-1]
            self.pendingcr = True

        # Record which kinds of newline occur in this piece of text.
        n_crlf = text.count('\r\n')
        n_cr = text.count('\r') - n_crlf
        n_lf = text.count('\n') - n_crlf
        if n_lf:
            self.seennl |= self._LF
        if n_cr:
            self.seennl |= self._CR
        if n_crlf:
            self.seennl |= self._CRLF

        if self.translate:
            if n_crlf:
                text = text.replace("\r\n", "\n")
            if n_cr:
                text = text.replace("\r", "\n")

        return text

    def getstate(self):
        """Return (buffer, flags); the low flag bit is the pending-\\r state."""
        if self.decoder is not None:
            buf, flag = self.decoder.getstate()
        else:
            buf, flag = b"", 0
        flag <<= 1
        if self.pendingcr:
            flag |= 1
        return buf, flag

    def setstate(self, state):
        """Restore a state previously produced by getstate()."""
        buf, flag = state
        self.pendingcr = bool(flag & 1)
        if self.decoder is not None:
            self.decoder.setstate((buf, flag >> 1))

    def reset(self):
        """Forget seen newlines and pending \\r, and reset the inner decoder."""
        self.pendingcr = False
        self.seennl = 0
        if self.decoder is not None:
            self.decoder.reset()

    @property
    def newlines(self):
        """The newline kinds seen so far: None, a string, or a tuple."""
        by_flags = (
            None,
            "\n",
            "\r",
            ("\r", "\n"),
            "\r\n",
            ("\n", "\r\n"),
            ("\r", "\r\n"),
            ("\r", "\n", "\r\n"),
        )
        return by_flags[self.seennl]
1408
1409
1410 class TextIOWrapper(TextIOBase):
1411
1412     r"""Character and line based layer over a BufferedIOBase object, buffer.
1413
1414     encoding gives the name of the encoding that the stream will be
1415     decoded or encoded with. It defaults to locale.getpreferredencoding.
1416
1417     errors determines the strictness of encoding and decoding (see the
1418     codecs.register) and defaults to "strict".
1419
1420     newline can be None, '', '\n', '\r', or '\r\n'.  It controls the
1421     handling of line endings. If it is None, universal newlines is
1422     enabled.  With this enabled, on input, the lines endings '\n', '\r',
1423     or '\r\n' are translated to '\n' before being returned to the
1424     caller. Conversely, on output, '\n' is translated to the system
    default line separator, os.linesep. If newline is any other of its
1426     legal values, that newline becomes the newline when the file is read
1427     and it is returned untranslated. On output, '\n' is converted to the
1428     newline.
1429
1430     If line_buffering is True, a call to flush is implied when a call to
1431     write contains a newline character.
1432     """
1433
1434     _CHUNK_SIZE = 2048
1435
1436     def __init__(self, buffer, encoding=None, errors=None, newline=None,
1437                  line_buffering=False):
1438         if newline is not None and not isinstance(newline, basestring):
1439             raise TypeError("illegal newline type: %r" % (type(newline),))
1440         if newline not in (None, "", "\n", "\r", "\r\n"):
1441             raise ValueError("illegal newline value: %r" % (newline,))
1442         if encoding is None:
1443             try:
1444                 encoding = os.device_encoding(buffer.fileno())
1445             except (AttributeError, UnsupportedOperation):
1446                 pass
1447             if encoding is None:
1448                 try:
1449                     import locale
1450                 except ImportError:
1451                     # Importing locale may fail if Python is being built
1452                     encoding = "ascii"
1453                 else:
1454                     encoding = locale.getpreferredencoding()
1455
1456         if not isinstance(encoding, basestring):
1457             raise ValueError("invalid encoding: %r" % encoding)
1458
1459         if errors is None:
1460             errors = "strict"
1461         else:
1462             if not isinstance(errors, basestring):
1463                 raise ValueError("invalid errors: %r" % errors)
1464
1465         self.buffer = buffer
1466         self._line_buffering = line_buffering
1467         self._encoding = encoding
1468         self._errors = errors
1469         self._readuniversal = not newline
1470         self._readtranslate = newline is None
1471         self._readnl = newline
1472         self._writetranslate = newline != ''
1473         self._writenl = newline or os.linesep
1474         self._encoder = None
1475         self._decoder = None
1476         self._decoded_chars = ''  # buffer for text returned from decoder
1477         self._decoded_chars_used = 0  # offset into _decoded_chars for read()
1478         self._snapshot = None  # info for reconstructing decoder state
1479         self._seekable = self._telling = self.buffer.seekable()
1480
1481         if self._seekable and self.writable():
1482             position = self.buffer.tell()
1483             if position != 0:
1484                 try:
1485                     self._get_encoder().setstate(0)
1486                 except LookupError:
1487                     # Sometimes the encoder doesn't exist
1488                     pass
1489
1490     # self._snapshot is either None, or a tuple (dec_flags, next_input)
1491     # where dec_flags is the second (integer) item of the decoder state
1492     # and next_input is the chunk of input bytes that comes next after the
1493     # snapshot point.  We use this to reconstruct decoder states in tell().
1494
1495     # Naming convention:
1496     #   - "bytes_..." for integer variables that count input bytes
1497     #   - "chars_..." for integer variables that count decoded characters
1498
1499     def __repr__(self):
1500         try:
1501             name = self.name
1502         except AttributeError:
1503             return "<_pyio.TextIOWrapper encoding='{0}'>".format(self.encoding)
1504         else:
1505             return "<_pyio.TextIOWrapper name={0!r} encoding='{1}'>".format(
1506                 name, self.encoding)
1507
1508     @property
1509     def encoding(self):
1510         return self._encoding
1511
1512     @property
1513     def errors(self):
1514         return self._errors
1515
1516     @property
1517     def line_buffering(self):
1518         return self._line_buffering
1519
1520     def seekable(self):
1521         return self._seekable
1522
1523     def readable(self):
1524         return self.buffer.readable()
1525
1526     def writable(self):
1527         return self.buffer.writable()
1528
1529     def flush(self):
1530         self.buffer.flush()
1531         self._telling = self._seekable
1532
1533     def close(self):
1534         if self.buffer is not None:
1535             try:
1536                 self.flush()
1537             except IOError:
1538                 pass  # If flush() fails, just give up
1539             self.buffer.close()
1540
1541     @property
1542     def closed(self):
1543         return self.buffer.closed
1544
1545     @property
1546     def name(self):
1547         return self.buffer.name
1548
1549     def fileno(self):
1550         return self.buffer.fileno()
1551
1552     def isatty(self):
1553         return self.buffer.isatty()
1554
1555     def write(self, s):
1556         if self.closed:
1557             raise ValueError("write to closed file")
1558         if not isinstance(s, unicode):
1559             raise TypeError("can't write %s to text stream" %
1560                             s.__class__.__name__)
1561         length = len(s)
1562         haslf = (self._writetranslate or self._line_buffering) and "\n" in s
1563         if haslf and self._writetranslate and self._writenl != "\n":
1564             s = s.replace("\n", self._writenl)
1565         encoder = self._encoder or self._get_encoder()
1566         # XXX What if we were just reading?
1567         b = encoder.encode(s)
1568         self.buffer.write(b)
1569         if self._line_buffering and (haslf or "\r" in s):
1570             self.flush()
1571         self._snapshot = None
1572         if self._decoder:
1573             self._decoder.reset()
1574         return length
1575
1576     def _get_encoder(self):
1577         make_encoder = codecs.getincrementalencoder(self._encoding)
1578         self._encoder = make_encoder(self._errors)
1579         return self._encoder
1580
1581     def _get_decoder(self):
1582         make_decoder = codecs.getincrementaldecoder(self._encoding)
1583         decoder = make_decoder(self._errors)
1584         if self._readuniversal:
1585             decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
1586         self._decoder = decoder
1587         return decoder
1588
1589     # The following three methods implement an ADT for _decoded_chars.
1590     # Text returned from the decoder is buffered here until the client
1591     # requests it by calling our read() or readline() method.
1592     def _set_decoded_chars(self, chars):
1593         """Set the _decoded_chars buffer."""
1594         self._decoded_chars = chars
1595         self._decoded_chars_used = 0
1596
1597     def _get_decoded_chars(self, n=None):
1598         """Advance into the _decoded_chars buffer."""
1599         offset = self._decoded_chars_used
1600         if n is None:
1601             chars = self._decoded_chars[offset:]
1602         else:
1603             chars = self._decoded_chars[offset:offset + n]
1604         self._decoded_chars_used += len(chars)
1605         return chars
1606
1607     def _rewind_decoded_chars(self, n):
1608         """Rewind the _decoded_chars buffer."""
1609         if self._decoded_chars_used < n:
1610             raise AssertionError("rewind decoded_chars out of bounds")
1611         self._decoded_chars_used -= n
1612
1613     def _read_chunk(self):
1614         """
1615         Read and decode the next chunk of data from the BufferedReader.
1616         """
1617
1618         # The return value is True unless EOF was reached.  The decoded
1619         # string is placed in self._decoded_chars (replacing its previous
1620         # value).  The entire input chunk is sent to the decoder, though
1621         # some of it may remain buffered in the decoder, yet to be
1622         # converted.
1623
1624         if self._decoder is None:
1625             raise ValueError("no decoder")
1626
1627         if self._telling:
1628             # To prepare for tell(), we need to snapshot a point in the
1629             # file where the decoder's input buffer is empty.
1630
1631             dec_buffer, dec_flags = self._decoder.getstate()
1632             # Given this, we know there was a valid snapshot point
1633             # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1634
1635         # Read a chunk, decode it, and put the result in self._decoded_chars.
1636         input_chunk = self.buffer.read1(self._CHUNK_SIZE)
1637         eof = not input_chunk
1638         self._set_decoded_chars(self._decoder.decode(input_chunk, eof))
1639
1640         if self._telling:
1641             # At the snapshot point, len(dec_buffer) bytes before the read,
1642             # the next input to be decoded is dec_buffer + input_chunk.
1643             self._snapshot = (dec_flags, dec_buffer + input_chunk)
1644
1645         return not eof
1646
1647     def _pack_cookie(self, position, dec_flags=0,
1648                            bytes_to_feed=0, need_eof=0, chars_to_skip=0):
1649         # The meaning of a tell() cookie is: seek to position, set the
1650         # decoder flags to dec_flags, read bytes_to_feed bytes, feed them
1651         # into the decoder with need_eof as the EOF flag, then skip
1652         # chars_to_skip characters of the decoded result.  For most simple
1653         # decoders, tell() will often just give a byte offset in the file.
1654         return (position | (dec_flags<<64) | (bytes_to_feed<<128) |
1655                (chars_to_skip<<192) | bool(need_eof)<<256)
1656
1657     def _unpack_cookie(self, bigint):
1658         rest, position = divmod(bigint, 1<<64)
1659         rest, dec_flags = divmod(rest, 1<<64)
1660         rest, bytes_to_feed = divmod(rest, 1<<64)
1661         need_eof, chars_to_skip = divmod(rest, 1<<64)
1662         return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip
1663
    def tell(self):
        """Return a cookie describing the current position in the text.

        When there is no decoder or no snapshot, the result is simply the
        byte offset reported by the underlying buffer.  Otherwise the
        result is built by _pack_cookie() and encodes a byte offset plus
        the decoder state and replay information needed to get back to
        the current character.

        Raises IOError if the stream is not seekable, if telling has been
        switched off (self._telling is false), or if the position cannot
        be reconstructed from the snapshot.
        """
        if not self._seekable:
            raise IOError("underlying stream is not seekable")
        if not self._telling:
            raise IOError("telling position disabled by next() call")
        self.flush()
        position = self.buffer.tell()
        decoder = self._decoder
        if decoder is None or self._snapshot is None:
            # Nothing to reconstruct from: the buffer's byte offset is the
            # answer, provided no decoded text is still pending.
            if self._decoded_chars:
                # This should never happen.
                raise AssertionError("pending decoded text")
            return position

        # Skip backward to the snapshot point (see _read_chunk).
        dec_flags, next_input = self._snapshot
        position -= len(next_input)

        # How many decoded characters have been used up since the snapshot?
        chars_to_skip = self._decoded_chars_used
        if chars_to_skip == 0:
            # We haven't moved from the snapshot point.
            return self._pack_cookie(position, dec_flags)

        # Starting from the snapshot position, we will walk the decoder
        # forward until it gives us enough decoded characters.
        # The decoder's real state is saved here and put back in the
        # finally clause, so the walk leaves the decoder as it found it.
        saved_state = decoder.getstate()
        try:
            # Note our initial start point.
            decoder.setstate((b'', dec_flags))
            start_pos = position
            start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
            need_eof = 0

            # Feed the decoder one byte at a time.  As we go, note the
            # nearest "safe start point" before the current location
            # (a point where the decoder has nothing buffered, so seek()
            # can safely start from there and advance to this location).
            for next_byte in next_input:
                bytes_fed += 1
                chars_decoded += len(decoder.decode(next_byte))
                dec_buffer, dec_flags = decoder.getstate()
                if not dec_buffer and chars_decoded <= chars_to_skip:
                    # Decoder buffer is empty, so this is a safe start point.
                    # Advance the start point and make the remaining
                    # counters relative to it.
                    start_pos += bytes_fed
                    chars_to_skip -= chars_decoded
                    start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
                if chars_decoded >= chars_to_skip:
                    # Enough characters decoded to reach the current spot.
                    break
            else:
                # We didn't get enough decoded data; signal EOF to get more.
                chars_decoded += len(decoder.decode(b'', final=True))
                need_eof = 1
                if chars_decoded < chars_to_skip:
                    raise IOError("can't reconstruct logical file position")

            # The returned cookie corresponds to the last safe start point.
            return self._pack_cookie(
                start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
        finally:
            decoder.setstate(saved_state)
1725
1726     def truncate(self, pos=None):
1727         self.flush()
1728         if pos is None:
1729             pos = self.tell()
1730         return self.buffer.truncate(pos)
1731
1732     def detach(self):
1733         if self.buffer is None:
1734             raise ValueError("buffer is already detached")
1735         self.flush()
1736         buffer = self.buffer
1737         self.buffer = None
1738         return buffer
1739
1740     def seek(self, cookie, whence=0):
1741         if self.closed:
1742             raise ValueError("tell on closed file")
1743         if not self._seekable:
1744             raise IOError("underlying stream is not seekable")
1745         if whence == 1: # seek relative to current position
1746             if cookie != 0:
1747                 raise IOError("can't do nonzero cur-relative seeks")
1748             # Seeking to the current position should attempt to
1749             # sync the underlying buffer with the current position.
1750             whence = 0
1751             cookie = self.tell()
1752         if whence == 2: # seek relative to end of file
1753             if cookie != 0:
1754                 raise IOError("can't do nonzero end-relative seeks")
1755             self.flush()
1756             position = self.buffer.seek(0, 2)
1757             self._set_decoded_chars('')
1758             self._snapshot = None
1759             if self._decoder:
1760                 self._decoder.reset()
1761             return position
1762         if whence != 0:
1763             raise ValueError("invalid whence (%r, should be 0, 1 or 2)" %
1764                              (whence,))
1765         if cookie < 0:
1766             raise ValueError("negative seek position %r" % (cookie,))
1767         self.flush()
1768
1769         # The strategy of seek() is to go back to the safe start point
1770         # and replay the effect of read(chars_to_skip) from there.
1771         start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
1772             self._unpack_cookie(cookie)
1773
1774         # Seek back to the safe start point.
1775         self.buffer.seek(start_pos)
1776         self._set_decoded_chars('')
1777         self._snapshot = None
1778
1779         # Restore the decoder to its state from the safe start point.
1780         if cookie == 0 and self._decoder:
1781             self._decoder.reset()
1782         elif self._decoder or dec_flags or chars_to_skip:
1783             self._decoder = self._decoder or self._get_decoder()
1784             self._decoder.setstate((b'', dec_flags))
1785             self._snapshot = (dec_flags, b'')
1786
1787         if chars_to_skip:
1788             # Just like _read_chunk, feed the decoder and save a snapshot.
1789             input_chunk = self.buffer.read(bytes_to_feed)
1790             self._set_decoded_chars(
1791                 self._decoder.decode(input_chunk, need_eof))
1792             self._snapshot = (dec_flags, input_chunk)
1793
1794             # Skip chars_to_skip of the decoded characters.
1795             if len(self._decoded_chars) < chars_to_skip:
1796                 raise IOError("can't restore logical file position")
1797             self._decoded_chars_used = chars_to_skip
1798
1799         # Finally, reset the encoder (merely useful for proper BOM handling)
1800         try:
1801             encoder = self._encoder or self._get_encoder()
1802         except LookupError:
1803             # Sometimes the encoder doesn't exist
1804             pass
1805         else:
1806             if cookie != 0:
1807                 encoder.setstate(0)
1808             else:
1809                 encoder.reset()
1810         return cookie
1811
1812     def read(self, n=None):
1813         self._checkReadable()
1814         if n is None:
1815             n = -1
1816         decoder = self._decoder or self._get_decoder()
1817         try:
1818             n.__index__
1819         except AttributeError:
1820             raise TypeError("an integer is required")
1821         if n < 0:
1822             # Read everything.
1823             result = (self._get_decoded_chars() +
1824                       decoder.decode(self.buffer.read(), final=True))
1825             self._set_decoded_chars('')
1826             self._snapshot = None
1827             return result
1828         else:
1829             # Keep reading chunks until we have n characters to return.
1830             eof = False
1831             result = self._get_decoded_chars(n)
1832             while len(result) < n and not eof:
1833                 eof = not self._read_chunk()
1834                 result += self._get_decoded_chars(n - len(result))
1835             return result
1836
1837     def next(self):
1838         self._telling = False
1839         line = self.readline()
1840         if not line:
1841             self._snapshot = None
1842             self._telling = self._seekable
1843             raise StopIteration
1844         return line
1845
    def readline(self, limit=None):
        """Read and return one line of text.

        limit, when given and non-negative, caps the number of characters
        returned; None means no cap.  How a line ending is recognised
        depends on self._readtranslate, self._readuniversal and
        self._readnl.  At end of file whatever text was collected (possibly
        an empty string) is returned.

        Raises ValueError if the stream is closed and TypeError if limit
        is neither None nor an integer.
        """
        if self.closed:
            raise ValueError("read from closed file")
        if limit is None:
            limit = -1
        elif not isinstance(limit, (int, long)):
            raise TypeError("limit must be an integer")

        # Grab all the decoded text (we will rewind any extra bits later).
        line = self._get_decoded_chars()

        # Index from which the next newline search begins, so text that
        # was already scanned is not searched again.
        start = 0
        # Make the decoder if it doesn't already exist.
        if not self._decoder:
            self._get_decoder()

        # endpos becomes the index just past the end of the line to return.
        pos = endpos = None
        while True:
            if self._readtranslate:
                # Newlines are already translated, only search for \n
                pos = line.find('\n', start)
                if pos >= 0:
                    endpos = pos + 1
                    break
                else:
                    start = len(line)

            elif self._readuniversal:
                # Universal newline search. Find any of \r, \r\n, \n
                # The decoder ensures that \r\n are not split in two pieces

                # In C we'd look for these in parallel of course.
                nlpos = line.find("\n", start)
                crpos = line.find("\r", start)
                if crpos == -1:
                    if nlpos == -1:
                        # Nothing found
                        start = len(line)
                    else:
                        # Found \n
                        endpos = nlpos + 1
                        break
                elif nlpos == -1:
                    # Found lone \r
                    endpos = crpos + 1
                    break
                elif nlpos < crpos:
                    # Found \n
                    endpos = nlpos + 1
                    break
                elif nlpos == crpos + 1:
                    # Found \r\n
                    endpos = crpos + 2
                    break
                else:
                    # Found \r
                    endpos = crpos + 1
                    break
            else:
                # non-universal
                # Note: this search always starts at index 0; start is
                # not used in this branch.
                pos = line.find(self._readnl)
                if pos >= 0:
                    endpos = pos + len(self._readnl)
                    break

            if limit >= 0 and len(line) >= limit:
                endpos = limit  # reached length limit
                break

            # No line ending seen yet - get more data
            # Keep reading chunks until one yields decoded text or
            # _read_chunk() reports that nothing more could be read.
            while self._read_chunk():
                if self._decoded_chars:
                    break
            if self._decoded_chars:
                line += self._get_decoded_chars()
            else:
                # end of file
                self._set_decoded_chars('')
                self._snapshot = None
                return line

        if limit >= 0 and endpos > limit:
            endpos = limit  # don't exceed limit

        # Rewind _decoded_chars to just after the line ending we found.
        self._rewind_decoded_chars(len(line) - endpos)
        return line[:endpos]
1933
1934     @property
1935     def newlines(self):
1936         return self._decoder.newlines if self._decoder else None
1937
1938
class StringIO(TextIOWrapper):
    """In-memory text stream.

    Text is kept, UTF-8 encoded, in a private BytesIO object.  A non-empty
    initial_value becomes the starting contents of the stream.  The
    newline argument is passed to TextIOWrapper's constructor and has the
    same meaning there.
    """

    def __init__(self, initial_value="", newline="\n"):
        """Create the stream, optionally pre-filled with initial_value."""
        super(StringIO, self).__init__(
            BytesIO(), encoding="utf-8", errors="strict", newline=newline)
        # Issue #5645: with newline=None, behave like the C implementation
        # on every platform (Windows included) by disabling translation of
        # newlines on write.
        if newline is None:
            self._writetranslate = False
        if not initial_value:
            return
        text = initial_value
        if not isinstance(text, unicode):
            text = unicode(text)
        # Store the initial text, then rewind so reading starts at the top.
        self.write(text)
        self.seek(0)

    def getvalue(self):
        """Return the whole contents of the stream as decoded text."""
        self.flush()
        raw = self.buffer.getvalue()
        return raw.decode(self._encoding, self._errors)

    def __repr__(self):
        # TextIOWrapper's repr mentions the encoding; for StringIO the
        # encoding is an implementation detail, so use the plain object
        # repr instead.
        return object.__repr__(self)

    @property
    def errors(self):
        """Always None for StringIO."""
        return None

    @property
    def encoding(self):
        """Always None for StringIO."""
        return None

    def detach(self):
        """Not supported: detaching doesn't make sense on StringIO."""
        self._unsupported("detach")