Lib/_pyio.py

   1 """
   2 Python implementation of the io module.
   3 """
   4
   5 from __future__ import print_function
   6 from __future__ import unicode_literals
   7
   8 import os
   9 import abc
  10 import codecs
  11 import warnings
# Import thread instead of threading to reduce startup cost
  13 try:
  14     from thread import allocate_lock as Lock
  15 except ImportError:
  16     from dummy_thread import allocate_lock as Lock
  17
  18 import io
  19 from io import __all__
  20 from io import SEEK_SET, SEEK_CUR, SEEK_END
  21
# Module-level default metaclass: classes below that declare no bases
# (e.g. "class IOBase:") are created as new-style classes.
__metaclass__ = type

# Fallback buffer size used by open() when a buffer size must be chosen.
# open() uses st_blksize whenever we can
DEFAULT_BUFFER_SIZE = 8 * 1024  # bytes
  26
  27 # NOTE: Base classes defined here are registered with the "official" ABCs
  28 # defined in io.py. We don't use real inheritance though, because we don't
  29 # want to inherit the C implementations.
  30
  31
  32 class BlockingIOError(IOError):
  33
  34     """Exception raised when I/O would block on a non-blocking I/O stream."""
  35
  36     def __init__(self, errno, strerror, characters_written=0):
  37         super(IOError, self).__init__(errno, strerror)
  38         if not isinstance(characters_written, (int, long)):
  39             raise TypeError("characters_written must be a integer")
  40         self.characters_written = characters_written
  41
  42
  43 def open(file, mode="r", buffering=None,
  44          encoding=None, errors=None,
  45          newline=None, closefd=True):
  46
  47     r"""Open file and return a stream.  Raise IOError upon failure.
  48
  49     file is either a text or byte string giving the name (and the path
  50     if the file isn't in the current working directory) of the file to
  51     be opened or an integer file descriptor of the file to be
  52     wrapped. (If a file descriptor is given, it is closed when the
  53     returned I/O object is closed, unless closefd is set to False.)
  54
  55     mode is an optional string that specifies the mode in which the file
  56     is opened. It defaults to 'r' which means open for reading in text
  57     mode.  Other common values are 'w' for writing (truncating the file if
  58     it already exists), and 'a' for appending (which on some Unix systems,
  59     means that all writes append to the end of the file regardless of the
  60     current seek position). In text mode, if encoding is not specified the
  61     encoding used is platform dependent. (For reading and writing raw
  62     bytes use binary mode and leave encoding unspecified.) The available
  63     modes are:
  64
  65     ========= ===============================================================
  66     Character Meaning
  67     --------- ---------------------------------------------------------------
  68     'r'       open for reading (default)
  69     'w'       open for writing, truncating the file first
  70     'a'       open for writing, appending to the end of the file if it exists
  71     'b'       binary mode
  72     't'       text mode (default)
  73     '+'       open a disk file for updating (reading and writing)
  74     'U'       universal newline mode (for backwards compatibility; unneeded
  75               for new code)
  76     ========= ===============================================================
  77
  78     The default mode is 'rt' (open for reading text). For binary random
  79     access, the mode 'w+b' opens and truncates the file to 0 bytes, while
  80     'r+b' opens the file without truncation.
  81
  82     Python distinguishes between files opened in binary and text modes,
  83     even when the underlying operating system doesn't. Files opened in
  84     binary mode (appending 'b' to the mode argument) return contents as
  85     bytes objects without any decoding. In text mode (the default, or when
  86     't' is appended to the mode argument), the contents of the file are
  87     returned as strings, the bytes having been first decoded using a
  88     platform-dependent encoding or using the specified encoding if given.
  89
  90     buffering is an optional integer used to set the buffering policy.
  91     Pass 0 to switch buffering off (only allowed in binary mode), 1 to select
  92     line buffering (only usable in text mode), and an integer > 1 to indicate
  93     the size of a fixed-size chunk buffer.  When no buffering argument is
  94     given, the default buffering policy works as follows:
  95
  96     * Binary files are buffered in fixed-size chunks; the size of the buffer
  97       is chosen using a heuristic trying to determine the underlying device's
  98       "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
  99       On many systems, the buffer will typically be 4096 or 8192 bytes long.
 100
 101     * "Interactive" text files (files for which isatty() returns True)
 102       use line buffering.  Other text files use the policy described above
 103       for binary files.
 104
 105     encoding is the name of the encoding used to decode or encode the
 106     file. This should only be used in text mode. The default encoding is
 107     platform dependent, but any encoding supported by Python can be
 108     passed.  See the codecs module for the list of supported encodings.
 109
 110     errors is an optional string that specifies how encoding errors are to
 111     be handled---this argument should not be used in binary mode. Pass
 112     'strict' to raise a ValueError exception if there is an encoding error
 113     (the default of None has the same effect), or pass 'ignore' to ignore
 114     errors. (Note that ignoring encoding errors can lead to data loss.)
 115     See the documentation for codecs.register for a list of the permitted
 116     encoding error strings.
 117
 118     newline controls how universal newlines works (it only applies to text
 119     mode). It can be None, '', '\n', '\r', and '\r\n'.  It works as
 120     follows:
 121
 122     * On input, if newline is None, universal newlines mode is
 123       enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
 124       these are translated into '\n' before being returned to the
 125       caller. If it is '', universal newline mode is enabled, but line
 126       endings are returned to the caller untranslated. If it has any of
 127       the other legal values, input lines are only terminated by the given
 128       string, and the line ending is returned to the caller untranslated.
 129
 130     * On output, if newline is None, any '\n' characters written are
 131       translated to the system default line separator, os.linesep. If
 132       newline is '', no translation takes place. If newline is any of the
 133       other legal values, any '\n' characters written are translated to
 134       the given string.
 135
 136     If closefd is False, the underlying file descriptor will be kept open
 137     when the file is closed. This does not work when a file name is given
 138     and must be True in that case.
 139
 140     open() returns a file object whose type depends on the mode, and
 141     through which the standard file operations such as reading and writing
 142     are performed. When open() is used to open a file in a text mode ('w',
 143     'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
 144     a file in a binary mode, the returned class varies: in read binary
 145     mode, it returns a BufferedReader; in write binary and append binary
 146     modes, it returns a BufferedWriter, and in read/write mode, it returns
 147     a BufferedRandom.
 148
 149     It is also possible to use a string or bytearray as a file for both
 150     reading and writing. For strings StringIO can be used like a file
 151     opened in a text mode, and for bytes a BytesIO can be used like a file
 152     opened in a binary mode.
 153     """
 154     if not isinstance(file, (basestring, int, long)):
 155         raise TypeError("invalid file: %r" % file)
 156     if not isinstance(mode, basestring):
 157         raise TypeError("invalid mode: %r" % mode)
 158     if buffering is not None and not isinstance(buffering, (int, long)):
 159         raise TypeError("invalid buffering: %r" % buffering)
 160     if encoding is not None and not isinstance(encoding, basestring):
 161         raise TypeError("invalid encoding: %r" % encoding)
 162     if errors is not None and not isinstance(errors, basestring):
 163         raise TypeError("invalid errors: %r" % errors)
 164     modes = set(mode)
 165     if modes - set("arwb+tU") or len(mode) > len(modes):
 166         raise ValueError("invalid mode: %r" % mode)
 167     reading = "r" in modes
 168     writing = "w" in modes
 169     appending = "a" in modes
 170     updating = "+" in modes
 171     text = "t" in modes
 172     binary = "b" in modes
 173     if "U" in modes:
 174         if writing or appending:
 175             raise ValueError("can't use U and writing mode at once")
 176         reading = True
 177     if text and binary:
 178         raise ValueError("can't have text and binary mode at once")
 179     if reading + writing + appending > 1:
 180         raise ValueError("can't have read/write/append mode at once")
 181     if not (reading or writing or appending):
 182         raise ValueError("must have exactly one of read/write/append mode")
 183     if binary and encoding is not None:
 184         raise ValueError("binary mode doesn't take an encoding argument")
 185     if binary and errors is not None:
 186         raise ValueError("binary mode doesn't take an errors argument")
 187     if binary and newline is not None:
 188         raise ValueError("binary mode doesn't take a newline argument")
 189     raw = FileIO(file,
 190                  (reading and "r" or "") +
 191                  (writing and "w" or "") +
 192                  (appending and "a" or "") +
 193                  (updating and "+" or ""),
 194                  closefd)
 195     if buffering is None:
 196         buffering = -1
 197     line_buffering = False
 198     if buffering == 1 or buffering < 0 and raw.isatty():
 199         buffering = -1
 200         line_buffering = True
 201     if buffering < 0:
 202         buffering = DEFAULT_BUFFER_SIZE
 203         try:
 204             bs = os.fstat(raw.fileno()).st_blksize
 205         except (os.error, AttributeError):
 206             pass
 207         else:
 208             if bs > 1:
 209                 buffering = bs
 210     if buffering < 0:
 211         raise ValueError("invalid buffering size")
 212     if buffering == 0:
 213         if binary:
 214             return raw
 215         raise ValueError("can't have unbuffered text I/O")
 216     if updating:
 217         buffer = BufferedRandom(raw, buffering)
 218     elif writing or appending:
 219         buffer = BufferedWriter(raw, buffering)
 220     elif reading:
 221         buffer = BufferedReader(raw, buffering)
 222     else:
 223         raise ValueError("unknown mode: %r" % mode)
 224     if binary:
 225         return buffer
 226     text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering)
 227     text.mode = mode
 228     return text
 229
 230
 231 class DocDescriptor:
 232     """Helper for builtins.open.__doc__
 233     """
 234     def __get__(self, obj, typ):
 235         return (
 236             "open(file, mode='r', buffering=None, encoding=None, "
 237                  "errors=None, newline=None, closefd=True)\n\n" +
 238             open.__doc__)
 239
 240 class OpenWrapper:
 241     """Wrapper for builtins.open
 242
 243     Trick so that open won't become a bound method when stored
 244     as a class variable (as dbm.dumb does).
 245
 246     See initstdio() in Python/pythonrun.c.
 247     """
 248     __doc__ = DocDescriptor()
 249
 250     def __new__(cls, *args, **kwargs):
 251         return open(*args, **kwargs)
 252
 253
 254 class UnsupportedOperation(ValueError, IOError):
 255     pass
 256
 257
 258 class IOBase:
 259     __metaclass__ = abc.ABCMeta
 260
 261     """The abstract base class for all I/O classes, acting on streams of
 262     bytes. There is no public constructor.
 263
 264     This class provides dummy implementations for many methods that
 265     derived classes can override selectively; the default implementations
 266     represent a file that cannot be read, written or seeked.
 267
 268     Even though IOBase does not declare read, readinto, or write because
 269     their signatures will vary, implementations and clients should
 270     consider those methods part of the interface. Also, implementations
 271     may raise a IOError when operations they do not support are called.
 272
 273     The basic type used for binary data read from or written to a file is
 274     bytes. bytearrays are accepted too, and in some cases (such as
 275     readinto) needed. Text I/O classes work with str data.
 276
 277     Note that calling any method (even inquiries) on a closed stream is
 278     undefined. Implementations may raise IOError in this case.
 279
 280     IOBase (and its subclasses) support the iterator protocol, meaning
 281     that an IOBase object can be iterated over yielding the lines in a
 282     stream.
 283
 284     IOBase also supports the :keyword:`with` statement. In this example,
 285     fp is closed after the suite of the with statement is complete:
 286
 287     with open('spam.txt', 'r') as fp:
 288         fp.write('Spam and eggs!')
 289     """
 290
 291     ### Internal ###
 292
 293     def _unsupported(self, name):
 294         """Internal: raise an exception for unsupported operations."""
 295         raise UnsupportedOperation("%s.%s() not supported" %
 296                                    (self.__class__.__name__, name))
 297
 298     ### Positioning ###
 299
 300     def seek(self, pos, whence=0):
 301         """Change stream position.
 302
 303         Change the stream position to byte offset offset. offset is
 304         interpreted relative to the position indicated by whence.  Values
 305         for whence are:
 306
 307         * 0 -- start of stream (the default); offset should be zero or positive
 308         * 1 -- current stream position; offset may be negative
 309         * 2 -- end of stream; offset is usually negative
 310
 311         Return the new absolute position.
 312         """
 313         self._unsupported("seek")
 314
 315     def tell(self):
 316         """Return current stream position."""
 317         return self.seek(0, 1)
 318
 319     def truncate(self, pos=None):
 320         """Truncate file to size bytes.
 321
 322         Size defaults to the current IO position as reported by tell().  Return
 323         the new size.
 324         """
 325         self._unsupported("truncate")
 326
 327     ### Flush and close ###
 328
 329     def flush(self):
 330         """Flush write buffers, if applicable.
 331
 332         This is not implemented for read-only and non-blocking streams.
 333         """
 334         # XXX Should this return the number of bytes written???
 335
 336     __closed = False
 337
 338     def close(self):
 339         """Flush and close the IO object.
 340
 341         This method has no effect if the file is already closed.
 342         """
 343         if not self.__closed:
 344             try:
 345                 self.flush()
 346             except IOError:
 347                 pass  # If flush() fails, just give up
 348             self.__closed = True
 349
 350     def __del__(self):
 351         """Destructor.  Calls close()."""
 352         # The try/except block is in case this is called at program
 353         # exit time, when it's possible that globals have already been
 354         # deleted, and then the close() call might fail.  Since
 355         # there's nothing we can do about such failures and they annoy
 356         # the end users, we suppress the traceback.
 357         try:
 358             self.close()
 359         except:
 360             pass
 361
 362     ### Inquiries ###
 363
 364     def seekable(self):
 365         """Return whether object supports random access.
 366
 367         If False, seek(), tell() and truncate() will raise IOError.
 368         This method may need to do a test seek().
 369         """
 370         return False
 371
 372     def _checkSeekable(self, msg=None):
 373         """Internal: raise an IOError if file is not seekable
 374         """
 375         if not self.seekable():
 376             raise IOError("File or stream is not seekable."
 377                           if msg is None else msg)
 378
 379
 380     def readable(self):
 381         """Return whether object was opened for reading.
 382
 383         If False, read() will raise IOError.
 384         """
 385         return False
 386
 387     def _checkReadable(self, msg=None):
 388         """Internal: raise an IOError if file is not readable
 389         """
 390         if not self.readable():
 391             raise IOError("File or stream is not readable."
 392                           if msg is None else msg)
 393
 394     def writable(self):
 395         """Return whether object was opened for writing.
 396
 397         If False, write() and truncate() will raise IOError.
 398         """
 399         return False
 400
 401     def _checkWritable(self, msg=None):
 402         """Internal: raise an IOError if file is not writable
 403         """
 404         if not self.writable():
 405             raise IOError("File or stream is not writable."
 406                           if msg is None else msg)
 407
 408     @property
 409     def closed(self):
 410         """closed: bool.  True iff the file has been closed.
 411
 412         For backwards compatibility, this is a property, not a predicate.
 413         """
 414         return self.__closed
 415
 416     def _checkClosed(self, msg=None):
 417         """Internal: raise an ValueError if file is closed
 418         """
 419         if self.closed:
 420             raise ValueError("I/O operation on closed file."
 421                              if msg is None else msg)
 422
 423     ### Context manager ###
 424
 425     def __enter__(self):
 426         """Context management protocol.  Returns self."""
 427         self._checkClosed()
 428         return self
 429
 430     def __exit__(self, *args):
 431         """Context management protocol.  Calls close()"""
 432         self.close()
 433
 434     ### Lower-level APIs ###
 435
 436     # XXX Should these be present even if unimplemented?
 437
 438     def fileno(self):
 439         """Returns underlying file descriptor if one exists.
 440
 441         An IOError is raised if the IO object does not use a file descriptor.
 442         """
 443         self._unsupported("fileno")
 444
 445     def isatty(self):
 446         """Return whether this is an 'interactive' stream.
 447
 448         Return False if it can't be determined.
 449         """
 450         self._checkClosed()
 451         return False
 452
 453     ### Readline[s] and writelines ###
 454
 455     def readline(self, limit=-1):
 456         r"""Read and return a line from the stream.
 457
 458         If limit is specified, at most limit bytes will be read.
 459
 460         The line terminator is always b'\n' for binary files; for text
 461         files, the newlines argument to open can be used to select the line
 462         terminator(s) recognized.
 463         """
 464         # For backwards compatibility, a (slowish) readline().
 465         if hasattr(self, "peek"):
 466             def nreadahead():
 467                 readahead = self.peek(1)
 468                 if not readahead:
 469                     return 1
 470                 n = (readahead.find(b"\n") + 1) or len(readahead)
 471                 if limit >= 0:
 472                     n = min(n, limit)
 473                 return n
 474         else:
 475             def nreadahead():
 476                 return 1
 477         if limit is None:
 478             limit = -1
 479         elif not isinstance(limit, (int, long)):
 480             raise TypeError("limit must be an integer")
 481         res = bytearray()
 482         while limit < 0 or len(res) < limit:
 483             b = self.read(nreadahead())
 484             if not b:
 485                 break
 486             res += b
 487             if res.endswith(b"\n"):
 488                 break
 489         return bytes(res)
 490
 491     def __iter__(self):
 492         self._checkClosed()
 493         return self
 494
 495     def next(self):
 496         line = self.readline()
 497         if not line:
 498             raise StopIteration
 499         return line
 500
 501     def readlines(self, hint=None):
 502         """Return a list of lines from the stream.
 503
 504         hint can be specified to control the number of lines read: no more
 505         lines will be read if the total size (in bytes/characters) of all
 506         lines so far exceeds hint.
 507         """
 508         if hint is not None and not isinstance(hint, (int, long)):
 509             raise TypeError("integer or None expected")
 510         if hint is None or hint <= 0:
 511             return list(self)
 512         n = 0
 513         lines = []
 514         for line in self:
 515             lines.append(line)
 516             n += len(line)
 517             if n >= hint:
 518                 break
 519         return lines
 520
 521     def writelines(self, lines):
 522         self._checkClosed()
 523         for line in lines:
 524             self.write(line)
 525
# Register with the "official" ABC from io instead of inheriting from it.
io.IOBase.register(IOBase)
 527
 528
 529 class RawIOBase(IOBase):
 530
 531     """Base class for raw binary I/O."""
 532
 533     # The read() method is implemented by calling readinto(); derived
 534     # classes that want to support read() only need to implement
 535     # readinto() as a primitive operation.  In general, readinto() can be
 536     # more efficient than read().
 537
 538     # (It would be tempting to also provide an implementation of
 539     # readinto() in terms of read(), in case the latter is a more suitable
 540     # primitive operation, but that would lead to nasty recursion in case
 541     # a subclass doesn't implement either.)
 542
 543     def read(self, n=-1):
 544         """Read and return up to n bytes.
 545
 546         Returns an empty bytes object on EOF, or None if the object is
 547         set not to block and has no data to read.
 548         """
 549         if n is None:
 550             n = -1
 551         if n < 0:
 552             return self.readall()
 553         b = bytearray(n.__index__())
 554         n = self.readinto(b)
 555         del b[n:]
 556         return bytes(b)
 557
 558     def readall(self):
 559         """Read until EOF, using multiple read() call."""
 560         res = bytearray()
 561         while True:
 562             data = self.read(DEFAULT_BUFFER_SIZE)
 563             if not data:
 564                 break
 565             res += data
 566         return bytes(res)
 567
 568     def readinto(self, b):
 569         """Read up to len(b) bytes into b.
 570
 571         Returns number of bytes read (0 for EOF), or None if the object
 572         is set not to block as has no data to read.
 573         """
 574         self._unsupported("readinto")
 575
 576     def write(self, b):
 577         """Write the given buffer to the IO stream.
 578
 579         Returns the number of bytes written, which may be less than len(b).
 580         """
 581         self._unsupported("write")
 582
# Register with the "official" ABC from io instead of inheriting from it.
io.RawIOBase.register(RawIOBase)
# FileIO is taken from the _io module rather than defined here, and is
# registered as a virtual subclass of this module's RawIOBase.
from _io import FileIO
RawIOBase.register(FileIO)
 586
 587
 588 class BufferedIOBase(IOBase):
 589
 590     """Base class for buffered IO objects.
 591
 592     The main difference with RawIOBase is that the read() method
 593     supports omitting the size argument, and does not have a default
 594     implementation that defers to readinto().
 595
 596     In addition, read(), readinto() and write() may raise
 597     BlockingIOError if the underlying raw stream is in non-blocking
 598     mode and not ready; unlike their raw counterparts, they will never
 599     return None.
 600
 601     A typical implementation should not inherit from a RawIOBase
 602     implementation, but wrap one.
 603     """
 604
 605     def read(self, n=None):
 606         """Read and return up to n bytes.
 607
 608         If the argument is omitted, None, or negative, reads and
 609         returns all data until EOF.
 610
 611         If the argument is positive, and the underlying raw stream is
 612         not 'interactive', multiple raw reads may be issued to satisfy
 613         the byte count (unless EOF is reached first).  But for
 614         interactive raw streams (XXX and for pipes?), at most one raw
 615         read will be issued, and a short result does not imply that
 616         EOF is imminent.
 617
 618         Returns an empty bytes array on EOF.
 619
 620         Raises BlockingIOError if the underlying raw stream has no
 621         data at the moment.
 622         """
 623         self._unsupported("read")
 624
 625     def read1(self, n=None):
 626         """Read up to n bytes with at most one read() system call."""
 627         self._unsupported("read1")
 628
 629     def readinto(self, b):
 630         """Read up to len(b) bytes into b.
 631
 632         Like read(), this may issue multiple reads to the underlying raw
 633         stream, unless the latter is 'interactive'.
 634
 635         Returns the number of bytes read (0 for EOF).
 636
 637         Raises BlockingIOError if the underlying raw stream has no
 638         data at the moment.
 639         """
 640         # XXX This ought to work with anything that supports the buffer API
 641         data = self.read(len(b))
 642         n = len(data)
 643         try:
 644             b[:n] = data
 645         except TypeError as err:
 646             import array
 647             if not isinstance(b, array.array):
 648                 raise err
 649             b[:n] = array.array(b'b', data)
 650         return n
 651
 652     def write(self, b):
 653         """Write the given buffer to the IO stream.
 654
 655         Return the number of bytes written, which is never less than
 656         len(b).
 657
 658         Raises BlockingIOError if the buffer is full and the
 659         underlying raw stream cannot accept more data at the moment.
 660         """
 661         self._unsupported("write")
 662
 663     def detach(self):
 664         """
 665         Separate the underlying raw stream from the buffer and return it.
 666
 667         After the raw stream has been detached, the buffer is in an unusable
 668         state.
 669         """
 670         self._unsupported("detach")
 671
# Register with the "official" ABC from io instead of inheriting from it.
io.BufferedIOBase.register(BufferedIOBase)
 673
 674
 675 class _BufferedIOMixin(BufferedIOBase):
 676
 677     """A mixin implementation of BufferedIOBase with an underlying raw stream.
 678
 679     This passes most requests on to the underlying raw stream.  It
 680     does *not* provide implementations of read(), readinto() or
 681     write().
 682     """
 683
 684     def __init__(self, raw):
 685         self.raw = raw
 686
 687     ### Positioning ###
 688
 689     def seek(self, pos, whence=0):
 690         new_position = self.raw.seek(pos, whence)
 691         if new_position < 0:
 692             raise IOError("seek() returned an invalid position")
 693         return new_position
 694
 695     def tell(self):
 696         pos = self.raw.tell()
 697         if pos < 0:
 698             raise IOError("tell() returned an invalid position")
 699         return pos
 700
 701     def truncate(self, pos=None):
 702         # Flush the stream.  We're mixing buffered I/O with lower-level I/O,
 703         # and a flush may be necessary to synch both views of the current
 704         # file state.
 705         self.flush()
 706
 707         if pos is None:
 708             pos = self.tell()
 709         # XXX: Should seek() be used, instead of passing the position
 710         # XXX  directly to truncate?
 711         return self.raw.truncate(pos)
 712
 713     ### Flush and close ###
 714
 715     def flush(self):
 716         self.raw.flush()
 717
 718     def close(self):
 719         if not self.closed and self.raw is not None:
 720             try:
 721                 self.flush()
 722             except IOError:
 723                 pass  # If flush() fails, just give up
 724             self.raw.close()
 725
 726     def detach(self):
 727         if self.raw is None:
 728             raise ValueError("raw stream already detached")
 729         self.flush()
 730         raw = self.raw
 731         self.raw = None
 732         return raw
 733
 734     ### Inquiries ###
 735
 736     def seekable(self):
 737         return self.raw.seekable()
 738
 739     def readable(self):
 740         return self.raw.readable()
 741
 742     def writable(self):
 743         return self.raw.writable()
 744
 745     @property
 746     def closed(self):
 747         return self.raw.closed
 748
 749     @property
 750     def name(self):
 751         return self.raw.name
 752
 753     @property
 754     def mode(self):
 755         return self.raw.mode
 756
 757     def __repr__(self):
 758         clsname = self.__class__.__name__
 759         try:
 760             name = self.name
 761         except AttributeError:
 762             return "<_pyio.{0}>".format(clsname)
 763         else:
 764             return "<_pyio.{0} name={1!r}>".format(clsname, name)
 765
 766     ### Lower-level APIs ###
 767
 768     def fileno(self):
 769         return self.raw.fileno()
 770
 771     def isatty(self):
 772         return self.raw.isatty()
 773
 774
 775 class BytesIO(BufferedIOBase):
 776
 777     """Buffered I/O implementation using an in-memory bytes buffer."""
 778
 779     def __init__(self, initial_bytes=None):
 780         buf = bytearray()
 781         if initial_bytes is not None:
 782             buf.extend(initial_bytes)
 783         self._buffer = buf
 784         self._pos = 0
 785
 786     def __getstate__(self):
 787         if self.closed:
 788             raise ValueError("__getstate__ on closed file")
 789         return self.__dict__.copy()
 790
 791     def getvalue(self):
 792         """Return the bytes value (contents) of the buffer
 793         """
 794         if self.closed:
 795             raise ValueError("getvalue on closed file")
 796         return bytes(self._buffer)
 797
 798     def read(self, n=None):
 799         if self.closed:
 800             raise ValueError("read from closed file")
 801         if n is None:
 802             n = -1
 803         if not isinstance(n, (int, long)):
 804             raise TypeError("integer argument expected, got {0!r}".format(
 805                 type(n)))
 806         if n < 0:
 807             n = len(self._buffer)
 808         if len(self._buffer) <= self._pos:
 809             return b""
 810         newpos = min(len(self._buffer), self._pos + n)
 811         b = self._buffer[self._pos : newpos]
 812         self._pos = newpos
 813         return bytes(b)
 814
 815     def read1(self, n):
 816         """This is the same as read.
 817         """
 818         return self.read(n)
 819
 820     def write(self, b):
 821         if self.closed:
 822             raise ValueError("write to closed file")
 823         if isinstance(b, unicode):
 824             raise TypeError("can't write unicode to binary stream")
 825         n = len(b)
 826         if n == 0:
 827             return 0
 828         pos = self._pos
 829         if pos > len(self._buffer):
 830             # Inserts null bytes between the current end of the file
 831             # and the new write position.
 832             padding = b'\x00' * (pos - len(self._buffer))
 833             self._buffer += padding
 834         self._buffer[pos:pos + n] = b
 835         self._pos += n
 836         return n
 837
 838     def seek(self, pos, whence=0):
 839         if self.closed:
 840             raise ValueError("seek on closed file")
 841         try:
 842             pos = pos.__index__()
 843         except AttributeError as err:
 844             raise TypeError("an integer is required")
 845         if whence == 0:
 846             if pos < 0:
 847                 raise ValueError("negative seek position %r" % (pos,))
 848             self._pos = pos
 849         elif whence == 1:
 850             self._pos = max(0, self._pos + pos)
 851         elif whence == 2:
 852             self._pos = max(0, len(self._buffer) + pos)
 853         else:
 854             raise ValueError("invalid whence value")
 855         return self._pos
 856
 857     def tell(self):
 858         if self.closed:
 859             raise ValueError("tell on closed file")
 860         return self._pos
 861
 862     def truncate(self, pos=None):
 863         if self.closed:
 864             raise ValueError("truncate on closed file")
 865         if pos is None:
 866             pos = self._pos
 867         elif pos < 0:
 868             raise ValueError("negative truncate position %r" % (pos,))
 869         del self._buffer[pos:]
 870         return self.seek(pos)
 871
 872     def readable(self):
 873         return True
 874
 875     def writable(self):
 876         return True
 877
 878     def seekable(self):
 879         return True
 880
 881
 882 class BufferedReader(_BufferedIOMixin):
 883
 884     """BufferedReader(raw[, buffer_size])
 885
 886     A buffer for a readable, sequential BaseRawIO object.
 887
 888     The constructor creates a BufferedReader for the given readable raw
 889     stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
 890     is used.
 891     """
 892
 893     def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
 894         """Create a new buffered reader using the given readable raw IO object.
 895         """
 896         if not raw.readable():
 897             raise IOError('"raw" argument must be readable.')
 898
 899         _BufferedIOMixin.__init__(self, raw)
 900         if buffer_size <= 0:
 901             raise ValueError("invalid buffer size")
 902         self.buffer_size = buffer_size
 903         self._reset_read_buf()
 904         self._read_lock = Lock()
 905
 906     def _reset_read_buf(self):
 907         self._read_buf = b""
 908         self._read_pos = 0
 909
 910     def read(self, n=None):
 911         """Read n bytes.
 912
 913         Returns exactly n bytes of data unless the underlying raw IO
 914         stream reaches EOF or if the call would block in non-blocking
 915         mode. If n is negative, read until EOF or until read() would
 916         block.
 917         """
 918         if n is not None and n < -1:
 919             raise ValueError("invalid number of bytes to read")
 920         with self._read_lock:
 921             return self._read_unlocked(n)
 922
 923     def _read_unlocked(self, n=None):
 924         nodata_val = b""
 925         empty_values = (b"", None)
 926         buf = self._read_buf
 927         pos = self._read_pos
 928
 929         # Special case for when the number of bytes to read is unspecified.
 930         if n is None or n == -1:
 931             self._reset_read_buf()
 932             chunks = [buf[pos:]]  # Strip the consumed bytes.
 933             current_size = 0
 934             while True:
 935                 # Read until EOF or until read() would block.
 936                 chunk = self.raw.read()
 937                 if chunk in empty_values:
 938                     nodata_val = chunk
 939                     break
 940                 current_size += len(chunk)
 941                 chunks.append(chunk)
 942             return b"".join(chunks) or nodata_val
 943
 944         # The number of bytes to read is specified, return at most n bytes.
 945         avail = len(buf) - pos  # Length of the available buffered data.
 946         if n <= avail:
 947             # Fast path: the data to read is fully buffered.
 948             self._read_pos += n
 949             return buf[pos:pos+n]
 950         # Slow path: read from the stream until enough bytes are read,
 951         # or until an EOF occurs or until read() would block.
 952         chunks = [buf[pos:]]
 953         wanted = max(self.buffer_size, n)
 954         while avail < n:
 955             chunk = self.raw.read(wanted)
 956             if chunk in empty_values:
 957                 nodata_val = chunk
 958                 break
 959             avail += len(chunk)
 960             chunks.append(chunk)
 961         # n is more then avail only when an EOF occurred or when
 962         # read() would have blocked.
 963         n = min(n, avail)
 964         out = b"".join(chunks)
 965         self._read_buf = out[n:]  # Save the extra data in the buffer.
 966         self._read_pos = 0
 967         return out[:n] if out else nodata_val
 968
 969     def peek(self, n=0):
 970         """Returns buffered bytes without advancing the position.
 971
 972         The argument indicates a desired minimal number of bytes; we
 973         do at most one raw read to satisfy it.  We never return more
 974         than self.buffer_size.
 975         """
 976         with self._read_lock:
 977             return self._peek_unlocked(n)
 978
 979     def _peek_unlocked(self, n=0):
 980         want = min(n, self.buffer_size)
 981         have = len(self._read_buf) - self._read_pos
 982         if have < want or have <= 0:
 983             to_read = self.buffer_size - have
 984             current = self.raw.read(to_read)
 985             if current:
 986                 self._read_buf = self._read_buf[self._read_pos:] + current
 987                 self._read_pos = 0
 988         return self._read_buf[self._read_pos:]
 989
 990     def read1(self, n):
 991         """Reads up to n bytes, with at most one read() system call."""
 992         # Returns up to n bytes.  If at least one byte is buffered, we
 993         # only return buffered bytes.  Otherwise, we do one raw read.
 994         if n < 0:
 995             raise ValueError("number of bytes to read must be positive")
 996         if n == 0:
 997             return b""
 998         with self._read_lock:
 999             self._peek_unlocked(1)
1000             return self._read_unlocked(
1001                 min(n, len(self._read_buf) - self._read_pos))
1002
1003     def tell(self):
1004         return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos
1005
1006     def seek(self, pos, whence=0):
1007         if not (0 <= whence <= 2):
1008             raise ValueError("invalid whence value")
1009         with self._read_lock:
1010             if whence == 1:
1011                 pos -= len(self._read_buf) - self._read_pos
1012             pos = _BufferedIOMixin.seek(self, pos, whence)
1013             self._reset_read_buf()
1014             return pos
1015
class BufferedWriter(_BufferedIOMixin):

    """A buffer for a writeable sequential RawIO object.

    The constructor creates a BufferedWriter for the given writeable raw
    stream. If the buffer_size is not given, it defaults to
    DEFAULT_BUFFER_SIZE.

    Pending output is accumulated in the bytearray self._write_buf.
    """

    # stacklevel passed to warnings.warn() for the max_buffer_size
    # deprecation warning issued by __init__.
    _warning_stack_offset = 2

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Wrap the writable raw stream.

        Raises IOError if raw is not writable and ValueError if
        buffer_size is not positive.  max_buffer_size is deprecated;
        passing it only triggers a DeprecationWarning.
        """
        if not raw.writable():
            raise IOError('"raw" argument must be writable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        if max_buffer_size is not None:
            warnings.warn("max_buffer_size is deprecated", DeprecationWarning,
                          self._warning_stack_offset)
        self.buffer_size = buffer_size
        self._write_buf = bytearray()
        self._write_lock = Lock()

    def write(self, b):
        """Buffer b and return the number of bytes accepted.

        The buffer is flushed to the raw stream whenever it exceeds
        buffer_size.  Raises ValueError on a closed file, TypeError for
        unicode, and BlockingIOError (with characters_written set to
        the number of bytes of b that were accepted) when the raw
        stream would block and the buffer is over its limit.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, unicode):
            raise TypeError("can't write unicode to binary stream")
        with self._write_lock:
            # XXX we can implement some more tricks to try and avoid
            # partial writes
            if len(self._write_buf) > self.buffer_size:
                # We're full, so let's pre-flush the buffer
                try:
                    self._flush_unlocked()
                except BlockingIOError as e:
                    # We can't accept anything else.
                    # XXX Why not just let the exception pass through?
                    raise BlockingIOError(e.errno, e.strerror, 0)
            size_before = len(self._write_buf)
            self._write_buf.extend(b)
            accepted = len(self._write_buf) - size_before
            if len(self._write_buf) <= self.buffer_size:
                return accepted
            try:
                self._flush_unlocked()
            except BlockingIOError as e:
                excess = len(self._write_buf) - self.buffer_size
                if excess > 0:
                    # We've hit the buffer_size. We have to accept a partial
                    # write and cut back our buffer.
                    accepted -= excess
                    self._write_buf = self._write_buf[:self.buffer_size]
                    raise BlockingIOError(e.errno, e.strerror, accepted)
                # Otherwise everything fits in the buffer after the
                # partial flush, so the write counts as complete.
            return accepted

    def truncate(self, pos=None):
        """Flush, then truncate the raw stream at pos.

        pos defaults to the raw stream's position after the flush.
        """
        with self._write_lock:
            self._flush_unlocked()
            target = self.raw.tell() if pos is None else pos
            return self.raw.truncate(target)

    def flush(self):
        """Write all buffered data to the raw stream."""
        with self._write_lock:
            self._flush_unlocked()

    def _flush_unlocked(self):
        """Implementation of flush(); the caller must hold self._write_lock.

        Raises ValueError on a closed file, IOError if the raw stream
        reports an impossible byte count, and BlockingIOError carrying
        the total number of bytes flushed by this call if the raw
        stream would block.
        """
        if self.closed:
            raise ValueError("flush of closed file")
        flushed = 0
        try:
            while self._write_buf:
                count = self.raw.write(self._write_buf)
                if count > len(self._write_buf) or count < 0:
                    raise IOError("write() returned incorrect number of bytes")
                del self._write_buf[:count]
                flushed += count
        except BlockingIOError as e:
            # Account for the bytes the raw stream took before blocking.
            count = e.characters_written
            del self._write_buf[:count]
            flushed += count
            raise BlockingIOError(e.errno, e.strerror, flushed)

    def tell(self):
        """Return the logical position: raw position plus pending bytes."""
        pending = len(self._write_buf)
        return _BufferedIOMixin.tell(self) + pending

    def seek(self, pos, whence=0):
        """Flush pending data, then seek the raw stream.

        Raises ValueError if whence is outside 0..2.
        """
        if whence < 0 or whence > 2:
            raise ValueError("invalid whence")
        with self._write_lock:
            self._flush_unlocked()
            return _BufferedIOMixin.seek(self, pos, whence)
1111
1112
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together to
    form a sequential IO object that can read and write. This is typically
    used with a socket or two-way pipe.

    reader and writer are RawIOBase objects that are readable and
    writeable respectively. If the buffer_size is omitted it defaults to
    DEFAULT_BUFFER_SIZE.
    """

    # XXX The usefulness of this (compared to having two separate IO
    # objects) is questionable.

    def __init__(self, reader, writer,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Constructor.

        The arguments are two RawIO instances.  reader is wrapped in a
        BufferedReader and writer in a BufferedWriter, both using
        buffer_size.  max_buffer_size is deprecated; passing it only
        triggers a DeprecationWarning.

        Raises IOError if reader is not readable or writer is not
        writable.
        """
        if max_buffer_size is not None:
            warnings.warn("max_buffer_size is deprecated", DeprecationWarning, 2)

        if not reader.readable():
            raise IOError('"reader" argument must be readable.')

        if not writer.writable():
            raise IOError('"writer" argument must be writable.')

        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size)

    def read(self, n=None):
        """Read from the reader side; None is passed on as -1."""
        if n is None:
            n = -1
        return self.reader.read(n)

    def readinto(self, b):
        """Delegate to the reader's readinto()."""
        return self.reader.readinto(b)

    def write(self, b):
        """Delegate to the writer's write()."""
        return self.writer.write(b)

    def peek(self, n=0):
        """Delegate to the reader's peek()."""
        return self.reader.peek(n)

    def read1(self, n):
        """Delegate to the reader's read1()."""
        return self.reader.read1(n)

    def readable(self):
        """Report whether the reader side is readable."""
        return self.reader.readable()

    def writable(self):
        """Report whether the writer side is writable."""
        return self.writer.writable()

    def flush(self):
        """Flush the writer side."""
        return self.writer.flush()

    def close(self):
        """Close both sides, writer first.

        The reader is closed even if closing the writer raises, so a
        failing flush on the write side cannot leak the read side.  If
        closing the reader does not raise, the writer's exception is
        propagated to the caller.
        """
        try:
            self.writer.close()
        finally:
            self.reader.close()

    def isatty(self):
        """Return True if either side is connected to a tty."""
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        """Closed state of the pair, as reported by the writer side."""
        return self.writer.closed
1183
1184
class BufferedRandom(BufferedWriter, BufferedReader):

    """A buffered interface to random access streams.

    The constructor creates a reader and writer for a seekable stream,
    raw, given in the first argument. If the buffer_size is omitted it
    defaults to DEFAULT_BUFFER_SIZE.

    Every read-type operation first flushes pending writes, and every
    write first rewinds the raw stream over any read-ahead data.
    """

    # stacklevel used by BufferedWriter.__init__ for its deprecation
    # warning; one higher than BufferedWriter's own value.
    _warning_stack_offset = 3

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Initialize both the reader and the writer side on raw.

        raw._checkSeekable() is called first to validate the stream.
        """
        raw._checkSeekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)

    def seek(self, pos, whence=0):
        """Flush, drop read-ahead, seek the raw stream; return the position.

        Raises ValueError if whence is outside 0..2 and IOError if the
        raw stream reports a negative position.
        """
        if whence < 0 or whence > 2:
            raise ValueError("invalid whence")
        self.flush()
        if self._read_buf:
            # Undo read ahead.
            with self._read_lock:
                readahead = len(self._read_buf) - self._read_pos
                self.raw.seek(-readahead, 1)
        # First do the raw seek, then empty the read buffer, so that
        # if the raw seek fails, we don't lose buffered data forever.
        new_pos = self.raw.seek(pos, whence)
        with self._read_lock:
            self._reset_read_buf()
        if new_pos < 0:
            raise IOError("seek() returned invalid position")
        return new_pos

    def tell(self):
        """Return the logical position.

        The writer's view is used while writes are pending, otherwise
        the reader's view.
        """
        if self._write_buf:
            return BufferedWriter.tell(self)
        return BufferedReader.tell(self)

    def truncate(self, pos=None):
        """Truncate at pos (default: current position)."""
        target = self.tell() if pos is None else pos
        # Use seek to flush the read buffer.
        self.seek(target)
        return BufferedWriter.truncate(self)

    def read(self, n=None):
        """Flush pending writes, then read; None is passed on as -1."""
        count = -1 if n is None else n
        self.flush()
        return BufferedReader.read(self, count)

    def readinto(self, b):
        """Flush pending writes, then delegate to BufferedReader.readinto."""
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, n=0):
        """Flush pending writes, then delegate to BufferedReader.peek."""
        self.flush()
        return BufferedReader.peek(self, n)

    def read1(self, n):
        """Flush pending writes, then delegate to BufferedReader.read1."""
        self.flush()
        return BufferedReader.read1(self, n)

    def write(self, b):
        """Discard read-ahead data, then delegate to BufferedWriter.write."""
        if self._read_buf:
            # Undo readahead
            with self._read_lock:
                readahead = len(self._read_buf) - self._read_pos
                self.raw.seek(-readahead, 1)
                self._reset_read_buf()
        return BufferedWriter.write(self, b)
1257
1258
class TextIOBase(IOBase):

    """Base class for text I/O.

    This class provides a character and line based interface to stream
    I/O. There is no readinto method because Python's character strings
    are immutable. There is no public constructor.

    Every method here only calls self._unsupported(<method name>) and
    every property returns None; concrete subclasses are expected to
    override them.
    """

    def read(self, n=-1):
        """Read at most n characters from stream.

        Read from underlying buffer until we have n characters or we hit EOF.
        If n is negative or omitted, read until EOF.

        This base implementation only calls self._unsupported("read").
        """
        self._unsupported("read")

    def write(self, s):
        """Write string s to stream.

        This base implementation only calls self._unsupported("write").
        """
        self._unsupported("write")

    def truncate(self, pos=None):
        """Truncate size to pos.

        This base implementation only calls self._unsupported("truncate").
        """
        self._unsupported("truncate")

    def readline(self):
        """Read until newline or EOF.

        Returns an empty string if EOF is hit immediately.

        This base implementation only calls self._unsupported("readline").
        """
        self._unsupported("readline")

    def detach(self):
        """
        Separate the underlying buffer from the TextIOBase and return it.

        After the underlying buffer has been detached, the TextIO is in an
        unusable state.

        This base implementation only calls self._unsupported("detach").
        """
        self._unsupported("detach")

    @property
    def encoding(self):
        """Subclasses should override.

        The base implementation returns None.
        """
        return None

    @property
    def newlines(self):
        """Line endings translated so far.

        Only line endings translated during reading are considered.

        Subclasses should override.  The base implementation returns None.
        """
        return None

    @property
    def errors(self):
        """Error setting of the decoder or encoder.

        Subclasses should override.  The base implementation returns None.
        """
        return None
1321
# Register the pure-Python TextIOBase with the "official" ABC from the io
# module instead of inheriting from it (see the note at the top of the file).
io.TextIOBase.register(TextIOBase)
1323
1324
class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
    r"""Codec used when reading a file in universal newlines mode.  It wraps
    another incremental decoder, translating \r\n and \r into \n.  It also
    records the types of newlines encountered.  When used with
    translate=False, it ensures that the newline sequence is returned in
    one piece.

    decoder may be None, in which case the input of decode() is used
    as-is.  A trailing \r is held back (self.pendingcr) until the next
    call so that a \r\n pair is never split across two results.
    """

    # Bit flags accumulated in self.seennl.
    _LF = 1
    _CR = 2
    _CRLF = 4

    def __init__(self, decoder, translate, errors='strict'):
        """Wrap decoder; translate selects conversion of \\r\\n and \\r to \\n."""
        codecs.IncrementalDecoder.__init__(self, errors=errors)
        self.decoder = decoder
        self.translate = translate
        self.pendingcr = False
        self.seennl = 0

    def decode(self, input, final=False):
        """Decode input, record the newline kinds seen, and return the text."""
        # decode input (with the eventual \r from a previous pass)
        if self.decoder is not None:
            text = self.decoder.decode(input, final=final)
        else:
            text = input
        if self.pendingcr and (text or final):
            text = "\r" + text
            self.pendingcr = False

        # retain last \r even when not translating data:
        # then readline() is sure to get \r\n in one pass
        if text.endswith("\r") and not final:
            text = text[:-1]
            self.pendingcr = True

        # Record which newlines are read
        n_crlf = text.count('\r\n')
        n_cr = text.count('\r') - n_crlf
        n_lf = text.count('\n') - n_crlf
        if n_lf:
            self.seennl |= self._LF
        if n_cr:
            self.seennl |= self._CR
        if n_crlf:
            self.seennl |= self._CRLF

        if self.translate:
            if n_crlf:
                text = text.replace("\r\n", "\n")
            if n_cr:
                text = text.replace("\r", "\n")

        return text

    def getstate(self):
        """Return (buffer, flags); bit 0 of flags is the pending-\\r marker.

        The wrapped decoder's own flags are stored shifted left by one.
        """
        if self.decoder is None:
            buf, flag = b"", 0
        else:
            buf, flag = self.decoder.getstate()
        return buf, (flag << 1) | (1 if self.pendingcr else 0)

    def setstate(self, state):
        """Restore a state previously produced by getstate()."""
        buf, flag = state
        self.pendingcr = bool(flag & 1)
        if self.decoder is not None:
            self.decoder.setstate((buf, flag >> 1))

    def reset(self):
        """Forget the newlines seen, the pending \\r and the decoder state."""
        self.seennl = 0
        self.pendingcr = False
        if self.decoder is not None:
            self.decoder.reset()

    @property
    def newlines(self):
        """Newline kinds seen so far: None, a string, or a tuple of strings."""
        # Indexed by the bitwise OR of _LF, _CR and _CRLF.
        by_mask = (
            None,
            "\n",
            "\r",
            ("\r", "\n"),
            "\r\n",
            ("\n", "\r\n"),
            ("\r", "\r\n"),
            ("\r", "\n", "\r\n"),
        )
        return by_mask[self.seennl]
1408
1409
1410 class TextIOWrapper(TextIOBase):
1411
1412     r"""Character and line based layer over a BufferedIOBase object, buffer.
1413
1414     encoding gives the name of the encoding that the stream will be
1415     decoded or encoded with. It defaults to locale.getpreferredencoding.
1416
1417     errors determines the strictness of encoding and decoding (see the
1418     codecs.register) and defaults to "strict".
1419
    newline can be None, '', '\n', '\r', or '\r\n'.  It controls the
    handling of line endings. If it is None, universal newlines is
    enabled.  With this enabled, on input, the line endings '\n', '\r',
    or '\r\n' are translated to '\n' before being returned to the
    caller. Conversely, on output, '\n' is translated to the system
    default line separator, os.linesep. If newline is any other of its
    legal values, that newline becomes the newline when the file is read
    and it is returned untranslated. On output, '\n' is converted to the
    newline.
1429
1430     If line_buffering is True, a call to flush is implied when a call to
1431     write contains a newline character.
1432     """
1433
1434     _CHUNK_SIZE = 2048
1435
1436     def __init__(self, buffer, encoding=None, errors=None, newline=None,
1437                  line_buffering=False):
1438         if newline is not None and not isinstance(newline, basestring):
1439             raise TypeError("illegal newline type: %r" % (type(newline),))
1440         if newline not in (None, "", "\n", "\r", "\r\n"):
1441             raise ValueError("illegal newline value: %r" % (newline,))
1442         if encoding is None:
1443             try:
1444                 encoding = os.device_encoding(buffer.fileno())
1445             except (AttributeError, UnsupportedOperation):
1446                 pass
1447             if encoding is None:
1448                 try:
1449                     import locale
1450                 except ImportError:
1451                     # Importing locale may fail if Python is being built
1452                     encoding = "ascii"
1453                 else:
1454                     encoding = locale.getpreferredencoding()
1455
1456         if not isinstance(encoding, basestring):
1457             raise ValueError("invalid encoding: %r" % encoding)
1458
1459         if errors is None:
1460             errors = "strict"
1461         else:
1462             if not isinstance(errors, basestring):
1463                 raise ValueError("invalid errors: %r" % errors)
1464
1465         self.buffer = buffer
1466         self._line_buffering = line_buffering
1467         self._encoding = encoding
1468         self._errors = errors
1469         self._readuniversal = not newline
1470         self._readtranslate = newline is None
1471         self._readnl = newline
1472         self._writetranslate = newline != ''
1473         self._writenl = newline or os.linesep
1474         self._encoder = None
1475         self._decoder = None
1476         self._decoded_chars = ''  # buffer for text returned from decoder
1477         self._decoded_chars_used = 0  # offset into _decoded_chars for read()
1478         self._snapshot = None  # info for reconstructing decoder state
1479         self._seekable = self._telling = self.buffer.seekable()
1480
1481         if self._seekable and self.writable():
1482             position = self.buffer.tell()
1483             if position != 0:
1484                 try:
1485                     self._get_encoder().setstate(0)
1486                 except LookupError:
1487                     # Sometimes the encoder doesn't exist
1488                     pass
1489
1490     # self._snapshot is either None, or a tuple (dec_flags, next_input)
1491     # where dec_flags is the second (integer) item of the decoder state
1492     # and next_input is the chunk of input bytes that comes next after the
1493     # snapshot point.  We use this to reconstruct decoder states in tell().
1494
1495     # Naming convention:
1496     #   - "bytes_..." for integer variables that count input bytes
1497     #   - "chars_..." for integer variables that count decoded characters
1498
1499     def __repr__(self):
1500         try:
1501             name = self.name
1502         except AttributeError:
1503             return "<_pyio.TextIOWrapper encoding='{0}'>".format(self.encoding)
1504         else:
1505             return "<_pyio.TextIOWrapper name={0!r} encoding='{1}'>".format(
1506                 name, self.encoding)
1507
    @property
    def encoding(self):
        """The encoding stored in self._encoding by __init__."""
        return self._encoding
1511
    @property
    def errors(self):
        """The error-handling setting stored in self._errors by __init__."""
        return self._errors
1515
    @property
    def line_buffering(self):
        """The line_buffering flag passed to __init__."""
        return self._line_buffering
1519
    def seekable(self):
        """Return the seekable flag recorded from the buffer in __init__."""
        return self._seekable
1522
    def readable(self):
        """Return whether the underlying buffer is readable."""
        return self.buffer.readable()
1525
    def writable(self):
        """Return whether the underlying buffer is writable."""
        return self.buffer.writable()
1528
    def flush(self):
        """Flush the underlying buffer and restore the telling flag."""
        self.buffer.flush()
        # tell() refuses to work while _telling is false; reset it to the
        # seekable flag recorded in __init__.
        self._telling = self._seekable
1532
1533     def close(self):
1534         if self.buffer is not None:
1535             try:
1536                 self.flush()
1537             except IOError:
1538                 pass  # If flush() fails, just give up
1539             self.buffer.close()
1540
    @property
    def closed(self):
        """Closed state of the underlying buffer."""
        return self.buffer.closed
1544
    @property
    def name(self):
        """The name attribute of the underlying buffer.

        Raises AttributeError if the buffer has no name; __repr__ relies
        on that.
        """
        return self.buffer.name
1548
    def fileno(self):
        """Return the file descriptor reported by the underlying buffer."""
        return self.buffer.fileno()
1551
    def isatty(self):
        """Return whether the underlying buffer reports being a tty."""
        return self.buffer.isatty()
1554
1555     def write(self, s):
1556         if self.closed:
1557             raise ValueError("write to closed file")
1558         if not isinstance(s, unicode):
1559             raise TypeError("can't write %s to text stream" %
1560                             s.__class__.__name__)
1561         length = len(s)
1562         haslf = (self._writetranslate or self._line_buffering) and "\n" in s
1563         if haslf and self._writetranslate and self._writenl != "\n":
1564             s = s.replace("\n", self._writenl)
1565         encoder = self._encoder or self._get_encoder()
1566         # XXX What if we were just reading?
1567         b = encoder.encode(s)
1568         self.buffer.write(b)
1569         if self._line_buffering and (haslf or "\r" in s):
1570             self.flush()
1571         self._snapshot = None
1572         if self._decoder:
1573             self._decoder.reset()
1574         return length
1575
1576     def _get_encoder(self):
1577         make_encoder = codecs.getincrementalencoder(self._encoding)
1578         self._encoder = make_encoder(self._errors)
1579         return self._encoder
1580
1581     def _get_decoder(self):
1582         make_decoder = codecs.getincrementaldecoder(self._encoding)
1583         decoder = make_decoder(self._errors)
1584         if self._readuniversal:
1585             decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
1586         self._decoder = decoder
1587         return decoder
1588
1589     # The following three methods implement an ADT for _decoded_chars.
1590     # Text returned from the decoder is buffered here until the client
1591     # requests it by calling our read() or readline() method.
    def _set_decoded_chars(self, chars):
        """Set the _decoded_chars buffer."""
        self._decoded_chars = chars
        # Nothing of the new text has been consumed yet.
        self._decoded_chars_used = 0
1596
1597     def _get_decoded_chars(self, n=None):
1598         """Advance into the _decoded_chars buffer."""
1599         offset = self._decoded_chars_used
1600         if n is None:
1601             chars = self._decoded_chars[offset:]
1602         else:
1603             chars = self._decoded_chars[offset:offset + n]
1604         self._decoded_chars_used += len(chars)
1605         return chars
1606
1607     def _rewind_decoded_chars(self, n):
1608         """Rewind the _decoded_chars buffer."""
1609         if self._decoded_chars_used < n:
1610             raise AssertionError("rewind decoded_chars out of bounds")
1611         self._decoded_chars_used -= n
1612
1613     def _read_chunk(self):
1614         """
1615         Read and decode the next chunk of data from the BufferedReader.
1616         """
1617
1618         # The return value is True unless EOF was reached.  The decoded
1619         # string is placed in self._decoded_chars (replacing its previous
1620         # value).  The entire input chunk is sent to the decoder, though
1621         # some of it may remain buffered in the decoder, yet to be
1622         # converted.
1623
1624         if self._decoder is None:
1625             raise ValueError("no decoder")
1626
1627         if self._telling:
1628             # To prepare for tell(), we need to snapshot a point in the
1629             # file where the decoder's input buffer is empty.
1630
1631             dec_buffer, dec_flags = self._decoder.getstate()
1632             # Given this, we know there was a valid snapshot point
1633             # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1634
1635         # Read a chunk, decode it, and put the result in self._decoded_chars.
1636         input_chunk = self.buffer.read1(self._CHUNK_SIZE)
1637         eof = not input_chunk
1638         self._set_decoded_chars(self._decoder.decode(input_chunk, eof))
1639
1640         if self._telling:
1641             # At the snapshot point, len(dec_buffer) bytes before the read,
1642             # the next input to be decoded is dec_buffer + input_chunk.
1643             self._snapshot = (dec_flags, dec_buffer + input_chunk)
1644
1645         return not eof
1646
1647     def _pack_cookie(self, position, dec_flags=0,
1648                            bytes_to_feed=0, need_eof=0, chars_to_skip=0):
1649         # The meaning of a tell() cookie is: seek to position, set the
1650         # decoder flags to dec_flags, read bytes_to_feed bytes, feed them
1651         # into the decoder with need_eof as the EOF flag, then skip
1652         # chars_to_skip characters of the decoded result.  For most simple
1653         # decoders, tell() will often just give a byte offset in the file.
1654         return (position | (dec_flags<<64) | (bytes_to_feed<<128) |
1655                (chars_to_skip<<192) | bool(need_eof)<<256)
1656
1657     def _unpack_cookie(self, bigint):
1658         rest, position = divmod(bigint, 1<<64)
1659         rest, dec_flags = divmod(rest, 1<<64)
1660         rest, bytes_to_feed = divmod(rest, 1<<64)
1661         need_eof, chars_to_skip = divmod(rest, 1<<64)
1662         return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip
1663
    def tell(self):
        """Return a value describing the current position in the text stream.

        When there is no decoder or no snapshot, the result is the
        underlying buffer's own tell() value.  Otherwise it is a cookie
        built by _pack_cookie() that seek() can replay.

        Raises IOError if the stream is not seekable, if telling has been
        disabled by next(), or if the position cannot be reconstructed
        from the snapshot.  The decoder's state is saved before the replay
        and restored afterwards, even on error.
        """
        if not self._seekable:
            raise IOError("underlying stream is not seekable")
        if not self._telling:
            raise IOError("telling position disabled by next() call")
        self.flush()
        position = self.buffer.tell()
        decoder = self._decoder
        if decoder is None or self._snapshot is None:
            if self._decoded_chars:
                # This should never happen.
                raise AssertionError("pending decoded text")
            return position

        # Skip backward to the snapshot point (see _read_chunk).
        dec_flags, next_input = self._snapshot
        position -= len(next_input)

        # How many decoded characters have been used up since the snapshot?
        chars_to_skip = self._decoded_chars_used
        if chars_to_skip == 0:
            # We haven't moved from the snapshot point.
            return self._pack_cookie(position, dec_flags)

        # Starting from the snapshot position, we will walk the decoder
        # forward until it gives us enough decoded characters.
        saved_state = decoder.getstate()
        try:
            # Note our initial start point.
            decoder.setstate((b'', dec_flags))
            start_pos = position
            start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
            need_eof = 0

            # Feed the decoder one byte at a time.  As we go, note the
            # nearest "safe start point" before the current location
            # (a point where the decoder has nothing buffered, so seek()
            # can safely start from there and advance to this location).
            for next_byte in next_input:
                bytes_fed += 1
                chars_decoded += len(decoder.decode(next_byte))
                dec_buffer, dec_flags = decoder.getstate()
                if not dec_buffer and chars_decoded <= chars_to_skip:
                    # Decoder buffer is empty, so this is a safe start point.
                    # Advance the start point and make the remaining
                    # counters relative to it.
                    start_pos += bytes_fed
                    chars_to_skip -= chars_decoded
                    start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
                if chars_decoded >= chars_to_skip:
                    break
            else:
                # We didn't get enough decoded data; signal EOF to get more.
                chars_decoded += len(decoder.decode(b'', final=True))
                need_eof = 1
                if chars_decoded < chars_to_skip:
                    raise IOError("can't reconstruct logical file position")

            # The returned cookie corresponds to the last safe start point.
            return self._pack_cookie(
                start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
        finally:
            # Undo the replay so the caller's decoder is left untouched.
            decoder.setstate(saved_state)
1725
1726     def truncate(self, pos=None):
1727         self.flush()
1728         if pos is None:
1729             pos = self.tell()
1730         self.seek(pos)
1731         return self.buffer.truncate()
1732
1733     def detach(self):
1734         if self.buffer is None:
1735             raise ValueError("buffer is already detached")
1736         self.flush()
1737         buffer = self.buffer
1738         self.buffer = None
1739         return buffer
1740
1741     def seek(self, cookie, whence=0):
1742         if self.closed:
1743             raise ValueError("tell on closed file")
1744         if not self._seekable:
1745             raise IOError("underlying stream is not seekable")
1746         if whence == 1: # seek relative to current position
1747             if cookie != 0:
1748                 raise IOError("can't do nonzero cur-relative seeks")
1749             # Seeking to the current position should attempt to
1750             # sync the underlying buffer with the current position.
1751             whence = 0
1752             cookie = self.tell()
1753         if whence == 2: # seek relative to end of file
1754             if cookie != 0:
1755                 raise IOError("can't do nonzero end-relative seeks")
1756             self.flush()
1757             position = self.buffer.seek(0, 2)
1758             self._set_decoded_chars('')
1759             self._snapshot = None
1760             if self._decoder:
1761                 self._decoder.reset()
1762             return position
1763         if whence != 0:
1764             raise ValueError("invalid whence (%r, should be 0, 1 or 2)" %
1765                              (whence,))
1766         if cookie < 0:
1767             raise ValueError("negative seek position %r" % (cookie,))
1768         self.flush()
1769
1770         # The strategy of seek() is to go back to the safe start point
1771         # and replay the effect of read(chars_to_skip) from there.
1772         start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
1773             self._unpack_cookie(cookie)
1774
1775         # Seek back to the safe start point.
1776         self.buffer.seek(start_pos)
1777         self._set_decoded_chars('')
1778         self._snapshot = None
1779
1780         # Restore the decoder to its state from the safe start point.
1781         if cookie == 0 and self._decoder:
1782             self._decoder.reset()
1783         elif self._decoder or dec_flags or chars_to_skip:
1784             self._decoder = self._decoder or self._get_decoder()
1785             self._decoder.setstate((b'', dec_flags))
1786             self._snapshot = (dec_flags, b'')
1787
1788         if chars_to_skip:
1789             # Just like _read_chunk, feed the decoder and save a snapshot.
1790             input_chunk = self.buffer.read(bytes_to_feed)
1791             self._set_decoded_chars(
1792                 self._decoder.decode(input_chunk, need_eof))
1793             self._snapshot = (dec_flags, input_chunk)
1794
1795             # Skip chars_to_skip of the decoded characters.
1796             if len(self._decoded_chars) < chars_to_skip:
1797                 raise IOError("can't restore logical file position")
1798             self._decoded_chars_used = chars_to_skip
1799
1800         # Finally, reset the encoder (merely useful for proper BOM handling)
1801         try:
1802             encoder = self._encoder or self._get_encoder()
1803         except LookupError:
1804             # Sometimes the encoder doesn't exist
1805             pass
1806         else:
1807             if cookie != 0:
1808                 encoder.setstate(0)
1809             else:
1810                 encoder.reset()
1811         return cookie
1812
1813     def read(self, n=None):
1814         self._checkReadable()
1815         if n is None:
1816             n = -1
1817         decoder = self._decoder or self._get_decoder()
1818         if n < 0:
1819             # Read everything.
1820             result = (self._get_decoded_chars() +
1821                       decoder.decode(self.buffer.read(), final=True))
1822             self._set_decoded_chars('')
1823             self._snapshot = None
1824             return result
1825         else:
1826             # Keep reading chunks until we have n characters to return.
1827             eof = False
1828             result = self._get_decoded_chars(n)
1829             while len(result) < n and not eof:
1830                 eof = not self._read_chunk()
1831                 result += self._get_decoded_chars(n - len(result))
1832             return result
1833
1834     def next(self):
1835         self._telling = False
1836         line = self.readline()
1837         if not line:
1838             self._snapshot = None
1839             self._telling = self._seekable
1840             raise StopIteration
1841         return line
1842
    def readline(self, limit=None):
        """Read and return one line of text from the stream.

        limit, when given and non-negative, caps the number of characters
        returned; None means no cap.  Which characters end a line depends
        on the stream's newline settings (_readtranslate, _readuniversal
        and _readnl).  At end of stream whatever text has been collected
        is returned, which may be empty.

        Raises ValueError if the stream is closed and TypeError if limit
        is neither None nor an integer.
        """
        if self.closed:
            raise ValueError("read from closed file")
        if limit is None:
            limit = -1
        elif not isinstance(limit, (int, long)):
            raise TypeError("limit must be an integer")

        # Grab all the decoded text (we will rewind any extra bits later).
        line = self._get_decoded_chars()

        # Index from which the next newline search begins; text before it
        # has already been scanned.
        start = 0
        # Make the decoder if it doesn't already exist.
        if not self._decoder:
            self._get_decoder()

        pos = endpos = None
        while True:
            if self._readtranslate:
                # Newlines are already translated, only search for \n
                pos = line.find('\n', start)
                if pos >= 0:
                    endpos = pos + 1
                    break
                else:
                    start = len(line)

            elif self._readuniversal:
                # Universal newline search. Find any of \r, \r\n, \n
                # The decoder ensures that \r\n are not split in two pieces

                # In C we'd look for these in parallel of course.
                nlpos = line.find("\n", start)
                crpos = line.find("\r", start)
                if crpos == -1:
                    if nlpos == -1:
                        # Nothing found
                        start = len(line)
                    else:
                        # Found \n
                        endpos = nlpos + 1
                        break
                elif nlpos == -1:
                    # Found lone \r
                    endpos = crpos + 1
                    break
                elif nlpos < crpos:
                    # Found \n
                    endpos = nlpos + 1
                    break
                elif nlpos == crpos + 1:
                    # Found \r\n
                    endpos = crpos + 2
                    break
                else:
                    # Found \r
                    endpos = crpos + 1
                    break
            else:
                # non-universal
                # The search always restarts from the beginning of line.
                pos = line.find(self._readnl)
                if pos >= 0:
                    endpos = pos + len(self._readnl)
                    break

            if limit >= 0 and len(line) >= limit:
                endpos = limit  # reached length limit
                break

            # No line ending seen yet - get more data
            # Keep reading chunks until one yields decoded text or the
            # stream is exhausted.
            while self._read_chunk():
                if self._decoded_chars:
                    break
            if self._decoded_chars:
                line += self._get_decoded_chars()
            else:
                # end of file
                self._set_decoded_chars('')
                self._snapshot = None
                return line

        if limit >= 0 and endpos > limit:
            endpos = limit  # don't exceed limit

        # Rewind _decoded_chars to just after the line ending we found.
        self._rewind_decoded_chars(len(line) - endpos)
        return line[:endpos]
1930
1931     @property
1932     def newlines(self):
1933         return self._decoder.newlines if self._decoder else None
1934
1935
class StringIO(TextIOWrapper):
    """Text I/O implementation using an in-memory buffer.

    The initial_value argument sets the value of the object.  The newline
    argument is like the one of TextIOWrapper's constructor.
    """

    def __init__(self, initial_value="", newline="\n"):
        """Create the stream on top of a fresh BytesIO, encoded as UTF-8.

        A non-empty initial_value is converted to unicode if necessary,
        written to the stream, and the stream is then rewound to the
        start.
        """
        super(StringIO, self).__init__(BytesIO(),
                                       encoding="utf-8",
                                       errors="strict",
                                       newline=newline)
        # Issue #5645: make universal newlines semantics the same as in the
        # C version, even under Windows.
        if newline is None:
            self._writetranslate = False
        if initial_value:
            if not isinstance(initial_value, unicode):
                initial_value = unicode(initial_value)
            self.write(initial_value)
            self.seek(0)

    def getvalue(self):
        """Return the entire contents of the buffer as text.

        The bytes are decoded with the stream's own decoder (the same one
        read() obtains), so the result agrees with what reading the whole
        stream from the start would return.  The decoder's state is saved
        before and restored after, so the current read position is not
        disturbed.
        """
        self.flush()
        decoder = self._decoder or self._get_decoder()
        old_state = decoder.getstate()
        # Decode from a clean state: the buffer is decoded from its start.
        decoder.reset()
        try:
            return decoder.decode(self.buffer.getvalue(), final=True)
        finally:
            decoder.setstate(old_state)

    def __repr__(self):
        # TextIOWrapper tells the encoding in its repr. In StringIO,
        # that's an implementation detail.
        return object.__repr__(self)

    @property
    def errors(self):
        # The error handler of the internal buffer is not exposed.
        return None

    @property
    def encoding(self):
        # The encoding of the internal buffer is not exposed.
        return None

    def detach(self):
        # This doesn't make sense on StringIO.
        self._unsupported("detach")