Lib/_pyio.py

   1 """
   2 Python implementation of the io module.
   3 """
   4
   5 from __future__ import (print_function, unicode_literals)
   6
   7 import os
   8 import abc
   9 import codecs
  10 import warnings
  11 # Import thread instead of threading to reduce startup cost
  12 try:
  13     from thread import allocate_lock as Lock
  14 except ImportError:
  15     from dummy_thread import allocate_lock as Lock
  16
  17 import io
  18 from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END)
  19
# Python 2: class statements in this module that do not set their own
# metaclass are created with type, i.e. as new-style classes.
__metaclass__ = type

# Fallback buffer size.  open() uses st_blksize whenever we can and only
# falls back on this value otherwise.
DEFAULT_BUFFER_SIZE = 8 * 1024  # bytes
  24
  25 # NOTE: Base classes defined here are registered with the "official" ABCs
  26 # defined in io.py. We don't use real inheritance though, because we don't
  27 # want to inherit the C implementations.
  28
  29
  30 class BlockingIOError(IOError):
  31
  32     """Exception raised when I/O would block on a non-blocking I/O stream."""
  33
  34     def __init__(self, errno, strerror, characters_written=0):
  35         super(IOError, self).__init__(errno, strerror)
  36         if not isinstance(characters_written, (int, long)):
  37             raise TypeError("characters_written must be a integer")
  38         self.characters_written = characters_written
  39
  40
  41 def open(file, mode="r", buffering=-1,
  42          encoding=None, errors=None,
  43          newline=None, closefd=True):
  44
  45     r"""Open file and return a stream.  Raise IOError upon failure.
  46
  47     file is either a text or byte string giving the name (and the path
  48     if the file isn't in the current working directory) of the file to
  49     be opened or an integer file descriptor of the file to be
  50     wrapped. (If a file descriptor is given, it is closed when the
  51     returned I/O object is closed, unless closefd is set to False.)
  52
  53     mode is an optional string that specifies the mode in which the file
  54     is opened. It defaults to 'r' which means open for reading in text
  55     mode.  Other common values are 'w' for writing (truncating the file if
  56     it already exists), and 'a' for appending (which on some Unix systems,
  57     means that all writes append to the end of the file regardless of the
  58     current seek position). In text mode, if encoding is not specified the
  59     encoding used is platform dependent. (For reading and writing raw
  60     bytes use binary mode and leave encoding unspecified.) The available
  61     modes are:
  62
  63     ========= ===============================================================
  64     Character Meaning
  65     --------- ---------------------------------------------------------------
  66     'r'       open for reading (default)
  67     'w'       open for writing, truncating the file first
  68     'a'       open for writing, appending to the end of the file if it exists
  69     'b'       binary mode
  70     't'       text mode (default)
  71     '+'       open a disk file for updating (reading and writing)
  72     'U'       universal newline mode (for backwards compatibility; unneeded
  73               for new code)
  74     ========= ===============================================================
  75
  76     The default mode is 'rt' (open for reading text). For binary random
  77     access, the mode 'w+b' opens and truncates the file to 0 bytes, while
  78     'r+b' opens the file without truncation.
  79
  80     Python distinguishes between files opened in binary and text modes,
  81     even when the underlying operating system doesn't. Files opened in
  82     binary mode (appending 'b' to the mode argument) return contents as
  83     bytes objects without any decoding. In text mode (the default, or when
  84     't' is appended to the mode argument), the contents of the file are
  85     returned as strings, the bytes having been first decoded using a
  86     platform-dependent encoding or using the specified encoding if given.
  87
  88     buffering is an optional integer used to set the buffering policy.
  89     Pass 0 to switch buffering off (only allowed in binary mode), 1 to select
  90     line buffering (only usable in text mode), and an integer > 1 to indicate
  91     the size of a fixed-size chunk buffer.  When no buffering argument is
  92     given, the default buffering policy works as follows:
  93
  94     * Binary files are buffered in fixed-size chunks; the size of the buffer
  95       is chosen using a heuristic trying to determine the underlying device's
  96       "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
  97       On many systems, the buffer will typically be 4096 or 8192 bytes long.
  98
  99     * "Interactive" text files (files for which isatty() returns True)
 100       use line buffering.  Other text files use the policy described above
 101       for binary files.
 102
 103     encoding is the name of the encoding used to decode or encode the
 104     file. This should only be used in text mode. The default encoding is
 105     platform dependent, but any encoding supported by Python can be
 106     passed.  See the codecs module for the list of supported encodings.
 107
 108     errors is an optional string that specifies how encoding errors are to
 109     be handled---this argument should not be used in binary mode. Pass
 110     'strict' to raise a ValueError exception if there is an encoding error
 111     (the default of None has the same effect), or pass 'ignore' to ignore
 112     errors. (Note that ignoring encoding errors can lead to data loss.)
 113     See the documentation for codecs.register for a list of the permitted
 114     encoding error strings.
 115
 116     newline controls how universal newlines works (it only applies to text
 117     mode). It can be None, '', '\n', '\r', and '\r\n'.  It works as
 118     follows:
 119
 120     * On input, if newline is None, universal newlines mode is
 121       enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
 122       these are translated into '\n' before being returned to the
 123       caller. If it is '', universal newline mode is enabled, but line
 124       endings are returned to the caller untranslated. If it has any of
 125       the other legal values, input lines are only terminated by the given
 126       string, and the line ending is returned to the caller untranslated.
 127
 128     * On output, if newline is None, any '\n' characters written are
 129       translated to the system default line separator, os.linesep. If
 130       newline is '', no translation takes place. If newline is any of the
 131       other legal values, any '\n' characters written are translated to
 132       the given string.
 133
 134     If closefd is False, the underlying file descriptor will be kept open
 135     when the file is closed. This does not work when a file name is given
 136     and must be True in that case.
 137
 138     open() returns a file object whose type depends on the mode, and
 139     through which the standard file operations such as reading and writing
 140     are performed. When open() is used to open a file in a text mode ('w',
 141     'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
 142     a file in a binary mode, the returned class varies: in read binary
 143     mode, it returns a BufferedReader; in write binary and append binary
 144     modes, it returns a BufferedWriter, and in read/write mode, it returns
 145     a BufferedRandom.
 146
 147     It is also possible to use a string or bytearray as a file for both
 148     reading and writing. For strings StringIO can be used like a file
 149     opened in a text mode, and for bytes a BytesIO can be used like a file
 150     opened in a binary mode.
 151     """
 152     if not isinstance(file, (basestring, int, long)):
 153         raise TypeError("invalid file: %r" % file)
 154     if not isinstance(mode, basestring):
 155         raise TypeError("invalid mode: %r" % mode)
 156     if not isinstance(buffering, (int, long)):
 157         raise TypeError("invalid buffering: %r" % buffering)
 158     if encoding is not None and not isinstance(encoding, basestring):
 159         raise TypeError("invalid encoding: %r" % encoding)
 160     if errors is not None and not isinstance(errors, basestring):
 161         raise TypeError("invalid errors: %r" % errors)
 162     modes = set(mode)
 163     if modes - set("arwb+tU") or len(mode) > len(modes):
 164         raise ValueError("invalid mode: %r" % mode)
 165     reading = "r" in modes
 166     writing = "w" in modes
 167     appending = "a" in modes
 168     updating = "+" in modes
 169     text = "t" in modes
 170     binary = "b" in modes
 171     if "U" in modes:
 172         if writing or appending:
 173             raise ValueError("can't use U and writing mode at once")
 174         reading = True
 175     if text and binary:
 176         raise ValueError("can't have text and binary mode at once")
 177     if reading + writing + appending > 1:
 178         raise ValueError("can't have read/write/append mode at once")
 179     if not (reading or writing or appending):
 180         raise ValueError("must have exactly one of read/write/append mode")
 181     if binary and encoding is not None:
 182         raise ValueError("binary mode doesn't take an encoding argument")
 183     if binary and errors is not None:
 184         raise ValueError("binary mode doesn't take an errors argument")
 185     if binary and newline is not None:
 186         raise ValueError("binary mode doesn't take a newline argument")
 187     raw = FileIO(file,
 188                  (reading and "r" or "") +
 189                  (writing and "w" or "") +
 190                  (appending and "a" or "") +
 191                  (updating and "+" or ""),
 192                  closefd)
 193     line_buffering = False
 194     if buffering == 1 or buffering < 0 and raw.isatty():
 195         buffering = -1
 196         line_buffering = True
 197     if buffering < 0:
 198         buffering = DEFAULT_BUFFER_SIZE
 199         try:
 200             bs = os.fstat(raw.fileno()).st_blksize
 201         except (os.error, AttributeError):
 202             pass
 203         else:
 204             if bs > 1:
 205                 buffering = bs
 206     if buffering < 0:
 207         raise ValueError("invalid buffering size")
 208     if buffering == 0:
 209         if binary:
 210             return raw
 211         raise ValueError("can't have unbuffered text I/O")
 212     if updating:
 213         buffer = BufferedRandom(raw, buffering)
 214     elif writing or appending:
 215         buffer = BufferedWriter(raw, buffering)
 216     elif reading:
 217         buffer = BufferedReader(raw, buffering)
 218     else:
 219         raise ValueError("unknown mode: %r" % mode)
 220     if binary:
 221         return buffer
 222     text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering)
 223     text.mode = mode
 224     return text
 225
 226
 227 class DocDescriptor:
 228     """Helper for builtins.open.__doc__
 229     """
 230     def __get__(self, obj, typ):
 231         return (
 232             "open(file, mode='r', buffering=-1, encoding=None, "
 233                  "errors=None, newline=None, closefd=True)\n\n" +
 234             open.__doc__)
 235
 236 class OpenWrapper:
 237     """Wrapper for builtins.open
 238
 239     Trick so that open won't become a bound method when stored
 240     as a class variable (as dbm.dumb does).
 241
 242     See initstdio() in Python/pythonrun.c.
 243     """
 244     __doc__ = DocDescriptor()
 245
 246     def __new__(cls, *args, **kwargs):
 247         return open(*args, **kwargs)
 248
 249
 250 class UnsupportedOperation(ValueError, IOError):
 251     pass
 252
 253
 254 class IOBase:
 255     __metaclass__ = abc.ABCMeta
 256
 257     """The abstract base class for all I/O classes, acting on streams of
 258     bytes. There is no public constructor.
 259
 260     This class provides dummy implementations for many methods that
 261     derived classes can override selectively; the default implementations
 262     represent a file that cannot be read, written or seeked.
 263
 264     Even though IOBase does not declare read, readinto, or write because
 265     their signatures will vary, implementations and clients should
 266     consider those methods part of the interface. Also, implementations
 267     may raise a IOError when operations they do not support are called.
 268
 269     The basic type used for binary data read from or written to a file is
 270     bytes. bytearrays are accepted too, and in some cases (such as
 271     readinto) needed. Text I/O classes work with str data.
 272
 273     Note that calling any method (even inquiries) on a closed stream is
 274     undefined. Implementations may raise IOError in this case.
 275
 276     IOBase (and its subclasses) support the iterator protocol, meaning
 277     that an IOBase object can be iterated over yielding the lines in a
 278     stream.
 279
 280     IOBase also supports the :keyword:`with` statement. In this example,
 281     fp is closed after the suite of the with statement is complete:
 282
 283     with open('spam.txt', 'r') as fp:
 284         fp.write('Spam and eggs!')
 285     """
 286
 287     ### Internal ###
 288
 289     def _unsupported(self, name):
 290         """Internal: raise an exception for unsupported operations."""
 291         raise UnsupportedOperation("%s.%s() not supported" %
 292                                    (self.__class__.__name__, name))
 293
 294     ### Positioning ###
 295
 296     def seek(self, pos, whence=0):
 297         """Change stream position.
 298
 299         Change the stream position to byte offset offset. offset is
 300         interpreted relative to the position indicated by whence.  Values
 301         for whence are:
 302
 303         * 0 -- start of stream (the default); offset should be zero or positive
 304         * 1 -- current stream position; offset may be negative
 305         * 2 -- end of stream; offset is usually negative
 306
 307         Return the new absolute position.
 308         """
 309         self._unsupported("seek")
 310
 311     def tell(self):
 312         """Return current stream position."""
 313         return self.seek(0, 1)
 314
 315     def truncate(self, pos=None):
 316         """Truncate file to size bytes.
 317
 318         Size defaults to the current IO position as reported by tell().  Return
 319         the new size.
 320         """
 321         self._unsupported("truncate")
 322
 323     ### Flush and close ###
 324
 325     def flush(self):
 326         """Flush write buffers, if applicable.
 327
 328         This is not implemented for read-only and non-blocking streams.
 329         """
 330         self._checkClosed()
 331         # XXX Should this return the number of bytes written???
 332
 333     __closed = False
 334
 335     def close(self):
 336         """Flush and close the IO object.
 337
 338         This method has no effect if the file is already closed.
 339         """
 340         if not self.__closed:
 341             self.flush()
 342             self.__closed = True
 343
 344     def __del__(self):
 345         """Destructor.  Calls close()."""
 346         # The try/except block is in case this is called at program
 347         # exit time, when it's possible that globals have already been
 348         # deleted, and then the close() call might fail.  Since
 349         # there's nothing we can do about such failures and they annoy
 350         # the end users, we suppress the traceback.
 351         try:
 352             self.close()
 353         except:
 354             pass
 355
 356     ### Inquiries ###
 357
 358     def seekable(self):
 359         """Return whether object supports random access.
 360
 361         If False, seek(), tell() and truncate() will raise IOError.
 362         This method may need to do a test seek().
 363         """
 364         return False
 365
 366     def _checkSeekable(self, msg=None):
 367         """Internal: raise an IOError if file is not seekable
 368         """
 369         if not self.seekable():
 370             raise IOError("File or stream is not seekable."
 371                           if msg is None else msg)
 372
 373
 374     def readable(self):
 375         """Return whether object was opened for reading.
 376
 377         If False, read() will raise IOError.
 378         """
 379         return False
 380
 381     def _checkReadable(self, msg=None):
 382         """Internal: raise an IOError if file is not readable
 383         """
 384         if not self.readable():
 385             raise IOError("File or stream is not readable."
 386                           if msg is None else msg)
 387
 388     def writable(self):
 389         """Return whether object was opened for writing.
 390
 391         If False, write() and truncate() will raise IOError.
 392         """
 393         return False
 394
 395     def _checkWritable(self, msg=None):
 396         """Internal: raise an IOError if file is not writable
 397         """
 398         if not self.writable():
 399             raise IOError("File or stream is not writable."
 400                           if msg is None else msg)
 401
 402     @property
 403     def closed(self):
 404         """closed: bool.  True iff the file has been closed.
 405
 406         For backwards compatibility, this is a property, not a predicate.
 407         """
 408         return self.__closed
 409
 410     def _checkClosed(self, msg=None):
 411         """Internal: raise an ValueError if file is closed
 412         """
 413         if self.closed:
 414             raise ValueError("I/O operation on closed file."
 415                              if msg is None else msg)
 416
 417     ### Context manager ###
 418
 419     def __enter__(self):
 420         """Context management protocol.  Returns self."""
 421         self._checkClosed()
 422         return self
 423
 424     def __exit__(self, *args):
 425         """Context management protocol.  Calls close()"""
 426         self.close()
 427
 428     ### Lower-level APIs ###
 429
 430     # XXX Should these be present even if unimplemented?
 431
 432     def fileno(self):
 433         """Returns underlying file descriptor if one exists.
 434
 435         An IOError is raised if the IO object does not use a file descriptor.
 436         """
 437         self._unsupported("fileno")
 438
 439     def isatty(self):
 440         """Return whether this is an 'interactive' stream.
 441
 442         Return False if it can't be determined.
 443         """
 444         self._checkClosed()
 445         return False
 446
 447     ### Readline[s] and writelines ###
 448
 449     def readline(self, limit=-1):
 450         r"""Read and return a line from the stream.
 451
 452         If limit is specified, at most limit bytes will be read.
 453
 454         The line terminator is always b'\n' for binary files; for text
 455         files, the newlines argument to open can be used to select the line
 456         terminator(s) recognized.
 457         """
 458         # For backwards compatibility, a (slowish) readline().
 459         if hasattr(self, "peek"):
 460             def nreadahead():
 461                 readahead = self.peek(1)
 462                 if not readahead:
 463                     return 1
 464                 n = (readahead.find(b"\n") + 1) or len(readahead)
 465                 if limit >= 0:
 466                     n = min(n, limit)
 467                 return n
 468         else:
 469             def nreadahead():
 470                 return 1
 471         if limit is None:
 472             limit = -1
 473         elif not isinstance(limit, (int, long)):
 474             raise TypeError("limit must be an integer")
 475         res = bytearray()
 476         while limit < 0 or len(res) < limit:
 477             b = self.read(nreadahead())
 478             if not b:
 479                 break
 480             res += b
 481             if res.endswith(b"\n"):
 482                 break
 483         return bytes(res)
 484
 485     def __iter__(self):
 486         self._checkClosed()
 487         return self
 488
 489     def next(self):
 490         line = self.readline()
 491         if not line:
 492             raise StopIteration
 493         return line
 494
 495     def readlines(self, hint=None):
 496         """Return a list of lines from the stream.
 497
 498         hint can be specified to control the number of lines read: no more
 499         lines will be read if the total size (in bytes/characters) of all
 500         lines so far exceeds hint.
 501         """
 502         if hint is not None and not isinstance(hint, (int, long)):
 503             raise TypeError("integer or None expected")
 504         if hint is None or hint <= 0:
 505             return list(self)
 506         n = 0
 507         lines = []
 508         for line in self:
 509             lines.append(line)
 510             n += len(line)
 511             if n >= hint:
 512                 break
 513         return lines
 514
 515     def writelines(self, lines):
 516         self._checkClosed()
 517         for line in lines:
 518             self.write(line)
 519
# Register the pure-Python class as a virtual subclass of the official
# io.IOBase ABC (registration is used instead of real inheritance).
io.IOBase.register(IOBase)
 521
 522
 523 class RawIOBase(IOBase):
 524
 525     """Base class for raw binary I/O."""
 526
 527     # The read() method is implemented by calling readinto(); derived
 528     # classes that want to support read() only need to implement
 529     # readinto() as a primitive operation.  In general, readinto() can be
 530     # more efficient than read().
 531
 532     # (It would be tempting to also provide an implementation of
 533     # readinto() in terms of read(), in case the latter is a more suitable
 534     # primitive operation, but that would lead to nasty recursion in case
 535     # a subclass doesn't implement either.)
 536
 537     def read(self, n=-1):
 538         """Read and return up to n bytes.
 539
 540         Returns an empty bytes object on EOF, or None if the object is
 541         set not to block and has no data to read.
 542         """
 543         if n is None:
 544             n = -1
 545         if n < 0:
 546             return self.readall()
 547         b = bytearray(n.__index__())
 548         n = self.readinto(b)
 549         del b[n:]
 550         return bytes(b)
 551
 552     def readall(self):
 553         """Read until EOF, using multiple read() call."""
 554         res = bytearray()
 555         while True:
 556             data = self.read(DEFAULT_BUFFER_SIZE)
 557             if not data:
 558                 break
 559             res += data
 560         return bytes(res)
 561
 562     def readinto(self, b):
 563         """Read up to len(b) bytes into b.
 564
 565         Returns number of bytes read (0 for EOF), or None if the object
 566         is set not to block as has no data to read.
 567         """
 568         self._unsupported("readinto")
 569
 570     def write(self, b):
 571         """Write the given buffer to the IO stream.
 572
 573         Returns the number of bytes written, which may be less than len(b).
 574         """
 575         self._unsupported("write")
 576
# Register the pure-Python class as a virtual subclass of the official
# io.RawIOBase ABC.
io.RawIOBase.register(RawIOBase)
# FileIO is imported from the _io module rather than defined here; it is
# registered as a virtual subclass of this module's RawIOBase.
from _io import FileIO
RawIOBase.register(FileIO)
 580
 581
 582 class BufferedIOBase(IOBase):
 583
 584     """Base class for buffered IO objects.
 585
 586     The main difference with RawIOBase is that the read() method
 587     supports omitting the size argument, and does not have a default
 588     implementation that defers to readinto().
 589
 590     In addition, read(), readinto() and write() may raise
 591     BlockingIOError if the underlying raw stream is in non-blocking
 592     mode and not ready; unlike their raw counterparts, they will never
 593     return None.
 594
 595     A typical implementation should not inherit from a RawIOBase
 596     implementation, but wrap one.
 597     """
 598
 599     def read(self, n=None):
 600         """Read and return up to n bytes.
 601
 602         If the argument is omitted, None, or negative, reads and
 603         returns all data until EOF.
 604
 605         If the argument is positive, and the underlying raw stream is
 606         not 'interactive', multiple raw reads may be issued to satisfy
 607         the byte count (unless EOF is reached first).  But for
 608         interactive raw streams (XXX and for pipes?), at most one raw
 609         read will be issued, and a short result does not imply that
 610         EOF is imminent.
 611
 612         Returns an empty bytes array on EOF.
 613
 614         Raises BlockingIOError if the underlying raw stream has no
 615         data at the moment.
 616         """
 617         self._unsupported("read")
 618
 619     def read1(self, n=None):
 620         """Read up to n bytes with at most one read() system call."""
 621         self._unsupported("read1")
 622
 623     def readinto(self, b):
 624         """Read up to len(b) bytes into b.
 625
 626         Like read(), this may issue multiple reads to the underlying raw
 627         stream, unless the latter is 'interactive'.
 628
 629         Returns the number of bytes read (0 for EOF).
 630
 631         Raises BlockingIOError if the underlying raw stream has no
 632         data at the moment.
 633         """
 634         # XXX This ought to work with anything that supports the buffer API
 635         data = self.read(len(b))
 636         n = len(data)
 637         try:
 638             b[:n] = data
 639         except TypeError as err:
 640             import array
 641             if not isinstance(b, array.array):
 642                 raise err
 643             b[:n] = array.array(b'b', data)
 644         return n
 645
 646     def write(self, b):
 647         """Write the given buffer to the IO stream.
 648
 649         Return the number of bytes written, which is never less than
 650         len(b).
 651
 652         Raises BlockingIOError if the buffer is full and the
 653         underlying raw stream cannot accept more data at the moment.
 654         """
 655         self._unsupported("write")
 656
 657     def detach(self):
 658         """
 659         Separate the underlying raw stream from the buffer and return it.
 660
 661         After the raw stream has been detached, the buffer is in an unusable
 662         state.
 663         """
 664         self._unsupported("detach")
 665
# Register the pure-Python class as a virtual subclass of the official
# io.BufferedIOBase ABC.
io.BufferedIOBase.register(BufferedIOBase)
 667
 668
 669 class _BufferedIOMixin(BufferedIOBase):
 670
 671     """A mixin implementation of BufferedIOBase with an underlying raw stream.
 672
 673     This passes most requests on to the underlying raw stream.  It
 674     does *not* provide implementations of read(), readinto() or
 675     write().
 676     """
 677
 678     def __init__(self, raw):
 679         self.raw = raw
 680
 681     ### Positioning ###
 682
 683     def seek(self, pos, whence=0):
 684         new_position = self.raw.seek(pos, whence)
 685         if new_position < 0:
 686             raise IOError("seek() returned an invalid position")
 687         return new_position
 688
 689     def tell(self):
 690         pos = self.raw.tell()
 691         if pos < 0:
 692             raise IOError("tell() returned an invalid position")
 693         return pos
 694
 695     def truncate(self, pos=None):
 696         # Flush the stream.  We're mixing buffered I/O with lower-level I/O,
 697         # and a flush may be necessary to synch both views of the current
 698         # file state.
 699         self.flush()
 700
 701         if pos is None:
 702             pos = self.tell()
 703         # XXX: Should seek() be used, instead of passing the position
 704         # XXX  directly to truncate?
 705         return self.raw.truncate(pos)
 706
 707     ### Flush and close ###
 708
 709     def flush(self):
 710         if self.closed:
 711             raise ValueError("flush of closed file")
 712         self.raw.flush()
 713
 714     def close(self):
 715         if self.raw is not None and not self.closed:
 716             self.flush()
 717             self.raw.close()
 718
 719     def detach(self):
 720         if self.raw is None:
 721             raise ValueError("raw stream already detached")
 722         self.flush()
 723         raw = self.raw
 724         self.raw = None
 725         return raw
 726
 727     ### Inquiries ###
 728
 729     def seekable(self):
 730         return self.raw.seekable()
 731
 732     def readable(self):
 733         return self.raw.readable()
 734
 735     def writable(self):
 736         return self.raw.writable()
 737
 738     @property
 739     def closed(self):
 740         return self.raw.closed
 741
 742     @property
 743     def name(self):
 744         return self.raw.name
 745
 746     @property
 747     def mode(self):
 748         return self.raw.mode
 749
 750     def __repr__(self):
 751         clsname = self.__class__.__name__
 752         try:
 753             name = self.name
 754         except AttributeError:
 755             return "<_pyio.{0}>".format(clsname)
 756         else:
 757             return "<_pyio.{0} name={1!r}>".format(clsname, name)
 758
 759     ### Lower-level APIs ###
 760
 761     def fileno(self):
 762         return self.raw.fileno()
 763
 764     def isatty(self):
 765         return self.raw.isatty()
 766
 767
 768 class BytesIO(BufferedIOBase):
 769
 770     """Buffered I/O implementation using an in-memory bytes buffer."""
 771
 772     def __init__(self, initial_bytes=None):
 773         buf = bytearray()
 774         if initial_bytes is not None:
 775             buf.extend(initial_bytes)
 776         self._buffer = buf
 777         self._pos = 0
 778
 779     def __getstate__(self):
 780         if self.closed:
 781             raise ValueError("__getstate__ on closed file")
 782         return self.__dict__.copy()
 783
 784     def getvalue(self):
 785         """Return the bytes value (contents) of the buffer
 786         """
 787         if self.closed:
 788             raise ValueError("getvalue on closed file")
 789         return bytes(self._buffer)
 790
 791     def read(self, n=None):
 792         if self.closed:
 793             raise ValueError("read from closed file")
 794         if n is None:
 795             n = -1
 796         if not isinstance(n, (int, long)):
 797             raise TypeError("integer argument expected, got {0!r}".format(
 798                 type(n)))
 799         if n < 0:
 800             n = len(self._buffer)
 801         if len(self._buffer) <= self._pos:
 802             return b""
 803         newpos = min(len(self._buffer), self._pos + n)
 804         b = self._buffer[self._pos : newpos]
 805         self._pos = newpos
 806         return bytes(b)
 807
 808     def read1(self, n):
 809         """This is the same as read.
 810         """
 811         return self.read(n)
 812
 813     def write(self, b):
 814         if self.closed:
 815             raise ValueError("write to closed file")
 816         if isinstance(b, unicode):
 817             raise TypeError("can't write unicode to binary stream")
 818         n = len(b)
 819         if n == 0:
 820             return 0
 821         pos = self._pos
 822         if pos > len(self._buffer):
 823             # Inserts null bytes between the current end of the file
 824             # and the new write position.
 825             padding = b'\x00' * (pos - len(self._buffer))
 826             self._buffer += padding
 827         self._buffer[pos:pos + n] = b
 828         self._pos += n
 829         return n
 830
 831     def seek(self, pos, whence=0):
 832         if self.closed:
 833             raise ValueError("seek on closed file")
 834         try:
 835             pos.__index__
 836         except AttributeError:
 837             raise TypeError("an integer is required")
 838         if whence == 0:
 839             if pos < 0:
 840                 raise ValueError("negative seek position %r" % (pos,))
 841             self._pos = pos
 842         elif whence == 1:
 843             self._pos = max(0, self._pos + pos)
 844         elif whence == 2:
 845             self._pos = max(0, len(self._buffer) + pos)
 846         else:
 847             raise ValueError("invalid whence value")
 848         return self._pos
 849
 850     def tell(self):
 851         if self.closed:
 852             raise ValueError("tell on closed file")
 853         return self._pos
 854
 855     def truncate(self, pos=None):
 856         if self.closed:
 857             raise ValueError("truncate on closed file")
 858         if pos is None:
 859             pos = self._pos
 860         else:
 861             try:
 862                 pos.__index__
 863             except AttributeError:
 864                 raise TypeError("an integer is required")
 865             if pos < 0:
 866                 raise ValueError("negative truncate position %r" % (pos,))
 867         del self._buffer[pos:]
 868         return pos
 869
 870     def readable(self):
 871         return True
 872
 873     def writable(self):
 874         return True
 875
 876     def seekable(self):
 877         return True
 878
 879
 880 class BufferedReader(_BufferedIOMixin):
 881
 882     """BufferedReader(raw[, buffer_size])
 883
 884     A buffer for a readable, sequential BaseRawIO object.
 885
 886     The constructor creates a BufferedReader for the given readable raw
 887     stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
 888     is used.
 889     """
 890
 891     def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
 892         """Create a new buffered reader using the given readable raw IO object.
 893         """
 894         if not raw.readable():
 895             raise IOError('"raw" argument must be readable.')
 896
 897         _BufferedIOMixin.__init__(self, raw)
 898         if buffer_size <= 0:
 899             raise ValueError("invalid buffer size")
 900         self.buffer_size = buffer_size
 901         self._reset_read_buf()
 902         self._read_lock = Lock()
 903
 904     def _reset_read_buf(self):
 905         self._read_buf = b""
 906         self._read_pos = 0
 907
 908     def read(self, n=None):
 909         """Read n bytes.
 910
 911         Returns exactly n bytes of data unless the underlying raw IO
 912         stream reaches EOF or if the call would block in non-blocking
 913         mode. If n is negative, read until EOF or until read() would
 914         block.
 915         """
 916         if n is not None and n < -1:
 917             raise ValueError("invalid number of bytes to read")
 918         with self._read_lock:
 919             return self._read_unlocked(n)
 920
 921     def _read_unlocked(self, n=None):
 922         nodata_val = b""
 923         empty_values = (b"", None)
 924         buf = self._read_buf
 925         pos = self._read_pos
 926
 927         # Special case for when the number of bytes to read is unspecified.
 928         if n is None or n == -1:
 929             self._reset_read_buf()
 930             chunks = [buf[pos:]]  # Strip the consumed bytes.
 931             current_size = 0
 932             while True:
 933                 # Read until EOF or until read() would block.
 934                 chunk = self.raw.read()
 935                 if chunk in empty_values:
 936                     nodata_val = chunk
 937                     break
 938                 current_size += len(chunk)
 939                 chunks.append(chunk)
 940             return b"".join(chunks) or nodata_val
 941
 942         # The number of bytes to read is specified, return at most n bytes.
 943         avail = len(buf) - pos  # Length of the available buffered data.
 944         if n <= avail:
 945             # Fast path: the data to read is fully buffered.
 946             self._read_pos += n
 947             return buf[pos:pos+n]
 948         # Slow path: read from the stream until enough bytes are read,
 949         # or until an EOF occurs or until read() would block.
 950         chunks = [buf[pos:]]
 951         wanted = max(self.buffer_size, n)
 952         while avail < n:
 953             chunk = self.raw.read(wanted)
 954             if chunk in empty_values:
 955                 nodata_val = chunk
 956                 break
 957             avail += len(chunk)
 958             chunks.append(chunk)
 959         # n is more then avail only when an EOF occurred or when
 960         # read() would have blocked.
 961         n = min(n, avail)
 962         out = b"".join(chunks)
 963         self._read_buf = out[n:]  # Save the extra data in the buffer.
 964         self._read_pos = 0
 965         return out[:n] if out else nodata_val
 966
 967     def peek(self, n=0):
 968         """Returns buffered bytes without advancing the position.
 969
 970         The argument indicates a desired minimal number of bytes; we
 971         do at most one raw read to satisfy it.  We never return more
 972         than self.buffer_size.
 973         """
 974         with self._read_lock:
 975             return self._peek_unlocked(n)
 976
 977     def _peek_unlocked(self, n=0):
 978         want = min(n, self.buffer_size)
 979         have = len(self._read_buf) - self._read_pos
 980         if have < want or have <= 0:
 981             to_read = self.buffer_size - have
 982             current = self.raw.read(to_read)
 983             if current:
 984                 self._read_buf = self._read_buf[self._read_pos:] + current
 985                 self._read_pos = 0
 986         return self._read_buf[self._read_pos:]
 987
 988     def read1(self, n):
 989         """Reads up to n bytes, with at most one read() system call."""
 990         # Returns up to n bytes.  If at least one byte is buffered, we
 991         # only return buffered bytes.  Otherwise, we do one raw read.
 992         if n < 0:
 993             raise ValueError("number of bytes to read must be positive")
 994         if n == 0:
 995             return b""
 996         with self._read_lock:
 997             self._peek_unlocked(1)
 998             return self._read_unlocked(
 999                 min(n, len(self._read_buf) - self._read_pos))
1000
1001     def tell(self):
1002         return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos
1003
1004     def seek(self, pos, whence=0):
1005         if not (0 <= whence <= 2):
1006             raise ValueError("invalid whence value")
1007         with self._read_lock:
1008             if whence == 1:
1009                 pos -= len(self._read_buf) - self._read_pos
1010             pos = _BufferedIOMixin.seek(self, pos, whence)
1011             self._reset_read_buf()
1012             return pos
1013
class BufferedWriter(_BufferedIOMixin):

    """A buffer for a writeable sequential RawIO object.

    The constructor creates a BufferedWriter for the given writeable raw
    stream. If the buffer_size is not given, it defaults to
    DEFAULT_BUFFER_SIZE.

    Pending data is accumulated in self._write_buf and handed to the raw
    stream once it grows beyond buffer_size or on an explicit flush.
    write(), truncate(), flush() and seek() hold self._write_lock.
    """

    # Stack level passed to warnings.warn() for the max_buffer_size
    # deprecation warning issued by __init__.
    _warning_stack_offset = 2

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Create a buffered writer on top of the writable raw stream raw.

        max_buffer_size is deprecated and otherwise ignored.  Raises
        IOError if raw is not writable and ValueError if buffer_size is
        not positive.
        """
        if not raw.writable():
            raise IOError('"raw" argument must be writable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        if max_buffer_size is not None:
            warnings.warn("max_buffer_size is deprecated", DeprecationWarning,
                          self._warning_stack_offset)
        self.buffer_size = buffer_size
        self._write_buf = bytearray()
        self._write_lock = Lock()

    def write(self, b):
        """Buffer the bytes-like object b and return the number of bytes taken.

        Raises ValueError if the stream is closed, TypeError if b is a
        unicode string, and BlockingIOError (whose characters_written says
        how much of b was accepted) if the raw stream would block and the
        buffer cannot hold all of b.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, unicode):
            raise TypeError("can't write unicode to binary stream")
        with self._write_lock:
            # XXX we can implement some more tricks to try and avoid
            # partial writes
            if len(self._write_buf) > self.buffer_size:
                # We're full, so let's pre-flush the buffer
                try:
                    self._flush_unlocked()
                except BlockingIOError as e:
                    # We can't accept anything else.
                    # XXX Why not just let the exception pass through?
                    raise BlockingIOError(e.errno, e.strerror, 0)
            before = len(self._write_buf)
            self._write_buf.extend(b)
            written = len(self._write_buf) - before
            if len(self._write_buf) > self.buffer_size:
                try:
                    self._flush_unlocked()
                except BlockingIOError as e:
                    # If the partial flush brought the buffer back within
                    # buffer_size, everything stays buffered and the
                    # write counts as complete.
                    if len(self._write_buf) > self.buffer_size:
                        # We've hit the buffer_size. We have to accept a partial
                        # write and cut back our buffer.
                        overage = len(self._write_buf) - self.buffer_size
                        written -= overage
                        self._write_buf = self._write_buf[:self.buffer_size]
                        raise BlockingIOError(e.errno, e.strerror, written)
            return written

    def truncate(self, pos=None):
        """Flush pending data, then truncate the raw stream at pos.

        pos defaults to the raw stream's position after the flush.
        """
        with self._write_lock:
            self._flush_unlocked()
            if pos is None:
                pos = self.raw.tell()
            return self.raw.truncate(pos)

    def flush(self):
        """Write all buffered data to the raw stream."""
        with self._write_lock:
            self._flush_unlocked()

    def _flush_unlocked(self):
        """Implementation of flush(); the caller must hold _write_lock.

        Raises ValueError if the stream is closed, IOError if the raw
        stream reports an impossible byte count, and BlockingIOError
        (carrying the number of bytes flushed by this call) if the raw
        stream cannot take more data without blocking.
        """
        if self.closed:
            raise ValueError("flush of closed file")
        written = 0
        try:
            while self._write_buf:
                n = self.raw.write(self._write_buf)
                if n is None:
                    # A non-blocking raw stream returns None when it could
                    # not accept a single byte.  Report that as "would
                    # block" instead of as a bogus byte count; the handler
                    # below adds the bytes flushed so far.
                    import errno
                    raise BlockingIOError(
                        errno.EAGAIN,
                        "write could not complete without blocking", 0)
                if n > len(self._write_buf) or n < 0:
                    raise IOError("write() returned incorrect number of bytes")
                del self._write_buf[:n]
                written += n
        except BlockingIOError as e:
            # Account for whatever the raw stream took before blocking and
            # re-raise with the total flushed by this call.
            n = e.characters_written
            del self._write_buf[:n]
            written += n
            raise BlockingIOError(e.errno, e.strerror, written)

    def tell(self):
        """Return the logical position: raw position plus pending bytes."""
        return _BufferedIOMixin.tell(self) + len(self._write_buf)

    def seek(self, pos, whence=0):
        """Flush pending data, then seek the raw stream.

        Raises ValueError if whence is not 0, 1 or 2.
        """
        if not (0 <= whence <= 2):
            raise ValueError("invalid whence")
        with self._write_lock:
            self._flush_unlocked()
            return _BufferedIOMixin.seek(self, pos, whence)
1109
1110
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together to
    form a sequential IO object that can read and write. This is typically
    used with a socket or two-way pipe.

    reader and writer are RawIOBase objects that are readable and
    writeable respectively. If the buffer_size is omitted it defaults to
    DEFAULT_BUFFER_SIZE.
    """

    # XXX The usefulness of this (compared to having two separate IO
    # objects) is questionable.

    def __init__(self, reader, writer,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Constructor.

        The arguments are two RawIO instances.  max_buffer_size is
        deprecated and otherwise ignored.  Raises IOError if reader is
        not readable or writer is not writable.
        """
        if max_buffer_size is not None:
            warnings.warn("max_buffer_size is deprecated", DeprecationWarning, 2)

        if not reader.readable():
            raise IOError('"reader" argument must be readable.')

        if not writer.writable():
            raise IOError('"writer" argument must be writable.')

        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size)

    def read(self, n=None):
        """Read from the reader side; None is passed on as -1."""
        if n is None:
            n = -1
        return self.reader.read(n)

    def readinto(self, b):
        """Delegate to the reader's readinto()."""
        return self.reader.readinto(b)

    def write(self, b):
        """Delegate to the writer's write()."""
        return self.writer.write(b)

    def peek(self, n=0):
        """Delegate to the reader's peek()."""
        return self.reader.peek(n)

    def read1(self, n):
        """Delegate to the reader's read1()."""
        return self.reader.read1(n)

    def readable(self):
        """Report whether the reader side is readable."""
        return self.reader.readable()

    def writable(self):
        """Report whether the writer side is writable."""
        return self.writer.writable()

    def flush(self):
        """Flush the writer side."""
        return self.writer.flush()

    def close(self):
        """Close the writer, then the reader.

        The reader is closed even if closing the writer raises, so a
        failure on the write side cannot leak the read side; the writer's
        exception still propagates.
        """
        try:
            self.writer.close()
        finally:
            self.reader.close()

    def isatty(self):
        """Return True if either side is attached to a tty."""
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        """Closed state of the writer side."""
        return self.writer.closed
1181
1182
class BufferedRandom(BufferedWriter, BufferedReader):

    """Buffered access to a seekable raw stream, for reading and writing.

    The constructor sets up both reader and writer state for the seekable
    stream raw given as the first argument.  buffer_size defaults to
    DEFAULT_BUFFER_SIZE when omitted.

    Pending writes are flushed before any read-side operation, and
    read-ahead is undone (by seeking the raw stream back) before writing.
    """

    # Stack level used by BufferedWriter.__init__ for its deprecation
    # warning.
    _warning_stack_offset = 3

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Initialise reader and writer state on the seekable stream raw."""
        raw._checkSeekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)

    def seek(self, pos, whence=0):
        """Flush writes, drop read-ahead, seek raw and return the position.

        Raises ValueError for a whence outside 0..2 and IOError if the raw
        stream reports a negative position.
        """
        if not (0 <= whence <= 2):
            raise ValueError("invalid whence")
        self.flush()
        if self._read_buf:
            # Undo read ahead: move the raw stream back over the bytes
            # that were buffered but not yet consumed.
            with self._read_lock:
                unread = len(self._read_buf) - self._read_pos
                self.raw.seek(-unread, 1)
        # The raw seek comes before emptying the read buffer so that a
        # failing seek does not lose the buffered data forever.
        new_pos = self.raw.seek(pos, whence)
        with self._read_lock:
            self._reset_read_buf()
        if new_pos < 0:
            raise IOError("seek() returned invalid position")
        return new_pos

    def tell(self):
        """Return the logical position, honouring whichever buffer is in use."""
        if self._write_buf:
            return BufferedWriter.tell(self)
        return BufferedReader.tell(self)

    def truncate(self, pos=None):
        """Truncate at pos (default: the current logical position)."""
        target = self.tell() if pos is None else pos
        # BufferedWriter.truncate() flushes pending writes first.
        return BufferedWriter.truncate(self, target)

    def read(self, n=None):
        """Flush pending writes, then read as BufferedReader.read() does."""
        size = -1 if n is None else n
        self.flush()
        return BufferedReader.read(self, size)

    def readinto(self, b):
        """Flush pending writes, then delegate to BufferedReader.readinto()."""
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, n=0):
        """Flush pending writes, then delegate to BufferedReader.peek()."""
        self.flush()
        return BufferedReader.peek(self, n)

    def read1(self, n):
        """Flush pending writes, then delegate to BufferedReader.read1()."""
        self.flush()
        return BufferedReader.read1(self, n)

    def write(self, b):
        """Undo any read-ahead, then delegate to BufferedWriter.write()."""
        if self._read_buf:
            with self._read_lock:
                unread = len(self._read_buf) - self._read_pos
                self.raw.seek(-unread, 1)
                self._reset_read_buf()
        return BufferedWriter.write(self, b)
1254
1255
class TextIOBase(IOBase):

    """Abstract base class for text streams.

    Offers a character- and line-oriented view of stream I/O.  No
    readinto method is provided because Python's character strings are
    immutable.  There is no public constructor.
    """

    def read(self, n=-1):
        """Read and return at most n characters from the stream.

        Reads from the underlying buffer until n characters are available
        or EOF is hit.  A negative or omitted n means read until EOF.
        """
        self._unsupported("read")

    def write(self, s):
        """Write the string s to the stream."""
        self._unsupported("write")

    def truncate(self, pos=None):
        """Truncate the stream size to pos."""
        self._unsupported("truncate")

    def readline(self):
        """Read up to and including the next newline, or up to EOF.

        An empty string is returned when EOF is hit immediately.
        """
        self._unsupported("readline")

    def detach(self):
        """
        Separate the underlying buffer from the TextIOBase and return it.

        Once the buffer has been detached, the TextIO object is left in an
        unusable state.
        """
        self._unsupported("detach")

    @property
    def encoding(self):
        """Name of the stream encoding; None here, subclasses override."""
        return None

    @property
    def newlines(self):
        """Line endings translated so far.

        Only line endings translated while reading are taken into account.

        None here; subclasses should override.
        """
        return None

    @property
    def errors(self):
        """Error setting of the decoder or encoder.

        None here; subclasses should override."""
        return None

io.TextIOBase.register(TextIOBase)
1320
1321
class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
    r"""Incremental decoder for reading in universal newlines mode.

    Wraps another incremental decoder (or none at all, when decoder is
    None) and, when translate is true, turns \r\n and \r into \n.  The
    kinds of newline encountered are recorded and exposed through the
    newlines property.  With translate=False it still guarantees that a
    \r\n pair is never split across two decode() results.
    """
    def __init__(self, decoder, translate, errors='strict'):
        codecs.IncrementalDecoder.__init__(self, errors=errors)
        self.translate = translate
        self.decoder = decoder
        self.seennl = 0  # bitmask of _LF / _CR / _CRLF seen so far
        self.pendingcr = False  # True while a trailing \r is held back

    def decode(self, input, final=False):
        """Decode input and return the text, handling newlines as configured."""
        # Run the wrapped decoder, if any.
        if self.decoder is None:
            text = input
        else:
            text = self.decoder.decode(input, final=final)

        # Put back the \r that the previous call held back.
        if self.pendingcr and (text or final):
            text = "\r" + text
            self.pendingcr = False

        # Hold back a trailing \r even when not translating, so that
        # readline() is sure to see \r\n in a single piece.
        if text.endswith("\r") and not final:
            text = text[:-1]
            self.pendingcr = True

        # Record which kinds of newline occur in this piece of text.
        n_crlf = text.count('\r\n')
        n_cr = text.count('\r') - n_crlf
        n_lf = text.count('\n') - n_crlf
        seen = 0
        if n_lf:
            seen |= self._LF
        if n_cr:
            seen |= self._CR
        if n_crlf:
            seen |= self._CRLF
        self.seennl |= seen

        if self.translate:
            if n_crlf:
                text = text.replace("\r\n", "\n")
            if n_cr:
                text = text.replace("\r", "\n")

        return text

    def getstate(self):
        """Return (buffered_input, flags); bit 0 of flags is the pending \\r."""
        if self.decoder is None:
            buf, flag = b"", 0
        else:
            buf, flag = self.decoder.getstate()
        # Shift the wrapped decoder's flags up to make room for our bit.
        flag = (flag << 1) | (1 if self.pendingcr else 0)
        return buf, flag

    def setstate(self, state):
        """Restore a state previously returned by getstate()."""
        buf, flag = state
        self.pendingcr = bool(flag & 1)
        inner = self.decoder
        if inner is not None:
            inner.setstate((buf, flag >> 1))

    def reset(self):
        """Forget seen newlines and any pending \\r; reset the wrapped decoder."""
        self.seennl = 0
        self.pendingcr = False
        inner = self.decoder
        if inner is not None:
            inner.reset()

    # Bit values combined in self.seennl.
    _LF = 1
    _CR = 2
    _CRLF = 4

    @property
    def newlines(self):
        """Newline kinds seen so far: None, a string, or a tuple of strings."""
        by_mask = (None,
                   "\n",
                   "\r",
                   ("\r", "\n"),
                   "\r\n",
                   ("\n", "\r\n"),
                   ("\r", "\r\n"),
                   ("\r", "\n", "\r\n")
                  )
        return by_mask[self.seennl]
1405
1406
1407 class TextIOWrapper(TextIOBase):
1408
1409     r"""Character and line based layer over a BufferedIOBase object, buffer.
1410
1411     encoding gives the name of the encoding that the stream will be
1412     decoded or encoded with. It defaults to locale.getpreferredencoding.
1413
1414     errors determines the strictness of encoding and decoding (see the
1415     codecs.register) and defaults to "strict".
1416
1417     newline can be None, '', '\n', '\r', or '\r\n'.  It controls the
1418     handling of line endings. If it is None, universal newlines is
1419     enabled.  With this enabled, on input, the lines endings '\n', '\r',
1420     or '\r\n' are translated to '\n' before being returned to the
1421     caller. Conversely, on output, '\n' is translated to the system
    default line separator, os.linesep. If newline is any other of its
1423     legal values, that newline becomes the newline when the file is read
1424     and it is returned untranslated. On output, '\n' is converted to the
1425     newline.
1426
1427     If line_buffering is True, a call to flush is implied when a call to
1428     write contains a newline character.
1429     """
1430
1431     _CHUNK_SIZE = 2048
1432
1433     def __init__(self, buffer, encoding=None, errors=None, newline=None,
1434                  line_buffering=False):
1435         if newline is not None and not isinstance(newline, basestring):
1436             raise TypeError("illegal newline type: %r" % (type(newline),))
1437         if newline not in (None, "", "\n", "\r", "\r\n"):
1438             raise ValueError("illegal newline value: %r" % (newline,))
1439         if encoding is None:
1440             try:
1441                 import locale
1442             except ImportError:
1443                 # Importing locale may fail if Python is being built
1444                 encoding = "ascii"
1445             else:
1446                 encoding = locale.getpreferredencoding()
1447
1448         if not isinstance(encoding, basestring):
1449             raise ValueError("invalid encoding: %r" % encoding)
1450
1451         if errors is None:
1452             errors = "strict"
1453         else:
1454             if not isinstance(errors, basestring):
1455                 raise ValueError("invalid errors: %r" % errors)
1456
1457         self.buffer = buffer
1458         self._line_buffering = line_buffering
1459         self._encoding = encoding
1460         self._errors = errors
1461         self._readuniversal = not newline
1462         self._readtranslate = newline is None
1463         self._readnl = newline
1464         self._writetranslate = newline != ''
1465         self._writenl = newline or os.linesep
1466         self._encoder = None
1467         self._decoder = None
1468         self._decoded_chars = ''  # buffer for text returned from decoder
1469         self._decoded_chars_used = 0  # offset into _decoded_chars for read()
1470         self._snapshot = None  # info for reconstructing decoder state
1471         self._seekable = self._telling = self.buffer.seekable()
1472
1473         if self._seekable and self.writable():
1474             position = self.buffer.tell()
1475             if position != 0:
1476                 try:
1477                     self._get_encoder().setstate(0)
1478                 except LookupError:
1479                     # Sometimes the encoder doesn't exist
1480                     pass
1481
1482     # self._snapshot is either None, or a tuple (dec_flags, next_input)
1483     # where dec_flags is the second (integer) item of the decoder state
1484     # and next_input is the chunk of input bytes that comes next after the
1485     # snapshot point.  We use this to reconstruct decoder states in tell().
1486
1487     # Naming convention:
1488     #   - "bytes_..." for integer variables that count input bytes
1489     #   - "chars_..." for integer variables that count decoded characters
1490
1491     def __repr__(self):
1492         try:
1493             name = self.name
1494         except AttributeError:
1495             return "<_pyio.TextIOWrapper encoding='{0}'>".format(self.encoding)
1496         else:
1497             return "<_pyio.TextIOWrapper name={0!r} encoding='{1}'>".format(
1498                 name, self.encoding)
1499
1500     @property
1501     def encoding(self):
1502         return self._encoding
1503
1504     @property
1505     def errors(self):
1506         return self._errors
1507
1508     @property
1509     def line_buffering(self):
1510         return self._line_buffering
1511
1512     def seekable(self):
1513         return self._seekable
1514
1515     def readable(self):
1516         return self.buffer.readable()
1517
1518     def writable(self):
1519         return self.buffer.writable()
1520
1521     def flush(self):
1522         self.buffer.flush()
1523         self._telling = self._seekable
1524
1525     def close(self):
1526         if self.buffer is not None and not self.closed:
1527             self.flush()
1528             self.buffer.close()
1529
1530     @property
1531     def closed(self):
1532         return self.buffer.closed
1533
1534     @property
1535     def name(self):
1536         return self.buffer.name
1537
1538     def fileno(self):
1539         return self.buffer.fileno()
1540
1541     def isatty(self):
1542         return self.buffer.isatty()
1543
1544     def write(self, s):
1545         if self.closed:
1546             raise ValueError("write to closed file")
1547         if not isinstance(s, unicode):
1548             raise TypeError("can't write %s to text stream" %
1549                             s.__class__.__name__)
1550         length = len(s)
1551         haslf = (self._writetranslate or self._line_buffering) and "\n" in s
1552         if haslf and self._writetranslate and self._writenl != "\n":
1553             s = s.replace("\n", self._writenl)
1554         encoder = self._encoder or self._get_encoder()
1555         # XXX What if we were just reading?
1556         b = encoder.encode(s)
1557         self.buffer.write(b)
1558         if self._line_buffering and (haslf or "\r" in s):
1559             self.flush()
1560         self._snapshot = None
1561         if self._decoder:
1562             self._decoder.reset()
1563         return length
1564
1565     def _get_encoder(self):
1566         make_encoder = codecs.getincrementalencoder(self._encoding)
1567         self._encoder = make_encoder(self._errors)
1568         return self._encoder
1569
1570     def _get_decoder(self):
1571         make_decoder = codecs.getincrementaldecoder(self._encoding)
1572         decoder = make_decoder(self._errors)
1573         if self._readuniversal:
1574             decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
1575         self._decoder = decoder
1576         return decoder
1577
1578     # The following three methods implement an ADT for _decoded_chars.
1579     # Text returned from the decoder is buffered here until the client
1580     # requests it by calling our read() or readline() method.
1581     def _set_decoded_chars(self, chars):
1582         """Set the _decoded_chars buffer."""
1583         self._decoded_chars = chars
1584         self._decoded_chars_used = 0
1585
1586     def _get_decoded_chars(self, n=None):
1587         """Advance into the _decoded_chars buffer."""
1588         offset = self._decoded_chars_used
1589         if n is None:
1590             chars = self._decoded_chars[offset:]
1591         else:
1592             chars = self._decoded_chars[offset:offset + n]
1593         self._decoded_chars_used += len(chars)
1594         return chars
1595
1596     def _rewind_decoded_chars(self, n):
1597         """Rewind the _decoded_chars buffer."""
1598         if self._decoded_chars_used < n:
1599             raise AssertionError("rewind decoded_chars out of bounds")
1600         self._decoded_chars_used -= n
1601
1602     def _read_chunk(self):
1603         """
1604         Read and decode the next chunk of data from the BufferedReader.
1605         """
1606
1607         # The return value is True unless EOF was reached.  The decoded
1608         # string is placed in self._decoded_chars (replacing its previous
1609         # value).  The entire input chunk is sent to the decoder, though
1610         # some of it may remain buffered in the decoder, yet to be
1611         # converted.
1612
1613         if self._decoder is None:
1614             raise ValueError("no decoder")
1615
1616         if self._telling:
1617             # To prepare for tell(), we need to snapshot a point in the
1618             # file where the decoder's input buffer is empty.
1619
1620             dec_buffer, dec_flags = self._decoder.getstate()
1621             # Given this, we know there was a valid snapshot point
1622             # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1623
1624         # Read a chunk, decode it, and put the result in self._decoded_chars.
1625         input_chunk = self.buffer.read1(self._CHUNK_SIZE)
1626         eof = not input_chunk
1627         self._set_decoded_chars(self._decoder.decode(input_chunk, eof))
1628
1629         if self._telling:
1630             # At the snapshot point, len(dec_buffer) bytes before the read,
1631             # the next input to be decoded is dec_buffer + input_chunk.
1632             self._snapshot = (dec_flags, dec_buffer + input_chunk)
1633
1634         return not eof
1635
1636     def _pack_cookie(self, position, dec_flags=0,
1637                            bytes_to_feed=0, need_eof=0, chars_to_skip=0):
1638         # The meaning of a tell() cookie is: seek to position, set the
1639         # decoder flags to dec_flags, read bytes_to_feed bytes, feed them
1640         # into the decoder with need_eof as the EOF flag, then skip
1641         # chars_to_skip characters of the decoded result.  For most simple
1642         # decoders, tell() will often just give a byte offset in the file.
1643         return (position | (dec_flags<<64) | (bytes_to_feed<<128) |
1644                (chars_to_skip<<192) | bool(need_eof)<<256)
1645
1646     def _unpack_cookie(self, bigint):
1647         rest, position = divmod(bigint, 1<<64)
1648         rest, dec_flags = divmod(rest, 1<<64)
1649         rest, bytes_to_feed = divmod(rest, 1<<64)
1650         need_eof, chars_to_skip = divmod(rest, 1<<64)
1651         return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip
1652
    def tell(self):
        """Return an opaque cookie describing the current text position.

        When there is no decoder or no snapshot, the result is the plain
        position reported by the underlying buffer.  Otherwise the result
        is built with _pack_cookie() and records the nearest earlier point
        at which the decoder had nothing buffered, plus how many bytes to
        feed and how many decoded characters to skip from there.

        Raises IOError if the stream is not seekable, if telling has been
        disabled by next(), or if the position cannot be reconstructed.
        Raises AssertionError if decoded text is pending although no
        snapshot exists.
        """
        if not self._seekable:
            raise IOError("underlying stream is not seekable")
        if not self._telling:
            raise IOError("telling position disabled by next() call")
        # Push out pending writes so the buffer position is meaningful.
        self.flush()
        position = self.buffer.tell()
        decoder = self._decoder
        if decoder is None or self._snapshot is None:
            if self._decoded_chars:
                # This should never happen.
                raise AssertionError("pending decoded text")
            return position

        # Skip backward to the snapshot point (see _read_chunk).
        dec_flags, next_input = self._snapshot
        position -= len(next_input)

        # How many decoded characters have been used up since the snapshot?
        chars_to_skip = self._decoded_chars_used
        if chars_to_skip == 0:
            # We haven't moved from the snapshot point.
            return self._pack_cookie(position, dec_flags)

        # Starting from the snapshot position, we will walk the decoder
        # forward until it gives us enough decoded characters.
        # The decoder is shared with normal reads, so its state is saved
        # here and restored in the finally clause below.
        saved_state = decoder.getstate()
        try:
            # Note our initial start point.
            decoder.setstate((b'', dec_flags))
            start_pos = position
            start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
            need_eof = 0

            # Feed the decoder one byte at a time.  As we go, note the
            # nearest "safe start point" before the current location
            # (a point where the decoder has nothing buffered, so seek()
            # can safely start from there and advance to this location).
            # NOTE(review): this relies on iterating next_input yielding
            # one-byte strings (Python 2 str semantics) -- confirm before
            # reusing on another Python version.
            for next_byte in next_input:
                bytes_fed += 1
                chars_decoded += len(decoder.decode(next_byte))
                dec_buffer, dec_flags = decoder.getstate()
                if not dec_buffer and chars_decoded <= chars_to_skip:
                    # Decoder buffer is empty, so this is a safe start point.
                    start_pos += bytes_fed
                    chars_to_skip -= chars_decoded
                    start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
                if chars_decoded >= chars_to_skip:
                    break
            else:
                # We didn't get enough decoded data; signal EOF to get more.
                chars_decoded += len(decoder.decode(b'', final=True))
                need_eof = 1
                if chars_decoded < chars_to_skip:
                    raise IOError("can't reconstruct logical file position")

            # The returned cookie corresponds to the last safe start point.
            return self._pack_cookie(
                start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
        finally:
            # Undo the replay so subsequent reads continue where they were.
            decoder.setstate(saved_state)
1714
1715     def truncate(self, pos=None):
1716         self.flush()
1717         if pos is None:
1718             pos = self.tell()
1719         return self.buffer.truncate(pos)
1720
1721     def detach(self):
1722         if self.buffer is None:
1723             raise ValueError("buffer is already detached")
1724         self.flush()
1725         buffer = self.buffer
1726         self.buffer = None
1727         return buffer
1728
1729     def seek(self, cookie, whence=0):
1730         if self.closed:
1731             raise ValueError("tell on closed file")
1732         if not self._seekable:
1733             raise IOError("underlying stream is not seekable")
1734         if whence == 1: # seek relative to current position
1735             if cookie != 0:
1736                 raise IOError("can't do nonzero cur-relative seeks")
1737             # Seeking to the current position should attempt to
1738             # sync the underlying buffer with the current position.
1739             whence = 0
1740             cookie = self.tell()
1741         if whence == 2: # seek relative to end of file
1742             if cookie != 0:
1743                 raise IOError("can't do nonzero end-relative seeks")
1744             self.flush()
1745             position = self.buffer.seek(0, 2)
1746             self._set_decoded_chars('')
1747             self._snapshot = None
1748             if self._decoder:
1749                 self._decoder.reset()
1750             return position
1751         if whence != 0:
1752             raise ValueError("invalid whence (%r, should be 0, 1 or 2)" %
1753                              (whence,))
1754         if cookie < 0:
1755             raise ValueError("negative seek position %r" % (cookie,))
1756         self.flush()
1757
1758         # The strategy of seek() is to go back to the safe start point
1759         # and replay the effect of read(chars_to_skip) from there.
1760         start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
1761             self._unpack_cookie(cookie)
1762
1763         # Seek back to the safe start point.
1764         self.buffer.seek(start_pos)
1765         self._set_decoded_chars('')
1766         self._snapshot = None
1767
1768         # Restore the decoder to its state from the safe start point.
1769         if cookie == 0 and self._decoder:
1770             self._decoder.reset()
1771         elif self._decoder or dec_flags or chars_to_skip:
1772             self._decoder = self._decoder or self._get_decoder()
1773             self._decoder.setstate((b'', dec_flags))
1774             self._snapshot = (dec_flags, b'')
1775
1776         if chars_to_skip:
1777             # Just like _read_chunk, feed the decoder and save a snapshot.
1778             input_chunk = self.buffer.read(bytes_to_feed)
1779             self._set_decoded_chars(
1780                 self._decoder.decode(input_chunk, need_eof))
1781             self._snapshot = (dec_flags, input_chunk)
1782
1783             # Skip chars_to_skip of the decoded characters.
1784             if len(self._decoded_chars) < chars_to_skip:
1785                 raise IOError("can't restore logical file position")
1786             self._decoded_chars_used = chars_to_skip
1787
1788         # Finally, reset the encoder (merely useful for proper BOM handling)
1789         try:
1790             encoder = self._encoder or self._get_encoder()
1791         except LookupError:
1792             # Sometimes the encoder doesn't exist
1793             pass
1794         else:
1795             if cookie != 0:
1796                 encoder.setstate(0)
1797             else:
1798                 encoder.reset()
1799         return cookie
1800
1801     def read(self, n=None):
1802         self._checkReadable()
1803         if n is None:
1804             n = -1
1805         decoder = self._decoder or self._get_decoder()
1806         try:
1807             n.__index__
1808         except AttributeError:
1809             raise TypeError("an integer is required")
1810         if n < 0:
1811             # Read everything.
1812             result = (self._get_decoded_chars() +
1813                       decoder.decode(self.buffer.read(), final=True))
1814             self._set_decoded_chars('')
1815             self._snapshot = None
1816             return result
1817         else:
1818             # Keep reading chunks until we have n characters to return.
1819             eof = False
1820             result = self._get_decoded_chars(n)
1821             while len(result) < n and not eof:
1822                 eof = not self._read_chunk()
1823                 result += self._get_decoded_chars(n - len(result))
1824             return result
1825
1826     def next(self):
1827         self._telling = False
1828         line = self.readline()
1829         if not line:
1830             self._snapshot = None
1831             self._telling = self._seekable
1832             raise StopIteration
1833         return line
1834
    def readline(self, limit=None):
        """Read and return one line of decoded text.

        The line ends at the first line terminator found, where the kind
        of terminator searched for depends on the _readtranslate,
        _readuniversal and _readnl settings.  If `limit` is
        non-negative, at most `limit` characters are returned.  At end of
        file whatever was collected (possibly an empty string) is
        returned and the snapshot is dropped.

        Raises ValueError if the stream is closed and TypeError if
        `limit` is neither None nor an integer.
        """
        if self.closed:
            raise ValueError("read from closed file")
        if limit is None:
            limit = -1
        elif not isinstance(limit, (int, long)):
            raise TypeError("limit must be an integer")

        # Grab all the decoded text (we will rewind any extra bits later).
        line = self._get_decoded_chars()

        # Index from which the next newline search starts, so text that
        # was already scanned is not searched again.
        start = 0
        # Make the decoder if it doesn't already exist.
        if not self._decoder:
            self._get_decoder()

        pos = endpos = None
        while True:
            if self._readtranslate:
                # Newlines are already translated, only search for \n
                pos = line.find('\n', start)
                if pos >= 0:
                    endpos = pos + 1
                    break
                else:
                    start = len(line)

            elif self._readuniversal:
                # Universal newline search. Find any of \r, \r\n, \n
                # The decoder ensures that \r\n are not split in two pieces

                # In C we'd look for these in parallel of course.
                nlpos = line.find("\n", start)
                crpos = line.find("\r", start)
                if crpos == -1:
                    if nlpos == -1:
                        # Nothing found
                        start = len(line)
                    else:
                        # Found \n
                        endpos = nlpos + 1
                        break
                elif nlpos == -1:
                    # Found lone \r
                    endpos = crpos + 1
                    break
                elif nlpos < crpos:
                    # Found \n
                    endpos = nlpos + 1
                    break
                elif nlpos == crpos + 1:
                    # Found \r\n
                    endpos = crpos + 2
                    break
                else:
                    # Found \r
                    endpos = crpos + 1
                    break
            else:
                # non-universal
                # The whole line is searched each time (no start offset).
                pos = line.find(self._readnl)
                if pos >= 0:
                    endpos = pos + len(self._readnl)
                    break

            if limit >= 0 and len(line) >= limit:
                endpos = limit  # reached length limit
                break

            # No line ending seen yet - get more data
            # Keep reading until a chunk actually yields decoded text or
            # _read_chunk() reports that nothing more is available.
            while self._read_chunk():
                if self._decoded_chars:
                    break
            if self._decoded_chars:
                line += self._get_decoded_chars()
            else:
                # end of file
                self._set_decoded_chars('')
                self._snapshot = None
                return line

        if limit >= 0 and endpos > limit:
            endpos = limit  # don't exceed limit

        # Rewind _decoded_chars to just after the line ending we found.
        self._rewind_decoded_chars(len(line) - endpos)
        return line[:endpos]
1922
1923     @property
1924     def newlines(self):
1925         return self._decoder.newlines if self._decoder else None
1926
1927
class StringIO(TextIOWrapper):
    """In-memory text stream built on TextIOWrapper over a BytesIO.

    initial_value, when non-empty, becomes the initial content of the
    stream.  newline has the same meaning as the newline argument of
    TextIOWrapper's constructor.
    """

    def __init__(self, initial_value="", newline="\n"):
        """Create the stream and, if given, preload initial_value.

        A non-unicode initial_value is converted with unicode() before
        being written; the stream is then positioned back at the start.
        """
        super(StringIO, self).__init__(BytesIO(),
                                       encoding="utf-8",
                                       errors="strict",
                                       newline=newline)
        # Issue #5645: make universal newlines semantics the same as in the
        # C version, even under Windows.
        if newline is None:
            self._writetranslate = False
        if not initial_value:
            return
        text = initial_value
        if not isinstance(text, unicode):
            text = unicode(text)
        self.write(text)
        self.seek(0)

    def getvalue(self):
        """Flush and return the whole buffer content as decoded text."""
        self.flush()
        raw_bytes = self.buffer.getvalue()
        return raw_bytes.decode(self._encoding, self._errors)

    def __repr__(self):
        # TextIOWrapper's repr mentions the encoding; for StringIO the
        # encoding is an implementation detail, so use the plain object
        # repr instead.
        return object.__repr__(self)

    @property
    def errors(self):
        """Always None for StringIO."""
        return None

    @property
    def encoding(self):
        """Always None for StringIO."""
        return None

    def detach(self):
        """Report detach as unsupported; it makes no sense on StringIO."""
        self._unsupported("detach")