# Backport importlib to at least Python 2.5 by getting rid of use of str.format.
# [python.git] / Lib / io.py
# blob 320a4b9b5f23fc79da2b93e7d04d5898ddf85faa
1 """
2 The io module provides the Python interfaces to stream handling. The
3 builtin open function is defined in this module.
5 At the top of the I/O hierarchy is the abstract base class IOBase. It
6 defines the basic interface to a stream. Note, however, that there is no
separation between reading and writing to streams; implementations are
8 allowed to throw an IOError if they do not support a given operation.
10 Extending IOBase is RawIOBase which deals simply with the reading and
11 writing of raw bytes to a stream. FileIO subclasses RawIOBase to provide
12 an interface to OS files.
14 BufferedIOBase deals with buffering on a raw byte stream (RawIOBase). Its
15 subclasses, BufferedWriter, BufferedReader, and BufferedRWPair buffer
16 streams that are readable, writable, and both respectively.
17 BufferedRandom provides a buffered interface to random access
18 streams. BytesIO is a simple stream of in-memory bytes.
20 Another IOBase subclass, TextIOBase, deals with the encoding and decoding
21 of streams into text. TextIOWrapper, which extends it, is a buffered text
22 interface to a buffered raw stream (`BufferedIOBase`). Finally, StringIO
is an in-memory stream for text.
25 Argument names are not part of the specification, and only the arguments
26 of open() are intended to be used as keyword arguments.
28 data:
30 DEFAULT_BUFFER_SIZE
32 An int containing the default buffer size used by the module's buffered
33 I/O classes. open() uses the file's blksize (as obtained by os.stat) if
34 possible.
35 """
36 # New I/O library conforming to PEP 3116.
38 # This is a prototype; hopefully eventually some of this will be
39 # reimplemented in C.
41 # XXX edge cases when switching between reading/writing
42 # XXX need to support 1 meaning line-buffered
43 # XXX whenever an argument is None, use the default value
44 # XXX read/write ops should check readable/writable
45 # XXX buffered readinto should work with arbitrary buffer objects
46 # XXX use incremental encoder for text output, at least for UTF-16 and UTF-8-SIG
47 # XXX check writable, readable and seekable in appropriate places
48 from __future__ import print_function
49 from __future__ import unicode_literals
51 __author__ = ("Guido van Rossum <guido@python.org>, "
52 "Mike Verdone <mike.verdone@gmail.com>, "
53 "Mark Russell <mark.russell@zen.co.uk>")
55 __all__ = ["BlockingIOError", "open", "IOBase", "RawIOBase", "FileIO",
56 "BytesIO", "StringIO", "BufferedIOBase",
57 "BufferedReader", "BufferedWriter", "BufferedRWPair",
58 "BufferedRandom", "TextIOBase", "TextIOWrapper"]
60 import os
61 import abc
62 import codecs
63 import _fileio
64 import threading
66 # open() uses st_blksize whenever we can
67 DEFAULT_BUFFER_SIZE = 8 * 1024 # bytes
69 # py3k has only new style classes
70 __metaclass__ = type
class BlockingIOError(IOError):

    """Raised when I/O would block on a non-blocking I/O stream."""

    def __init__(self, errno, strerror, characters_written=0):
        # Standard IOError bookkeeping (errno / strerror) ...
        IOError.__init__(self, errno, strerror)
        # ... plus how many characters made it out before blocking.
        self.characters_written = characters_written
def open(file, mode="r", buffering=None, encoding=None, errors=None,
         newline=None, closefd=True):
    r"""Open file and return a stream.  Raise IOError upon failure.

    file is either a string giving the name (and the path if the file
    isn't in the current working directory) of the file to be opened or
    an integer file descriptor of the file to be wrapped.  (If a file
    descriptor is given, it is closed when the returned I/O object is
    closed, unless closefd is set to False.)

    mode is an optional string that specifies the mode in which the file
    is opened.  It defaults to 'r' (open for reading in text mode).  The
    available mode characters are:

    ========= ===============================================================
    Character Meaning
    --------- ---------------------------------------------------------------
    'r'       open for reading (default)
    'w'       open for writing, truncating the file first
    'a'       open for writing, appending to the end of the file if it exists
    'b'       binary mode
    't'       text mode (default)
    '+'       open a disk file for updating (reading and writing)
    'U'       universal newline mode (for backwards compatibility; unneeded
              for new code)
    ========= ===============================================================

    The default mode is 'rt' (open for reading text).  For binary random
    access, the mode 'w+b' opens and truncates the file to 0 bytes,
    while 'r+b' opens the file without truncation.  Files opened in
    binary mode return contents as bytes objects without any decoding;
    in text mode the contents are returned as strings, decoded using the
    specified encoding or a platform-dependent default.

    buffering is an optional integer used to set the buffering policy.
    By default full buffering is on.  Pass 0 to switch buffering off
    (only allowed in binary mode), 1 to set line buffering, and an
    integer > 1 for full buffering.

    encoding is the name of the encoding used to decode or encode the
    file; it should only be used in text mode.  errors is an optional
    string that selects how encoding errors are handled (see
    codecs.register for the permitted error strings); it also applies
    only in text mode.

    newline controls how universal newlines work (text mode only).  It
    can be None, '', '\n', '\r', or '\r\n'.  On input, None enables
    universal newlines with translation to '\n', '' enables them without
    translation, and any other value makes that string the sole line
    terminator (returned untranslated).  On output, None translates
    '\n' to os.linesep, '' writes '\n' unchanged, and any other value
    replaces '\n' with that string.

    If closefd is False, the underlying file descriptor will be kept
    open when the file is closed.  This does not work when a file name
    is given and must be True in that case.

    The type of the returned object depends on the mode: a TextIOWrapper
    in text mode; in binary mode a BufferedReader (read), BufferedWriter
    (write/append), BufferedRandom (update), or — when unbuffered — the
    raw FileIO object itself.
    """
    if not isinstance(file, (basestring, int)):
        raise TypeError("invalid file: %r" % file)
    if not isinstance(mode, basestring):
        raise TypeError("invalid mode: %r" % mode)
    if buffering is not None and not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % buffering)
    if encoding is not None and not isinstance(encoding, basestring):
        raise TypeError("invalid encoding: %r" % encoding)
    if errors is not None and not isinstance(errors, basestring):
        raise TypeError("invalid errors: %r" % errors)
    modes = set(mode)
    if modes - set("arwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes:
        if writing or appending:
            raise ValueError("can't use U and writing mode at once")
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and errors is not None:
        raise ValueError("binary mode doesn't take an errors argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")
    raw = FileIO(file,
                 (reading and "r" or "") +
                 (writing and "w" or "") +
                 (appending and "a" or "") +
                 (updating and "+" or ""),
                 closefd)
    # From here on an OS-level file is open; make sure it is closed
    # again if any of the remaining setup fails (invalid buffering
    # size, unbuffered text I/O, wrapper constructor errors), instead
    # of leaking the descriptor until the garbage collector runs.
    try:
        if buffering is None:
            buffering = -1
        line_buffering = False
        if buffering == 1 or buffering < 0 and raw.isatty():
            buffering = -1
            line_buffering = True
        if buffering < 0:
            buffering = DEFAULT_BUFFER_SIZE
            try:
                bs = os.fstat(raw.fileno()).st_blksize
            except (os.error, AttributeError):
                pass
            else:
                if bs > 1:
                    buffering = bs
        if buffering < 0:
            raise ValueError("invalid buffering size")
        if buffering == 0:
            if binary:
                return raw
            raise ValueError("can't have unbuffered text I/O")
        if updating:
            buffer = BufferedRandom(raw, buffering)
        elif writing or appending:
            buffer = BufferedWriter(raw, buffering)
        elif reading:
            buffer = BufferedReader(raw, buffering)
        else:
            raise ValueError("unknown mode: %r" % mode)
        if binary:
            return buffer
        text = TextIOWrapper(buffer, encoding, errors, newline,
                             line_buffering)
        text.mode = mode
        return text
    except:
        # Best-effort cleanup; the original exception is re-raised.
        raw.close()
        raise
257 class _DocDescriptor:
258 """Helper for builtins.open.__doc__
260 def __get__(self, obj, typ):
261 return (
262 "open(file, mode='r', buffering=None, encoding=None, "
263 "errors=None, newline=None, closefd=True)\n\n" +
264 open.__doc__)
class OpenWrapper:

    """Wrapper for builtins.open

    Trick so that open won't become a bound method when stored
    as a class variable (as dumbdbm does).

    See initstdio() in Python/pythonrun.c.
    """

    # The descriptor regenerates __doc__ from open.__doc__ on every access.
    __doc__ = _DocDescriptor()

    def __new__(cls, *args, **kwargs):
        # Calling the class simply delegates to the module-level open();
        # no OpenWrapper instance is ever actually created.
        return open(*args, **kwargs)
class UnsupportedOperation(ValueError, IOError):
    """Raised (via IOBase._unsupported) when a stream does not support
    the requested operation, e.g. seeking on a non-seekable stream."""
    pass
class IOBase(object):

    """The abstract base class for all I/O classes, acting on streams of
    bytes.  There is no public constructor.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default
    implementations represent a file that cannot be read, written or
    seeked.

    Even though IOBase does not declare read, readinto, or write
    (their signatures vary), implementations and clients should
    consider those methods part of the interface.  Implementations may
    raise IOError for operations they do not support.

    The basic type used for binary data read from or written to a file
    is bytes.  bytearrays are accepted too, and in some cases (such as
    readinto) required.  Text I/O classes work with str data.

    Note that calling any method (even inquiries) on a closed stream
    is undefined; implementations may raise IOError in this case.

    IOBase (and its subclasses) support the iterator protocol: an
    IOBase object can be iterated over, yielding the lines in the
    stream.  IOBase also supports the with statement; the stream is
    closed when the suite completes:

        with open('spam.txt', 'r') as fp:
            fp.write('Spam and eggs!')
    """

    __metaclass__ = abc.ABCMeta

    ### Internal ###

    def _unsupported(self, name):
        """Internal: raise an exception for unsupported operations."""
        raise UnsupportedOperation("%s.%s() not supported" %
                                   (self.__class__.__name__, name))

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Change the stream position to byte offset pos.

        pos is interpreted relative to the position indicated by whence:

        * 0 -- start of stream (the default); pos should be zero or positive
        * 1 -- current stream position; pos may be negative
        * 2 -- end of stream; pos is usually negative

        Return the new absolute position.
        """
        self._unsupported("seek")

    def tell(self):
        """Return the current stream position."""
        # Expressed as a no-op relative seek so subclasses only
        # have to implement seek().
        return self.seek(0, 1)

    def truncate(self, pos=None):
        """Truncate the file to at most pos bytes.

        pos defaults to the current I/O position as reported by tell().
        Return the new size.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self):
        """Flush write buffers, if applicable.

        This is not implemented for read-only and non-blocking streams.
        """
        # XXX Should this return the number of bytes written???

    # Class-level default; flipped per-instance by close().
    __closed = False

    def close(self):
        """Flush and close the IO object.

        This method has no effect if the file is already closed.
        """
        if not self.__closed:
            try:
                self.flush()
            except IOError:
                pass  # If flush() fails, just give up
            self.__closed = True

    def __del__(self):
        """Destructor.  Calls close()."""
        # At interpreter shutdown, globals may already have been
        # deleted and close() can fail; there is nothing useful to do
        # about that, so suppress any error rather than annoy the user
        # with a traceback.
        try:
            self.close()
        except:
            pass

    ### Inquiries ###

    def seekable(self):
        """Return whether the object supports random access.

        If False, seek(), tell() and truncate() will raise IOError.
        This method may need to do a test seek().
        """
        return False

    def _checkSeekable(self, msg=None):
        """Internal: raise IOError if the file is not seekable."""
        if not self.seekable():
            raise IOError("File or stream is not seekable."
                          if msg is None else msg)

    def readable(self):
        """Return whether the object was opened for reading.

        If False, read() will raise IOError.
        """
        return False

    def _checkReadable(self, msg=None):
        """Internal: raise IOError if the file is not readable."""
        if not self.readable():
            raise IOError("File or stream is not readable."
                          if msg is None else msg)

    def writable(self):
        """Return whether the object was opened for writing.

        If False, write() and truncate() will raise IOError.
        """
        return False

    def _checkWritable(self, msg=None):
        """Internal: raise IOError if the file is not writable."""
        if not self.writable():
            raise IOError("File or stream is not writable."
                          if msg is None else msg)

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    def _checkClosed(self, msg=None):
        """Internal: raise ValueError if the file is closed."""
        if self.closed:
            raise ValueError("I/O operation on closed file."
                             if msg is None else msg)

    ### Context manager ###

    def __enter__(self):
        """Context management protocol.  Returns self."""
        self._checkClosed()
        return self

    def __exit__(self, *args):
        """Context management protocol.  Calls close()."""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self):
        """Return the underlying file descriptor, if one exists.

        Raises IOError if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self):
        """Return whether this is an 'interactive' stream.

        Returns False if that can't be determined.
        """
        self._checkClosed()
        return False

    ### Readline[s] and writelines ###

    def readline(self, limit=-1):
        r"""Read and return one line from the stream.

        If limit is nonnegative, at most limit bytes will be read.

        The line terminator is always b'\n' for binary files; for text
        files, the newline argument to open() selects the recognized
        terminator(s).
        """
        self._checkClosed()
        if hasattr(self, "peek"):
            def nreadahead():
                # Look ahead without consuming, to size the next read so
                # that it stops at (or just past) the next newline.
                readahead = self.peek(1)
                if not readahead:
                    return 1
                n = (readahead.find(b"\n") + 1) or len(readahead)
                return min(n, limit) if limit >= 0 else n
        else:
            def nreadahead():
                # No peek(): fall back to byte-at-a-time reads.
                return 1
        if limit is None:
            limit = -1
        if not isinstance(limit, (int, long)):
            raise TypeError("limit must be an integer")
        line = bytearray()
        while limit < 0 or len(line) < limit:
            chunk = self.read(nreadahead())
            if not chunk:
                break
            line += chunk
            if line.endswith(b"\n"):
                break
        return bytes(line)

    def __iter__(self):
        self._checkClosed()
        return self

    def next(self):
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def readlines(self, hint=None):
        """Return a list of lines from the stream.

        hint bounds the amount read: once the combined size (in
        bytes/characters) of the lines collected so far reaches hint,
        no further lines are read.
        """
        if hint is None:
            hint = -1
        if not isinstance(hint, (int, long)):
            raise TypeError("hint must be an integer")
        if hint <= 0:
            return list(self)
        lines = []
        total = 0
        for line in self:
            lines.append(line)
            total += len(line)
            if total >= hint:
                break
        return lines

    def writelines(self, lines):
        self._checkClosed()
        for line in lines:
            self.write(line)
class RawIOBase(IOBase):

    """Base class for raw binary I/O."""

    # read() is expressed in terms of readinto(); a subclass only needs
    # to supply readinto() as a primitive to get a working read().  In
    # general readinto() can be more efficient than read().
    #
    # The converse default (readinto() in terms of read()) is
    # deliberately not provided: if a subclass implemented neither,
    # the two would recurse into each other endlessly.

    def read(self, n=-1):
        """Read and return up to n bytes.

        Returns an empty bytes object on EOF, or None if the object is
        set not to block and has no data to read.
        """
        if n is None:
            n = -1
        if n < 0:
            return self.readall()
        buf = bytearray(n.__index__())
        count = self.readinto(buf)
        del buf[count:]  # Trim the unused tail of the buffer.
        return bytes(buf)

    def readall(self):
        """Read until EOF, using multiple read() calls."""
        pieces = bytearray()
        while True:
            data = self.read(DEFAULT_BUFFER_SIZE)
            if not data:
                break
            pieces += data
        return bytes(pieces)

    def readinto(self, b):
        """Read up to len(b) bytes into b.

        Returns the number of bytes read (0 for EOF), or None if the
        object is set not to block and has no data to read.
        """
        self._unsupported("readinto")

    def write(self, b):
        """Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than
        len(b).
        """
        self._unsupported("write")
class FileIO(_fileio._FileIO, RawIOBase):

    """Raw I/O implementation for OS files."""

    # The multiple inheritance makes isinstance(io.FileIO(), io.RawIOBase)
    # true without requiring that _fileio._FileIO inherit from
    # io.RawIOBase (hard to arrange, since _fileio.c is written in C).

    def __init__(self, name, mode="r", closefd=True):
        _fileio._FileIO.__init__(self, name, mode, closefd)
        self._name = name

    def close(self):
        # Close the OS-level file first, then run IOBase's bookkeeping.
        _fileio._FileIO.close(self)
        RawIOBase.close(self)

    @property
    def name(self):
        """The file name (or descriptor) passed to the constructor."""
        return self._name
class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    The main difference with RawIOBase is that the read() method
    supports omitting the size argument, and does not have a default
    implementation that defers to readinto().

    In addition, read(), readinto() and write() may raise
    BlockingIOError if the underlying raw stream is in non-blocking
    mode and not ready; unlike their raw counterparts, they will
    never return None.

    A typical implementation should not inherit from a RawIOBase
    implementation, but wrap one.
    """

    def read(self, n=None):
        """Read and return up to n bytes.

        If the argument is omitted, None, or negative, read and return
        all data until EOF.

        If the argument is positive, and the underlying raw stream is
        not 'interactive', multiple raw reads may be issued to satisfy
        the byte count (unless EOF is reached first).  For interactive
        raw streams (XXX and for pipes?) at most one raw read is
        issued, so a short result does not imply that EOF is imminent.

        Returns an empty bytes object on EOF.

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        self._unsupported("read")

    def readinto(self, b):
        """Read up to len(b) bytes into b.

        Like read(), this may issue multiple reads to the underlying
        raw stream, unless the latter is 'interactive'.

        Returns the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        # XXX This ought to work with anything that supports the buffer API
        data = self.read(len(b))
        count = len(data)
        try:
            b[:count] = data
        except TypeError as err:
            # array.array rejects bytes in slice assignment; convert.
            import array
            if not isinstance(b, array.array):
                raise err
            b[:count] = array.array(b'b', data)
        return count

    def write(self, b):
        """Write the given buffer to the IO stream.

        Return the number of bytes written, which is never less than
        len(b).

        Raises BlockingIOError if the buffer is full and the
        underlying raw stream cannot accept more data at the moment.
        """
        self._unsupported("write")
class _BufferedIOMixin(BufferedIOBase):

    """A mixin implementation of BufferedIOBase with an underlying raw stream.

    Most requests are simply forwarded to the wrapped raw stream; the
    mixin deliberately provides no read(), readinto() or write().
    """

    def __init__(self, raw):
        self.raw = raw

    ### Positioning ###

    def seek(self, pos, whence=0):
        return self.raw.seek(pos, whence)

    def tell(self):
        return self.raw.tell()

    def truncate(self, pos=None):
        # Flush first: we're mixing buffered I/O with lower-level I/O,
        # and both views of the current file state must agree before
        # the raw layer is resized.
        self.flush()

        if pos is None:
            pos = self.tell()
        # XXX: Should seek() be used, instead of passing the position
        # XXX  directly to truncate?
        return self.raw.truncate(pos)

    ### Flush and close ###

    def flush(self):
        self.raw.flush()

    def close(self):
        if not self.closed:
            try:
                self.flush()
            except IOError:
                pass  # If flush() fails, just give up
            self.raw.close()

    ### Inquiries ###

    def seekable(self):
        return self.raw.seekable()

    def readable(self):
        return self.raw.readable()

    def writable(self):
        return self.raw.writable()

    @property
    def closed(self):
        return self.raw.closed

    @property
    def name(self):
        return self.raw.name

    @property
    def mode(self):
        return self.raw.mode

    ### Lower-level APIs ###

    def fileno(self):
        return self.raw.fileno()

    def isatty(self):
        return self.raw.isatty()
class _BytesIO(BufferedIOBase):

    """Buffered I/O implementation using an in-memory bytes buffer."""

    # XXX More docs

    def __init__(self, initial_bytes=None):
        """Create a buffer, optionally pre-loaded with initial_bytes."""
        self._buffer = bytearray()
        if initial_bytes is not None:
            self._buffer += bytearray(initial_bytes)
        self._pos = 0

    def getvalue(self):
        """Return the entire contents of the buffer as bytes."""
        if self.closed:
            raise ValueError("getvalue on closed file")
        return bytes(self._buffer)

    def read(self, n=None):
        """Read up to n bytes; None or a negative n reads to the end."""
        if self.closed:
            raise ValueError("read from closed file")
        if n is None:
            n = -1
        if not isinstance(n, (int, long)):
            raise TypeError("argument must be an integer")
        if n < 0:
            n = len(self._buffer)
        if len(self._buffer) <= self._pos:
            return b""  # Already at or past the end.
        end = min(len(self._buffer), self._pos + n)
        data = self._buffer[self._pos:end]
        self._pos = end
        return bytes(data)

    def read1(self, n):
        """Same as read(); at most one (in-memory) read is ever needed."""
        return self.read(n)

    def write(self, b):
        """Write b at the current position; return the number written."""
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, unicode):
            raise TypeError("can't write unicode to binary stream")
        n = len(b)
        if n == 0:
            return 0
        pos = self._pos
        if pos > len(self._buffer):
            # A write past the end zero-fills the gap between the
            # current end of the buffer and the write position.
            self._buffer += b'\x00' * (pos - len(self._buffer))
        self._buffer[pos:pos + n] = b
        self._pos += n
        return n

    def seek(self, pos, whence=0):
        """Move to a new position; whence is 0 (start), 1 (cur), 2 (end)."""
        if self.closed:
            raise ValueError("seek on closed file")
        try:
            pos = pos.__index__()
        except AttributeError as err:
            raise TypeError("an integer is required")  # from err
        if whence == 0:
            if pos < 0:
                raise ValueError("negative seek position %r" % (pos,))
            new_pos = pos
        elif whence == 1:
            new_pos = max(0, self._pos + pos)  # Clamp at start of buffer.
        elif whence == 2:
            new_pos = max(0, len(self._buffer) + pos)
        else:
            raise ValueError("invalid whence value")
        self._pos = new_pos
        return new_pos

    def tell(self):
        """Return the current position in the buffer."""
        if self.closed:
            raise ValueError("tell on closed file")
        return self._pos

    def truncate(self, pos=None):
        """Drop everything after pos (default: current position)."""
        if self.closed:
            raise ValueError("truncate on closed file")
        if pos is None:
            pos = self._pos
        elif pos < 0:
            raise ValueError("negative truncate position %r" % (pos,))
        del self._buffer[pos:]
        # Note: also repositions the stream at pos.
        return self.seek(pos)

    def readable(self):
        return True

    def writable(self):
        return True

    def seekable(self):
        return True
# Use the faster implementation of BytesIO if available
try:
    import _bytesio
except ImportError:
    # No C accelerator: fall back to the pure Python implementation.
    BytesIO = _BytesIO
else:
    class BytesIO(_bytesio._BytesIO, BufferedIOBase):
        __doc__ = _bytesio._BytesIO.__doc__
class BufferedReader(_BufferedIOMixin):

    """BufferedReader(raw[, buffer_size])

    A buffer for a readable, sequential BaseRawIO object.

    The constructor creates a BufferedReader for the given readable
    raw stream and buffer_size.  If buffer_size is omitted,
    DEFAULT_BUFFER_SIZE is used.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader for the given readable raw stream."""
        raw._checkReadable()
        _BufferedIOMixin.__init__(self, raw)
        self.buffer_size = buffer_size
        self._reset_read_buf()
        self._read_lock = threading.Lock()

    def _reset_read_buf(self):
        # Discard all buffered data.
        self._read_buf = b""
        self._read_pos = 0

    def read(self, n=None):
        """Read n bytes.

        Returns exactly n bytes of data unless the underlying raw IO
        stream reaches EOF or the call would block in non-blocking
        mode.  If n is None or negative, read until EOF or until
        read() would block.
        """
        with self._read_lock:
            return self._read_unlocked(n)

    def _read_unlocked(self, n=None):
        nodata_val = b""
        empty_values = (b"", None)
        buffered = self._read_buf
        offset = self._read_pos

        if n is None or n == -1:
            # Unbounded read: drain our buffer, then the raw stream
            # until EOF or until read() would block.
            self._reset_read_buf()
            parts = [buffered[offset:]]  # Strip the consumed bytes.
            while True:
                piece = self.raw.read()
                if piece in empty_values:
                    nodata_val = piece
                    break
                parts.append(piece)
            return b"".join(parts) or nodata_val

        # Bounded read: return at most n bytes.
        avail = len(buffered) - offset  # Length of buffered data on hand.
        if n <= avail:
            # Fast path: the request is fully buffered.
            self._read_pos += n
            return buffered[offset:offset + n]
        # Slow path: pull from the raw stream until enough bytes are
        # read, EOF occurs, or read() would block.
        parts = [buffered[offset:]]
        wanted = max(self.buffer_size, n)
        while avail < n:
            piece = self.raw.read(wanted)
            if piece in empty_values:
                nodata_val = piece
                break
            avail += len(piece)
            parts.append(piece)
        # n exceeds avail only when EOF occurred or when read() would
        # have blocked.
        n = min(n, avail)
        out = b"".join(parts)
        self._read_buf = out[n:]  # Stash the surplus for next time.
        self._read_pos = 0
        return out[:n] if out else nodata_val

    def peek(self, n=0):
        """Return buffered bytes without advancing the position.

        The argument indicates a desired minimal number of bytes; at
        most one raw read is done to satisfy it.  Never returns more
        than self.buffer_size bytes.
        """
        with self._read_lock:
            return self._peek_unlocked(n)

    def _peek_unlocked(self, n=0):
        want = min(n, self.buffer_size)
        have = len(self._read_buf) - self._read_pos
        if have < want:
            current = self.raw.read(self.buffer_size - have)
            if current:
                # Compact the buffer and append the freshly-read bytes.
                self._read_buf = self._read_buf[self._read_pos:] + current
                self._read_pos = 0
        return self._read_buf[self._read_pos:]

    def read1(self, n):
        """Read up to n bytes with at most one read() system call."""
        # If at least one byte is buffered, only buffered bytes are
        # returned; otherwise exactly one raw read is performed.
        if n <= 0:
            return b""
        with self._read_lock:
            self._peek_unlocked(1)
            return self._read_unlocked(
                min(n, len(self._read_buf) - self._read_pos))

    def tell(self):
        # The raw position, minus whatever we have buffered but not
        # yet handed out.
        return self.raw.tell() - len(self._read_buf) + self._read_pos

    def seek(self, pos, whence=0):
        with self._read_lock:
            if whence == 1:
                # Relative seeks are relative to our logical position,
                # not the raw stream's read-ahead position.
                pos -= len(self._read_buf) - self._read_pos
            pos = self.raw.seek(pos, whence)
            self._reset_read_buf()
            return pos
1020 class BufferedWriter(_BufferedIOMixin):
1022 """A buffer for a writeable sequential RawIO object.
1024 The constructor creates a BufferedWriter for the given writeable raw
1025 stream. If the buffer_size is not given, it defaults to
DEFAULT_BUFFER_SIZE. If max_buffer_size is omitted, it defaults to
1027 twice the buffer size.
1030 def __init__(self, raw,
1031 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
1032 raw._checkWritable()
1033 _BufferedIOMixin.__init__(self, raw)
1034 self.buffer_size = buffer_size
1035 self.max_buffer_size = (2*buffer_size
1036 if max_buffer_size is None
1037 else max_buffer_size)
1038 self._write_buf = bytearray()
1039 self._write_lock = threading.Lock()
1041 def write(self, b):
1042 if self.closed:
1043 raise ValueError("write to closed file")
1044 if isinstance(b, unicode):
1045 raise TypeError("can't write unicode to binary stream")
1046 with self._write_lock:
1047 # XXX we can implement some more tricks to try and avoid
1048 # partial writes
1049 if len(self._write_buf) > self.buffer_size:
1050 # We're full, so let's pre-flush the buffer
1051 try:
1052 self._flush_unlocked()
1053 except BlockingIOError as e:
1054 # We can't accept anything else.
1055 # XXX Why not just let the exception pass through?
1056 raise BlockingIOError(e.errno, e.strerror, 0)
1057 before = len(self._write_buf)
1058 self._write_buf.extend(b)
1059 written = len(self._write_buf) - before
1060 if len(self._write_buf) > self.buffer_size:
1061 try:
1062 self._flush_unlocked()
1063 except BlockingIOError as e:
1064 if len(self._write_buf) > self.max_buffer_size:
1065 # We've hit max_buffer_size. We have to accept a
1066 # partial write and cut back our buffer.
1067 overage = len(self._write_buf) - self.max_buffer_size
1068 self._write_buf = self._write_buf[:self.max_buffer_size]
1069 raise BlockingIOError(e.errno, e.strerror, overage)
1070 return written
1072 def truncate(self, pos=None):
1073 with self._write_lock:
1074 self._flush_unlocked()
1075 if pos is None:
1076 pos = self.raw.tell()
1077 return self.raw.truncate(pos)
1079 def flush(self):
1080 with self._write_lock:
1081 self._flush_unlocked()
1083 def _flush_unlocked(self):
1084 if self.closed:
1085 raise ValueError("flush of closed file")
1086 written = 0
1087 try:
1088 while self._write_buf:
1089 n = self.raw.write(self._write_buf)
1090 del self._write_buf[:n]
1091 written += n
1092 except BlockingIOError as e:
1093 n = e.characters_written
1094 del self._write_buf[:n]
1095 written += n
1096 raise BlockingIOError(e.errno, e.strerror, written)
1098 def tell(self):
1099 return self.raw.tell() + len(self._write_buf)
1101 def seek(self, pos, whence=0):
1102 with self._write_lock:
1103 self._flush_unlocked()
1104 return self.raw.seek(pos, whence)
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together to
    form a sequential IO object that can read and write. This is typically
    used with a socket or two-way pipe.

    reader and writer are RawIOBase objects that are readable and
    writeable respectively. If the buffer_size is omitted it defaults to
    DEFAULT_BUFFER_SIZE. The max_buffer_size (for the buffered writer)
    defaults to twice the buffer size.
    """

    # XXX The usefulness of this (compared to having two separate IO
    # objects) is questionable.

    def __init__(self, reader, writer,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Constructor.

        The arguments are two RawIO instances.
        """
        reader._checkReadable()
        writer._checkWritable()
        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size, max_buffer_size)

    def read(self, n=None):
        """Read and return up to n bytes (to EOF when n is None)."""
        return self.reader.read(-1 if n is None else n)

    def readinto(self, b):
        """Read bytes into b; return the number of bytes read."""
        return self.reader.readinto(b)

    def write(self, b):
        """Write the bytes b; return the number of bytes written."""
        return self.writer.write(b)

    def peek(self, n=0):
        """Return buffered input bytes without consuming them."""
        return self.reader.peek(n)

    def read1(self, n):
        """Read up to n bytes with at most one raw read."""
        return self.reader.read1(n)

    def readable(self):
        return self.reader.readable()

    def writable(self):
        return self.writer.writable()

    def flush(self):
        return self.writer.flush()

    def close(self):
        # Close the writing side first so buffered output is flushed
        # before the reading side is torn down.
        self.writer.close()
        self.reader.close()

    def isatty(self):
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        # Mirrors only the writer's state, as in the original design.
        return self.writer.closed
class BufferedRandom(BufferedWriter, BufferedReader):

    """A buffered interface to random access streams.

    The constructor creates a reader and writer for a seekable stream,
    raw, given in the first argument. If the buffer_size is omitted it
    defaults to DEFAULT_BUFFER_SIZE. The max_buffer_size (for the buffered
    writer) defaults to twice the buffer size.
    """

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        raw._checkSeekable()
        # Initialize both halves: read buffer and write buffer share raw.
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)

    def seek(self, pos, whence=0):
        """Flush writes, seek the raw stream, then drop the read buffer."""
        self.flush()
        # First do the raw seek, then empty the read buffer, so that
        # if the raw seek fails, we don't lose buffered data forever.
        pos = self.raw.seek(pos, whence)
        with self._read_lock:
            self._reset_read_buf()
        return pos

    def tell(self):
        # A non-empty write buffer implies we are in "writing" mode, so
        # the logical position is ahead of the raw position; otherwise
        # defer to the read-side computation.
        if self._write_buf:
            return self.raw.tell() + len(self._write_buf)
        else:
            return BufferedReader.tell(self)

    def truncate(self, pos=None):
        """Truncate the stream to pos (default: the current position)."""
        if pos is None:
            pos = self.tell()
        # Use seek to flush the read buffer.
        self.seek(pos)
        return BufferedWriter.truncate(self)

    def read(self, n=None):
        """Read up to n bytes (to EOF when n is None or negative)."""
        if n is None:
            n = -1
        # Flush pending writes so reads see a consistent stream.
        self.flush()
        return BufferedReader.read(self, n)

    def readinto(self, b):
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, n=0):
        self.flush()
        return BufferedReader.peek(self, n)

    def read1(self, n):
        self.flush()
        return BufferedReader.read1(self, n)

    def write(self, b):
        if self._read_buf:
            # Undo readahead
            with self._read_lock:
                self.raw.seek(self._read_pos - len(self._read_buf), 1)
                self._reset_read_buf()
        return BufferedWriter.write(self, b)
class TextIOBase(IOBase):

    """Base class for text I/O.

    This class provides a character and line based interface to stream
    I/O. There is no readinto method because Python's character strings
    are immutable. There is no public constructor.
    """

    def read(self, n = -1):
        """Read at most n characters from stream.

        Read from underlying buffer until we have n characters or we hit EOF.
        If n is negative or omitted, read until EOF.
        """
        self._unsupported("read")

    def write(self, s):
        """Write string s to stream."""
        self._unsupported("write")

    def truncate(self, pos = None):
        """Truncate size to pos."""
        self._unsupported("truncate")

    def readline(self):
        """Read until newline or EOF.

        Returns an empty string if EOF is hit immediately.
        """
        self._unsupported("readline")

    @property
    def encoding(self):
        """Return the encoding used; subclasses should override."""
        return None

    @property
    def newlines(self):
        """Line endings translated so far.

        Only line endings translated during reading are considered.

        Subclasses should override.
        """
        return None
class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
    """Codec used when reading a file in universal newlines mode.

    It wraps another incremental decoder, translating \\r\\n and \\r into \\n.
    It also records the types of newlines encountered.
    When used with translate=False, it ensures that the newline sequence is
    returned in one piece.
    """
    def __init__(self, decoder, translate, errors='strict'):
        codecs.IncrementalDecoder.__init__(self, errors=errors)
        self.translate = translate  # rewrite \r\n and \r as \n?
        self.decoder = decoder      # the wrapped incremental decoder
        self.seennl = 0             # bitmask of _LF/_CR/_CRLF seen so far
        self.pendingcr = False      # a trailing \r was held back last pass

    def decode(self, input, final=False):
        # decode input (with the eventual \r from a previous pass)
        output = self.decoder.decode(input, final=final)
        if self.pendingcr and (output or final):
            output = "\r" + output
            self.pendingcr = False

        # retain last \r even when not translating data:
        # then readline() is sure to get \r\n in one pass
        if output.endswith("\r") and not final:
            output = output[:-1]
            self.pendingcr = True

        # Record which newlines are read
        crlf = output.count('\r\n')
        cr = output.count('\r') - crlf
        lf = output.count('\n') - crlf
        self.seennl |= (lf and self._LF) | (cr and self._CR) \
                    | (crlf and self._CRLF)

        if self.translate:
            if crlf:
                output = output.replace("\r\n", "\n")
            if cr:
                output = output.replace("\r", "\n")

        return output

    def getstate(self):
        # Pack pendingcr into the low bit of the wrapped decoder's flags.
        buf, flag = self.decoder.getstate()
        flag <<= 1
        if self.pendingcr:
            flag |= 1
        return buf, flag

    def setstate(self, state):
        # Inverse of getstate(): unpack pendingcr from the low bit.
        buf, flag = state
        self.pendingcr = bool(flag & 1)
        self.decoder.setstate((buf, flag >> 1))

    def reset(self):
        self.seennl = 0
        self.pendingcr = False
        self.decoder.reset()

    # Bit flags recorded in seennl.
    _LF = 1
    _CR = 2
    _CRLF = 4

    @property
    def newlines(self):
        # Map the seennl bitmask onto the value reported by .newlines.
        return (None,
                "\n",
                "\r",
                ("\r", "\n"),
                "\r\n",
                ("\n", "\r\n"),
                ("\r", "\r\n"),
                ("\r", "\n", "\r\n")
               )[self.seennl]
class TextIOWrapper(TextIOBase):

    r"""Character and line based layer over a BufferedIOBase object, buffer.

    encoding gives the name of the encoding that the stream will be
    decoded or encoded with. It defaults to locale.getpreferredencoding.

    errors determines the strictness of encoding and decoding (see the
    codecs.register) and defaults to "strict".

    newline can be None, '', '\n', '\r', or '\r\n'. It controls the
    handling of line endings. If it is None, universal newlines is
    enabled. With this enabled, on input, the lines endings '\n', '\r',
    or '\r\n' are translated to '\n' before being returned to the
    caller. Conversely, on output, '\n' is translated to the system
    default line separator, os.linesep. If newline is any other of its
    legal values, that newline becomes the newline when the file is read
    and it is returned untranslated. On output, '\n' is converted to the
    newline.

    If line_buffering is True, a call to flush is implied when a call to
    write contains a newline character.
    """

    # Number of bytes fed to the decoder per _read_chunk() call.
    _CHUNK_SIZE = 128

    def __init__(self, buffer, encoding=None, errors=None, newline=None,
                 line_buffering=False):
        if newline not in (None, "", "\n", "\r", "\r\n"):
            raise ValueError("illegal newline value: %r" % (newline,))
        if encoding is None:
            try:
                encoding = os.device_encoding(buffer.fileno())
            except (AttributeError, UnsupportedOperation):
                pass
            if encoding is None:
                try:
                    import locale
                except ImportError:
                    # Importing locale may fail if Python is being built
                    encoding = "ascii"
                else:
                    encoding = locale.getpreferredencoding()

        if not isinstance(encoding, basestring):
            raise ValueError("invalid encoding: %r" % encoding)

        if errors is None:
            errors = "strict"
        else:
            if not isinstance(errors, basestring):
                raise ValueError("invalid errors: %r" % errors)

        self.buffer = buffer
        self._line_buffering = line_buffering
        self._encoding = encoding
        self._errors = errors
        self._readuniversal = not newline       # '' or None: accept any ending
        self._readtranslate = newline is None   # None: translate endings to \n
        self._readnl = newline
        self._writetranslate = newline != ''
        self._writenl = newline or os.linesep
        self._encoder = None
        self._decoder = None
        self._decoded_chars = ''  # buffer for text returned from decoder
        self._decoded_chars_used = 0  # offset into _decoded_chars for read()
        self._snapshot = None  # info for reconstructing decoder state
        self._seekable = self._telling = self.buffer.seekable()

    # self._snapshot is either None, or a tuple (dec_flags, next_input)
    # where dec_flags is the second (integer) item of the decoder state
    # and next_input is the chunk of input bytes that comes next after the
    # snapshot point.  We use this to reconstruct decoder states in tell().

    # Naming convention:
    #   - "bytes_..." for integer variables that count input bytes
    #   - "chars_..." for integer variables that count decoded characters

    @property
    def encoding(self):
        return self._encoding

    @property
    def errors(self):
        return self._errors

    @property
    def line_buffering(self):
        return self._line_buffering

    def seekable(self):
        return self._seekable

    def readable(self):
        return self.buffer.readable()

    def writable(self):
        return self.buffer.writable()

    def flush(self):
        self.buffer.flush()
        # Flushing re-enables tell() after next() had disabled it.
        self._telling = self._seekable

    def close(self):
        try:
            self.flush()
        except:
            pass  # If flush() fails, just give up
        self.buffer.close()

    @property
    def closed(self):
        return self.buffer.closed

    @property
    def name(self):
        return self.buffer.name

    def fileno(self):
        return self.buffer.fileno()

    def isatty(self):
        return self.buffer.isatty()

    def write(self, s):
        """Encode s (with newline translation) and write it to buffer.

        Returns the number of characters in the original string s.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if not isinstance(s, unicode):
            raise TypeError("can't write %s to text stream" %
                            s.__class__.__name__)
        length = len(s)
        haslf = (self._writetranslate or self._line_buffering) and "\n" in s
        if haslf and self._writetranslate and self._writenl != "\n":
            s = s.replace("\n", self._writenl)
        encoder = self._encoder or self._get_encoder()
        # XXX What if we were just reading?
        b = encoder.encode(s)
        self.buffer.write(b)
        if self._line_buffering and (haslf or "\r" in s):
            self.flush()
        # Writing invalidates any read-side snapshot/decoder state.
        self._snapshot = None
        if self._decoder:
            self._decoder.reset()
        return length

    def _get_encoder(self):
        # Lazily create the incremental encoder for self._encoding.
        make_encoder = codecs.getincrementalencoder(self._encoding)
        self._encoder = make_encoder(self._errors)
        return self._encoder

    def _get_decoder(self):
        # Lazily create the incremental decoder, wrapping it in the
        # newline-tracking decoder when universal newlines are enabled.
        make_decoder = codecs.getincrementaldecoder(self._encoding)
        decoder = make_decoder(self._errors)
        if self._readuniversal:
            decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
        self._decoder = decoder
        return decoder

    # The following three methods implement an ADT for _decoded_chars.
    # Text returned from the decoder is buffered here until the client
    # requests it by calling our read() or readline() method.
    def _set_decoded_chars(self, chars):
        """Set the _decoded_chars buffer."""
        self._decoded_chars = chars
        self._decoded_chars_used = 0

    def _get_decoded_chars(self, n=None):
        """Advance into the _decoded_chars buffer."""
        offset = self._decoded_chars_used
        if n is None:
            chars = self._decoded_chars[offset:]
        else:
            chars = self._decoded_chars[offset:offset + n]
        self._decoded_chars_used += len(chars)
        return chars

    def _rewind_decoded_chars(self, n):
        """Rewind the _decoded_chars buffer."""
        if self._decoded_chars_used < n:
            raise AssertionError("rewind decoded_chars out of bounds")
        self._decoded_chars_used -= n

    def _read_chunk(self):
        """
        Read and decode the next chunk of data from the BufferedReader.

        The return value is True unless EOF was reached.  The decoded
        string is placed in self._decoded_chars (replacing its previous
        value).  The entire input chunk is sent to the decoder, though
        some of it may remain buffered in the decoder, yet to be
        converted.
        """

        if self._decoder is None:
            raise ValueError("no decoder")

        if self._telling:
            # To prepare for tell(), we need to snapshot a point in the
            # file where the decoder's input buffer is empty.

            dec_buffer, dec_flags = self._decoder.getstate()
            # Given this, we know there was a valid snapshot point
            # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).

        # Read a chunk, decode it, and put the result in self._decoded_chars.
        input_chunk = self.buffer.read1(self._CHUNK_SIZE)
        eof = not input_chunk
        self._set_decoded_chars(self._decoder.decode(input_chunk, eof))

        if self._telling:
            # At the snapshot point, len(dec_buffer) bytes before the read,
            # the next input to be decoded is dec_buffer + input_chunk.
            self._snapshot = (dec_flags, dec_buffer + input_chunk)

        return not eof

    def _pack_cookie(self, position, dec_flags=0,
                           bytes_to_feed=0, need_eof=0, chars_to_skip=0):
        # The meaning of a tell() cookie is: seek to position, set the
        # decoder flags to dec_flags, read bytes_to_feed bytes, feed them
        # into the decoder with need_eof as the EOF flag, then skip
        # chars_to_skip characters of the decoded result.  For most simple
        # decoders, tell() will often just give a byte offset in the file.
        return (position | (dec_flags<<64) | (bytes_to_feed<<128) |
               (chars_to_skip<<192) | bool(need_eof)<<256)

    def _unpack_cookie(self, bigint):
        # Inverse of _pack_cookie: split into the five 64-bit fields.
        rest, position = divmod(bigint, 1<<64)
        rest, dec_flags = divmod(rest, 1<<64)
        rest, bytes_to_feed = divmod(rest, 1<<64)
        need_eof, chars_to_skip = divmod(rest, 1<<64)
        return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip

    def tell(self):
        """Return an opaque cookie for the current logical position.

        The cookie encodes a safe restart point plus replay instructions
        (see _pack_cookie) and is only meaningful to seek().
        """
        if not self._seekable:
            raise IOError("underlying stream is not seekable")
        if not self._telling:
            raise IOError("telling position disabled by next() call")
        self.flush()
        position = self.buffer.tell()
        decoder = self._decoder
        if decoder is None or self._snapshot is None:
            if self._decoded_chars:
                # This should never happen.
                raise AssertionError("pending decoded text")
            return position

        # Skip backward to the snapshot point (see _read_chunk).
        dec_flags, next_input = self._snapshot
        position -= len(next_input)

        # How many decoded characters have been used up since the snapshot?
        chars_to_skip = self._decoded_chars_used
        if chars_to_skip == 0:
            # We haven't moved from the snapshot point.
            return self._pack_cookie(position, dec_flags)

        # Starting from the snapshot position, we will walk the decoder
        # forward until it gives us enough decoded characters.
        saved_state = decoder.getstate()
        try:
            # Note our initial start point.
            decoder.setstate((b'', dec_flags))
            start_pos = position
            start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
            need_eof = 0

            # Feed the decoder one byte at a time.  As we go, note the
            # nearest "safe start point" before the current location
            # (a point where the decoder has nothing buffered, so seek()
            # can safely start from there and advance to this location).
            for next_byte in next_input:
                bytes_fed += 1
                chars_decoded += len(decoder.decode(next_byte))
                dec_buffer, dec_flags = decoder.getstate()
                if not dec_buffer and chars_decoded <= chars_to_skip:
                    # Decoder buffer is empty, so this is a safe start point.
                    start_pos += bytes_fed
                    chars_to_skip -= chars_decoded
                    start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
                if chars_decoded >= chars_to_skip:
                    break
            else:
                # We didn't get enough decoded data; signal EOF to get more.
                chars_decoded += len(decoder.decode(b'', final=True))
                need_eof = 1
                if chars_decoded < chars_to_skip:
                    raise IOError("can't reconstruct logical file position")

            # The returned cookie corresponds to the last safe start point.
            return self._pack_cookie(
                start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
        finally:
            # Always restore the live decoder state.
            decoder.setstate(saved_state)

    def truncate(self, pos=None):
        """Truncate at pos (default: current position) and return size."""
        self.flush()
        if pos is None:
            pos = self.tell()
        self.seek(pos)
        return self.buffer.truncate()

    def seek(self, cookie, whence=0):
        """Seek to a position previously returned by tell().

        Only cookies from tell() are meaningful for whence == 0;
        cur-/end-relative seeks are only allowed with a zero offset.
        """
        if self.closed:
            # NOTE(review): message says "tell" but this is seek();
            # probably should read "seek on closed file".
            raise ValueError("tell on closed file")
        if not self._seekable:
            raise IOError("underlying stream is not seekable")
        if whence == 1: # seek relative to current position
            if cookie != 0:
                raise IOError("can't do nonzero cur-relative seeks")
            # Seeking to the current position should attempt to
            # sync the underlying buffer with the current position.
            whence = 0
            cookie = self.tell()
        if whence == 2: # seek relative to end of file
            if cookie != 0:
                raise IOError("can't do nonzero end-relative seeks")
            self.flush()
            position = self.buffer.seek(0, 2)
            self._set_decoded_chars('')
            self._snapshot = None
            if self._decoder:
                self._decoder.reset()
            return position
        if whence != 0:
            raise ValueError("invalid whence (%r, should be 0, 1 or 2)" %
                             (whence,))
        if cookie < 0:
            raise ValueError("negative seek position %r" % (cookie,))
        self.flush()

        # The strategy of seek() is to go back to the safe start point
        # and replay the effect of read(chars_to_skip) from there.
        start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
            self._unpack_cookie(cookie)

        # Seek back to the safe start point.
        self.buffer.seek(start_pos)
        self._set_decoded_chars('')
        self._snapshot = None

        # Restore the decoder to its state from the safe start point.
        if self._decoder or dec_flags or chars_to_skip:
            self._decoder = self._decoder or self._get_decoder()
            self._decoder.setstate((b'', dec_flags))
            self._snapshot = (dec_flags, b'')

        if chars_to_skip:
            # Just like _read_chunk, feed the decoder and save a snapshot.
            input_chunk = self.buffer.read(bytes_to_feed)
            self._set_decoded_chars(
                self._decoder.decode(input_chunk, need_eof))
            self._snapshot = (dec_flags, input_chunk)

            # Skip chars_to_skip of the decoded characters.
            if len(self._decoded_chars) < chars_to_skip:
                raise IOError("can't restore logical file position")
            self._decoded_chars_used = chars_to_skip

        return cookie

    def read(self, n=None):
        """Read and return up to n characters (to EOF when n is None)."""
        if n is None:
            n = -1
        decoder = self._decoder or self._get_decoder()
        if n < 0:
            # Read everything.
            result = (self._get_decoded_chars() +
                      decoder.decode(self.buffer.read(), final=True))
            self._set_decoded_chars('')
            self._snapshot = None
            return result
        else:
            # Keep reading chunks until we have n characters to return.
            eof = False
            result = self._get_decoded_chars(n)
            while len(result) < n and not eof:
                eof = not self._read_chunk()
                result += self._get_decoded_chars(n - len(result))
            return result

    def next(self):
        # Iterator protocol (Python 2).  Iteration disables tell() until
        # the next flush() because the decoder state is not snapshotted.
        self._telling = False
        line = self.readline()
        if not line:
            self._snapshot = None
            self._telling = self._seekable
            raise StopIteration
        return line

    def readline(self, limit=None):
        """Read until a line ending (or limit characters) and return it."""
        if self.closed:
            raise ValueError("read from closed file")
        if limit is None:
            limit = -1
        if not isinstance(limit, (int, long)):
            raise TypeError("limit must be an integer")

        # Grab all the decoded text (we will rewind any extra bits later).
        line = self._get_decoded_chars()

        start = 0
        decoder = self._decoder or self._get_decoder()

        pos = endpos = None
        while True:
            if self._readtranslate:
                # Newlines are already translated, only search for \n
                pos = line.find('\n', start)
                if pos >= 0:
                    endpos = pos + 1
                    break
                else:
                    start = len(line)

            elif self._readuniversal:
                # Universal newline search. Find any of \r, \r\n, \n
                # The decoder ensures that \r\n are not split in two pieces

                # In C we'd look for these in parallel of course.
                nlpos = line.find("\n", start)
                crpos = line.find("\r", start)
                if crpos == -1:
                    if nlpos == -1:
                        # Nothing found
                        start = len(line)
                    else:
                        # Found \n
                        endpos = nlpos + 1
                        break
                elif nlpos == -1:
                    # Found lone \r
                    endpos = crpos + 1
                    break
                elif nlpos < crpos:
                    # Found \n
                    endpos = nlpos + 1
                    break
                elif nlpos == crpos + 1:
                    # Found \r\n
                    endpos = crpos + 2
                    break
                else:
                    # Found \r
                    endpos = crpos + 1
                    break
            else:
                # non-universal
                pos = line.find(self._readnl)
                if pos >= 0:
                    endpos = pos + len(self._readnl)
                    break

            if limit >= 0 and len(line) >= limit:
                endpos = limit  # reached length limit
                break

            # No line ending seen yet - get more data
            more_line = ''  # NOTE(review): unused variable, likely leftover
            while self._read_chunk():
                if self._decoded_chars:
                    break
            if self._decoded_chars:
                line += self._get_decoded_chars()
            else:
                # end of file
                self._set_decoded_chars('')
                self._snapshot = None
                return line

        if limit >= 0 and endpos > limit:
            endpos = limit  # don't exceed limit

        # Rewind _decoded_chars to just after the line ending we found.
        self._rewind_decoded_chars(len(line) - endpos)
        return line[:endpos]

    @property
    def newlines(self):
        # Delegated to the newline-tracking decoder, once one exists.
        return self._decoder.newlines if self._decoder else None
class StringIO(TextIOWrapper):

    """An in-memory stream for text. The initial_value argument sets the
    value of object. The other arguments are like those of TextIOWrapper's
    constructor.
    """

    def __init__(self, initial_value="", encoding="utf-8",
                 errors="strict", newline="\n"):
        super(StringIO, self).__init__(BytesIO(),
                                       encoding=encoding,
                                       errors=errors,
                                       newline=newline)
        if initial_value:
            if not isinstance(initial_value, unicode):
                # Coerce byte strings via the default codec (Python 2).
                initial_value = unicode(initial_value)
            self.write(initial_value)
            # Rewind so reads start from the beginning of initial_value.
            self.seek(0)

    def getvalue(self):
        """Return the entire buffer contents as a unicode string."""
        self.flush()
        return self.buffer.getvalue().decode(self._encoding, self._errors)