Lib/io.py

   1 """
   2 The io module provides the Python interfaces to stream handling. The
   3 builtin open function is defined in this module.
   4
   5 At the top of the I/O hierarchy is the abstract base class IOBase. It
   6 defines the basic interface to a stream. Note, however, that there is no
   7 separation between reading and writing to streams; implementations are
   8 allowed to throw an IOError if they do not support a given operation.
   9
  10 Extending IOBase is RawIOBase which deals simply with the reading and
  11 writing of raw bytes to a stream. FileIO subclasses RawIOBase to provide
  12 an interface to OS files.
  13
  14 BufferedIOBase deals with buffering on a raw byte stream (RawIOBase). Its
  15 subclasses, BufferedWriter, BufferedReader, and BufferedRWPair buffer
  16 streams that are readable, writable, and both respectively.
  17 BufferedRandom provides a buffered interface to random access
  18 streams. BytesIO is a simple stream of in-memory bytes.
  19
  20 Another IOBase subclass, TextIOBase, deals with the encoding and decoding
  21 of streams into text. TextIOWrapper, which extends it, is a buffered text
  22 interface to a buffered raw stream (`BufferedIOBase`). Finally, StringIO
  23 is a in-memory stream for text.
  24
  25 Argument names are not part of the specification, and only the arguments
  26 of open() are intended to be used as keyword arguments.
  27
  28 data:
  29
  30 DEFAULT_BUFFER_SIZE
  31
  32    An int containing the default buffer size used by the module's buffered
  33    I/O classes. open() uses the file's blksize (as obtained by os.stat) if
  34    possible.
  35 """
  36 # New I/O library conforming to PEP 3116.
  37
  38 # This is a prototype; hopefully eventually some of this will be
  39 # reimplemented in C.
  40
  41 # XXX edge cases when switching between reading/writing
  42 # XXX need to support 1 meaning line-buffered
  43 # XXX whenever an argument is None, use the default value
  44 # XXX read/write ops should check readable/writable
  45 # XXX buffered readinto should work with arbitrary buffer objects
  46 # XXX use incremental encoder for text output, at least for UTF-16 and UTF-8-SIG
  47 # XXX check writable, readable and seekable in appropriate places
  48 from __future__ import print_function
  49 from __future__ import unicode_literals
  50
  51 __author__ = ("Guido van Rossum <guido@python.org>, "
  52               "Mike Verdone <mike.verdone@gmail.com>, "
  53               "Mark Russell <mark.russell@zen.co.uk>")
  54
  55 __all__ = ["BlockingIOError", "open", "IOBase", "RawIOBase", "FileIO",
  56            "BytesIO", "StringIO", "BufferedIOBase",
  57            "BufferedReader", "BufferedWriter", "BufferedRWPair",
  58            "BufferedRandom", "TextIOBase", "TextIOWrapper",
  59            "SEEK_SET", "SEEK_CUR", "SEEK_END"]
  60
  61 import os
  62 import abc
  63 import codecs
  64 import _fileio
  65 import threading
  66
  67 # open() uses st_blksize whenever we can
  68 DEFAULT_BUFFER_SIZE = 8 * 1024  # bytes
  69
  70 # for seek()
  71 SEEK_SET = 0
  72 SEEK_CUR = 1
  73 SEEK_END = 2
  74
  75 # py3k has only new style classes
  76 __metaclass__ = type
  77
  78 class BlockingIOError(IOError):
  79
  80     """Exception raised when I/O would block on a non-blocking I/O stream."""
  81
  82     def __init__(self, errno, strerror, characters_written=0):
  83         IOError.__init__(self, errno, strerror)
  84         self.characters_written = characters_written
  85
  86
  87 def open(file, mode="r", buffering=None, encoding=None, errors=None,
  88          newline=None, closefd=True):
  89     r"""Open file and return a stream. If the file cannot be opened, an IOError is
  90     raised.
  91
  92     file is either a string giving the name (and the path if the file
  93     isn't in the current working directory) of the file to be opened or an
  94     integer file descriptor of the file to be wrapped. (If a file
  95     descriptor is given, it is closed when the returned I/O object is
  96     closed, unless closefd is set to False.)
  97
  98     mode is an optional string that specifies the mode in which the file
  99     is opened. It defaults to 'r' which means open for reading in text
 100     mode.  Other common values are 'w' for writing (truncating the file if
 101     it already exists), and 'a' for appending (which on some Unix systems,
 102     means that all writes append to the end of the file regardless of the
 103     current seek position). In text mode, if encoding is not specified the
 104     encoding used is platform dependent. (For reading and writing raw
 105     bytes use binary mode and leave encoding unspecified.) The available
 106     modes are:
 107
 108     ========= ===============================================================
 109     Character Meaning
 110     --------- ---------------------------------------------------------------
 111     'r'       open for reading (default)
 112     'w'       open for writing, truncating the file first
 113     'a'       open for writing, appending to the end of the file if it exists
 114     'b'       binary mode
 115     't'       text mode (default)
 116     '+'       open a disk file for updating (reading and writing)
 117     'U'       universal newline mode (for backwards compatibility; unneeded
 118               for new code)
 119     ========= ===============================================================
 120
 121     The default mode is 'rt' (open for reading text). For binary random
 122     access, the mode 'w+b' opens and truncates the file to 0 bytes, while
 123     'r+b' opens the file without truncation.
 124
 125     Python distinguishes between files opened in binary and text modes,
 126     even when the underlying operating system doesn't. Files opened in
 127     binary mode (appending 'b' to the mode argument) return contents as
 128     bytes objects without any decoding. In text mode (the default, or when
 129     't' is appended to the mode argument), the contents of the file are
 130     returned as strings, the bytes having been first decoded using a
 131     platform-dependent encoding or using the specified encoding if given.
 132
 133     buffering is an optional integer used to set the buffering policy. By
 134     default full buffering is on. Pass 0 to switch buffering off (only
 135     allowed in binary mode), 1 to set line buffering, and an integer > 1
 136     for full buffering.
 137
 138     encoding is the name of the encoding used to decode or encode the
 139     file. This should only be used in text mode. The default encoding is
 140     platform dependent, but any encoding supported by Python can be
 141     passed.  See the codecs module for the list of supported encodings.
 142
 143     errors is an optional string that specifies how encoding errors are to
 144     be handled---this argument should not be used in binary mode. Pass
 145     'strict' to raise a ValueError exception if there is an encoding error
 146     (the default of None has the same effect), or pass 'ignore' to ignore
 147     errors. (Note that ignoring encoding errors can lead to data loss.)
 148     See the documentation for codecs.register for a list of the permitted
 149     encoding error strings.
 150
 151     newline controls how universal newlines works (it only applies to text
 152     mode). It can be None, '', '\n', '\r', and '\r\n'.  It works as
 153     follows:
 154
 155     * On input, if newline is None, universal newlines mode is
 156       enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
 157       these are translated into '\n' before being returned to the
 158       caller. If it is '', universal newline mode is enabled, but line
 159       endings are returned to the caller untranslated. If it has any of
 160       the other legal values, input lines are only terminated by the given
 161       string, and the line ending is returned to the caller untranslated.
 162
 163     * On output, if newline is None, any '\n' characters written are
 164       translated to the system default line separator, os.linesep. If
 165       newline is '', no translation takes place. If newline is any of the
 166       other legal values, any '\n' characters written are translated to
 167       the given string.
 168
 169     If closefd is False, the underlying file descriptor will be kept open
 170     when the file is closed. This does not work when a file name is given
 171     and must be True in that case.
 172
 173     open() returns a file object whose type depends on the mode, and
 174     through which the standard file operations such as reading and writing
 175     are performed. When open() is used to open a file in a text mode ('w',
 176     'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
 177     a file in a binary mode, the returned class varies: in read binary
 178     mode, it returns a BufferedReader; in write binary and append binary
 179     modes, it returns a BufferedWriter, and in read/write mode, it returns
 180     a BufferedRandom.
 181
 182     It is also possible to use a string or bytearray as a file for both
 183     reading and writing. For strings StringIO can be used like a file
 184     opened in a text mode, and for bytes a BytesIO can be used like a file
 185     opened in a binary mode.
 186     """
 187     if not isinstance(file, (basestring, int)):
 188         raise TypeError("invalid file: %r" % file)
 189     if not isinstance(mode, basestring):
 190         raise TypeError("invalid mode: %r" % mode)
 191     if buffering is not None and not isinstance(buffering, int):
 192         raise TypeError("invalid buffering: %r" % buffering)
 193     if encoding is not None and not isinstance(encoding, basestring):
 194         raise TypeError("invalid encoding: %r" % encoding)
 195     if errors is not None and not isinstance(errors, basestring):
 196         raise TypeError("invalid errors: %r" % errors)
 197     modes = set(mode)
 198     if modes - set("arwb+tU") or len(mode) > len(modes):
 199         raise ValueError("invalid mode: %r" % mode)
 200     reading = "r" in modes
 201     writing = "w" in modes
 202     appending = "a" in modes
 203     updating = "+" in modes
 204     text = "t" in modes
 205     binary = "b" in modes
 206     if "U" in modes:
 207         if writing or appending:
 208             raise ValueError("can't use U and writing mode at once")
 209         reading = True
 210     if text and binary:
 211         raise ValueError("can't have text and binary mode at once")
 212     if reading + writing + appending > 1:
 213         raise ValueError("can't have read/write/append mode at once")
 214     if not (reading or writing or appending):
 215         raise ValueError("must have exactly one of read/write/append mode")
 216     if binary and encoding is not None:
 217         raise ValueError("binary mode doesn't take an encoding argument")
 218     if binary and errors is not None:
 219         raise ValueError("binary mode doesn't take an errors argument")
 220     if binary and newline is not None:
 221         raise ValueError("binary mode doesn't take a newline argument")
 222     raw = FileIO(file,
 223                  (reading and "r" or "") +
 224                  (writing and "w" or "") +
 225                  (appending and "a" or "") +
 226                  (updating and "+" or ""),
 227                  closefd)
 228     if buffering is None:
 229         buffering = -1
 230     line_buffering = False
 231     if buffering == 1 or buffering < 0 and raw.isatty():
 232         buffering = -1
 233         line_buffering = True
 234     if buffering < 0:
 235         buffering = DEFAULT_BUFFER_SIZE
 236         try:
 237             bs = os.fstat(raw.fileno()).st_blksize
 238         except (os.error, AttributeError):
 239             pass
 240         else:
 241             if bs > 1:
 242                 buffering = bs
 243     if buffering < 0:
 244         raise ValueError("invalid buffering size")
 245     if buffering == 0:
 246         if binary:
 247             return raw
 248         raise ValueError("can't have unbuffered text I/O")
 249     if updating:
 250         buffer = BufferedRandom(raw, buffering)
 251     elif writing or appending:
 252         buffer = BufferedWriter(raw, buffering)
 253     elif reading:
 254         buffer = BufferedReader(raw, buffering)
 255     else:
 256         raise ValueError("unknown mode: %r" % mode)
 257     if binary:
 258         return buffer
 259     text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering)
 260     text.mode = mode
 261     return text
 262
 263 class _DocDescriptor:
 264     """Helper for builtins.open.__doc__
 265     """
 266     def __get__(self, obj, typ):
 267         return (
 268             "open(file, mode='r', buffering=None, encoding=None, "
 269                  "errors=None, newline=None, closefd=True)\n\n" +
 270             open.__doc__)
 271
 272 class OpenWrapper:
 273     """Wrapper for builtins.open
 274
 275     Trick so that open won't become a bound method when stored
 276     as a class variable (as dumbdbm does).
 277
 278     See initstdio() in Python/pythonrun.c.
 279     """
 280     __doc__ = _DocDescriptor()
 281
 282     def __new__(cls, *args, **kwargs):
 283         return open(*args, **kwargs)
 284
 285
 286 class UnsupportedOperation(ValueError, IOError):
 287     pass
 288
 289
 290 class IOBase(object):
 291
 292     """The abstract base class for all I/O classes, acting on streams of
 293     bytes. There is no public constructor.
 294
 295     This class provides dummy implementations for many methods that
 296     derived classes can override selectively; the default implementations
 297     represent a file that cannot be read, written or seeked.
 298
 299     Even though IOBase does not declare read, readinto, or write because
 300     their signatures will vary, implementations and clients should
 301     consider those methods part of the interface. Also, implementations
 302     may raise a IOError when operations they do not support are called.
 303
 304     The basic type used for binary data read from or written to a file is
 305     bytes. bytearrays are accepted too, and in some cases (such as
 306     readinto) needed. Text I/O classes work with str data.
 307
 308     Note that calling any method (even inquiries) on a closed stream is
 309     undefined. Implementations may raise IOError in this case.
 310
 311     IOBase (and its subclasses) support the iterator protocol, meaning
 312     that an IOBase object can be iterated over yielding the lines in a
 313     stream.
 314
 315     IOBase also supports the :keyword:`with` statement. In this example,
 316     fp is closed after the suite of the with statment is complete:
 317
 318     with open('spam.txt', 'r') as fp:
 319         fp.write('Spam and eggs!')
 320     """
 321
 322     __metaclass__ = abc.ABCMeta
 323
 324     ### Internal ###
 325
 326     def _unsupported(self, name):
 327         """Internal: raise an exception for unsupported operations."""
 328         raise UnsupportedOperation("%s.%s() not supported" %
 329                                    (self.__class__.__name__, name))
 330
 331     ### Positioning ###
 332
 333     def seek(self, pos, whence = 0):
 334         """Change stream position.
 335
 336         Change the stream position to byte offset offset. offset is
 337         interpreted relative to the position indicated by whence.  Values
 338         for whence are:
 339
 340         * 0 -- start of stream (the default); offset should be zero or positive
 341         * 1 -- current stream position; offset may be negative
 342         * 2 -- end of stream; offset is usually negative
 343
 344         Return the new absolute position.
 345         """
 346         self._unsupported("seek")
 347
 348     def tell(self):
 349         """Return current stream position."""
 350         return self.seek(0, 1)
 351
 352     def truncate(self, pos = None):
 353         """Truncate file to size bytes.
 354
 355         Size defaults to the current IO position as reported by tell().  Return
 356         the new size.
 357         """
 358         self._unsupported("truncate")
 359
 360     ### Flush and close ###
 361
 362     def flush(self):
 363         """Flush write buffers, if applicable.
 364
 365         This is not implemented for read-only and non-blocking streams.
 366         """
 367         # XXX Should this return the number of bytes written???
 368
 369     __closed = False
 370
 371     def close(self):
 372         """Flush and close the IO object.
 373
 374         This method has no effect if the file is already closed.
 375         """
 376         if not self.__closed:
 377             try:
 378                 self.flush()
 379             except IOError:
 380                 pass  # If flush() fails, just give up
 381             self.__closed = True
 382
 383     def __del__(self):
 384         """Destructor.  Calls close()."""
 385         # The try/except block is in case this is called at program
 386         # exit time, when it's possible that globals have already been
 387         # deleted, and then the close() call might fail.  Since
 388         # there's nothing we can do about such failures and they annoy
 389         # the end users, we suppress the traceback.
 390         try:
 391             self.close()
 392         except:
 393             pass
 394
 395     ### Inquiries ###
 396
 397     def seekable(self):
 398         """Return whether object supports random access.
 399
 400         If False, seek(), tell() and truncate() will raise IOError.
 401         This method may need to do a test seek().
 402         """
 403         return False
 404
 405     def _checkSeekable(self, msg=None):
 406         """Internal: raise an IOError if file is not seekable
 407         """
 408         if not self.seekable():
 409             raise IOError("File or stream is not seekable."
 410                           if msg is None else msg)
 411
 412
 413     def readable(self):
 414         """Return whether object was opened for reading.
 415
 416         If False, read() will raise IOError.
 417         """
 418         return False
 419
 420     def _checkReadable(self, msg=None):
 421         """Internal: raise an IOError if file is not readable
 422         """
 423         if not self.readable():
 424             raise IOError("File or stream is not readable."
 425                           if msg is None else msg)
 426
 427     def writable(self):
 428         """Return whether object was opened for writing.
 429
 430         If False, write() and truncate() will raise IOError.
 431         """
 432         return False
 433
 434     def _checkWritable(self, msg=None):
 435         """Internal: raise an IOError if file is not writable
 436         """
 437         if not self.writable():
 438             raise IOError("File or stream is not writable."
 439                           if msg is None else msg)
 440
 441     @property
 442     def closed(self):
 443         """closed: bool.  True iff the file has been closed.
 444
 445         For backwards compatibility, this is a property, not a predicate.
 446         """
 447         return self.__closed
 448
 449     def _checkClosed(self, msg=None):
 450         """Internal: raise an ValueError if file is closed
 451         """
 452         if self.closed:
 453             raise ValueError("I/O operation on closed file."
 454                              if msg is None else msg)
 455
 456     ### Context manager ###
 457
 458     def __enter__(self):
 459         """Context management protocol.  Returns self."""
 460         self._checkClosed()
 461         return self
 462
 463     def __exit__(self, *args):
 464         """Context management protocol.  Calls close()"""
 465         self.close()
 466
 467     ### Lower-level APIs ###
 468
 469     # XXX Should these be present even if unimplemented?
 470
 471     def fileno(self):
 472         """Returns underlying file descriptor if one exists.
 473
 474         An IOError is raised if the IO object does not use a file descriptor.
 475         """
 476         self._unsupported("fileno")
 477
 478     def isatty(self):
 479         """Return whether this is an 'interactive' stream.
 480
 481         Return False if it can't be determined.
 482         """
 483         self._checkClosed()
 484         return False
 485
 486     ### Readline[s] and writelines ###
 487
 488     def readline(self, limit = -1):
 489         r"""Read and return a line from the stream.
 490
 491         If limit is specified, at most limit bytes will be read.
 492
 493         The line terminator is always b'\n' for binary files; for text
 494         files, the newlines argument to open can be used to select the line
 495         terminator(s) recognized.
 496         """
 497         self._checkClosed()
 498         if hasattr(self, "peek"):
 499             def nreadahead():
 500                 readahead = self.peek(1)
 501                 if not readahead:
 502                     return 1
 503                 n = (readahead.find(b"\n") + 1) or len(readahead)
 504                 if limit >= 0:
 505                     n = min(n, limit)
 506                 return n
 507         else:
 508             def nreadahead():
 509                 return 1
 510         if limit is None:
 511             limit = -1
 512         if not isinstance(limit, (int, long)):
 513             raise TypeError("limit must be an integer")
 514         res = bytearray()
 515         while limit < 0 or len(res) < limit:
 516             b = self.read(nreadahead())
 517             if not b:
 518                 break
 519             res += b
 520             if res.endswith(b"\n"):
 521                 break
 522         return bytes(res)
 523
 524     def __iter__(self):
 525         self._checkClosed()
 526         return self
 527
 528     def next(self):
 529         line = self.readline()
 530         if not line:
 531             raise StopIteration
 532         return line
 533
 534     def readlines(self, hint=None):
 535         """Return a list of lines from the stream.
 536
 537         hint can be specified to control the number of lines read: no more
 538         lines will be read if the total size (in bytes/characters) of all
 539         lines so far exceeds hint.
 540         """
 541         if hint is None:
 542             hint = -1
 543         if not isinstance(hint, (int, long)):
 544             raise TypeError("hint must be an integer")
 545         if hint <= 0:
 546             return list(self)
 547         n = 0
 548         lines = []
 549         for line in self:
 550             lines.append(line)
 551             n += len(line)
 552             if n >= hint:
 553                 break
 554         return lines
 555
 556     def writelines(self, lines):
 557         self._checkClosed()
 558         for line in lines:
 559             self.write(line)
 560
 561
 562 class RawIOBase(IOBase):
 563
 564     """Base class for raw binary I/O."""
 565
 566     # The read() method is implemented by calling readinto(); derived
 567     # classes that want to support read() only need to implement
 568     # readinto() as a primitive operation.  In general, readinto() can be
 569     # more efficient than read().
 570
 571     # (It would be tempting to also provide an implementation of
 572     # readinto() in terms of read(), in case the latter is a more suitable
 573     # primitive operation, but that would lead to nasty recursion in case
 574     # a subclass doesn't implement either.)
 575
 576     def read(self, n = -1):
 577         """Read and return up to n bytes.
 578
 579         Returns an empty bytes array on EOF, or None if the object is
 580         set not to block and has no data to read.
 581         """
 582         if n is None:
 583             n = -1
 584         if n < 0:
 585             return self.readall()
 586         b = bytearray(n.__index__())
 587         n = self.readinto(b)
 588         del b[n:]
 589         return bytes(b)
 590
 591     def readall(self):
 592         """Read until EOF, using multiple read() call."""
 593         res = bytearray()
 594         while True:
 595             data = self.read(DEFAULT_BUFFER_SIZE)
 596             if not data:
 597                 break
 598             res += data
 599         return bytes(res)
 600
 601     def readinto(self, b):
 602         """Read up to len(b) bytes into b.
 603
 604         Returns number of bytes read (0 for EOF), or None if the object
 605         is set not to block as has no data to read.
 606         """
 607         self._unsupported("readinto")
 608
 609     def write(self, b):
 610         """Write the given buffer to the IO stream.
 611
 612         Returns the number of bytes written, which may be less than len(b).
 613         """
 614         self._unsupported("write")
 615
 616
 617 class FileIO(_fileio._FileIO, RawIOBase):
 618
 619     """Raw I/O implementation for OS files."""
 620
 621     # This multiply inherits from _FileIO and RawIOBase to make
 622     # isinstance(io.FileIO(), io.RawIOBase) return True without requiring
 623     # that _fileio._FileIO inherits from io.RawIOBase (which would be hard
 624     # to do since _fileio.c is written in C).
 625
 626     def __init__(self, name, mode="r", closefd=True):
 627         _fileio._FileIO.__init__(self, name, mode, closefd)
 628         self._name = name
 629
 630     def close(self):
 631         _fileio._FileIO.close(self)
 632         RawIOBase.close(self)
 633
 634     @property
 635     def name(self):
 636         return self._name
 637
 638
 639 class BufferedIOBase(IOBase):
 640
 641     """Base class for buffered IO objects.
 642
 643     The main difference with RawIOBase is that the read() method
 644     supports omitting the size argument, and does not have a default
 645     implementation that defers to readinto().
 646
 647     In addition, read(), readinto() and write() may raise
 648     BlockingIOError if the underlying raw stream is in non-blocking
 649     mode and not ready; unlike their raw counterparts, they will never
 650     return None.
 651
 652     A typical implementation should not inherit from a RawIOBase
 653     implementation, but wrap one.
 654     """
 655
 656     def read(self, n = None):
 657         """Read and return up to n bytes.
 658
 659         If the argument is omitted, None, or negative, reads and
 660         returns all data until EOF.
 661
 662         If the argument is positive, and the underlying raw stream is
 663         not 'interactive', multiple raw reads may be issued to satisfy
 664         the byte count (unless EOF is reached first).  But for
 665         interactive raw streams (XXX and for pipes?), at most one raw
 666         read will be issued, and a short result does not imply that
 667         EOF is imminent.
 668
 669         Returns an empty bytes array on EOF.
 670
 671         Raises BlockingIOError if the underlying raw stream has no
 672         data at the moment.
 673         """
 674         self._unsupported("read")
 675
 676     def readinto(self, b):
 677         """Read up to len(b) bytes into b.
 678
 679         Like read(), this may issue multiple reads to the underlying raw
 680         stream, unless the latter is 'interactive'.
 681
 682         Returns the number of bytes read (0 for EOF).
 683
 684         Raises BlockingIOError if the underlying raw stream has no
 685         data at the moment.
 686         """
 687         # XXX This ought to work with anything that supports the buffer API
 688         data = self.read(len(b))
 689         n = len(data)
 690         try:
 691             b[:n] = data
 692         except TypeError as err:
 693             import array
 694             if not isinstance(b, array.array):
 695                 raise err
 696             b[:n] = array.array(b'b', data)
 697         return n
 698
 699     def write(self, b):
 700         """Write the given buffer to the IO stream.
 701
 702         Return the number of bytes written, which is never less than
 703         len(b).
 704
 705         Raises BlockingIOError if the buffer is full and the
 706         underlying raw stream cannot accept more data at the moment.
 707         """
 708         self._unsupported("write")
 709
 710
 711 class _BufferedIOMixin(BufferedIOBase):
 712
 713     """A mixin implementation of BufferedIOBase with an underlying raw stream.
 714
 715     This passes most requests on to the underlying raw stream.  It
 716     does *not* provide implementations of read(), readinto() or
 717     write().
 718     """
 719
 720     def __init__(self, raw):
 721         self.raw = raw
 722
 723     ### Positioning ###
 724
 725     def seek(self, pos, whence=0):
 726         return self.raw.seek(pos, whence)
 727
 728     def tell(self):
 729         return self.raw.tell()
 730
 731     def truncate(self, pos=None):
 732         # Flush the stream.  We're mixing buffered I/O with lower-level I/O,
 733         # and a flush may be necessary to synch both views of the current
 734         # file state.
 735         self.flush()
 736
 737         if pos is None:
 738             pos = self.tell()
 739         # XXX: Should seek() be used, instead of passing the position
 740         # XXX  directly to truncate?
 741         return self.raw.truncate(pos)
 742
 743     ### Flush and close ###
 744
 745     def flush(self):
 746         self.raw.flush()
 747
 748     def close(self):
 749         if not self.closed:
 750             try:
 751                 self.flush()
 752             except IOError:
 753                 pass  # If flush() fails, just give up
 754             self.raw.close()
 755
 756     ### Inquiries ###
 757
 758     def seekable(self):
 759         return self.raw.seekable()
 760
 761     def readable(self):
 762         return self.raw.readable()
 763
 764     def writable(self):
 765         return self.raw.writable()
 766
 767     @property
 768     def closed(self):
 769         return self.raw.closed
 770
 771     @property
 772     def name(self):
 773         return self.raw.name
 774
 775     @property
 776     def mode(self):
 777         return self.raw.mode
 778
 779     ### Lower-level APIs ###
 780
 781     def fileno(self):
 782         return self.raw.fileno()
 783
 784     def isatty(self):
 785         return self.raw.isatty()
 786
 787
 788 class _BytesIO(BufferedIOBase):
 789
 790     """Buffered I/O implementation using an in-memory bytes buffer."""
 791
 792     # XXX More docs
 793
 794     def __init__(self, initial_bytes=None):
 795         buf = bytearray()
 796         if initial_bytes is not None:
 797             buf += bytearray(initial_bytes)
 798         self._buffer = buf
 799         self._pos = 0
 800
 801     def getvalue(self):
 802         """Return the bytes value (contents) of the buffer
 803         """
 804         if self.closed:
 805             raise ValueError("getvalue on closed file")
 806         return bytes(self._buffer)
 807
 808     def read(self, n=None):
 809         if self.closed:
 810             raise ValueError("read from closed file")
 811         if n is None:
 812             n = -1
 813         if not isinstance(n, (int, long)):
 814             raise TypeError("argument must be an integer")
 815         if n < 0:
 816             n = len(self._buffer)
 817         if len(self._buffer) <= self._pos:
 818             return b""
 819         newpos = min(len(self._buffer), self._pos + n)
 820         b = self._buffer[self._pos : newpos]
 821         self._pos = newpos
 822         return bytes(b)
 823
 824     def read1(self, n):
 825         """this is the same as read.
 826         """
 827         return self.read(n)
 828
 829     def write(self, b):
 830         if self.closed:
 831             raise ValueError("write to closed file")
 832         if isinstance(b, unicode):
 833             raise TypeError("can't write unicode to binary stream")
 834         n = len(b)
 835         if n == 0:
 836             return 0
 837         pos = self._pos
 838         if pos > len(self._buffer):
 839             # Inserts null bytes between the current end of the file
 840             # and the new write position.
 841             padding = b'\x00' * (pos - len(self._buffer))
 842             self._buffer += padding
 843         self._buffer[pos:pos + n] = b
 844         self._pos += n
 845         return n
 846
 847     def seek(self, pos, whence=0):
 848         if self.closed:
 849             raise ValueError("seek on closed file")
 850         try:
 851             pos = pos.__index__()
 852         except AttributeError as err:
 853             raise TypeError("an integer is required") # from err
 854         if whence == 0:
 855             if pos < 0:
 856                 raise ValueError("negative seek position %r" % (pos,))
 857             self._pos = pos
 858         elif whence == 1:
 859             self._pos = max(0, self._pos + pos)
 860         elif whence == 2:
 861             self._pos = max(0, len(self._buffer) + pos)
 862         else:
 863             raise ValueError("invalid whence value")
 864         return self._pos
 865
 866     def tell(self):
 867         if self.closed:
 868             raise ValueError("tell on closed file")
 869         return self._pos
 870
 871     def truncate(self, pos=None):
 872         if self.closed:
 873             raise ValueError("truncate on closed file")
 874         if pos is None:
 875             pos = self._pos
 876         elif pos < 0:
 877             raise ValueError("negative truncate position %r" % (pos,))
 878         del self._buffer[pos:]
 879         return self.seek(pos)
 880
 881     def readable(self):
 882         return True
 883
 884     def writable(self):
 885         return True
 886
 887     def seekable(self):
 888         return True
 889
 890 # Use the faster implementation of BytesIO if available
 891 try:
 892     import _bytesio
 893
 894     class BytesIO(_bytesio._BytesIO, BufferedIOBase):
 895         __doc__ = _bytesio._BytesIO.__doc__
 896
 897 except ImportError:
 898     BytesIO = _BytesIO
 899
 900
 901 class BufferedReader(_BufferedIOMixin):
 902
 903     """BufferedReader(raw[, buffer_size])
 904
 905     A buffer for a readable, sequential BaseRawIO object.
 906
 907     The constructor creates a BufferedReader for the given readable raw
 908     stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
 909     is used.
 910     """
 911
 912     def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
 913         """Create a new buffered reader using the given readable raw IO object.
 914         """
 915         raw._checkReadable()
 916         _BufferedIOMixin.__init__(self, raw)
 917         self.buffer_size = buffer_size
 918         self._reset_read_buf()
 919         self._read_lock = threading.Lock()
 920
 921     def _reset_read_buf(self):
 922         self._read_buf = b""
 923         self._read_pos = 0
 924
 925     def read(self, n=None):
 926         """Read n bytes.
 927
 928         Returns exactly n bytes of data unless the underlying raw IO
 929         stream reaches EOF or if the call would block in non-blocking
 930         mode. If n is negative, read until EOF or until read() would
 931         block.
 932         """
 933         with self._read_lock:
 934             return self._read_unlocked(n)
 935
 936     def _read_unlocked(self, n=None):
 937         nodata_val = b""
 938         empty_values = (b"", None)
 939         buf = self._read_buf
 940         pos = self._read_pos
 941
 942         # Special case for when the number of bytes to read is unspecified.
 943         if n is None or n == -1:
 944             self._reset_read_buf()
 945             chunks = [buf[pos:]]  # Strip the consumed bytes.
 946             current_size = 0
 947             while True:
 948                 # Read until EOF or until read() would block.
 949                 chunk = self.raw.read()
 950                 if chunk in empty_values:
 951                     nodata_val = chunk
 952                     break
 953                 current_size += len(chunk)
 954                 chunks.append(chunk)
 955             return b"".join(chunks) or nodata_val
 956
 957         # The number of bytes to read is specified, return at most n bytes.
 958         avail = len(buf) - pos  # Length of the available buffered data.
 959         if n <= avail:
 960             # Fast path: the data to read is fully buffered.
 961             self._read_pos += n
 962             return buf[pos:pos+n]
 963         # Slow path: read from the stream until enough bytes are read,
 964         # or until an EOF occurs or until read() would block.
 965         chunks = [buf[pos:]]
 966         wanted = max(self.buffer_size, n)
 967         while avail < n:
 968             chunk = self.raw.read(wanted)
 969             if chunk in empty_values:
 970                 nodata_val = chunk
 971                 break
 972             avail += len(chunk)
 973             chunks.append(chunk)
 974         # n is more then avail only when an EOF occurred or when
 975         # read() would have blocked.
 976         n = min(n, avail)
 977         out = b"".join(chunks)
 978         self._read_buf = out[n:]  # Save the extra data in the buffer.
 979         self._read_pos = 0
 980         return out[:n] if out else nodata_val
 981
 982     def peek(self, n=0):
 983         """Returns buffered bytes without advancing the position.
 984
 985         The argument indicates a desired minimal number of bytes; we
 986         do at most one raw read to satisfy it.  We never return more
 987         than self.buffer_size.
 988         """
 989         with self._read_lock:
 990             return self._peek_unlocked(n)
 991
 992     def _peek_unlocked(self, n=0):
 993         want = min(n, self.buffer_size)
 994         have = len(self._read_buf) - self._read_pos
 995         if have < want:
 996             to_read = self.buffer_size - have
 997             current = self.raw.read(to_read)
 998             if current:
 999                 self._read_buf = self._read_buf[self._read_pos:] + current
1000                 self._read_pos = 0
1001         return self._read_buf[self._read_pos:]
1002
1003     def read1(self, n):
1004         """Reads up to n bytes, with at most one read() system call."""
1005         # Returns up to n bytes.  If at least one byte is buffered, we
1006         # only return buffered bytes.  Otherwise, we do one raw read.
1007         if n <= 0:
1008             return b""
1009         with self._read_lock:
1010             self._peek_unlocked(1)
1011             return self._read_unlocked(
1012                 min(n, len(self._read_buf) - self._read_pos))
1013
1014     def tell(self):
1015         return self.raw.tell() - len(self._read_buf) + self._read_pos
1016
1017     def seek(self, pos, whence=0):
1018         with self._read_lock:
1019             if whence == 1:
1020                 pos -= len(self._read_buf) - self._read_pos
1021             pos = self.raw.seek(pos, whence)
1022             self._reset_read_buf()
1023             return pos
1024
1025
1026 class BufferedWriter(_BufferedIOMixin):
1027
1028     """A buffer for a writeable sequential RawIO object.
1029
1030     The constructor creates a BufferedWriter for the given writeable raw
1031     stream. If the buffer_size is not given, it defaults to
1032     DEAFULT_BUFFER_SIZE. If max_buffer_size is omitted, it defaults to
1033     twice the buffer size.
1034     """
1035
1036     def __init__(self, raw,
1037                  buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
1038         raw._checkWritable()
1039         _BufferedIOMixin.__init__(self, raw)
1040         self.buffer_size = buffer_size
1041         self.max_buffer_size = (2*buffer_size
1042                                 if max_buffer_size is None
1043                                 else max_buffer_size)
1044         self._write_buf = bytearray()
1045         self._write_lock = threading.Lock()
1046
1047     def write(self, b):
1048         if self.closed:
1049             raise ValueError("write to closed file")
1050         if isinstance(b, unicode):
1051             raise TypeError("can't write unicode to binary stream")
1052         with self._write_lock:
1053             # XXX we can implement some more tricks to try and avoid
1054             # partial writes
1055             if len(self._write_buf) > self.buffer_size:
1056                 # We're full, so let's pre-flush the buffer
1057                 try:
1058                     self._flush_unlocked()
1059                 except BlockingIOError as e:
1060                     # We can't accept anything else.
1061                     # XXX Why not just let the exception pass through?
1062                     raise BlockingIOError(e.errno, e.strerror, 0)
1063             before = len(self._write_buf)
1064             self._write_buf.extend(b)
1065             written = len(self._write_buf) - before
1066             if len(self._write_buf) > self.buffer_size:
1067                 try:
1068                     self._flush_unlocked()
1069                 except BlockingIOError as e:
1070                     if len(self._write_buf) > self.max_buffer_size:
1071                         # We've hit max_buffer_size. We have to accept a
1072                         # partial write and cut back our buffer.
1073                         overage = len(self._write_buf) - self.max_buffer_size
1074                         self._write_buf = self._write_buf[:self.max_buffer_size]
1075                         raise BlockingIOError(e.errno, e.strerror, overage)
1076             return written
1077
1078     def truncate(self, pos=None):
1079         with self._write_lock:
1080             self._flush_unlocked()
1081             if pos is None:
1082                 pos = self.raw.tell()
1083             return self.raw.truncate(pos)
1084
1085     def flush(self):
1086         with self._write_lock:
1087             self._flush_unlocked()
1088
1089     def _flush_unlocked(self):
1090         if self.closed:
1091             raise ValueError("flush of closed file")
1092         written = 0
1093         try:
1094             while self._write_buf:
1095                 n = self.raw.write(self._write_buf)
1096                 del self._write_buf[:n]
1097                 written += n
1098         except BlockingIOError as e:
1099             n = e.characters_written
1100             del self._write_buf[:n]
1101             written += n
1102             raise BlockingIOError(e.errno, e.strerror, written)
1103
1104     def tell(self):
1105         return self.raw.tell() + len(self._write_buf)
1106
1107     def seek(self, pos, whence=0):
1108         with self._write_lock:
1109             self._flush_unlocked()
1110             return self.raw.seek(pos, whence)
1111
1112
1113 class BufferedRWPair(BufferedIOBase):
1114
1115     """A buffered reader and writer object together.
1116
1117     A buffered reader object and buffered writer object put together to
1118     form a sequential IO object that can read and write. This is typically
1119     used with a socket or two-way pipe.
1120
1121     reader and writer are RawIOBase objects that are readable and
1122     writeable respectively. If the buffer_size is omitted it defaults to
1123     DEFAULT_BUFFER_SIZE. The max_buffer_size (for the buffered writer)
1124     defaults to twice the buffer size.
1125     """
1126
1127     # XXX The usefulness of this (compared to having two separate IO
1128     # objects) is questionable.
1129
1130     def __init__(self, reader, writer,
1131                  buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
1132         """Constructor.
1133
1134         The arguments are two RawIO instances.
1135         """
1136         reader._checkReadable()
1137         writer._checkWritable()
1138         self.reader = BufferedReader(reader, buffer_size)
1139         self.writer = BufferedWriter(writer, buffer_size, max_buffer_size)
1140
1141     def read(self, n=None):
1142         if n is None:
1143             n = -1
1144         return self.reader.read(n)
1145
1146     def readinto(self, b):
1147         return self.reader.readinto(b)
1148
1149     def write(self, b):
1150         return self.writer.write(b)
1151
1152     def peek(self, n=0):
1153         return self.reader.peek(n)
1154
1155     def read1(self, n):
1156         return self.reader.read1(n)
1157
1158     def readable(self):
1159         return self.reader.readable()
1160
1161     def writable(self):
1162         return self.writer.writable()
1163
1164     def flush(self):
1165         return self.writer.flush()
1166
1167     def close(self):
1168         self.writer.close()
1169         self.reader.close()
1170
1171     def isatty(self):
1172         return self.reader.isatty() or self.writer.isatty()
1173
1174     @property
1175     def closed(self):
1176         return self.writer.closed
1177
1178
1179 class BufferedRandom(BufferedWriter, BufferedReader):
1180
1181     """A buffered interface to random access streams.
1182
1183     The constructor creates a reader and writer for a seekable stream,
1184     raw, given in the first argument. If the buffer_size is omitted it
1185     defaults to DEFAULT_BUFFER_SIZE. The max_buffer_size (for the buffered
1186     writer) defaults to twice the buffer size.
1187     """
1188
1189     def __init__(self, raw,
1190                  buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
1191         raw._checkSeekable()
1192         BufferedReader.__init__(self, raw, buffer_size)
1193         BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)
1194
1195     def seek(self, pos, whence=0):
1196         self.flush()
1197         # First do the raw seek, then empty the read buffer, so that
1198         # if the raw seek fails, we don't lose buffered data forever.
1199         pos = self.raw.seek(pos, whence)
1200         with self._read_lock:
1201             self._reset_read_buf()
1202         return pos
1203
1204     def tell(self):
1205         if self._write_buf:
1206             return self.raw.tell() + len(self._write_buf)
1207         else:
1208             return BufferedReader.tell(self)
1209
1210     def truncate(self, pos=None):
1211         if pos is None:
1212             pos = self.tell()
1213         # Use seek to flush the read buffer.
1214         self.seek(pos)
1215         return BufferedWriter.truncate(self)
1216
1217     def read(self, n=None):
1218         if n is None:
1219             n = -1
1220         self.flush()
1221         return BufferedReader.read(self, n)
1222
1223     def readinto(self, b):
1224         self.flush()
1225         return BufferedReader.readinto(self, b)
1226
1227     def peek(self, n=0):
1228         self.flush()
1229         return BufferedReader.peek(self, n)
1230
1231     def read1(self, n):
1232         self.flush()
1233         return BufferedReader.read1(self, n)
1234
1235     def write(self, b):
1236         if self._read_buf:
1237             # Undo readahead
1238             with self._read_lock:
1239                 self.raw.seek(self._read_pos - len(self._read_buf), 1)
1240                 self._reset_read_buf()
1241         return BufferedWriter.write(self, b)
1242
1243
1244 class TextIOBase(IOBase):
1245
1246     """Base class for text I/O.
1247
1248     This class provides a character and line based interface to stream
1249     I/O. There is no readinto method because Python's character strings
1250     are immutable. There is no public constructor.
1251     """
1252
1253     def read(self, n = -1):
1254         """Read at most n characters from stream.
1255
1256         Read from underlying buffer until we have n characters or we hit EOF.
1257         If n is negative or omitted, read until EOF.
1258         """
1259         self._unsupported("read")
1260
1261     def write(self, s):
1262         """Write string s to stream."""
1263         self._unsupported("write")
1264
1265     def truncate(self, pos = None):
1266         """Truncate size to pos."""
1267         self._unsupported("truncate")
1268
1269     def readline(self):
1270         """Read until newline or EOF.
1271
1272         Returns an empty string if EOF is hit immediately.
1273         """
1274         self._unsupported("readline")
1275
1276     @property
1277     def encoding(self):
1278         """Subclasses should override."""
1279         return None
1280
1281     @property
1282     def newlines(self):
1283         """Line endings translated so far.
1284
1285         Only line endings translated during reading are considered.
1286
1287         Subclasses should override.
1288         """
1289         return None
1290
1291
1292 class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
1293     """Codec used when reading a file in universal newlines mode.
1294     It wraps another incremental decoder, translating \\r\\n and \\r into \\n.
1295     It also records the types of newlines encountered.
1296     When used with translate=False, it ensures that the newline sequence is
1297     returned in one piece.
1298     """
1299     def __init__(self, decoder, translate, errors='strict'):
1300         codecs.IncrementalDecoder.__init__(self, errors=errors)
1301         self.translate = translate
1302         self.decoder = decoder
1303         self.seennl = 0
1304         self.pendingcr = False
1305
1306     def decode(self, input, final=False):
1307         # decode input (with the eventual \r from a previous pass)
1308         output = self.decoder.decode(input, final=final)
1309         if self.pendingcr and (output or final):
1310             output = "\r" + output
1311             self.pendingcr = False
1312
1313         # retain last \r even when not translating data:
1314         # then readline() is sure to get \r\n in one pass
1315         if output.endswith("\r") and not final:
1316             output = output[:-1]
1317             self.pendingcr = True
1318
1319         # Record which newlines are read
1320         crlf = output.count('\r\n')
1321         cr = output.count('\r') - crlf
1322         lf = output.count('\n') - crlf
1323         self.seennl |= (lf and self._LF) | (cr and self._CR) \
1324                     | (crlf and self._CRLF)
1325
1326         if self.translate:
1327             if crlf:
1328                 output = output.replace("\r\n", "\n")
1329             if cr:
1330                 output = output.replace("\r", "\n")
1331
1332         return output
1333
1334     def getstate(self):
1335         buf, flag = self.decoder.getstate()
1336         flag <<= 1
1337         if self.pendingcr:
1338             flag |= 1
1339         return buf, flag
1340
1341     def setstate(self, state):
1342         buf, flag = state
1343         self.pendingcr = bool(flag & 1)
1344         self.decoder.setstate((buf, flag >> 1))
1345
1346     def reset(self):
1347         self.seennl = 0
1348         self.pendingcr = False
1349         self.decoder.reset()
1350
1351     _LF = 1
1352     _CR = 2
1353     _CRLF = 4
1354
1355     @property
1356     def newlines(self):
1357         return (None,
1358                 "\n",
1359                 "\r",
1360                 ("\r", "\n"),
1361                 "\r\n",
1362                 ("\n", "\r\n"),
1363                 ("\r", "\r\n"),
1364                 ("\r", "\n", "\r\n")
1365                )[self.seennl]
1366
1367
1368 class TextIOWrapper(TextIOBase):
1369
1370     r"""Character and line based layer over a BufferedIOBase object, buffer.
1371
1372     encoding gives the name of the encoding that the stream will be
1373     decoded or encoded with. It defaults to locale.getpreferredencoding.
1374
1375     errors determines the strictness of encoding and decoding (see the
1376     codecs.register) and defaults to "strict".
1377
1378     newline can be None, '', '\n', '\r', or '\r\n'.  It controls the
1379     handling of line endings. If it is None, universal newlines is
1380     enabled.  With this enabled, on input, the lines endings '\n', '\r',
1381     or '\r\n' are translated to '\n' before being returned to the
1382     caller. Conversely, on output, '\n' is translated to the system
1383     default line separator, os.linesep. If newline is any other of its
1384     legal values, that newline becomes the newline when the file is read
1385     and it is returned untranslated. On output, '\n' is converted to the
1386     newline.
1387
1388     If line_buffering is True, a call to flush is implied when a call to
1389     write contains a newline character.
1390     """
1391
1392     _CHUNK_SIZE = 128
1393
1394     def __init__(self, buffer, encoding=None, errors=None, newline=None,
1395                  line_buffering=False):
1396         if newline not in (None, "", "\n", "\r", "\r\n"):
1397             raise ValueError("illegal newline value: %r" % (newline,))
1398         if encoding is None:
1399             try:
1400                 encoding = os.device_encoding(buffer.fileno())
1401             except (AttributeError, UnsupportedOperation):
1402                 pass
1403             if encoding is None:
1404                 try:
1405                     import locale
1406                 except ImportError:
1407                     # Importing locale may fail if Python is being built
1408                     encoding = "ascii"
1409                 else:
1410                     encoding = locale.getpreferredencoding()
1411
1412         if not isinstance(encoding, basestring):
1413             raise ValueError("invalid encoding: %r" % encoding)
1414
1415         if errors is None:
1416             errors = "strict"
1417         else:
1418             if not isinstance(errors, basestring):
1419                 raise ValueError("invalid errors: %r" % errors)
1420
1421         self.buffer = buffer
1422         self._line_buffering = line_buffering
1423         self._encoding = encoding
1424         self._errors = errors
1425         self._readuniversal = not newline
1426         self._readtranslate = newline is None
1427         self._readnl = newline
1428         self._writetranslate = newline != ''
1429         self._writenl = newline or os.linesep
1430         self._encoder = None
1431         self._decoder = None
1432         self._decoded_chars = ''  # buffer for text returned from decoder
1433         self._decoded_chars_used = 0  # offset into _decoded_chars for read()
1434         self._snapshot = None  # info for reconstructing decoder state
1435         self._seekable = self._telling = self.buffer.seekable()
1436
1437     # self._snapshot is either None, or a tuple (dec_flags, next_input)
1438     # where dec_flags is the second (integer) item of the decoder state
1439     # and next_input is the chunk of input bytes that comes next after the
1440     # snapshot point.  We use this to reconstruct decoder states in tell().
1441
1442     # Naming convention:
1443     #   - "bytes_..." for integer variables that count input bytes
1444     #   - "chars_..." for integer variables that count decoded characters
1445
1446     @property
1447     def encoding(self):
1448         return self._encoding
1449
1450     @property
1451     def errors(self):
1452         return self._errors
1453
1454     @property
1455     def line_buffering(self):
1456         return self._line_buffering
1457
1458     def seekable(self):
1459         return self._seekable
1460
1461     def readable(self):
1462         return self.buffer.readable()
1463
1464     def writable(self):
1465         return self.buffer.writable()
1466
1467     def flush(self):
1468         self.buffer.flush()
1469         self._telling = self._seekable
1470
1471     def close(self):
1472         try:
1473             self.flush()
1474         except:
1475             pass  # If flush() fails, just give up
1476         self.buffer.close()
1477
1478     @property
1479     def closed(self):
1480         return self.buffer.closed
1481
1482     @property
1483     def name(self):
1484         return self.buffer.name
1485
1486     def fileno(self):
1487         return self.buffer.fileno()
1488
1489     def isatty(self):
1490         return self.buffer.isatty()
1491
1492     def write(self, s):
1493         if self.closed:
1494             raise ValueError("write to closed file")
1495         if not isinstance(s, unicode):
1496             raise TypeError("can't write %s to text stream" %
1497                             s.__class__.__name__)
1498         length = len(s)
1499         haslf = (self._writetranslate or self._line_buffering) and "\n" in s
1500         if haslf and self._writetranslate and self._writenl != "\n":
1501             s = s.replace("\n", self._writenl)
1502         encoder = self._encoder or self._get_encoder()
1503         # XXX What if we were just reading?
1504         b = encoder.encode(s)
1505         self.buffer.write(b)
1506         if self._line_buffering and (haslf or "\r" in s):
1507             self.flush()
1508         self._snapshot = None
1509         if self._decoder:
1510             self._decoder.reset()
1511         return length
1512
1513     def _get_encoder(self):
1514         make_encoder = codecs.getincrementalencoder(self._encoding)
1515         self._encoder = make_encoder(self._errors)
1516         return self._encoder
1517
1518     def _get_decoder(self):
1519         make_decoder = codecs.getincrementaldecoder(self._encoding)
1520         decoder = make_decoder(self._errors)
1521         if self._readuniversal:
1522             decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
1523         self._decoder = decoder
1524         return decoder
1525
1526     # The following three methods implement an ADT for _decoded_chars.
1527     # Text returned from the decoder is buffered here until the client
1528     # requests it by calling our read() or readline() method.
1529     def _set_decoded_chars(self, chars):
1530         """Set the _decoded_chars buffer."""
1531         self._decoded_chars = chars
1532         self._decoded_chars_used = 0
1533
1534     def _get_decoded_chars(self, n=None):
1535         """Advance into the _decoded_chars buffer."""
1536         offset = self._decoded_chars_used
1537         if n is None:
1538             chars = self._decoded_chars[offset:]
1539         else:
1540             chars = self._decoded_chars[offset:offset + n]
1541         self._decoded_chars_used += len(chars)
1542         return chars
1543
1544     def _rewind_decoded_chars(self, n):
1545         """Rewind the _decoded_chars buffer."""
1546         if self._decoded_chars_used < n:
1547             raise AssertionError("rewind decoded_chars out of bounds")
1548         self._decoded_chars_used -= n
1549
1550     def _read_chunk(self):
1551         """
1552         Read and decode the next chunk of data from the BufferedReader.
1553
1554         The return value is True unless EOF was reached.  The decoded string
1555         is placed in self._decoded_chars (replacing its previous value).
1556         The entire input chunk is sent to the decoder, though some of it
1557         may remain buffered in the decoder, yet to be converted.
1558         """
1559
1560         if self._decoder is None:
1561             raise ValueError("no decoder")
1562
1563         if self._telling:
1564             # To prepare for tell(), we need to snapshot a point in the
1565             # file where the decoder's input buffer is empty.
1566
1567             dec_buffer, dec_flags = self._decoder.getstate()
1568             # Given this, we know there was a valid snapshot point
1569             # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1570
1571         # Read a chunk, decode it, and put the result in self._decoded_chars.
1572         input_chunk = self.buffer.read1(self._CHUNK_SIZE)
1573         eof = not input_chunk
1574         self._set_decoded_chars(self._decoder.decode(input_chunk, eof))
1575
1576         if self._telling:
1577             # At the snapshot point, len(dec_buffer) bytes before the read,
1578             # the next input to be decoded is dec_buffer + input_chunk.
1579             self._snapshot = (dec_flags, dec_buffer + input_chunk)
1580
1581         return not eof
1582
1583     def _pack_cookie(self, position, dec_flags=0,
1584                            bytes_to_feed=0, need_eof=0, chars_to_skip=0):
1585         # The meaning of a tell() cookie is: seek to position, set the
1586         # decoder flags to dec_flags, read bytes_to_feed bytes, feed them
1587         # into the decoder with need_eof as the EOF flag, then skip
1588         # chars_to_skip characters of the decoded result.  For most simple
1589         # decoders, tell() will often just give a byte offset in the file.
1590         return (position | (dec_flags<<64) | (bytes_to_feed<<128) |
1591                (chars_to_skip<<192) | bool(need_eof)<<256)
1592
1593     def _unpack_cookie(self, bigint):
1594         rest, position = divmod(bigint, 1<<64)
1595         rest, dec_flags = divmod(rest, 1<<64)
1596         rest, bytes_to_feed = divmod(rest, 1<<64)
1597         need_eof, chars_to_skip = divmod(rest, 1<<64)
1598         return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip
1599
1600     def tell(self):
1601         if not self._seekable:
1602             raise IOError("underlying stream is not seekable")
1603         if not self._telling:
1604             raise IOError("telling position disabled by next() call")
1605         self.flush()
1606         position = self.buffer.tell()
1607         decoder = self._decoder
1608         if decoder is None or self._snapshot is None:
1609             if self._decoded_chars:
1610                 # This should never happen.
1611                 raise AssertionError("pending decoded text")
1612             return position
1613
1614         # Skip backward to the snapshot point (see _read_chunk).
1615         dec_flags, next_input = self._snapshot
1616         position -= len(next_input)
1617
1618         # How many decoded characters have been used up since the snapshot?
1619         chars_to_skip = self._decoded_chars_used
1620         if chars_to_skip == 0:
1621             # We haven't moved from the snapshot point.
1622             return self._pack_cookie(position, dec_flags)
1623
1624         # Starting from the snapshot position, we will walk the decoder
1625         # forward until it gives us enough decoded characters.
1626         saved_state = decoder.getstate()
1627         try:
1628             # Note our initial start point.
1629             decoder.setstate((b'', dec_flags))
1630             start_pos = position
1631             start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
1632             need_eof = 0
1633
1634             # Feed the decoder one byte at a time.  As we go, note the
1635             # nearest "safe start point" before the current location
1636             # (a point where the decoder has nothing buffered, so seek()
1637             # can safely start from there and advance to this location).
1638             for next_byte in next_input:
1639                 bytes_fed += 1
1640                 chars_decoded += len(decoder.decode(next_byte))
1641                 dec_buffer, dec_flags = decoder.getstate()
1642                 if not dec_buffer and chars_decoded <= chars_to_skip:
1643                     # Decoder buffer is empty, so this is a safe start point.
1644                     start_pos += bytes_fed
1645                     chars_to_skip -= chars_decoded
1646                     start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
1647                 if chars_decoded >= chars_to_skip:
1648                     break
1649             else:
1650                 # We didn't get enough decoded data; signal EOF to get more.
1651                 chars_decoded += len(decoder.decode(b'', final=True))
1652                 need_eof = 1
1653                 if chars_decoded < chars_to_skip:
1654                     raise IOError("can't reconstruct logical file position")
1655
1656             # The returned cookie corresponds to the last safe start point.
1657             return self._pack_cookie(
1658                 start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
1659         finally:
1660             decoder.setstate(saved_state)
1661
1662     def truncate(self, pos=None):
1663         self.flush()
1664         if pos is None:
1665             pos = self.tell()
1666         self.seek(pos)
1667         return self.buffer.truncate()
1668
1669     def seek(self, cookie, whence=0):
1670         if self.closed:
1671             raise ValueError("tell on closed file")
1672         if not self._seekable:
1673             raise IOError("underlying stream is not seekable")
1674         if whence == 1: # seek relative to current position
1675             if cookie != 0:
1676                 raise IOError("can't do nonzero cur-relative seeks")
1677             # Seeking to the current position should attempt to
1678             # sync the underlying buffer with the current position.
1679             whence = 0
1680             cookie = self.tell()
1681         if whence == 2: # seek relative to end of file
1682             if cookie != 0:
1683                 raise IOError("can't do nonzero end-relative seeks")
1684             self.flush()
1685             position = self.buffer.seek(0, 2)
1686             self._set_decoded_chars('')
1687             self._snapshot = None
1688             if self._decoder:
1689                 self._decoder.reset()
1690             return position
1691         if whence != 0:
1692             raise ValueError("invalid whence (%r, should be 0, 1 or 2)" %
1693                              (whence,))
1694         if cookie < 0:
1695             raise ValueError("negative seek position %r" % (cookie,))
1696         self.flush()
1697
1698         # The strategy of seek() is to go back to the safe start point
1699         # and replay the effect of read(chars_to_skip) from there.
1700         start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
1701             self._unpack_cookie(cookie)
1702
1703         # Seek back to the safe start point.
1704         self.buffer.seek(start_pos)
1705         self._set_decoded_chars('')
1706         self._snapshot = None
1707
1708         # Restore the decoder to its state from the safe start point.
1709         if self._decoder or dec_flags or chars_to_skip:
1710             self._decoder = self._decoder or self._get_decoder()
1711             self._decoder.setstate((b'', dec_flags))
1712             self._snapshot = (dec_flags, b'')
1713
1714         if chars_to_skip:
1715             # Just like _read_chunk, feed the decoder and save a snapshot.
1716             input_chunk = self.buffer.read(bytes_to_feed)
1717             self._set_decoded_chars(
1718                 self._decoder.decode(input_chunk, need_eof))
1719             self._snapshot = (dec_flags, input_chunk)
1720
1721             # Skip chars_to_skip of the decoded characters.
1722             if len(self._decoded_chars) < chars_to_skip:
1723                 raise IOError("can't restore logical file position")
1724             self._decoded_chars_used = chars_to_skip
1725
1726         return cookie
1727
1728     def read(self, n=None):
1729         if n is None:
1730             n = -1
1731         decoder = self._decoder or self._get_decoder()
1732         if n < 0:
1733             # Read everything.
1734             result = (self._get_decoded_chars() +
1735                       decoder.decode(self.buffer.read(), final=True))
1736             self._set_decoded_chars('')
1737             self._snapshot = None
1738             return result
1739         else:
1740             # Keep reading chunks until we have n characters to return.
1741             eof = False
1742             result = self._get_decoded_chars(n)
1743             while len(result) < n and not eof:
1744                 eof = not self._read_chunk()
1745                 result += self._get_decoded_chars(n - len(result))
1746             return result
1747
1748     def next(self):
1749         self._telling = False
1750         line = self.readline()
1751         if not line:
1752             self._snapshot = None
1753             self._telling = self._seekable
1754             raise StopIteration
1755         return line
1756
1757     def readline(self, limit=None):
1758         if self.closed:
1759             raise ValueError("read from closed file")
1760         if limit is None:
1761             limit = -1
1762         if not isinstance(limit, (int, long)):
1763             raise TypeError("limit must be an integer")
1764
1765         # Grab all the decoded text (we will rewind any extra bits later).
1766         line = self._get_decoded_chars()
1767
1768         start = 0
1769         decoder = self._decoder or self._get_decoder()
1770
1771         pos = endpos = None
1772         while True:
1773             if self._readtranslate:
1774                 # Newlines are already translated, only search for \n
1775                 pos = line.find('\n', start)
1776                 if pos >= 0:
1777                     endpos = pos + 1
1778                     break
1779                 else:
1780                     start = len(line)
1781
1782             elif self._readuniversal:
1783                 # Universal newline search. Find any of \r, \r\n, \n
1784                 # The decoder ensures that \r\n are not split in two pieces
1785
1786                 # In C we'd look for these in parallel of course.
1787                 nlpos = line.find("\n", start)
1788                 crpos = line.find("\r", start)
1789                 if crpos == -1:
1790                     if nlpos == -1:
1791                         # Nothing found
1792                         start = len(line)
1793                     else:
1794                         # Found \n
1795                         endpos = nlpos + 1
1796                         break
1797                 elif nlpos == -1:
1798                     # Found lone \r
1799                     endpos = crpos + 1
1800                     break
1801                 elif nlpos < crpos:
1802                     # Found \n
1803                     endpos = nlpos + 1
1804                     break
1805                 elif nlpos == crpos + 1:
1806                     # Found \r\n
1807                     endpos = crpos + 2
1808                     break
1809                 else:
1810                     # Found \r
1811                     endpos = crpos + 1
1812                     break
1813             else:
1814                 # non-universal
1815                 pos = line.find(self._readnl)
1816                 if pos >= 0:
1817                     endpos = pos + len(self._readnl)
1818                     break
1819
1820             if limit >= 0 and len(line) >= limit:
1821                 endpos = limit  # reached length limit
1822                 break
1823
1824             # No line ending seen yet - get more data
1825             more_line = ''
1826             while self._read_chunk():
1827                 if self._decoded_chars:
1828                     break
1829             if self._decoded_chars:
1830                 line += self._get_decoded_chars()
1831             else:
1832                 # end of file
1833                 self._set_decoded_chars('')
1834                 self._snapshot = None
1835                 return line
1836
1837         if limit >= 0 and endpos > limit:
1838             endpos = limit  # don't exceed limit
1839
1840         # Rewind _decoded_chars to just after the line ending we found.
1841         self._rewind_decoded_chars(len(line) - endpos)
1842         return line[:endpos]
1843
1844     @property
1845     def newlines(self):
1846         return self._decoder.newlines if self._decoder else None
1847
1848 class StringIO(TextIOWrapper):
1849
1850     """An in-memory stream for text. The initial_value argument sets the
1851     value of object. The other arguments are like those of TextIOWrapper's
1852     constructor.
1853     """
1854
1855     def __init__(self, initial_value="", encoding="utf-8",
1856                  errors="strict", newline="\n"):
1857         super(StringIO, self).__init__(BytesIO(),
1858                                        encoding=encoding,
1859                                        errors=errors,
1860                                        newline=newline)
1861         if initial_value:
1862             if not isinstance(initial_value, unicode):
1863                 initial_value = unicode(initial_value)
1864             self.write(initial_value)
1865             self.seek(0)
1866
1867     def getvalue(self):
1868         self.flush()
1869         return self.buffer.getvalue().decode(self._encoding, self._errors)