Clarify the docs for the 'strict' argument to httplib.HTTPConnection.
[python.git] / Lib / io.py
blob8462dd5e6a7357e003955de6c4389397aca57929
1 """
2 The io module provides the Python interfaces to stream handling. The
3 builtin open function is defined in this module.
5 At the top of the I/O hierarchy is the abstract base class IOBase. It
6 defines the basic interface to a stream. Note, however, that there is no
7 separation between reading and writing to streams; implementations are
8 allowed to throw an IOError if they do not support a given operation.
10 Extending IOBase is RawIOBase which deals simply with the reading and
11 writing of raw bytes to a stream. FileIO subclasses RawIOBase to provide
12 an interface to OS files.
14 BufferedIOBase deals with buffering on a raw byte stream (RawIOBase). Its
15 subclasses, BufferedWriter, BufferedReader, and BufferedRWPair buffer
16 streams that are readable, writable, and both respectively.
17 BufferedRandom provides a buffered interface to random access
18 streams. BytesIO is a simple stream of in-memory bytes.
20 Another IOBase subclass, TextIOBase, deals with the encoding and decoding
21 of streams into text. TextIOWrapper, which extends it, is a buffered text
22 interface to a buffered raw stream (`BufferedIOBase`). Finally, StringIO
23 is an in-memory stream for text.
25 Argument names are not part of the specification, and only the arguments
26 of open() are intended to be used as keyword arguments.
28 data:
30 DEFAULT_BUFFER_SIZE
32 An int containing the default buffer size used by the module's buffered
33 I/O classes. open() uses the file's blksize (as obtained by os.stat) if
34 possible.
35 """
36 # New I/O library conforming to PEP 3116.
38 # This is a prototype; hopefully eventually some of this will be
39 # reimplemented in C.
41 # XXX edge cases when switching between reading/writing
42 # XXX need to support 1 meaning line-buffered
43 # XXX whenever an argument is None, use the default value
44 # XXX read/write ops should check readable/writable
45 # XXX buffered readinto should work with arbitrary buffer objects
46 # XXX use incremental encoder for text output, at least for UTF-16 and UTF-8-SIG
47 # XXX check writable, readable and seekable in appropriate places
48 from __future__ import print_function
49 from __future__ import unicode_literals
# Authors of the original pure-Python prototype of the new I/O library.
__author__ = ("Guido van Rossum <guido@python.org>, "
              "Mike Verdone <mike.verdone@gmail.com>, "
              "Mark Russell <mark.russell@zen.co.uk>")

# Names exported by `from io import *`; some (StringIO, TextIOWrapper, ...)
# are defined later in the file.
__all__ = ["BlockingIOError", "open", "IOBase", "RawIOBase", "FileIO",
           "BytesIO", "StringIO", "BufferedIOBase",
           "BufferedReader", "BufferedWriter", "BufferedRWPair",
           "BufferedRandom", "TextIOBase", "TextIOWrapper"]

import os
import abc
import codecs
import _fileio
import threading

# open() uses st_blksize whenever we can
DEFAULT_BUFFER_SIZE = 8 * 1024  # bytes

# py3k has only new style classes
__metaclass__ = type
class BlockingIOError(IOError):

    """Exception raised when I/O would block on a non-blocking I/O stream."""

    def __init__(self, errno, strerror, characters_written=0):
        # Record how much data made it through before the stream blocked;
        # buffered writers use this to report/resume a partial write.
        self.characters_written = characters_written
        # Delegate the standard (errno, strerror) pair to IOError.
        IOError.__init__(self, errno, strerror)
def open(file, mode="r", buffering=None, encoding=None, errors=None,
         newline=None, closefd=True):
    r"""Open file and return a stream. If the file cannot be opened, an IOError is
    raised.

    file is either a string giving the name (and the path if the file
    isn't in the current working directory) of the file to be opened or an
    integer file descriptor of the file to be wrapped. (If a file
    descriptor is given, it is closed when the returned I/O object is
    closed, unless closefd is set to False.)

    mode is an optional string that specifies the mode in which the file
    is opened. It defaults to 'r' which means open for reading in text
    mode. Other common values are 'w' for writing (truncating the file if
    it already exists), and 'a' for appending (which on some Unix systems,
    means that all writes append to the end of the file regardless of the
    current seek position). In text mode, if encoding is not specified the
    encoding used is platform dependent. (For reading and writing raw
    bytes use binary mode and leave encoding unspecified.) The available
    modes are:

    ========= ===============================================================
    Character Meaning
    --------- ---------------------------------------------------------------
    'r'       open for reading (default)
    'w'       open for writing, truncating the file first
    'a'       open for writing, appending to the end of the file if it exists
    'b'       binary mode
    't'       text mode (default)
    '+'       open a disk file for updating (reading and writing)
    'U'       universal newline mode (for backwards compatibility; unneeded
              for new code)
    ========= ===============================================================

    The default mode is 'rt' (open for reading text). For binary random
    access, the mode 'w+b' opens and truncates the file to 0 bytes, while
    'r+b' opens the file without truncation.

    Python distinguishes between files opened in binary and text modes,
    even when the underlying operating system doesn't. Files opened in
    binary mode (appending 'b' to the mode argument) return contents as
    bytes objects without any decoding. In text mode (the default, or when
    't' is appended to the mode argument), the contents of the file are
    returned as strings, the bytes having been first decoded using a
    platform-dependent encoding or using the specified encoding if given.

    buffering is an optional integer used to set the buffering policy. By
    default full buffering is on. Pass 0 to switch buffering off (only
    allowed in binary mode), 1 to set line buffering, and an integer > 1
    for full buffering.

    encoding is the name of the encoding used to decode or encode the
    file. This should only be used in text mode. The default encoding is
    platform dependent, but any encoding supported by Python can be
    passed. See the codecs module for the list of supported encodings.

    errors is an optional string that specifies how encoding errors are to
    be handled---this argument should not be used in binary mode. Pass
    'strict' to raise a ValueError exception if there is an encoding error
    (the default of None has the same effect), or pass 'ignore' to ignore
    errors. (Note that ignoring encoding errors can lead to data loss.)
    See the documentation for codecs.register for a list of the permitted
    encoding error strings.

    newline controls how universal newlines works (it only applies to text
    mode). It can be None, '', '\n', '\r', and '\r\n'. It works as
    follows:

    * On input, if newline is None, universal newlines mode is
      enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
      these are translated into '\n' before being returned to the
      caller. If it is '', universal newline mode is enabled, but line
      endings are returned to the caller untranslated. If it has any of
      the other legal values, input lines are only terminated by the given
      string, and the line ending is returned to the caller untranslated.

    * On output, if newline is None, any '\n' characters written are
      translated to the system default line separator, os.linesep. If
      newline is '', no translation takes place. If newline is any of the
      other legal values, any '\n' characters written are translated to
      the given string.

    If closefd is False, the underlying file descriptor will be kept open
    when the file is closed. This does not work when a file name is given
    and must be True in that case.

    open() returns a file object whose type depends on the mode, and
    through which the standard file operations such as reading and writing
    are performed. When open() is used to open a file in a text mode ('w',
    'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
    a file in a binary mode, the returned class varies: in read binary
    mode, it returns a BufferedReader; in write binary and append binary
    modes, it returns a BufferedWriter, and in read/write mode, it returns
    a BufferedRandom.

    It is also possible to use a string or bytearray as a file for both
    reading and writing. For strings StringIO can be used like a file
    opened in a text mode, and for bytes a BytesIO can be used like a file
    opened in a binary mode.
    """
    # --- Argument type validation ---
    if not isinstance(file, (basestring, int)):
        raise TypeError("invalid file: %r" % file)
    if not isinstance(mode, basestring):
        raise TypeError("invalid mode: %r" % mode)
    if buffering is not None and not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % buffering)
    if encoding is not None and not isinstance(encoding, basestring):
        raise TypeError("invalid encoding: %r" % encoding)
    if errors is not None and not isinstance(errors, basestring):
        raise TypeError("invalid errors: %r" % errors)
    # --- Mode-string validation: only known flags, no duplicates ---
    modes = set(mode)
    if modes - set("arwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes:
        # 'U' implies reading; it is incompatible with any write intent.
        if writing or appending:
            raise ValueError("can't use U and writing mode at once")
        reading = True
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and errors is not None:
        raise ValueError("binary mode doesn't take an errors argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")
    # --- Build the raw (unbuffered) layer ---
    raw = FileIO(file,
                 (reading and "r" or "") +
                 (writing and "w" or "") +
                 (appending and "a" or "") +
                 (updating and "+" or ""),
                 closefd)
    if buffering is None:
        buffering = -1
    line_buffering = False
    # buffering == 1 requests line buffering; so does any negative value on
    # an interactive (tty) stream.
    if buffering == 1 or buffering < 0 and raw.isatty():
        buffering = -1
        line_buffering = True
    if buffering < 0:
        buffering = DEFAULT_BUFFER_SIZE
        try:
            # Prefer the filesystem's block size when we can obtain it.
            bs = os.fstat(raw.fileno()).st_blksize
        except (os.error, AttributeError):
            pass
        else:
            if bs > 1:
                buffering = bs
    if buffering < 0:
        raise ValueError("invalid buffering size")
    if buffering == 0:
        # Unbuffered I/O: return the raw FileIO directly (binary only).
        if binary:
            raw._name = file
            raw._mode = mode
            return raw
        raise ValueError("can't have unbuffered text I/O")
    # --- Build the buffered layer ---
    if updating:
        buffer = BufferedRandom(raw, buffering)
    elif writing or appending:
        buffer = BufferedWriter(raw, buffering)
    elif reading:
        buffer = BufferedReader(raw, buffering)
    else:
        raise ValueError("unknown mode: %r" % mode)
    if binary:
        buffer.name = file
        buffer.mode = mode
        return buffer
    # --- Text mode: wrap the buffered stream in a decoding text layer ---
    text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering)
    text.name = file
    text.mode = mode
    return text
262 class _DocDescriptor:
263 """Helper for builtins.open.__doc__
265 def __get__(self, obj, typ):
266 return (
267 "open(file, mode='r', buffering=None, encoding=None, "
268 "errors=None, newline=None, closefd=True)\n\n" +
269 open.__doc__)
class OpenWrapper:

    """Wrapper for builtins.open

    Trick so that open won't become a bound method when stored
    as a class variable (as dumbdbm does).

    See initstdio() in Python/pythonrun.c.
    """

    # Expose "open(...)" signature + open()'s docstring as this class's doc.
    __doc__ = _DocDescriptor()

    def __new__(cls, *args, **kwargs):
        # Never actually creates an OpenWrapper instance; simply delegates
        # to the module-level open() and returns the resulting stream.
        return open(*args, **kwargs)
class UnsupportedOperation(ValueError, IOError):
    """Raised when an unsupported operation is called on an IO object.

    Derives from both ValueError and IOError so that callers catching
    either exception type keep working.
    """
    pass
class IOBase(object):

    """The abstract base class for all I/O classes, acting on streams of
    bytes. There is no public constructor.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default implementations
    represent a file that cannot be read, written or seeked.

    Even though IOBase does not declare read, readinto, or write because
    their signatures will vary, implementations and clients should
    consider those methods part of the interface. Also, implementations
    may raise an IOError when operations they do not support are called.

    The basic type used for binary data read from or written to a file is
    bytes. bytearrays are accepted too, and in some cases (such as
    readinto) needed. Text I/O classes work with str data.

    Note that calling any method (even inquiries) on a closed stream is
    undefined. Implementations may raise IOError in this case.

    IOBase (and its subclasses) support the iterator protocol, meaning
    that an IOBase object can be iterated over yielding the lines in a
    stream.

    IOBase also supports the :keyword:`with` statement. In this example,
    fp is closed after the suite of the with statement is complete:

        with open('spam.txt', 'r') as fp:
            fp.write('Spam and eggs!')
    """

    __metaclass__ = abc.ABCMeta

    ### Internal ###

    def _unsupported(self, name):
        """Internal: raise an exception for unsupported operations."""
        raise UnsupportedOperation("%s.%s() not supported" %
                                   (self.__class__.__name__, name))

    ### Positioning ###

    def seek(self, pos, whence = 0):
        """Change stream position.

        Change the stream position to byte offset offset. offset is
        interpreted relative to the position indicated by whence. Values
        for whence are:

        * 0 -- start of stream (the default); offset should be zero or positive
        * 1 -- current stream position; offset may be negative
        * 2 -- end of stream; offset is usually negative

        Return the new absolute position.
        """
        self._unsupported("seek")

    def tell(self):
        """Return current stream position."""
        # Implemented in terms of seek(): a relative seek of 0 bytes.
        return self.seek(0, 1)

    def truncate(self, pos = None):
        """Truncate file to size bytes.

        Size defaults to the current IO position as reported by tell(). Return
        the new size.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self):
        """Flush write buffers, if applicable.

        This is not implemented for read-only and non-blocking streams.
        """
        # Intentionally a no-op here; buffered subclasses override it.
        # XXX Should this return the number of bytes written???

    # Name-mangled to _IOBase__closed so subclasses can't clobber it by
    # accident; exposed read-only through the `closed` property below.
    __closed = False

    def close(self):
        """Flush and close the IO object.

        This method has no effect if the file is already closed.
        """
        if not self.__closed:
            try:
                self.flush()
            except IOError:
                pass # If flush() fails, just give up
            self.__closed = True

    def __del__(self):
        """Destructor. Calls close()."""
        # The try/except block is in case this is called at program
        # exit time, when it's possible that globals have already been
        # deleted, and then the close() call might fail. Since
        # there's nothing we can do about such failures and they annoy
        # the end users, we suppress the traceback.
        try:
            self.close()
        except:
            pass

    ### Inquiries ###

    def seekable(self):
        """Return whether object supports random access.

        If False, seek(), tell() and truncate() will raise IOError.
        This method may need to do a test seek().
        """
        return False

    def _checkSeekable(self, msg=None):
        """Internal: raise an IOError if file is not seekable."""
        if not self.seekable():
            raise IOError("File or stream is not seekable."
                          if msg is None else msg)

    def readable(self):
        """Return whether object was opened for reading.

        If False, read() will raise IOError.
        """
        return False

    def _checkReadable(self, msg=None):
        """Internal: raise an IOError if file is not readable."""
        if not self.readable():
            raise IOError("File or stream is not readable."
                          if msg is None else msg)

    def writable(self):
        """Return whether object was opened for writing.

        If False, write() and truncate() will raise IOError.
        """
        return False

    def _checkWritable(self, msg=None):
        """Internal: raise an IOError if file is not writable."""
        if not self.writable():
            raise IOError("File or stream is not writable."
                          if msg is None else msg)

    @property
    def closed(self):
        """closed: bool. True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    def _checkClosed(self, msg=None):
        """Internal: raise a ValueError if file is closed."""
        if self.closed:
            raise ValueError("I/O operation on closed file."
                             if msg is None else msg)

    ### Context manager ###

    def __enter__(self):
        """Context management protocol. Returns self."""
        self._checkClosed()
        return self

    def __exit__(self, *args):
        """Context management protocol. Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self):
        """Returns underlying file descriptor if one exists.

        An IOError is raised if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self):
        """Return whether this is an 'interactive' stream.

        Return False if it can't be determined.
        """
        self._checkClosed()
        return False

    ### Readline[s] and writelines ###

    def readline(self, limit = -1):
        r"""Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.

        The line terminator is always b'\n' for binary files; for text
        files, the newlines argument to open can be used to select the line
        terminator(s) recognized.
        """
        self._checkClosed()
        # If the stream supports peek() (buffered readers do), look ahead
        # so we can read up to the next newline in one call instead of
        # one byte at a time.
        if hasattr(self, "peek"):
            def nreadahead():
                readahead = self.peek(1)
                if not readahead:
                    return 1
                n = (readahead.find(b"\n") + 1) or len(readahead)
                if limit >= 0:
                    n = min(n, limit)
                return n
        else:
            def nreadahead():
                return 1
        if limit is None:
            limit = -1
        if not isinstance(limit, (int, long)):
            raise TypeError("limit must be an integer")
        res = bytearray()
        while limit < 0 or len(res) < limit:
            b = self.read(nreadahead())
            if not b:
                break
            res += b
            if res.endswith(b"\n"):
                break
        return bytes(res)

    def __iter__(self):
        self._checkClosed()
        return self

    def next(self):
        # Python 2 iterator protocol: yield one line per iteration.
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def readlines(self, hint=None):
        """Return a list of lines from the stream.

        hint can be specified to control the number of lines read: no more
        lines will be read if the total size (in bytes/characters) of all
        lines so far exceeds hint.
        """
        if hint is None:
            hint = -1
        if not isinstance(hint, (int, long)):
            raise TypeError("hint must be an integer")
        if hint <= 0:
            return list(self)
        n = 0
        lines = []
        for line in self:
            lines.append(line)
            n += len(line)
            if n >= hint:
                break
        return lines

    def writelines(self, lines):
        # Write each item verbatim; no line separators are added.
        self._checkClosed()
        for line in lines:
            self.write(line)
class RawIOBase(IOBase):

    """Base class for raw binary I/O."""

    # The read() method is implemented by calling readinto(); derived
    # classes that want to support read() only need to implement
    # readinto() as a primitive operation. In general, readinto() can be
    # more efficient than read().

    # (It would be tempting to also provide an implementation of
    # readinto() in terms of read(), in case the latter is a more suitable
    # primitive operation, but that would lead to nasty recursion in case
    # a subclass doesn't implement either.)

    def read(self, n = -1):
        """Read and return up to n bytes.

        Returns an empty bytes array on EOF, or None if the object is
        set not to block and has no data to read.
        """
        if n is None:
            n = -1
        if n < 0:
            return self.readall()
        b = bytearray(n.__index__())
        n = self.readinto(b)
        if n is None:
            # readinto() signalled "would block" on a non-blocking stream;
            # propagate None rather than returning b"" (which callers
            # would misinterpret as EOF).
            return None
        del b[n:]
        return bytes(b)

    def readall(self):
        """Read until EOF, using multiple read() calls."""
        res = bytearray()
        while True:
            data = self.read(DEFAULT_BUFFER_SIZE)
            if not data:
                break
            res += data
        return bytes(res)

    def readinto(self, b):
        """Read up to len(b) bytes into b.

        Returns number of bytes read (0 for EOF), or None if the object
        is set not to block and has no data to read.
        """
        self._unsupported("readinto")

    def write(self, b):
        """Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than len(b).
        """
        self._unsupported("write")
class FileIO(_fileio._FileIO, RawIOBase):

    """Raw I/O implementation for OS files."""

    # This multiply inherits from _FileIO and RawIOBase to make
    # isinstance(io.FileIO(), io.RawIOBase) return True without requiring
    # that _fileio._FileIO inherits from io.RawIOBase (which would be hard
    # to do since _fileio.c is written in C).

    def close(self):
        # Close the C-level file first, then run IOBase's close() for the
        # Python-side bookkeeping (flush attempt + closed flag).
        _fileio._FileIO.close(self)
        RawIOBase.close(self)

    @property
    def name(self):
        # _name is assigned externally (e.g. by open() above).
        return self._name

    @property
    def mode(self):
        # _mode is assigned externally (e.g. by open() above).
        return self._mode
class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    The main difference with RawIOBase is that the read() method
    supports omitting the size argument, and does not have a default
    implementation that defers to readinto().

    In addition, read(), readinto() and write() may raise
    BlockingIOError if the underlying raw stream is in non-blocking
    mode and not ready; unlike their raw counterparts, they will never
    return None.

    A typical implementation should not inherit from a RawIOBase
    implementation, but wrap one.
    """

    def read(self, n = None):
        """Read and return up to n bytes.

        If the argument is omitted, None, or negative, reads and
        returns all data until EOF.

        If the argument is positive, and the underlying raw stream is
        not 'interactive', multiple raw reads may be issued to satisfy
        the byte count (unless EOF is reached first). But for
        interactive raw streams (XXX and for pipes?), at most one raw
        read will be issued, and a short result does not imply that
        EOF is imminent.

        Returns an empty bytes array on EOF.

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        self._unsupported("read")

    def readinto(self, b):
        """Read up to len(b) bytes into b.

        Like read(), this may issue multiple reads to the underlying raw
        stream, unless the latter is 'interactive'.

        Returns the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        # XXX This ought to work with anything that supports the buffer API
        data = self.read(len(b))
        n = len(data)
        try:
            b[:n] = data
        except TypeError as err:
            # Slice assignment of bytes into an array.array raises
            # TypeError; retry with a matching byte array. Anything else
            # is a genuine error and is re-raised.
            import array
            if not isinstance(b, array.array):
                raise err
            b[:n] = array.array(b'b', data)
        return n

    def write(self, b):
        """Write the given buffer to the IO stream.

        Return the number of bytes written, which is never less than
        len(b).

        Raises BlockingIOError if the buffer is full and the
        underlying raw stream cannot accept more data at the moment.
        """
        self._unsupported("write")
class _BufferedIOMixin(BufferedIOBase):

    """A mixin implementation of BufferedIOBase with an underlying raw stream.

    This passes most requests on to the underlying raw stream. It
    does *not* provide implementations of read(), readinto() or
    write().
    """

    def __init__(self, raw):
        # The wrapped raw stream; also consulted for all inquiries below.
        self.raw = raw

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Delegate seeking to the raw stream."""
        return self.raw.seek(pos, whence)

    def tell(self):
        """Delegate position reporting to the raw stream."""
        return self.raw.tell()

    def truncate(self, pos=None):
        # Flush the stream. We're mixing buffered I/O with lower-level I/O,
        # and a flush may be necessary to synch both views of the current
        # file state.
        self.flush()

        if pos is None:
            pos = self.tell()
        # XXX: Should seek() be used, instead of passing the position
        # XXX directly to truncate?
        return self.raw.truncate(pos)

    ### Flush and close ###

    def flush(self):
        """Delegate flushing to the raw stream."""
        self.raw.flush()

    def close(self):
        """Flush (best-effort) and close the underlying raw stream."""
        if not self.closed:
            try:
                self.flush()
            except IOError:
                pass # If flush() fails, just give up
            self.raw.close()

    ### Inquiries ###

    def seekable(self):
        return self.raw.seekable()

    def readable(self):
        return self.raw.readable()

    def writable(self):
        return self.raw.writable()

    @property
    def closed(self):
        # This object is closed exactly when its raw stream is.
        return self.raw.closed

    ### Lower-level APIs ###

    def fileno(self):
        return self.raw.fileno()

    def isatty(self):
        return self.raw.isatty()
class _BytesIO(BufferedIOBase):

    """Buffered I/O implementation using an in-memory bytes buffer."""

    # XXX More docs

    def __init__(self, initial_bytes=None):
        # The backing store is a growable bytearray; _pos is the current
        # stream position (may point past the end, see write()).
        buf = bytearray()
        if initial_bytes is not None:
            buf += bytearray(initial_bytes)
        self._buffer = buf
        self._pos = 0

    def getvalue(self):
        """Return the bytes value (contents) of the buffer."""
        if self.closed:
            raise ValueError("getvalue on closed file")
        return bytes(self._buffer)

    def read(self, n=None):
        """Read up to n bytes from the current position (all if n < 0/None)."""
        if self.closed:
            raise ValueError("read from closed file")
        if n is None:
            n = -1
        if not isinstance(n, (int, long)):
            raise TypeError("argument must be an integer")
        if n < 0:
            n = len(self._buffer)
        if len(self._buffer) <= self._pos:
            return b""
        newpos = min(len(self._buffer), self._pos + n)
        b = self._buffer[self._pos : newpos]
        self._pos = newpos
        return bytes(b)

    def read1(self, n):
        """Same as read(); an in-memory buffer needs no "single raw read"
        distinction.
        """
        return self.read(n)

    def write(self, b):
        """Write bytes b at the current position; return the byte count."""
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, unicode):
            raise TypeError("can't write unicode to binary stream")
        n = len(b)
        if n == 0:
            return 0
        pos = self._pos
        if pos > len(self._buffer):
            # Inserts null bytes between the current end of the file
            # and the new write position.
            padding = b'\x00' * (pos - len(self._buffer))
            self._buffer += padding
        self._buffer[pos:pos + n] = b
        self._pos += n
        return n

    def seek(self, pos, whence=0):
        """Move the stream position; whence is 0 (abs), 1 (rel), or 2 (end)."""
        if self.closed:
            raise ValueError("seek on closed file")
        try:
            pos = pos.__index__()
        except AttributeError as err:
            raise TypeError("an integer is required") # from err
        if whence == 0:
            if pos < 0:
                raise ValueError("negative seek position %r" % (pos,))
            self._pos = pos
        elif whence == 1:
            # Relative seeks clamp at position 0 rather than raising.
            self._pos = max(0, self._pos + pos)
        elif whence == 2:
            self._pos = max(0, len(self._buffer) + pos)
        else:
            raise ValueError("invalid whence value")
        return self._pos

    def tell(self):
        """Return the current stream position."""
        if self.closed:
            raise ValueError("tell on closed file")
        return self._pos

    def truncate(self, pos=None):
        """Truncate the buffer to pos bytes (default: current position)."""
        if self.closed:
            raise ValueError("truncate on closed file")
        if pos is None:
            pos = self._pos
        elif pos < 0:
            raise ValueError("negative truncate position %r" % (pos,))
        del self._buffer[pos:]
        # Also moves the stream position to the new end.
        return self.seek(pos)

    def readable(self):
        return True

    def writable(self):
        return True

    def seekable(self):
        return True
# Use the faster implementation of BytesIO if available
try:
    import _bytesio

    class BytesIO(_bytesio._BytesIO, BufferedIOBase):
        # Re-export the C implementation's docstring so help(io.BytesIO)
        # reads the same whichever implementation is selected.
        __doc__ = _bytesio._BytesIO.__doc__

except ImportError:
    # Fall back to the pure-Python implementation defined above.
    BytesIO = _BytesIO
class BufferedReader(_BufferedIOMixin):

    """BufferedReader(raw[, buffer_size])

    A buffer for a readable, sequential BaseRawIO object.

    The constructor creates a BufferedReader for the given readable raw
    stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
    is used.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.
        """
        raw._checkReadable()
        _BufferedIOMixin.__init__(self, raw)
        self.buffer_size = buffer_size
        self._reset_read_buf()
        # Serializes access to the read buffer across threads.
        self._read_lock = threading.Lock()

    def _reset_read_buf(self):
        # _read_buf holds data already fetched from raw; _read_pos is the
        # index of the first byte in it not yet handed to the caller.
        self._read_buf = b""
        self._read_pos = 0

    def read(self, n=None):
        """Read n bytes.

        Returns exactly n bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode. If n is negative, read until EOF or until read() would
        block.
        """
        with self._read_lock:
            return self._read_unlocked(n)

    def _read_unlocked(self, n=None):
        nodata_val = b""
        empty_values = (b"", None)
        buf = self._read_buf
        pos = self._read_pos

        # Special case for when the number of bytes to read is unspecified.
        if n is None or n == -1:
            self._reset_read_buf()
            chunks = [buf[pos:]] # Strip the consumed bytes.
            current_size = 0
            while True:
                # Read until EOF or until read() would block.
                chunk = self.raw.read()
                if chunk in empty_values:
                    nodata_val = chunk
                    break
                current_size += len(chunk)
                chunks.append(chunk)
            return b"".join(chunks) or nodata_val

        # The number of bytes to read is specified, return at most n bytes.
        avail = len(buf) - pos # Length of the available buffered data.
        if n <= avail:
            # Fast path: the data to read is fully buffered.
            self._read_pos += n
            return buf[pos:pos+n]
        # Slow path: read from the stream until enough bytes are read,
        # or until an EOF occurs or until read() would block.
        chunks = [buf[pos:]]
        wanted = max(self.buffer_size, n)
        while avail < n:
            chunk = self.raw.read(wanted)
            if chunk in empty_values:
                nodata_val = chunk
                break
            avail += len(chunk)
            chunks.append(chunk)
        # n is more than avail only when an EOF occurred or when
        # read() would have blocked.
        n = min(n, avail)
        out = b"".join(chunks)
        self._read_buf = out[n:] # Save the extra data in the buffer.
        self._read_pos = 0
        return out[:n] if out else nodata_val

    def peek(self, n=0):
        """Returns buffered bytes without advancing the position.

        The argument indicates a desired minimal number of bytes; we
        do at most one raw read to satisfy it. We never return more
        than self.buffer_size.
        """
        with self._read_lock:
            return self._peek_unlocked(n)

    def _peek_unlocked(self, n=0):
        want = min(n, self.buffer_size)
        have = len(self._read_buf) - self._read_pos
        if have < want:
            # Top up the buffer with a single raw read.
            to_read = self.buffer_size - have
            current = self.raw.read(to_read)
            if current:
                self._read_buf = self._read_buf[self._read_pos:] + current
                self._read_pos = 0
        return self._read_buf[self._read_pos:]

    def read1(self, n):
        """Reads up to n bytes, with at most one read() system call."""
        # Returns up to n bytes. If at least one byte is buffered, we
        # only return buffered bytes. Otherwise, we do one raw read.
        if n <= 0:
            return b""
        with self._read_lock:
            self._peek_unlocked(1)
            return self._read_unlocked(
                min(n, len(self._read_buf) - self._read_pos))

    def tell(self):
        # The raw stream is ahead of the logical position by however many
        # buffered bytes the caller has not consumed yet.
        return self.raw.tell() - len(self._read_buf) + self._read_pos

    def seek(self, pos, whence=0):
        with self._read_lock:
            if whence == 1:
                # Adjust a relative seek for the unconsumed buffered bytes.
                pos -= len(self._read_buf) - self._read_pos
            pos = self.raw.seek(pos, whence)
            self._reset_read_buf()
            return pos
1017 class BufferedWriter(_BufferedIOMixin):
1019 """A buffer for a writeable sequential RawIO object.
1021 The constructor creates a BufferedWriter for the given writeable raw
1022 stream. If the buffer_size is not given, it defaults to
1023 DEFAULT_BUFFER_SIZE. If max_buffer_size is omitted, it defaults to
1024 twice the buffer size.
    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Create a buffered writer wrapping the writable raw stream."""
        # Fail early if the raw stream cannot be written to.
        raw._checkWritable()
        _BufferedIOMixin.__init__(self, raw)
        self.buffer_size = buffer_size
        # Hard cap on how much data is held while the raw stream blocks;
        # defaults to twice the soft buffer size.
        self.max_buffer_size = (2*buffer_size
                                if max_buffer_size is None
                                else max_buffer_size)
        self._write_buf = bytearray()
        # Serializes buffer mutation and flushing across threads.
        self._write_lock = threading.Lock()
1038 def write(self, b):
1039 if self.closed:
1040 raise ValueError("write to closed file")
1041 if isinstance(b, unicode):
1042 raise TypeError("can't write unicode to binary stream")
1043 with self._write_lock:
1044 # XXX we can implement some more tricks to try and avoid
1045 # partial writes
1046 if len(self._write_buf) > self.buffer_size:
1047 # We're full, so let's pre-flush the buffer
1048 try:
1049 self._flush_unlocked()
1050 except BlockingIOError as e:
1051 # We can't accept anything else.
1052 # XXX Why not just let the exception pass through?
1053 raise BlockingIOError(e.errno, e.strerror, 0)
1054 before = len(self._write_buf)
1055 self._write_buf.extend(b)
1056 written = len(self._write_buf) - before
1057 if len(self._write_buf) > self.buffer_size:
1058 try:
1059 self._flush_unlocked()
1060 except BlockingIOError as e:
1061 if len(self._write_buf) > self.max_buffer_size:
1062 # We've hit max_buffer_size. We have to accept a
1063 # partial write and cut back our buffer.
1064 overage = len(self._write_buf) - self.max_buffer_size
1065 self._write_buf = self._write_buf[:self.max_buffer_size]
1066 raise BlockingIOError(e.errno, e.strerror, overage)
1067 return written
1069 def truncate(self, pos=None):
1070 with self._write_lock:
1071 self._flush_unlocked()
1072 if pos is None:
1073 pos = self.raw.tell()
1074 return self.raw.truncate(pos)
1076 def flush(self):
1077 with self._write_lock:
1078 self._flush_unlocked()
1080 def _flush_unlocked(self):
1081 if self.closed:
1082 raise ValueError("flush of closed file")
1083 written = 0
1084 try:
1085 while self._write_buf:
1086 n = self.raw.write(self._write_buf)
1087 del self._write_buf[:n]
1088 written += n
1089 except BlockingIOError as e:
1090 n = e.characters_written
1091 del self._write_buf[:n]
1092 written += n
1093 raise BlockingIOError(e.errno, e.strerror, written)
1095 def tell(self):
1096 return self.raw.tell() + len(self._write_buf)
1098 def seek(self, pos, whence=0):
1099 with self._write_lock:
1100 self._flush_unlocked()
1101 return self.raw.seek(pos, whence)
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together to
    form a sequential IO object that can read and write.  This is
    typically used with a socket or two-way pipe.

    reader and writer are RawIOBase objects that are readable and
    writeable respectively.  If the buffer_size is omitted it defaults
    to DEFAULT_BUFFER_SIZE.  The max_buffer_size (for the buffered
    writer) defaults to twice the buffer size.
    """

    # XXX The usefulness of this (compared to having two separate IO
    # objects) is questionable.

    def __init__(self, reader, writer,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Constructor.

        The arguments are two RawIO instances.
        """
        reader._checkReadable()
        writer._checkWritable()
        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size, max_buffer_size)

    def read(self, n=None):
        """Read and return up to n bytes from the reader side (all if None)."""
        if n is None:
            n = -1
        return self.reader.read(n)

    def readinto(self, b):
        """Read bytes into the pre-allocated buffer b; return the count."""
        return self.reader.readinto(b)

    def write(self, b):
        """Write the bytes b via the buffered writer."""
        return self.writer.write(b)

    def peek(self, n=0):
        """Return buffered bytes without advancing the read position."""
        return self.reader.peek(n)

    def read1(self, n):
        """Read up to n bytes with at most one raw read."""
        return self.reader.read1(n)

    def readable(self):
        return self.reader.readable()

    def writable(self):
        return self.writer.writable()

    def flush(self):
        """Flush the writer side."""
        return self.writer.flush()

    def close(self):
        """Close both sides; the writer first so buffered data is flushed."""
        self.writer.close()
        self.reader.close()

    def isatty(self):
        """True if either underlying stream is a terminal."""
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        # Bug fix: 'closed' is a property on the underlying writer, not a
        # method.  The old code did `self.writer.closed()`, which raised
        # "TypeError: 'bool' object is not callable" on every access.
        return self.writer.closed
class BufferedRandom(BufferedWriter, BufferedReader):

    """A buffered interface to random access streams.

    The constructor creates a reader and writer for a seekable stream,
    raw, given in the first argument.  If the buffer_size is omitted it
    defaults to DEFAULT_BUFFER_SIZE.  The max_buffer_size (for the
    buffered writer) defaults to twice the buffer size.
    """

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        # The raw stream must be seekable.  Both base constructors run on
        # the same raw stream, giving this object independent read and
        # write buffers.
        raw._checkSeekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)

    def seek(self, pos, whence=0):
        """Flush pending writes, seek the raw stream, drop the read buffer."""
        self.flush()
        # First do the raw seek, then empty the read buffer, so that
        # if the raw seek fails, we don't lose buffered data forever.
        pos = self.raw.seek(pos, whence)
        with self._read_lock:
            self._reset_read_buf()
        return pos

    def tell(self):
        """Return the logical position from whichever buffer is active."""
        if self._write_buf:
            return self.raw.tell() + len(self._write_buf)
        else:
            return BufferedReader.tell(self)

    def truncate(self, pos=None):
        """Truncate to pos (default: the current logical position)."""
        if pos is None:
            pos = self.tell()
        # Use seek to flush the read buffer.
        self.seek(pos)
        return BufferedWriter.truncate(self)

    def read(self, n=None):
        """Read up to n bytes (all if n is None/-1), flushing writes first."""
        if n is None:
            n = -1
        self.flush()
        return BufferedReader.read(self, n)

    def readinto(self, b):
        """Fill buffer b with bytes, flushing pending writes first."""
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, n=0):
        """Return buffered bytes without advancing, flushing writes first."""
        self.flush()
        return BufferedReader.peek(self, n)

    def read1(self, n):
        """Read up to n bytes with one raw read, flushing writes first."""
        self.flush()
        return BufferedReader.read1(self, n)

    def write(self, b):
        """Write bytes b, first rewinding the raw stream past any readahead."""
        if self._read_buf:
            # Undo readahead
            with self._read_lock:
                self.raw.seek(self._read_pos - len(self._read_buf), 1)
                self._reset_read_buf()
        return BufferedWriter.write(self, b)
class TextIOBase(IOBase):

    """Base class for text I/O.

    This class provides a character and line based interface to stream
    I/O.  There is no readinto method because Python's character strings
    are immutable.  There is no public constructor.
    """

    def read(self, n=-1):
        """Read and return at most n characters from the stream.

        Reads from the underlying buffer until n characters are available
        or EOF is reached; if n is negative or omitted, reads until EOF.
        """
        self._unsupported("read")

    def write(self, s):
        """Write the string s to the stream."""
        self._unsupported("write")

    def truncate(self, pos=None):
        """Truncate the stream's size to pos."""
        self._unsupported("truncate")

    def readline(self):
        """Read and return one line, up to and including the newline.

        Returns an empty string if EOF is hit immediately.
        """
        self._unsupported("readline")

    @property
    def encoding(self):
        """Name of the encoding in use (None here; subclasses override)."""
        return None

    @property
    def newlines(self):
        """Line endings translated so far while reading.

        Only line endings translated during reading are considered.
        Subclasses should override; the base class reports None.
        """
        return None
class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
    r"""Codec used when reading a file in universal newlines mode.

    Wraps another incremental decoder and translates \r\n and \r into \n,
    recording which kinds of newline have been seen.  With translate=False
    the data is returned untouched, but a \r\n pair is still guaranteed to
    be delivered in one piece: a trailing \r is held back until the next
    chunk arrives.
    """

    # Bit flags accumulated in self.seennl.
    _LF = 1
    _CR = 2
    _CRLF = 4

    def __init__(self, decoder, translate, errors='strict'):
        codecs.IncrementalDecoder.__init__(self, errors=errors)
        self.buffer = b''
        self.translate = translate
        self.decoder = decoder
        self.seennl = 0

    def decode(self, input, final=False):
        # Prepend a \r retained from the previous call, if any.
        if self.buffer:
            input = self.buffer + input

        output = self.decoder.decode(input, final=final)

        # Hold back a trailing \r (unless this is the final chunk) so that
        # a \r\n pair split across chunks reaches readline() in one piece.
        if output.endswith("\r") and not final:
            output = output[:-1]
            self.buffer = b'\r'
        else:
            self.buffer = b''

        # Tally which newline styles appear in this chunk.
        crlf = output.count('\r\n')
        cr = output.count('\r') - crlf
        lf = output.count('\n') - crlf
        if lf:
            self.seennl |= self._LF
        if cr:
            self.seennl |= self._CR
        if crlf:
            self.seennl |= self._CRLF

        if self.translate:
            if crlf:
                output = output.replace("\r\n", "\n")
            if cr:
                output = output.replace("\r", "\n")

        return output

    def getstate(self):
        buf, flag = self.decoder.getstate()
        # The retained \r logically belongs to the still-undecoded input.
        return buf + self.buffer, flag

    def setstate(self, state):
        buf, flag = state
        if buf.endswith(b'\r'):
            self.buffer = b'\r'
            buf = buf[:-1]
        else:
            self.buffer = b''
        self.decoder.setstate((buf, flag))

    def reset(self):
        self.seennl = 0
        self.buffer = b''
        self.decoder.reset()

    @property
    def newlines(self):
        # Map the seennl bitmask onto None, a single string, or a tuple.
        return (None,
                "\n",
                "\r",
                ("\r", "\n"),
                "\r\n",
                ("\n", "\r\n"),
                ("\r", "\r\n"),
                ("\r", "\n", "\r\n"),
               )[self.seennl]
class TextIOWrapper(TextIOBase):

    r"""Character and line based layer over a BufferedIOBase object, buffer.

    encoding gives the name of the encoding that the stream will be
    decoded or encoded with.  It defaults to locale.getpreferredencoding.

    errors determines the strictness of encoding and decoding (see the
    codecs.register) and defaults to "strict".

    newline can be None, '', '\n', '\r', or '\r\n'.  It controls the
    handling of line endings.  If it is None, universal newlines is
    enabled.  With this enabled, on input, the line endings '\n', '\r',
    or '\r\n' are translated to '\n' before being returned to the
    caller.  Conversely, on output, '\n' is translated to the system
    default line separator, os.linesep.  If newline is any other of its
    legal values, that newline becomes the newline when the file is read
    and it is returned untranslated.  On output, '\n' is converted to the
    newline.

    If line_buffering is True, a call to flush is implied when a call to
    write contains a newline character.
    """

    # Size (in bytes) of the chunks read from the underlying buffer.
    _CHUNK_SIZE = 128

    def __init__(self, buffer, encoding=None, errors=None, newline=None,
                 line_buffering=False):
        if newline not in (None, "", "\n", "\r", "\r\n"):
            raise ValueError("illegal newline value: %r" % (newline,))
        if encoding is None:
            try:
                # Prefer the device encoding when the buffer has a fileno.
                encoding = os.device_encoding(buffer.fileno())
            except (AttributeError, UnsupportedOperation):
                pass
            if encoding is None:
                try:
                    import locale
                except ImportError:
                    # Importing locale may fail if Python is being built
                    encoding = "ascii"
                else:
                    encoding = locale.getpreferredencoding()

        if not isinstance(encoding, basestring):
            raise ValueError("invalid encoding: %r" % encoding)

        if errors is None:
            errors = "strict"
        else:
            if not isinstance(errors, basestring):
                raise ValueError("invalid errors: %r" % errors)

        self.buffer = buffer
        self._line_buffering = line_buffering
        self._encoding = encoding
        self._errors = errors
        self._readuniversal = not newline      # '' or None: accept any ending
        self._readtranslate = newline is None  # None: also translate to '\n'
        self._readnl = newline
        self._writetranslate = newline != ''
        self._writenl = newline or os.linesep
        self._encoder = None
        self._decoder = None
        self._decoded_chars = ''  # buffer for text returned from decoder
        self._decoded_chars_used = 0  # offset into _decoded_chars for read()
        self._snapshot = None  # info for reconstructing decoder state
        self._seekable = self._telling = self.buffer.seekable()

        # self._snapshot is either None, or a tuple (dec_flags, next_input)
        # where dec_flags is the second (integer) item of the decoder state
        # and next_input is the chunk of input bytes that comes next after the
        # snapshot point.  We use this to reconstruct decoder states in tell().

    # Naming convention:
    #   - "bytes_..." for integer variables that count input bytes
    #   - "chars_..." for integer variables that count decoded characters

    @property
    def encoding(self):
        """The name of the encoding used by this stream."""
        return self._encoding

    @property
    def errors(self):
        """The error setting used for encoding and decoding."""
        return self._errors

    @property
    def line_buffering(self):
        """Whether writing a newline implies a flush."""
        return self._line_buffering

    def seekable(self):
        return self._seekable

    def readable(self):
        return self.buffer.readable()

    def writable(self):
        return self.buffer.writable()

    def flush(self):
        """Flush the underlying buffer; re-enables tell() after next()."""
        self.buffer.flush()
        self._telling = self._seekable

    def close(self):
        """Flush (best effort) and close the underlying buffer."""
        try:
            self.flush()
        except:
            pass  # If flush() fails, just give up
        self.buffer.close()

    @property
    def closed(self):
        return self.buffer.closed

    def fileno(self):
        return self.buffer.fileno()

    def isatty(self):
        return self.buffer.isatty()

    def write(self, s):
        """Encode the string s, write it to the buffer, and return len(s).

        Translates '\\n' to the configured newline when applicable, and
        flushes if line buffering is on and a newline was written.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if not isinstance(s, unicode):
            raise TypeError("can't write %s to text stream" %
                            s.__class__.__name__)
        length = len(s)
        haslf = (self._writetranslate or self._line_buffering) and "\n" in s
        if haslf and self._writetranslate and self._writenl != "\n":
            s = s.replace("\n", self._writenl)
        encoder = self._encoder or self._get_encoder()
        # XXX What if we were just reading?
        b = encoder.encode(s)
        self.buffer.write(b)
        if self._line_buffering and (haslf or "\r" in s):
            self.flush()
        # Writing invalidates any saved decoder state used by tell()/seek().
        self._snapshot = None
        if self._decoder:
            self._decoder.reset()
        return length

    def _get_encoder(self):
        # Create (and cache) the incremental encoder lazily.
        make_encoder = codecs.getincrementalencoder(self._encoding)
        self._encoder = make_encoder(self._errors)
        return self._encoder

    def _get_decoder(self):
        # Create (and cache) the incremental decoder lazily, wrapping it in
        # IncrementalNewlineDecoder when universal newlines are enabled.
        make_decoder = codecs.getincrementaldecoder(self._encoding)
        decoder = make_decoder(self._errors)
        if self._readuniversal:
            decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
        self._decoder = decoder
        return decoder

    # The following three methods implement an ADT for _decoded_chars.
    # Text returned from the decoder is buffered here until the client
    # requests it by calling our read() or readline() method.
    def _set_decoded_chars(self, chars):
        """Set the _decoded_chars buffer."""
        self._decoded_chars = chars
        self._decoded_chars_used = 0

    def _get_decoded_chars(self, n=None):
        """Advance into the _decoded_chars buffer."""
        offset = self._decoded_chars_used
        if n is None:
            chars = self._decoded_chars[offset:]
        else:
            chars = self._decoded_chars[offset:offset + n]
        self._decoded_chars_used += len(chars)
        return chars

    def _rewind_decoded_chars(self, n):
        """Rewind the _decoded_chars buffer."""
        if self._decoded_chars_used < n:
            raise AssertionError("rewind decoded_chars out of bounds")
        self._decoded_chars_used -= n

    def _read_chunk(self):
        """
        Read and decode the next chunk of data from the BufferedReader.

        The return value is True unless EOF was reached.  The decoded
        string is placed in self._decoded_chars (replacing its previous
        value).  The entire input chunk is sent to the decoder, though
        some of it may remain buffered in the decoder, yet to be
        converted.
        """
        if self._decoder is None:
            raise ValueError("no decoder")

        if self._telling:
            # To prepare for tell(), we need to snapshot a point in the
            # file where the decoder's input buffer is empty.

            dec_buffer, dec_flags = self._decoder.getstate()
            # Given this, we know there was a valid snapshot point
            # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).

        # Read a chunk, decode it, and put the result in self._decoded_chars.
        input_chunk = self.buffer.read1(self._CHUNK_SIZE)
        eof = not input_chunk
        self._set_decoded_chars(self._decoder.decode(input_chunk, eof))

        if self._telling:
            # At the snapshot point, len(dec_buffer) bytes before the read,
            # the next input to be decoded is dec_buffer + input_chunk.
            self._snapshot = (dec_flags, dec_buffer + input_chunk)

        return not eof

    def _pack_cookie(self, position, dec_flags=0,
                     bytes_to_feed=0, need_eof=0, chars_to_skip=0):
        # The meaning of a tell() cookie is: seek to position, set the
        # decoder flags to dec_flags, read bytes_to_feed bytes, feed them
        # into the decoder with need_eof as the EOF flag, then skip
        # chars_to_skip characters of the decoded result.  For most simple
        # decoders, tell() will often just give a byte offset in the file.
        return (position | (dec_flags<<64) | (bytes_to_feed<<128) |
               (chars_to_skip<<192) | bool(need_eof)<<256)

    def _unpack_cookie(self, bigint):
        # Inverse of _pack_cookie: split the big integer back into its
        # five 64-bit fields.
        rest, position = divmod(bigint, 1<<64)
        rest, dec_flags = divmod(rest, 1<<64)
        rest, bytes_to_feed = divmod(rest, 1<<64)
        need_eof, chars_to_skip = divmod(rest, 1<<64)
        return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip

    def tell(self):
        """Return a cookie encoding the current logical stream position."""
        if not self._seekable:
            raise IOError("underlying stream is not seekable")
        if not self._telling:
            raise IOError("telling position disabled by next() call")
        self.flush()
        position = self.buffer.tell()
        decoder = self._decoder
        if decoder is None or self._snapshot is None:
            if self._decoded_chars:
                # This should never happen.
                raise AssertionError("pending decoded text")
            return position

        # Skip backward to the snapshot point (see _read_chunk).
        dec_flags, next_input = self._snapshot
        position -= len(next_input)

        # How many decoded characters have been used up since the snapshot?
        chars_to_skip = self._decoded_chars_used
        if chars_to_skip == 0:
            # We haven't moved from the snapshot point.
            return self._pack_cookie(position, dec_flags)

        # Starting from the snapshot position, we will walk the decoder
        # forward until it gives us enough decoded characters.
        saved_state = decoder.getstate()
        try:
            # Note our initial start point.
            decoder.setstate((b'', dec_flags))
            start_pos = position
            start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
            need_eof = 0

            # Feed the decoder one byte at a time.  As we go, note the
            # nearest "safe start point" before the current location
            # (a point where the decoder has nothing buffered, so seek()
            # can safely start from there and advance to this location).
            for next_byte in next_input:
                bytes_fed += 1
                chars_decoded += len(decoder.decode(next_byte))
                dec_buffer, dec_flags = decoder.getstate()
                if not dec_buffer and chars_decoded <= chars_to_skip:
                    # Decoder buffer is empty, so this is a safe start point.
                    start_pos += bytes_fed
                    chars_to_skip -= chars_decoded
                    start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
                if chars_decoded >= chars_to_skip:
                    break
            else:
                # We didn't get enough decoded data; signal EOF to get more.
                chars_decoded += len(decoder.decode(b'', final=True))
                need_eof = 1
                if chars_decoded < chars_to_skip:
                    raise IOError("can't reconstruct logical file position")

            # The returned cookie corresponds to the last safe start point.
            return self._pack_cookie(
                start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
        finally:
            decoder.setstate(saved_state)

    def truncate(self, pos=None):
        """Truncate to pos (default: current position) after flushing."""
        self.flush()
        if pos is None:
            pos = self.tell()
        self.seek(pos)
        return self.buffer.truncate()

    def seek(self, cookie, whence=0):
        """Set the stream position from a cookie returned by tell()."""
        if self.closed:
            # NOTE(review): message says "tell" although this is seek().
            raise ValueError("tell on closed file")
        if not self._seekable:
            raise IOError("underlying stream is not seekable")
        if whence == 1: # seek relative to current position
            if cookie != 0:
                raise IOError("can't do nonzero cur-relative seeks")
            # Seeking to the current position should attempt to
            # sync the underlying buffer with the current position.
            whence = 0
            cookie = self.tell()
        if whence == 2: # seek relative to end of file
            if cookie != 0:
                raise IOError("can't do nonzero end-relative seeks")
            self.flush()
            position = self.buffer.seek(0, 2)
            self._set_decoded_chars('')
            self._snapshot = None
            if self._decoder:
                self._decoder.reset()
            return position
        if whence != 0:
            raise ValueError("invalid whence (%r, should be 0, 1 or 2)" %
                             (whence,))
        if cookie < 0:
            raise ValueError("negative seek position %r" % (cookie,))
        self.flush()

        # The strategy of seek() is to go back to the safe start point
        # and replay the effect of read(chars_to_skip) from there.
        start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
            self._unpack_cookie(cookie)

        # Seek back to the safe start point.
        self.buffer.seek(start_pos)
        self._set_decoded_chars('')
        self._snapshot = None

        # Restore the decoder to its state from the safe start point.
        if self._decoder or dec_flags or chars_to_skip:
            self._decoder = self._decoder or self._get_decoder()
            self._decoder.setstate((b'', dec_flags))
            self._snapshot = (dec_flags, b'')

        if chars_to_skip:
            # Just like _read_chunk, feed the decoder and save a snapshot.
            input_chunk = self.buffer.read(bytes_to_feed)
            self._set_decoded_chars(
                self._decoder.decode(input_chunk, need_eof))
            self._snapshot = (dec_flags, input_chunk)

            # Skip chars_to_skip of the decoded characters.
            if len(self._decoded_chars) < chars_to_skip:
                raise IOError("can't restore logical file position")
            self._decoded_chars_used = chars_to_skip

        return cookie

    def read(self, n=None):
        """Read and return at most n characters (all of them if n < 0)."""
        if n is None:
            n = -1
        decoder = self._decoder or self._get_decoder()
        if n < 0:
            # Read everything.
            result = (self._get_decoded_chars() +
                      decoder.decode(self.buffer.read(), final=True))
            self._set_decoded_chars('')
            self._snapshot = None
            return result
        else:
            # Keep reading chunks until we have n characters to return.
            eof = False
            result = self._get_decoded_chars(n)
            while len(result) < n and not eof:
                eof = not self._read_chunk()
                result += self._get_decoded_chars(n - len(result))
            return result

    def next(self):
        """Iterator protocol (Python 2): return the next line.

        Disables tell() while iterating; it is re-enabled when the
        iterator is exhausted or flush() is called.
        """
        self._telling = False
        line = self.readline()
        if not line:
            self._snapshot = None
            self._telling = self._seekable
            raise StopIteration
        return line

    def readline(self, limit=None):
        """Read until a newline or EOF, returning at most limit characters."""
        if self.closed:
            raise ValueError("read from closed file")
        if limit is None:
            limit = -1
        if not isinstance(limit, (int, long)):
            raise TypeError("limit must be an integer")

        # Grab all the decoded text (we will rewind any extra bits later).
        line = self._get_decoded_chars()

        start = 0
        decoder = self._decoder or self._get_decoder()

        pos = endpos = None
        while True:
            if self._readtranslate:
                # Newlines are already translated, only search for \n
                pos = line.find('\n', start)
                if pos >= 0:
                    endpos = pos + 1
                    break
                else:
                    start = len(line)

            elif self._readuniversal:
                # Universal newline search. Find any of \r, \r\n, \n
                # The decoder ensures that \r\n are not split in two pieces

                # In C we'd look for these in parallel of course.
                nlpos = line.find("\n", start)
                crpos = line.find("\r", start)
                if crpos == -1:
                    if nlpos == -1:
                        # Nothing found
                        start = len(line)
                    else:
                        # Found \n
                        endpos = nlpos + 1
                        break
                elif nlpos == -1:
                    # Found lone \r
                    endpos = crpos + 1
                    break
                elif nlpos < crpos:
                    # Found \n
                    endpos = nlpos + 1
                    break
                elif nlpos == crpos + 1:
                    # Found \r\n
                    endpos = crpos + 2
                    break
                else:
                    # Found \r
                    endpos = crpos + 1
                    break
            else:
                # non-universal
                pos = line.find(self._readnl)
                if pos >= 0:
                    endpos = pos + len(self._readnl)
                    break

            if limit >= 0 and len(line) >= limit:
                endpos = limit  # reached length limit
                break

            # No line ending seen yet - get more data
            # (more_line appears unused; kept from an earlier revision.)
            more_line = ''
            while self._read_chunk():
                if self._decoded_chars:
                    break
            if self._decoded_chars:
                line += self._get_decoded_chars()
            else:
                # end of file
                self._set_decoded_chars('')
                self._snapshot = None
                return line

        if limit >= 0 and endpos > limit:
            endpos = limit  # don't exceed limit

        # Rewind _decoded_chars to just after the line ending we found.
        self._rewind_decoded_chars(len(line) - endpos)
        return line[:endpos]

    @property
    def newlines(self):
        """Line endings seen so far by the decoder (or None)."""
        return self._decoder.newlines if self._decoder else None
class StringIO(TextIOWrapper):

    """An in-memory text stream.

    initial_value seeds the stream's contents; the remaining arguments
    have the same meaning as in TextIOWrapper's constructor.
    """

    def __init__(self, initial_value="", encoding="utf-8",
                 errors="strict", newline="\n"):
        super(StringIO, self).__init__(BytesIO(),
                                       encoding=encoding,
                                       errors=errors,
                                       newline=newline)
        if initial_value:
            # Coerce non-unicode initial values before writing them out,
            # then rewind so reads start from the beginning.
            if not isinstance(initial_value, unicode):
                initial_value = unicode(initial_value)
            self.write(initial_value)
            self.seek(0)

    def getvalue(self):
        """Return a unicode copy of everything written so far."""
        self.flush()
        raw = self.buffer.getvalue()
        return raw.decode(self._encoding, self._errors)