Objects/fileobject.c

   1 /* File object implementation */
   2
   3 #define PY_SSIZE_T_CLEAN
   4 #include "Python.h"
   5 #include "structmember.h"
   6
   7 #ifdef HAVE_SYS_TYPES_H
   8 #include <sys/types.h>
   9 #endif /* HAVE_SYS_TYPES_H */
  10
  11 #ifdef MS_WINDOWS
  12 #define fileno _fileno
  13 /* can simulate truncate with Win32 API functions; see file_truncate */
  14 #define HAVE_FTRUNCATE
  15 #define WIN32_LEAN_AND_MEAN
  16 #include <windows.h>
  17 #endif
  18
  19 #ifdef _MSC_VER
  20 /* Need GetVersion to see if on NT so safe to use _wfopen */
  21 #define WIN32_LEAN_AND_MEAN
  22 #include <windows.h>
  23 #endif /* _MSC_VER */
  24
  25 #if defined(PYOS_OS2) && defined(PYCC_GCC)
  26 #include <io.h>
  27 #endif
  28
  29 #define BUF(v) PyString_AS_STRING((PyStringObject *)v)
  30
  31 #ifndef DONT_HAVE_ERRNO_H
  32 #include <errno.h>
  33 #endif
  34
  35 #ifdef HAVE_GETC_UNLOCKED
  36 #define GETC(f) getc_unlocked(f)
  37 #define FLOCKFILE(f) flockfile(f)
  38 #define FUNLOCKFILE(f) funlockfile(f)
  39 #else
  40 #define GETC(f) getc(f)
  41 #define FLOCKFILE(f)
  42 #define FUNLOCKFILE(f)
  43 #endif
  44
  45 /* Bits in f_newlinetypes */
  46 #define NEWLINE_UNKNOWN 0       /* No newline seen, yet */
  47 #define NEWLINE_CR 1            /* \r newline seen */
  48 #define NEWLINE_LF 2            /* \n newline seen */
  49 #define NEWLINE_CRLF 4          /* \r\n newline seen */
  50
  51 #ifdef __cplusplus
  52 extern "C" {
  53 #endif
  54
  55 FILE *
  56 PyFile_AsFile(PyObject *f)
  57 {
  58         if (f == NULL || !PyFile_Check(f))
  59                 return NULL;
  60         else
  61                 return ((PyFileObject *)f)->f_fp;
  62 }
  63
  64 PyObject *
  65 PyFile_Name(PyObject *f)
  66 {
  67         if (f == NULL || !PyFile_Check(f))
  68                 return NULL;
  69         else
  70                 return ((PyFileObject *)f)->f_name;
  71 }
  72
  73 /* On Unix, fopen will succeed for directories.
  74    In Python, there should be no file objects referring to
  75    directories, so we need a check.  */
  76
  77 static PyFileObject*
  78 dircheck(PyFileObject* f)
  79 {
  80 #if defined(HAVE_FSTAT) && defined(S_IFDIR) && defined(EISDIR)
  81         struct stat buf;
  82         if (f->f_fp == NULL)
  83                 return f;
  84         if (fstat(fileno(f->f_fp), &buf) == 0 &&
  85             S_ISDIR(buf.st_mode)) {
  86                 char *msg = strerror(EISDIR);
  87                 PyObject *exc = PyObject_CallFunction(PyExc_IOError, "(is)",
  88                                                       EISDIR, msg);
  89                 PyErr_SetObject(PyExc_IOError, exc);
  90                 Py_XDECREF(exc);
  91                 return NULL;
  92         }
  93 #endif
  94         return f;
  95 }
  96
  97
  98 static PyObject *
  99 fill_file_fields(PyFileObject *f, FILE *fp, PyObject *name, char *mode,
 100                  int (*close)(FILE *))
 101 {
 102         assert(name != NULL);
 103         assert(f != NULL);
 104         assert(PyFile_Check(f));
 105         assert(f->f_fp == NULL);
 106
 107         Py_DECREF(f->f_name);
 108         Py_DECREF(f->f_mode);
 109         Py_DECREF(f->f_encoding);
 110
 111         Py_INCREF(name);
 112         f->f_name = name;
 113
 114         f->f_mode = PyString_FromString(mode);
 115
 116         f->f_close = close;
 117         f->f_softspace = 0;
 118         f->f_binary = strchr(mode,'b') != NULL;
 119         f->f_buf = NULL;
 120         f->f_univ_newline = (strchr(mode, 'U') != NULL);
 121         f->f_newlinetypes = NEWLINE_UNKNOWN;
 122         f->f_skipnextlf = 0;
 123         Py_INCREF(Py_None);
 124         f->f_encoding = Py_None;
 125
 126         if (f->f_mode == NULL)
 127                 return NULL;
 128         f->f_fp = fp;
 129         f = dircheck(f);
 130         return (PyObject *) f;
 131 }
 132
 133 /* check for known incorrect mode strings - problem is, platforms are
 134    free to accept any mode characters they like and are supposed to
 135    ignore stuff they don't understand... write or append mode with
 136    universal newline support is expressly forbidden by PEP 278.
 137    Additionally, remove the 'U' from the mode string as platforms
 138    won't know what it is. Non-zero return signals an exception */
 139 int
 140 _PyFile_SanitizeMode(char *mode)
 141 {
 142         char *upos;
 143         size_t len = strlen(mode);
 144
 145         if (!len) {
 146                 PyErr_SetString(PyExc_ValueError, "empty mode string");
 147                 return -1;
 148         }
 149
 150         upos = strchr(mode, 'U');
 151         if (upos) {
 152                 memmove(upos, upos+1, len-(upos-mode)); /* incl null char */
 153
 154                 if (mode[0] == 'w' || mode[0] == 'a') {
 155                         PyErr_Format(PyExc_ValueError, "universal newline "
 156                                      "mode can only be used with modes "
 157                                      "starting with 'r'");
 158                         return -1;
 159                 }
 160
 161                 if (mode[0] != 'r') {
 162                         memmove(mode+1, mode, strlen(mode)+1);
 163                         mode[0] = 'r';
 164                 }
 165
 166                 if (!strchr(mode, 'b')) {
 167                         memmove(mode+2, mode+1, strlen(mode));
 168                         mode[1] = 'b';
 169                 }
 170         } else if (mode[0] != 'r' && mode[0] != 'w' && mode[0] != 'a') {
 171                 PyErr_Format(PyExc_ValueError, "mode string must begin with "
 172                             "one of 'r', 'w', 'a' or 'U', not '%.200s'", mode);
 173                 return -1;
 174         }
 175
 176         return 0;
 177 }
 178
 179 static PyObject *
 180 open_the_file(PyFileObject *f, char *name, char *mode)
 181 {
 182         char *newmode;
 183         assert(f != NULL);
 184         assert(PyFile_Check(f));
 185 #ifdef MS_WINDOWS
 186         /* windows ignores the passed name in order to support Unicode */
 187         assert(f->f_name != NULL);
 188 #else
 189         assert(name != NULL);
 190 #endif
 191         assert(mode != NULL);
 192         assert(f->f_fp == NULL);
 193
 194         /* probably need to replace 'U' by 'rb' */
 195         newmode = PyMem_MALLOC(strlen(mode) + 3);
 196         if (!newmode) {
 197                 PyErr_NoMemory();
 198                 return NULL;
 199         }
 200         strcpy(newmode, mode);
 201
 202         if (_PyFile_SanitizeMode(newmode)) {
 203                 f = NULL;
 204                 goto cleanup;
 205         }
 206
 207         /* rexec.py can't stop a user from getting the file() constructor --
 208            all they have to do is get *any* file object f, and then do
 209            type(f).  Here we prevent them from doing damage with it. */
 210         if (PyEval_GetRestricted()) {
 211                 PyErr_SetString(PyExc_IOError,
 212                 "file() constructor not accessible in restricted mode");
 213                 f = NULL;
 214                 goto cleanup;
 215         }
 216         errno = 0;
 217
 218 #ifdef MS_WINDOWS
 219         if (PyUnicode_Check(f->f_name)) {
 220                 PyObject *wmode;
 221                 wmode = PyUnicode_DecodeASCII(newmode, strlen(newmode), NULL);
 222                 if (f->f_name && wmode) {
 223                         Py_BEGIN_ALLOW_THREADS
 224                         /* PyUnicode_AS_UNICODE OK without thread
 225                            lock as it is a simple dereference. */
 226                         f->f_fp = _wfopen(PyUnicode_AS_UNICODE(f->f_name),
 227                                           PyUnicode_AS_UNICODE(wmode));
 228                         Py_END_ALLOW_THREADS
 229                 }
 230                 Py_XDECREF(wmode);
 231         }
 232 #endif
 233         if (NULL == f->f_fp && NULL != name) {
 234                 Py_BEGIN_ALLOW_THREADS
 235                 f->f_fp = fopen(name, newmode);
 236                 Py_END_ALLOW_THREADS
 237         }
 238
 239         if (f->f_fp == NULL) {
 240 #if defined  _MSC_VER && (_MSC_VER < 1400 || !defined(__STDC_SECURE_LIB__))
 241                 /* MSVC 6 (Microsoft) leaves errno at 0 for bad mode strings,
 242                  * across all Windows flavors.  When it sets EINVAL varies
 243                  * across Windows flavors, the exact conditions aren't
 244                  * documented, and the answer lies in the OS's implementation
 245                  * of Win32's CreateFile function (whose source is secret).
 246                  * Seems the best we can do is map EINVAL to ENOENT.
 247                  * Starting with Visual Studio .NET 2005, EINVAL is correctly
 248                  * set by our CRT error handler (set in exceptions.c.)
 249                  */
 250                 if (errno == 0) /* bad mode string */
 251                         errno = EINVAL;
 252                 else if (errno == EINVAL) /* unknown, but not a mode string */
 253                         errno = ENOENT;
 254 #endif
 255                 /* EINVAL is returned when an invalid filename or
 256                  * an invalid mode is supplied. */
 257                 if (errno == EINVAL)
 258                         PyErr_Format(PyExc_IOError,
 259                                      "invalid filename: %s or mode: %s",
 260                                      name, mode);
 261                 else
 262                         PyErr_SetFromErrnoWithFilenameObject(PyExc_IOError, f->f_name);
 263                 f = NULL;
 264         }
 265         if (f != NULL)
 266                 f = dircheck(f);
 267
 268 cleanup:
 269         PyMem_FREE(newmode);
 270
 271         return (PyObject *)f;
 272 }
 273
 274 PyObject *
 275 PyFile_FromFile(FILE *fp, char *name, char *mode, int (*close)(FILE *))
 276 {
 277         PyFileObject *f = (PyFileObject *)PyFile_Type.tp_new(&PyFile_Type,
 278                                                              NULL, NULL);
 279         if (f != NULL) {
 280                 PyObject *o_name = PyString_FromString(name);
 281                 if (o_name == NULL)
 282                         return NULL;
 283                 if (fill_file_fields(f, fp, o_name, mode, close) == NULL) {
 284                         Py_DECREF(f);
 285                         f = NULL;
 286                 }
 287                 Py_DECREF(o_name);
 288         }
 289         return (PyObject *) f;
 290 }
 291
 292 PyObject *
 293 PyFile_FromString(char *name, char *mode)
 294 {
 295         extern int fclose(FILE *);
 296         PyFileObject *f;
 297
 298         f = (PyFileObject *)PyFile_FromFile((FILE *)NULL, name, mode, fclose);
 299         if (f != NULL) {
 300                 if (open_the_file(f, name, mode) == NULL) {
 301                         Py_DECREF(f);
 302                         f = NULL;
 303                 }
 304         }
 305         return (PyObject *)f;
 306 }
 307
 308 void
 309 PyFile_SetBufSize(PyObject *f, int bufsize)
 310 {
 311         PyFileObject *file = (PyFileObject *)f;
 312         if (bufsize >= 0) {
 313                 int type;
 314                 switch (bufsize) {
 315                 case 0:
 316                         type = _IONBF;
 317                         break;
 318 #ifdef HAVE_SETVBUF
 319                 case 1:
 320                         type = _IOLBF;
 321                         bufsize = BUFSIZ;
 322                         break;
 323 #endif
 324                 default:
 325                         type = _IOFBF;
 326 #ifndef HAVE_SETVBUF
 327                         bufsize = BUFSIZ;
 328 #endif
 329                         break;
 330                 }
 331                 fflush(file->f_fp);
 332                 if (type == _IONBF) {
 333                         PyMem_Free(file->f_setbuf);
 334                         file->f_setbuf = NULL;
 335                 } else {
 336                         file->f_setbuf = (char *)PyMem_Realloc(file->f_setbuf,
 337                                                                 bufsize);
 338                 }
 339 #ifdef HAVE_SETVBUF
 340                 setvbuf(file->f_fp, file->f_setbuf, type, bufsize);
 341 #else /* !HAVE_SETVBUF */
 342                 setbuf(file->f_fp, file->f_setbuf);
 343 #endif /* !HAVE_SETVBUF */
 344         }
 345 }
 346
 347 /* Set the encoding used to output Unicode strings.
 348    Returh 1 on success, 0 on failure. */
 349
 350 int
 351 PyFile_SetEncoding(PyObject *f, const char *enc)
 352 {
 353         PyFileObject *file = (PyFileObject*)f;
 354         PyObject *str = PyString_FromString(enc);
 355
 356         assert(PyFile_Check(f));
 357         if (!str)
 358                 return 0;
 359         Py_DECREF(file->f_encoding);
 360         file->f_encoding = str;
 361         return 1;
 362 }
 363
 364 static PyObject *
 365 err_closed(void)
 366 {
 367         PyErr_SetString(PyExc_ValueError, "I/O operation on closed file");
 368         return NULL;
 369 }
 370
 371 /* Refuse regular file I/O if there's data in the iteration-buffer.
 372  * Mixing them would cause data to arrive out of order, as the read*
 373  * methods don't use the iteration buffer. */
 374 static PyObject *
 375 err_iterbuffered(void)
 376 {
 377         PyErr_SetString(PyExc_ValueError,
 378                 "Mixing iteration and read methods would lose data");
 379         return NULL;
 380 }
 381
 382 static void drop_readahead(PyFileObject *);
 383
 384 /* Methods */
 385
 386 static void
 387 file_dealloc(PyFileObject *f)
 388 {
 389         int sts = 0;
 390         if (f->weakreflist != NULL)
 391                 PyObject_ClearWeakRefs((PyObject *) f);
 392         if (f->f_fp != NULL && f->f_close != NULL) {
 393                 Py_BEGIN_ALLOW_THREADS
 394                 sts = (*f->f_close)(f->f_fp);
 395                 Py_END_ALLOW_THREADS
 396                 if (sts == EOF)
 397                         PySys_WriteStderr("close failed: [Errno %d] %s\n", errno, strerror(errno));
 398         }
 399         PyMem_Free(f->f_setbuf);
 400         Py_XDECREF(f->f_name);
 401         Py_XDECREF(f->f_mode);
 402         Py_XDECREF(f->f_encoding);
 403         drop_readahead(f);
 404         Py_TYPE(f)->tp_free((PyObject *)f);
 405 }
 406
 407 static PyObject *
 408 file_repr(PyFileObject *f)
 409 {
 410         if (PyUnicode_Check(f->f_name)) {
 411 #ifdef Py_USING_UNICODE
 412                 PyObject *ret = NULL;
 413                 PyObject *name = PyUnicode_AsUnicodeEscapeString(f->f_name);
 414                 const char *name_str = name ? PyString_AsString(name) : "?";
 415                 ret = PyString_FromFormat("<%s file u'%s', mode '%s' at %p>",
 416                                    f->f_fp == NULL ? "closed" : "open",
 417                                    name_str,
 418                                    PyString_AsString(f->f_mode),
 419                                    f);
 420                 Py_XDECREF(name);
 421                 return ret;
 422 #endif
 423         } else {
 424                 return PyString_FromFormat("<%s file '%s', mode '%s' at %p>",
 425                                    f->f_fp == NULL ? "closed" : "open",
 426                                    PyString_AsString(f->f_name),
 427                                    PyString_AsString(f->f_mode),
 428                                    f);
 429         }
 430 }
 431
 432 static PyObject *
 433 file_close(PyFileObject *f)
 434 {
 435         int sts = 0;
 436         if (f->f_fp != NULL) {
 437                 if (f->f_close != NULL) {
 438                         Py_BEGIN_ALLOW_THREADS
 439                         errno = 0;
 440                         sts = (*f->f_close)(f->f_fp);
 441                         Py_END_ALLOW_THREADS
 442                 }
 443                 f->f_fp = NULL;
 444         }
 445         PyMem_Free(f->f_setbuf);
 446         f->f_setbuf = NULL;
 447         if (sts == EOF)
 448                 return PyErr_SetFromErrno(PyExc_IOError);
 449         if (sts != 0)
 450                 return PyInt_FromLong((long)sts);
 451         Py_INCREF(Py_None);
 452         return Py_None;
 453 }
 454
 455
 456 /* Our very own off_t-like type, 64-bit if possible */
 457 #if !defined(HAVE_LARGEFILE_SUPPORT)
 458 typedef off_t Py_off_t;
 459 #elif SIZEOF_OFF_T >= 8
 460 typedef off_t Py_off_t;
 461 #elif SIZEOF_FPOS_T >= 8
 462 typedef fpos_t Py_off_t;
 463 #else
 464 #error "Large file support, but neither off_t nor fpos_t is large enough."
 465 #endif
 466
 467
 468 /* a portable fseek() function
 469    return 0 on success, non-zero on failure (with errno set) */
 470 static int
 471 _portable_fseek(FILE *fp, Py_off_t offset, int whence)
 472 {
 473 #if !defined(HAVE_LARGEFILE_SUPPORT)
 474         return fseek(fp, offset, whence);
 475 #elif defined(HAVE_FSEEKO) && SIZEOF_OFF_T >= 8
 476         return fseeko(fp, offset, whence);
 477 #elif defined(HAVE_FSEEK64)
 478         return fseek64(fp, offset, whence);
 479 #elif defined(__BEOS__)
 480         return _fseek(fp, offset, whence);
 481 #elif SIZEOF_FPOS_T >= 8
 482         /* lacking a 64-bit capable fseek(), use a 64-bit capable fsetpos()
 483            and fgetpos() to implement fseek()*/
 484         fpos_t pos;
 485         switch (whence) {
 486         case SEEK_END:
 487 #ifdef MS_WINDOWS
 488                 fflush(fp);
 489                 if (_lseeki64(fileno(fp), 0, 2) == -1)
 490                         return -1;
 491 #else
 492                 if (fseek(fp, 0, SEEK_END) != 0)
 493                         return -1;
 494 #endif
 495                 /* fall through */
 496         case SEEK_CUR:
 497                 if (fgetpos(fp, &pos) != 0)
 498                         return -1;
 499                 offset += pos;
 500                 break;
 501         /* case SEEK_SET: break; */
 502         }
 503         return fsetpos(fp, &offset);
 504 #else
 505 #error "Large file support, but no way to fseek."
 506 #endif
 507 }
 508
 509
 510 /* a portable ftell() function
 511    Return -1 on failure with errno set appropriately, current file
 512    position on success */
 513 static Py_off_t
 514 _portable_ftell(FILE* fp)
 515 {
 516 #if !defined(HAVE_LARGEFILE_SUPPORT)
 517         return ftell(fp);
 518 #elif defined(HAVE_FTELLO) && SIZEOF_OFF_T >= 8
 519         return ftello(fp);
 520 #elif defined(HAVE_FTELL64)
 521         return ftell64(fp);
 522 #elif SIZEOF_FPOS_T >= 8
 523         fpos_t pos;
 524         if (fgetpos(fp, &pos) != 0)
 525                 return -1;
 526         return pos;
 527 #else
 528 #error "Large file support, but no way to ftell."
 529 #endif
 530 }
 531
 532
 533 static PyObject *
 534 file_seek(PyFileObject *f, PyObject *args)
 535 {
 536         int whence;
 537         int ret;
 538         Py_off_t offset;
 539         PyObject *offobj, *off_index;
 540
 541         if (f->f_fp == NULL)
 542                 return err_closed();
 543         drop_readahead(f);
 544         whence = 0;
 545         if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &whence))
 546                 return NULL;
 547         off_index = PyNumber_Index(offobj);
 548         if (!off_index) {
 549                 if (!PyFloat_Check(offobj))
 550                         return NULL;
 551                 /* Deprecated in 2.6 */
 552                 PyErr_Clear();
 553                 if (PyErr_Warn(PyExc_DeprecationWarning,
 554                                "integer argument expected, got float"))
 555                         return NULL;
 556                 off_index = offobj;
 557                 Py_INCREF(offobj);
 558         }
 559 #if !defined(HAVE_LARGEFILE_SUPPORT)
 560         offset = PyInt_AsLong(off_index);
 561 #else
 562         offset = PyLong_Check(off_index) ?
 563                 PyLong_AsLongLong(off_index) : PyInt_AsLong(off_index);
 564 #endif
 565         Py_DECREF(off_index);
 566         if (PyErr_Occurred())
 567                 return NULL;
 568
 569         Py_BEGIN_ALLOW_THREADS
 570         errno = 0;
 571         ret = _portable_fseek(f->f_fp, offset, whence);
 572         Py_END_ALLOW_THREADS
 573
 574         if (ret != 0) {
 575                 PyErr_SetFromErrno(PyExc_IOError);
 576                 clearerr(f->f_fp);
 577                 return NULL;
 578         }
 579         f->f_skipnextlf = 0;
 580         Py_INCREF(Py_None);
 581         return Py_None;
 582 }
 583
 584
 585 #ifdef HAVE_FTRUNCATE
 586 static PyObject *
 587 file_truncate(PyFileObject *f, PyObject *args)
 588 {
 589         Py_off_t newsize;
 590         PyObject *newsizeobj = NULL;
 591         Py_off_t initialpos;
 592         int ret;
 593
 594         if (f->f_fp == NULL)
 595                 return err_closed();
 596         if (!PyArg_UnpackTuple(args, "truncate", 0, 1, &newsizeobj))
 597                 return NULL;
 598
 599         /* Get current file position.  If the file happens to be open for
 600          * update and the last operation was an input operation, C doesn't
 601          * define what the later fflush() will do, but we promise truncate()
 602          * won't change the current position (and fflush() *does* change it
 603          * then at least on Windows).  The easiest thing is to capture
 604          * current pos now and seek back to it at the end.
 605          */
 606         Py_BEGIN_ALLOW_THREADS
 607         errno = 0;
 608         initialpos = _portable_ftell(f->f_fp);
 609         Py_END_ALLOW_THREADS
 610         if (initialpos == -1)
 611                 goto onioerror;
 612
 613         /* Set newsize to current postion if newsizeobj NULL, else to the
 614          * specified value.
 615          */
 616         if (newsizeobj != NULL) {
 617 #if !defined(HAVE_LARGEFILE_SUPPORT)
 618                 newsize = PyInt_AsLong(newsizeobj);
 619 #else
 620                 newsize = PyLong_Check(newsizeobj) ?
 621                                 PyLong_AsLongLong(newsizeobj) :
 622                                 PyInt_AsLong(newsizeobj);
 623 #endif
 624                 if (PyErr_Occurred())
 625                         return NULL;
 626         }
 627         else /* default to current position */
 628                 newsize = initialpos;
 629
 630         /* Flush the stream.  We're mixing stream-level I/O with lower-level
 631          * I/O, and a flush may be necessary to synch both platform views
 632          * of the current file state.
 633          */
 634         Py_BEGIN_ALLOW_THREADS
 635         errno = 0;
 636         ret = fflush(f->f_fp);
 637         Py_END_ALLOW_THREADS
 638         if (ret != 0)
 639                 goto onioerror;
 640
 641 #ifdef MS_WINDOWS
 642         /* MS _chsize doesn't work if newsize doesn't fit in 32 bits,
 643            so don't even try using it. */
 644         {
 645                 HANDLE hFile;
 646
 647                 /* Have to move current pos to desired endpoint on Windows. */
 648                 Py_BEGIN_ALLOW_THREADS
 649                 errno = 0;
 650                 ret = _portable_fseek(f->f_fp, newsize, SEEK_SET) != 0;
 651                 Py_END_ALLOW_THREADS
 652                 if (ret)
 653                         goto onioerror;
 654
 655                 /* Truncate.  Note that this may grow the file! */
 656                 Py_BEGIN_ALLOW_THREADS
 657                 errno = 0;
 658                 hFile = (HANDLE)_get_osfhandle(fileno(f->f_fp));
 659                 ret = hFile == (HANDLE)-1;
 660                 if (ret == 0) {
 661                         ret = SetEndOfFile(hFile) == 0;
 662                         if (ret)
 663                                 errno = EACCES;
 664                 }
 665                 Py_END_ALLOW_THREADS
 666                 if (ret)
 667                         goto onioerror;
 668         }
 669 #else
 670         Py_BEGIN_ALLOW_THREADS
 671         errno = 0;
 672         ret = ftruncate(fileno(f->f_fp), newsize);
 673         Py_END_ALLOW_THREADS
 674         if (ret != 0)
 675                 goto onioerror;
 676 #endif /* !MS_WINDOWS */
 677
 678         /* Restore original file position. */
 679         Py_BEGIN_ALLOW_THREADS
 680         errno = 0;
 681         ret = _portable_fseek(f->f_fp, initialpos, SEEK_SET) != 0;
 682         Py_END_ALLOW_THREADS
 683         if (ret)
 684                 goto onioerror;
 685
 686         Py_INCREF(Py_None);
 687         return Py_None;
 688
 689 onioerror:
 690         PyErr_SetFromErrno(PyExc_IOError);
 691         clearerr(f->f_fp);
 692         return NULL;
 693 }
 694 #endif /* HAVE_FTRUNCATE */
 695
 696 static PyObject *
 697 file_tell(PyFileObject *f)
 698 {
 699         Py_off_t pos;
 700
 701         if (f->f_fp == NULL)
 702                 return err_closed();
 703         Py_BEGIN_ALLOW_THREADS
 704         errno = 0;
 705         pos = _portable_ftell(f->f_fp);
 706         Py_END_ALLOW_THREADS
 707         if (pos == -1) {
 708                 PyErr_SetFromErrno(PyExc_IOError);
 709                 clearerr(f->f_fp);
 710                 return NULL;
 711         }
 712         if (f->f_skipnextlf) {
 713                 int c;
 714                 c = GETC(f->f_fp);
 715                 if (c == '\n') {
 716                         f->f_newlinetypes |= NEWLINE_CRLF;
 717                         pos++;
 718                         f->f_skipnextlf = 0;
 719                 } else if (c != EOF) ungetc(c, f->f_fp);
 720         }
 721 #if !defined(HAVE_LARGEFILE_SUPPORT)
 722         return PyInt_FromLong(pos);
 723 #else
 724         return PyLong_FromLongLong(pos);
 725 #endif
 726 }
 727
 728 static PyObject *
 729 file_fileno(PyFileObject *f)
 730 {
 731         if (f->f_fp == NULL)
 732                 return err_closed();
 733         return PyInt_FromLong((long) fileno(f->f_fp));
 734 }
 735
 736 static PyObject *
 737 file_flush(PyFileObject *f)
 738 {
 739         int res;
 740
 741         if (f->f_fp == NULL)
 742                 return err_closed();
 743         Py_BEGIN_ALLOW_THREADS
 744         errno = 0;
 745         res = fflush(f->f_fp);
 746         Py_END_ALLOW_THREADS
 747         if (res != 0) {
 748                 PyErr_SetFromErrno(PyExc_IOError);
 749                 clearerr(f->f_fp);
 750                 return NULL;
 751         }
 752         Py_INCREF(Py_None);
 753         return Py_None;
 754 }
 755
 756 static PyObject *
 757 file_isatty(PyFileObject *f)
 758 {
 759         long res;
 760         if (f->f_fp == NULL)
 761                 return err_closed();
 762         Py_BEGIN_ALLOW_THREADS
 763         res = isatty((int)fileno(f->f_fp));
 764         Py_END_ALLOW_THREADS
 765         return PyBool_FromLong(res);
 766 }
 767
 768
 769 #if BUFSIZ < 8192
 770 #define SMALLCHUNK 8192
 771 #else
 772 #define SMALLCHUNK BUFSIZ
 773 #endif
 774
 775 #if SIZEOF_INT < 4
 776 #define BIGCHUNK  (512 * 32)
 777 #else
 778 #define BIGCHUNK  (512 * 1024)
 779 #endif
 780
 781 static size_t
 782 new_buffersize(PyFileObject *f, size_t currentsize)
 783 {
 784 #ifdef HAVE_FSTAT
 785         off_t pos, end;
 786         struct stat st;
 787         if (fstat(fileno(f->f_fp), &st) == 0) {
 788                 end = st.st_size;
 789                 /* The following is not a bug: we really need to call lseek()
 790                    *and* ftell().  The reason is that some stdio libraries
 791                    mistakenly flush their buffer when ftell() is called and
 792                    the lseek() call it makes fails, thereby throwing away
 793                    data that cannot be recovered in any way.  To avoid this,
 794                    we first test lseek(), and only call ftell() if lseek()
 795                    works.  We can't use the lseek() value either, because we
 796                    need to take the amount of buffered data into account.
 797                    (Yet another reason why stdio stinks. :-) */
 798                 pos = lseek(fileno(f->f_fp), 0L, SEEK_CUR);
 799                 if (pos >= 0) {
 800                         pos = ftell(f->f_fp);
 801                 }
 802                 if (pos < 0)
 803                         clearerr(f->f_fp);
 804                 if (end > pos && pos >= 0)
 805                         return currentsize + end - pos + 1;
 806                 /* Add 1 so if the file were to grow we'd notice. */
 807         }
 808 #endif
 809         if (currentsize > SMALLCHUNK) {
 810                 /* Keep doubling until we reach BIGCHUNK;
 811                    then keep adding BIGCHUNK. */
 812                 if (currentsize <= BIGCHUNK)
 813                         return currentsize + currentsize;
 814                 else
 815                         return currentsize + BIGCHUNK;
 816         }
 817         return currentsize + SMALLCHUNK;
 818 }
 819
 820 #if defined(EWOULDBLOCK) && defined(EAGAIN) && EWOULDBLOCK != EAGAIN
 821 #define BLOCKED_ERRNO(x) ((x) == EWOULDBLOCK || (x) == EAGAIN)
 822 #else
 823 #ifdef EWOULDBLOCK
 824 #define BLOCKED_ERRNO(x) ((x) == EWOULDBLOCK)
 825 #else
 826 #ifdef EAGAIN
 827 #define BLOCKED_ERRNO(x) ((x) == EAGAIN)
 828 #else
 829 #define BLOCKED_ERRNO(x) 0
 830 #endif
 831 #endif
 832 #endif
 833
 834 static PyObject *
 835 file_read(PyFileObject *f, PyObject *args)
 836 {
 837         long bytesrequested = -1;
 838         size_t bytesread, buffersize, chunksize;
 839         PyObject *v;
 840
 841         if (f->f_fp == NULL)
 842                 return err_closed();
 843         /* refuse to mix with f.next() */
 844         if (f->f_buf != NULL &&
 845             (f->f_bufend - f->f_bufptr) > 0 &&
 846             f->f_buf[0] != '\0')
 847                 return err_iterbuffered();
 848         if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
 849                 return NULL;
 850         if (bytesrequested < 0)
 851                 buffersize = new_buffersize(f, (size_t)0);
 852         else
 853                 buffersize = bytesrequested;
 854         if (buffersize > PY_SSIZE_T_MAX) {
 855                 PyErr_SetString(PyExc_OverflowError,
 856         "requested number of bytes is more than a Python string can hold");
 857                 return NULL;
 858         }
 859         v = PyString_FromStringAndSize((char *)NULL, buffersize);
 860         if (v == NULL)
 861                 return NULL;
 862         bytesread = 0;
 863         for (;;) {
 864                 Py_BEGIN_ALLOW_THREADS
 865                 errno = 0;
 866                 chunksize = Py_UniversalNewlineFread(BUF(v) + bytesread,
 867                           buffersize - bytesread, f->f_fp, (PyObject *)f);
 868                 Py_END_ALLOW_THREADS
 869                 if (chunksize == 0) {
 870                         if (!ferror(f->f_fp))
 871                                 break;
 872                         clearerr(f->f_fp);
 873                         /* When in non-blocking mode, data shouldn't
 874                          * be discarded if a blocking signal was
 875                          * received. That will also happen if
 876                          * chunksize != 0, but bytesread < buffersize. */
 877                         if (bytesread > 0 && BLOCKED_ERRNO(errno))
 878                                 break;
 879                         PyErr_SetFromErrno(PyExc_IOError);
 880                         Py_DECREF(v);
 881                         return NULL;
 882                 }
 883                 bytesread += chunksize;
 884                 if (bytesread < buffersize) {
 885                         clearerr(f->f_fp);
 886                         break;
 887                 }
 888                 if (bytesrequested < 0) {
 889                         buffersize = new_buffersize(f, buffersize);
 890                         if (_PyString_Resize(&v, buffersize) < 0)
 891                                 return NULL;
 892                 } else {
 893                         /* Got what was requested. */
 894                         break;
 895                 }
 896         }
 897         if (bytesread != buffersize)
 898                 _PyString_Resize(&v, bytesread);
 899         return v;
 900 }
 901
 902 static PyObject *
 903 file_readinto(PyFileObject *f, PyObject *args)
 904 {
 905         char *ptr;
 906         Py_ssize_t ntodo;
 907         Py_ssize_t ndone, nnow;
 908
 909         if (f->f_fp == NULL)
 910                 return err_closed();
 911         /* refuse to mix with f.next() */
 912         if (f->f_buf != NULL &&
 913             (f->f_bufend - f->f_bufptr) > 0 &&
 914             f->f_buf[0] != '\0')
 915                 return err_iterbuffered();
 916         if (!PyArg_ParseTuple(args, "w#", &ptr, &ntodo))
 917                 return NULL;
 918         ndone = 0;
 919         while (ntodo > 0) {
 920                 Py_BEGIN_ALLOW_THREADS
 921                 errno = 0;
 922                 nnow = Py_UniversalNewlineFread(ptr+ndone, ntodo, f->f_fp,
 923                                                 (PyObject *)f);
 924                 Py_END_ALLOW_THREADS
 925                 if (nnow == 0) {
 926                         if (!ferror(f->f_fp))
 927                                 break;
 928                         PyErr_SetFromErrno(PyExc_IOError);
 929                         clearerr(f->f_fp);
 930                         return NULL;
 931                 }
 932                 ndone += nnow;
 933                 ntodo -= nnow;
 934         }
 935         return PyInt_FromSsize_t(ndone);
 936 }
 937
 938 /**************************************************************************
 939 Routine to get next line using platform fgets().
 940
 941 Under MSVC 6:
 942
 943 + MS threadsafe getc is very slow (multiple layers of function calls before+
 944   after each character, to lock+unlock the stream).
 945 + The stream-locking functions are MS-internal -- can't access them from user
 946   code.
 947 + There's nothing Tim could find in the MS C or platform SDK libraries that
 948   can worm around this.
 949 + MS fgets locks/unlocks only once per line; it's the only hook we have.
 950
 951 So we use fgets for speed(!), despite that it's painful.
 952
 953 MS realloc is also slow.
 954
 955 Reports from other platforms on this method vs getc_unlocked (which MS doesn't
 956 have):
 957         Linux           a wash
 958         Solaris         a wash
 959         Tru64 Unix      getline_via_fgets significantly faster
 960
 961 CAUTION:  The C std isn't clear about this:  in those cases where fgets
 962 writes something into the buffer, can it write into any position beyond the
 963 required trailing null byte?  MSVC 6 fgets does not, and no platform is (yet)
 964 known on which it does; and it would be a strange way to code fgets. Still,
 965 getline_via_fgets may not work correctly if it does.  The std test
 966 test_bufio.py should fail if platform fgets() routinely writes beyond the
 967 trailing null byte.  #define DONT_USE_FGETS_IN_GETLINE to disable this code.
 968 **************************************************************************/
 969
 970 /* Use this routine if told to, or by default on non-getc_unlocked()
 971  * platforms unless told not to.  Yikes!  Let's spell that out:
 972  * On a platform with getc_unlocked():
 973  *     By default, use getc_unlocked().
 974  *     If you want to use fgets() instead, #define USE_FGETS_IN_GETLINE.
 975  * On a platform without getc_unlocked():
 976  *     By default, use fgets().
 977  *     If you don't want to use fgets(), #define DONT_USE_FGETS_IN_GETLINE.
 978  */
 979 #if !defined(USE_FGETS_IN_GETLINE) && !defined(HAVE_GETC_UNLOCKED)
 980 #define USE_FGETS_IN_GETLINE
 981 #endif
 982
 983 #if defined(DONT_USE_FGETS_IN_GETLINE) && defined(USE_FGETS_IN_GETLINE)
 984 #undef USE_FGETS_IN_GETLINE
 985 #endif
 986
 987 #ifdef USE_FGETS_IN_GETLINE
 988 static PyObject*
 989 getline_via_fgets(FILE *fp)
 990 {
 991 /* INITBUFSIZE is the maximum line length that lets us get away with the fast
 992  * no-realloc, one-fgets()-call path.  Boosting it isn't free, because we have
 993  * to fill this much of the buffer with a known value in order to figure out
 994  * how much of the buffer fgets() overwrites.  So if INITBUFSIZE is larger
 995  * than "most" lines, we waste time filling unused buffer slots.  100 is
 996  * surely adequate for most peoples' email archives, chewing over source code,
 997  * etc -- "regular old text files".
 998  * MAXBUFSIZE is the maximum line length that lets us get away with the less
 999  * fast (but still zippy) no-realloc, two-fgets()-call path.  See above for
1000  * cautions about boosting that.  300 was chosen because the worst real-life
1001  * text-crunching job reported on Python-Dev was a mail-log crawler where over
1002  * half the lines were 254 chars.
1003  */
1004 #define INITBUFSIZE 100
1005 #define MAXBUFSIZE 300
1006         char* p;        /* temp */
1007         char buf[MAXBUFSIZE];
1008         PyObject* v;    /* the string object result */
1009         char* pvfree;   /* address of next free slot */
1010         char* pvend;    /* address one beyond last free slot */
1011         size_t nfree;   /* # of free buffer slots; pvend-pvfree */
1012         size_t total_v_size;  /* total # of slots in buffer */
1013         size_t increment;       /* amount to increment the buffer */
1014         size_t prev_v_size;
1015
1016         /* Optimize for normal case:  avoid _PyString_Resize if at all
1017          * possible via first reading into stack buffer "buf".
1018          */
1019         total_v_size = INITBUFSIZE;     /* start small and pray */
1020         pvfree = buf;
1021         for (;;) {
1022                 Py_BEGIN_ALLOW_THREADS
1023                 pvend = buf + total_v_size;
1024                 nfree = pvend - pvfree;
1025                 memset(pvfree, '\n', nfree);
1026                 assert(nfree < INT_MAX); /* Should be atmost MAXBUFSIZE */
1027                 p = fgets(pvfree, (int)nfree, fp);
1028                 Py_END_ALLOW_THREADS
1029
1030                 if (p == NULL) {
1031                         clearerr(fp);
1032                         if (PyErr_CheckSignals())
1033                                 return NULL;
1034                         v = PyString_FromStringAndSize(buf, pvfree - buf);
1035                         return v;
1036                 }
1037                 /* fgets read *something* */
1038                 p = memchr(pvfree, '\n', nfree);
1039                 if (p != NULL) {
1040                         /* Did the \n come from fgets or from us?
1041                          * Since fgets stops at the first \n, and then writes
1042                          * \0, if it's from fgets a \0 must be next.  But if
1043                          * that's so, it could not have come from us, since
1044                          * the \n's we filled the buffer with have only more
1045                          * \n's to the right.
1046                          */
1047                         if (p+1 < pvend && *(p+1) == '\0') {
1048                                 /* It's from fgets:  we win!  In particular,
1049                                  * we haven't done any mallocs yet, and can
1050                                  * build the final result on the first try.
1051                                  */
1052                                 ++p;    /* include \n from fgets */
1053                         }
1054                         else {
1055                                 /* Must be from us:  fgets didn't fill the
1056                                  * buffer and didn't find a newline, so it
1057                                  * must be the last and newline-free line of
1058                                  * the file.
1059                                  */
1060                                 assert(p > pvfree && *(p-1) == '\0');
1061                                 --p;    /* don't include \0 from fgets */
1062                         }
1063                         v = PyString_FromStringAndSize(buf, p - buf);
1064                         return v;
1065                 }
1066                 /* yuck:  fgets overwrote all the newlines, i.e. the entire
1067                  * buffer.  So this line isn't over yet, or maybe it is but
1068                  * we're exactly at EOF.  If we haven't already, try using the
1069                  * rest of the stack buffer.
1070                  */
1071                 assert(*(pvend-1) == '\0');
1072                 if (pvfree == buf) {
1073                         pvfree = pvend - 1;     /* overwrite trailing null */
1074                         total_v_size = MAXBUFSIZE;
1075                 }
1076                 else
1077                         break;
1078         }
1079
1080         /* The stack buffer isn't big enough; malloc a string object and read
1081          * into its buffer.
1082          */
1083         total_v_size = MAXBUFSIZE << 1;
1084         v = PyString_FromStringAndSize((char*)NULL, (int)total_v_size);
1085         if (v == NULL)
1086                 return v;
1087         /* copy over everything except the last null byte */
1088         memcpy(BUF(v), buf, MAXBUFSIZE-1);
1089         pvfree = BUF(v) + MAXBUFSIZE - 1;
1090
1091         /* Keep reading stuff into v; if it ever ends successfully, break
1092          * after setting p one beyond the end of the line.  The code here is
1093          * very much like the code above, except reads into v's buffer; see
1094          * the code above for detailed comments about the logic.
1095          */
1096         for (;;) {
1097                 Py_BEGIN_ALLOW_THREADS
1098                 pvend = BUF(v) + total_v_size;
1099                 nfree = pvend - pvfree;
1100                 memset(pvfree, '\n', nfree);
1101                 assert(nfree < INT_MAX);
1102                 p = fgets(pvfree, (int)nfree, fp);
1103                 Py_END_ALLOW_THREADS
1104
1105                 if (p == NULL) {
1106                         clearerr(fp);
1107                         if (PyErr_CheckSignals()) {
1108                                 Py_DECREF(v);
1109                                 return NULL;
1110                         }
1111                         p = pvfree;
1112                         break;
1113                 }
1114                 p = memchr(pvfree, '\n', nfree);
1115                 if (p != NULL) {
1116                         if (p+1 < pvend && *(p+1) == '\0') {
1117                                 /* \n came from fgets */
1118                                 ++p;
1119                                 break;
1120                         }
1121                         /* \n came from us; last line of file, no newline */
1122                         assert(p > pvfree && *(p-1) == '\0');
1123                         --p;
1124                         break;
1125                 }
1126                 /* expand buffer and try again */
1127                 assert(*(pvend-1) == '\0');
1128                 increment = total_v_size >> 2;  /* mild exponential growth */
1129                 prev_v_size = total_v_size;
1130                 total_v_size += increment;
1131                 /* check for overflow */
1132                 if (total_v_size <= prev_v_size ||
1133                     total_v_size > PY_SSIZE_T_MAX) {
1134                         PyErr_SetString(PyExc_OverflowError,
1135                             "line is longer than a Python string can hold");
1136                         Py_DECREF(v);
1137                         return NULL;
1138                 }
1139                 if (_PyString_Resize(&v, (int)total_v_size) < 0)
1140                         return NULL;
1141                 /* overwrite the trailing null byte */
1142                 pvfree = BUF(v) + (prev_v_size - 1);
1143         }
1144         if (BUF(v) + total_v_size != p)
1145                 _PyString_Resize(&v, p - BUF(v));
1146         return v;
1147 #undef INITBUFSIZE
1148 #undef MAXBUFSIZE
1149 }
1150 #endif  /* ifdef USE_FGETS_IN_GETLINE */
1151
1152 /* Internal routine to get a line.
1153    Size argument interpretation:
1154    > 0: max length;
1155    <= 0: read arbitrary line
1156 */
1157
1158 static PyObject *
1159 get_line(PyFileObject *f, int n)
1160 {
1161         FILE *fp = f->f_fp;
1162         int c;
1163         char *buf, *end;
1164         size_t total_v_size;    /* total # of slots in buffer */
1165         size_t used_v_size;     /* # used slots in buffer */
1166         size_t increment;       /* amount to increment the buffer */
1167         PyObject *v;
1168         int newlinetypes = f->f_newlinetypes;
1169         int skipnextlf = f->f_skipnextlf;
1170         int univ_newline = f->f_univ_newline;
1171
1172 #if defined(USE_FGETS_IN_GETLINE)
1173         if (n <= 0 && !univ_newline )
1174                 return getline_via_fgets(fp);
1175 #endif
1176         total_v_size = n > 0 ? n : 100;
1177         v = PyString_FromStringAndSize((char *)NULL, total_v_size);
1178         if (v == NULL)
1179                 return NULL;
1180         buf = BUF(v);
1181         end = buf + total_v_size;
1182
1183         for (;;) {
1184                 Py_BEGIN_ALLOW_THREADS
1185                 FLOCKFILE(fp);
1186                 if (univ_newline) {
1187                         c = 'x'; /* Shut up gcc warning */
1188                         while ( buf != end && (c = GETC(fp)) != EOF ) {
1189                                 if (skipnextlf ) {
1190                                         skipnextlf = 0;
1191                                         if (c == '\n') {
1192                                                 /* Seeing a \n here with
1193                                                  * skipnextlf true means we
1194                                                  * saw a \r before.
1195                                                  */
1196                                                 newlinetypes |= NEWLINE_CRLF;
1197                                                 c = GETC(fp);
1198                                                 if (c == EOF) break;
1199                                         } else {
1200                                                 newlinetypes |= NEWLINE_CR;
1201                                         }
1202                                 }
1203                                 if (c == '\r') {
1204                                         skipnextlf = 1;
1205                                         c = '\n';
1206                                 } else if ( c == '\n')
1207                                         newlinetypes |= NEWLINE_LF;
1208                                 *buf++ = c;
1209                                 if (c == '\n') break;
1210                         }
1211                         if ( c == EOF && skipnextlf )
1212                                 newlinetypes |= NEWLINE_CR;
1213                 } else /* If not universal newlines use the normal loop */
1214                 while ((c = GETC(fp)) != EOF &&
1215                        (*buf++ = c) != '\n' &&
1216                         buf != end)
1217                         ;
1218                 FUNLOCKFILE(fp);
1219                 Py_END_ALLOW_THREADS
1220                 f->f_newlinetypes = newlinetypes;
1221                 f->f_skipnextlf = skipnextlf;
1222                 if (c == '\n')
1223                         break;
1224                 if (c == EOF) {
1225                         if (ferror(fp)) {
1226                                 PyErr_SetFromErrno(PyExc_IOError);
1227                                 clearerr(fp);
1228                                 Py_DECREF(v);
1229                                 return NULL;
1230                         }
1231                         clearerr(fp);
1232                         if (PyErr_CheckSignals()) {
1233                                 Py_DECREF(v);
1234                                 return NULL;
1235                         }
1236                         break;
1237                 }
1238                 /* Must be because buf == end */
1239                 if (n > 0)
1240                         break;
1241                 used_v_size = total_v_size;
1242                 increment = total_v_size >> 2; /* mild exponential growth */
1243                 total_v_size += increment;
1244                 if (total_v_size > PY_SSIZE_T_MAX) {
1245                         PyErr_SetString(PyExc_OverflowError,
1246                             "line is longer than a Python string can hold");
1247                         Py_DECREF(v);
1248                         return NULL;
1249                 }
1250                 if (_PyString_Resize(&v, total_v_size) < 0)
1251                         return NULL;
1252                 buf = BUF(v) + used_v_size;
1253                 end = BUF(v) + total_v_size;
1254         }
1255
1256         used_v_size = buf - BUF(v);
1257         if (used_v_size != total_v_size)
1258                 _PyString_Resize(&v, used_v_size);
1259         return v;
1260 }
1261
1262 /* External C interface */
1263
1264 PyObject *
1265 PyFile_GetLine(PyObject *f, int n)
1266 {
1267         PyObject *result;
1268
1269         if (f == NULL) {
1270                 PyErr_BadInternalCall();
1271                 return NULL;
1272         }
1273
1274         if (PyFile_Check(f)) {
1275                 PyFileObject *fo = (PyFileObject *)f;
1276                 if (fo->f_fp == NULL)
1277                         return err_closed();
1278                 /* refuse to mix with f.next() */
1279                 if (fo->f_buf != NULL &&
1280                     (fo->f_bufend - fo->f_bufptr) > 0 &&
1281                     fo->f_buf[0] != '\0')
1282                         return err_iterbuffered();
1283                 result = get_line(fo, n);
1284         }
1285         else {
1286                 PyObject *reader;
1287                 PyObject *args;
1288
1289                 reader = PyObject_GetAttrString(f, "readline");
1290                 if (reader == NULL)
1291                         return NULL;
1292                 if (n <= 0)
1293                         args = PyTuple_New(0);
1294                 else
1295                         args = Py_BuildValue("(i)", n);
1296                 if (args == NULL) {
1297                         Py_DECREF(reader);
1298                         return NULL;
1299                 }
1300                 result = PyEval_CallObject(reader, args);
1301                 Py_DECREF(reader);
1302                 Py_DECREF(args);
1303                 if (result != NULL && !PyString_Check(result) &&
1304                     !PyUnicode_Check(result)) {
1305                         Py_DECREF(result);
1306                         result = NULL;
1307                         PyErr_SetString(PyExc_TypeError,
1308                                    "object.readline() returned non-string");
1309                 }
1310         }
1311
1312         if (n < 0 && result != NULL && PyString_Check(result)) {
1313                 char *s = PyString_AS_STRING(result);
1314                 Py_ssize_t len = PyString_GET_SIZE(result);
1315                 if (len == 0) {
1316                         Py_DECREF(result);
1317                         result = NULL;
1318                         PyErr_SetString(PyExc_EOFError,
1319                                         "EOF when reading a line");
1320                 }
1321                 else if (s[len-1] == '\n') {
1322                         if (result->ob_refcnt == 1)
1323                                 _PyString_Resize(&result, len-1);
1324                         else {
1325                                 PyObject *v;
1326                                 v = PyString_FromStringAndSize(s, len-1);
1327                                 Py_DECREF(result);
1328                                 result = v;
1329                         }
1330                 }
1331         }
1332 #ifdef Py_USING_UNICODE
1333         if (n < 0 && result != NULL && PyUnicode_Check(result)) {
1334                 Py_UNICODE *s = PyUnicode_AS_UNICODE(result);
1335                 Py_ssize_t len = PyUnicode_GET_SIZE(result);
1336                 if (len == 0) {
1337                         Py_DECREF(result);
1338                         result = NULL;
1339                         PyErr_SetString(PyExc_EOFError,
1340                                         "EOF when reading a line");
1341                 }
1342                 else if (s[len-1] == '\n') {
1343                         if (result->ob_refcnt == 1)
1344                                 PyUnicode_Resize(&result, len-1);
1345                         else {
1346                                 PyObject *v;
1347                                 v = PyUnicode_FromUnicode(s, len-1);
1348                                 Py_DECREF(result);
1349                                 result = v;
1350                         }
1351                 }
1352         }
1353 #endif
1354         return result;
1355 }
1356
1357 /* Python method */
1358
1359 static PyObject *
1360 file_readline(PyFileObject *f, PyObject *args)
1361 {
1362         int n = -1;
1363
1364         if (f->f_fp == NULL)
1365                 return err_closed();
1366         /* refuse to mix with f.next() */
1367         if (f->f_buf != NULL &&
1368             (f->f_bufend - f->f_bufptr) > 0 &&
1369             f->f_buf[0] != '\0')
1370                 return err_iterbuffered();
1371         if (!PyArg_ParseTuple(args, "|i:readline", &n))
1372                 return NULL;
1373         if (n == 0)
1374                 return PyString_FromString("");
1375         if (n < 0)
1376                 n = 0;
1377         return get_line(f, n);
1378 }
1379
1380 static PyObject *
1381 file_readlines(PyFileObject *f, PyObject *args)
1382 {
1383         long sizehint = 0;
1384         PyObject *list;
1385         PyObject *line;
1386         char small_buffer[SMALLCHUNK];
1387         char *buffer = small_buffer;
1388         size_t buffersize = SMALLCHUNK;
1389         PyObject *big_buffer = NULL;
1390         size_t nfilled = 0;
1391         size_t nread;
1392         size_t totalread = 0;
1393         char *p, *q, *end;
1394         int err;
1395         int shortread = 0;
1396
1397         if (f->f_fp == NULL)
1398                 return err_closed();
1399         /* refuse to mix with f.next() */
1400         if (f->f_buf != NULL &&
1401             (f->f_bufend - f->f_bufptr) > 0 &&
1402             f->f_buf[0] != '\0')
1403                 return err_iterbuffered();
1404         if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
1405                 return NULL;
1406         if ((list = PyList_New(0)) == NULL)
1407                 return NULL;
1408         for (;;) {
1409                 if (shortread)
1410                         nread = 0;
1411                 else {
1412                         Py_BEGIN_ALLOW_THREADS
1413                         errno = 0;
1414                         nread = Py_UniversalNewlineFread(buffer+nfilled,
1415                                 buffersize-nfilled, f->f_fp, (PyObject *)f);
1416                         Py_END_ALLOW_THREADS
1417                         shortread = (nread < buffersize-nfilled);
1418                 }
1419                 if (nread == 0) {
1420                         sizehint = 0;
1421                         if (!ferror(f->f_fp))
1422                                 break;
1423                         PyErr_SetFromErrno(PyExc_IOError);
1424                         clearerr(f->f_fp);
1425                   error:
1426                         Py_DECREF(list);
1427                         list = NULL;
1428                         goto cleanup;
1429                 }
1430                 totalread += nread;
1431                 p = (char *)memchr(buffer+nfilled, '\n', nread);
1432                 if (p == NULL) {
1433                         /* Need a larger buffer to fit this line */
1434                         nfilled += nread;
1435                         buffersize *= 2;
1436                         if (buffersize > PY_SSIZE_T_MAX) {
1437                                 PyErr_SetString(PyExc_OverflowError,
1438                             "line is longer than a Python string can hold");
1439                                 goto error;
1440                         }
1441                         if (big_buffer == NULL) {
1442                                 /* Create the big buffer */
1443                                 big_buffer = PyString_FromStringAndSize(
1444                                         NULL, buffersize);
1445                                 if (big_buffer == NULL)
1446                                         goto error;
1447                                 buffer = PyString_AS_STRING(big_buffer);
1448                                 memcpy(buffer, small_buffer, nfilled);
1449                         }
1450                         else {
1451                                 /* Grow the big buffer */
1452                                 if ( _PyString_Resize(&big_buffer, buffersize) < 0 )
1453                                         goto error;
1454                                 buffer = PyString_AS_STRING(big_buffer);
1455                         }
1456                         continue;
1457                 }
1458                 end = buffer+nfilled+nread;
1459                 q = buffer;
1460                 do {
1461                         /* Process complete lines */
1462                         p++;
1463                         line = PyString_FromStringAndSize(q, p-q);
1464                         if (line == NULL)
1465                                 goto error;
1466                         err = PyList_Append(list, line);
1467                         Py_DECREF(line);
1468                         if (err != 0)
1469                                 goto error;
1470                         q = p;
1471                         p = (char *)memchr(q, '\n', end-q);
1472                 } while (p != NULL);
1473                 /* Move the remaining incomplete line to the start */
1474                 nfilled = end-q;
1475                 memmove(buffer, q, nfilled);
1476                 if (sizehint > 0)
1477                         if (totalread >= (size_t)sizehint)
1478                                 break;
1479         }
1480         if (nfilled != 0) {
1481                 /* Partial last line */
1482                 line = PyString_FromStringAndSize(buffer, nfilled);
1483                 if (line == NULL)
1484                         goto error;
1485                 if (sizehint > 0) {
1486                         /* Need to complete the last line */
1487                         PyObject *rest = get_line(f, 0);
1488                         if (rest == NULL) {
1489                                 Py_DECREF(line);
1490                                 goto error;
1491                         }
1492                         PyString_Concat(&line, rest);
1493                         Py_DECREF(rest);
1494                         if (line == NULL)
1495                                 goto error;
1496                 }
1497                 err = PyList_Append(list, line);
1498                 Py_DECREF(line);
1499                 if (err != 0)
1500                         goto error;
1501         }
1502   cleanup:
1503         Py_XDECREF(big_buffer);
1504         return list;
1505 }
1506
1507 static PyObject *
1508 file_write(PyFileObject *f, PyObject *args)
1509 {
1510         char *s;
1511         Py_ssize_t n, n2;
1512         if (f->f_fp == NULL)
1513                 return err_closed();
1514         if (!PyArg_ParseTuple(args, f->f_binary ? "s#" : "t#", &s, &n))
1515                 return NULL;
1516         f->f_softspace = 0;
1517         Py_BEGIN_ALLOW_THREADS
1518         errno = 0;
1519         n2 = fwrite(s, 1, n, f->f_fp);
1520         Py_END_ALLOW_THREADS
1521         if (n2 != n) {
1522                 PyErr_SetFromErrno(PyExc_IOError);
1523                 clearerr(f->f_fp);
1524                 return NULL;
1525         }
1526         Py_INCREF(Py_None);
1527         return Py_None;
1528 }
1529
1530 static PyObject *
1531 file_writelines(PyFileObject *f, PyObject *seq)
1532 {
1533 #define CHUNKSIZE 1000
1534         PyObject *list, *line;
1535         PyObject *it;   /* iter(seq) */
1536         PyObject *result;
1537         int index, islist;
1538         Py_ssize_t i, j, nwritten, len;
1539
1540         assert(seq != NULL);
1541         if (f->f_fp == NULL)
1542                 return err_closed();
1543
1544         result = NULL;
1545         list = NULL;
1546         islist = PyList_Check(seq);
1547         if  (islist)
1548                 it = NULL;
1549         else {
1550                 it = PyObject_GetIter(seq);
1551                 if (it == NULL) {
1552                         PyErr_SetString(PyExc_TypeError,
1553                                 "writelines() requires an iterable argument");
1554                         return NULL;
1555                 }
1556                 /* From here on, fail by going to error, to reclaim "it". */
1557                 list = PyList_New(CHUNKSIZE);
1558                 if (list == NULL)
1559                         goto error;
1560         }
1561
1562         /* Strategy: slurp CHUNKSIZE lines into a private list,
1563            checking that they are all strings, then write that list
1564            without holding the interpreter lock, then come back for more. */
1565         for (index = 0; ; index += CHUNKSIZE) {
1566                 if (islist) {
1567                         Py_XDECREF(list);
1568                         list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
1569                         if (list == NULL)
1570                                 goto error;
1571                         j = PyList_GET_SIZE(list);
1572                 }
1573                 else {
1574                         for (j = 0; j < CHUNKSIZE; j++) {
1575                                 line = PyIter_Next(it);
1576                                 if (line == NULL) {
1577                                         if (PyErr_Occurred())
1578                                                 goto error;
1579                                         break;
1580                                 }
1581                                 PyList_SetItem(list, j, line);
1582                         }
1583                 }
1584                 if (j == 0)
1585                         break;
1586
1587                 /* Check that all entries are indeed strings. If not,
1588                    apply the same rules as for file.write() and
1589                    convert the results to strings. This is slow, but
1590                    seems to be the only way since all conversion APIs
1591                    could potentially execute Python code. */
1592                 for (i = 0; i < j; i++) {
1593                         PyObject *v = PyList_GET_ITEM(list, i);
1594                         if (!PyString_Check(v)) {
1595                                 const char *buffer;
1596                                 if (((f->f_binary &&
1597                                       PyObject_AsReadBuffer(v,
1598                                               (const void**)&buffer,
1599                                                             &len)) ||
1600                                      PyObject_AsCharBuffer(v,
1601                                                            &buffer,
1602                                                            &len))) {
1603                                         PyErr_SetString(PyExc_TypeError,
1604                         "writelines() argument must be a sequence of strings");
1605                                         goto error;
1606                                 }
1607                                 line = PyString_FromStringAndSize(buffer,
1608                                                                   len);
1609                                 if (line == NULL)
1610                                         goto error;
1611                                 Py_DECREF(v);
1612                                 PyList_SET_ITEM(list, i, line);
1613                         }
1614                 }
1615
1616                 /* Since we are releasing the global lock, the
1617                    following code may *not* execute Python code. */
1618                 Py_BEGIN_ALLOW_THREADS
1619                 f->f_softspace = 0;
1620                 errno = 0;
1621                 for (i = 0; i < j; i++) {
1622                         line = PyList_GET_ITEM(list, i);
1623                         len = PyString_GET_SIZE(line);
1624                         nwritten = fwrite(PyString_AS_STRING(line),
1625                                           1, len, f->f_fp);
1626                         if (nwritten != len) {
1627                                 Py_BLOCK_THREADS
1628                                 PyErr_SetFromErrno(PyExc_IOError);
1629                                 clearerr(f->f_fp);
1630                                 goto error;
1631                         }
1632                 }
1633                 Py_END_ALLOW_THREADS
1634
1635                 if (j < CHUNKSIZE)
1636                         break;
1637         }
1638
1639         Py_INCREF(Py_None);
1640         result = Py_None;
1641   error:
1642         Py_XDECREF(list);
1643         Py_XDECREF(it);
1644         return result;
1645 #undef CHUNKSIZE
1646 }
1647
1648 static PyObject *
1649 file_self(PyFileObject *f)
1650 {
1651         if (f->f_fp == NULL)
1652                 return err_closed();
1653         Py_INCREF(f);
1654         return (PyObject *)f;
1655 }
1656
1657 static PyObject *
1658 file_exit(PyObject *f, PyObject *args)
1659 {
1660         PyObject *ret = PyObject_CallMethod(f, "close", NULL);
1661         if (!ret)
1662                 /* If error occurred, pass through */
1663                 return NULL;
1664         Py_DECREF(ret);
1665         /* We cannot return the result of close since a true
1666          * value will be interpreted as "yes, swallow the
1667          * exception if one was raised inside the with block". */
1668         Py_RETURN_NONE;
1669 }
1670
/* Docstrings for the file methods; each one is referenced from the
   corresponding entry of the file_methods table. */

/* Docstring for file.readline(). */
PyDoc_STRVAR(readline_doc,
"readline([size]) -> next line from the file, as a string.\n"
"\n"
"Retain newline.  A non-negative size argument limits the maximum\n"
"number of bytes to return (an incomplete line may be returned then).\n"
"Return an empty string at EOF.");

/* Docstring for file.read(). */
PyDoc_STRVAR(read_doc,
"read([size]) -> read at most size bytes, returned as a string.\n"
"\n"
"If the size argument is negative or omitted, read until EOF is reached.\n"
"Notice that when in non-blocking mode, less data than what was requested\n"
"may be returned, even if no size parameter was given.");

/* Docstring for file.write(). */
PyDoc_STRVAR(write_doc,
"write(str) -> None.  Write string str to file.\n"
"\n"
"Note that due to buffering, flush() or close() may be needed before\n"
"the file on disk reflects the data written.");
1690
1691 PyDoc_STRVAR(fileno_doc,
1692 "fileno() -> integer \"file descriptor\".\n"
1693 "\n"
1694 "This is needed for lower-level file interfaces, such os.read().");
1695
1696 PyDoc_STRVAR(seek_doc,
1697 "seek(offset[, whence]) -> None.  Move to new file position.\n"
1698 "\n"
1699 "Argument offset is a byte count.  Optional argument whence defaults to\n"
1700 "0 (offset from start of file, offset should be >= 0); other values are 1\n"
1701 "(move relative to current position, positive or negative), and 2 (move\n"
1702 "relative to end of file, usually negative, although many platforms allow\n"
1703 "seeking beyond the end of a file).  If the file is opened in text mode,\n"
1704 "only offsets returned by tell() are legal.  Use of other offsets causes\n"
1705 "undefined behavior."
1706 "\n"
1707 "Note that not all file objects are seekable.");
1708
/* Docstring for file.truncate(); only compiled when HAVE_FTRUNCATE is
   defined, matching the conditional "truncate" entry in file_methods. */
#ifdef HAVE_FTRUNCATE
PyDoc_STRVAR(truncate_doc,
"truncate([size]) -> None.  Truncate the file to at most size bytes.\n"
"\n"
"Size defaults to the current file position, as returned by tell().");
#endif

/* Docstring for file.tell(). */
PyDoc_STRVAR(tell_doc,
"tell() -> current file position, an integer (may be a long integer).");

/* Docstring for file.readinto(). */
PyDoc_STRVAR(readinto_doc,
"readinto() -> Undocumented.  Don't use this; it may go away.");

/* Docstring for file.readlines(). */
PyDoc_STRVAR(readlines_doc,
"readlines([size]) -> list of strings, each a line from the file.\n"
"\n"
"Call readline() repeatedly and return a list of the lines so read.\n"
"The optional size argument, if given, is an approximate bound on the\n"
"total number of bytes in the lines returned.");

/* Docstring for file.xreadlines(). */
PyDoc_STRVAR(xreadlines_doc,
"xreadlines() -> returns self.\n"
"\n"
"For backward compatibility. File objects now include the performance\n"
"optimizations previously implemented in the xreadlines module.");

/* Docstring for file.writelines(). */
PyDoc_STRVAR(writelines_doc,
"writelines(sequence_of_strings) -> None.  Write the strings to the file.\n"
"\n"
"Note that newlines are not added.  The sequence can be any iterable object\n"
"producing strings. This is equivalent to calling write() for each string.");

/* Docstring for file.flush(). */
PyDoc_STRVAR(flush_doc,
"flush() -> None.  Flush the internal I/O buffer.");

/* Docstring for file.close(). */
PyDoc_STRVAR(close_doc,
"close() -> None or (perhaps) an integer.  Close the file.\n"
"\n"
"Sets data attribute .closed to True.  A closed file cannot be used for\n"
"further I/O operations.  close() may be called more than once without\n"
"error.  Some kinds of file objects (for example, opened by popen())\n"
"may return an exit status upon closing.");

/* Docstring for file.isatty(). */
PyDoc_STRVAR(isatty_doc,
"isatty() -> true or false.  True if the file is connected to a tty device.");

/* Docstring for file.__enter__(). */
PyDoc_STRVAR(enter_doc,
             "__enter__() -> self.");

/* Docstring for file.__exit__(). */
PyDoc_STRVAR(exit_doc,
             "__exit__(*excinfo) -> None.  Closes the file.");
1760
/* Method table for file objects; installed as tp_methods of PyFile_Type.
   Each entry lists the Python-visible name, the C implementation, the
   calling-convention flag and the docstring.  "xreadlines" and "__enter__"
   both map to file_self, and "truncate" is only present when
   HAVE_FTRUNCATE is defined. */
static PyMethodDef file_methods[] = {
        {"readline",  (PyCFunction)file_readline, METH_VARARGS, readline_doc},
        {"read",      (PyCFunction)file_read,     METH_VARARGS, read_doc},
        {"write",     (PyCFunction)file_write,    METH_VARARGS, write_doc},
        {"fileno",    (PyCFunction)file_fileno,   METH_NOARGS,  fileno_doc},
        {"seek",      (PyCFunction)file_seek,     METH_VARARGS, seek_doc},
#ifdef HAVE_FTRUNCATE
        {"truncate",  (PyCFunction)file_truncate, METH_VARARGS, truncate_doc},
#endif
        {"tell",      (PyCFunction)file_tell,     METH_NOARGS,  tell_doc},
        {"readinto",  (PyCFunction)file_readinto, METH_VARARGS, readinto_doc},
        {"readlines", (PyCFunction)file_readlines,METH_VARARGS, readlines_doc},
        {"xreadlines",(PyCFunction)file_self,     METH_NOARGS, xreadlines_doc},
        {"writelines",(PyCFunction)file_writelines, METH_O,    writelines_doc},
        {"flush",     (PyCFunction)file_flush,    METH_NOARGS,  flush_doc},
        {"close",     (PyCFunction)file_close,    METH_NOARGS,  close_doc},
        {"isatty",    (PyCFunction)file_isatty,   METH_NOARGS,  isatty_doc},
        {"__enter__", (PyCFunction)file_self,     METH_NOARGS,  enter_doc},
        {"__exit__",  (PyCFunction)file_exit,     METH_VARARGS, exit_doc},
        {NULL,        NULL}             /* sentinel */
};
1782
/* Byte offset of field x within PyFileObject, used by the member table. */
#define OFF(x) offsetof(PyFileObject, x)

/* Attributes served straight from PyFileObject fields; installed as
   tp_members of PyFile_Type.  "softspace" is the only entry without the
   RO flag; "mode", "name" and "encoding" are all flagged RO. */
static PyMemberDef file_memberlist[] = {
        {"softspace",   T_INT,          OFF(f_softspace), 0,
         "flag indicating that a space needs to be printed; used by print"},
        {"mode",        T_OBJECT,       OFF(f_mode),    RO,
         "file mode ('r', 'U', 'w', 'a', possibly with 'b' or '+' added)"},
        {"name",        T_OBJECT,       OFF(f_name),    RO,
         "file name"},
        {"encoding",    T_OBJECT,       OFF(f_encoding),        RO,
         "file encoding"},
        /* getattr(f, "closed") is implemented without this table */
        {NULL}  /* Sentinel */
};
1797
1798 static PyObject *
1799 get_closed(PyFileObject *f, void *closure)
1800 {
1801         return PyBool_FromLong((long)(f->f_fp == 0));
1802 }
1803 static PyObject *
1804 get_newlines(PyFileObject *f, void *closure)
1805 {
1806         switch (f->f_newlinetypes) {
1807         case NEWLINE_UNKNOWN:
1808                 Py_INCREF(Py_None);
1809                 return Py_None;
1810         case NEWLINE_CR:
1811                 return PyString_FromString("\r");
1812         case NEWLINE_LF:
1813                 return PyString_FromString("\n");
1814         case NEWLINE_CR|NEWLINE_LF:
1815                 return Py_BuildValue("(ss)", "\r", "\n");
1816         case NEWLINE_CRLF:
1817                 return PyString_FromString("\r\n");
1818         case NEWLINE_CR|NEWLINE_CRLF:
1819                 return Py_BuildValue("(ss)", "\r", "\r\n");
1820         case NEWLINE_LF|NEWLINE_CRLF:
1821                 return Py_BuildValue("(ss)", "\n", "\r\n");
1822         case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF:
1823                 return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
1824         default:
1825                 PyErr_Format(PyExc_SystemError,
1826                              "Unknown newlines value 0x%x\n",
1827                              f->f_newlinetypes);
1828                 return NULL;
1829         }
1830 }
1831
/* Computed attributes of file objects; installed as tp_getset of
   PyFile_Type.  Both entries supply a getter and a NULL setter. */
static PyGetSetDef file_getsetlist[] = {
        {"closed", (getter)get_closed, NULL, "True if the file is closed"},
        {"newlines", (getter)get_newlines, NULL,
         "end-of-line convention used in this file"},
        {0},    /* sentinel */
};
1838
1839 static void
1840 drop_readahead(PyFileObject *f)
1841 {
1842         if (f->f_buf != NULL) {
1843                 PyMem_Free(f->f_buf);
1844                 f->f_buf = NULL;
1845         }
1846 }
1847
1848 /* Make sure that file has a readahead buffer with at least one byte
1849    (unless at EOF) and no more than bufsize.  Returns negative value on
1850    error, will set MemoryError if bufsize bytes cannot be allocated. */
1851 static int
1852 readahead(PyFileObject *f, int bufsize)
1853 {
1854         Py_ssize_t chunksize;
1855
1856         if (f->f_buf != NULL) {
1857                 if( (f->f_bufend - f->f_bufptr) >= 1)
1858                         return 0;
1859                 else
1860                         drop_readahead(f);
1861         }
1862         if ((f->f_buf = (char *)PyMem_Malloc(bufsize)) == NULL) {
1863                 PyErr_NoMemory();
1864                 return -1;
1865         }
1866         Py_BEGIN_ALLOW_THREADS
1867         errno = 0;
1868         chunksize = Py_UniversalNewlineFread(
1869                 f->f_buf, bufsize, f->f_fp, (PyObject *)f);
1870         Py_END_ALLOW_THREADS
1871         if (chunksize == 0) {
1872                 if (ferror(f->f_fp)) {
1873                         PyErr_SetFromErrno(PyExc_IOError);
1874                         clearerr(f->f_fp);
1875                         drop_readahead(f);
1876                         return -1;
1877                 }
1878         }
1879         f->f_bufptr = f->f_buf;
1880         f->f_bufend = f->f_buf + chunksize;
1881         return 0;
1882 }
1883
1884 /* Used by file_iternext.  The returned string will start with 'skip'
1885    uninitialized bytes followed by the remainder of the line. Don't be
1886    horrified by the recursive call: maximum recursion depth is limited by
1887    logarithmic buffer growth to about 50 even when reading a 1gb line. */
1888
1889 static PyStringObject *
1890 readahead_get_line_skip(PyFileObject *f, int skip, int bufsize)
1891 {
1892         PyStringObject* s;
1893         char *bufptr;
1894         char *buf;
1895         Py_ssize_t len;
1896
1897         if (f->f_buf == NULL)
1898                 if (readahead(f, bufsize) < 0)
1899                         return NULL;
1900
1901         len = f->f_bufend - f->f_bufptr;
1902         if (len == 0)
1903                 return (PyStringObject *)
1904                         PyString_FromStringAndSize(NULL, skip);
1905         bufptr = (char *)memchr(f->f_bufptr, '\n', len);
1906         if (bufptr != NULL) {
1907                 bufptr++;                       /* Count the '\n' */
1908                 len = bufptr - f->f_bufptr;
1909                 s = (PyStringObject *)
1910                         PyString_FromStringAndSize(NULL, skip+len);
1911                 if (s == NULL)
1912                         return NULL;
1913                 memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
1914                 f->f_bufptr = bufptr;
1915                 if (bufptr == f->f_bufend)
1916                         drop_readahead(f);
1917         } else {
1918                 bufptr = f->f_bufptr;
1919                 buf = f->f_buf;
1920                 f->f_buf = NULL;        /* Force new readahead buffer */
1921                 assert(skip+len < INT_MAX);
1922                 s = readahead_get_line_skip(
1923                         f, (int)(skip+len), bufsize + (bufsize>>2) );
1924                 if (s == NULL) {
1925                         PyMem_Free(buf);
1926                         return NULL;
1927                 }
1928                 memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
1929                 PyMem_Free(buf);
1930         }
1931         return s;
1932 }
1933
1934 /* A larger buffer size may actually decrease performance. */
1935 #define READAHEAD_BUFSIZE 8192
1936
1937 static PyObject *
1938 file_iternext(PyFileObject *f)
1939 {
1940         PyStringObject* l;
1941
1942         if (f->f_fp == NULL)
1943                 return err_closed();
1944
1945         l = readahead_get_line_skip(f, 0, READAHEAD_BUFSIZE);
1946         if (l == NULL || PyString_GET_SIZE(l) == 0) {
1947                 Py_XDECREF(l);
1948                 return NULL;
1949         }
1950         return (PyObject *)l;
1951 }
1952
1953
1954 static PyObject *
1955 file_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1956 {
1957         PyObject *self;
1958         static PyObject *not_yet_string;
1959
1960         assert(type != NULL && type->tp_alloc != NULL);
1961
1962         if (not_yet_string == NULL) {
1963                 not_yet_string = PyString_InternFromString("<uninitialized file>");
1964                 if (not_yet_string == NULL)
1965                         return NULL;
1966         }
1967
1968         self = type->tp_alloc(type, 0);
1969         if (self != NULL) {
1970                 /* Always fill in the name and mode, so that nobody else
1971                    needs to special-case NULLs there. */
1972                 Py_INCREF(not_yet_string);
1973                 ((PyFileObject *)self)->f_name = not_yet_string;
1974                 Py_INCREF(not_yet_string);
1975                 ((PyFileObject *)self)->f_mode = not_yet_string;
1976                 Py_INCREF(Py_None);
1977                 ((PyFileObject *)self)->f_encoding = Py_None;
1978                 ((PyFileObject *)self)->weakreflist = NULL;
1979         }
1980         return self;
1981 }
1982
1983 static int
1984 file_init(PyObject *self, PyObject *args, PyObject *kwds)
1985 {
1986         PyFileObject *foself = (PyFileObject *)self;
1987         int ret = 0;
1988         static char *kwlist[] = {"name", "mode", "buffering", 0};
1989         char *name = NULL;
1990         char *mode = "r";
1991         int bufsize = -1;
1992         int wideargument = 0;
1993
1994         assert(PyFile_Check(self));
1995         if (foself->f_fp != NULL) {
1996                 /* Have to close the existing file first. */
1997                 PyObject *closeresult = file_close(foself);
1998                 if (closeresult == NULL)
1999                         return -1;
2000                 Py_DECREF(closeresult);
2001         }
2002
2003 #ifdef Py_WIN_WIDE_FILENAMES
2004         if (GetVersion() < 0x80000000) {    /* On NT, so wide API available */
2005                 PyObject *po;
2006                 if (PyArg_ParseTupleAndKeywords(args, kwds, "U|si:file",
2007                                                 kwlist, &po, &mode, &bufsize)) {
2008                         wideargument = 1;
2009                         if (fill_file_fields(foself, NULL, po, mode,
2010                                              fclose) == NULL)
2011                                 goto Error;
2012                 } else {
2013                         /* Drop the argument parsing error as narrow
2014                            strings are also valid. */
2015                         PyErr_Clear();
2016                 }
2017         }
2018 #endif
2019
2020         if (!wideargument) {
2021                 PyObject *o_name;
2022
2023                 if (!PyArg_ParseTupleAndKeywords(args, kwds, "et|si:file", kwlist,
2024                                                  Py_FileSystemDefaultEncoding,
2025                                                  &name,
2026                                                  &mode, &bufsize))
2027                         return -1;
2028
2029                 /* We parse again to get the name as a PyObject */
2030                 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|si:file",
2031                                                  kwlist, &o_name, &mode,
2032                                                  &bufsize))
2033                         goto Error;
2034
2035                 if (fill_file_fields(foself, NULL, o_name, mode,
2036                                      fclose) == NULL)
2037                         goto Error;
2038         }
2039         if (open_the_file(foself, name, mode) == NULL)
2040                 goto Error;
2041         foself->f_setbuf = NULL;
2042         PyFile_SetBufSize(self, bufsize);
2043         goto Done;
2044
2045 Error:
2046         ret = -1;
2047         /* fall through */
2048 Done:
2049         PyMem_Free(name); /* free the encoded string */
2050         return ret;
2051 }
2052
/* Docstring of the file type itself (used as tp_doc in PyFile_Type).
   The text is assembled from two adjacent PyDoc_STR() pieces. */
PyDoc_VAR(file_doc) =
PyDoc_STR(
"file(name[, mode[, buffering]]) -> file object\n"
"\n"
"Open a file.  The mode can be 'r', 'w' or 'a' for reading (default),\n"
"writing or appending.  The file will be created if it doesn't exist\n"
"when opened for writing or appending; it will be truncated when\n"
"opened for writing.  Add a 'b' to the mode for binary files.\n"
"Add a '+' to the mode to allow simultaneous reading and writing.\n"
"If the buffering argument is given, 0 means unbuffered, 1 means line\n"
"buffered, and larger numbers specify the buffer size.  The preferred way\n"
"to open a file is with the builtin open() function.\n"
)
PyDoc_STR(
"Add a 'U' to mode to open the file for input with universal newline\n"
"support.  Any line ending in the input file will be seen as a '\\n'\n"
"in Python.  Also, a file so opened gains the attribute 'newlines';\n"
"the value for this attribute is one of None (no newline read yet),\n"
"'\\r', '\\n', '\\r\\n' or a tuple containing all the newline types seen.\n"
"\n"
"'U' cannot be combined with 'w' or '+' mode.\n"
);
2075
/* Type object for the built-in "file" type.  It wires up the repr and
   dealloc functions, generic attribute access, the iterator protocol
   (file_self / file_iternext), the method, member and getset tables, and
   the constructor pair file_new / file_init.  The type can be subclassed
   (Py_TPFLAGS_BASETYPE) and supports weak references through the
   weakreflist field. */
PyTypeObject PyFile_Type = {
        PyVarObject_HEAD_INIT(&PyType_Type, 0)
        "file",                                 /* tp_name */
        sizeof(PyFileObject),                   /* tp_basicsize */
        0,                                      /* tp_itemsize */
        (destructor)file_dealloc,               /* tp_dealloc */
        0,                                      /* tp_print */
        0,                                      /* tp_getattr */
        0,                                      /* tp_setattr */
        0,                                      /* tp_compare */
        (reprfunc)file_repr,                    /* tp_repr */
        0,                                      /* tp_as_number */
        0,                                      /* tp_as_sequence */
        0,                                      /* tp_as_mapping */
        0,                                      /* tp_hash */
        0,                                      /* tp_call */
        0,                                      /* tp_str */
        PyObject_GenericGetAttr,                /* tp_getattro */
        /* softspace is writable:  we must supply tp_setattro */
        PyObject_GenericSetAttr,                /* tp_setattro */
        0,                                      /* tp_as_buffer */
        Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_WEAKREFS, /* tp_flags */
        file_doc,                               /* tp_doc */
        0,                                      /* tp_traverse */
        0,                                      /* tp_clear */
        0,                                      /* tp_richcompare */
        offsetof(PyFileObject, weakreflist),    /* tp_weaklistoffset */
        (getiterfunc)file_self,                 /* tp_iter */
        (iternextfunc)file_iternext,            /* tp_iternext */
        file_methods,                           /* tp_methods */
        file_memberlist,                        /* tp_members */
        file_getsetlist,                        /* tp_getset */
        0,                                      /* tp_base */
        0,                                      /* tp_dict */
        0,                                      /* tp_descr_get */
        0,                                      /* tp_descr_set */
        0,                                      /* tp_dictoffset */
        file_init,                              /* tp_init */
        PyType_GenericAlloc,                    /* tp_alloc */
        file_new,                               /* tp_new */
        PyObject_Del,                           /* tp_free */
};
2118
2119 /* Interface for the 'soft space' between print items. */
2120
2121 int
2122 PyFile_SoftSpace(PyObject *f, int newflag)
2123 {
2124         long oldflag = 0;
2125         if (f == NULL) {
2126                 /* Do nothing */
2127         }
2128         else if (PyFile_Check(f)) {
2129                 oldflag = ((PyFileObject *)f)->f_softspace;
2130                 ((PyFileObject *)f)->f_softspace = newflag;
2131         }
2132         else {
2133                 PyObject *v;
2134                 v = PyObject_GetAttrString(f, "softspace");
2135                 if (v == NULL)
2136                         PyErr_Clear();
2137                 else {
2138                         if (PyInt_Check(v))
2139                                 oldflag = PyInt_AsLong(v);
2140                         assert(oldflag < INT_MAX);
2141                         Py_DECREF(v);
2142                 }
2143                 v = PyInt_FromLong((long)newflag);
2144                 if (v == NULL)
2145                         PyErr_Clear();
2146                 else {
2147                         if (PyObject_SetAttrString(f, "softspace", v) != 0)
2148                                 PyErr_Clear();
2149                         Py_DECREF(v);
2150                 }
2151         }
2152         return (int)oldflag;
2153 }
2154
2155 /* Interfaces to write objects/strings to file-like objects */
2156
2157 int
2158 PyFile_WriteObject(PyObject *v, PyObject *f, int flags)
2159 {
2160         PyObject *writer, *value, *args, *result;
2161         if (f == NULL) {
2162                 PyErr_SetString(PyExc_TypeError, "writeobject with NULL file");
2163                 return -1;
2164         }
2165         else if (PyFile_Check(f)) {
2166                 FILE *fp = PyFile_AsFile(f);
2167 #ifdef Py_USING_UNICODE
2168                 PyObject *enc = ((PyFileObject*)f)->f_encoding;
2169                 int result;
2170 #endif
2171                 if (fp == NULL) {
2172                         err_closed();
2173                         return -1;
2174                 }
2175 #ifdef Py_USING_UNICODE
2176                 if ((flags & Py_PRINT_RAW) &&
2177                     PyUnicode_Check(v) && enc != Py_None) {
2178                         char *cenc = PyString_AS_STRING(enc);
2179                         value = PyUnicode_AsEncodedString(v, cenc, "strict");
2180                         if (value == NULL)
2181                                 return -1;
2182                 } else {
2183                         value = v;
2184                         Py_INCREF(value);
2185                 }
2186                 result = PyObject_Print(value, fp, flags);
2187                 Py_DECREF(value);
2188                 return result;
2189 #else
2190                 return PyObject_Print(v, fp, flags);
2191 #endif
2192         }
2193         writer = PyObject_GetAttrString(f, "write");
2194         if (writer == NULL)
2195                 return -1;
2196         if (flags & Py_PRINT_RAW) {
2197                 if (PyUnicode_Check(v)) {
2198                         value = v;
2199                         Py_INCREF(value);
2200                 } else
2201                         value = PyObject_Str(v);
2202         }
2203         else
2204                 value = PyObject_Repr(v);
2205         if (value == NULL) {
2206                 Py_DECREF(writer);
2207                 return -1;
2208         }
2209         args = PyTuple_Pack(1, value);
2210         if (args == NULL) {
2211                 Py_DECREF(value);
2212                 Py_DECREF(writer);
2213                 return -1;
2214         }
2215         result = PyEval_CallObject(writer, args);
2216         Py_DECREF(args);
2217         Py_DECREF(value);
2218         Py_DECREF(writer);
2219         if (result == NULL)
2220                 return -1;
2221         Py_DECREF(result);
2222         return 0;
2223 }
2224
2225 int
2226 PyFile_WriteString(const char *s, PyObject *f)
2227 {
2228         if (f == NULL) {
2229                 /* Should be caused by a pre-existing error */
2230                 if (!PyErr_Occurred())
2231                         PyErr_SetString(PyExc_SystemError,
2232                                         "null file for PyFile_WriteString");
2233                 return -1;
2234         }
2235         else if (PyFile_Check(f)) {
2236                 FILE *fp = PyFile_AsFile(f);
2237                 if (fp == NULL) {
2238                         err_closed();
2239                         return -1;
2240                 }
2241                 Py_BEGIN_ALLOW_THREADS
2242                 fputs(s, fp);
2243                 Py_END_ALLOW_THREADS
2244                 return 0;
2245         }
2246         else if (!PyErr_Occurred()) {
2247                 PyObject *v = PyString_FromString(s);
2248                 int err;
2249                 if (v == NULL)
2250                         return -1;
2251                 err = PyFile_WriteObject(v, f, Py_PRINT_RAW);
2252                 Py_DECREF(v);
2253                 return err;
2254         }
2255         else
2256                 return -1;
2257 }
2258
2259 /* Try to get a file-descriptor from a Python object.  If the object
2260    is an integer or long integer, its value is returned.  If not, the
2261    object's fileno() method is called if it exists; the method must return
2262    an integer or long integer, which is returned as the file descriptor value.
2263    -1 is returned on failure.
2264 */
2265
2266 int PyObject_AsFileDescriptor(PyObject *o)
2267 {
2268         int fd;
2269         PyObject *meth;
2270
2271         if (PyInt_Check(o)) {
2272                 fd = PyInt_AsLong(o);
2273         }
2274         else if (PyLong_Check(o)) {
2275                 fd = PyLong_AsLong(o);
2276         }
2277         else if ((meth = PyObject_GetAttrString(o, "fileno")) != NULL)
2278         {
2279                 PyObject *fno = PyEval_CallObject(meth, NULL);
2280                 Py_DECREF(meth);
2281                 if (fno == NULL)
2282                         return -1;
2283
2284                 if (PyInt_Check(fno)) {
2285                         fd = PyInt_AsLong(fno);
2286                         Py_DECREF(fno);
2287                 }
2288                 else if (PyLong_Check(fno)) {
2289                         fd = PyLong_AsLong(fno);
2290                         Py_DECREF(fno);
2291                 }
2292                 else {
2293                         PyErr_SetString(PyExc_TypeError,
2294                                         "fileno() returned a non-integer");
2295                         Py_DECREF(fno);
2296                         return -1;
2297                 }
2298         }
2299         else {
2300                 PyErr_SetString(PyExc_TypeError,
2301                                 "argument must be an int, or have a fileno() method.");
2302                 return -1;
2303         }
2304
2305         if (fd < 0) {
2306                 PyErr_Format(PyExc_ValueError,
2307                              "file descriptor cannot be a negative integer (%i)",
2308                              fd);
2309                 return -1;
2310         }
2311         return fd;
2312 }
2313
2314 /* From here on we need access to the real fgets and fread */
2315 #undef fgets
2316 #undef fread
2317
/*
** Py_UniversalNewlineFgets is an fgets variation that understands
** all of \r, \n and \r\n conventions.
** The stream should be opened in binary mode.
** If fobj is NULL the routine always does newline conversion, and
** it may peek one char ahead to gobble the second char in \r\n.
** If fobj is non-NULL it must be a PyFileObject (otherwise errno is set
** to ENXIO and NULL is returned). In this case there is no readahead but
** instead a flag (f_skipnextlf) is used to skip a following \n on the
** next read. Also, if the file object's f_univ_newline flag is not set
** the whole conversion is skipped and the call is passed to fgets().
** Finally, the routine keeps track of the different types of newlines
** seen (f_newlinetypes).
** At most n-1 characters are stored in buf, followed by a NUL. The
** return value is buf, or NULL if no character was stored.
** Note that we need no error handling: fgets() treats error and eof
** identically.
*/
char *
Py_UniversalNewlineFgets(char *buf, int n, FILE *stream, PyObject *fobj)
{
        char *p = buf;          /* next free position in buf */
        int c;
        int newlinetypes = 0;   /* NEWLINE_* bits seen so far */
        int skipnextlf = 0;     /* previous char was \r: skip one \n */
        int univ_newline = 1;

        if (fobj) {
                if (!PyFile_Check(fobj)) {
                        errno = ENXIO;  /* What can you do... */
                        return NULL;
                }
                univ_newline = ((PyFileObject *)fobj)->f_univ_newline;
                if ( !univ_newline )
                        return fgets(buf, n, stream);
                /* Resume from the state saved by the previous call. */
                newlinetypes = ((PyFileObject *)fobj)->f_newlinetypes;
                skipnextlf = ((PyFileObject *)fobj)->f_skipnextlf;
        }
        FLOCKFILE(stream);
        c = 'x'; /* Shut up gcc warning */
        /* --n > 0 leaves room for the terminating NUL. */
        while (--n > 0 && (c = GETC(stream)) != EOF ) {
                if (skipnextlf ) {
                        skipnextlf = 0;
                        if (c == '\n') {
                                /* Seeing a \n here with skipnextlf true
                                ** means we saw a \r before.
                                */
                                newlinetypes |= NEWLINE_CRLF;
                                c = GETC(stream);
                                if (c == EOF) break;
                        } else {
                                /*
                                ** The \r was followed by something other
                                ** than \n, so it was a lone CR. (c cannot
                                ** be EOF here because the loop condition
                                ** excludes it; a CR that is the last char
                                ** in the file is handled after the loop.)
                                */
                                newlinetypes |= NEWLINE_CR;
                        }
                }
                if (c == '\r') {
                        /* A \r is translated into a \n, and we skip
                        ** an adjacent \n, if any. We don't set the
                        ** newlinetypes flag until we've seen the next char.
                        */
                        skipnextlf = 1;
                        c = '\n';
                } else if ( c == '\n') {
                        newlinetypes |= NEWLINE_LF;
                }
                *p++ = c;
                if (c == '\n') break;   /* end of line: stop like fgets */
        }
        /* EOF right after a \r: that \r was a lone CR. */
        if ( c == EOF && skipnextlf )
                newlinetypes |= NEWLINE_CR;
        FUNLOCKFILE(stream);
        *p = '\0';
        if (fobj) {
                /* Save the state for the next call on this file object. */
                ((PyFileObject *)fobj)->f_newlinetypes = newlinetypes;
                ((PyFileObject *)fobj)->f_skipnextlf = skipnextlf;
        } else if ( skipnextlf ) {
                /* If we have no file object we cannot save the
                ** skipnextlf flag. We have to readahead, which
                ** will cause a pause if we're reading from an
                ** interactive stream, but that is very unlikely
                ** unless we're doing something silly like
                ** execfile("/dev/tty").
                */
                c = GETC(stream);
                if ( c != '\n' )
                        ungetc(c, stream);
        }
        if (p == buf)
                return NULL;
        return buf;
}
2409
2410 /*
2411 ** Py_UniversalNewlineFread is an fread variation that understands
2412 ** all of \r, \n and \r\n conventions.
2413 ** The stream should be opened in binary mode.
2414 ** fobj must be a PyFileObject. In this case there
2415 ** is no readahead but in stead a flag is used to skip a following
2416 ** \n on the next read. Also, if the file is open in binary mode
2417 ** the whole conversion is skipped. Finally, the routine keeps track of
2418 ** the different types of newlines seen.
2419 */
2420 size_t
2421 Py_UniversalNewlineFread(char *buf, size_t n,
2422                          FILE *stream, PyObject *fobj)
2423 {
2424         char *dst = buf;
2425         PyFileObject *f = (PyFileObject *)fobj;
2426         int newlinetypes, skipnextlf;
2427
2428         assert(buf != NULL);
2429         assert(stream != NULL);
2430
2431         if (!fobj || !PyFile_Check(fobj)) {
2432                 errno = ENXIO;  /* What can you do... */
2433                 return 0;
2434         }
2435         if (!f->f_univ_newline)
2436                 return fread(buf, 1, n, stream);
2437         newlinetypes = f->f_newlinetypes;
2438         skipnextlf = f->f_skipnextlf;
2439         /* Invariant:  n is the number of bytes remaining to be filled
2440          * in the buffer.
2441          */
2442         while (n) {
2443                 size_t nread;
2444                 int shortread;
2445                 char *src = dst;
2446
2447                 nread = fread(dst, 1, n, stream);
2448                 assert(nread <= n);
2449                 if (nread == 0)
2450                         break;
2451
2452                 n -= nread; /* assuming 1 byte out for each in; will adjust */
2453                 shortread = n != 0;     /* true iff EOF or error */
2454                 while (nread--) {
2455                         char c = *src++;
2456                         if (c == '\r') {
2457                                 /* Save as LF and set flag to skip next LF. */
2458                                 *dst++ = '\n';
2459                                 skipnextlf = 1;
2460                         }
2461                         else if (skipnextlf && c == '\n') {
2462                                 /* Skip LF, and remember we saw CR LF. */
2463                                 skipnextlf = 0;
2464                                 newlinetypes |= NEWLINE_CRLF;
2465                                 ++n;
2466                         }
2467                         else {
2468                                 /* Normal char to be stored in buffer.  Also
2469                                  * update the newlinetypes flag if either this
2470                                  * is an LF or the previous char was a CR.
2471                                  */
2472                                 if (c == '\n')
2473                                         newlinetypes |= NEWLINE_LF;
2474                                 else if (skipnextlf)
2475                                         newlinetypes |= NEWLINE_CR;
2476                                 *dst++ = c;
2477                                 skipnextlf = 0;
2478                         }
2479                 }
2480                 if (shortread) {
2481                         /* If this is EOF, update type flags. */
2482                         if (skipnextlf && feof(stream))
2483                                 newlinetypes |= NEWLINE_CR;
2484                         break;
2485                 }
2486         }
2487         f->f_newlinetypes = newlinetypes;
2488         f->f_skipnextlf = skipnextlf;
2489         return dst - buf;
2490 }
2491
2492 #ifdef __cplusplus
2493 }
2494 #endif