Objects/fileobject.c

   1 /* File object implementation */
   2
   3 #define PY_SSIZE_T_CLEAN
   4 #include "Python.h"
   5 #include "structmember.h"
   6
   7 #ifdef HAVE_SYS_TYPES_H
   8 #include <sys/types.h>
   9 #endif /* HAVE_SYS_TYPES_H */
  10
  11 #ifdef MS_WINDOWS
  12 #define fileno _fileno
  13 /* can simulate truncate with Win32 API functions; see file_truncate */
  14 #define HAVE_FTRUNCATE
  15 #define WIN32_LEAN_AND_MEAN
  16 #include <windows.h>
  17 #endif
  18
  19 #if defined(PYOS_OS2) && defined(PYCC_GCC)
  20 #include <io.h>
  21 #endif
  22
  23 #define BUF(v) PyString_AS_STRING((PyStringObject *)v)
  24
  25 #ifndef DONT_HAVE_ERRNO_H
  26 #include <errno.h>
  27 #endif
  28
  29 #ifdef HAVE_GETC_UNLOCKED
  30 #define GETC(f) getc_unlocked(f)
  31 #define FLOCKFILE(f) flockfile(f)
  32 #define FUNLOCKFILE(f) funlockfile(f)
  33 #else
  34 #define GETC(f) getc(f)
  35 #define FLOCKFILE(f)
  36 #define FUNLOCKFILE(f)
  37 #endif
  38
  39 /* Bits in f_newlinetypes */
  40 #define NEWLINE_UNKNOWN 0       /* No newline seen, yet */
  41 #define NEWLINE_CR 1            /* \r newline seen */
  42 #define NEWLINE_LF 2            /* \n newline seen */
  43 #define NEWLINE_CRLF 4          /* \r\n newline seen */
  44
  45 /*
  46  * These macros release the GIL while preventing the f_close() function being
  47  * called in the interval between them.  For that purpose, a running total of
  48  * the number of currently running unlocked code sections is kept in
  49  * the unlocked_count field of the PyFileObject. The close() method raises
  50  * an IOError if that field is non-zero.  See issue #815646, #595601.
  51  */
  52
  53 #define FILE_BEGIN_ALLOW_THREADS(fobj) \
  54 { \
  55         fobj->unlocked_count++; \
  56         Py_BEGIN_ALLOW_THREADS
  57
  58 #define FILE_END_ALLOW_THREADS(fobj) \
  59         Py_END_ALLOW_THREADS \
  60         fobj->unlocked_count--; \
  61         assert(fobj->unlocked_count >= 0); \
  62 }
  63
  64 #define FILE_ABORT_ALLOW_THREADS(fobj) \
  65         Py_BLOCK_THREADS \
  66         fobj->unlocked_count--; \
  67         assert(fobj->unlocked_count >= 0);
  68
  69 #ifdef __cplusplus
  70 extern "C" {
  71 #endif
  72
  73 FILE *
  74 PyFile_AsFile(PyObject *f)
  75 {
  76         if (f == NULL || !PyFile_Check(f))
  77                 return NULL;
  78         else
  79                 return ((PyFileObject *)f)->f_fp;
  80 }
  81
  82 void PyFile_IncUseCount(PyFileObject *fobj)
  83 {
  84         fobj->unlocked_count++;
  85 }
  86
  87 void PyFile_DecUseCount(PyFileObject *fobj)
  88 {
  89         fobj->unlocked_count--;
  90         assert(fobj->unlocked_count >= 0);
  91 }
  92
  93 PyObject *
  94 PyFile_Name(PyObject *f)
  95 {
  96         if (f == NULL || !PyFile_Check(f))
  97                 return NULL;
  98         else
  99                 return ((PyFileObject *)f)->f_name;
 100 }
 101
 102 /* This is a safe wrapper around PyObject_Print to print to the FILE
 103    of a PyFileObject. PyObject_Print releases the GIL but knows nothing
 104    about PyFileObject. */
 105 static int
 106 file_PyObject_Print(PyObject *op, PyFileObject *f, int flags)
 107 {
 108         int result;
 109         PyFile_IncUseCount(f);
 110         result = PyObject_Print(op, f->f_fp, flags);
 111         PyFile_DecUseCount(f);
 112         return result;
 113 }
 114
 115 /* On Unix, fopen will succeed for directories.
 116    In Python, there should be no file objects referring to
 117    directories, so we need a check.  */
 118
 119 static PyFileObject*
 120 dircheck(PyFileObject* f)
 121 {
 122 #if defined(HAVE_FSTAT) && defined(S_IFDIR) && defined(EISDIR)
 123         struct stat buf;
 124         if (f->f_fp == NULL)
 125                 return f;
 126         if (fstat(fileno(f->f_fp), &buf) == 0 &&
 127             S_ISDIR(buf.st_mode)) {
 128                 char *msg = strerror(EISDIR);
 129                 PyObject *exc = PyObject_CallFunction(PyExc_IOError, "(isO)",
 130                                                       EISDIR, msg, f->f_name);
 131                 PyErr_SetObject(PyExc_IOError, exc);
 132                 Py_XDECREF(exc);
 133                 return NULL;
 134         }
 135 #endif
 136         return f;
 137 }
 138
 139
 140 static PyObject *
 141 fill_file_fields(PyFileObject *f, FILE *fp, PyObject *name, char *mode,
 142                  int (*close)(FILE *))
 143 {
 144         assert(name != NULL);
 145         assert(f != NULL);
 146         assert(PyFile_Check(f));
 147         assert(f->f_fp == NULL);
 148
 149         Py_DECREF(f->f_name);
 150         Py_DECREF(f->f_mode);
 151         Py_DECREF(f->f_encoding);
 152         Py_DECREF(f->f_errors);
 153
 154         Py_INCREF(name);
 155         f->f_name = name;
 156
 157         f->f_mode = PyString_FromString(mode);
 158
 159         f->f_close = close;
 160         f->f_softspace = 0;
 161         f->f_binary = strchr(mode,'b') != NULL;
 162         f->f_buf = NULL;
 163         f->f_univ_newline = (strchr(mode, 'U') != NULL);
 164         f->f_newlinetypes = NEWLINE_UNKNOWN;
 165         f->f_skipnextlf = 0;
 166         Py_INCREF(Py_None);
 167         f->f_encoding = Py_None;
 168         Py_INCREF(Py_None);
 169         f->f_errors = Py_None;
 170
 171         if (f->f_mode == NULL)
 172                 return NULL;
 173         f->f_fp = fp;
 174         f = dircheck(f);
 175         return (PyObject *) f;
 176 }
 177
 178 #if defined _MSC_VER && _MSC_VER >= 1400 && defined(__STDC_SECURE_LIB__)
 179 #define Py_VERIFY_WINNT
 180 /* The CRT on windows compiled with Visual Studio 2005 and higher may
 181  * assert if given invalid mode strings.  This is all fine and well
 182  * in static languages like C where the mode string is typcially hard
 183  * coded.  But in Python, were we pass in the mode string from the user,
 184  * we need to verify it first manually
 185  */
 186 static int _PyVerify_Mode_WINNT(const char *mode)
 187 {
 188         /* See if mode string is valid on Windows to avoid hard assertions */
 189         /* remove leading spacese */
 190         int singles = 0;
 191         int pairs = 0;
 192         int encoding = 0;
 193         const char *s, *c;
 194
 195         while(*mode == ' ') /* strip initial spaces */
 196                 ++mode;
 197         if (!strchr("rwa", *mode)) /* must start with one of these */
 198                 return 0;
 199         while (*++mode) {
 200                 if (*mode == ' ' || *mode == 'N') /* ignore spaces and N */
 201                         continue;
 202                 s = "+TD"; /* each of this can appear only once */
 203                 c = strchr(s, *mode);
 204                 if (c) {
 205                         ptrdiff_t idx = s-c;
 206                         if (singles & (1<<idx))
 207                                 return 0;
 208                         singles |= (1<<idx);
 209                         continue;
 210                 }
 211                 s = "btcnSR"; /* only one of each letter in the pairs allowed */
 212                 c = strchr(s, *mode);
 213                 if (c) {
 214                         ptrdiff_t idx = (s-c)/2;
 215                         if (pairs & (1<<idx))
 216                                 return 0;
 217                         pairs |= (1<<idx);
 218                         continue;
 219                 }
 220                 if (*mode == ',') {
 221                         encoding = 1;
 222                         break;
 223                 }
 224                 return 0; /* found an invalid char */
 225         }
 226
 227         if (encoding) {
 228                 char *e[] = {"UTF-8", "UTF-16LE", "UNICODE"};
 229                 while (*mode == ' ')
 230                         ++mode;
 231                 /* find 'ccs =' */
 232                 if (strncmp(mode, "ccs", 3))
 233                         return 0;
 234                 mode += 3;
 235                 while (*mode == ' ')
 236                         ++mode;
 237                 if (*mode != '=')
 238                         return 0;
 239                 while (*mode == ' ')
 240                         ++mode;
 241                 for(encoding = 0; encoding<_countof(e); ++encoding) {
 242                         size_t l = strlen(e[encoding]);
 243                         if (!strncmp(mode, e[encoding], l)) {
 244                                 mode += l; /* found a valid encoding */
 245                                 break;
 246                         }
 247                 }
 248                 if (encoding == _countof(e))
 249                         return 0;
 250         }
 251         /* skip trailing spaces */
 252         while (*mode == ' ')
 253                 ++mode;
 254
 255         return *mode == '\0'; /* must be at the end of the string */
 256 }
 257 #endif
 258
 259 /* check for known incorrect mode strings - problem is, platforms are
 260    free to accept any mode characters they like and are supposed to
 261    ignore stuff they don't understand... write or append mode with
 262    universal newline support is expressly forbidden by PEP 278.
 263    Additionally, remove the 'U' from the mode string as platforms
 264    won't know what it is. Non-zero return signals an exception */
 265 int
 266 _PyFile_SanitizeMode(char *mode)
 267 {
 268         char *upos;
 269         size_t len = strlen(mode);
 270
 271         if (!len) {
 272                 PyErr_SetString(PyExc_ValueError, "empty mode string");
 273                 return -1;
 274         }
 275
 276         upos = strchr(mode, 'U');
 277         if (upos) {
 278                 memmove(upos, upos+1, len-(upos-mode)); /* incl null char */
 279
 280                 if (mode[0] == 'w' || mode[0] == 'a') {
 281                         PyErr_Format(PyExc_ValueError, "universal newline "
 282                                      "mode can only be used with modes "
 283                                      "starting with 'r'");
 284                         return -1;
 285                 }
 286
 287                 if (mode[0] != 'r') {
 288                         memmove(mode+1, mode, strlen(mode)+1);
 289                         mode[0] = 'r';
 290                 }
 291
 292                 if (!strchr(mode, 'b')) {
 293                         memmove(mode+2, mode+1, strlen(mode));
 294                         mode[1] = 'b';
 295                 }
 296         } else if (mode[0] != 'r' && mode[0] != 'w' && mode[0] != 'a') {
 297                 PyErr_Format(PyExc_ValueError, "mode string must begin with "
 298                             "one of 'r', 'w', 'a' or 'U', not '%.200s'", mode);
 299                 return -1;
 300         }
 301 #ifdef Py_VERIFY_WINNT
 302         /* additional checks on NT with visual studio 2005 and higher */
 303         if (!_PyVerify_Mode_WINNT(mode)) {
 304                 PyErr_Format(PyExc_ValueError, "Invalid mode ('%.50s')", mode);
 305                 return -1;
 306         }
 307 #endif
 308         return 0;
 309 }
 310
 311 static PyObject *
 312 open_the_file(PyFileObject *f, char *name, char *mode)
 313 {
 314         char *newmode;
 315         assert(f != NULL);
 316         assert(PyFile_Check(f));
 317 #ifdef MS_WINDOWS
 318         /* windows ignores the passed name in order to support Unicode */
 319         assert(f->f_name != NULL);
 320 #else
 321         assert(name != NULL);
 322 #endif
 323         assert(mode != NULL);
 324         assert(f->f_fp == NULL);
 325
 326         /* probably need to replace 'U' by 'rb' */
 327         newmode = PyMem_MALLOC(strlen(mode) + 3);
 328         if (!newmode) {
 329                 PyErr_NoMemory();
 330                 return NULL;
 331         }
 332         strcpy(newmode, mode);
 333
 334         if (_PyFile_SanitizeMode(newmode)) {
 335                 f = NULL;
 336                 goto cleanup;
 337         }
 338
 339         /* rexec.py can't stop a user from getting the file() constructor --
 340            all they have to do is get *any* file object f, and then do
 341            type(f).  Here we prevent them from doing damage with it. */
 342         if (PyEval_GetRestricted()) {
 343                 PyErr_SetString(PyExc_IOError,
 344                 "file() constructor not accessible in restricted mode");
 345                 f = NULL;
 346                 goto cleanup;
 347         }
 348         errno = 0;
 349
 350 #ifdef MS_WINDOWS
 351         if (PyUnicode_Check(f->f_name)) {
 352                 PyObject *wmode;
 353                 wmode = PyUnicode_DecodeASCII(newmode, strlen(newmode), NULL);
 354                 if (f->f_name && wmode) {
 355                         FILE_BEGIN_ALLOW_THREADS(f)
 356                         /* PyUnicode_AS_UNICODE OK without thread
 357                            lock as it is a simple dereference. */
 358                         f->f_fp = _wfopen(PyUnicode_AS_UNICODE(f->f_name),
 359                                           PyUnicode_AS_UNICODE(wmode));
 360                         FILE_END_ALLOW_THREADS(f)
 361                 }
 362                 Py_XDECREF(wmode);
 363         }
 364 #endif
 365         if (NULL == f->f_fp && NULL != name) {
 366                 FILE_BEGIN_ALLOW_THREADS(f)
 367                 f->f_fp = fopen(name, newmode);
 368                 FILE_END_ALLOW_THREADS(f)
 369         }
 370
 371         if (f->f_fp == NULL) {
 372 #if defined  _MSC_VER && (_MSC_VER < 1400 || !defined(__STDC_SECURE_LIB__))
 373                 /* MSVC 6 (Microsoft) leaves errno at 0 for bad mode strings,
 374                  * across all Windows flavors.  When it sets EINVAL varies
 375                  * across Windows flavors, the exact conditions aren't
 376                  * documented, and the answer lies in the OS's implementation
 377                  * of Win32's CreateFile function (whose source is secret).
 378                  * Seems the best we can do is map EINVAL to ENOENT.
 379                  * Starting with Visual Studio .NET 2005, EINVAL is correctly
 380                  * set by our CRT error handler (set in exceptions.c.)
 381                  */
 382                 if (errno == 0) /* bad mode string */
 383                         errno = EINVAL;
 384                 else if (errno == EINVAL) /* unknown, but not a mode string */
 385                         errno = ENOENT;
 386 #endif
 387                 /* EINVAL is returned when an invalid filename or
 388                  * an invalid mode is supplied. */
 389                 if (errno == EINVAL) {
 390                         PyObject *v;
 391                         char message[100];
 392                         PyOS_snprintf(message, 100,
 393                             "invalid mode ('%.50s') or filename", mode);
 394                         v = Py_BuildValue("(isO)", errno, message, f->f_name);
 395                         if (v != NULL) {
 396                                 PyErr_SetObject(PyExc_IOError, v);
 397                                 Py_DECREF(v);
 398                         }
 399                 }
 400                 else
 401                         PyErr_SetFromErrnoWithFilenameObject(PyExc_IOError, f->f_name);
 402                 f = NULL;
 403         }
 404         if (f != NULL)
 405                 f = dircheck(f);
 406
 407 cleanup:
 408         PyMem_FREE(newmode);
 409
 410         return (PyObject *)f;
 411 }
 412
 413 static PyObject *
 414 close_the_file(PyFileObject *f)
 415 {
 416         int sts = 0;
 417         int (*local_close)(FILE *);
 418         FILE *local_fp = f->f_fp;
 419         if (local_fp != NULL) {
 420                 local_close = f->f_close;
 421                 if (local_close != NULL && f->unlocked_count > 0) {
 422                         if (f->ob_refcnt > 0) {
 423                                 PyErr_SetString(PyExc_IOError,
 424                                         "close() called during concurrent "
 425                                         "operation on the same file object.");
 426                         } else {
 427                                 /* This should not happen unless someone is
 428                                  * carelessly playing with the PyFileObject
 429                                  * struct fields and/or its associated FILE
 430                                  * pointer. */
 431                                 PyErr_SetString(PyExc_SystemError,
 432                                         "PyFileObject locking error in "
 433                                         "destructor (refcnt <= 0 at close).");
 434                         }
 435                         return NULL;
 436                 }
 437                 /* NULL out the FILE pointer before releasing the GIL, because
 438                  * it will not be valid anymore after the close() function is
 439                  * called. */
 440                 f->f_fp = NULL;
 441                 if (local_close != NULL) {
 442                         Py_BEGIN_ALLOW_THREADS
 443                         errno = 0;
 444                         sts = (*local_close)(local_fp);
 445                         Py_END_ALLOW_THREADS
 446                         if (sts == EOF)
 447                                 return PyErr_SetFromErrno(PyExc_IOError);
 448                         if (sts != 0)
 449                                 return PyInt_FromLong((long)sts);
 450                 }
 451         }
 452         Py_RETURN_NONE;
 453 }
 454
 455 PyObject *
 456 PyFile_FromFile(FILE *fp, char *name, char *mode, int (*close)(FILE *))
 457 {
 458         PyFileObject *f = (PyFileObject *)PyFile_Type.tp_new(&PyFile_Type,
 459                                                              NULL, NULL);
 460         if (f != NULL) {
 461                 PyObject *o_name = PyString_FromString(name);
 462                 if (o_name == NULL)
 463                         return NULL;
 464                 if (fill_file_fields(f, fp, o_name, mode, close) == NULL) {
 465                         Py_DECREF(f);
 466                         f = NULL;
 467                 }
 468                 Py_DECREF(o_name);
 469         }
 470         return (PyObject *) f;
 471 }
 472
 473 PyObject *
 474 PyFile_FromString(char *name, char *mode)
 475 {
 476         extern int fclose(FILE *);
 477         PyFileObject *f;
 478
 479         f = (PyFileObject *)PyFile_FromFile((FILE *)NULL, name, mode, fclose);
 480         if (f != NULL) {
 481                 if (open_the_file(f, name, mode) == NULL) {
 482                         Py_DECREF(f);
 483                         f = NULL;
 484                 }
 485         }
 486         return (PyObject *)f;
 487 }
 488
 489 void
 490 PyFile_SetBufSize(PyObject *f, int bufsize)
 491 {
 492         PyFileObject *file = (PyFileObject *)f;
 493         if (bufsize >= 0) {
 494                 int type;
 495                 switch (bufsize) {
 496                 case 0:
 497                         type = _IONBF;
 498                         break;
 499 #ifdef HAVE_SETVBUF
 500                 case 1:
 501                         type = _IOLBF;
 502                         bufsize = BUFSIZ;
 503                         break;
 504 #endif
 505                 default:
 506                         type = _IOFBF;
 507 #ifndef HAVE_SETVBUF
 508                         bufsize = BUFSIZ;
 509 #endif
 510                         break;
 511                 }
 512                 fflush(file->f_fp);
 513                 if (type == _IONBF) {
 514                         PyMem_Free(file->f_setbuf);
 515                         file->f_setbuf = NULL;
 516                 } else {
 517                         file->f_setbuf = (char *)PyMem_Realloc(file->f_setbuf,
 518                                                                 bufsize);
 519                 }
 520 #ifdef HAVE_SETVBUF
 521                 setvbuf(file->f_fp, file->f_setbuf, type, bufsize);
 522 #else /* !HAVE_SETVBUF */
 523                 setbuf(file->f_fp, file->f_setbuf);
 524 #endif /* !HAVE_SETVBUF */
 525         }
 526 }
 527
 528 /* Set the encoding used to output Unicode strings.
 529    Return 1 on success, 0 on failure. */
 530
 531 int
 532 PyFile_SetEncoding(PyObject *f, const char *enc)
 533 {
 534         return PyFile_SetEncodingAndErrors(f, enc, NULL);
 535 }
 536
 537 int
 538 PyFile_SetEncodingAndErrors(PyObject *f, const char *enc, char* errors)
 539 {
 540         PyFileObject *file = (PyFileObject*)f;
 541         PyObject *str, *oerrors;
 542
 543         assert(PyFile_Check(f));
 544         str = PyString_FromString(enc);
 545         if (!str)
 546                 return 0;
 547         if (errors) {
 548                 oerrors = PyString_FromString(errors);
 549                 if (!oerrors) {
 550                         Py_DECREF(str);
 551                         return 0;
 552                 }
 553         } else {
 554                 oerrors = Py_None;
 555                 Py_INCREF(Py_None);
 556         }
 557         Py_DECREF(file->f_encoding);
 558         file->f_encoding = str;
 559         Py_DECREF(file->f_errors);
 560         file->f_errors = oerrors;
 561         return 1;
 562 }
 563
 564 static PyObject *
 565 err_closed(void)
 566 {
 567         PyErr_SetString(PyExc_ValueError, "I/O operation on closed file");
 568         return NULL;
 569 }
 570
 571 /* Refuse regular file I/O if there's data in the iteration-buffer.
 572  * Mixing them would cause data to arrive out of order, as the read*
 573  * methods don't use the iteration buffer. */
 574 static PyObject *
 575 err_iterbuffered(void)
 576 {
 577         PyErr_SetString(PyExc_ValueError,
 578                 "Mixing iteration and read methods would lose data");
 579         return NULL;
 580 }
 581
 582 static void drop_readahead(PyFileObject *);
 583
 584 /* Methods */
 585
 586 static void
 587 file_dealloc(PyFileObject *f)
 588 {
 589         PyObject *ret;
 590         if (f->weakreflist != NULL)
 591                 PyObject_ClearWeakRefs((PyObject *) f);
 592         ret = close_the_file(f);
 593         if (!ret) {
 594                 PySys_WriteStderr("close failed in file object destructor:\n");
 595                 PyErr_Print();
 596         }
 597         else {
 598                 Py_DECREF(ret);
 599         }
 600         PyMem_Free(f->f_setbuf);
 601         Py_XDECREF(f->f_name);
 602         Py_XDECREF(f->f_mode);
 603         Py_XDECREF(f->f_encoding);
 604         Py_XDECREF(f->f_errors);
 605         drop_readahead(f);
 606         Py_TYPE(f)->tp_free((PyObject *)f);
 607 }
 608
 609 static PyObject *
 610 file_repr(PyFileObject *f)
 611 {
 612         if (PyUnicode_Check(f->f_name)) {
 613 #ifdef Py_USING_UNICODE
 614                 PyObject *ret = NULL;
 615                 PyObject *name = PyUnicode_AsUnicodeEscapeString(f->f_name);
 616                 const char *name_str = name ? PyString_AsString(name) : "?";
 617                 ret = PyString_FromFormat("<%s file u'%s', mode '%s' at %p>",
 618                                    f->f_fp == NULL ? "closed" : "open",
 619                                    name_str,
 620                                    PyString_AsString(f->f_mode),
 621                                    f);
 622                 Py_XDECREF(name);
 623                 return ret;
 624 #endif
 625         } else {
 626                 return PyString_FromFormat("<%s file '%s', mode '%s' at %p>",
 627                                    f->f_fp == NULL ? "closed" : "open",
 628                                    PyString_AsString(f->f_name),
 629                                    PyString_AsString(f->f_mode),
 630                                    f);
 631         }
 632 }
 633
 634 static PyObject *
 635 file_close(PyFileObject *f)
 636 {
 637         PyObject *sts = close_the_file(f);
 638         PyMem_Free(f->f_setbuf);
 639         f->f_setbuf = NULL;
 640         return sts;
 641 }
 642
 643
 644 /* Our very own off_t-like type, 64-bit if possible */
 645 #if !defined(HAVE_LARGEFILE_SUPPORT)
 646 typedef off_t Py_off_t;
 647 #elif SIZEOF_OFF_T >= 8
 648 typedef off_t Py_off_t;
 649 #elif SIZEOF_FPOS_T >= 8
 650 typedef fpos_t Py_off_t;
 651 #else
 652 #error "Large file support, but neither off_t nor fpos_t is large enough."
 653 #endif
 654
 655
 656 /* a portable fseek() function
 657    return 0 on success, non-zero on failure (with errno set) */
 658 static int
 659 _portable_fseek(FILE *fp, Py_off_t offset, int whence)
 660 {
 661 #if !defined(HAVE_LARGEFILE_SUPPORT)
 662         return fseek(fp, offset, whence);
 663 #elif defined(HAVE_FSEEKO) && SIZEOF_OFF_T >= 8
 664         return fseeko(fp, offset, whence);
 665 #elif defined(HAVE_FSEEK64)
 666         return fseek64(fp, offset, whence);
 667 #elif defined(__BEOS__)
 668         return _fseek(fp, offset, whence);
 669 #elif SIZEOF_FPOS_T >= 8
 670         /* lacking a 64-bit capable fseek(), use a 64-bit capable fsetpos()
 671            and fgetpos() to implement fseek()*/
 672         fpos_t pos;
 673         switch (whence) {
 674         case SEEK_END:
 675 #ifdef MS_WINDOWS
 676                 fflush(fp);
 677                 if (_lseeki64(fileno(fp), 0, 2) == -1)
 678                         return -1;
 679 #else
 680                 if (fseek(fp, 0, SEEK_END) != 0)
 681                         return -1;
 682 #endif
 683                 /* fall through */
 684         case SEEK_CUR:
 685                 if (fgetpos(fp, &pos) != 0)
 686                         return -1;
 687                 offset += pos;
 688                 break;
 689         /* case SEEK_SET: break; */
 690         }
 691         return fsetpos(fp, &offset);
 692 #else
 693 #error "Large file support, but no way to fseek."
 694 #endif
 695 }
 696
 697
 698 /* a portable ftell() function
 699    Return -1 on failure with errno set appropriately, current file
 700    position on success */
 701 static Py_off_t
 702 _portable_ftell(FILE* fp)
 703 {
 704 #if !defined(HAVE_LARGEFILE_SUPPORT)
 705         return ftell(fp);
 706 #elif defined(HAVE_FTELLO) && SIZEOF_OFF_T >= 8
 707         return ftello(fp);
 708 #elif defined(HAVE_FTELL64)
 709         return ftell64(fp);
 710 #elif SIZEOF_FPOS_T >= 8
 711         fpos_t pos;
 712         if (fgetpos(fp, &pos) != 0)
 713                 return -1;
 714         return pos;
 715 #else
 716 #error "Large file support, but no way to ftell."
 717 #endif
 718 }
 719
 720
 721 static PyObject *
 722 file_seek(PyFileObject *f, PyObject *args)
 723 {
 724         int whence;
 725         int ret;
 726         Py_off_t offset;
 727         PyObject *offobj, *off_index;
 728
 729         if (f->f_fp == NULL)
 730                 return err_closed();
 731         drop_readahead(f);
 732         whence = 0;
 733         if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &whence))
 734                 return NULL;
 735         off_index = PyNumber_Index(offobj);
 736         if (!off_index) {
 737                 if (!PyFloat_Check(offobj))
 738                         return NULL;
 739                 /* Deprecated in 2.6 */
 740                 PyErr_Clear();
 741                 if (PyErr_WarnEx(PyExc_DeprecationWarning,
 742                                  "integer argument expected, got float",
 743                                  1) < 0)
 744                         return NULL;
 745                 off_index = offobj;
 746                 Py_INCREF(offobj);
 747         }
 748 #if !defined(HAVE_LARGEFILE_SUPPORT)
 749         offset = PyInt_AsLong(off_index);
 750 #else
 751         offset = PyLong_Check(off_index) ?
 752                 PyLong_AsLongLong(off_index) : PyInt_AsLong(off_index);
 753 #endif
 754         Py_DECREF(off_index);
 755         if (PyErr_Occurred())
 756                 return NULL;
 757
 758         FILE_BEGIN_ALLOW_THREADS(f)
 759         errno = 0;
 760         ret = _portable_fseek(f->f_fp, offset, whence);
 761         FILE_END_ALLOW_THREADS(f)
 762
 763         if (ret != 0) {
 764                 PyErr_SetFromErrno(PyExc_IOError);
 765                 clearerr(f->f_fp);
 766                 return NULL;
 767         }
 768         f->f_skipnextlf = 0;
 769         Py_INCREF(Py_None);
 770         return Py_None;
 771 }
 772
 773
 774 #ifdef HAVE_FTRUNCATE
 775 static PyObject *
 776 file_truncate(PyFileObject *f, PyObject *args)
 777 {
 778         Py_off_t newsize;
 779         PyObject *newsizeobj = NULL;
 780         Py_off_t initialpos;
 781         int ret;
 782
 783         if (f->f_fp == NULL)
 784                 return err_closed();
 785         if (!PyArg_UnpackTuple(args, "truncate", 0, 1, &newsizeobj))
 786                 return NULL;
 787
 788         /* Get current file position.  If the file happens to be open for
 789          * update and the last operation was an input operation, C doesn't
 790          * define what the later fflush() will do, but we promise truncate()
 791          * won't change the current position (and fflush() *does* change it
 792          * then at least on Windows).  The easiest thing is to capture
 793          * current pos now and seek back to it at the end.
 794          */
 795         FILE_BEGIN_ALLOW_THREADS(f)
 796         errno = 0;
 797         initialpos = _portable_ftell(f->f_fp);
 798         FILE_END_ALLOW_THREADS(f)
 799         if (initialpos == -1)
 800                 goto onioerror;
 801
 802         /* Set newsize to current postion if newsizeobj NULL, else to the
 803          * specified value.
 804          */
 805         if (newsizeobj != NULL) {
 806 #if !defined(HAVE_LARGEFILE_SUPPORT)
 807                 newsize = PyInt_AsLong(newsizeobj);
 808 #else
 809                 newsize = PyLong_Check(newsizeobj) ?
 810                                 PyLong_AsLongLong(newsizeobj) :
 811                                 PyInt_AsLong(newsizeobj);
 812 #endif
 813                 if (PyErr_Occurred())
 814                         return NULL;
 815         }
 816         else /* default to current position */
 817                 newsize = initialpos;
 818
 819         /* Flush the stream.  We're mixing stream-level I/O with lower-level
 820          * I/O, and a flush may be necessary to synch both platform views
 821          * of the current file state.
 822          */
 823         FILE_BEGIN_ALLOW_THREADS(f)
 824         errno = 0;
 825         ret = fflush(f->f_fp);
 826         FILE_END_ALLOW_THREADS(f)
 827         if (ret != 0)
 828                 goto onioerror;
 829
 830 #ifdef MS_WINDOWS
 831         /* MS _chsize doesn't work if newsize doesn't fit in 32 bits,
 832            so don't even try using it. */
 833         {
 834                 HANDLE hFile;
 835
 836                 /* Have to move current pos to desired endpoint on Windows. */
 837                 FILE_BEGIN_ALLOW_THREADS(f)
 838                 errno = 0;
 839                 ret = _portable_fseek(f->f_fp, newsize, SEEK_SET) != 0;
 840                 FILE_END_ALLOW_THREADS(f)
 841                 if (ret)
 842                         goto onioerror;
 843
 844                 /* Truncate.  Note that this may grow the file! */
 845                 FILE_BEGIN_ALLOW_THREADS(f)
 846                 errno = 0;
 847                 hFile = (HANDLE)_get_osfhandle(fileno(f->f_fp));
 848                 ret = hFile == (HANDLE)-1;
 849                 if (ret == 0) {
 850                         ret = SetEndOfFile(hFile) == 0;
 851                         if (ret)
 852                                 errno = EACCES;
 853                 }
 854                 FILE_END_ALLOW_THREADS(f)
 855                 if (ret)
 856                         goto onioerror;
 857         }
 858 #else
 859         FILE_BEGIN_ALLOW_THREADS(f)
 860         errno = 0;
 861         ret = ftruncate(fileno(f->f_fp), newsize);
 862         FILE_END_ALLOW_THREADS(f)
 863         if (ret != 0)
 864                 goto onioerror;
 865 #endif /* !MS_WINDOWS */
 866
 867         /* Restore original file position. */
 868         FILE_BEGIN_ALLOW_THREADS(f)
 869         errno = 0;
 870         ret = _portable_fseek(f->f_fp, initialpos, SEEK_SET) != 0;
 871         FILE_END_ALLOW_THREADS(f)
 872         if (ret)
 873                 goto onioerror;
 874
 875         Py_INCREF(Py_None);
 876         return Py_None;
 877
 878 onioerror:
 879         PyErr_SetFromErrno(PyExc_IOError);
 880         clearerr(f->f_fp);
 881         return NULL;
 882 }
 883 #endif /* HAVE_FTRUNCATE */
 884
 885 static PyObject *
 886 file_tell(PyFileObject *f)
 887 {
 888         Py_off_t pos;
 889
 890         if (f->f_fp == NULL)
 891                 return err_closed();
 892         FILE_BEGIN_ALLOW_THREADS(f)
 893         errno = 0;
 894         pos = _portable_ftell(f->f_fp);
 895         FILE_END_ALLOW_THREADS(f)
 896
 897         if (pos == -1) {
 898                 PyErr_SetFromErrno(PyExc_IOError);
 899                 clearerr(f->f_fp);
 900                 return NULL;
 901         }
 902         if (f->f_skipnextlf) {
 903                 int c;
 904                 c = GETC(f->f_fp);
 905                 if (c == '\n') {
 906                         f->f_newlinetypes |= NEWLINE_CRLF;
 907                         pos++;
 908                         f->f_skipnextlf = 0;
 909                 } else if (c != EOF) ungetc(c, f->f_fp);
 910         }
 911 #if !defined(HAVE_LARGEFILE_SUPPORT)
 912         return PyInt_FromLong(pos);
 913 #else
 914         return PyLong_FromLongLong(pos);
 915 #endif
 916 }
 917
 918 static PyObject *
 919 file_fileno(PyFileObject *f)
 920 {
 921         if (f->f_fp == NULL)
 922                 return err_closed();
 923         return PyInt_FromLong((long) fileno(f->f_fp));
 924 }
 925
 926 static PyObject *
 927 file_flush(PyFileObject *f)
 928 {
 929         int res;
 930
 931         if (f->f_fp == NULL)
 932                 return err_closed();
 933         FILE_BEGIN_ALLOW_THREADS(f)
 934         errno = 0;
 935         res = fflush(f->f_fp);
 936         FILE_END_ALLOW_THREADS(f)
 937         if (res != 0) {
 938                 PyErr_SetFromErrno(PyExc_IOError);
 939                 clearerr(f->f_fp);
 940                 return NULL;
 941         }
 942         Py_INCREF(Py_None);
 943         return Py_None;
 944 }
 945
 946 static PyObject *
 947 file_isatty(PyFileObject *f)
 948 {
 949         long res;
 950         if (f->f_fp == NULL)
 951                 return err_closed();
 952         FILE_BEGIN_ALLOW_THREADS(f)
 953         res = isatty((int)fileno(f->f_fp));
 954         FILE_END_ALLOW_THREADS(f)
 955         return PyBool_FromLong(res);
 956 }
 957
 958
 959 #if BUFSIZ < 8192
 960 #define SMALLCHUNK 8192
 961 #else
 962 #define SMALLCHUNK BUFSIZ
 963 #endif
 964
 965 #if SIZEOF_INT < 4
 966 #define BIGCHUNK  (512 * 32)
 967 #else
 968 #define BIGCHUNK  (512 * 1024)
 969 #endif
 970
 971 static size_t
 972 new_buffersize(PyFileObject *f, size_t currentsize)
 973 {
 974 #ifdef HAVE_FSTAT
 975         off_t pos, end;
 976         struct stat st;
 977         if (fstat(fileno(f->f_fp), &st) == 0) {
 978                 end = st.st_size;
 979                 /* The following is not a bug: we really need to call lseek()
 980                    *and* ftell().  The reason is that some stdio libraries
 981                    mistakenly flush their buffer when ftell() is called and
 982                    the lseek() call it makes fails, thereby throwing away
 983                    data that cannot be recovered in any way.  To avoid this,
 984                    we first test lseek(), and only call ftell() if lseek()
 985                    works.  We can't use the lseek() value either, because we
 986                    need to take the amount of buffered data into account.
 987                    (Yet another reason why stdio stinks. :-) */
 988                 pos = lseek(fileno(f->f_fp), 0L, SEEK_CUR);
 989                 if (pos >= 0) {
 990                         pos = ftell(f->f_fp);
 991                 }
 992                 if (pos < 0)
 993                         clearerr(f->f_fp);
 994                 if (end > pos && pos >= 0)
 995                         return currentsize + end - pos + 1;
 996                 /* Add 1 so if the file were to grow we'd notice. */
 997         }
 998 #endif
 999         if (currentsize > SMALLCHUNK) {
1000                 /* Keep doubling until we reach BIGCHUNK;
1001                    then keep adding BIGCHUNK. */
1002                 if (currentsize <= BIGCHUNK)
1003                         return currentsize + currentsize;
1004                 else
1005                         return currentsize + BIGCHUNK;
1006         }
1007         return currentsize + SMALLCHUNK;
1008 }
1009
/* BLOCKED_ERRNO(x) is true when x equals EWOULDBLOCK or EAGAIN, whichever
 * of the two the platform defines.  It is constant 0 when neither exists.
 */
#if defined(EWOULDBLOCK) && defined(EAGAIN) && EWOULDBLOCK != EAGAIN
#define BLOCKED_ERRNO(x) ((x) == EWOULDBLOCK || (x) == EAGAIN)
#elif defined(EWOULDBLOCK)
#define BLOCKED_ERRNO(x) ((x) == EWOULDBLOCK)
#elif defined(EAGAIN)
#define BLOCKED_ERRNO(x) ((x) == EAGAIN)
#else
#define BLOCKED_ERRNO(x) 0
#endif
1023
1024 static PyObject *
1025 file_read(PyFileObject *f, PyObject *args)
1026 {
1027         long bytesrequested = -1;
1028         size_t bytesread, buffersize, chunksize;
1029         PyObject *v;
1030
1031         if (f->f_fp == NULL)
1032                 return err_closed();
1033         /* refuse to mix with f.next() */
1034         if (f->f_buf != NULL &&
1035             (f->f_bufend - f->f_bufptr) > 0 &&
1036             f->f_buf[0] != '\0')
1037                 return err_iterbuffered();
1038         if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
1039                 return NULL;
1040         if (bytesrequested < 0)
1041                 buffersize = new_buffersize(f, (size_t)0);
1042         else
1043                 buffersize = bytesrequested;
1044         if (buffersize > PY_SSIZE_T_MAX) {
1045                 PyErr_SetString(PyExc_OverflowError,
1046         "requested number of bytes is more than a Python string can hold");
1047                 return NULL;
1048         }
1049         v = PyString_FromStringAndSize((char *)NULL, buffersize);
1050         if (v == NULL)
1051                 return NULL;
1052         bytesread = 0;
1053         for (;;) {
1054                 FILE_BEGIN_ALLOW_THREADS(f)
1055                 errno = 0;
1056                 chunksize = Py_UniversalNewlineFread(BUF(v) + bytesread,
1057                           buffersize - bytesread, f->f_fp, (PyObject *)f);
1058                 FILE_END_ALLOW_THREADS(f)
1059                 if (chunksize == 0) {
1060                         if (!ferror(f->f_fp))
1061                                 break;
1062                         clearerr(f->f_fp);
1063                         /* When in non-blocking mode, data shouldn't
1064                          * be discarded if a blocking signal was
1065                          * received. That will also happen if
1066                          * chunksize != 0, but bytesread < buffersize. */
1067                         if (bytesread > 0 && BLOCKED_ERRNO(errno))
1068                                 break;
1069                         PyErr_SetFromErrno(PyExc_IOError);
1070                         Py_DECREF(v);
1071                         return NULL;
1072                 }
1073                 bytesread += chunksize;
1074                 if (bytesread < buffersize) {
1075                         clearerr(f->f_fp);
1076                         break;
1077                 }
1078                 if (bytesrequested < 0) {
1079                         buffersize = new_buffersize(f, buffersize);
1080                         if (_PyString_Resize(&v, buffersize) < 0)
1081                                 return NULL;
1082                 } else {
1083                         /* Got what was requested. */
1084                         break;
1085                 }
1086         }
1087         if (bytesread != buffersize)
1088                 _PyString_Resize(&v, bytesread);
1089         return v;
1090 }
1091
1092 static PyObject *
1093 file_readinto(PyFileObject *f, PyObject *args)
1094 {
1095         char *ptr;
1096         Py_ssize_t ntodo;
1097         Py_ssize_t ndone, nnow;
1098         Py_buffer pbuf;
1099
1100         if (f->f_fp == NULL)
1101                 return err_closed();
1102         /* refuse to mix with f.next() */
1103         if (f->f_buf != NULL &&
1104             (f->f_bufend - f->f_bufptr) > 0 &&
1105             f->f_buf[0] != '\0')
1106                 return err_iterbuffered();
1107         if (!PyArg_ParseTuple(args, "w*", &pbuf))
1108                 return NULL;
1109         ptr = pbuf.buf;
1110         ntodo = pbuf.len;
1111         ndone = 0;
1112         while (ntodo > 0) {
1113                 FILE_BEGIN_ALLOW_THREADS(f)
1114                 errno = 0;
1115                 nnow = Py_UniversalNewlineFread(ptr+ndone, ntodo, f->f_fp,
1116                                                 (PyObject *)f);
1117                 FILE_END_ALLOW_THREADS(f)
1118                 if (nnow == 0) {
1119                         if (!ferror(f->f_fp))
1120                                 break;
1121                         PyErr_SetFromErrno(PyExc_IOError);
1122                         clearerr(f->f_fp);
1123                         PyBuffer_Release(&pbuf);
1124                         return NULL;
1125                 }
1126                 ndone += nnow;
1127                 ntodo -= nnow;
1128         }
1129         PyBuffer_Release(&pbuf);
1130         return PyInt_FromSsize_t(ndone);
1131 }
1132
1133 /**************************************************************************
1134 Routine to get next line using platform fgets().
1135
1136 Under MSVC 6:
1137
1138 + MS threadsafe getc is very slow (multiple layers of function calls before+
1139   after each character, to lock+unlock the stream).
1140 + The stream-locking functions are MS-internal -- can't access them from user
1141   code.
1142 + There's nothing Tim could find in the MS C or platform SDK libraries that
1143   can worm around this.
1144 + MS fgets locks/unlocks only once per line; it's the only hook we have.
1145
1146 So we use fgets for speed(!), despite that it's painful.
1147
1148 MS realloc is also slow.
1149
1150 Reports from other platforms on this method vs getc_unlocked (which MS doesn't
1151 have):
1152         Linux           a wash
1153         Solaris         a wash
1154         Tru64 Unix      getline_via_fgets significantly faster
1155
1156 CAUTION:  The C std isn't clear about this:  in those cases where fgets
1157 writes something into the buffer, can it write into any position beyond the
1158 required trailing null byte?  MSVC 6 fgets does not, and no platform is (yet)
1159 known on which it does; and it would be a strange way to code fgets. Still,
1160 getline_via_fgets may not work correctly if it does.  The std test
1161 test_bufio.py should fail if platform fgets() routinely writes beyond the
1162 trailing null byte.  #define DONT_USE_FGETS_IN_GETLINE to disable this code.
1163 **************************************************************************/
1164
/* Decide whether getline_via_fgets() is compiled in.
 *
 * On a platform with getc_unlocked():
 *     getc_unlocked() is used by default;
 *     #define USE_FGETS_IN_GETLINE to use fgets() instead.
 * On a platform without getc_unlocked():
 *     fgets() is used by default;
 *     #define DONT_USE_FGETS_IN_GETLINE to avoid it.
 * DONT_USE_FGETS_IN_GETLINE always wins when both are defined.
 */
#if defined(DONT_USE_FGETS_IN_GETLINE)
#undef USE_FGETS_IN_GETLINE
#elif !defined(HAVE_GETC_UNLOCKED) && !defined(USE_FGETS_IN_GETLINE)
#define USE_FGETS_IN_GETLINE
#endif
1181
#ifdef USE_FGETS_IN_GETLINE
/* Read one line from fp using fgets() and return it as a new string object.
 *
 * The free part of the buffer is pre-filled with '\n' before each fgets()
 * call.  Afterwards the first '\n' found tells how far fgets() wrote: if a
 * '\0' follows it, the newline came from the file; otherwise it is our own
 * filler and the '\0' just before it marks the end of a final line that has
 * no newline.
 *
 * The line is first read into a stack buffer (one or two fgets() calls) and
 * only moved to a growing string object when it does not fit.  When fgets()
 * returns NULL (the code does not distinguish EOF from a read error) the
 * bytes collected so far are returned, which may be an empty string.
 *
 * Returns NULL with an exception set if PyErr_CheckSignals() reports an
 * error, if the line would exceed PY_SSIZE_T_MAX bytes (OverflowError), or
 * if a string allocation or resize fails.
 */
static PyObject*
getline_via_fgets(PyFileObject *f, FILE *fp)
{
/* INITBUFSIZE is the maximum line length that lets us get away with the fast
 * no-realloc, one-fgets()-call path.  Boosting it isn't free, because we have
 * to fill this much of the buffer with a known value in order to figure out
 * how much of the buffer fgets() overwrites.  So if INITBUFSIZE is larger
 * than "most" lines, we waste time filling unused buffer slots.  100 is
 * surely adequate for most peoples' email archives, chewing over source code,
 * etc -- "regular old text files".
 * MAXBUFSIZE is the maximum line length that lets us get away with the less
 * fast (but still zippy) no-realloc, two-fgets()-call path.  See above for
 * cautions about boosting that.  300 was chosen because the worst real-life
 * text-crunching job reported on Python-Dev was a mail-log crawler where over
 * half the lines were 254 chars.
 */
#define INITBUFSIZE 100
#define MAXBUFSIZE 300
        char* p;        /* temp */
        char buf[MAXBUFSIZE];
        PyObject* v;    /* the string object result */
        char* pvfree;   /* address of next free slot */
        char* pvend;    /* address one beyond last free slot */
        size_t nfree;   /* # of free buffer slots; pvend-pvfree */
        size_t total_v_size;  /* total # of slots in buffer */
        size_t increment;       /* amount to increment the buffer */
        size_t prev_v_size;

        /* Optimize for normal case:  avoid _PyString_Resize if at all
         * possible via first reading into stack buffer "buf".
         */
        total_v_size = INITBUFSIZE;     /* start small and pray */
        pvfree = buf;
        for (;;) {
                FILE_BEGIN_ALLOW_THREADS(f)
                pvend = buf + total_v_size;
                nfree = pvend - pvfree;
                memset(pvfree, '\n', nfree);
                assert(nfree < INT_MAX); /* Should be at most MAXBUFSIZE */
                p = fgets(pvfree, (int)nfree, fp);
                FILE_END_ALLOW_THREADS(f)

                if (p == NULL) {
                        /* Nothing more could be read: return what has
                         * been collected in buf so far.
                         */
                        clearerr(fp);
                        if (PyErr_CheckSignals())
                                return NULL;
                        v = PyString_FromStringAndSize(buf, pvfree - buf);
                        return v;
                }
                /* fgets read *something* */
                p = memchr(pvfree, '\n', nfree);
                if (p != NULL) {
                        /* Did the \n come from fgets or from us?
                         * Since fgets stops at the first \n, and then writes
                         * \0, if it's from fgets a \0 must be next.  But if
                         * that's so, it could not have come from us, since
                         * the \n's we filled the buffer with have only more
                         * \n's to the right.
                         */
                        if (p+1 < pvend && *(p+1) == '\0') {
                                /* It's from fgets:  we win!  In particular,
                                 * we haven't done any mallocs yet, and can
                                 * build the final result on the first try.
                                 */
                                ++p;    /* include \n from fgets */
                        }
                        else {
                                /* Must be from us:  fgets didn't fill the
                                 * buffer and didn't find a newline, so it
                                 * must be the last and newline-free line of
                                 * the file.
                                 */
                                assert(p > pvfree && *(p-1) == '\0');
                                --p;    /* don't include \0 from fgets */
                        }
                        v = PyString_FromStringAndSize(buf, p - buf);
                        return v;
                }
                /* yuck:  fgets overwrote all the newlines, i.e. the entire
                 * buffer.  So this line isn't over yet, or maybe it is but
                 * we're exactly at EOF.  If we haven't already, try using the
                 * rest of the stack buffer.
                 */
                assert(*(pvend-1) == '\0');
                if (pvfree == buf) {
                        pvfree = pvend - 1;     /* overwrite trailing null */
                        total_v_size = MAXBUFSIZE;
                }
                else
                        break;
        }

        /* The stack buffer isn't big enough; malloc a string object and read
         * into its buffer.
         */
        total_v_size = MAXBUFSIZE << 1;
        v = PyString_FromStringAndSize((char*)NULL,
                                       (Py_ssize_t)total_v_size);
        if (v == NULL)
                return v;
        /* copy over everything except the last null byte */
        memcpy(BUF(v), buf, MAXBUFSIZE-1);
        pvfree = BUF(v) + MAXBUFSIZE - 1;

        /* Keep reading stuff into v; if it ever ends successfully, break
         * after setting p one beyond the end of the line.  The code here is
         * very much like the code above, except reads into v's buffer; see
         * the code above for detailed comments about the logic.
         */
        for (;;) {
                FILE_BEGIN_ALLOW_THREADS(f)
                pvend = BUF(v) + total_v_size;
                nfree = pvend - pvfree;
                memset(pvfree, '\n', nfree);
                assert(nfree < INT_MAX);
                p = fgets(pvfree, (int)nfree, fp);
                FILE_END_ALLOW_THREADS(f)

                if (p == NULL) {
                        clearerr(fp);
                        if (PyErr_CheckSignals()) {
                                Py_DECREF(v);
                                return NULL;
                        }
                        p = pvfree;
                        break;
                }
                p = memchr(pvfree, '\n', nfree);
                if (p != NULL) {
                        if (p+1 < pvend && *(p+1) == '\0') {
                                /* \n came from fgets */
                                ++p;
                                break;
                        }
                        /* \n came from us; last line of file, no newline */
                        assert(p > pvfree && *(p-1) == '\0');
                        --p;
                        break;
                }
                /* expand buffer and try again */
                assert(*(pvend-1) == '\0');
                increment = total_v_size >> 2;  /* mild exponential growth */
                /* The next fgets() call is handed increment + 1 free slots
                 * as an int count, so keep each growth step small enough
                 * for that count to stay below INT_MAX.
                 */
                if (increment > (size_t)(INT_MAX - 2))
                        increment = (size_t)(INT_MAX - 2);
                prev_v_size = total_v_size;
                total_v_size += increment;
                /* check for overflow */
                if (total_v_size <= prev_v_size ||
                    total_v_size > PY_SSIZE_T_MAX) {
                        PyErr_SetString(PyExc_OverflowError,
                            "line is longer than a Python string can hold");
                        Py_DECREF(v);
                        return NULL;
                }
                /* Pass the size as Py_ssize_t: narrowing it to int would
                 * truncate it where int is narrower than Py_ssize_t, while
                 * pvfree below is still computed from the full size.
                 */
                if (_PyString_Resize(&v, (Py_ssize_t)total_v_size) < 0)
                        return NULL;
                /* overwrite the trailing null byte */
                pvfree = BUF(v) + (prev_v_size - 1);
        }
        /* Trim the string to the line that was actually read. */
        if (BUF(v) + total_v_size != p)
                _PyString_Resize(&v, p - BUF(v));
        return v;
#undef INITBUFSIZE
#undef MAXBUFSIZE
}
#endif  /* ifdef USE_FGETS_IN_GETLINE */
1346
1347 /* Internal routine to get a line.
1348    Size argument interpretation:
1349    > 0: max length;
1350    <= 0: read arbitrary line
1351 */
1352
1353 static PyObject *
1354 get_line(PyFileObject *f, int n)
1355 {
1356         FILE *fp = f->f_fp;
1357         int c;
1358         char *buf, *end;
1359         size_t total_v_size;    /* total # of slots in buffer */
1360         size_t used_v_size;     /* # used slots in buffer */
1361         size_t increment;       /* amount to increment the buffer */
1362         PyObject *v;
1363         int newlinetypes = f->f_newlinetypes;
1364         int skipnextlf = f->f_skipnextlf;
1365         int univ_newline = f->f_univ_newline;
1366
1367 #if defined(USE_FGETS_IN_GETLINE)
1368         if (n <= 0 && !univ_newline )
1369                 return getline_via_fgets(f, fp);
1370 #endif
1371         total_v_size = n > 0 ? n : 100;
1372         v = PyString_FromStringAndSize((char *)NULL, total_v_size);
1373         if (v == NULL)
1374                 return NULL;
1375         buf = BUF(v);
1376         end = buf + total_v_size;
1377
1378         for (;;) {
1379                 FILE_BEGIN_ALLOW_THREADS(f)
1380                 FLOCKFILE(fp);
1381                 if (univ_newline) {
1382                         c = 'x'; /* Shut up gcc warning */
1383                         while ( buf != end && (c = GETC(fp)) != EOF ) {
1384                                 if (skipnextlf ) {
1385                                         skipnextlf = 0;
1386                                         if (c == '\n') {
1387                                                 /* Seeing a \n here with
1388                                                  * skipnextlf true means we
1389                                                  * saw a \r before.
1390                                                  */
1391                                                 newlinetypes |= NEWLINE_CRLF;
1392                                                 c = GETC(fp);
1393                                                 if (c == EOF) break;
1394                                         } else {
1395                                                 newlinetypes |= NEWLINE_CR;
1396                                         }
1397                                 }
1398                                 if (c == '\r') {
1399                                         skipnextlf = 1;
1400                                         c = '\n';
1401                                 } else if ( c == '\n')
1402                                         newlinetypes |= NEWLINE_LF;
1403                                 *buf++ = c;
1404                                 if (c == '\n') break;
1405                         }
1406                         if ( c == EOF && skipnextlf )
1407                                 newlinetypes |= NEWLINE_CR;
1408                 } else /* If not universal newlines use the normal loop */
1409                 while ((c = GETC(fp)) != EOF &&
1410                        (*buf++ = c) != '\n' &&
1411                         buf != end)
1412                         ;
1413                 FUNLOCKFILE(fp);
1414                 FILE_END_ALLOW_THREADS(f)
1415                 f->f_newlinetypes = newlinetypes;
1416                 f->f_skipnextlf = skipnextlf;
1417                 if (c == '\n')
1418                         break;
1419                 if (c == EOF) {
1420                         if (ferror(fp)) {
1421                                 PyErr_SetFromErrno(PyExc_IOError);
1422                                 clearerr(fp);
1423                                 Py_DECREF(v);
1424                                 return NULL;
1425                         }
1426                         clearerr(fp);
1427                         if (PyErr_CheckSignals()) {
1428                                 Py_DECREF(v);
1429                                 return NULL;
1430                         }
1431                         break;
1432                 }
1433                 /* Must be because buf == end */
1434                 if (n > 0)
1435                         break;
1436                 used_v_size = total_v_size;
1437                 increment = total_v_size >> 2; /* mild exponential growth */
1438                 total_v_size += increment;
1439                 if (total_v_size > PY_SSIZE_T_MAX) {
1440                         PyErr_SetString(PyExc_OverflowError,
1441                             "line is longer than a Python string can hold");
1442                         Py_DECREF(v);
1443                         return NULL;
1444                 }
1445                 if (_PyString_Resize(&v, total_v_size) < 0)
1446                         return NULL;
1447                 buf = BUF(v) + used_v_size;
1448                 end = BUF(v) + total_v_size;
1449         }
1450
1451         used_v_size = buf - BUF(v);
1452         if (used_v_size != total_v_size)
1453                 _PyString_Resize(&v, used_v_size);
1454         return v;
1455 }
1456
1457 /* External C interface */
1458
1459 PyObject *
1460 PyFile_GetLine(PyObject *f, int n)
1461 {
1462         PyObject *result;
1463
1464         if (f == NULL) {
1465                 PyErr_BadInternalCall();
1466                 return NULL;
1467         }
1468
1469         if (PyFile_Check(f)) {
1470                 PyFileObject *fo = (PyFileObject *)f;
1471                 if (fo->f_fp == NULL)
1472                         return err_closed();
1473                 /* refuse to mix with f.next() */
1474                 if (fo->f_buf != NULL &&
1475                     (fo->f_bufend - fo->f_bufptr) > 0 &&
1476                     fo->f_buf[0] != '\0')
1477                         return err_iterbuffered();
1478                 result = get_line(fo, n);
1479         }
1480         else {
1481                 PyObject *reader;
1482                 PyObject *args;
1483
1484                 reader = PyObject_GetAttrString(f, "readline");
1485                 if (reader == NULL)
1486                         return NULL;
1487                 if (n <= 0)
1488                         args = PyTuple_New(0);
1489                 else
1490                         args = Py_BuildValue("(i)", n);
1491                 if (args == NULL) {
1492                         Py_DECREF(reader);
1493                         return NULL;
1494                 }
1495                 result = PyEval_CallObject(reader, args);
1496                 Py_DECREF(reader);
1497                 Py_DECREF(args);
1498                 if (result != NULL && !PyString_Check(result) &&
1499                     !PyUnicode_Check(result)) {
1500                         Py_DECREF(result);
1501                         result = NULL;
1502                         PyErr_SetString(PyExc_TypeError,
1503                                    "object.readline() returned non-string");
1504                 }
1505         }
1506
1507         if (n < 0 && result != NULL && PyString_Check(result)) {
1508                 char *s = PyString_AS_STRING(result);
1509                 Py_ssize_t len = PyString_GET_SIZE(result);
1510                 if (len == 0) {
1511                         Py_DECREF(result);
1512                         result = NULL;
1513                         PyErr_SetString(PyExc_EOFError,
1514                                         "EOF when reading a line");
1515                 }
1516                 else if (s[len-1] == '\n') {
1517                         if (result->ob_refcnt == 1)
1518                                 _PyString_Resize(&result, len-1);
1519                         else {
1520                                 PyObject *v;
1521                                 v = PyString_FromStringAndSize(s, len-1);
1522                                 Py_DECREF(result);
1523                                 result = v;
1524                         }
1525                 }
1526         }
1527 #ifdef Py_USING_UNICODE
1528         if (n < 0 && result != NULL && PyUnicode_Check(result)) {
1529                 Py_UNICODE *s = PyUnicode_AS_UNICODE(result);
1530                 Py_ssize_t len = PyUnicode_GET_SIZE(result);
1531                 if (len == 0) {
1532                         Py_DECREF(result);
1533                         result = NULL;
1534                         PyErr_SetString(PyExc_EOFError,
1535                                         "EOF when reading a line");
1536                 }
1537                 else if (s[len-1] == '\n') {
1538                         if (result->ob_refcnt == 1)
1539                                 PyUnicode_Resize(&result, len-1);
1540                         else {
1541                                 PyObject *v;
1542                                 v = PyUnicode_FromUnicode(s, len-1);
1543                                 Py_DECREF(result);
1544                                 result = v;
1545                         }
1546                 }
1547         }
1548 #endif
1549         return result;
1550 }
1551
1552 /* Python method */
1553
1554 static PyObject *
1555 file_readline(PyFileObject *f, PyObject *args)
1556 {
1557         int n = -1;
1558
1559         if (f->f_fp == NULL)
1560                 return err_closed();
1561         /* refuse to mix with f.next() */
1562         if (f->f_buf != NULL &&
1563             (f->f_bufend - f->f_bufptr) > 0 &&
1564             f->f_buf[0] != '\0')
1565                 return err_iterbuffered();
1566         if (!PyArg_ParseTuple(args, "|i:readline", &n))
1567                 return NULL;
1568         if (n == 0)
1569                 return PyString_FromString("");
1570         if (n < 0)
1571                 n = 0;
1572         return get_line(f, n);
1573 }
1574
/* file.readlines([sizehint]) -> list of lines.

   Reads the stream in chunks and splits the data on '\n'.  The data is
   first read into a stack buffer (small_buffer); when a chunk contains no
   newline the buffer is moved to / grown inside a string object
   (big_buffer) whose size is doubled each time.

   sizehint is parsed as an optional C long.  When it is positive, reading
   stops once at least that many bytes have been read and a complete line
   has been split off; the trailing partial line is then completed with
   get_line().

   Returns a new list on success, or NULL with an exception set on error. */
static PyObject *
file_readlines(PyFileObject *f, PyObject *args)
{
        long sizehint = 0;
        PyObject *list = NULL;
        PyObject *line;
        char small_buffer[SMALLCHUNK];
        char *buffer = small_buffer;    /* current read buffer */
        size_t buffersize = SMALLCHUNK; /* capacity of buffer */
        PyObject *big_buffer = NULL;    /* heap replacement for small_buffer */
        size_t nfilled = 0;             /* bytes of an incomplete line kept
                                           at the start of buffer */
        size_t nread;
        size_t totalread = 0;           /* compared against sizehint */
        char *p, *q, *end;
        int err;
        int shortread = 0;              /* previous read returned less than
                                           was asked for */

        if (f->f_fp == NULL)
                return err_closed();
        /* refuse to mix with f.next() */
        if (f->f_buf != NULL &&
            (f->f_bufend - f->f_bufptr) > 0 &&
            f->f_buf[0] != '\0')
                return err_iterbuffered();
        if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
                return NULL;
        if ((list = PyList_New(0)) == NULL)
                return NULL;
        for (;;) {
                if (shortread)
                        /* After a short read, skip the read call and go
                           straight to the EOF/error check below. */
                        nread = 0;
                else {
                        FILE_BEGIN_ALLOW_THREADS(f)
                        errno = 0;
                        nread = Py_UniversalNewlineFread(buffer+nfilled,
                                buffersize-nfilled, f->f_fp, (PyObject *)f);
                        FILE_END_ALLOW_THREADS(f)
                        shortread = (nread < buffersize-nfilled);
                }
                if (nread == 0) {
                        /* Clearing sizehint keeps the partial-last-line code
                           below from calling get_line(). */
                        sizehint = 0;
                        if (!ferror(f->f_fp))
                                break;
                        PyErr_SetFromErrno(PyExc_IOError);
                        clearerr(f->f_fp);
                        goto error;
                }
                totalread += nread;
                /* Only the newly read bytes are searched; the first nfilled
                   bytes are what was left over after earlier searches. */
                p = (char *)memchr(buffer+nfilled, '\n', nread);
                if (p == NULL) {
                        /* Need a larger buffer to fit this line */
                        nfilled += nread;
                        buffersize *= 2;
                        if (buffersize > PY_SSIZE_T_MAX) {
                                PyErr_SetString(PyExc_OverflowError,
                            "line is longer than a Python string can hold");
                                goto error;
                        }
                        if (big_buffer == NULL) {
                                /* Create the big buffer */
                                big_buffer = PyString_FromStringAndSize(
                                        NULL, buffersize);
                                if (big_buffer == NULL)
                                        goto error;
                                buffer = PyString_AS_STRING(big_buffer);
                                memcpy(buffer, small_buffer, nfilled);
                        }
                        else {
                                /* Grow the big buffer */
                                if ( _PyString_Resize(&big_buffer, buffersize) < 0 )
                                        goto error;
                                buffer = PyString_AS_STRING(big_buffer);
                        }
                        continue;
                }
                end = buffer+nfilled+nread;
                q = buffer;
                do {
                        /* Process complete lines */
                        p++;
                        line = PyString_FromStringAndSize(q, p-q);
                        if (line == NULL)
                                goto error;
                        err = PyList_Append(list, line);
                        Py_DECREF(line);
                        if (err != 0)
                                goto error;
                        q = p;
                        p = (char *)memchr(q, '\n', end-q);
                } while (p != NULL);
                /* Move the remaining incomplete line to the start */
                nfilled = end-q;
                memmove(buffer, q, nfilled);
                if (sizehint > 0)
                        if (totalread >= (size_t)sizehint)
                                break;
        }
        if (nfilled != 0) {
                /* Partial last line */
                line = PyString_FromStringAndSize(buffer, nfilled);
                if (line == NULL)
                        goto error;
                if (sizehint > 0) {
                        /* Need to complete the last line */
                        /* sizehint is still positive only when the loop
                           stopped because of the hint, not at EOF. */
                        PyObject *rest = get_line(f, 0);
                        if (rest == NULL) {
                                Py_DECREF(line);
                                goto error;
                        }
                        PyString_Concat(&line, rest);
                        Py_DECREF(rest);
                        /* PyString_Concat sets line to NULL on failure. */
                        if (line == NULL)
                                goto error;
                }
                err = PyList_Append(list, line);
                Py_DECREF(line);
                if (err != 0)
                        goto error;
        }

cleanup:
        Py_XDECREF(big_buffer);
        return list;

error:
        /* Drop the partial result so that cleanup returns NULL. */
        Py_CLEAR(list);
        goto cleanup;
}
1703
1704 static PyObject *
1705 file_write(PyFileObject *f, PyObject *args)
1706 {
1707         Py_buffer pbuf;
1708         char *s;
1709         Py_ssize_t n, n2;
1710         if (f->f_fp == NULL)
1711                 return err_closed();
1712         if (f->f_binary) {
1713                 if (!PyArg_ParseTuple(args, "s*", &pbuf))
1714                         return NULL;
1715                 s = pbuf.buf;
1716                 n = pbuf.len;
1717         } else
1718                 if (!PyArg_ParseTuple(args, "t#", &s, &n))
1719                 return NULL;
1720         f->f_softspace = 0;
1721         FILE_BEGIN_ALLOW_THREADS(f)
1722         errno = 0;
1723         n2 = fwrite(s, 1, n, f->f_fp);
1724         FILE_END_ALLOW_THREADS(f)
1725         if (f->f_binary)
1726                 PyBuffer_Release(&pbuf);
1727         if (n2 != n) {
1728                 PyErr_SetFromErrno(PyExc_IOError);
1729                 clearerr(f->f_fp);
1730                 return NULL;
1731         }
1732         Py_INCREF(Py_None);
1733         return Py_None;
1734 }
1735
1736 static PyObject *
1737 file_writelines(PyFileObject *f, PyObject *seq)
1738 {
1739 #define CHUNKSIZE 1000
1740         PyObject *list, *line;
1741         PyObject *it;   /* iter(seq) */
1742         PyObject *result;
1743         int index, islist;
1744         Py_ssize_t i, j, nwritten, len;
1745
1746         assert(seq != NULL);
1747         if (f->f_fp == NULL)
1748                 return err_closed();
1749
1750         result = NULL;
1751         list = NULL;
1752         islist = PyList_Check(seq);
1753         if  (islist)
1754                 it = NULL;
1755         else {
1756                 it = PyObject_GetIter(seq);
1757                 if (it == NULL) {
1758                         PyErr_SetString(PyExc_TypeError,
1759                                 "writelines() requires an iterable argument");
1760                         return NULL;
1761                 }
1762                 /* From here on, fail by going to error, to reclaim "it". */
1763                 list = PyList_New(CHUNKSIZE);
1764                 if (list == NULL)
1765                         goto error;
1766         }
1767
1768         /* Strategy: slurp CHUNKSIZE lines into a private list,
1769            checking that they are all strings, then write that list
1770            without holding the interpreter lock, then come back for more. */
1771         for (index = 0; ; index += CHUNKSIZE) {
1772                 if (islist) {
1773                         Py_XDECREF(list);
1774                         list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
1775                         if (list == NULL)
1776                                 goto error;
1777                         j = PyList_GET_SIZE(list);
1778                 }
1779                 else {
1780                         for (j = 0; j < CHUNKSIZE; j++) {
1781                                 line = PyIter_Next(it);
1782                                 if (line == NULL) {
1783                                         if (PyErr_Occurred())
1784                                                 goto error;
1785                                         break;
1786                                 }
1787                                 PyList_SetItem(list, j, line);
1788                         }
1789                 }
1790                 if (j == 0)
1791                         break;
1792
1793                 /* Check that all entries are indeed strings. If not,
1794                    apply the same rules as for file.write() and
1795                    convert the results to strings. This is slow, but
1796                    seems to be the only way since all conversion APIs
1797                    could potentially execute Python code. */
1798                 for (i = 0; i < j; i++) {
1799                         PyObject *v = PyList_GET_ITEM(list, i);
1800                         if (!PyString_Check(v)) {
1801                                 const char *buffer;
1802                                 if (((f->f_binary &&
1803                                       PyObject_AsReadBuffer(v,
1804                                               (const void**)&buffer,
1805                                                             &len)) ||
1806                                      PyObject_AsCharBuffer(v,
1807                                                            &buffer,
1808                                                            &len))) {
1809                                         PyErr_SetString(PyExc_TypeError,
1810                         "writelines() argument must be a sequence of strings");
1811                                         goto error;
1812                                 }
1813                                 line = PyString_FromStringAndSize(buffer,
1814                                                                   len);
1815                                 if (line == NULL)
1816                                         goto error;
1817                                 Py_DECREF(v);
1818                                 PyList_SET_ITEM(list, i, line);
1819                         }
1820                 }
1821
1822                 /* Since we are releasing the global lock, the
1823                    following code may *not* execute Python code. */
1824                 f->f_softspace = 0;
1825                 FILE_BEGIN_ALLOW_THREADS(f)
1826                 errno = 0;
1827                 for (i = 0; i < j; i++) {
1828                         line = PyList_GET_ITEM(list, i);
1829                         len = PyString_GET_SIZE(line);
1830                         nwritten = fwrite(PyString_AS_STRING(line),
1831                                           1, len, f->f_fp);
1832                         if (nwritten != len) {
1833                                 FILE_ABORT_ALLOW_THREADS(f)
1834                                 PyErr_SetFromErrno(PyExc_IOError);
1835                                 clearerr(f->f_fp);
1836                                 goto error;
1837                         }
1838                 }
1839                 FILE_END_ALLOW_THREADS(f)
1840
1841                 if (j < CHUNKSIZE)
1842                         break;
1843         }
1844
1845         Py_INCREF(Py_None);
1846         result = Py_None;
1847   error:
1848         Py_XDECREF(list);
1849         Py_XDECREF(it);
1850         return result;
1851 #undef CHUNKSIZE
1852 }
1853
1854 static PyObject *
1855 file_self(PyFileObject *f)
1856 {
1857         if (f->f_fp == NULL)
1858                 return err_closed();
1859         Py_INCREF(f);
1860         return (PyObject *)f;
1861 }
1862
1863 static PyObject *
1864 file_xreadlines(PyFileObject *f)
1865 {
1866         if (PyErr_WarnPy3k("f.xreadlines() not supported in 3.x, "
1867                            "try 'for line in f' instead", 1) < 0)
1868                return NULL;
1869         return file_self(f);
1870 }
1871
1872 static PyObject *
1873 file_exit(PyObject *f, PyObject *args)
1874 {
1875         PyObject *ret = PyObject_CallMethod(f, "close", NULL);
1876         if (!ret)
1877                 /* If error occurred, pass through */
1878                 return NULL;
1879         Py_DECREF(ret);
1880         /* We cannot return the result of close since a true
1881          * value will be interpreted as "yes, swallow the
1882          * exception if one was raised inside the with block". */
1883         Py_RETURN_NONE;
1884 }
1885
1886 PyDoc_STRVAR(readline_doc,
1887 "readline([size]) -> next line from the file, as a string.\n"
1888 "\n"
1889 "Retain newline.  A non-negative size argument limits the maximum\n"
1890 "number of bytes to return (an incomplete line may be returned then).\n"
1891 "Return an empty string at EOF.");
1892
1893 PyDoc_STRVAR(read_doc,
1894 "read([size]) -> read at most size bytes, returned as a string.\n"
1895 "\n"
1896 "If the size argument is negative or omitted, read until EOF is reached.\n"
1897 "Notice that when in non-blocking mode, less data than what was requested\n"
1898 "may be returned, even if no size parameter was given.");
1899
1900 PyDoc_STRVAR(write_doc,
1901 "write(str) -> None.  Write string str to file.\n"
1902 "\n"
1903 "Note that due to buffering, flush() or close() may be needed before\n"
1904 "the file on disk reflects the data written.");
1905
1906 PyDoc_STRVAR(fileno_doc,
1907 "fileno() -> integer \"file descriptor\".\n"
1908 "\n"
1909 "This is needed for lower-level file interfaces, such os.read().");
1910
1911 PyDoc_STRVAR(seek_doc,
1912 "seek(offset[, whence]) -> None.  Move to new file position.\n"
1913 "\n"
1914 "Argument offset is a byte count.  Optional argument whence defaults to\n"
1915 "0 (offset from start of file, offset should be >= 0); other values are 1\n"
1916 "(move relative to current position, positive or negative), and 2 (move\n"
1917 "relative to end of file, usually negative, although many platforms allow\n"
1918 "seeking beyond the end of a file).  If the file is opened in text mode,\n"
1919 "only offsets returned by tell() are legal.  Use of other offsets causes\n"
1920 "undefined behavior."
1921 "\n"
1922 "Note that not all file objects are seekable.");
1923
1924 #ifdef HAVE_FTRUNCATE
1925 PyDoc_STRVAR(truncate_doc,
1926 "truncate([size]) -> None.  Truncate the file to at most size bytes.\n"
1927 "\n"
1928 "Size defaults to the current file position, as returned by tell().");
1929 #endif
1930
1931 PyDoc_STRVAR(tell_doc,
1932 "tell() -> current file position, an integer (may be a long integer).");
1933
1934 PyDoc_STRVAR(readinto_doc,
1935 "readinto() -> Undocumented.  Don't use this; it may go away.");
1936
1937 PyDoc_STRVAR(readlines_doc,
1938 "readlines([size]) -> list of strings, each a line from the file.\n"
1939 "\n"
1940 "Call readline() repeatedly and return a list of the lines so read.\n"
1941 "The optional size argument, if given, is an approximate bound on the\n"
1942 "total number of bytes in the lines returned.");
1943
1944 PyDoc_STRVAR(xreadlines_doc,
1945 "xreadlines() -> returns self.\n"
1946 "\n"
1947 "For backward compatibility. File objects now include the performance\n"
1948 "optimizations previously implemented in the xreadlines module.");
1949
1950 PyDoc_STRVAR(writelines_doc,
1951 "writelines(sequence_of_strings) -> None.  Write the strings to the file.\n"
1952 "\n"
1953 "Note that newlines are not added.  The sequence can be any iterable object\n"
1954 "producing strings. This is equivalent to calling write() for each string.");
1955
1956 PyDoc_STRVAR(flush_doc,
1957 "flush() -> None.  Flush the internal I/O buffer.");
1958
1959 PyDoc_STRVAR(close_doc,
1960 "close() -> None or (perhaps) an integer.  Close the file.\n"
1961 "\n"
1962 "Sets data attribute .closed to True.  A closed file cannot be used for\n"
1963 "further I/O operations.  close() may be called more than once without\n"
1964 "error.  Some kinds of file objects (for example, opened by popen())\n"
1965 "may return an exit status upon closing.");
1966
1967 PyDoc_STRVAR(isatty_doc,
1968 "isatty() -> true or false.  True if the file is connected to a tty device.");
1969
1970 PyDoc_STRVAR(enter_doc,
1971              "__enter__() -> self.");
1972
1973 PyDoc_STRVAR(exit_doc,
1974              "__exit__(*excinfo) -> None.  Closes the file.");
1975
/* Method table of the file type (installed as tp_methods of PyFile_Type).
   Each entry gives the Python-level name, the C implementation, the
   calling-convention flag and the docstring.  "truncate" is only present
   when HAVE_FTRUNCATE is defined; "__enter__" reuses file_self. */
static PyMethodDef file_methods[] = {
        {"readline",  (PyCFunction)file_readline, METH_VARARGS, readline_doc},
        {"read",      (PyCFunction)file_read,     METH_VARARGS, read_doc},
        {"write",     (PyCFunction)file_write,    METH_VARARGS, write_doc},
        {"fileno",    (PyCFunction)file_fileno,   METH_NOARGS,  fileno_doc},
        {"seek",      (PyCFunction)file_seek,     METH_VARARGS, seek_doc},
#ifdef HAVE_FTRUNCATE
        {"truncate",  (PyCFunction)file_truncate, METH_VARARGS, truncate_doc},
#endif
        {"tell",      (PyCFunction)file_tell,     METH_NOARGS,  tell_doc},
        {"readinto",  (PyCFunction)file_readinto, METH_VARARGS, readinto_doc},
        {"readlines", (PyCFunction)file_readlines, METH_VARARGS, readlines_doc},
        {"xreadlines",(PyCFunction)file_xreadlines, METH_NOARGS, xreadlines_doc},
        {"writelines",(PyCFunction)file_writelines, METH_O,     writelines_doc},
        {"flush",     (PyCFunction)file_flush,    METH_NOARGS,  flush_doc},
        {"close",     (PyCFunction)file_close,    METH_NOARGS,  close_doc},
        {"isatty",    (PyCFunction)file_isatty,   METH_NOARGS,  isatty_doc},
        {"__enter__", (PyCFunction)file_self,     METH_NOARGS,  enter_doc},
        {"__exit__",  (PyCFunction)file_exit,     METH_VARARGS, exit_doc},
        {NULL,        NULL}             /* sentinel */
};
1997
/* Byte offset of field x inside PyFileObject. */
#define OFF(x) offsetof(PyFileObject, x)

/* Read-only (RO) attributes served directly from object-valued fields of
   PyFileObject; installed as tp_members of PyFile_Type. */
static PyMemberDef file_memberlist[] = {
        {"mode",        T_OBJECT,       OFF(f_mode),    RO,
         "file mode ('r', 'U', 'w', 'a', possibly with 'b' or '+' added)"},
        {"name",        T_OBJECT,       OFF(f_name),    RO,
         "file name"},
        {"encoding",    T_OBJECT,       OFF(f_encoding),        RO,
         "file encoding"},
        {"errors",      T_OBJECT,       OFF(f_errors),  RO,
         "Unicode error handler"},
        /* getattr(f, "closed") is implemented without this table */
        {NULL}  /* Sentinel */
};
2012
2013 static PyObject *
2014 get_closed(PyFileObject *f, void *closure)
2015 {
2016         return PyBool_FromLong((long)(f->f_fp == 0));
2017 }
2018 static PyObject *
2019 get_newlines(PyFileObject *f, void *closure)
2020 {
2021         switch (f->f_newlinetypes) {
2022         case NEWLINE_UNKNOWN:
2023                 Py_INCREF(Py_None);
2024                 return Py_None;
2025         case NEWLINE_CR:
2026                 return PyString_FromString("\r");
2027         case NEWLINE_LF:
2028                 return PyString_FromString("\n");
2029         case NEWLINE_CR|NEWLINE_LF:
2030                 return Py_BuildValue("(ss)", "\r", "\n");
2031         case NEWLINE_CRLF:
2032                 return PyString_FromString("\r\n");
2033         case NEWLINE_CR|NEWLINE_CRLF:
2034                 return Py_BuildValue("(ss)", "\r", "\r\n");
2035         case NEWLINE_LF|NEWLINE_CRLF:
2036                 return Py_BuildValue("(ss)", "\n", "\r\n");
2037         case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF:
2038                 return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
2039         default:
2040                 PyErr_Format(PyExc_SystemError,
2041                              "Unknown newlines value 0x%x\n",
2042                              f->f_newlinetypes);
2043                 return NULL;
2044         }
2045 }
2046
2047 static PyObject *
2048 get_softspace(PyFileObject *f, void *closure)
2049 {
2050         if (PyErr_WarnPy3k("file.softspace not supported in 3.x", 1) < 0)
2051                 return NULL;
2052         return PyInt_FromLong(f->f_softspace);
2053 }
2054
2055 static int
2056 set_softspace(PyFileObject *f, PyObject *value)
2057 {
2058         int new;
2059         if (PyErr_WarnPy3k("file.softspace not supported in 3.x", 1) < 0)
2060                 return -1;
2061
2062         if (value == NULL) {
2063                 PyErr_SetString(PyExc_TypeError,
2064                                 "can't delete softspace attribute");
2065                 return -1;
2066         }
2067
2068         new = PyInt_AsLong(value);
2069         if (new == -1 && PyErr_Occurred())
2070                 return -1;
2071         f->f_softspace = new;
2072         return 0;
2073 }
2074
/* Computed attributes of the file type (installed as tp_getset of
   PyFile_Type).  "closed" and "newlines" have no setter; "softspace" is
   the only one with both a getter and a setter. */
static PyGetSetDef file_getsetlist[] = {
        {"closed", (getter)get_closed, NULL, "True if the file is closed"},
        {"newlines", (getter)get_newlines, NULL,
         "end-of-line convention used in this file"},
        {"softspace", (getter)get_softspace, (setter)set_softspace,
         "flag indicating that a space needs to be printed; used by print"},
        {0},
};
2083
2084 static void
2085 drop_readahead(PyFileObject *f)
2086 {
2087         if (f->f_buf != NULL) {
2088                 PyMem_Free(f->f_buf);
2089                 f->f_buf = NULL;
2090         }
2091 }
2092
2093 /* Make sure that file has a readahead buffer with at least one byte
2094    (unless at EOF) and no more than bufsize.  Returns negative value on
2095    error, will set MemoryError if bufsize bytes cannot be allocated. */
2096 static int
2097 readahead(PyFileObject *f, int bufsize)
2098 {
2099         Py_ssize_t chunksize;
2100
2101         if (f->f_buf != NULL) {
2102                 if( (f->f_bufend - f->f_bufptr) >= 1)
2103                         return 0;
2104                 else
2105                         drop_readahead(f);
2106         }
2107         if ((f->f_buf = (char *)PyMem_Malloc(bufsize)) == NULL) {
2108                 PyErr_NoMemory();
2109                 return -1;
2110         }
2111         FILE_BEGIN_ALLOW_THREADS(f)
2112         errno = 0;
2113         chunksize = Py_UniversalNewlineFread(
2114                 f->f_buf, bufsize, f->f_fp, (PyObject *)f);
2115         FILE_END_ALLOW_THREADS(f)
2116         if (chunksize == 0) {
2117                 if (ferror(f->f_fp)) {
2118                         PyErr_SetFromErrno(PyExc_IOError);
2119                         clearerr(f->f_fp);
2120                         drop_readahead(f);
2121                         return -1;
2122                 }
2123         }
2124         f->f_bufptr = f->f_buf;
2125         f->f_bufend = f->f_buf + chunksize;
2126         return 0;
2127 }
2128
2129 /* Used by file_iternext.  The returned string will start with 'skip'
2130    uninitialized bytes followed by the remainder of the line. Don't be
2131    horrified by the recursive call: maximum recursion depth is limited by
2132    logarithmic buffer growth to about 50 even when reading a 1gb line. */
2133
2134 static PyStringObject *
2135 readahead_get_line_skip(PyFileObject *f, int skip, int bufsize)
2136 {
2137         PyStringObject* s;
2138         char *bufptr;
2139         char *buf;
2140         Py_ssize_t len;
2141
2142         if (f->f_buf == NULL)
2143                 if (readahead(f, bufsize) < 0)
2144                         return NULL;
2145
2146         len = f->f_bufend - f->f_bufptr;
2147         if (len == 0)
2148                 return (PyStringObject *)
2149                         PyString_FromStringAndSize(NULL, skip);
2150         bufptr = (char *)memchr(f->f_bufptr, '\n', len);
2151         if (bufptr != NULL) {
2152                 bufptr++;                       /* Count the '\n' */
2153                 len = bufptr - f->f_bufptr;
2154                 s = (PyStringObject *)
2155                         PyString_FromStringAndSize(NULL, skip+len);
2156                 if (s == NULL)
2157                         return NULL;
2158                 memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
2159                 f->f_bufptr = bufptr;
2160                 if (bufptr == f->f_bufend)
2161                         drop_readahead(f);
2162         } else {
2163                 bufptr = f->f_bufptr;
2164                 buf = f->f_buf;
2165                 f->f_buf = NULL;        /* Force new readahead buffer */
2166                 assert(skip+len < INT_MAX);
2167                 s = readahead_get_line_skip(
2168                         f, (int)(skip+len), bufsize + (bufsize>>2) );
2169                 if (s == NULL) {
2170                         PyMem_Free(buf);
2171                         return NULL;
2172                 }
2173                 memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
2174                 PyMem_Free(buf);
2175         }
2176         return s;
2177 }
2178
2179 /* A larger buffer size may actually decrease performance. */
2180 #define READAHEAD_BUFSIZE 8192
2181
2182 static PyObject *
2183 file_iternext(PyFileObject *f)
2184 {
2185         PyStringObject* l;
2186
2187         if (f->f_fp == NULL)
2188                 return err_closed();
2189
2190         l = readahead_get_line_skip(f, 0, READAHEAD_BUFSIZE);
2191         if (l == NULL || PyString_GET_SIZE(l) == 0) {
2192                 Py_XDECREF(l);
2193                 return NULL;
2194         }
2195         return (PyObject *)l;
2196 }
2197
2198
2199 static PyObject *
2200 file_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2201 {
2202         PyObject *self;
2203         static PyObject *not_yet_string;
2204
2205         assert(type != NULL && type->tp_alloc != NULL);
2206
2207         if (not_yet_string == NULL) {
2208                 not_yet_string = PyString_InternFromString("<uninitialized file>");
2209                 if (not_yet_string == NULL)
2210                         return NULL;
2211         }
2212
2213         self = type->tp_alloc(type, 0);
2214         if (self != NULL) {
2215                 /* Always fill in the name and mode, so that nobody else
2216                    needs to special-case NULLs there. */
2217                 Py_INCREF(not_yet_string);
2218                 ((PyFileObject *)self)->f_name = not_yet_string;
2219                 Py_INCREF(not_yet_string);
2220                 ((PyFileObject *)self)->f_mode = not_yet_string;
2221                 Py_INCREF(Py_None);
2222                 ((PyFileObject *)self)->f_encoding = Py_None;
2223                 Py_INCREF(Py_None);
2224                 ((PyFileObject *)self)->f_errors = Py_None;
2225                 ((PyFileObject *)self)->weakreflist = NULL;
2226                 ((PyFileObject *)self)->unlocked_count = 0;
2227         }
2228         return self;
2229 }
2230
2231 static int
2232 file_init(PyObject *self, PyObject *args, PyObject *kwds)
2233 {
2234         PyFileObject *foself = (PyFileObject *)self;
2235         int ret = 0;
2236         static char *kwlist[] = {"name", "mode", "buffering", 0};
2237         char *name = NULL;
2238         char *mode = "r";
2239         int bufsize = -1;
2240         int wideargument = 0;
2241 #ifdef MS_WINDOWS
2242         PyObject *po;
2243 #endif
2244
2245         assert(PyFile_Check(self));
2246         if (foself->f_fp != NULL) {
2247                 /* Have to close the existing file first. */
2248                 PyObject *closeresult = file_close(foself);
2249                 if (closeresult == NULL)
2250                         return -1;
2251                 Py_DECREF(closeresult);
2252         }
2253
2254 #ifdef MS_WINDOWS
2255         if (PyArg_ParseTupleAndKeywords(args, kwds, "U|si:file",
2256                                         kwlist, &po, &mode, &bufsize)) {
2257                 wideargument = 1;
2258                 if (fill_file_fields(foself, NULL, po, mode,
2259                                      fclose) == NULL)
2260                         goto Error;
2261         } else {
2262                 /* Drop the argument parsing error as narrow
2263                    strings are also valid. */
2264                 PyErr_Clear();
2265         }
2266 #endif
2267
2268         if (!wideargument) {
2269                 PyObject *o_name;
2270
2271                 if (!PyArg_ParseTupleAndKeywords(args, kwds, "et|si:file", kwlist,
2272                                                  Py_FileSystemDefaultEncoding,
2273                                                  &name,
2274                                                  &mode, &bufsize))
2275                         return -1;
2276
2277                 /* We parse again to get the name as a PyObject */
2278                 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|si:file",
2279                                                  kwlist, &o_name, &mode,
2280                                                  &bufsize))
2281                         goto Error;
2282
2283                 if (fill_file_fields(foself, NULL, o_name, mode,
2284                                      fclose) == NULL)
2285                         goto Error;
2286         }
2287         if (open_the_file(foself, name, mode) == NULL)
2288                 goto Error;
2289         foself->f_setbuf = NULL;
2290         PyFile_SetBufSize(self, bufsize);
2291         goto Done;
2292
2293 Error:
2294         ret = -1;
2295         /* fall through */
2296 Done:
2297         PyMem_Free(name); /* free the encoded string */
2298         return ret;
2299 }
2300
/* Docstring of the file type (used as tp_doc of PyFile_Type).  The text
   is written as two adjacent PyDoc_STR() pieces that together form the
   single initializer of file_doc. */
PyDoc_VAR(file_doc) =
PyDoc_STR(
"file(name[, mode[, buffering]]) -> file object\n"
"\n"
"Open a file.  The mode can be 'r', 'w' or 'a' for reading (default),\n"
"writing or appending.  The file will be created if it doesn't exist\n"
"when opened for writing or appending; it will be truncated when\n"
"opened for writing.  Add a 'b' to the mode for binary files.\n"
"Add a '+' to the mode to allow simultaneous reading and writing.\n"
"If the buffering argument is given, 0 means unbuffered, 1 means line\n"
"buffered, and larger numbers specify the buffer size.  The preferred way\n"
"to open a file is with the builtin open() function.\n"
)
PyDoc_STR(
"Add a 'U' to mode to open the file for input with universal newline\n"
"support.  Any line ending in the input file will be seen as a '\\n'\n"
"in Python.  Also, a file so opened gains the attribute 'newlines';\n"
"the value for this attribute is one of None (no newline read yet),\n"
"'\\r', '\\n', '\\r\\n' or a tuple containing all the newline types seen.\n"
"\n"
"'U' cannot be combined with 'w' or '+' mode.\n"
);
2323
/* Type object for the built-in "file" type.  It can be subclassed
   (Py_TPFLAGS_BASETYPE), supports weak references through the weakreflist
   field, and acts as its own iterator: tp_iter is file_self and
   tp_iternext is file_iternext. */
PyTypeObject PyFile_Type = {
        PyVarObject_HEAD_INIT(&PyType_Type, 0)
        "file",                                 /* tp_name */
        sizeof(PyFileObject),                   /* tp_basicsize */
        0,                                      /* tp_itemsize */
        (destructor)file_dealloc,               /* tp_dealloc */
        0,                                      /* tp_print */
        0,                                      /* tp_getattr */
        0,                                      /* tp_setattr */
        0,                                      /* tp_compare */
        (reprfunc)file_repr,                    /* tp_repr */
        0,                                      /* tp_as_number */
        0,                                      /* tp_as_sequence */
        0,                                      /* tp_as_mapping */
        0,                                      /* tp_hash */
        0,                                      /* tp_call */
        0,                                      /* tp_str */
        PyObject_GenericGetAttr,                /* tp_getattro */
        /* softspace is writable:  we must supply tp_setattro */
        PyObject_GenericSetAttr,                /* tp_setattro */
        0,                                      /* tp_as_buffer */
        Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_WEAKREFS, /* tp_flags */
        file_doc,                               /* tp_doc */
        0,                                      /* tp_traverse */
        0,                                      /* tp_clear */
        0,                                      /* tp_richcompare */
        offsetof(PyFileObject, weakreflist),    /* tp_weaklistoffset */
        (getiterfunc)file_self,                 /* tp_iter */
        (iternextfunc)file_iternext,            /* tp_iternext */
        file_methods,                           /* tp_methods */
        file_memberlist,                        /* tp_members */
        file_getsetlist,                        /* tp_getset */
        0,                                      /* tp_base */
        0,                                      /* tp_dict */
        0,                                      /* tp_descr_get */
        0,                                      /* tp_descr_set */
        0,                                      /* tp_dictoffset */
        file_init,                              /* tp_init */
        PyType_GenericAlloc,                    /* tp_alloc */
        file_new,                               /* tp_new */
        PyObject_Del,                           /* tp_free */
};
2366
2367 /* Interface for the 'soft space' between print items. */
2368
2369 int
2370 PyFile_SoftSpace(PyObject *f, int newflag)
2371 {
2372         long oldflag = 0;
2373         if (f == NULL) {
2374                 /* Do nothing */
2375         }
2376         else if (PyFile_Check(f)) {
2377                 oldflag = ((PyFileObject *)f)->f_softspace;
2378                 ((PyFileObject *)f)->f_softspace = newflag;
2379         }
2380         else {
2381                 PyObject *v;
2382                 v = PyObject_GetAttrString(f, "softspace");
2383                 if (v == NULL)
2384                         PyErr_Clear();
2385                 else {
2386                         if (PyInt_Check(v))
2387                                 oldflag = PyInt_AsLong(v);
2388                         assert(oldflag < INT_MAX);
2389                         Py_DECREF(v);
2390                 }
2391                 v = PyInt_FromLong((long)newflag);
2392                 if (v == NULL)
2393                         PyErr_Clear();
2394                 else {
2395                         if (PyObject_SetAttrString(f, "softspace", v) != 0)
2396                                 PyErr_Clear();
2397                         Py_DECREF(v);
2398                 }
2399         }
2400         return (int)oldflag;
2401 }
2402
2403 /* Interfaces to write objects/strings to file-like objects */
2404
2405 int
2406 PyFile_WriteObject(PyObject *v, PyObject *f, int flags)
2407 {
2408         PyObject *writer, *value, *args, *result;
2409         if (f == NULL) {
2410                 PyErr_SetString(PyExc_TypeError, "writeobject with NULL file");
2411                 return -1;
2412         }
2413         else if (PyFile_Check(f)) {
2414                 PyFileObject *fobj = (PyFileObject *) f;
2415 #ifdef Py_USING_UNICODE
2416                 PyObject *enc = fobj->f_encoding;
2417                 int result;
2418 #endif
2419                 if (fobj->f_fp == NULL) {
2420                         err_closed();
2421                         return -1;
2422                 }
2423 #ifdef Py_USING_UNICODE
2424                 if ((flags & Py_PRINT_RAW) &&
2425                     PyUnicode_Check(v) && enc != Py_None) {
2426                         char *cenc = PyString_AS_STRING(enc);
2427                         char *errors = fobj->f_errors == Py_None ?
2428                           "strict" : PyString_AS_STRING(fobj->f_errors);
2429                         value = PyUnicode_AsEncodedString(v, cenc, errors);
2430                         if (value == NULL)
2431                                 return -1;
2432                 } else {
2433                         value = v;
2434                         Py_INCREF(value);
2435                 }
2436                 result = file_PyObject_Print(value, fobj, flags);
2437                 Py_DECREF(value);
2438                 return result;
2439 #else
2440                 return file_PyObject_Print(v, fobj, flags);
2441 #endif
2442         }
2443         writer = PyObject_GetAttrString(f, "write");
2444         if (writer == NULL)
2445                 return -1;
2446         if (flags & Py_PRINT_RAW) {
2447                 if (PyUnicode_Check(v)) {
2448                         value = v;
2449                         Py_INCREF(value);
2450                 } else
2451                         value = PyObject_Str(v);
2452         }
2453         else
2454                 value = PyObject_Repr(v);
2455         if (value == NULL) {
2456                 Py_DECREF(writer);
2457                 return -1;
2458         }
2459         args = PyTuple_Pack(1, value);
2460         if (args == NULL) {
2461                 Py_DECREF(value);
2462                 Py_DECREF(writer);
2463                 return -1;
2464         }
2465         result = PyEval_CallObject(writer, args);
2466         Py_DECREF(args);
2467         Py_DECREF(value);
2468         Py_DECREF(writer);
2469         if (result == NULL)
2470                 return -1;
2471         Py_DECREF(result);
2472         return 0;
2473 }
2474
2475 int
2476 PyFile_WriteString(const char *s, PyObject *f)
2477 {
2478
2479         if (f == NULL) {
2480                 /* Should be caused by a pre-existing error */
2481                 if (!PyErr_Occurred())
2482                         PyErr_SetString(PyExc_SystemError,
2483                                         "null file for PyFile_WriteString");
2484                 return -1;
2485         }
2486         else if (PyFile_Check(f)) {
2487                 PyFileObject *fobj = (PyFileObject *) f;
2488                 FILE *fp = PyFile_AsFile(f);
2489                 if (fp == NULL) {
2490                         err_closed();
2491                         return -1;
2492                 }
2493                 FILE_BEGIN_ALLOW_THREADS(fobj)
2494                 fputs(s, fp);
2495                 FILE_END_ALLOW_THREADS(fobj)
2496                 return 0;
2497         }
2498         else if (!PyErr_Occurred()) {
2499                 PyObject *v = PyString_FromString(s);
2500                 int err;
2501                 if (v == NULL)
2502                         return -1;
2503                 err = PyFile_WriteObject(v, f, Py_PRINT_RAW);
2504                 Py_DECREF(v);
2505                 return err;
2506         }
2507         else
2508                 return -1;
2509 }
2510
2511 /* Try to get a file-descriptor from a Python object.  If the object
2512    is an integer or long integer, its value is returned.  If not, the
2513    object's fileno() method is called if it exists; the method must return
2514    an integer or long integer, which is returned as the file descriptor value.
2515    -1 is returned on failure.
2516 */
2517
2518 int PyObject_AsFileDescriptor(PyObject *o)
2519 {
2520         int fd;
2521         PyObject *meth;
2522
2523         if (PyInt_Check(o)) {
2524                 fd = PyInt_AsLong(o);
2525         }
2526         else if (PyLong_Check(o)) {
2527                 fd = PyLong_AsLong(o);
2528         }
2529         else if ((meth = PyObject_GetAttrString(o, "fileno")) != NULL)
2530         {
2531                 PyObject *fno = PyEval_CallObject(meth, NULL);
2532                 Py_DECREF(meth);
2533                 if (fno == NULL)
2534                         return -1;
2535
2536                 if (PyInt_Check(fno)) {
2537                         fd = PyInt_AsLong(fno);
2538                         Py_DECREF(fno);
2539                 }
2540                 else if (PyLong_Check(fno)) {
2541                         fd = PyLong_AsLong(fno);
2542                         Py_DECREF(fno);
2543                 }
2544                 else {
2545                         PyErr_SetString(PyExc_TypeError,
2546                                         "fileno() returned a non-integer");
2547                         Py_DECREF(fno);
2548                         return -1;
2549                 }
2550         }
2551         else {
2552                 PyErr_SetString(PyExc_TypeError,
2553                                 "argument must be an int, or have a fileno() method.");
2554                 return -1;
2555         }
2556
2557         if (fd < 0) {
2558                 PyErr_Format(PyExc_ValueError,
2559                              "file descriptor cannot be a negative integer (%i)",
2560                              fd);
2561                 return -1;
2562         }
2563         return fd;
2564 }
2565
2566 /* From here on we need access to the real fgets and fread */
2567 #undef fgets
2568 #undef fread
2569
2570 /*
2571 ** Py_UniversalNewlineFgets is an fgets variation that understands
2572 ** all of \r, \n and \r\n conventions.
2573 ** The stream should be opened in binary mode.
2574 ** If fobj is NULL the routine always does newline conversion, and
2575 ** it may peek one char ahead to gobble the second char in \r\n.
2576 ** If fobj is non-NULL it must be a PyFileObject. In this case there
2577 ** is no readahead but in stead a flag is used to skip a following
2578 ** \n on the next read. Also, if the file is open in binary mode
2579 ** the whole conversion is skipped. Finally, the routine keeps track of
2580 ** the different types of newlines seen.
2581 ** Note that we need no error handling: fgets() treats error and eof
2582 ** identically.
2583 */
2584 char *
2585 Py_UniversalNewlineFgets(char *buf, int n, FILE *stream, PyObject *fobj)
2586 {
2587         char *p = buf;
2588         int c;
2589         int newlinetypes = 0;
2590         int skipnextlf = 0;
2591         int univ_newline = 1;
2592
2593         if (fobj) {
2594                 if (!PyFile_Check(fobj)) {
2595                         errno = ENXIO;  /* What can you do... */
2596                         return NULL;
2597                 }
2598                 univ_newline = ((PyFileObject *)fobj)->f_univ_newline;
2599                 if ( !univ_newline )
2600                         return fgets(buf, n, stream);
2601                 newlinetypes = ((PyFileObject *)fobj)->f_newlinetypes;
2602                 skipnextlf = ((PyFileObject *)fobj)->f_skipnextlf;
2603         }
2604         FLOCKFILE(stream);
2605         c = 'x'; /* Shut up gcc warning */
2606         while (--n > 0 && (c = GETC(stream)) != EOF ) {
2607                 if (skipnextlf ) {
2608                         skipnextlf = 0;
2609                         if (c == '\n') {
2610                                 /* Seeing a \n here with skipnextlf true
2611                                 ** means we saw a \r before.
2612                                 */
2613                                 newlinetypes |= NEWLINE_CRLF;
2614                                 c = GETC(stream);
2615                                 if (c == EOF) break;
2616                         } else {
2617                                 /*
2618                                 ** Note that c == EOF also brings us here,
2619                                 ** so we're okay if the last char in the file
2620                                 ** is a CR.
2621                                 */
2622                                 newlinetypes |= NEWLINE_CR;
2623                         }
2624                 }
2625                 if (c == '\r') {
2626                         /* A \r is translated into a \n, and we skip
2627                         ** an adjacent \n, if any. We don't set the
2628                         ** newlinetypes flag until we've seen the next char.
2629                         */
2630                         skipnextlf = 1;
2631                         c = '\n';
2632                 } else if ( c == '\n') {
2633                         newlinetypes |= NEWLINE_LF;
2634                 }
2635                 *p++ = c;
2636                 if (c == '\n') break;
2637         }
2638         if ( c == EOF && skipnextlf )
2639                 newlinetypes |= NEWLINE_CR;
2640         FUNLOCKFILE(stream);
2641         *p = '\0';
2642         if (fobj) {
2643                 ((PyFileObject *)fobj)->f_newlinetypes = newlinetypes;
2644                 ((PyFileObject *)fobj)->f_skipnextlf = skipnextlf;
2645         } else if ( skipnextlf ) {
2646                 /* If we have no file object we cannot save the
2647                 ** skipnextlf flag. We have to readahead, which
2648                 ** will cause a pause if we're reading from an
2649                 ** interactive stream, but that is very unlikely
2650                 ** unless we're doing something silly like
2651                 ** execfile("/dev/tty").
2652                 */
2653                 c = GETC(stream);
2654                 if ( c != '\n' )
2655                         ungetc(c, stream);
2656         }
2657         if (p == buf)
2658                 return NULL;
2659         return buf;
2660 }
2661
2662 /*
2663 ** Py_UniversalNewlineFread is an fread variation that understands
2664 ** all of \r, \n and \r\n conventions.
2665 ** The stream should be opened in binary mode.
2666 ** fobj must be a PyFileObject. In this case there
2667 ** is no readahead but in stead a flag is used to skip a following
2668 ** \n on the next read. Also, if the file is open in binary mode
2669 ** the whole conversion is skipped. Finally, the routine keeps track of
2670 ** the different types of newlines seen.
2671 */
2672 size_t
2673 Py_UniversalNewlineFread(char *buf, size_t n,
2674                          FILE *stream, PyObject *fobj)
2675 {
2676         char *dst = buf;
2677         PyFileObject *f = (PyFileObject *)fobj;
2678         int newlinetypes, skipnextlf;
2679
2680         assert(buf != NULL);
2681         assert(stream != NULL);
2682
2683         if (!fobj || !PyFile_Check(fobj)) {
2684                 errno = ENXIO;  /* What can you do... */
2685                 return 0;
2686         }
2687         if (!f->f_univ_newline)
2688                 return fread(buf, 1, n, stream);
2689         newlinetypes = f->f_newlinetypes;
2690         skipnextlf = f->f_skipnextlf;
2691         /* Invariant:  n is the number of bytes remaining to be filled
2692          * in the buffer.
2693          */
2694         while (n) {
2695                 size_t nread;
2696                 int shortread;
2697                 char *src = dst;
2698
2699                 nread = fread(dst, 1, n, stream);
2700                 assert(nread <= n);
2701                 if (nread == 0)
2702                         break;
2703
2704                 n -= nread; /* assuming 1 byte out for each in; will adjust */
2705                 shortread = n != 0;     /* true iff EOF or error */
2706                 while (nread--) {
2707                         char c = *src++;
2708                         if (c == '\r') {
2709                                 /* Save as LF and set flag to skip next LF. */
2710                                 *dst++ = '\n';
2711                                 skipnextlf = 1;
2712                         }
2713                         else if (skipnextlf && c == '\n') {
2714                                 /* Skip LF, and remember we saw CR LF. */
2715                                 skipnextlf = 0;
2716                                 newlinetypes |= NEWLINE_CRLF;
2717                                 ++n;
2718                         }
2719                         else {
2720                                 /* Normal char to be stored in buffer.  Also
2721                                  * update the newlinetypes flag if either this
2722                                  * is an LF or the previous char was a CR.
2723                                  */
2724                                 if (c == '\n')
2725                                         newlinetypes |= NEWLINE_LF;
2726                                 else if (skipnextlf)
2727                                         newlinetypes |= NEWLINE_CR;
2728                                 *dst++ = c;
2729                                 skipnextlf = 0;
2730                         }
2731                 }
2732                 if (shortread) {
2733                         /* If this is EOF, update type flags. */
2734                         if (skipnextlf && feof(stream))
2735                                 newlinetypes |= NEWLINE_CR;
2736                         break;
2737                 }
2738         }
2739         f->f_newlinetypes = newlinetypes;
2740         f->f_skipnextlf = skipnextlf;
2741         return dst - buf;
2742 }
2743
2744 #ifdef __cplusplus
2745 }
2746 #endif