Objects/fileobject.c

   1 /* File object implementation */
   2
   3 #define PY_SSIZE_T_CLEAN
   4 #include "Python.h"
   5 #include "structmember.h"
   6
   7 #ifdef HAVE_SYS_TYPES_H
   8 #include <sys/types.h>
   9 #endif /* HAVE_SYS_TYPES_H */
  10
  11 #ifdef MS_WINDOWS
  12 #define fileno _fileno
  13 /* can simulate truncate with Win32 API functions; see file_truncate */
  14 #define HAVE_FTRUNCATE
  15 #define WIN32_LEAN_AND_MEAN
  16 #include <windows.h>
  17 #endif
  18
  19 #ifdef _MSC_VER
  20 /* Need GetVersion to see if on NT so safe to use _wfopen */
  21 #define WIN32_LEAN_AND_MEAN
  22 #include <windows.h>
  23 #endif /* _MSC_VER */
  24
  25 #if defined(PYOS_OS2) && defined(PYCC_GCC)
  26 #include <io.h>
  27 #endif
  28
  29 #define BUF(v) PyString_AS_STRING((PyStringObject *)v)
  30
  31 #ifndef DONT_HAVE_ERRNO_H
  32 #include <errno.h>
  33 #endif
  34
  35 #ifdef HAVE_GETC_UNLOCKED
  36 #define GETC(f) getc_unlocked(f)
  37 #define FLOCKFILE(f) flockfile(f)
  38 #define FUNLOCKFILE(f) funlockfile(f)
  39 #else
  40 #define GETC(f) getc(f)
  41 #define FLOCKFILE(f)
  42 #define FUNLOCKFILE(f)
  43 #endif
  44
  45 /* Bits in f_newlinetypes */
  46 #define NEWLINE_UNKNOWN 0       /* No newline seen, yet */
  47 #define NEWLINE_CR 1            /* \r newline seen */
  48 #define NEWLINE_LF 2            /* \n newline seen */
  49 #define NEWLINE_CRLF 4          /* \r\n newline seen */
  50
  51 /*
  52  * These macros release the GIL while preventing the f_close() function being
  53  * called in the interval between them.  For that purpose, a running total of
  54  * the number of currently running unlocked code sections is kept in
  55  * the unlocked_count field of the PyFileObject. The close() method raises
  56  * an IOError if that field is non-zero.  See issue #815646, #595601.
  57  */
  58
  59 #define FILE_BEGIN_ALLOW_THREADS(fobj) \
  60 { \
  61         fobj->unlocked_count++; \
  62         Py_BEGIN_ALLOW_THREADS
  63
  64 #define FILE_END_ALLOW_THREADS(fobj) \
  65         Py_END_ALLOW_THREADS \
  66         fobj->unlocked_count--; \
  67         assert(fobj->unlocked_count >= 0); \
  68 }
  69
  70 #define FILE_ABORT_ALLOW_THREADS(fobj) \
  71         Py_BLOCK_THREADS \
  72         fobj->unlocked_count--; \
  73         assert(fobj->unlocked_count >= 0);
  74
  75 #ifdef __cplusplus
  76 extern "C" {
  77 #endif
  78
  79 FILE *
  80 PyFile_AsFile(PyObject *f)
  81 {
  82         if (f == NULL || !PyFile_Check(f))
  83                 return NULL;
  84         else
  85                 return ((PyFileObject *)f)->f_fp;
  86 }
  87
  88 void PyFile_IncUseCount(PyFileObject *fobj)
  89 {
  90         fobj->unlocked_count++;
  91 }
  92
  93 void PyFile_DecUseCount(PyFileObject *fobj)
  94 {
  95         fobj->unlocked_count--;
  96         assert(fobj->unlocked_count >= 0);
  97 }
  98
  99 PyObject *
 100 PyFile_Name(PyObject *f)
 101 {
 102         if (f == NULL || !PyFile_Check(f))
 103                 return NULL;
 104         else
 105                 return ((PyFileObject *)f)->f_name;
 106 }
 107
 108 /* This is a safe wrapper around PyObject_Print to print to the FILE
 109    of a PyFileObject. PyObject_Print releases the GIL but knows nothing
 110    about PyFileObject. */
 111 static int
 112 file_PyObject_Print(PyObject *op, PyFileObject *f, int flags)
 113 {
 114         int result;
 115         PyFile_IncUseCount(f);
 116         result = PyObject_Print(op, f->f_fp, flags);
 117         PyFile_DecUseCount(f);
 118         return result;
 119 }
 120
 121 /* On Unix, fopen will succeed for directories.
 122    In Python, there should be no file objects referring to
 123    directories, so we need a check.  */
 124
 125 static PyFileObject*
 126 dircheck(PyFileObject* f)
 127 {
 128 #if defined(HAVE_FSTAT) && defined(S_IFDIR) && defined(EISDIR)
 129         struct stat buf;
 130         if (f->f_fp == NULL)
 131                 return f;
 132         if (fstat(fileno(f->f_fp), &buf) == 0 &&
 133             S_ISDIR(buf.st_mode)) {
 134                 char *msg = strerror(EISDIR);
 135                 PyObject *exc = PyObject_CallFunction(PyExc_IOError, "(isO)",
 136                                                       EISDIR, msg, f->f_name);
 137                 PyErr_SetObject(PyExc_IOError, exc);
 138                 Py_XDECREF(exc);
 139                 return NULL;
 140         }
 141 #endif
 142         return f;
 143 }
 144
 145
 146 static PyObject *
 147 fill_file_fields(PyFileObject *f, FILE *fp, PyObject *name, char *mode,
 148                  int (*close)(FILE *))
 149 {
 150         assert(name != NULL);
 151         assert(f != NULL);
 152         assert(PyFile_Check(f));
 153         assert(f->f_fp == NULL);
 154
 155         Py_DECREF(f->f_name);
 156         Py_DECREF(f->f_mode);
 157         Py_DECREF(f->f_encoding);
 158         Py_DECREF(f->f_errors);
 159
 160         Py_INCREF(name);
 161         f->f_name = name;
 162
 163         f->f_mode = PyString_FromString(mode);
 164
 165         f->f_close = close;
 166         f->f_softspace = 0;
 167         f->f_binary = strchr(mode,'b') != NULL;
 168         f->f_buf = NULL;
 169         f->f_univ_newline = (strchr(mode, 'U') != NULL);
 170         f->f_newlinetypes = NEWLINE_UNKNOWN;
 171         f->f_skipnextlf = 0;
 172         Py_INCREF(Py_None);
 173         f->f_encoding = Py_None;
 174         Py_INCREF(Py_None);
 175         f->f_errors = Py_None;
 176
 177         if (f->f_mode == NULL)
 178                 return NULL;
 179         f->f_fp = fp;
 180         f = dircheck(f);
 181         return (PyObject *) f;
 182 }
 183
 184 #if defined _MSC_VER && _MSC_VER >= 1400 && defined(__STDC_SECURE_LIB__)
 185 #define Py_VERIFY_WINNT
 186 /* The CRT on windows compiled with Visual Studio 2005 and higher may
 187  * assert if given invalid mode strings.  This is all fine and well
 188  * in static languages like C where the mode string is typcially hard
 189  * coded.  But in Python, were we pass in the mode string from the user,
 190  * we need to verify it first manually
 191  */
 192 static int _PyVerify_Mode_WINNT(const char *mode)
 193 {
 194         /* See if mode string is valid on Windows to avoid hard assertions */
 195         /* remove leading spacese */
 196         int singles = 0;
 197         int pairs = 0;
 198         int encoding = 0;
 199         const char *s, *c;
 200
 201         while(*mode == ' ') /* strip initial spaces */
 202                 ++mode;
 203         if (!strchr("rwa", *mode)) /* must start with one of these */
 204                 return 0;
 205         while (*++mode) {
 206                 if (*mode == ' ' || *mode == 'N') /* ignore spaces and N */
 207                         continue;
 208                 s = "+TD"; /* each of this can appear only once */
 209                 c = strchr(s, *mode);
 210                 if (c) {
 211                         ptrdiff_t idx = s-c;
 212                         if (singles & (1<<idx))
 213                                 return 0;
 214                         singles |= (1<<idx);
 215                         continue;
 216                 }
 217                 s = "btcnSR"; /* only one of each letter in the pairs allowed */
 218                 c = strchr(s, *mode);
 219                 if (c) {
 220                         ptrdiff_t idx = (s-c)/2;
 221                         if (pairs & (1<<idx))
 222                                 return 0;
 223                         pairs |= (1<<idx);
 224                         continue;
 225                 }
 226                 if (*mode == ',') {
 227                         encoding = 1;
 228                         break;
 229                 }
 230                 return 0; /* found an invalid char */
 231         }
 232
 233         if (encoding) {
 234                 char *e[] = {"UTF-8", "UTF-16LE", "UNICODE"};
 235                 while (*mode == ' ')
 236                         ++mode;
 237                 /* find 'ccs =' */
 238                 if (strncmp(mode, "ccs", 3))
 239                         return 0;
 240                 mode += 3;
 241                 while (*mode == ' ')
 242                         ++mode;
 243                 if (*mode != '=')
 244                         return 0;
 245                 while (*mode == ' ')
 246                         ++mode;
 247                 for(encoding = 0; encoding<_countof(e); ++encoding) {
 248                         size_t l = strlen(e[encoding]);
 249                         if (!strncmp(mode, e[encoding], l)) {
 250                                 mode += l; /* found a valid encoding */
 251                                 break;
 252                         }
 253                 }
 254                 if (encoding == _countof(e))
 255                         return 0;
 256         }
 257         /* skip trailing spaces */
 258         while (*mode == ' ')
 259                 ++mode;
 260
 261         return *mode == '\0'; /* must be at the end of the string */
 262 }
 263 #endif
 264
 265 /* check for known incorrect mode strings - problem is, platforms are
 266    free to accept any mode characters they like and are supposed to
 267    ignore stuff they don't understand... write or append mode with
 268    universal newline support is expressly forbidden by PEP 278.
 269    Additionally, remove the 'U' from the mode string as platforms
 270    won't know what it is. Non-zero return signals an exception */
 271 int
 272 _PyFile_SanitizeMode(char *mode)
 273 {
 274         char *upos;
 275         size_t len = strlen(mode);
 276
 277         if (!len) {
 278                 PyErr_SetString(PyExc_ValueError, "empty mode string");
 279                 return -1;
 280         }
 281
 282         upos = strchr(mode, 'U');
 283         if (upos) {
 284                 memmove(upos, upos+1, len-(upos-mode)); /* incl null char */
 285
 286                 if (mode[0] == 'w' || mode[0] == 'a') {
 287                         PyErr_Format(PyExc_ValueError, "universal newline "
 288                                      "mode can only be used with modes "
 289                                      "starting with 'r'");
 290                         return -1;
 291                 }
 292
 293                 if (mode[0] != 'r') {
 294                         memmove(mode+1, mode, strlen(mode)+1);
 295                         mode[0] = 'r';
 296                 }
 297
 298                 if (!strchr(mode, 'b')) {
 299                         memmove(mode+2, mode+1, strlen(mode));
 300                         mode[1] = 'b';
 301                 }
 302         } else if (mode[0] != 'r' && mode[0] != 'w' && mode[0] != 'a') {
 303                 PyErr_Format(PyExc_ValueError, "mode string must begin with "
 304                             "one of 'r', 'w', 'a' or 'U', not '%.200s'", mode);
 305                 return -1;
 306         }
 307 #ifdef Py_VERIFY_WINNT
 308         /* additional checks on NT with visual studio 2005 and higher */
 309         if (!_PyVerify_Mode_WINNT(mode)) {
 310                 PyErr_Format(PyExc_ValueError, "Invalid mode ('%.50s')", mode);
 311                 return -1;
 312         }
 313 #endif
 314         return 0;
 315 }
 316
 317 static PyObject *
 318 open_the_file(PyFileObject *f, char *name, char *mode)
 319 {
 320         char *newmode;
 321         assert(f != NULL);
 322         assert(PyFile_Check(f));
 323 #ifdef MS_WINDOWS
 324         /* windows ignores the passed name in order to support Unicode */
 325         assert(f->f_name != NULL);
 326 #else
 327         assert(name != NULL);
 328 #endif
 329         assert(mode != NULL);
 330         assert(f->f_fp == NULL);
 331
 332         /* probably need to replace 'U' by 'rb' */
 333         newmode = PyMem_MALLOC(strlen(mode) + 3);
 334         if (!newmode) {
 335                 PyErr_NoMemory();
 336                 return NULL;
 337         }
 338         strcpy(newmode, mode);
 339
 340         if (_PyFile_SanitizeMode(newmode)) {
 341                 f = NULL;
 342                 goto cleanup;
 343         }
 344
 345         /* rexec.py can't stop a user from getting the file() constructor --
 346            all they have to do is get *any* file object f, and then do
 347            type(f).  Here we prevent them from doing damage with it. */
 348         if (PyEval_GetRestricted()) {
 349                 PyErr_SetString(PyExc_IOError,
 350                 "file() constructor not accessible in restricted mode");
 351                 f = NULL;
 352                 goto cleanup;
 353         }
 354         errno = 0;
 355
 356 #ifdef MS_WINDOWS
 357         if (PyUnicode_Check(f->f_name)) {
 358                 PyObject *wmode;
 359                 wmode = PyUnicode_DecodeASCII(newmode, strlen(newmode), NULL);
 360                 if (f->f_name && wmode) {
 361                         FILE_BEGIN_ALLOW_THREADS(f)
 362                         /* PyUnicode_AS_UNICODE OK without thread
 363                            lock as it is a simple dereference. */
 364                         f->f_fp = _wfopen(PyUnicode_AS_UNICODE(f->f_name),
 365                                           PyUnicode_AS_UNICODE(wmode));
 366                         FILE_END_ALLOW_THREADS(f)
 367                 }
 368                 Py_XDECREF(wmode);
 369         }
 370 #endif
 371         if (NULL == f->f_fp && NULL != name) {
 372                 FILE_BEGIN_ALLOW_THREADS(f)
 373                 f->f_fp = fopen(name, newmode);
 374                 FILE_END_ALLOW_THREADS(f)
 375         }
 376
 377         if (f->f_fp == NULL) {
 378 #if defined  _MSC_VER && (_MSC_VER < 1400 || !defined(__STDC_SECURE_LIB__))
 379                 /* MSVC 6 (Microsoft) leaves errno at 0 for bad mode strings,
 380                  * across all Windows flavors.  When it sets EINVAL varies
 381                  * across Windows flavors, the exact conditions aren't
 382                  * documented, and the answer lies in the OS's implementation
 383                  * of Win32's CreateFile function (whose source is secret).
 384                  * Seems the best we can do is map EINVAL to ENOENT.
 385                  * Starting with Visual Studio .NET 2005, EINVAL is correctly
 386                  * set by our CRT error handler (set in exceptions.c.)
 387                  */
 388                 if (errno == 0) /* bad mode string */
 389                         errno = EINVAL;
 390                 else if (errno == EINVAL) /* unknown, but not a mode string */
 391                         errno = ENOENT;
 392 #endif
 393                 /* EINVAL is returned when an invalid filename or
 394                  * an invalid mode is supplied. */
 395                 if (errno == EINVAL) {
 396                         PyObject *v;
 397                         char message[100];
 398                         PyOS_snprintf(message, 100,
 399                             "invalid mode ('%.50s') or filename", mode);
 400                         v = Py_BuildValue("(isO)", errno, message, f->f_name);
 401                         if (v != NULL) {
 402                                 PyErr_SetObject(PyExc_IOError, v);
 403                                 Py_DECREF(v);
 404                         }
 405                 }
 406                 else
 407                         PyErr_SetFromErrnoWithFilenameObject(PyExc_IOError, f->f_name);
 408                 f = NULL;
 409         }
 410         if (f != NULL)
 411                 f = dircheck(f);
 412
 413 cleanup:
 414         PyMem_FREE(newmode);
 415
 416         return (PyObject *)f;
 417 }
 418
 419 static PyObject *
 420 close_the_file(PyFileObject *f)
 421 {
 422         int sts = 0;
 423         int (*local_close)(FILE *);
 424         FILE *local_fp = f->f_fp;
 425         if (local_fp != NULL) {
 426                 local_close = f->f_close;
 427                 if (local_close != NULL && f->unlocked_count > 0) {
 428                         if (f->ob_refcnt > 0) {
 429                                 PyErr_SetString(PyExc_IOError,
 430                                         "close() called during concurrent "
 431                                         "operation on the same file object.");
 432                         } else {
 433                                 /* This should not happen unless someone is
 434                                  * carelessly playing with the PyFileObject
 435                                  * struct fields and/or its associated FILE
 436                                  * pointer. */
 437                                 PyErr_SetString(PyExc_SystemError,
 438                                         "PyFileObject locking error in "
 439                                         "destructor (refcnt <= 0 at close).");
 440                         }
 441                         return NULL;
 442                 }
 443                 /* NULL out the FILE pointer before releasing the GIL, because
 444                  * it will not be valid anymore after the close() function is
 445                  * called. */
 446                 f->f_fp = NULL;
 447                 if (local_close != NULL) {
 448                         Py_BEGIN_ALLOW_THREADS
 449                         errno = 0;
 450                         sts = (*local_close)(local_fp);
 451                         Py_END_ALLOW_THREADS
 452                         if (sts == EOF)
 453                                 return PyErr_SetFromErrno(PyExc_IOError);
 454                         if (sts != 0)
 455                                 return PyInt_FromLong((long)sts);
 456                 }
 457         }
 458         Py_RETURN_NONE;
 459 }
 460
 461 PyObject *
 462 PyFile_FromFile(FILE *fp, char *name, char *mode, int (*close)(FILE *))
 463 {
 464         PyFileObject *f = (PyFileObject *)PyFile_Type.tp_new(&PyFile_Type,
 465                                                              NULL, NULL);
 466         if (f != NULL) {
 467                 PyObject *o_name = PyString_FromString(name);
 468                 if (o_name == NULL)
 469                         return NULL;
 470                 if (fill_file_fields(f, fp, o_name, mode, close) == NULL) {
 471                         Py_DECREF(f);
 472                         f = NULL;
 473                 }
 474                 Py_DECREF(o_name);
 475         }
 476         return (PyObject *) f;
 477 }
 478
 479 PyObject *
 480 PyFile_FromString(char *name, char *mode)
 481 {
 482         extern int fclose(FILE *);
 483         PyFileObject *f;
 484
 485         f = (PyFileObject *)PyFile_FromFile((FILE *)NULL, name, mode, fclose);
 486         if (f != NULL) {
 487                 if (open_the_file(f, name, mode) == NULL) {
 488                         Py_DECREF(f);
 489                         f = NULL;
 490                 }
 491         }
 492         return (PyObject *)f;
 493 }
 494
 495 void
 496 PyFile_SetBufSize(PyObject *f, int bufsize)
 497 {
 498         PyFileObject *file = (PyFileObject *)f;
 499         if (bufsize >= 0) {
 500                 int type;
 501                 switch (bufsize) {
 502                 case 0:
 503                         type = _IONBF;
 504                         break;
 505 #ifdef HAVE_SETVBUF
 506                 case 1:
 507                         type = _IOLBF;
 508                         bufsize = BUFSIZ;
 509                         break;
 510 #endif
 511                 default:
 512                         type = _IOFBF;
 513 #ifndef HAVE_SETVBUF
 514                         bufsize = BUFSIZ;
 515 #endif
 516                         break;
 517                 }
 518                 fflush(file->f_fp);
 519                 if (type == _IONBF) {
 520                         PyMem_Free(file->f_setbuf);
 521                         file->f_setbuf = NULL;
 522                 } else {
 523                         file->f_setbuf = (char *)PyMem_Realloc(file->f_setbuf,
 524                                                                 bufsize);
 525                 }
 526 #ifdef HAVE_SETVBUF
 527                 setvbuf(file->f_fp, file->f_setbuf, type, bufsize);
 528 #else /* !HAVE_SETVBUF */
 529                 setbuf(file->f_fp, file->f_setbuf);
 530 #endif /* !HAVE_SETVBUF */
 531         }
 532 }
 533
 534 /* Set the encoding used to output Unicode strings.
 535    Return 1 on success, 0 on failure. */
 536
 537 int
 538 PyFile_SetEncoding(PyObject *f, const char *enc)
 539 {
 540         return PyFile_SetEncodingAndErrors(f, enc, NULL);
 541 }
 542
 543 int
 544 PyFile_SetEncodingAndErrors(PyObject *f, const char *enc, char* errors)
 545 {
 546         PyFileObject *file = (PyFileObject*)f;
 547         PyObject *str, *oerrors;
 548
 549         assert(PyFile_Check(f));
 550         str = PyString_FromString(enc);
 551         if (!str)
 552                 return 0;
 553         if (errors) {
 554                 oerrors = PyString_FromString(errors);
 555                 if (!oerrors) {
 556                         Py_DECREF(str);
 557                         return 0;
 558                 }
 559         } else {
 560                 oerrors = Py_None;
 561                 Py_INCREF(Py_None);
 562         }
 563         Py_DECREF(file->f_encoding);
 564         file->f_encoding = str;
 565         Py_DECREF(file->f_errors);
 566         file->f_errors = oerrors;
 567         return 1;
 568 }
 569
 570 static PyObject *
 571 err_closed(void)
 572 {
 573         PyErr_SetString(PyExc_ValueError, "I/O operation on closed file");
 574         return NULL;
 575 }
 576
 577 /* Refuse regular file I/O if there's data in the iteration-buffer.
 578  * Mixing them would cause data to arrive out of order, as the read*
 579  * methods don't use the iteration buffer. */
 580 static PyObject *
 581 err_iterbuffered(void)
 582 {
 583         PyErr_SetString(PyExc_ValueError,
 584                 "Mixing iteration and read methods would lose data");
 585         return NULL;
 586 }
 587
 588 static void drop_readahead(PyFileObject *);
 589
 590 /* Methods */
 591
 592 static void
 593 file_dealloc(PyFileObject *f)
 594 {
 595         PyObject *ret;
 596         if (f->weakreflist != NULL)
 597                 PyObject_ClearWeakRefs((PyObject *) f);
 598         ret = close_the_file(f);
 599         if (!ret) {
 600                 PySys_WriteStderr("close failed in file object destructor:\n");
 601                 PyErr_Print();
 602         }
 603         else {
 604                 Py_DECREF(ret);
 605         }
 606         PyMem_Free(f->f_setbuf);
 607         Py_XDECREF(f->f_name);
 608         Py_XDECREF(f->f_mode);
 609         Py_XDECREF(f->f_encoding);
 610         Py_XDECREF(f->f_errors);
 611         drop_readahead(f);
 612         Py_TYPE(f)->tp_free((PyObject *)f);
 613 }
 614
 615 static PyObject *
 616 file_repr(PyFileObject *f)
 617 {
 618         if (PyUnicode_Check(f->f_name)) {
 619 #ifdef Py_USING_UNICODE
 620                 PyObject *ret = NULL;
 621                 PyObject *name = PyUnicode_AsUnicodeEscapeString(f->f_name);
 622                 const char *name_str = name ? PyString_AsString(name) : "?";
 623                 ret = PyString_FromFormat("<%s file u'%s', mode '%s' at %p>",
 624                                    f->f_fp == NULL ? "closed" : "open",
 625                                    name_str,
 626                                    PyString_AsString(f->f_mode),
 627                                    f);
 628                 Py_XDECREF(name);
 629                 return ret;
 630 #endif
 631         } else {
 632                 return PyString_FromFormat("<%s file '%s', mode '%s' at %p>",
 633                                    f->f_fp == NULL ? "closed" : "open",
 634                                    PyString_AsString(f->f_name),
 635                                    PyString_AsString(f->f_mode),
 636                                    f);
 637         }
 638 }
 639
 640 static PyObject *
 641 file_close(PyFileObject *f)
 642 {
 643         PyObject *sts = close_the_file(f);
 644         PyMem_Free(f->f_setbuf);
 645         f->f_setbuf = NULL;
 646         return sts;
 647 }
 648
 649
 650 /* Our very own off_t-like type, 64-bit if possible */
 651 #if !defined(HAVE_LARGEFILE_SUPPORT)
 652 typedef off_t Py_off_t;
 653 #elif SIZEOF_OFF_T >= 8
 654 typedef off_t Py_off_t;
 655 #elif SIZEOF_FPOS_T >= 8
 656 typedef fpos_t Py_off_t;
 657 #else
 658 #error "Large file support, but neither off_t nor fpos_t is large enough."
 659 #endif
 660
 661
 662 /* a portable fseek() function
 663    return 0 on success, non-zero on failure (with errno set) */
 664 static int
 665 _portable_fseek(FILE *fp, Py_off_t offset, int whence)
 666 {
 667 #if !defined(HAVE_LARGEFILE_SUPPORT)
 668         return fseek(fp, offset, whence);
 669 #elif defined(HAVE_FSEEKO) && SIZEOF_OFF_T >= 8
 670         return fseeko(fp, offset, whence);
 671 #elif defined(HAVE_FSEEK64)
 672         return fseek64(fp, offset, whence);
 673 #elif defined(__BEOS__)
 674         return _fseek(fp, offset, whence);
 675 #elif SIZEOF_FPOS_T >= 8
 676         /* lacking a 64-bit capable fseek(), use a 64-bit capable fsetpos()
 677            and fgetpos() to implement fseek()*/
 678         fpos_t pos;
 679         switch (whence) {
 680         case SEEK_END:
 681 #ifdef MS_WINDOWS
 682                 fflush(fp);
 683                 if (_lseeki64(fileno(fp), 0, 2) == -1)
 684                         return -1;
 685 #else
 686                 if (fseek(fp, 0, SEEK_END) != 0)
 687                         return -1;
 688 #endif
 689                 /* fall through */
 690         case SEEK_CUR:
 691                 if (fgetpos(fp, &pos) != 0)
 692                         return -1;
 693                 offset += pos;
 694                 break;
 695         /* case SEEK_SET: break; */
 696         }
 697         return fsetpos(fp, &offset);
 698 #else
 699 #error "Large file support, but no way to fseek."
 700 #endif
 701 }
 702
 703
 704 /* a portable ftell() function
 705    Return -1 on failure with errno set appropriately, current file
 706    position on success */
 707 static Py_off_t
 708 _portable_ftell(FILE* fp)
 709 {
 710 #if !defined(HAVE_LARGEFILE_SUPPORT)
 711         return ftell(fp);
 712 #elif defined(HAVE_FTELLO) && SIZEOF_OFF_T >= 8
 713         return ftello(fp);
 714 #elif defined(HAVE_FTELL64)
 715         return ftell64(fp);
 716 #elif SIZEOF_FPOS_T >= 8
 717         fpos_t pos;
 718         if (fgetpos(fp, &pos) != 0)
 719                 return -1;
 720         return pos;
 721 #else
 722 #error "Large file support, but no way to ftell."
 723 #endif
 724 }
 725
 726
 727 static PyObject *
 728 file_seek(PyFileObject *f, PyObject *args)
 729 {
 730         int whence;
 731         int ret;
 732         Py_off_t offset;
 733         PyObject *offobj, *off_index;
 734
 735         if (f->f_fp == NULL)
 736                 return err_closed();
 737         drop_readahead(f);
 738         whence = 0;
 739         if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &whence))
 740                 return NULL;
 741         off_index = PyNumber_Index(offobj);
 742         if (!off_index) {
 743                 if (!PyFloat_Check(offobj))
 744                         return NULL;
 745                 /* Deprecated in 2.6 */
 746                 PyErr_Clear();
 747                 if (PyErr_WarnEx(PyExc_DeprecationWarning,
 748                                  "integer argument expected, got float",
 749                                  1) < 0)
 750                         return NULL;
 751                 off_index = offobj;
 752                 Py_INCREF(offobj);
 753         }
 754 #if !defined(HAVE_LARGEFILE_SUPPORT)
 755         offset = PyInt_AsLong(off_index);
 756 #else
 757         offset = PyLong_Check(off_index) ?
 758                 PyLong_AsLongLong(off_index) : PyInt_AsLong(off_index);
 759 #endif
 760         Py_DECREF(off_index);
 761         if (PyErr_Occurred())
 762                 return NULL;
 763
 764         FILE_BEGIN_ALLOW_THREADS(f)
 765         errno = 0;
 766         ret = _portable_fseek(f->f_fp, offset, whence);
 767         FILE_END_ALLOW_THREADS(f)
 768
 769         if (ret != 0) {
 770                 PyErr_SetFromErrno(PyExc_IOError);
 771                 clearerr(f->f_fp);
 772                 return NULL;
 773         }
 774         f->f_skipnextlf = 0;
 775         Py_INCREF(Py_None);
 776         return Py_None;
 777 }
 778
 779
 780 #ifdef HAVE_FTRUNCATE
 781 static PyObject *
 782 file_truncate(PyFileObject *f, PyObject *args)
 783 {
 784         Py_off_t newsize;
 785         PyObject *newsizeobj = NULL;
 786         Py_off_t initialpos;
 787         int ret;
 788
 789         if (f->f_fp == NULL)
 790                 return err_closed();
 791         if (!PyArg_UnpackTuple(args, "truncate", 0, 1, &newsizeobj))
 792                 return NULL;
 793
 794         /* Get current file position.  If the file happens to be open for
 795          * update and the last operation was an input operation, C doesn't
 796          * define what the later fflush() will do, but we promise truncate()
 797          * won't change the current position (and fflush() *does* change it
 798          * then at least on Windows).  The easiest thing is to capture
 799          * current pos now and seek back to it at the end.
 800          */
 801         FILE_BEGIN_ALLOW_THREADS(f)
 802         errno = 0;
 803         initialpos = _portable_ftell(f->f_fp);
 804         FILE_END_ALLOW_THREADS(f)
 805         if (initialpos == -1)
 806                 goto onioerror;
 807
 808         /* Set newsize to current postion if newsizeobj NULL, else to the
 809          * specified value.
 810          */
 811         if (newsizeobj != NULL) {
 812 #if !defined(HAVE_LARGEFILE_SUPPORT)
 813                 newsize = PyInt_AsLong(newsizeobj);
 814 #else
 815                 newsize = PyLong_Check(newsizeobj) ?
 816                                 PyLong_AsLongLong(newsizeobj) :
 817                                 PyInt_AsLong(newsizeobj);
 818 #endif
 819                 if (PyErr_Occurred())
 820                         return NULL;
 821         }
 822         else /* default to current position */
 823                 newsize = initialpos;
 824
 825         /* Flush the stream.  We're mixing stream-level I/O with lower-level
 826          * I/O, and a flush may be necessary to synch both platform views
 827          * of the current file state.
 828          */
 829         FILE_BEGIN_ALLOW_THREADS(f)
 830         errno = 0;
 831         ret = fflush(f->f_fp);
 832         FILE_END_ALLOW_THREADS(f)
 833         if (ret != 0)
 834                 goto onioerror;
 835
 836 #ifdef MS_WINDOWS
 837         /* MS _chsize doesn't work if newsize doesn't fit in 32 bits,
 838            so don't even try using it. */
 839         {
 840                 HANDLE hFile;
 841
 842                 /* Have to move current pos to desired endpoint on Windows. */
 843                 FILE_BEGIN_ALLOW_THREADS(f)
 844                 errno = 0;
 845                 ret = _portable_fseek(f->f_fp, newsize, SEEK_SET) != 0;
 846                 FILE_END_ALLOW_THREADS(f)
 847                 if (ret)
 848                         goto onioerror;
 849
 850                 /* Truncate.  Note that this may grow the file! */
 851                 FILE_BEGIN_ALLOW_THREADS(f)
 852                 errno = 0;
 853                 hFile = (HANDLE)_get_osfhandle(fileno(f->f_fp));
 854                 ret = hFile == (HANDLE)-1;
 855                 if (ret == 0) {
 856                         ret = SetEndOfFile(hFile) == 0;
 857                         if (ret)
 858                                 errno = EACCES;
 859                 }
 860                 FILE_END_ALLOW_THREADS(f)
 861                 if (ret)
 862                         goto onioerror;
 863         }
 864 #else
 865         FILE_BEGIN_ALLOW_THREADS(f)
 866         errno = 0;
 867         ret = ftruncate(fileno(f->f_fp), newsize);
 868         FILE_END_ALLOW_THREADS(f)
 869         if (ret != 0)
 870                 goto onioerror;
 871 #endif /* !MS_WINDOWS */
 872
 873         /* Restore original file position. */
 874         FILE_BEGIN_ALLOW_THREADS(f)
 875         errno = 0;
 876         ret = _portable_fseek(f->f_fp, initialpos, SEEK_SET) != 0;
 877         FILE_END_ALLOW_THREADS(f)
 878         if (ret)
 879                 goto onioerror;
 880
 881         Py_INCREF(Py_None);
 882         return Py_None;
 883
 884 onioerror:
 885         PyErr_SetFromErrno(PyExc_IOError);
 886         clearerr(f->f_fp);
 887         return NULL;
 888 }
 889 #endif /* HAVE_FTRUNCATE */
 890
 891 static PyObject *
 892 file_tell(PyFileObject *f)
 893 {
 894         Py_off_t pos;
 895
 896         if (f->f_fp == NULL)
 897                 return err_closed();
 898         FILE_BEGIN_ALLOW_THREADS(f)
 899         errno = 0;
 900         pos = _portable_ftell(f->f_fp);
 901         FILE_END_ALLOW_THREADS(f)
 902
 903         if (pos == -1) {
 904                 PyErr_SetFromErrno(PyExc_IOError);
 905                 clearerr(f->f_fp);
 906                 return NULL;
 907         }
 908         if (f->f_skipnextlf) {
 909                 int c;
 910                 c = GETC(f->f_fp);
 911                 if (c == '\n') {
 912                         f->f_newlinetypes |= NEWLINE_CRLF;
 913                         pos++;
 914                         f->f_skipnextlf = 0;
 915                 } else if (c != EOF) ungetc(c, f->f_fp);
 916         }
 917 #if !defined(HAVE_LARGEFILE_SUPPORT)
 918         return PyInt_FromLong(pos);
 919 #else
 920         return PyLong_FromLongLong(pos);
 921 #endif
 922 }
 923
 924 static PyObject *
 925 file_fileno(PyFileObject *f)
 926 {
 927         if (f->f_fp == NULL)
 928                 return err_closed();
 929         return PyInt_FromLong((long) fileno(f->f_fp));
 930 }
 931
 932 static PyObject *
 933 file_flush(PyFileObject *f)
 934 {
 935         int res;
 936
 937         if (f->f_fp == NULL)
 938                 return err_closed();
 939         FILE_BEGIN_ALLOW_THREADS(f)
 940         errno = 0;
 941         res = fflush(f->f_fp);
 942         FILE_END_ALLOW_THREADS(f)
 943         if (res != 0) {
 944                 PyErr_SetFromErrno(PyExc_IOError);
 945                 clearerr(f->f_fp);
 946                 return NULL;
 947         }
 948         Py_INCREF(Py_None);
 949         return Py_None;
 950 }
 951
 952 static PyObject *
 953 file_isatty(PyFileObject *f)
 954 {
 955         long res;
 956         if (f->f_fp == NULL)
 957                 return err_closed();
 958         FILE_BEGIN_ALLOW_THREADS(f)
 959         res = isatty((int)fileno(f->f_fp));
 960         FILE_END_ALLOW_THREADS(f)
 961         return PyBool_FromLong(res);
 962 }
 963
 964
 965 #if BUFSIZ < 8192
 966 #define SMALLCHUNK 8192
 967 #else
 968 #define SMALLCHUNK BUFSIZ
 969 #endif
 970
 971 #if SIZEOF_INT < 4
 972 #define BIGCHUNK  (512 * 32)
 973 #else
 974 #define BIGCHUNK  (512 * 1024)
 975 #endif
 976
 977 static size_t
 978 new_buffersize(PyFileObject *f, size_t currentsize)
 979 {
 980 #ifdef HAVE_FSTAT
 981         off_t pos, end;
 982         struct stat st;
 983         if (fstat(fileno(f->f_fp), &st) == 0) {
 984                 end = st.st_size;
 985                 /* The following is not a bug: we really need to call lseek()
 986                    *and* ftell().  The reason is that some stdio libraries
 987                    mistakenly flush their buffer when ftell() is called and
 988                    the lseek() call it makes fails, thereby throwing away
 989                    data that cannot be recovered in any way.  To avoid this,
 990                    we first test lseek(), and only call ftell() if lseek()
 991                    works.  We can't use the lseek() value either, because we
 992                    need to take the amount of buffered data into account.
 993                    (Yet another reason why stdio stinks. :-) */
 994                 pos = lseek(fileno(f->f_fp), 0L, SEEK_CUR);
 995                 if (pos >= 0) {
 996                         pos = ftell(f->f_fp);
 997                 }
 998                 if (pos < 0)
 999                         clearerr(f->f_fp);
1000                 if (end > pos && pos >= 0)
1001                         return currentsize + end - pos + 1;
1002                 /* Add 1 so if the file were to grow we'd notice. */
1003         }
1004 #endif
1005         if (currentsize > SMALLCHUNK) {
1006                 /* Keep doubling until we reach BIGCHUNK;
1007                    then keep adding BIGCHUNK. */
1008                 if (currentsize <= BIGCHUNK)
1009                         return currentsize + currentsize;
1010                 else
1011                         return currentsize + BIGCHUNK;
1012         }
1013         return currentsize + SMALLCHUNK;
1014 }
1015
/* BLOCKED_ERRNO(x): true when errno value x means "the operation would
 * block".  Both EWOULDBLOCK and EAGAIN are tested when the platform defines
 * them with distinct values; if neither exists the macro is constant 0.
 */
#if defined(EWOULDBLOCK) && defined(EAGAIN) && EWOULDBLOCK != EAGAIN
#define BLOCKED_ERRNO(x) ((x) == EWOULDBLOCK || (x) == EAGAIN)
#elif defined(EWOULDBLOCK)
#define BLOCKED_ERRNO(x) ((x) == EWOULDBLOCK)
#elif defined(EAGAIN)
#define BLOCKED_ERRNO(x) ((x) == EAGAIN)
#else
#define BLOCKED_ERRNO(x) 0
#endif
1029
1030 static PyObject *
1031 file_read(PyFileObject *f, PyObject *args)
1032 {
1033         long bytesrequested = -1;
1034         size_t bytesread, buffersize, chunksize;
1035         PyObject *v;
1036
1037         if (f->f_fp == NULL)
1038                 return err_closed();
1039         /* refuse to mix with f.next() */
1040         if (f->f_buf != NULL &&
1041             (f->f_bufend - f->f_bufptr) > 0 &&
1042             f->f_buf[0] != '\0')
1043                 return err_iterbuffered();
1044         if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
1045                 return NULL;
1046         if (bytesrequested < 0)
1047                 buffersize = new_buffersize(f, (size_t)0);
1048         else
1049                 buffersize = bytesrequested;
1050         if (buffersize > PY_SSIZE_T_MAX) {
1051                 PyErr_SetString(PyExc_OverflowError,
1052         "requested number of bytes is more than a Python string can hold");
1053                 return NULL;
1054         }
1055         v = PyString_FromStringAndSize((char *)NULL, buffersize);
1056         if (v == NULL)
1057                 return NULL;
1058         bytesread = 0;
1059         for (;;) {
1060                 FILE_BEGIN_ALLOW_THREADS(f)
1061                 errno = 0;
1062                 chunksize = Py_UniversalNewlineFread(BUF(v) + bytesread,
1063                           buffersize - bytesread, f->f_fp, (PyObject *)f);
1064                 FILE_END_ALLOW_THREADS(f)
1065                 if (chunksize == 0) {
1066                         if (!ferror(f->f_fp))
1067                                 break;
1068                         clearerr(f->f_fp);
1069                         /* When in non-blocking mode, data shouldn't
1070                          * be discarded if a blocking signal was
1071                          * received. That will also happen if
1072                          * chunksize != 0, but bytesread < buffersize. */
1073                         if (bytesread > 0 && BLOCKED_ERRNO(errno))
1074                                 break;
1075                         PyErr_SetFromErrno(PyExc_IOError);
1076                         Py_DECREF(v);
1077                         return NULL;
1078                 }
1079                 bytesread += chunksize;
1080                 if (bytesread < buffersize) {
1081                         clearerr(f->f_fp);
1082                         break;
1083                 }
1084                 if (bytesrequested < 0) {
1085                         buffersize = new_buffersize(f, buffersize);
1086                         if (_PyString_Resize(&v, buffersize) < 0)
1087                                 return NULL;
1088                 } else {
1089                         /* Got what was requested. */
1090                         break;
1091                 }
1092         }
1093         if (bytesread != buffersize)
1094                 _PyString_Resize(&v, bytesread);
1095         return v;
1096 }
1097
1098 static PyObject *
1099 file_readinto(PyFileObject *f, PyObject *args)
1100 {
1101         char *ptr;
1102         Py_ssize_t ntodo;
1103         Py_ssize_t ndone, nnow;
1104         Py_buffer pbuf;
1105
1106         if (f->f_fp == NULL)
1107                 return err_closed();
1108         /* refuse to mix with f.next() */
1109         if (f->f_buf != NULL &&
1110             (f->f_bufend - f->f_bufptr) > 0 &&
1111             f->f_buf[0] != '\0')
1112                 return err_iterbuffered();
1113         if (!PyArg_ParseTuple(args, "w*", &pbuf))
1114                 return NULL;
1115         ptr = pbuf.buf;
1116         ntodo = pbuf.len;
1117         ndone = 0;
1118         while (ntodo > 0) {
1119                 FILE_BEGIN_ALLOW_THREADS(f)
1120                 errno = 0;
1121                 nnow = Py_UniversalNewlineFread(ptr+ndone, ntodo, f->f_fp,
1122                                                 (PyObject *)f);
1123                 FILE_END_ALLOW_THREADS(f)
1124                 if (nnow == 0) {
1125                         if (!ferror(f->f_fp))
1126                                 break;
1127                         PyErr_SetFromErrno(PyExc_IOError);
1128                         clearerr(f->f_fp);
1129                         PyBuffer_Release(&pbuf);
1130                         return NULL;
1131                 }
1132                 ndone += nnow;
1133                 ntodo -= nnow;
1134         }
1135         PyBuffer_Release(&pbuf);
1136         return PyInt_FromSsize_t(ndone);
1137 }
1138
1139 /**************************************************************************
1140 Routine to get next line using platform fgets().
1141
1142 Under MSVC 6:
1143
1144 + MS threadsafe getc is very slow (multiple layers of function calls before+
1145   after each character, to lock+unlock the stream).
1146 + The stream-locking functions are MS-internal -- can't access them from user
1147   code.
1148 + There's nothing Tim could find in the MS C or platform SDK libraries that
1149   can worm around this.
1150 + MS fgets locks/unlocks only once per line; it's the only hook we have.
1151
1152 So we use fgets for speed(!), despite that it's painful.
1153
1154 MS realloc is also slow.
1155
1156 Reports from other platforms on this method vs getc_unlocked (which MS doesn't
1157 have):
1158         Linux           a wash
1159         Solaris         a wash
1160         Tru64 Unix      getline_via_fgets significantly faster
1161
1162 CAUTION:  The C std isn't clear about this:  in those cases where fgets
1163 writes something into the buffer, can it write into any position beyond the
1164 required trailing null byte?  MSVC 6 fgets does not, and no platform is (yet)
1165 known on which it does; and it would be a strange way to code fgets. Still,
1166 getline_via_fgets may not work correctly if it does.  The std test
1167 test_bufio.py should fail if platform fgets() routinely writes beyond the
1168 trailing null byte.  #define DONT_USE_FGETS_IN_GETLINE to disable this code.
1169 **************************************************************************/
1170
/* Decide whether getline_via_fgets() is compiled in.
 *
 * On a platform with getc_unlocked():
 *     getc_unlocked() is used by default.
 *     #define USE_FGETS_IN_GETLINE to use fgets() instead.
 * On a platform without getc_unlocked():
 *     fgets() is used by default.
 *     #define DONT_USE_FGETS_IN_GETLINE to avoid fgets().
 *
 * DONT_USE_FGETS_IN_GETLINE wins whenever it is defined.
 */
#ifndef HAVE_GETC_UNLOCKED
#ifndef USE_FGETS_IN_GETLINE
#define USE_FGETS_IN_GETLINE
#endif
#endif

#ifdef DONT_USE_FGETS_IN_GETLINE
#undef USE_FGETS_IN_GETLINE
#endif
1187
#ifdef USE_FGETS_IN_GETLINE
/* Read one line from fp using the platform fgets().
 *
 * f   - the file object; only used by the FILE_*_ALLOW_THREADS macros.
 * fp  - the stdio stream to read from.
 *
 * Returns a new string object containing the line, including its trailing
 * '\n' when the line has one; an empty string when fgets() delivers nothing
 * at all.  Returns NULL with an exception set when PyErr_CheckSignals()
 * reports a pending exception, when the line would exceed PY_SSIZE_T_MAX
 * (OverflowError), or when allocating/resizing the string fails.
 *
 * A NULL return from fgets() is treated as end of data: the stream's error
 * indicator is cleared and whatever was read so far is returned.
 *
 * The technique: pre-fill the free part of the buffer with '\n', call
 * fgets(), then look for the first '\n'.  If it is followed by '\0' it was
 * written by fgets() (a complete line); otherwise it is one of our fill
 * bytes and the '\0' just before it marks where fgets() stopped.
 */
static PyObject*
getline_via_fgets(PyFileObject *f, FILE *fp)
{
/* INITBUFSIZE is the maximum line length that lets us get away with the fast
 * no-realloc, one-fgets()-call path.  Boosting it isn't free, because we have
 * to fill this much of the buffer with a known value in order to figure out
 * how much of the buffer fgets() overwrites.  So if INITBUFSIZE is larger
 * than "most" lines, we waste time filling unused buffer slots.  100 is
 * surely adequate for most people's email archives, chewing over source code,
 * etc -- "regular old text files".
 * MAXBUFSIZE is the maximum line length that lets us get away with the less
 * fast (but still zippy) no-realloc, two-fgets()-call path.  See above for
 * cautions about boosting that.  300 was chosen because the worst real-life
 * text-crunching job reported on Python-Dev was a mail-log crawler where over
 * half the lines were 254 chars.
 */
#define INITBUFSIZE 100
#define MAXBUFSIZE 300
        char* p;        /* temp */
        char buf[MAXBUFSIZE];
        PyObject* v;    /* the string object result */
        char* pvfree;   /* address of next free slot */
        char* pvend;    /* address one beyond last free slot */
        size_t nfree;   /* # of free buffer slots; pvend-pvfree */
        size_t total_v_size;  /* total # of slots in buffer */
        size_t increment;       /* amount to increment the buffer */
        size_t prev_v_size;

        /* Optimize for normal case:  avoid _PyString_Resize if at all
         * possible via first reading into stack buffer "buf".
         */
        total_v_size = INITBUFSIZE;     /* start small and pray */
        pvfree = buf;
        for (;;) {
                FILE_BEGIN_ALLOW_THREADS(f)
                pvend = buf + total_v_size;
                nfree = pvend - pvfree;
                memset(pvfree, '\n', nfree);
                assert(nfree < INT_MAX); /* Should be at most MAXBUFSIZE */
                p = fgets(pvfree, (int)nfree, fp);
                FILE_END_ALLOW_THREADS(f)

                if (p == NULL) {
                        clearerr(fp);
                        if (PyErr_CheckSignals())
                                return NULL;
                        v = PyString_FromStringAndSize(buf, pvfree - buf);
                        return v;
                }
                /* fgets read *something* */
                p = memchr(pvfree, '\n', nfree);
                if (p != NULL) {
                        /* Did the \n come from fgets or from us?
                         * Since fgets stops at the first \n, and then writes
                         * \0, if it's from fgets a \0 must be next.  But if
                         * that's so, it could not have come from us, since
                         * the \n's we filled the buffer with have only more
                         * \n's to the right.
                         */
                        if (p+1 < pvend && *(p+1) == '\0') {
                                /* It's from fgets:  we win!  In particular,
                                 * we haven't done any mallocs yet, and can
                                 * build the final result on the first try.
                                 */
                                ++p;    /* include \n from fgets */
                        }
                        else {
                                /* Must be from us:  fgets didn't fill the
                                 * buffer and didn't find a newline, so it
                                 * must be the last and newline-free line of
                                 * the file.
                                 */
                                assert(p > pvfree && *(p-1) == '\0');
                                --p;    /* don't include \0 from fgets */
                        }
                        v = PyString_FromStringAndSize(buf, p - buf);
                        return v;
                }
                /* yuck:  fgets overwrote all the newlines, i.e. the entire
                 * buffer.  So this line isn't over yet, or maybe it is but
                 * we're exactly at EOF.  If we haven't already, try using the
                 * rest of the stack buffer.
                 */
                assert(*(pvend-1) == '\0');
                if (pvfree == buf) {
                        pvfree = pvend - 1;     /* overwrite trailing null */
                        total_v_size = MAXBUFSIZE;
                }
                else
                        break;
        }

        /* The stack buffer isn't big enough; malloc a string object and read
         * into its buffer.
         */
        total_v_size = MAXBUFSIZE << 1;
        v = PyString_FromStringAndSize((char*)NULL, (Py_ssize_t)total_v_size);
        if (v == NULL)
                return v;
        /* copy over everything except the last null byte */
        memcpy(BUF(v), buf, MAXBUFSIZE-1);
        pvfree = BUF(v) + MAXBUFSIZE - 1;

        /* Keep reading stuff into v; if it ever ends successfully, break
         * after setting p one beyond the end of the line.  The code here is
         * very much like the code above, except reads into v's buffer; see
         * the code above for detailed comments about the logic.
         */
        for (;;) {
                FILE_BEGIN_ALLOW_THREADS(f)
                pvend = BUF(v) + total_v_size;
                nfree = pvend - pvfree;
                memset(pvfree, '\n', nfree);
                assert(nfree < INT_MAX);
                p = fgets(pvfree, (int)nfree, fp);
                FILE_END_ALLOW_THREADS(f)

                if (p == NULL) {
                        clearerr(fp);
                        if (PyErr_CheckSignals()) {
                                Py_DECREF(v);
                                return NULL;
                        }
                        p = pvfree;
                        break;
                }
                p = memchr(pvfree, '\n', nfree);
                if (p != NULL) {
                        if (p+1 < pvend && *(p+1) == '\0') {
                                /* \n came from fgets */
                                ++p;
                                break;
                        }
                        /* \n came from us; last line of file, no newline */
                        assert(p > pvfree && *(p-1) == '\0');
                        --p;
                        break;
                }
                /* expand buffer and try again */
                assert(*(pvend-1) == '\0');
                increment = total_v_size >> 2;  /* mild exponential growth */
                /* fgets() takes an int count.  The free area offered to it
                 * on the next pass is increment + 1 bytes (the new slots
                 * plus the trailing null being overwritten), so cap the
                 * increment to keep that count below INT_MAX.
                 */
                if (increment > (size_t)INT_MAX - 2)
                        increment = (size_t)INT_MAX - 2;
                prev_v_size = total_v_size;
                total_v_size += increment;
                /* check for overflow */
                if (total_v_size <= prev_v_size ||
                    total_v_size > PY_SSIZE_T_MAX) {
                        PyErr_SetString(PyExc_OverflowError,
                            "line is longer than a Python string can hold");
                        Py_DECREF(v);
                        return NULL;
                }
                /* Pass the size at full width: narrowing it to int would
                 * corrupt sizes above INT_MAX that the check above allows.
                 */
                if (_PyString_Resize(&v, (Py_ssize_t)total_v_size) < 0)
                        return NULL;
                /* overwrite the trailing null byte */
                pvfree = BUF(v) + (prev_v_size - 1);
        }
        if (BUF(v) + total_v_size != p)
                _PyString_Resize(&v, p - BUF(v));
        return v;
#undef INITBUFSIZE
#undef MAXBUFSIZE
}
#endif  /* ifdef USE_FGETS_IN_GETLINE */
1352
/* Internal routine to get a line.
   Size argument interpretation:
   > 0: max length;
   <= 0: read arbitrary line

   Returns a new string object holding the line, including its trailing
   '\n' when one was read (an empty string when nothing could be read), or
   NULL with an exception set: IOError when the stream's error indicator is
   set at EOF, OverflowError when the line would exceed PY_SSIZE_T_MAX,
   whatever PyErr_CheckSignals() raised, or an allocation/resize failure.

   In universal-newline mode '\r' and "\r\n" are both delivered as '\n',
   and the newline kinds seen are recorded in f->f_newlinetypes.  The
   "a \r was just seen" state is carried between calls in f->f_skipnextlf.
*/

static PyObject *
get_line(PyFileObject *f, int n)
{
        FILE *fp = f->f_fp;
        int c;
        char *buf, *end;        /* next free slot / one past the last slot */
        size_t total_v_size;    /* total # of slots in buffer */
        size_t used_v_size;     /* # used slots in buffer */
        size_t increment;       /* amount to increment the buffer */
        PyObject *v;
        /* Work on local copies of the newline state; they are written back
         * to the file object after each read pass below. */
        int newlinetypes = f->f_newlinetypes;
        int skipnextlf = f->f_skipnextlf;
        int univ_newline = f->f_univ_newline;

#if defined(USE_FGETS_IN_GETLINE)
        /* Unbounded read without newline translation: use the fgets path. */
        if (n <= 0 && !univ_newline )
                return getline_via_fgets(f, fp);
#endif
        /* Start with exactly n slots when a limit is given, else 100. */
        total_v_size = n > 0 ? n : 100;
        v = PyString_FromStringAndSize((char *)NULL, total_v_size);
        if (v == NULL)
                return NULL;
        buf = BUF(v);
        end = buf + total_v_size;

        for (;;) {
                /* One pass: read characters until newline, EOF, or the
                 * buffer is full. */
                FILE_BEGIN_ALLOW_THREADS(f)
                FLOCKFILE(fp);
                if (univ_newline) {
                        c = 'x'; /* Shut up gcc warning */
                        while ( buf != end && (c = GETC(fp)) != EOF ) {
                                if (skipnextlf ) {
                                        skipnextlf = 0;
                                        if (c == '\n') {
                                                /* Seeing a \n here with
                                                 * skipnextlf true means we
                                                 * saw a \r before.
                                                 */
                                                newlinetypes |= NEWLINE_CRLF;
                                                /* Drop this \n and fetch the
                                                 * character after it. */
                                                c = GETC(fp);
                                                if (c == EOF) break;
                                        } else {
                                                /* The earlier \r stood on
                                                 * its own. */
                                                newlinetypes |= NEWLINE_CR;
                                        }
                                }
                                if (c == '\r') {
                                        /* Deliver \r as \n and remember to
                                         * skip a \n that may follow. */
                                        skipnextlf = 1;
                                        c = '\n';
                                } else if ( c == '\n')
                                        newlinetypes |= NEWLINE_LF;
                                *buf++ = c;
                                if (c == '\n') break;
                        }
                        /* EOF right after a \r: it was a lone \r. */
                        if ( c == EOF && skipnextlf )
                                newlinetypes |= NEWLINE_CR;
                } else /* If not universal newlines use the normal loop */
                while ((c = GETC(fp)) != EOF &&
                       (*buf++ = c) != '\n' &&
                        buf != end)
                        ;
                FUNLOCKFILE(fp);
                FILE_END_ALLOW_THREADS(f)
                /* Publish the updated newline state. */
                f->f_newlinetypes = newlinetypes;
                f->f_skipnextlf = skipnextlf;
                if (c == '\n')
                        break;
                if (c == EOF) {
                        if (ferror(fp)) {
                                PyErr_SetFromErrno(PyExc_IOError);
                                clearerr(fp);
                                Py_DECREF(v);
                                return NULL;
                        }
                        clearerr(fp);
                        if (PyErr_CheckSignals()) {
                                Py_DECREF(v);
                                return NULL;
                        }
                        break;
                }
                /* Must be because buf == end */
                if (n > 0)
                        break;  /* caller's length limit reached */
                /* No limit: enlarge the buffer by a quarter and go on. */
                used_v_size = total_v_size;
                increment = total_v_size >> 2; /* mild exponential growth */
                total_v_size += increment;
                if (total_v_size > PY_SSIZE_T_MAX) {
                        PyErr_SetString(PyExc_OverflowError,
                            "line is longer than a Python string can hold");
                        Py_DECREF(v);
                        return NULL;
                }
                if (_PyString_Resize(&v, total_v_size) < 0)
                        return NULL;
                /* The resize may have moved the string; recompute pointers. */
                buf = BUF(v) + used_v_size;
                end = BUF(v) + total_v_size;
        }

        /* Shrink the string to the number of characters actually stored. */
        used_v_size = buf - BUF(v);
        if (used_v_size != total_v_size)
                _PyString_Resize(&v, used_v_size);
        return v;
}
1462
1463 /* External C interface */
1464
1465 PyObject *
1466 PyFile_GetLine(PyObject *f, int n)
1467 {
1468         PyObject *result;
1469
1470         if (f == NULL) {
1471                 PyErr_BadInternalCall();
1472                 return NULL;
1473         }
1474
1475         if (PyFile_Check(f)) {
1476                 PyFileObject *fo = (PyFileObject *)f;
1477                 if (fo->f_fp == NULL)
1478                         return err_closed();
1479                 /* refuse to mix with f.next() */
1480                 if (fo->f_buf != NULL &&
1481                     (fo->f_bufend - fo->f_bufptr) > 0 &&
1482                     fo->f_buf[0] != '\0')
1483                         return err_iterbuffered();
1484                 result = get_line(fo, n);
1485         }
1486         else {
1487                 PyObject *reader;
1488                 PyObject *args;
1489
1490                 reader = PyObject_GetAttrString(f, "readline");
1491                 if (reader == NULL)
1492                         return NULL;
1493                 if (n <= 0)
1494                         args = PyTuple_New(0);
1495                 else
1496                         args = Py_BuildValue("(i)", n);
1497                 if (args == NULL) {
1498                         Py_DECREF(reader);
1499                         return NULL;
1500                 }
1501                 result = PyEval_CallObject(reader, args);
1502                 Py_DECREF(reader);
1503                 Py_DECREF(args);
1504                 if (result != NULL && !PyString_Check(result) &&
1505                     !PyUnicode_Check(result)) {
1506                         Py_DECREF(result);
1507                         result = NULL;
1508                         PyErr_SetString(PyExc_TypeError,
1509                                    "object.readline() returned non-string");
1510                 }
1511         }
1512
1513         if (n < 0 && result != NULL && PyString_Check(result)) {
1514                 char *s = PyString_AS_STRING(result);
1515                 Py_ssize_t len = PyString_GET_SIZE(result);
1516                 if (len == 0) {
1517                         Py_DECREF(result);
1518                         result = NULL;
1519                         PyErr_SetString(PyExc_EOFError,
1520                                         "EOF when reading a line");
1521                 }
1522                 else if (s[len-1] == '\n') {
1523                         if (result->ob_refcnt == 1)
1524                                 _PyString_Resize(&result, len-1);
1525                         else {
1526                                 PyObject *v;
1527                                 v = PyString_FromStringAndSize(s, len-1);
1528                                 Py_DECREF(result);
1529                                 result = v;
1530                         }
1531                 }
1532         }
1533 #ifdef Py_USING_UNICODE
1534         if (n < 0 && result != NULL && PyUnicode_Check(result)) {
1535                 Py_UNICODE *s = PyUnicode_AS_UNICODE(result);
1536                 Py_ssize_t len = PyUnicode_GET_SIZE(result);
1537                 if (len == 0) {
1538                         Py_DECREF(result);
1539                         result = NULL;
1540                         PyErr_SetString(PyExc_EOFError,
1541                                         "EOF when reading a line");
1542                 }
1543                 else if (s[len-1] == '\n') {
1544                         if (result->ob_refcnt == 1)
1545                                 PyUnicode_Resize(&result, len-1);
1546                         else {
1547                                 PyObject *v;
1548                                 v = PyUnicode_FromUnicode(s, len-1);
1549                                 Py_DECREF(result);
1550                                 result = v;
1551                         }
1552                 }
1553         }
1554 #endif
1555         return result;
1556 }
1557
1558 /* Python method */
1559
1560 static PyObject *
1561 file_readline(PyFileObject *f, PyObject *args)
1562 {
1563         int n = -1;
1564
1565         if (f->f_fp == NULL)
1566                 return err_closed();
1567         /* refuse to mix with f.next() */
1568         if (f->f_buf != NULL &&
1569             (f->f_bufend - f->f_bufptr) > 0 &&
1570             f->f_buf[0] != '\0')
1571                 return err_iterbuffered();
1572         if (!PyArg_ParseTuple(args, "|i:readline", &n))
1573                 return NULL;
1574         if (n == 0)
1575                 return PyString_FromString("");
1576         if (n < 0)
1577                 n = 0;
1578         return get_line(f, n);
1579 }
1580
/* Implement file.readlines([sizehint]).

   Reads the stream in chunks with Py_UniversalNewlineFread() (GIL released
   around each read), splits the data at '\n' and appends every complete
   line, newline included, to a freshly created list.

   Buffering: reading starts in the on-stack small_buffer (SMALLCHUNK
   bytes).  When a chunk contains no newline the buffer size is doubled and
   the data moves into big_buffer, a string object used as scratch space.

   sizehint: when positive, reading stops once at least sizehint bytes have
   been read and the complete lines of the current chunk are stored; a
   trailing partial line is then finished with get_line(f, 0).

   Returns a new reference to the list, or NULL with an exception set:
   the error from err_closed() / err_iterbuffered(), the argument parsing
   error, IOError built from errno when ferror() reports a stream error, or
   OverflowError when the doubled buffer size exceeds PY_SSIZE_T_MAX. */
1581 static PyObject *
1582 file_readlines(PyFileObject *f, PyObject *args)
1583 {
1584         long sizehint = 0;
1585         PyObject *list = NULL;
1586         PyObject *line;
1587         char small_buffer[SMALLCHUNK];
1588         char *buffer = small_buffer;
1589         size_t buffersize = SMALLCHUNK;
1590         PyObject *big_buffer = NULL;
1591         size_t nfilled = 0;
1592         size_t nread;
1593         size_t totalread = 0;
1594         char *p, *q, *end;
1595         int err;
1596         int shortread = 0; /* set once a read returns fewer bytes than asked for */
1597
1598         if (f->f_fp == NULL)
1599                 return err_closed();
1600         /* refuse to mix with f.next() */
1601         if (f->f_buf != NULL &&
1602             (f->f_bufend - f->f_bufptr) > 0 &&
1603             f->f_buf[0] != '\0')
1604                 return err_iterbuffered();
1605         if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
1606                 return NULL;
1607         if ((list = PyList_New(0)) == NULL)
1608                 return NULL;
1609         for (;;) {
                  /* After a short read no further read is attempted; the
                     iteration is handled like a read that returned 0 bytes. */
1610                 if (shortread)
1611                         nread = 0;
1612                 else {
1613                         FILE_BEGIN_ALLOW_THREADS(f)
1614                         errno = 0;
1615                         nread = Py_UniversalNewlineFread(buffer+nfilled,
1616                                 buffersize-nfilled, f->f_fp, (PyObject *)f);
1617                         FILE_END_ALLOW_THREADS(f)
1618                         shortread = (nread < buffersize-nfilled);
1619                 }
1620                 if (nread == 0) {
                          /* Clearing sizehint keeps the partial-last-line code
                             below from calling get_line(). */
1621                         sizehint = 0;
1622                         if (!ferror(f->f_fp))
1623                                 break;
1624                         PyErr_SetFromErrno(PyExc_IOError);
1625                         clearerr(f->f_fp);
1626                         goto error;
1627                 }
1628                 totalread += nread;
                  /* Only the newly read bytes are searched; the nfilled bytes
                     before them are the leftover of a line with no newline yet. */
1629                 p = (char *)memchr(buffer+nfilled, '\n', nread);
1630                 if (p == NULL) {
1631                         /* Need a larger buffer to fit this line */
1632                         nfilled += nread;
1633                         buffersize *= 2;
1634                         if (buffersize > PY_SSIZE_T_MAX) {
1635                                 PyErr_SetString(PyExc_OverflowError,
1636                             "line is longer than a Python string can hold");
1637                                 goto error;
1638                         }
1639                         if (big_buffer == NULL) {
1640                                 /* Create the big buffer */
1641                                 big_buffer = PyString_FromStringAndSize(
1642                                         NULL, buffersize);
1643                                 if (big_buffer == NULL)
1644                                         goto error;
1645                                 buffer = PyString_AS_STRING(big_buffer);
1646                                 memcpy(buffer, small_buffer, nfilled);
1647                         }
1648                         else {
1649                                 /* Grow the big buffer */
1650                                 if ( _PyString_Resize(&big_buffer, buffersize) < 0 )
1651                                         goto error;
1652                                 buffer = PyString_AS_STRING(big_buffer);
1653                         }
1654                         continue;
1655                 }
1656                 end = buffer+nfilled+nread;
1657                 q = buffer;
1658                 do {
1659                         /* Process complete lines */
1660                         p++;
1661                         line = PyString_FromStringAndSize(q, p-q);
1662                         if (line == NULL)
1663                                 goto error;
1664                         err = PyList_Append(list, line);
1665                         Py_DECREF(line);
1666                         if (err != 0)
1667                                 goto error;
1668                         q = p;
1669                         p = (char *)memchr(q, '\n', end-q);
1670                 } while (p != NULL);
1671                 /* Move the remaining incomplete line to the start */
1672                 nfilled = end-q;
1673                 memmove(buffer, q, nfilled);
1674                 if (sizehint > 0)
1675                         if (totalread >= (size_t)sizehint)
1676                                 break;
1677         }
1678         if (nfilled != 0) {
1679                 /* Partial last line */
1680                 line = PyString_FromStringAndSize(buffer, nfilled);
1681                 if (line == NULL)
1682                         goto error;
1683                 if (sizehint > 0) {
1684                         /* Need to complete the last line */
1685                         PyObject *rest = get_line(f, 0);
1686                         if (rest == NULL) {
1687                                 Py_DECREF(line);
1688                                 goto error;
1689                         }
1690                         PyString_Concat(&line, rest);
1691                         Py_DECREF(rest);
1692                         if (line == NULL)
1693                                 goto error;
1694                 }
1695                 err = PyList_Append(list, line);
1696                 Py_DECREF(line);
1697                 if (err != 0)
1698                         goto error;
1699         }
1700
     /* Shared exit: big_buffer is released on success and on failure alike. */
1701 cleanup:
1702         Py_XDECREF(big_buffer);
1703         return list;
1704
     /* Failure exit: drop the partial list so that NULL is returned. */
1705 error:
1706         Py_CLEAR(list);
1707         goto cleanup;
1708 }
1709
1710 static PyObject *
1711 file_write(PyFileObject *f, PyObject *args)
1712 {
1713         Py_buffer pbuf;
1714         char *s;
1715         Py_ssize_t n, n2;
1716         if (f->f_fp == NULL)
1717                 return err_closed();
1718         if (f->f_binary) {
1719                 if (!PyArg_ParseTuple(args, "s*", &pbuf))
1720                         return NULL;
1721                 s = pbuf.buf;
1722                 n = pbuf.len;
1723         } else
1724                 if (!PyArg_ParseTuple(args, "t#", &s, &n))
1725                 return NULL;
1726         f->f_softspace = 0;
1727         FILE_BEGIN_ALLOW_THREADS(f)
1728         errno = 0;
1729         n2 = fwrite(s, 1, n, f->f_fp);
1730         FILE_END_ALLOW_THREADS(f)
1731         if (f->f_binary)
1732                 PyBuffer_Release(&pbuf);
1733         if (n2 != n) {
1734                 PyErr_SetFromErrno(PyExc_IOError);
1735                 clearerr(f->f_fp);
1736                 return NULL;
1737         }
1738         Py_INCREF(Py_None);
1739         return Py_None;
1740 }
1741
1742 static PyObject *
1743 file_writelines(PyFileObject *f, PyObject *seq)
1744 {
1745 #define CHUNKSIZE 1000
1746         PyObject *list, *line;
1747         PyObject *it;   /* iter(seq) */
1748         PyObject *result;
1749         int index, islist;
1750         Py_ssize_t i, j, nwritten, len;
1751
1752         assert(seq != NULL);
1753         if (f->f_fp == NULL)
1754                 return err_closed();
1755
1756         result = NULL;
1757         list = NULL;
1758         islist = PyList_Check(seq);
1759         if  (islist)
1760                 it = NULL;
1761         else {
1762                 it = PyObject_GetIter(seq);
1763                 if (it == NULL) {
1764                         PyErr_SetString(PyExc_TypeError,
1765                                 "writelines() requires an iterable argument");
1766                         return NULL;
1767                 }
1768                 /* From here on, fail by going to error, to reclaim "it". */
1769                 list = PyList_New(CHUNKSIZE);
1770                 if (list == NULL)
1771                         goto error;
1772         }
1773
1774         /* Strategy: slurp CHUNKSIZE lines into a private list,
1775            checking that they are all strings, then write that list
1776            without holding the interpreter lock, then come back for more. */
1777         for (index = 0; ; index += CHUNKSIZE) {
1778                 if (islist) {
1779                         Py_XDECREF(list);
1780                         list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
1781                         if (list == NULL)
1782                                 goto error;
1783                         j = PyList_GET_SIZE(list);
1784                 }
1785                 else {
1786                         for (j = 0; j < CHUNKSIZE; j++) {
1787                                 line = PyIter_Next(it);
1788                                 if (line == NULL) {
1789                                         if (PyErr_Occurred())
1790                                                 goto error;
1791                                         break;
1792                                 }
1793                                 PyList_SetItem(list, j, line);
1794                         }
1795                 }
1796                 if (j == 0)
1797                         break;
1798
1799                 /* Check that all entries are indeed strings. If not,
1800                    apply the same rules as for file.write() and
1801                    convert the results to strings. This is slow, but
1802                    seems to be the only way since all conversion APIs
1803                    could potentially execute Python code. */
1804                 for (i = 0; i < j; i++) {
1805                         PyObject *v = PyList_GET_ITEM(list, i);
1806                         if (!PyString_Check(v)) {
1807                                 const char *buffer;
1808                                 if (((f->f_binary &&
1809                                       PyObject_AsReadBuffer(v,
1810                                               (const void**)&buffer,
1811                                                             &len)) ||
1812                                      PyObject_AsCharBuffer(v,
1813                                                            &buffer,
1814                                                            &len))) {
1815                                         PyErr_SetString(PyExc_TypeError,
1816                         "writelines() argument must be a sequence of strings");
1817                                         goto error;
1818                                 }
1819                                 line = PyString_FromStringAndSize(buffer,
1820                                                                   len);
1821                                 if (line == NULL)
1822                                         goto error;
1823                                 Py_DECREF(v);
1824                                 PyList_SET_ITEM(list, i, line);
1825                         }
1826                 }
1827
1828                 /* Since we are releasing the global lock, the
1829                    following code may *not* execute Python code. */
1830                 f->f_softspace = 0;
1831                 FILE_BEGIN_ALLOW_THREADS(f)
1832                 errno = 0;
1833                 for (i = 0; i < j; i++) {
1834                         line = PyList_GET_ITEM(list, i);
1835                         len = PyString_GET_SIZE(line);
1836                         nwritten = fwrite(PyString_AS_STRING(line),
1837                                           1, len, f->f_fp);
1838                         if (nwritten != len) {
1839                                 FILE_ABORT_ALLOW_THREADS(f)
1840                                 PyErr_SetFromErrno(PyExc_IOError);
1841                                 clearerr(f->f_fp);
1842                                 goto error;
1843                         }
1844                 }
1845                 FILE_END_ALLOW_THREADS(f)
1846
1847                 if (j < CHUNKSIZE)
1848                         break;
1849         }
1850
1851         Py_INCREF(Py_None);
1852         result = Py_None;
1853   error:
1854         Py_XDECREF(list);
1855         Py_XDECREF(it);
1856         return result;
1857 #undef CHUNKSIZE
1858 }
1859
1860 static PyObject *
1861 file_self(PyFileObject *f)
1862 {
1863         if (f->f_fp == NULL)
1864                 return err_closed();
1865         Py_INCREF(f);
1866         return (PyObject *)f;
1867 }
1868
1869 static PyObject *
1870 file_xreadlines(PyFileObject *f)
1871 {
1872         if (PyErr_WarnPy3k("f.xreadlines() not supported in 3.x, "
1873                            "try 'for line in f' instead", 1) < 0)
1874                return NULL;
1875         return file_self(f);
1876 }
1877
1878 static PyObject *
1879 file_exit(PyObject *f, PyObject *args)
1880 {
1881         PyObject *ret = PyObject_CallMethod(f, "close", NULL);
1882         if (!ret)
1883                 /* If error occurred, pass through */
1884                 return NULL;
1885         Py_DECREF(ret);
1886         /* We cannot return the result of close since a true
1887          * value will be interpreted as "yes, swallow the
1888          * exception if one was raised inside the with block". */
1889         Py_RETURN_NONE;
1890 }
1891
/* Docstrings for the readline(), read() and write() methods; the names
   defined here are referenced from the file_methods table. */
1892 PyDoc_STRVAR(readline_doc,
1893 "readline([size]) -> next line from the file, as a string.\n"
1894 "\n"
1895 "Retain newline.  A non-negative size argument limits the maximum\n"
1896 "number of bytes to return (an incomplete line may be returned then).\n"
1897 "Return an empty string at EOF.");
1898
1899 PyDoc_STRVAR(read_doc,
1900 "read([size]) -> read at most size bytes, returned as a string.\n"
1901 "\n"
1902 "If the size argument is negative or omitted, read until EOF is reached.\n"
1903 "Notice that when in non-blocking mode, less data than what was requested\n"
1904 "may be returned, even if no size parameter was given.");
1905
1906 PyDoc_STRVAR(write_doc,
1907 "write(str) -> None.  Write string str to file.\n"
1908 "\n"
1909 "Note that due to buffering, flush() or close() may be needed before\n"
1910 "the file on disk reflects the data written.");
1911
1912 PyDoc_STRVAR(fileno_doc,
1913 "fileno() -> integer \"file descriptor\".\n"
1914 "\n"
1915 "This is needed for lower-level file interfaces, such os.read().");
1916
/* Docstrings for the remaining file methods; the names defined here are
   referenced from the file_methods table. */
1917 PyDoc_STRVAR(seek_doc,
1918 "seek(offset[, whence]) -> None.  Move to new file position.\n"
1919 "\n"
1920 "Argument offset is a byte count.  Optional argument whence defaults to\n"
1921 "0 (offset from start of file, offset should be >= 0); other values are 1\n"
1922 "(move relative to current position, positive or negative), and 2 (move\n"
1923 "relative to end of file, usually negative, although many platforms allow\n"
1924 "seeking beyond the end of a file).  If the file is opened in text mode,\n"
1925 "only offsets returned by tell() are legal.  Use of other offsets causes\n"
1926 "undefined behavior."
1927 "\n"
1928 "Note that not all file objects are seekable.");
1929
     /* Defined only when HAVE_FTRUNCATE is set, matching the guarded
        "truncate" entry in file_methods. */
1930 #ifdef HAVE_FTRUNCATE
1931 PyDoc_STRVAR(truncate_doc,
1932 "truncate([size]) -> None.  Truncate the file to at most size bytes.\n"
1933 "\n"
1934 "Size defaults to the current file position, as returned by tell().");
1935 #endif
1936
1937 PyDoc_STRVAR(tell_doc,
1938 "tell() -> current file position, an integer (may be a long integer).");
1939
1940 PyDoc_STRVAR(readinto_doc,
1941 "readinto() -> Undocumented.  Don't use this; it may go away.");
1942
1943 PyDoc_STRVAR(readlines_doc,
1944 "readlines([size]) -> list of strings, each a line from the file.\n"
1945 "\n"
1946 "Call readline() repeatedly and return a list of the lines so read.\n"
1947 "The optional size argument, if given, is an approximate bound on the\n"
1948 "total number of bytes in the lines returned.");
1949
1950 PyDoc_STRVAR(xreadlines_doc,
1951 "xreadlines() -> returns self.\n"
1952 "\n"
1953 "For backward compatibility. File objects now include the performance\n"
1954 "optimizations previously implemented in the xreadlines module.");
1955
1956 PyDoc_STRVAR(writelines_doc,
1957 "writelines(sequence_of_strings) -> None.  Write the strings to the file.\n"
1958 "\n"
1959 "Note that newlines are not added.  The sequence can be any iterable object\n"
1960 "producing strings. This is equivalent to calling write() for each string.");
1961
1962 PyDoc_STRVAR(flush_doc,
1963 "flush() -> None.  Flush the internal I/O buffer.");
1964
1965 PyDoc_STRVAR(close_doc,
1966 "close() -> None or (perhaps) an integer.  Close the file.\n"
1967 "\n"
1968 "Sets data attribute .closed to True.  A closed file cannot be used for\n"
1969 "further I/O operations.  close() may be called more than once without\n"
1970 "error.  Some kinds of file objects (for example, opened by popen())\n"
1971 "may return an exit status upon closing.");
1972
1973 PyDoc_STRVAR(isatty_doc,
1974 "isatty() -> true or false.  True if the file is connected to a tty device.");
1975
1976 PyDoc_STRVAR(enter_doc,
1977              "__enter__() -> self.");
1978
1979 PyDoc_STRVAR(exit_doc,
1980              "__exit__(*excinfo) -> None.  Closes the file.");
1981
/* Method table of the file type (installed as tp_methods of PyFile_Type).
   Each entry gives the Python-visible name, the C implementation, the
   calling-convention flag and the docstring.  The "truncate" entry exists
   only when HAVE_FTRUNCATE is defined, and "__enter__" is served by
   file_self.  The table ends with a NULL sentinel entry. */
1982 static PyMethodDef file_methods[] = {
1983         {"readline",  (PyCFunction)file_readline, METH_VARARGS, readline_doc},
1984         {"read",      (PyCFunction)file_read,     METH_VARARGS, read_doc},
1985         {"write",     (PyCFunction)file_write,    METH_VARARGS, write_doc},
1986         {"fileno",    (PyCFunction)file_fileno,   METH_NOARGS,  fileno_doc},
1987         {"seek",      (PyCFunction)file_seek,     METH_VARARGS, seek_doc},
1988 #ifdef HAVE_FTRUNCATE
1989         {"truncate",  (PyCFunction)file_truncate, METH_VARARGS, truncate_doc},
1990 #endif
1991         {"tell",      (PyCFunction)file_tell,     METH_NOARGS,  tell_doc},
1992         {"readinto",  (PyCFunction)file_readinto, METH_VARARGS, readinto_doc},
1993         {"readlines", (PyCFunction)file_readlines, METH_VARARGS, readlines_doc},
1994         {"xreadlines",(PyCFunction)file_xreadlines, METH_NOARGS, xreadlines_doc},
1995         {"writelines",(PyCFunction)file_writelines, METH_O,     writelines_doc},
1996         {"flush",     (PyCFunction)file_flush,    METH_NOARGS,  flush_doc},
1997         {"close",     (PyCFunction)file_close,    METH_NOARGS,  close_doc},
1998         {"isatty",    (PyCFunction)file_isatty,   METH_NOARGS,  isatty_doc},
1999         {"__enter__", (PyCFunction)file_self,     METH_NOARGS,  enter_doc},
2000         {"__exit__",  (PyCFunction)file_exit,     METH_VARARGS, exit_doc},
2001         {NULL,        NULL}             /* sentinel */
2002 };
2003
/* OFF(x): byte offset of field x inside PyFileObject. */
2004 #define OFF(x) offsetof(PyFileObject, x)
2005
     /* Read-only (RO) attributes served straight from PyFileObject fields,
        each stored as an object pointer (T_OBJECT).  Installed as tp_members
        of PyFile_Type. */
2006 static PyMemberDef file_memberlist[] = {
2007         {"mode",        T_OBJECT,       OFF(f_mode),    RO,
2008          "file mode ('r', 'U', 'w', 'a', possibly with 'b' or '+' added)"},
2009         {"name",        T_OBJECT,       OFF(f_name),    RO,
2010          "file name"},
2011         {"encoding",    T_OBJECT,       OFF(f_encoding),        RO,
2012          "file encoding"},
2013         {"errors",      T_OBJECT,       OFF(f_errors),  RO,
2014          "Unicode error handler"},
2015         /* getattr(f, "closed") is implemented without this table */
2016         {NULL}  /* Sentinel */
2017 };
2018
2019 static PyObject *
2020 get_closed(PyFileObject *f, void *closure)
2021 {
2022         return PyBool_FromLong((long)(f->f_fp == 0));
2023 }
2024 static PyObject *
2025 get_newlines(PyFileObject *f, void *closure)
2026 {
2027         switch (f->f_newlinetypes) {
2028         case NEWLINE_UNKNOWN:
2029                 Py_INCREF(Py_None);
2030                 return Py_None;
2031         case NEWLINE_CR:
2032                 return PyString_FromString("\r");
2033         case NEWLINE_LF:
2034                 return PyString_FromString("\n");
2035         case NEWLINE_CR|NEWLINE_LF:
2036                 return Py_BuildValue("(ss)", "\r", "\n");
2037         case NEWLINE_CRLF:
2038                 return PyString_FromString("\r\n");
2039         case NEWLINE_CR|NEWLINE_CRLF:
2040                 return Py_BuildValue("(ss)", "\r", "\r\n");
2041         case NEWLINE_LF|NEWLINE_CRLF:
2042                 return Py_BuildValue("(ss)", "\n", "\r\n");
2043         case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF:
2044                 return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
2045         default:
2046                 PyErr_Format(PyExc_SystemError,
2047                              "Unknown newlines value 0x%x\n",
2048                              f->f_newlinetypes);
2049                 return NULL;
2050         }
2051 }
2052
2053 static PyObject *
2054 get_softspace(PyFileObject *f, void *closure)
2055 {
2056         if (PyErr_WarnPy3k("file.softspace not supported in 3.x", 1) < 0)
2057                 return NULL;
2058         return PyInt_FromLong(f->f_softspace);
2059 }
2060
2061 static int
2062 set_softspace(PyFileObject *f, PyObject *value)
2063 {
2064         int new;
2065         if (PyErr_WarnPy3k("file.softspace not supported in 3.x", 1) < 0)
2066                 return -1;
2067
2068         if (value == NULL) {
2069                 PyErr_SetString(PyExc_TypeError,
2070                                 "can't delete softspace attribute");
2071                 return -1;
2072         }
2073
2074         new = PyInt_AsLong(value);
2075         if (new == -1 && PyErr_Occurred())
2076                 return -1;
2077         f->f_softspace = new;
2078         return 0;
2079 }
2080
/* Computed attributes of the file type (installed as tp_getset of
   PyFile_Type).  "closed" and "newlines" have a getter only; "softspace"
   also has a setter.  The table ends with a zeroed sentinel entry. */
2081 static PyGetSetDef file_getsetlist[] = {
2082         {"closed", (getter)get_closed, NULL, "True if the file is closed"},
2083         {"newlines", (getter)get_newlines, NULL,
2084          "end-of-line convention used in this file"},
2085         {"softspace", (getter)get_softspace, (setter)set_softspace,
2086          "flag indicating that a space needs to be printed; used by print"},
2087         {0},
2088 };
2089
2090 static void
2091 drop_readahead(PyFileObject *f)
2092 {
2093         if (f->f_buf != NULL) {
2094                 PyMem_Free(f->f_buf);
2095                 f->f_buf = NULL;
2096         }
2097 }
2098
2099 /* Make sure that file has a readahead buffer with at least one byte
2100    (unless at EOF) and no more than bufsize.  Returns negative value on
2101    error, will set MemoryError if bufsize bytes cannot be allocated. */
2102 static int
2103 readahead(PyFileObject *f, int bufsize)
2104 {
2105         Py_ssize_t chunksize;
2106
2107         if (f->f_buf != NULL) {
2108                 if( (f->f_bufend - f->f_bufptr) >= 1)
2109                         return 0;
2110                 else
2111                         drop_readahead(f);
2112         }
2113         if ((f->f_buf = (char *)PyMem_Malloc(bufsize)) == NULL) {
2114                 PyErr_NoMemory();
2115                 return -1;
2116         }
2117         FILE_BEGIN_ALLOW_THREADS(f)
2118         errno = 0;
2119         chunksize = Py_UniversalNewlineFread(
2120                 f->f_buf, bufsize, f->f_fp, (PyObject *)f);
2121         FILE_END_ALLOW_THREADS(f)
2122         if (chunksize == 0) {
2123                 if (ferror(f->f_fp)) {
2124                         PyErr_SetFromErrno(PyExc_IOError);
2125                         clearerr(f->f_fp);
2126                         drop_readahead(f);
2127                         return -1;
2128                 }
2129         }
2130         f->f_bufptr = f->f_buf;
2131         f->f_bufend = f->f_buf + chunksize;
2132         return 0;
2133 }
2134
2135 /* Used by file_iternext.  The returned string will start with 'skip'
2136    uninitialized bytes followed by the remainder of the line. Don't be
2137    horrified by the recursive call: maximum recursion depth is limited by
2138    logarithmic buffer growth to about 50 even when reading a 1gb line. */
2139
/* How the pieces of a long line are assembled: when the buffered data holds
   no '\n', the current buffer is detached from the file object, the
   function recurses with skip advanced by the length of the buffered data
   and a bufsize enlarged by a quarter, and the innermost call allocates the
   result string at its final length.  On the way back out each level copies
   its own piece into the result at its own 'skip' offset and frees the
   buffer it detached.

   Returns a new string object, or NULL with an exception set when
   readahead() or the string allocation fails.  A result of exactly 'skip'
   bytes means no further data could be read. */
2140 static PyStringObject *
2141 readahead_get_line_skip(PyFileObject *f, int skip, int bufsize)
2142 {
2143         PyStringObject* s;
2144         char *bufptr;
2145         char *buf;
2146         Py_ssize_t len;
2147
2148         if (f->f_buf == NULL)
2149                 if (readahead(f, bufsize) < 0)
2150                         return NULL;
2151
2152         len = f->f_bufend - f->f_bufptr;
          /* Nothing buffered: return a string of just the 'skip' bytes,
             which the callers up the recursion fill in. */
2153         if (len == 0)
2154                 return (PyStringObject *)
2155                         PyString_FromStringAndSize(NULL, skip);
2156         bufptr = (char *)memchr(f->f_bufptr, '\n', len);
2157         if (bufptr != NULL) {
2158                 bufptr++;                       /* Count the '\n' */
2159                 len = bufptr - f->f_bufptr;
2160                 s = (PyStringObject *)
2161                         PyString_FromStringAndSize(NULL, skip+len);
2162                 if (s == NULL)
2163                         return NULL;
2164                 memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
2165                 f->f_bufptr = bufptr;
                  /* The buffer is released as soon as it is fully consumed. */
2166                 if (bufptr == f->f_bufend)
2167                         drop_readahead(f);
2168         } else {
                  /* No newline yet: keep the old buffer alive in buf/bufptr
                     while the recursive call reads the rest of the line. */
2169                 bufptr = f->f_bufptr;
2170                 buf = f->f_buf;
2171                 f->f_buf = NULL;        /* Force new readahead buffer */
2172                 assert(skip+len < INT_MAX);
2173                 s = readahead_get_line_skip(
2174                         f, (int)(skip+len), bufsize + (bufsize>>2) );
2175                 if (s == NULL) {
2176                         PyMem_Free(buf);
2177                         return NULL;
2178                 }
                  /* Copy this level's piece in front of what the deeper
                     calls already stored. */
2179                 memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
2180                 PyMem_Free(buf);
2181         }
2182         return s;
2183 }
2184
2185 /* A larger buffer size may actually decrease performance. */
2186 #define READAHEAD_BUFSIZE 8192
2187
2188 static PyObject *
2189 file_iternext(PyFileObject *f)
2190 {
2191         PyStringObject* l;
2192
2193         if (f->f_fp == NULL)
2194                 return err_closed();
2195
2196         l = readahead_get_line_skip(f, 0, READAHEAD_BUFSIZE);
2197         if (l == NULL || PyString_GET_SIZE(l) == 0) {
2198                 Py_XDECREF(l);
2199                 return NULL;
2200         }
2201         return (PyObject *)l;
2202 }
2203
2204
2205 static PyObject *
2206 file_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2207 {
2208         PyObject *self;
2209         static PyObject *not_yet_string;
2210
2211         assert(type != NULL && type->tp_alloc != NULL);
2212
2213         if (not_yet_string == NULL) {
2214                 not_yet_string = PyString_InternFromString("<uninitialized file>");
2215                 if (not_yet_string == NULL)
2216                         return NULL;
2217         }
2218
2219         self = type->tp_alloc(type, 0);
2220         if (self != NULL) {
2221                 /* Always fill in the name and mode, so that nobody else
2222                    needs to special-case NULLs there. */
2223                 Py_INCREF(not_yet_string);
2224                 ((PyFileObject *)self)->f_name = not_yet_string;
2225                 Py_INCREF(not_yet_string);
2226                 ((PyFileObject *)self)->f_mode = not_yet_string;
2227                 Py_INCREF(Py_None);
2228                 ((PyFileObject *)self)->f_encoding = Py_None;
2229                 Py_INCREF(Py_None);
2230                 ((PyFileObject *)self)->f_errors = Py_None;
2231                 ((PyFileObject *)self)->weakreflist = NULL;
2232                 ((PyFileObject *)self)->unlocked_count = 0;
2233         }
2234         return self;
2235 }
2236
/* tp_init slot of the file type: file(name[, mode[, buffering]]).

   A file that is already open on this object is closed first.  Under
   MS_WINDOWS, when GetVersion() indicates an NT system, the name is first
   tried as a unicode object ("U"); if that parse fails the error is
   cleared and the narrow path below is used.  On the narrow path the
   arguments are parsed twice: once with "et" to obtain the name encoded
   with Py_FileSystemDefaultEncoding in a freshly allocated buffer, and once
   with "O" to obtain the name object handed to fill_file_fields().

   After the fields are filled the file is opened with open_the_file() and
   the buffer size is applied with PyFile_SetBufSize().

   Returns 0 on success and -1 with an exception set on failure.  The
   encoded name buffer is freed on the way out on both the Done and the
   Error paths. */
2237 static int
2238 file_init(PyObject *self, PyObject *args, PyObject *kwds)
2239 {
2240         PyFileObject *foself = (PyFileObject *)self;
2241         int ret = 0;
2242         static char *kwlist[] = {"name", "mode", "buffering", 0};
2243         char *name = NULL;
2244         char *mode = "r";
2245         int bufsize = -1;
2246         int wideargument = 0;
2247
2248         assert(PyFile_Check(self));
2249         if (foself->f_fp != NULL) {
2250                 /* Have to close the existing file first. */
2251                 PyObject *closeresult = file_close(foself);
2252                 if (closeresult == NULL)
2253                         return -1;
2254                 Py_DECREF(closeresult);
2255         }
2256
2257 #ifdef MS_WINDOWS
2258         if (GetVersion() < 0x80000000) {    /* On NT, so wide API available */
2259                 PyObject *po;
2260                 if (PyArg_ParseTupleAndKeywords(args, kwds, "U|si:file",
2261                                                 kwlist, &po, &mode, &bufsize)) {
2262                         wideargument = 1;
2263                         if (fill_file_fields(foself, NULL, po, mode,
2264                                              fclose) == NULL)
2265                                 goto Error;
2266                 } else {
2267                         /* Drop the argument parsing error as narrow
2268                            strings are also valid. */
2269                         PyErr_Clear();
2270                 }
2271         }
2272 #endif
2273
2274         if (!wideargument) {
2275                 PyObject *o_name;
2276
                  /* name is still NULL if this parse fails, so returning
                     directly leaves nothing to free. */
2277                 if (!PyArg_ParseTupleAndKeywords(args, kwds, "et|si:file", kwlist,
2278                                                  Py_FileSystemDefaultEncoding,
2279                                                  &name,
2280                                                  &mode, &bufsize))
2281                         return -1;
2282
2283                 /* We parse again to get the name as a PyObject */
2284                 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|si:file",
2285                                                  kwlist, &o_name, &mode,
2286                                                  &bufsize))
2287                         goto Error;
2288
2289                 if (fill_file_fields(foself, NULL, o_name, mode,
2290                                      fclose) == NULL)
2291                         goto Error;
2292         }
          /* name is NULL here when the wide (unicode) path was taken. */
2293         if (open_the_file(foself, name, mode) == NULL)
2294                 goto Error;
2295         foself->f_setbuf = NULL;
2296         PyFile_SetBufSize(self, bufsize);
2297         goto Done;
2298
2299 Error:
2300         ret = -1;
2301         /* fall through */
2302 Done:
2303         PyMem_Free(name); /* free the encoded string */
2304         return ret;
2305 }
2306
/* Docstring of the file type (used as tp_doc of PyFile_Type), assembled
   from two adjacent PyDoc_STR() pieces. */
2307 PyDoc_VAR(file_doc) =
2308 PyDoc_STR(
2309 "file(name[, mode[, buffering]]) -> file object\n"
2310 "\n"
2311 "Open a file.  The mode can be 'r', 'w' or 'a' for reading (default),\n"
2312 "writing or appending.  The file will be created if it doesn't exist\n"
2313 "when opened for writing or appending; it will be truncated when\n"
2314 "opened for writing.  Add a 'b' to the mode for binary files.\n"
2315 "Add a '+' to the mode to allow simultaneous reading and writing.\n"
2316 "If the buffering argument is given, 0 means unbuffered, 1 means line\n"
2317 "buffered, and larger numbers specify the buffer size.  The preferred way\n"
2318 "to open a file is with the builtin open() function.\n"
2319 )
2320 PyDoc_STR(
2321 "Add a 'U' to mode to open the file for input with universal newline\n"
2322 "support.  Any line ending in the input file will be seen as a '\\n'\n"
2323 "in Python.  Also, a file so opened gains the attribute 'newlines';\n"
2324 "the value for this attribute is one of None (no newline read yet),\n"
2325 "'\\r', '\\n', '\\r\\n' or a tuple containing all the newline types seen.\n"
2326 "\n"
2327 "'U' cannot be combined with 'w' or '+' mode.\n"
2328 );
2329
/* Type object for the built-in "file" type.  It wires together the pieces
   defined in this file: file_dealloc and file_repr, the iterator slots
   (file_self as tp_iter, file_iternext as tp_iternext), the method, member
   and getset tables, and the file_new / file_init constructors.  The type
   can be subclassed (Py_TPFLAGS_BASETYPE) and supports weak references
   through the weakreflist field of PyFileObject. */
2330 PyTypeObject PyFile_Type = {
2331         PyVarObject_HEAD_INIT(&PyType_Type, 0)
2332         "file",                                 /* tp_name */
2333         sizeof(PyFileObject),                   /* tp_basicsize */
2334         0,                                      /* tp_itemsize */
2335         (destructor)file_dealloc,               /* tp_dealloc */
2336         0,                                      /* tp_print */
2337         0,                                      /* tp_getattr */
2338         0,                                      /* tp_setattr */
2339         0,                                      /* tp_compare */
2340         (reprfunc)file_repr,                    /* tp_repr */
2341         0,                                      /* tp_as_number */
2342         0,                                      /* tp_as_sequence */
2343         0,                                      /* tp_as_mapping */
2344         0,                                      /* tp_hash */
2345         0,                                      /* tp_call */
2346         0,                                      /* tp_str */
2347         PyObject_GenericGetAttr,                /* tp_getattro */
2348         /* softspace is writable:  we must supply tp_setattro */
2349         PyObject_GenericSetAttr,                /* tp_setattro */
2350         0,                                      /* tp_as_buffer */
2351         Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_WEAKREFS, /* tp_flags */
2352         file_doc,                               /* tp_doc */
2353         0,                                      /* tp_traverse */
2354         0,                                      /* tp_clear */
2355         0,                                      /* tp_richcompare */
2356         offsetof(PyFileObject, weakreflist),    /* tp_weaklistoffset */
2357         (getiterfunc)file_self,                 /* tp_iter */
2358         (iternextfunc)file_iternext,            /* tp_iternext */
2359         file_methods,                           /* tp_methods */
2360         file_memberlist,                        /* tp_members */
2361         file_getsetlist,                        /* tp_getset */
2362         0,                                      /* tp_base */
2363         0,                                      /* tp_dict */
2364         0,                                      /* tp_descr_get */
2365         0,                                      /* tp_descr_set */
2366         0,                                      /* tp_dictoffset */
2367         file_init,                              /* tp_init */
2368         PyType_GenericAlloc,                    /* tp_alloc */
2369         file_new,                               /* tp_new */
2370         PyObject_Del,                           /* tp_free */
2371 };
2372
2373 /* Interface for the 'soft space' between print items. */
2374
2375 int
2376 PyFile_SoftSpace(PyObject *f, int newflag)
2377 {
2378         long oldflag = 0;
2379         if (f == NULL) {
2380                 /* Do nothing */
2381         }
2382         else if (PyFile_Check(f)) {
2383                 oldflag = ((PyFileObject *)f)->f_softspace;
2384                 ((PyFileObject *)f)->f_softspace = newflag;
2385         }
2386         else {
2387                 PyObject *v;
2388                 v = PyObject_GetAttrString(f, "softspace");
2389                 if (v == NULL)
2390                         PyErr_Clear();
2391                 else {
2392                         if (PyInt_Check(v))
2393                                 oldflag = PyInt_AsLong(v);
2394                         assert(oldflag < INT_MAX);
2395                         Py_DECREF(v);
2396                 }
2397                 v = PyInt_FromLong((long)newflag);
2398                 if (v == NULL)
2399                         PyErr_Clear();
2400                 else {
2401                         if (PyObject_SetAttrString(f, "softspace", v) != 0)
2402                                 PyErr_Clear();
2403                         Py_DECREF(v);
2404                 }
2405         }
2406         return (int)oldflag;
2407 }
2408
2409 /* Interfaces to write objects/strings to file-like objects */
2410
2411 int
2412 PyFile_WriteObject(PyObject *v, PyObject *f, int flags)
2413 {
2414         PyObject *writer, *value, *args, *result;
2415         if (f == NULL) {
2416                 PyErr_SetString(PyExc_TypeError, "writeobject with NULL file");
2417                 return -1;
2418         }
2419         else if (PyFile_Check(f)) {
2420                 PyFileObject *fobj = (PyFileObject *) f;
2421 #ifdef Py_USING_UNICODE
2422                 PyObject *enc = fobj->f_encoding;
2423                 int result;
2424 #endif
2425                 if (fobj->f_fp == NULL) {
2426                         err_closed();
2427                         return -1;
2428                 }
2429 #ifdef Py_USING_UNICODE
2430                 if ((flags & Py_PRINT_RAW) &&
2431                     PyUnicode_Check(v) && enc != Py_None) {
2432                         char *cenc = PyString_AS_STRING(enc);
2433                         char *errors = fobj->f_errors == Py_None ?
2434                           "strict" : PyString_AS_STRING(fobj->f_errors);
2435                         value = PyUnicode_AsEncodedString(v, cenc, errors);
2436                         if (value == NULL)
2437                                 return -1;
2438                 } else {
2439                         value = v;
2440                         Py_INCREF(value);
2441                 }
2442                 result = file_PyObject_Print(value, fobj, flags);
2443                 Py_DECREF(value);
2444                 return result;
2445 #else
2446                 return file_PyObject_Print(v, fobj, flags);
2447 #endif
2448         }
2449         writer = PyObject_GetAttrString(f, "write");
2450         if (writer == NULL)
2451                 return -1;
2452         if (flags & Py_PRINT_RAW) {
2453                 if (PyUnicode_Check(v)) {
2454                         value = v;
2455                         Py_INCREF(value);
2456                 } else
2457                         value = PyObject_Str(v);
2458         }
2459         else
2460                 value = PyObject_Repr(v);
2461         if (value == NULL) {
2462                 Py_DECREF(writer);
2463                 return -1;
2464         }
2465         args = PyTuple_Pack(1, value);
2466         if (args == NULL) {
2467                 Py_DECREF(value);
2468                 Py_DECREF(writer);
2469                 return -1;
2470         }
2471         result = PyEval_CallObject(writer, args);
2472         Py_DECREF(args);
2473         Py_DECREF(value);
2474         Py_DECREF(writer);
2475         if (result == NULL)
2476                 return -1;
2477         Py_DECREF(result);
2478         return 0;
2479 }
2480
2481 int
2482 PyFile_WriteString(const char *s, PyObject *f)
2483 {
2484
2485         if (f == NULL) {
2486                 /* Should be caused by a pre-existing error */
2487                 if (!PyErr_Occurred())
2488                         PyErr_SetString(PyExc_SystemError,
2489                                         "null file for PyFile_WriteString");
2490                 return -1;
2491         }
2492         else if (PyFile_Check(f)) {
2493                 PyFileObject *fobj = (PyFileObject *) f;
2494                 FILE *fp = PyFile_AsFile(f);
2495                 if (fp == NULL) {
2496                         err_closed();
2497                         return -1;
2498                 }
2499                 FILE_BEGIN_ALLOW_THREADS(fobj)
2500                 fputs(s, fp);
2501                 FILE_END_ALLOW_THREADS(fobj)
2502                 return 0;
2503         }
2504         else if (!PyErr_Occurred()) {
2505                 PyObject *v = PyString_FromString(s);
2506                 int err;
2507                 if (v == NULL)
2508                         return -1;
2509                 err = PyFile_WriteObject(v, f, Py_PRINT_RAW);
2510                 Py_DECREF(v);
2511                 return err;
2512         }
2513         else
2514                 return -1;
2515 }
2516
2517 /* Try to get a file-descriptor from a Python object.  If the object
2518    is an integer or long integer, its value is returned.  If not, the
2519    object's fileno() method is called if it exists; the method must return
2520    an integer or long integer, which is returned as the file descriptor value.
2521    -1 is returned on failure.
2522 */
2523
2524 int PyObject_AsFileDescriptor(PyObject *o)
2525 {
2526         int fd;
2527         PyObject *meth;
2528
2529         if (PyInt_Check(o)) {
2530                 fd = PyInt_AsLong(o);
2531         }
2532         else if (PyLong_Check(o)) {
2533                 fd = PyLong_AsLong(o);
2534         }
2535         else if ((meth = PyObject_GetAttrString(o, "fileno")) != NULL)
2536         {
2537                 PyObject *fno = PyEval_CallObject(meth, NULL);
2538                 Py_DECREF(meth);
2539                 if (fno == NULL)
2540                         return -1;
2541
2542                 if (PyInt_Check(fno)) {
2543                         fd = PyInt_AsLong(fno);
2544                         Py_DECREF(fno);
2545                 }
2546                 else if (PyLong_Check(fno)) {
2547                         fd = PyLong_AsLong(fno);
2548                         Py_DECREF(fno);
2549                 }
2550                 else {
2551                         PyErr_SetString(PyExc_TypeError,
2552                                         "fileno() returned a non-integer");
2553                         Py_DECREF(fno);
2554                         return -1;
2555                 }
2556         }
2557         else {
2558                 PyErr_SetString(PyExc_TypeError,
2559                                 "argument must be an int, or have a fileno() method.");
2560                 return -1;
2561         }
2562
2563         if (fd < 0) {
2564                 PyErr_Format(PyExc_ValueError,
2565                              "file descriptor cannot be a negative integer (%i)",
2566                              fd);
2567                 return -1;
2568         }
2569         return fd;
2570 }
2571
2572 /* From here on we need access to the real fgets and fread */
2573 #undef fgets
2574 #undef fread
2575
2576 /*
2577 ** Py_UniversalNewlineFgets is an fgets variation that understands
2578 ** all of \r, \n and \r\n conventions.
2579 ** The stream should be opened in binary mode.
2580 ** If fobj is NULL the routine always does newline conversion, and
2581 ** it may peek one char ahead to gobble the second char in \r\n.
2582 ** If fobj is non-NULL it must be a PyFileObject. In this case there
2583 ** is no readahead but in stead a flag is used to skip a following
2584 ** \n on the next read. Also, if the file is open in binary mode
2585 ** the whole conversion is skipped. Finally, the routine keeps track of
2586 ** the different types of newlines seen.
2587 ** Note that we need no error handling: fgets() treats error and eof
2588 ** identically.
2589 */
2590 char *
2591 Py_UniversalNewlineFgets(char *buf, int n, FILE *stream, PyObject *fobj)
2592 {
2593         char *p = buf;
2594         int c;
2595         int newlinetypes = 0;
2596         int skipnextlf = 0;
2597         int univ_newline = 1;
2598
2599         if (fobj) {
2600                 if (!PyFile_Check(fobj)) {
2601                         errno = ENXIO;  /* What can you do... */
2602                         return NULL;
2603                 }
2604                 univ_newline = ((PyFileObject *)fobj)->f_univ_newline;
2605                 if ( !univ_newline )
2606                         return fgets(buf, n, stream);
2607                 newlinetypes = ((PyFileObject *)fobj)->f_newlinetypes;
2608                 skipnextlf = ((PyFileObject *)fobj)->f_skipnextlf;
2609         }
2610         FLOCKFILE(stream);
2611         c = 'x'; /* Shut up gcc warning */
2612         while (--n > 0 && (c = GETC(stream)) != EOF ) {
2613                 if (skipnextlf ) {
2614                         skipnextlf = 0;
2615                         if (c == '\n') {
2616                                 /* Seeing a \n here with skipnextlf true
2617                                 ** means we saw a \r before.
2618                                 */
2619                                 newlinetypes |= NEWLINE_CRLF;
2620                                 c = GETC(stream);
2621                                 if (c == EOF) break;
2622                         } else {
2623                                 /*
2624                                 ** Note that c == EOF also brings us here,
2625                                 ** so we're okay if the last char in the file
2626                                 ** is a CR.
2627                                 */
2628                                 newlinetypes |= NEWLINE_CR;
2629                         }
2630                 }
2631                 if (c == '\r') {
2632                         /* A \r is translated into a \n, and we skip
2633                         ** an adjacent \n, if any. We don't set the
2634                         ** newlinetypes flag until we've seen the next char.
2635                         */
2636                         skipnextlf = 1;
2637                         c = '\n';
2638                 } else if ( c == '\n') {
2639                         newlinetypes |= NEWLINE_LF;
2640                 }
2641                 *p++ = c;
2642                 if (c == '\n') break;
2643         }
2644         if ( c == EOF && skipnextlf )
2645                 newlinetypes |= NEWLINE_CR;
2646         FUNLOCKFILE(stream);
2647         *p = '\0';
2648         if (fobj) {
2649                 ((PyFileObject *)fobj)->f_newlinetypes = newlinetypes;
2650                 ((PyFileObject *)fobj)->f_skipnextlf = skipnextlf;
2651         } else if ( skipnextlf ) {
2652                 /* If we have no file object we cannot save the
2653                 ** skipnextlf flag. We have to readahead, which
2654                 ** will cause a pause if we're reading from an
2655                 ** interactive stream, but that is very unlikely
2656                 ** unless we're doing something silly like
2657                 ** execfile("/dev/tty").
2658                 */
2659                 c = GETC(stream);
2660                 if ( c != '\n' )
2661                         ungetc(c, stream);
2662         }
2663         if (p == buf)
2664                 return NULL;
2665         return buf;
2666 }
2667
2668 /*
2669 ** Py_UniversalNewlineFread is an fread variation that understands
2670 ** all of \r, \n and \r\n conventions.
2671 ** The stream should be opened in binary mode.
2672 ** fobj must be a PyFileObject. In this case there
2673 ** is no readahead but in stead a flag is used to skip a following
2674 ** \n on the next read. Also, if the file is open in binary mode
2675 ** the whole conversion is skipped. Finally, the routine keeps track of
2676 ** the different types of newlines seen.
2677 */
2678 size_t
2679 Py_UniversalNewlineFread(char *buf, size_t n,
2680                          FILE *stream, PyObject *fobj)
2681 {
2682         char *dst = buf;
2683         PyFileObject *f = (PyFileObject *)fobj;
2684         int newlinetypes, skipnextlf;
2685
2686         assert(buf != NULL);
2687         assert(stream != NULL);
2688
2689         if (!fobj || !PyFile_Check(fobj)) {
2690                 errno = ENXIO;  /* What can you do... */
2691                 return 0;
2692         }
2693         if (!f->f_univ_newline)
2694                 return fread(buf, 1, n, stream);
2695         newlinetypes = f->f_newlinetypes;
2696         skipnextlf = f->f_skipnextlf;
2697         /* Invariant:  n is the number of bytes remaining to be filled
2698          * in the buffer.
2699          */
2700         while (n) {
2701                 size_t nread;
2702                 int shortread;
2703                 char *src = dst;
2704
2705                 nread = fread(dst, 1, n, stream);
2706                 assert(nread <= n);
2707                 if (nread == 0)
2708                         break;
2709
2710                 n -= nread; /* assuming 1 byte out for each in; will adjust */
2711                 shortread = n != 0;     /* true iff EOF or error */
2712                 while (nread--) {
2713                         char c = *src++;
2714                         if (c == '\r') {
2715                                 /* Save as LF and set flag to skip next LF. */
2716                                 *dst++ = '\n';
2717                                 skipnextlf = 1;
2718                         }
2719                         else if (skipnextlf && c == '\n') {
2720                                 /* Skip LF, and remember we saw CR LF. */
2721                                 skipnextlf = 0;
2722                                 newlinetypes |= NEWLINE_CRLF;
2723                                 ++n;
2724                         }
2725                         else {
2726                                 /* Normal char to be stored in buffer.  Also
2727                                  * update the newlinetypes flag if either this
2728                                  * is an LF or the previous char was a CR.
2729                                  */
2730                                 if (c == '\n')
2731                                         newlinetypes |= NEWLINE_LF;
2732                                 else if (skipnextlf)
2733                                         newlinetypes |= NEWLINE_CR;
2734                                 *dst++ = c;
2735                                 skipnextlf = 0;
2736                         }
2737                 }
2738                 if (shortread) {
2739                         /* If this is EOF, update type flags. */
2740                         if (skipnextlf && feof(stream))
2741                                 newlinetypes |= NEWLINE_CR;
2742                         break;
2743                 }
2744         }
2745         f->f_newlinetypes = newlinetypes;
2746         f->f_skipnextlf = skipnextlf;
2747         return dst - buf;
2748 }
2749
2750 #ifdef __cplusplus
2751 }
2752 #endif