Objects/fileobject.c

   1 /* File object implementation */
   2
   3 #include "Python.h"
   4 #include "structmember.h"
   5
   6 #ifndef DONT_HAVE_SYS_TYPES_H
   7 #include <sys/types.h>
   8 #endif /* DONT_HAVE_SYS_TYPES_H */
   9
  10 #ifdef MS_WINDOWS
  11 #define fileno _fileno
  12 /* can simulate truncate with Win32 API functions; see file_truncate */
  13 #define HAVE_FTRUNCATE
  14 #define WIN32_LEAN_AND_MEAN
  15 #include <windows.h>
  16 #endif
  17
  18 #ifdef _MSC_VER
  19 /* Need GetVersion to see if on NT so safe to use _wfopen */
  20 #define WIN32_LEAN_AND_MEAN
  21 #include <windows.h>
  22 #endif /* _MSC_VER */
  23
  24 #if defined(PYOS_OS2) && defined(PYCC_GCC)
  25 #include <io.h>
  26 #endif
  27
  28 #define BUF(v) PyString_AS_STRING((PyStringObject *)v)
  29
  30 #ifndef DONT_HAVE_ERRNO_H
  31 #include <errno.h>
  32 #endif
  33
  34 #ifdef HAVE_GETC_UNLOCKED
  35 #define GETC(f) getc_unlocked(f)
  36 #define FLOCKFILE(f) flockfile(f)
  37 #define FUNLOCKFILE(f) funlockfile(f)
  38 #else
  39 #define GETC(f) getc(f)
  40 #define FLOCKFILE(f)
  41 #define FUNLOCKFILE(f)
  42 #endif
  43
  44 /* Bits in f_newlinetypes */
  45 #define NEWLINE_UNKNOWN 0       /* No newline seen, yet */
  46 #define NEWLINE_CR 1            /* \r newline seen */
  47 #define NEWLINE_LF 2            /* \n newline seen */
  48 #define NEWLINE_CRLF 4          /* \r\n newline seen */
  49
  50 FILE *
  51 PyFile_AsFile(PyObject *f)
  52 {
  53         if (f == NULL || !PyFile_Check(f))
  54                 return NULL;
  55         else
  56                 return ((PyFileObject *)f)->f_fp;
  57 }
  58
  59 PyObject *
  60 PyFile_Name(PyObject *f)
  61 {
  62         if (f == NULL || !PyFile_Check(f))
  63                 return NULL;
  64         else
  65                 return ((PyFileObject *)f)->f_name;
  66 }
  67
  68 /* On Unix, fopen will succeed for directories.
  69    In Python, there should be no file objects referring to
  70    directories, so we need a check.  */
  71
  72 static PyFileObject*
  73 dircheck(PyFileObject* f)
  74 {
  75 #if defined(HAVE_FSTAT) && defined(S_IFDIR) && defined(EISDIR)
  76         struct stat buf;
  77         if (f->f_fp == NULL)
  78                 return f;
  79         if (fstat(fileno(f->f_fp), &buf) == 0 &&
  80             S_ISDIR(buf.st_mode)) {
  81 #ifdef HAVE_STRERROR
  82                 char *msg = strerror(EISDIR);
  83 #else
  84                 char *msg = "Is a directory";
  85 #endif
  86                 PyObject *exc = PyObject_CallFunction(PyExc_IOError, "(is)",
  87                                                       EISDIR, msg);
  88                 PyErr_SetObject(PyExc_IOError, exc);
  89                 Py_XDECREF(exc);
  90                 return NULL;
  91         }
  92 #endif
  93         return f;
  94 }
  95
  96
  97 static PyObject *
  98 fill_file_fields(PyFileObject *f, FILE *fp, PyObject *name, char *mode,
  99                  int (*close)(FILE *))
 100 {
 101         assert(f != NULL);
 102         assert(PyFile_Check(f));
 103         assert(f->f_fp == NULL);
 104
 105         Py_DECREF(f->f_name);
 106         Py_DECREF(f->f_mode);
 107         Py_DECREF(f->f_encoding);
 108
 109         Py_INCREF (name);
 110         f->f_name = name;
 111
 112         f->f_mode = PyString_FromString(mode);
 113
 114         f->f_close = close;
 115         f->f_softspace = 0;
 116         f->f_binary = strchr(mode,'b') != NULL;
 117         f->f_buf = NULL;
 118         f->f_univ_newline = (strchr(mode, 'U') != NULL);
 119         f->f_newlinetypes = NEWLINE_UNKNOWN;
 120         f->f_skipnextlf = 0;
 121         Py_INCREF(Py_None);
 122         f->f_encoding = Py_None;
 123
 124         if (f->f_name == NULL || f->f_mode == NULL)
 125                 return NULL;
 126         f->f_fp = fp;
 127         f = dircheck(f);
 128         return (PyObject *) f;
 129 }
 130
 131 /* check for known incorrect mode strings - problem is, platforms are
 132    free to accept any mode characters they like and are supposed to
 133    ignore stuff they don't understand... write or append mode with
 134    universal newline support is expressly forbidden by PEP 278. */
 135 /* zero return is kewl - one is un-kewl */
 136 static int
 137 check_the_mode(char *mode)
 138 {
 139         size_t len = strlen(mode);
 140
 141         switch (len) {
 142         case 0:
 143                 PyErr_SetString(PyExc_ValueError, "empty mode string");
 144                 return 1;
 145
 146         /* reject wU, aU */
 147         case 2:
 148                 switch (mode[0]) {
 149                 case 'w':
 150                 case 'a':
 151                         if (mode[1] == 'U') {
 152                                 PyErr_SetString(PyExc_ValueError,
 153                                                 "invalid mode string");
 154                                 return 1;
 155                         }
 156                         break;
 157                 }
 158                 break;
 159
 160         /* reject w+U, a+U, wU+, aU+ */
 161         case 3:
 162                 switch (mode[0]) {
 163                 case 'w':
 164                 case 'a':
 165                         if ((mode[1] == '+' && mode[2] == 'U') ||
 166                             (mode[1] == 'U' && mode[2] == '+')) {
 167                                 PyErr_SetString(PyExc_ValueError,
 168                                                 "invalid mode string");
 169                                 return 1;
 170                         }
 171                         break;
 172                 }
 173                 break;
 174         }
 175
 176         return 0;
 177 }
 178
 179 static PyObject *
 180 open_the_file(PyFileObject *f, char *name, char *mode)
 181 {
 182         assert(f != NULL);
 183         assert(PyFile_Check(f));
 184 #ifdef MS_WINDOWS
 185         /* windows ignores the passed name in order to support Unicode */
 186         assert(f->f_name != NULL);
 187 #else
 188         assert(name != NULL);
 189 #endif
 190         assert(mode != NULL);
 191         assert(f->f_fp == NULL);
 192
 193         if (check_the_mode(mode))
 194                 return NULL;
 195
 196         /* rexec.py can't stop a user from getting the file() constructor --
 197            all they have to do is get *any* file object f, and then do
 198            type(f).  Here we prevent them from doing damage with it. */
 199         if (PyEval_GetRestricted()) {
 200                 PyErr_SetString(PyExc_IOError,
 201                 "file() constructor not accessible in restricted mode");
 202                 return NULL;
 203         }
 204         errno = 0;
 205
 206         if (strcmp(mode, "U") == 0 || strcmp(mode, "rU") == 0)
 207                 mode = "rb";
 208 #ifdef MS_WINDOWS
 209         if (PyUnicode_Check(f->f_name)) {
 210                 PyObject *wmode;
 211                 wmode = PyUnicode_DecodeASCII(mode, strlen(mode), NULL);
 212                 if (f->f_name && wmode) {
 213                         Py_BEGIN_ALLOW_THREADS
 214                         /* PyUnicode_AS_UNICODE OK without thread
 215                            lock as it is a simple dereference. */
 216                         f->f_fp = _wfopen(PyUnicode_AS_UNICODE(f->f_name),
 217                                           PyUnicode_AS_UNICODE(wmode));
 218                         Py_END_ALLOW_THREADS
 219                 }
 220                 Py_XDECREF(wmode);
 221         }
 222 #endif
 223         if (NULL == f->f_fp && NULL != name) {
 224                 Py_BEGIN_ALLOW_THREADS
 225                 f->f_fp = fopen(name, mode);
 226                 Py_END_ALLOW_THREADS
 227         }
 228
 229         if (f->f_fp == NULL) {
 230 #ifdef _MSC_VER
 231                 /* MSVC 6 (Microsoft) leaves errno at 0 for bad mode strings,
 232                  * across all Windows flavors.  When it sets EINVAL varies
 233                  * across Windows flavors, the exact conditions aren't
 234                  * documented, and the answer lies in the OS's implementation
 235                  * of Win32's CreateFile function (whose source is secret).
 236                  * Seems the best we can do is map EINVAL to ENOENT.
 237                  */
 238                 if (errno == 0) /* bad mode string */
 239                         errno = EINVAL;
 240                 else if (errno == EINVAL) /* unknown, but not a mode string */
 241                         errno = ENOENT;
 242 #endif
 243                 if (errno == EINVAL)
 244                         PyErr_Format(PyExc_IOError, "invalid mode: %s",
 245                                      mode);
 246                 else
 247                         PyErr_SetFromErrnoWithFilenameObject(PyExc_IOError, f->f_name);
 248                 f = NULL;
 249         }
 250         if (f != NULL)
 251                 f = dircheck(f);
 252         return (PyObject *)f;
 253 }
 254
 255 PyObject *
 256 PyFile_FromFile(FILE *fp, char *name, char *mode, int (*close)(FILE *))
 257 {
 258         PyFileObject *f = (PyFileObject *)PyFile_Type.tp_new(&PyFile_Type,
 259                                                              NULL, NULL);
 260         if (f != NULL) {
 261                 PyObject *o_name = PyString_FromString(name);
 262                 if (fill_file_fields(f, fp, o_name, mode, close) == NULL) {
 263                         Py_DECREF(f);
 264                         f = NULL;
 265                 }
 266                 Py_DECREF(o_name);
 267         }
 268         return (PyObject *) f;
 269 }
 270
 271 PyObject *
 272 PyFile_FromString(char *name, char *mode)
 273 {
 274         extern int fclose(FILE *);
 275         PyFileObject *f;
 276
 277         f = (PyFileObject *)PyFile_FromFile((FILE *)NULL, name, mode, fclose);
 278         if (f != NULL) {
 279                 if (open_the_file(f, name, mode) == NULL) {
 280                         Py_DECREF(f);
 281                         f = NULL;
 282                 }
 283         }
 284         return (PyObject *)f;
 285 }
 286
 287 void
 288 PyFile_SetBufSize(PyObject *f, int bufsize)
 289 {
 290         PyFileObject *file = (PyFileObject *)f;
 291         if (bufsize >= 0) {
 292                 int type;
 293                 switch (bufsize) {
 294                 case 0:
 295                         type = _IONBF;
 296                         break;
 297 #ifdef HAVE_SETVBUF
 298                 case 1:
 299                         type = _IOLBF;
 300                         bufsize = BUFSIZ;
 301                         break;
 302 #endif
 303                 default:
 304                         type = _IOFBF;
 305 #ifndef HAVE_SETVBUF
 306                         bufsize = BUFSIZ;
 307 #endif
 308                         break;
 309                 }
 310                 fflush(file->f_fp);
 311                 if (type == _IONBF) {
 312                         PyMem_Free(file->f_setbuf);
 313                         file->f_setbuf = NULL;
 314                 } else {
 315                         file->f_setbuf = PyMem_Realloc(file->f_setbuf, bufsize);
 316                 }
 317 #ifdef HAVE_SETVBUF
 318                 setvbuf(file->f_fp, file->f_setbuf, type, bufsize);
 319 #else /* !HAVE_SETVBUF */
 320                 setbuf(file->f_fp, file->f_setbuf);
 321 #endif /* !HAVE_SETVBUF */
 322         }
 323 }
 324
 325 /* Set the encoding used to output Unicode strings.
 326    Returh 1 on success, 0 on failure. */
 327
 328 int
 329 PyFile_SetEncoding(PyObject *f, const char *enc)
 330 {
 331         PyFileObject *file = (PyFileObject*)f;
 332         PyObject *str = PyString_FromString(enc);
 333         if (!str)
 334                 return 0;
 335         Py_DECREF(file->f_encoding);
 336         file->f_encoding = str;
 337         return 1;
 338 }
 339
 340 static PyObject *
 341 err_closed(void)
 342 {
 343         PyErr_SetString(PyExc_ValueError, "I/O operation on closed file");
 344         return NULL;
 345 }
 346
 347 static void drop_readahead(PyFileObject *);
 348
 349 /* Methods */
 350
 351 static void
 352 file_dealloc(PyFileObject *f)
 353 {
 354         int sts = 0;
 355         if (f->weakreflist != NULL)
 356                 PyObject_ClearWeakRefs((PyObject *) f);
 357         if (f->f_fp != NULL && f->f_close != NULL) {
 358                 Py_BEGIN_ALLOW_THREADS
 359                 sts = (*f->f_close)(f->f_fp);
 360                 Py_END_ALLOW_THREADS
 361                 if (sts == EOF)
 362 #ifdef HAVE_STRERROR
 363                         PySys_WriteStderr("close failed: [Errno %d] %s\n", errno, strerror(errno));
 364 #else
 365                         PySys_WriteStderr("close failed: [Errno %d]\n", errno);
 366 #endif
 367         }
 368         PyMem_Free(f->f_setbuf);
 369         Py_XDECREF(f->f_name);
 370         Py_XDECREF(f->f_mode);
 371         Py_XDECREF(f->f_encoding);
 372         drop_readahead(f);
 373         f->ob_type->tp_free((PyObject *)f);
 374 }
 375
 376 static PyObject *
 377 file_repr(PyFileObject *f)
 378 {
 379         if (PyUnicode_Check(f->f_name)) {
 380 #ifdef Py_USING_UNICODE
 381                 PyObject *ret = NULL;
 382                 PyObject *name;
 383                 name = PyUnicode_AsUnicodeEscapeString(f->f_name);
 384                 ret = PyString_FromFormat("<%s file u'%s', mode '%s' at %p>",
 385                                    f->f_fp == NULL ? "closed" : "open",
 386                                    PyString_AsString(name),
 387                                    PyString_AsString(f->f_mode),
 388                                    f);
 389                 Py_XDECREF(name);
 390                 return ret;
 391 #endif
 392         } else {
 393                 return PyString_FromFormat("<%s file '%s', mode '%s' at %p>",
 394                                    f->f_fp == NULL ? "closed" : "open",
 395                                    PyString_AsString(f->f_name),
 396                                    PyString_AsString(f->f_mode),
 397                                    f);
 398         }
 399 }
 400
 401 static PyObject *
 402 file_close(PyFileObject *f)
 403 {
 404         int sts = 0;
 405         if (f->f_fp != NULL) {
 406                 if (f->f_close != NULL) {
 407                         Py_BEGIN_ALLOW_THREADS
 408                         errno = 0;
 409                         sts = (*f->f_close)(f->f_fp);
 410                         Py_END_ALLOW_THREADS
 411                 }
 412                 f->f_fp = NULL;
 413         }
 414         PyMem_Free(f->f_setbuf);
 415         f->f_setbuf = NULL;
 416         if (sts == EOF)
 417                 return PyErr_SetFromErrno(PyExc_IOError);
 418         if (sts != 0)
 419                 return PyInt_FromLong((long)sts);
 420         Py_INCREF(Py_None);
 421         return Py_None;
 422 }
 423
 424
 425 /* Our very own off_t-like type, 64-bit if possible */
 426 #if !defined(HAVE_LARGEFILE_SUPPORT)
 427 typedef off_t Py_off_t;
 428 #elif SIZEOF_OFF_T >= 8
 429 typedef off_t Py_off_t;
 430 #elif SIZEOF_FPOS_T >= 8
 431 typedef fpos_t Py_off_t;
 432 #else
 433 #error "Large file support, but neither off_t nor fpos_t is large enough."
 434 #endif
 435
 436
 437 /* a portable fseek() function
 438    return 0 on success, non-zero on failure (with errno set) */
 439 static int
 440 _portable_fseek(FILE *fp, Py_off_t offset, int whence)
 441 {
 442 #if !defined(HAVE_LARGEFILE_SUPPORT)
 443         return fseek(fp, offset, whence);
 444 #elif defined(HAVE_FSEEKO) && SIZEOF_OFF_T >= 8
 445         return fseeko(fp, offset, whence);
 446 #elif defined(HAVE_FSEEK64)
 447         return fseek64(fp, offset, whence);
 448 #elif defined(__BEOS__)
 449         return _fseek(fp, offset, whence);
 450 #elif SIZEOF_FPOS_T >= 8
 451         /* lacking a 64-bit capable fseek(), use a 64-bit capable fsetpos()
 452            and fgetpos() to implement fseek()*/
 453         fpos_t pos;
 454         switch (whence) {
 455         case SEEK_END:
 456 #ifdef MS_WINDOWS
 457                 fflush(fp);
 458                 if (_lseeki64(fileno(fp), 0, 2) == -1)
 459                         return -1;
 460 #else
 461                 if (fseek(fp, 0, SEEK_END) != 0)
 462                         return -1;
 463 #endif
 464                 /* fall through */
 465         case SEEK_CUR:
 466                 if (fgetpos(fp, &pos) != 0)
 467                         return -1;
 468                 offset += pos;
 469                 break;
 470         /* case SEEK_SET: break; */
 471         }
 472         return fsetpos(fp, &offset);
 473 #else
 474 #error "Large file support, but no way to fseek."
 475 #endif
 476 }
 477
 478
 479 /* a portable ftell() function
 480    Return -1 on failure with errno set appropriately, current file
 481    position on success */
 482 static Py_off_t
 483 _portable_ftell(FILE* fp)
 484 {
 485 #if !defined(HAVE_LARGEFILE_SUPPORT)
 486         return ftell(fp);
 487 #elif defined(HAVE_FTELLO) && SIZEOF_OFF_T >= 8
 488         return ftello(fp);
 489 #elif defined(HAVE_FTELL64)
 490         return ftell64(fp);
 491 #elif SIZEOF_FPOS_T >= 8
 492         fpos_t pos;
 493         if (fgetpos(fp, &pos) != 0)
 494                 return -1;
 495         return pos;
 496 #else
 497 #error "Large file support, but no way to ftell."
 498 #endif
 499 }
 500
 501
 502 static PyObject *
 503 file_seek(PyFileObject *f, PyObject *args)
 504 {
 505         int whence;
 506         int ret;
 507         Py_off_t offset;
 508         PyObject *offobj;
 509
 510         if (f->f_fp == NULL)
 511                 return err_closed();
 512         drop_readahead(f);
 513         whence = 0;
 514         if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &whence))
 515                 return NULL;
 516 #if !defined(HAVE_LARGEFILE_SUPPORT)
 517         offset = PyInt_AsLong(offobj);
 518 #else
 519         offset = PyLong_Check(offobj) ?
 520                 PyLong_AsLongLong(offobj) : PyInt_AsLong(offobj);
 521 #endif
 522         if (PyErr_Occurred())
 523                 return NULL;
 524
 525         Py_BEGIN_ALLOW_THREADS
 526         errno = 0;
 527         ret = _portable_fseek(f->f_fp, offset, whence);
 528         Py_END_ALLOW_THREADS
 529
 530         if (ret != 0) {
 531                 PyErr_SetFromErrno(PyExc_IOError);
 532                 clearerr(f->f_fp);
 533                 return NULL;
 534         }
 535         f->f_skipnextlf = 0;
 536         Py_INCREF(Py_None);
 537         return Py_None;
 538 }
 539
 540
 541 #ifdef HAVE_FTRUNCATE
 542 static PyObject *
 543 file_truncate(PyFileObject *f, PyObject *args)
 544 {
 545         Py_off_t newsize;
 546         PyObject *newsizeobj = NULL;
 547         Py_off_t initialpos;
 548         int ret;
 549
 550         if (f->f_fp == NULL)
 551                 return err_closed();
 552         if (!PyArg_UnpackTuple(args, "truncate", 0, 1, &newsizeobj))
 553                 return NULL;
 554
 555         /* Get current file position.  If the file happens to be open for
 556          * update and the last operation was an input operation, C doesn't
 557          * define what the later fflush() will do, but we promise truncate()
 558          * won't change the current position (and fflush() *does* change it
 559          * then at least on Windows).  The easiest thing is to capture
 560          * current pos now and seek back to it at the end.
 561          */
 562         Py_BEGIN_ALLOW_THREADS
 563         errno = 0;
 564         initialpos = _portable_ftell(f->f_fp);
 565         Py_END_ALLOW_THREADS
 566         if (initialpos == -1)
 567                 goto onioerror;
 568
 569         /* Set newsize to current postion if newsizeobj NULL, else to the
 570          * specified value.
 571          */
 572         if (newsizeobj != NULL) {
 573 #if !defined(HAVE_LARGEFILE_SUPPORT)
 574                 newsize = PyInt_AsLong(newsizeobj);
 575 #else
 576                 newsize = PyLong_Check(newsizeobj) ?
 577                                 PyLong_AsLongLong(newsizeobj) :
 578                                 PyInt_AsLong(newsizeobj);
 579 #endif
 580                 if (PyErr_Occurred())
 581                         return NULL;
 582         }
 583         else /* default to current position */
 584                 newsize = initialpos;
 585
 586         /* Flush the stream.  We're mixing stream-level I/O with lower-level
 587          * I/O, and a flush may be necessary to synch both platform views
 588          * of the current file state.
 589          */
 590         Py_BEGIN_ALLOW_THREADS
 591         errno = 0;
 592         ret = fflush(f->f_fp);
 593         Py_END_ALLOW_THREADS
 594         if (ret != 0)
 595                 goto onioerror;
 596
 597 #ifdef MS_WINDOWS
 598         /* MS _chsize doesn't work if newsize doesn't fit in 32 bits,
 599            so don't even try using it. */
 600         {
 601                 HANDLE hFile;
 602
 603                 /* Have to move current pos to desired endpoint on Windows. */
 604                 Py_BEGIN_ALLOW_THREADS
 605                 errno = 0;
 606                 ret = _portable_fseek(f->f_fp, newsize, SEEK_SET) != 0;
 607                 Py_END_ALLOW_THREADS
 608                 if (ret)
 609                         goto onioerror;
 610
 611                 /* Truncate.  Note that this may grow the file! */
 612                 Py_BEGIN_ALLOW_THREADS
 613                 errno = 0;
 614                 hFile = (HANDLE)_get_osfhandle(fileno(f->f_fp));
 615                 ret = hFile == (HANDLE)-1;
 616                 if (ret == 0) {
 617                         ret = SetEndOfFile(hFile) == 0;
 618                         if (ret)
 619                                 errno = EACCES;
 620                 }
 621                 Py_END_ALLOW_THREADS
 622                 if (ret)
 623                         goto onioerror;
 624         }
 625 #else
 626         Py_BEGIN_ALLOW_THREADS
 627         errno = 0;
 628         ret = ftruncate(fileno(f->f_fp), newsize);
 629         Py_END_ALLOW_THREADS
 630         if (ret != 0)
 631                 goto onioerror;
 632 #endif /* !MS_WINDOWS */
 633
 634         /* Restore original file position. */
 635         Py_BEGIN_ALLOW_THREADS
 636         errno = 0;
 637         ret = _portable_fseek(f->f_fp, initialpos, SEEK_SET) != 0;
 638         Py_END_ALLOW_THREADS
 639         if (ret)
 640                 goto onioerror;
 641
 642         Py_INCREF(Py_None);
 643         return Py_None;
 644
 645 onioerror:
 646         PyErr_SetFromErrno(PyExc_IOError);
 647         clearerr(f->f_fp);
 648         return NULL;
 649 }
 650 #endif /* HAVE_FTRUNCATE */
 651
 652 static PyObject *
 653 file_tell(PyFileObject *f)
 654 {
 655         Py_off_t pos;
 656
 657         if (f->f_fp == NULL)
 658                 return err_closed();
 659         Py_BEGIN_ALLOW_THREADS
 660         errno = 0;
 661         pos = _portable_ftell(f->f_fp);
 662         Py_END_ALLOW_THREADS
 663         if (pos == -1) {
 664                 PyErr_SetFromErrno(PyExc_IOError);
 665                 clearerr(f->f_fp);
 666                 return NULL;
 667         }
 668         if (f->f_skipnextlf) {
 669                 int c;
 670                 c = GETC(f->f_fp);
 671                 if (c == '\n') {
 672                         pos++;
 673                         f->f_skipnextlf = 0;
 674                 } else if (c != EOF) ungetc(c, f->f_fp);
 675         }
 676 #if !defined(HAVE_LARGEFILE_SUPPORT)
 677         return PyInt_FromLong(pos);
 678 #else
 679         return PyLong_FromLongLong(pos);
 680 #endif
 681 }
 682
 683 static PyObject *
 684 file_fileno(PyFileObject *f)
 685 {
 686         if (f->f_fp == NULL)
 687                 return err_closed();
 688         return PyInt_FromLong((long) fileno(f->f_fp));
 689 }
 690
 691 static PyObject *
 692 file_flush(PyFileObject *f)
 693 {
 694         int res;
 695
 696         if (f->f_fp == NULL)
 697                 return err_closed();
 698         Py_BEGIN_ALLOW_THREADS
 699         errno = 0;
 700         res = fflush(f->f_fp);
 701         Py_END_ALLOW_THREADS
 702         if (res != 0) {
 703                 PyErr_SetFromErrno(PyExc_IOError);
 704                 clearerr(f->f_fp);
 705                 return NULL;
 706         }
 707         Py_INCREF(Py_None);
 708         return Py_None;
 709 }
 710
 711 static PyObject *
 712 file_isatty(PyFileObject *f)
 713 {
 714         long res;
 715         if (f->f_fp == NULL)
 716                 return err_closed();
 717         Py_BEGIN_ALLOW_THREADS
 718         res = isatty((int)fileno(f->f_fp));
 719         Py_END_ALLOW_THREADS
 720         return PyBool_FromLong(res);
 721 }
 722
 723
 724 #if BUFSIZ < 8192
 725 #define SMALLCHUNK 8192
 726 #else
 727 #define SMALLCHUNK BUFSIZ
 728 #endif
 729
 730 #if SIZEOF_INT < 4
 731 #define BIGCHUNK  (512 * 32)
 732 #else
 733 #define BIGCHUNK  (512 * 1024)
 734 #endif
 735
 736 static size_t
 737 new_buffersize(PyFileObject *f, size_t currentsize)
 738 {
 739 #ifdef HAVE_FSTAT
 740         off_t pos, end;
 741         struct stat st;
 742         if (fstat(fileno(f->f_fp), &st) == 0) {
 743                 end = st.st_size;
 744                 /* The following is not a bug: we really need to call lseek()
 745                    *and* ftell().  The reason is that some stdio libraries
 746                    mistakenly flush their buffer when ftell() is called and
 747                    the lseek() call it makes fails, thereby throwing away
 748                    data that cannot be recovered in any way.  To avoid this,
 749                    we first test lseek(), and only call ftell() if lseek()
 750                    works.  We can't use the lseek() value either, because we
 751                    need to take the amount of buffered data into account.
 752                    (Yet another reason why stdio stinks. :-) */
 753                 pos = lseek(fileno(f->f_fp), 0L, SEEK_CUR);
 754                 if (pos >= 0) {
 755                         pos = ftell(f->f_fp);
 756                 }
 757                 if (pos < 0)
 758                         clearerr(f->f_fp);
 759                 if (end > pos && pos >= 0)
 760                         return currentsize + end - pos + 1;
 761                 /* Add 1 so if the file were to grow we'd notice. */
 762         }
 763 #endif
 764         if (currentsize > SMALLCHUNK) {
 765                 /* Keep doubling until we reach BIGCHUNK;
 766                    then keep adding BIGCHUNK. */
 767                 if (currentsize <= BIGCHUNK)
 768                         return currentsize + currentsize;
 769                 else
 770                         return currentsize + BIGCHUNK;
 771         }
 772         return currentsize + SMALLCHUNK;
 773 }
 774
 775 #if defined(EWOULDBLOCK) && defined(EAGAIN) && EWOULDBLOCK != EAGAIN
 776 #define BLOCKED_ERRNO(x) ((x) == EWOULDBLOCK || (x) == EAGAIN)
 777 #else
 778 #ifdef EWOULDBLOCK
 779 #define BLOCKED_ERRNO(x) ((x) == EWOULDBLOCK)
 780 #else
 781 #ifdef EAGAIN
 782 #define BLOCKED_ERRNO(x) ((x) == EAGAIN)
 783 #else
 784 #define BLOCKED_ERRNO(x) 0
 785 #endif
 786 #endif
 787 #endif
 788
 789 static PyObject *
 790 file_read(PyFileObject *f, PyObject *args)
 791 {
 792         long bytesrequested = -1;
 793         size_t bytesread, buffersize, chunksize;
 794         PyObject *v;
 795
 796         if (f->f_fp == NULL)
 797                 return err_closed();
 798         if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
 799                 return NULL;
 800         if (bytesrequested < 0)
 801                 buffersize = new_buffersize(f, (size_t)0);
 802         else
 803                 buffersize = bytesrequested;
 804         if (buffersize > INT_MAX) {
 805                 PyErr_SetString(PyExc_OverflowError,
 806         "requested number of bytes is more than a Python string can hold");
 807                 return NULL;
 808         }
 809         v = PyString_FromStringAndSize((char *)NULL, buffersize);
 810         if (v == NULL)
 811                 return NULL;
 812         bytesread = 0;
 813         for (;;) {
 814                 Py_BEGIN_ALLOW_THREADS
 815                 errno = 0;
 816                 chunksize = Py_UniversalNewlineFread(BUF(v) + bytesread,
 817                           buffersize - bytesread, f->f_fp, (PyObject *)f);
 818                 Py_END_ALLOW_THREADS
 819                 if (chunksize == 0) {
 820                         if (!ferror(f->f_fp))
 821                                 break;
 822                         clearerr(f->f_fp);
 823                         /* When in non-blocking mode, data shouldn't
 824                          * be discarded if a blocking signal was
 825                          * received. That will also happen if
 826                          * chunksize != 0, but bytesread < buffersize. */
 827                         if (bytesread > 0 && BLOCKED_ERRNO(errno))
 828                                 break;
 829                         PyErr_SetFromErrno(PyExc_IOError);
 830                         Py_DECREF(v);
 831                         return NULL;
 832                 }
 833                 bytesread += chunksize;
 834                 if (bytesread < buffersize) {
 835                         clearerr(f->f_fp);
 836                         break;
 837                 }
 838                 if (bytesrequested < 0) {
 839                         buffersize = new_buffersize(f, buffersize);
 840                         if (_PyString_Resize(&v, buffersize) < 0)
 841                                 return NULL;
 842                 } else {
 843                         /* Got what was requested. */
 844                         break;
 845                 }
 846         }
 847         if (bytesread != buffersize)
 848                 _PyString_Resize(&v, bytesread);
 849         return v;
 850 }
 851
 852 static PyObject *
 853 file_readinto(PyFileObject *f, PyObject *args)
 854 {
 855         char *ptr;
 856         int ntodo;
 857         size_t ndone, nnow;
 858
 859         if (f->f_fp == NULL)
 860                 return err_closed();
 861         if (!PyArg_ParseTuple(args, "w#", &ptr, &ntodo))
 862                 return NULL;
 863         ndone = 0;
 864         while (ntodo > 0) {
 865                 Py_BEGIN_ALLOW_THREADS
 866                 errno = 0;
 867                 nnow = Py_UniversalNewlineFread(ptr+ndone, ntodo, f->f_fp,
 868                                                 (PyObject *)f);
 869                 Py_END_ALLOW_THREADS
 870                 if (nnow == 0) {
 871                         if (!ferror(f->f_fp))
 872                                 break;
 873                         PyErr_SetFromErrno(PyExc_IOError);
 874                         clearerr(f->f_fp);
 875                         return NULL;
 876                 }
 877                 ndone += nnow;
 878                 ntodo -= nnow;
 879         }
 880         return PyInt_FromLong((long)ndone);
 881 }
 882
 883 /**************************************************************************
 884 Routine to get next line using platform fgets().
 885
 886 Under MSVC 6:
 887
 888 + MS threadsafe getc is very slow (multiple layers of function calls before+
 889   after each character, to lock+unlock the stream).
 890 + The stream-locking functions are MS-internal -- can't access them from user
 891   code.
 892 + There's nothing Tim could find in the MS C or platform SDK libraries that
 893   can worm around this.
 894 + MS fgets locks/unlocks only once per line; it's the only hook we have.
 895
 896 So we use fgets for speed(!), despite that it's painful.
 897
 898 MS realloc is also slow.
 899
 900 Reports from other platforms on this method vs getc_unlocked (which MS doesn't
 901 have):
 902         Linux           a wash
 903         Solaris         a wash
 904         Tru64 Unix      getline_via_fgets significantly faster
 905
 906 CAUTION:  The C std isn't clear about this:  in those cases where fgets
 907 writes something into the buffer, can it write into any position beyond the
 908 required trailing null byte?  MSVC 6 fgets does not, and no platform is (yet)
 909 known on which it does; and it would be a strange way to code fgets. Still,
 910 getline_via_fgets may not work correctly if it does.  The std test
 911 test_bufio.py should fail if platform fgets() routinely writes beyond the
 912 trailing null byte.  #define DONT_USE_FGETS_IN_GETLINE to disable this code.
 913 **************************************************************************/
 914
/* Use this routine if told to, or by default on non-getc_unlocked()
 * platforms unless told not to.  Yikes!  Let's spell that out:
 * On a platform with getc_unlocked():
 *     By default, use getc_unlocked().
 *     If you want to use fgets() instead, #define USE_FGETS_IN_GETLINE.
 * On a platform without getc_unlocked():
 *     By default, use fgets().
 *     If you don't want to use fgets(), #define DONT_USE_FGETS_IN_GETLINE.
 */
 924 #if !defined(USE_FGETS_IN_GETLINE) && !defined(HAVE_GETC_UNLOCKED)
 925 #define USE_FGETS_IN_GETLINE
 926 #endif
 927
 928 #if defined(DONT_USE_FGETS_IN_GETLINE) && defined(USE_FGETS_IN_GETLINE)
 929 #undef USE_FGETS_IN_GETLINE
 930 #endif
 931
 932 #ifdef USE_FGETS_IN_GETLINE
 933 static PyObject*
 934 getline_via_fgets(FILE *fp)
 935 {
 936 /* INITBUFSIZE is the maximum line length that lets us get away with the fast
 937  * no-realloc, one-fgets()-call path.  Boosting it isn't free, because we have
 938  * to fill this much of the buffer with a known value in order to figure out
 939  * how much of the buffer fgets() overwrites.  So if INITBUFSIZE is larger
 940  * than "most" lines, we waste time filling unused buffer slots.  100 is
 941  * surely adequate for most peoples' email archives, chewing over source code,
 942  * etc -- "regular old text files".
 943  * MAXBUFSIZE is the maximum line length that lets us get away with the less
 944  * fast (but still zippy) no-realloc, two-fgets()-call path.  See above for
 945  * cautions about boosting that.  300 was chosen because the worst real-life
 946  * text-crunching job reported on Python-Dev was a mail-log crawler where over
 947  * half the lines were 254 chars.
 948  */
 949 #define INITBUFSIZE 100
 950 #define MAXBUFSIZE 300
 951         char* p;        /* temp */
 952         char buf[MAXBUFSIZE];
 953         PyObject* v;    /* the string object result */
 954         char* pvfree;   /* address of next free slot */
 955         char* pvend;    /* address one beyond last free slot */
 956         size_t nfree;   /* # of free buffer slots; pvend-pvfree */
 957         size_t total_v_size;  /* total # of slots in buffer */
 958         size_t increment;       /* amount to increment the buffer */
 959
 960         /* Optimize for normal case:  avoid _PyString_Resize if at all
 961          * possible via first reading into stack buffer "buf".
 962          */
 963         total_v_size = INITBUFSIZE;     /* start small and pray */
 964         pvfree = buf;
 965         for (;;) {
 966                 Py_BEGIN_ALLOW_THREADS
 967                 pvend = buf + total_v_size;
 968                 nfree = pvend - pvfree;
 969                 memset(pvfree, '\n', nfree);
 970                 p = fgets(pvfree, nfree, fp);
 971                 Py_END_ALLOW_THREADS
 972
 973                 if (p == NULL) {
 974                         clearerr(fp);
 975                         if (PyErr_CheckSignals())
 976                                 return NULL;
 977                         v = PyString_FromStringAndSize(buf, pvfree - buf);
 978                         return v;
 979                 }
 980                 /* fgets read *something* */
 981                 p = memchr(pvfree, '\n', nfree);
 982                 if (p != NULL) {
 983                         /* Did the \n come from fgets or from us?
 984                          * Since fgets stops at the first \n, and then writes
 985                          * \0, if it's from fgets a \0 must be next.  But if
 986                          * that's so, it could not have come from us, since
 987                          * the \n's we filled the buffer with have only more
 988                          * \n's to the right.
 989                          */
 990                         if (p+1 < pvend && *(p+1) == '\0') {
 991                                 /* It's from fgets:  we win!  In particular,
 992                                  * we haven't done any mallocs yet, and can
 993                                  * build the final result on the first try.
 994                                  */
 995                                 ++p;    /* include \n from fgets */
 996                         }
 997                         else {
 998                                 /* Must be from us:  fgets didn't fill the
 999                                  * buffer and didn't find a newline, so it
1000                                  * must be the last and newline-free line of
1001                                  * the file.
1002                                  */
1003                                 assert(p > pvfree && *(p-1) == '\0');
1004                                 --p;    /* don't include \0 from fgets */
1005                         }
1006                         v = PyString_FromStringAndSize(buf, p - buf);
1007                         return v;
1008                 }
1009                 /* yuck:  fgets overwrote all the newlines, i.e. the entire
1010                  * buffer.  So this line isn't over yet, or maybe it is but
1011                  * we're exactly at EOF.  If we haven't already, try using the
1012                  * rest of the stack buffer.
1013                  */
1014                 assert(*(pvend-1) == '\0');
1015                 if (pvfree == buf) {
1016                         pvfree = pvend - 1;     /* overwrite trailing null */
1017                         total_v_size = MAXBUFSIZE;
1018                 }
1019                 else
1020                         break;
1021         }
1022
1023         /* The stack buffer isn't big enough; malloc a string object and read
1024          * into its buffer.
1025          */
1026         total_v_size = MAXBUFSIZE << 1;
1027         v = PyString_FromStringAndSize((char*)NULL, (int)total_v_size);
1028         if (v == NULL)
1029                 return v;
1030         /* copy over everything except the last null byte */
1031         memcpy(BUF(v), buf, MAXBUFSIZE-1);
1032         pvfree = BUF(v) + MAXBUFSIZE - 1;
1033
1034         /* Keep reading stuff into v; if it ever ends successfully, break
1035          * after setting p one beyond the end of the line.  The code here is
1036          * very much like the code above, except reads into v's buffer; see
1037          * the code above for detailed comments about the logic.
1038          */
1039         for (;;) {
1040                 Py_BEGIN_ALLOW_THREADS
1041                 pvend = BUF(v) + total_v_size;
1042                 nfree = pvend - pvfree;
1043                 memset(pvfree, '\n', nfree);
1044                 p = fgets(pvfree, nfree, fp);
1045                 Py_END_ALLOW_THREADS
1046
1047                 if (p == NULL) {
1048                         clearerr(fp);
1049                         if (PyErr_CheckSignals()) {
1050                                 Py_DECREF(v);
1051                                 return NULL;
1052                         }
1053                         p = pvfree;
1054                         break;
1055                 }
1056                 p = memchr(pvfree, '\n', nfree);
1057                 if (p != NULL) {
1058                         if (p+1 < pvend && *(p+1) == '\0') {
1059                                 /* \n came from fgets */
1060                                 ++p;
1061                                 break;
1062                         }
1063                         /* \n came from us; last line of file, no newline */
1064                         assert(p > pvfree && *(p-1) == '\0');
1065                         --p;
1066                         break;
1067                 }
1068                 /* expand buffer and try again */
1069                 assert(*(pvend-1) == '\0');
1070                 increment = total_v_size >> 2;  /* mild exponential growth */
1071                 total_v_size += increment;
1072                 if (total_v_size > INT_MAX) {
1073                         PyErr_SetString(PyExc_OverflowError,
1074                             "line is longer than a Python string can hold");
1075                         Py_DECREF(v);
1076                         return NULL;
1077                 }
1078                 if (_PyString_Resize(&v, (int)total_v_size) < 0)
1079                         return NULL;
1080                 /* overwrite the trailing null byte */
1081                 pvfree = BUF(v) + (total_v_size - increment - 1);
1082         }
1083         if (BUF(v) + total_v_size != p)
1084                 _PyString_Resize(&v, p - BUF(v));
1085         return v;
1086 #undef INITBUFSIZE
1087 #undef MAXBUFSIZE
1088 }
1089 #endif  /* ifdef USE_FGETS_IN_GETLINE */
1090
/* Internal routine to get a line.
   Size argument interpretation:
   > 0: max length;
   <= 0: read arbitrary line

   Reads from f->f_fp into a new string object and returns it, newline
   included.  With n > 0 at most n bytes are returned, so the line may be
   incomplete.  An empty string means EOF was hit before any byte was read.
   Returns NULL with an exception set on a read error (IOError), when
   PyErr_CheckSignals() reports an exception, when the line would exceed
   INT_MAX bytes (OverflowError), or when the string cannot be allocated or
   resized.

   When f->f_univ_newline is set, \r and \r\n are stored as \n; the newline
   kinds seen and the pending "skip the next \n" flag are written back to f
   so that they carry over to the next read.
*/

static PyObject *
get_line(PyFileObject *f, int n)
{
        FILE *fp = f->f_fp;
        int c;
        char *buf, *end;        /* next free slot / one past the last slot */
        size_t total_v_size;    /* total # of slots in buffer */
        size_t used_v_size;     /* # used slots in buffer */
        size_t increment;       /* amount to increment the buffer */
        PyObject *v;
        /* Work on local copies of the newline state while the interpreter
         * lock is released; they are stored back after each pass below.
         */
        int newlinetypes = f->f_newlinetypes;
        int skipnextlf = f->f_skipnextlf;
        int univ_newline = f->f_univ_newline;

#if defined(USE_FGETS_IN_GETLINE)
        /* The fgets() variant is only used for unbounded reads without
         * newline translation.
         */
        if (n <= 0 && !univ_newline )
                return getline_via_fgets(fp);
#endif
        /* Exactly n slots for a bounded read, else start with 100 and grow. */
        total_v_size = n > 0 ? n : 100;
        v = PyString_FromStringAndSize((char *)NULL, total_v_size);
        if (v == NULL)
                return NULL;
        buf = BUF(v);
        end = buf + total_v_size;

        /* Each pass fills the buffer until newline, EOF or buffer full. */
        for (;;) {
                Py_BEGIN_ALLOW_THREADS
                FLOCKFILE(fp);
                if (univ_newline) {
                        c = 'x'; /* Shut up gcc warning */
                        while ( buf != end && (c = GETC(fp)) != EOF ) {
                                if (skipnextlf ) {
                                        /* the previous byte was a \r */
                                        skipnextlf = 0;
                                        if (c == '\n') {
                                                /* Seeing a \n here with
                                                 * skipnextlf true means we
                                                 * saw a \r before.
                                                 */
                                                newlinetypes |= NEWLINE_CRLF;
                                                /* drop this \n and fetch the
                                                 * byte that follows it */
                                                c = GETC(fp);
                                                if (c == EOF) break;
                                        } else {
                                                newlinetypes |= NEWLINE_CR;
                                        }
                                }
                                if (c == '\r') {
                                        /* store \n now; a directly following
                                         * \n is skipped on the next byte */
                                        skipnextlf = 1;
                                        c = '\n';
                                } else if ( c == '\n')
                                        newlinetypes |= NEWLINE_LF;
                                *buf++ = c;
                                if (c == '\n') break;
                        }
                        /* file ended right after a lone \r */
                        if ( c == EOF && skipnextlf )
                                newlinetypes |= NEWLINE_CR;
                } else /* If not universal newlines use the normal loop */
                while ((c = GETC(fp)) != EOF &&
                       (*buf++ = c) != '\n' &&
                        buf != end)
                        ;
                FUNLOCKFILE(fp);
                Py_END_ALLOW_THREADS
                /* publish the newline state back to the file object */
                f->f_newlinetypes = newlinetypes;
                f->f_skipnextlf = skipnextlf;
                if (c == '\n')
                        break;
                if (c == EOF) {
                        if (ferror(fp)) {
                                PyErr_SetFromErrno(PyExc_IOError);
                                clearerr(fp);
                                Py_DECREF(v);
                                return NULL;
                        }
                        /* plain EOF: reset the indicator so that a later
                         * read attempt is not blocked by it */
                        clearerr(fp);
                        if (PyErr_CheckSignals()) {
                                Py_DECREF(v);
                                return NULL;
                        }
                        break;
                }
                /* Must be because buf == end */
                if (n > 0)
                        break;  /* bounded read: the limit was reached */
                used_v_size = total_v_size;
                increment = total_v_size >> 2; /* mild exponential growth */
                total_v_size += increment;
                if (total_v_size > INT_MAX) {
                        PyErr_SetString(PyExc_OverflowError,
                            "line is longer than a Python string can hold");
                        Py_DECREF(v);
                        return NULL;
                }
                /* No Py_DECREF(v) on failure here: per the C API docs,
                 * _PyString_Resize releases the string and NULLs v itself.
                 */
                if (_PyString_Resize(&v, total_v_size) < 0)
                        return NULL;
                /* the resize may have moved the buffer; recompute pointers */
                buf = BUF(v) + used_v_size;
                end = BUF(v) + total_v_size;
        }

        /* Shrink the string to the bytes actually stored. */
        used_v_size = buf - BUF(v);
        if (used_v_size != total_v_size)
                _PyString_Resize(&v, used_v_size);
        return v;
}
1200
1201 /* External C interface */
1202
1203 PyObject *
1204 PyFile_GetLine(PyObject *f, int n)
1205 {
1206         PyObject *result;
1207
1208         if (f == NULL) {
1209                 PyErr_BadInternalCall();
1210                 return NULL;
1211         }
1212
1213         if (PyFile_Check(f)) {
1214                 if (((PyFileObject*)f)->f_fp == NULL)
1215                         return err_closed();
1216                 result = get_line((PyFileObject *)f, n);
1217         }
1218         else {
1219                 PyObject *reader;
1220                 PyObject *args;
1221
1222                 reader = PyObject_GetAttrString(f, "readline");
1223                 if (reader == NULL)
1224                         return NULL;
1225                 if (n <= 0)
1226                         args = PyTuple_New(0);
1227                 else
1228                         args = Py_BuildValue("(i)", n);
1229                 if (args == NULL) {
1230                         Py_DECREF(reader);
1231                         return NULL;
1232                 }
1233                 result = PyEval_CallObject(reader, args);
1234                 Py_DECREF(reader);
1235                 Py_DECREF(args);
1236                 if (result != NULL && !PyString_Check(result) &&
1237                     !PyUnicode_Check(result)) {
1238                         Py_DECREF(result);
1239                         result = NULL;
1240                         PyErr_SetString(PyExc_TypeError,
1241                                    "object.readline() returned non-string");
1242                 }
1243         }
1244
1245         if (n < 0 && result != NULL && PyString_Check(result)) {
1246                 char *s = PyString_AS_STRING(result);
1247                 int len = PyString_GET_SIZE(result);
1248                 if (len == 0) {
1249                         Py_DECREF(result);
1250                         result = NULL;
1251                         PyErr_SetString(PyExc_EOFError,
1252                                         "EOF when reading a line");
1253                 }
1254                 else if (s[len-1] == '\n') {
1255                         if (result->ob_refcnt == 1)
1256                                 _PyString_Resize(&result, len-1);
1257                         else {
1258                                 PyObject *v;
1259                                 v = PyString_FromStringAndSize(s, len-1);
1260                                 Py_DECREF(result);
1261                                 result = v;
1262                         }
1263                 }
1264         }
1265 #ifdef Py_USING_UNICODE
1266         if (n < 0 && result != NULL && PyUnicode_Check(result)) {
1267                 Py_UNICODE *s = PyUnicode_AS_UNICODE(result);
1268                 int len = PyUnicode_GET_SIZE(result);
1269                 if (len == 0) {
1270                         Py_DECREF(result);
1271                         result = NULL;
1272                         PyErr_SetString(PyExc_EOFError,
1273                                         "EOF when reading a line");
1274                 }
1275                 else if (s[len-1] == '\n') {
1276                         if (result->ob_refcnt == 1)
1277                                 PyUnicode_Resize(&result, len-1);
1278                         else {
1279                                 PyObject *v;
1280                                 v = PyUnicode_FromUnicode(s, len-1);
1281                                 Py_DECREF(result);
1282                                 result = v;
1283                         }
1284                 }
1285         }
1286 #endif
1287         return result;
1288 }
1289
1290 /* Python method */
1291
1292 static PyObject *
1293 file_readline(PyFileObject *f, PyObject *args)
1294 {
1295         int n = -1;
1296
1297         if (f->f_fp == NULL)
1298                 return err_closed();
1299         if (!PyArg_ParseTuple(args, "|i:readline", &n))
1300                 return NULL;
1301         if (n == 0)
1302                 return PyString_FromString("");
1303         if (n < 0)
1304                 n = 0;
1305         return get_line(f, n);
1306 }
1307
/* Python method readlines([sizehint]): return a list of the lines read
 * from the file.
 *
 * Data is read in chunks, first into a stack buffer of SMALLCHUNK bytes
 * and, once a single line outgrows that, into a string object whose size
 * doubles as needed.  Complete lines are appended to the list as they are
 * found.  With sizehint > 0, reading stops once at least that many bytes
 * have been read, and the line in progress is completed with get_line().
 * Returns NULL with an exception set on failure.
 */
static PyObject *
file_readlines(PyFileObject *f, PyObject *args)
{
        long sizehint = 0;
        PyObject *list;
        PyObject *line;
        char small_buffer[SMALLCHUNK];
        char *buffer = small_buffer;    /* current read buffer */
        size_t buffersize = SMALLCHUNK;
        PyObject *big_buffer = NULL;    /* owns buffer once it left the stack */
        size_t nfilled = 0;     /* bytes of an unfinished line held in buffer */
        size_t nread;           /* bytes delivered by the latest read */
        size_t totalread = 0;   /* bytes read so far, compared to sizehint */
        char *p, *q, *end;
        int err;
        int shortread = 0;      /* true after a read came back short */

        if (f->f_fp == NULL)
                return err_closed();
        if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
                return NULL;
        if ((list = PyList_New(0)) == NULL)
                return NULL;
        for (;;) {
                /* After a short read, don't touch the stream again; act as
                 * if the next read returned nothing.
                 */
                if (shortread)
                        nread = 0;
                else {
                        Py_BEGIN_ALLOW_THREADS
                        errno = 0;
                        nread = Py_UniversalNewlineFread(buffer+nfilled,
                                buffersize-nfilled, f->f_fp, (PyObject *)f);
                        Py_END_ALLOW_THREADS
                        shortread = (nread < buffersize-nfilled);
                }
                if (nread == 0) {
                        /* Zeroing sizehint keeps the code after the loop
                         * from calling get_line() to extend the last line.
                         */
                        sizehint = 0;
                        if (!ferror(f->f_fp))
                                break;
                        PyErr_SetFromErrno(PyExc_IOError);
                        clearerr(f->f_fp);
                        /* Shared failure exit; also the target of the
                         * "goto error" statements elsewhere in this
                         * function.
                         */
                  error:
                        Py_DECREF(list);
                        list = NULL;
                        goto cleanup;
                }
                totalread += nread;
                /* Look for a newline in the bytes just read. */
                p = memchr(buffer+nfilled, '\n', nread);
                if (p == NULL) {
                        /* Need a larger buffer to fit this line */
                        nfilled += nread;
                        buffersize *= 2;
                        if (buffersize > INT_MAX) {
                                PyErr_SetString(PyExc_OverflowError,
                            "line is longer than a Python string can hold");
                                goto error;
                        }
                        if (big_buffer == NULL) {
                                /* Create the big buffer */
                                big_buffer = PyString_FromStringAndSize(
                                        NULL, buffersize);
                                if (big_buffer == NULL)
                                        goto error;
                                buffer = PyString_AS_STRING(big_buffer);
                                /* carry over what the stack buffer holds */
                                memcpy(buffer, small_buffer, nfilled);
                        }
                        else {
                                /* Grow the big buffer */
                                if ( _PyString_Resize(&big_buffer, buffersize) < 0 )
                                        goto error;
                                buffer = PyString_AS_STRING(big_buffer);
                        }
                        continue;
                }
                end = buffer+nfilled+nread;
                q = buffer;     /* start of the line being cut out */
                do {
                        /* Process complete lines */
                        p++;    /* step past the newline so it is included */
                        line = PyString_FromStringAndSize(q, p-q);
                        if (line == NULL)
                                goto error;
                        err = PyList_Append(list, line);
                        Py_DECREF(line);
                        if (err != 0)
                                goto error;
                        q = p;
                        p = memchr(q, '\n', end-q);
                } while (p != NULL);
                /* Move the remaining incomplete line to the start */
                nfilled = end-q;
                memmove(buffer, q, nfilled);
                if (sizehint > 0)
                        if (totalread >= (size_t)sizehint)
                                break;
        }
        if (nfilled != 0) {
                /* Partial last line */
                line = PyString_FromStringAndSize(buffer, nfilled);
                if (line == NULL)
                        goto error;
                if (sizehint > 0) {
                        /* Need to complete the last line */
                        PyObject *rest = get_line(f, 0);
                        if (rest == NULL) {
                                Py_DECREF(line);
                                goto error;
                        }
                        PyString_Concat(&line, rest);
                        Py_DECREF(rest);
                        /* line is NULL here if the concatenation failed */
                        if (line == NULL)
                                goto error;
                }
                err = PyList_Append(list, line);
                Py_DECREF(line);
                if (err != 0)
                        goto error;
        }
  cleanup:
        /* Reached on success and on failure; list is NULL on failure. */
        Py_XDECREF(big_buffer);
        return list;
}
1429
1430 static PyObject *
1431 file_write(PyFileObject *f, PyObject *args)
1432 {
1433         char *s;
1434         int n, n2;
1435         if (f->f_fp == NULL)
1436                 return err_closed();
1437         if (!PyArg_ParseTuple(args, f->f_binary ? "s#" : "t#", &s, &n))
1438                 return NULL;
1439         f->f_softspace = 0;
1440         Py_BEGIN_ALLOW_THREADS
1441         errno = 0;
1442         n2 = fwrite(s, 1, n, f->f_fp);
1443         Py_END_ALLOW_THREADS
1444         if (n2 != n) {
1445                 PyErr_SetFromErrno(PyExc_IOError);
1446                 clearerr(f->f_fp);
1447                 return NULL;
1448         }
1449         Py_INCREF(Py_None);
1450         return Py_None;
1451 }
1452
/* Python method writelines(seq): write every item of an iterable to the
 * file, without adding newlines.
 *
 * Items are handled in chunks of CHUNKSIZE.  Each chunk is first gathered
 * into a private list and every non-string item is replaced by a string
 * built from its buffer; only then is the chunk written, with the
 * interpreter lock released.  Returns None, or NULL with an exception set.
 */
static PyObject *
file_writelines(PyFileObject *f, PyObject *seq)
{
#define CHUNKSIZE 1000
        PyObject *list, *line;
        PyObject *it;   /* iter(seq) */
        PyObject *result;
        int i, j, index, len, nwritten, islist;

        assert(seq != NULL);
        if (f->f_fp == NULL)
                return err_closed();

        result = NULL;  /* stays NULL unless everything succeeds */
        list = NULL;
        islist = PyList_Check(seq);
        if  (islist)
                it = NULL;      /* real lists are sliced, not iterated */
        else {
                it = PyObject_GetIter(seq);
                if (it == NULL) {
                        PyErr_SetString(PyExc_TypeError,
                                "writelines() requires an iterable argument");
                        return NULL;
                }
                /* From here on, fail by going to error, to reclaim "it". */
                list = PyList_New(CHUNKSIZE);
                if (list == NULL)
                        goto error;
        }

        /* Strategy: slurp CHUNKSIZE lines into a private list,
           checking that they are all strings, then write that list
           without holding the interpreter lock, then come back for more. */
        for (index = 0; ; index += CHUNKSIZE) {
                if (islist) {
                        /* drop the previous chunk, take the next slice */
                        Py_XDECREF(list);
                        list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
                        if (list == NULL)
                                goto error;
                        j = PyList_GET_SIZE(list);
                }
                else {
                        /* Refill the preallocated list; j ends up as the
                         * number of items fetched for this chunk.
                         */
                        for (j = 0; j < CHUNKSIZE; j++) {
                                line = PyIter_Next(it);
                                if (line == NULL) {
                                        /* NULL is either an error or plain
                                         * exhaustion of the iterator */
                                        if (PyErr_Occurred())
                                                goto error;
                                        break;
                                }
                                PyList_SetItem(list, j, line);
                        }
                }
                if (j == 0)
                        break;

                /* Check that all entries are indeed strings. If not,
                   apply the same rules as for file.write() and
                   convert the results to strings. This is slow, but
                   seems to be the only way since all conversion APIs
                   could potentially execute Python code. */
                for (i = 0; i < j; i++) {
                        PyObject *v = PyList_GET_ITEM(list, i);
                        if (!PyString_Check(v)) {
                                const char *buffer;
                                int len;
                                /* Binary files: the read-buffer lookup must
                                 * succeed first; the char-buffer lookup
                                 * then runs as well, and its values are
                                 * the ones used below.
                                 */
                                if (((f->f_binary &&
                                      PyObject_AsReadBuffer(v,
                                              (const void**)&buffer,
                                                            &len)) ||
                                     PyObject_AsCharBuffer(v,
                                                           &buffer,
                                                           &len))) {
                                        PyErr_SetString(PyExc_TypeError,
                        "writelines() argument must be a sequence of strings");
                                        goto error;
                                }
                                line = PyString_FromStringAndSize(buffer,
                                                                  len);
                                if (line == NULL)
                                        goto error;
                                /* swap the new string into the slot,
                                 * releasing the item it replaces */
                                Py_DECREF(v);
                                PyList_SET_ITEM(list, i, line);
                        }
                }

                /* Since we are releasing the global lock, the
                   following code may *not* execute Python code. */
                Py_BEGIN_ALLOW_THREADS
                f->f_softspace = 0;
                errno = 0;
                for (i = 0; i < j; i++) {
                        line = PyList_GET_ITEM(list, i);
                        len = PyString_GET_SIZE(line);
                        nwritten = fwrite(PyString_AS_STRING(line),
                                          1, len, f->f_fp);
                        if (nwritten != len) {
                                /* take the lock back before setting the
                                 * exception and leaving this section */
                                Py_BLOCK_THREADS
                                PyErr_SetFromErrno(PyExc_IOError);
                                clearerr(f->f_fp);
                                goto error;
                        }
                }
                Py_END_ALLOW_THREADS

                /* a chunk that was not full was the last one */
                if (j < CHUNKSIZE)
                        break;
        }

        Py_INCREF(Py_None);
        result = Py_None;
  error:
        /* Common exit: also reached on success, with result set to None. */
        Py_XDECREF(list);
        Py_XDECREF(it);
        return result;
#undef CHUNKSIZE
}
1570
1571 static PyObject *
1572 file_getiter(PyFileObject *f)
1573 {
1574         if (f->f_fp == NULL)
1575                 return err_closed();
1576         Py_INCREF(f);
1577         return (PyObject *)f;
1578 }
1579
/* Help text for the file methods, one PyDoc_STRVAR per method.  The first
 * line of each string is the call signature and a one-line summary.
 * NOTE(review): the table that attaches these strings to the methods is
 * not visible in this part of the file.
 */

/* readline(): see file_readline(). */
PyDoc_STRVAR(readline_doc,
"readline([size]) -> next line from the file, as a string.\n"
"\n"
"Retain newline.  A non-negative size argument limits the maximum\n"
"number of bytes to return (an incomplete line may be returned then).\n"
"Return an empty string at EOF.");

/* read() */
PyDoc_STRVAR(read_doc,
"read([size]) -> read at most size bytes, returned as a string.\n"
"\n"
"If the size argument is negative or omitted, read until EOF is reached.\n"
"Notice that when in non-blocking mode, less data than what was requested\n"
"may be returned, even if no size parameter was given.");

/* write(): see file_write(). */
PyDoc_STRVAR(write_doc,
"write(str) -> None.  Write string str to file.\n"
"\n"
"Note that due to buffering, flush() or close() may be needed before\n"
"the file on disk reflects the data written.");
1599
1600 PyDoc_STRVAR(fileno_doc,
1601 "fileno() -> integer \"file descriptor\".\n"
1602 "\n"
1603 "This is needed for lower-level file interfaces, such os.read().");
1604
/* Help text for seek(). */
PyDoc_STRVAR(seek_doc,
"seek(offset[, whence]) -> None.  Move to new file position.\n"
"\n"
"Argument offset is a byte count.  Optional argument whence defaults to\n"
"0 (offset from start of file, offset should be >= 0); other values are 1\n"
"(move relative to current position, positive or negative), and 2 (move\n"
"relative to end of file, usually negative, although many platforms allow\n"
"seeking beyond the end of a file).  If the file is opened in text mode,\n"
"only offsets returned by tell() are legal.  Use of other offsets causes\n"
"undefined behavior."
/* NOTE(review): the line above carries no "\n" of its own, so the lone "\n"
 * below merely ends that line; unlike in the other docstrings it does not
 * produce a blank line before the final sentence.
 */
"\n"
"Note that not all file objects are seekable.");

/* Help text for truncate(); only compiled when HAVE_FTRUNCATE is defined. */
#ifdef HAVE_FTRUNCATE
PyDoc_STRVAR(truncate_doc,
"truncate([size]) -> None.  Truncate the file to at most size bytes.\n"
"\n"
"Size defaults to the current file position, as returned by tell().");
#endif

/* Help text for tell(). */
PyDoc_STRVAR(tell_doc,
"tell() -> current file position, an integer (may be a long integer).");

/* Help text for readinto(); see file_readinto(). */
PyDoc_STRVAR(readinto_doc,
"readinto() -> Undocumented.  Don't use this; it may go away.");

/* Help text for readlines(); see file_readlines(). */
PyDoc_STRVAR(readlines_doc,
"readlines([size]) -> list of strings, each a line from the file.\n"
"\n"
"Call readline() repeatedly and return a list of the lines so read.\n"
"The optional size argument, if given, is an approximate bound on the\n"
"total number of bytes in the lines returned.");

/* Help text for xreadlines(). */
PyDoc_STRVAR(xreadlines_doc,
"xreadlines() -> returns self.\n"
"\n"
"For backward compatibility. File objects now include the performance\n"
"optimizations previously implemented in the xreadlines module.");

/* Help text for writelines(); see file_writelines(). */
PyDoc_STRVAR(writelines_doc,
"writelines(sequence_of_strings) -> None.  Write the strings to the file.\n"
"\n"
"Note that newlines are not added.  The sequence can be any iterable object\n"
"producing strings. This is equivalent to calling write() for each string.");

/* Help text for flush(). */
PyDoc_STRVAR(flush_doc,
"flush() -> None.  Flush the internal I/O buffer.");

/* Help text for close(). */
PyDoc_STRVAR(close_doc,
"close() -> None or (perhaps) an integer.  Close the file.\n"
"\n"
"Sets data attribute .closed to True.  A closed file cannot be used for\n"
"further I/O operations.  close() may be called more than once without\n"
"error.  Some kinds of file objects (for example, opened by popen())\n"
"may return an exit status upon closing.");
1660
1661 PyDoc_STRVAR(isatty_doc,
1662 "isatty() -> true or false.  True if the file is connected to a tty device.");
1663
1664 static PyMethodDef file_methods[] = {
1665         {"readline",  (PyCFunction)file_readline, METH_VARARGS, readline_doc},
1666         {"read",      (PyCFunction)file_read,     METH_VARARGS, read_doc},
1667         {"write",     (PyCFunction)file_write,    METH_VARARGS, write_doc},
1668         {"fileno",    (PyCFunction)file_fileno,   METH_NOARGS,  fileno_doc},
1669         {"seek",      (PyCFunction)file_seek,     METH_VARARGS, seek_doc},
1670 #ifdef HAVE_FTRUNCATE
1671         {"truncate",  (PyCFunction)file_truncate, METH_VARARGS, truncate_doc},
1672 #endif
1673         {"tell",      (PyCFunction)file_tell,     METH_NOARGS,  tell_doc},
1674         {"readinto",  (PyCFunction)file_readinto, METH_VARARGS, readinto_doc},
1675         {"readlines", (PyCFunction)file_readlines,METH_VARARGS, readlines_doc},
1676         {"xreadlines",(PyCFunction)file_getiter,  METH_NOARGS, xreadlines_doc},
1677         {"writelines",(PyCFunction)file_writelines, METH_O,    writelines_doc},
1678         {"flush",     (PyCFunction)file_flush,    METH_NOARGS,  flush_doc},
1679         {"close",     (PyCFunction)file_close,    METH_NOARGS,  close_doc},
1680         {"isatty",    (PyCFunction)file_isatty,   METH_NOARGS,  isatty_doc},
1681         {NULL,        NULL}             /* sentinel */
1682 };
1683
1684 #define OFF(x) offsetof(PyFileObject, x)
1685
1686 static PyMemberDef file_memberlist[] = {
1687         {"softspace",   T_INT,          OFF(f_softspace), 0,
1688          "flag indicating that a space needs to be printed; used by print"},
1689         {"mode",        T_OBJECT,       OFF(f_mode),    RO,
1690          "file mode ('r', 'U', 'w', 'a', possibly with 'b' or '+' added)"},
1691         {"name",        T_OBJECT,       OFF(f_name),    RO,
1692          "file name"},
1693         {"encoding",    T_OBJECT,       OFF(f_encoding),        RO,
1694          "file encoding"},
1695         /* getattr(f, "closed") is implemented without this table */
1696         {NULL}  /* Sentinel */
1697 };
1698
1699 static PyObject *
1700 get_closed(PyFileObject *f, void *closure)
1701 {
1702         return PyBool_FromLong((long)(f->f_fp == 0));
1703 }
1704 static PyObject *
1705 get_newlines(PyFileObject *f, void *closure)
1706 {
1707         switch (f->f_newlinetypes) {
1708         case NEWLINE_UNKNOWN:
1709                 Py_INCREF(Py_None);
1710                 return Py_None;
1711         case NEWLINE_CR:
1712                 return PyString_FromString("\r");
1713         case NEWLINE_LF:
1714                 return PyString_FromString("\n");
1715         case NEWLINE_CR|NEWLINE_LF:
1716                 return Py_BuildValue("(ss)", "\r", "\n");
1717         case NEWLINE_CRLF:
1718                 return PyString_FromString("\r\n");
1719         case NEWLINE_CR|NEWLINE_CRLF:
1720                 return Py_BuildValue("(ss)", "\r", "\r\n");
1721         case NEWLINE_LF|NEWLINE_CRLF:
1722                 return Py_BuildValue("(ss)", "\n", "\r\n");
1723         case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF:
1724                 return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
1725         default:
1726                 PyErr_Format(PyExc_SystemError,
1727                              "Unknown newlines value 0x%x\n",
1728                              f->f_newlinetypes);
1729                 return NULL;
1730         }
1731 }
1732
1733 static PyGetSetDef file_getsetlist[] = {
1734         {"closed", (getter)get_closed, NULL, "True if the file is closed"},
1735         {"newlines", (getter)get_newlines, NULL,
1736          "end-of-line convention used in this file"},
1737         {0},
1738 };
1739
1740 static void
1741 drop_readahead(PyFileObject *f)
1742 {
1743         if (f->f_buf != NULL) {
1744                 PyMem_Free(f->f_buf);
1745                 f->f_buf = NULL;
1746         }
1747 }
1748
1749 /* Make sure that file has a readahead buffer with at least one byte
1750    (unless at EOF) and no more than bufsize.  Returns negative value on
1751    error */
1752 static int
1753 readahead(PyFileObject *f, int bufsize)
1754 {
1755         int chunksize;
1756
1757         if (f->f_buf != NULL) {
1758                 if( (f->f_bufend - f->f_bufptr) >= 1)
1759                         return 0;
1760                 else
1761                         drop_readahead(f);
1762         }
1763         if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
1764                 return -1;
1765         }
1766         Py_BEGIN_ALLOW_THREADS
1767         errno = 0;
1768         chunksize = Py_UniversalNewlineFread(
1769                 f->f_buf, bufsize, f->f_fp, (PyObject *)f);
1770         Py_END_ALLOW_THREADS
1771         if (chunksize == 0) {
1772                 if (ferror(f->f_fp)) {
1773                         PyErr_SetFromErrno(PyExc_IOError);
1774                         clearerr(f->f_fp);
1775                         drop_readahead(f);
1776                         return -1;
1777                 }
1778         }
1779         f->f_bufptr = f->f_buf;
1780         f->f_bufend = f->f_buf + chunksize;
1781         return 0;
1782 }
1783
1784 /* Used by file_iternext.  The returned string will start with 'skip'
1785    uninitialized bytes followed by the remainder of the line. Don't be
1786    horrified by the recursive call: maximum recursion depth is limited by
1787    logarithmic buffer growth to about 50 even when reading a 1gb line. */
1788
1789 static PyStringObject *
1790 readahead_get_line_skip(PyFileObject *f, int skip, int bufsize)
1791 {
1792         PyStringObject* s;
1793         char *bufptr;
1794         char *buf;
1795         int len;
1796
1797         if (f->f_buf == NULL)
1798                 if (readahead(f, bufsize) < 0)
1799                         return NULL;
1800
1801         len = f->f_bufend - f->f_bufptr;
1802         if (len == 0)
1803                 return (PyStringObject *)
1804                         PyString_FromStringAndSize(NULL, skip);
1805         bufptr = memchr(f->f_bufptr, '\n', len);
1806         if (bufptr != NULL) {
1807                 bufptr++;                       /* Count the '\n' */
1808                 len = bufptr - f->f_bufptr;
1809                 s = (PyStringObject *)
1810                         PyString_FromStringAndSize(NULL, skip+len);
1811                 if (s == NULL)
1812                         return NULL;
1813                 memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
1814                 f->f_bufptr = bufptr;
1815                 if (bufptr == f->f_bufend)
1816                         drop_readahead(f);
1817         } else {
1818                 bufptr = f->f_bufptr;
1819                 buf = f->f_buf;
1820                 f->f_buf = NULL;        /* Force new readahead buffer */
1821                 s = readahead_get_line_skip(
1822                         f, skip+len, bufsize + (bufsize>>2) );
1823                 if (s == NULL) {
1824                         PyMem_Free(buf);
1825                         return NULL;
1826                 }
1827                 memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
1828                 PyMem_Free(buf);
1829         }
1830         return s;
1831 }
1832
1833 /* A larger buffer size may actually decrease performance. */
1834 #define READAHEAD_BUFSIZE 8192
1835
1836 static PyObject *
1837 file_iternext(PyFileObject *f)
1838 {
1839         PyStringObject* l;
1840
1841         if (f->f_fp == NULL)
1842                 return err_closed();
1843
1844         l = readahead_get_line_skip(f, 0, READAHEAD_BUFSIZE);
1845         if (l == NULL || PyString_GET_SIZE(l) == 0) {
1846                 Py_XDECREF(l);
1847                 return NULL;
1848         }
1849         return (PyObject *)l;
1850 }
1851
1852
1853 static PyObject *
1854 file_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1855 {
1856         PyObject *self;
1857         static PyObject *not_yet_string;
1858
1859         assert(type != NULL && type->tp_alloc != NULL);
1860
1861         if (not_yet_string == NULL) {
1862                 not_yet_string = PyString_FromString("<uninitialized file>");
1863                 if (not_yet_string == NULL)
1864                         return NULL;
1865         }
1866
1867         self = type->tp_alloc(type, 0);
1868         if (self != NULL) {
1869                 /* Always fill in the name and mode, so that nobody else
1870                    needs to special-case NULLs there. */
1871                 Py_INCREF(not_yet_string);
1872                 ((PyFileObject *)self)->f_name = not_yet_string;
1873                 Py_INCREF(not_yet_string);
1874                 ((PyFileObject *)self)->f_mode = not_yet_string;
1875                 Py_INCREF(Py_None);
1876                 ((PyFileObject *)self)->f_encoding = Py_None;
1877                 ((PyFileObject *)self)->weakreflist = NULL;
1878         }
1879         return self;
1880 }
1881
1882 static int
1883 file_init(PyObject *self, PyObject *args, PyObject *kwds)
1884 {
1885         PyFileObject *foself = (PyFileObject *)self;
1886         int ret = 0;
1887         static const char *kwlist[] = {"name", "mode", "buffering", 0};
1888         char *name = NULL;
1889         char *mode = "r";
1890         int bufsize = -1;
1891         int wideargument = 0;
1892
1893         assert(PyFile_Check(self));
1894         if (foself->f_fp != NULL) {
1895                 /* Have to close the existing file first. */
1896                 PyObject *closeresult = file_close(foself);
1897                 if (closeresult == NULL)
1898                         return -1;
1899                 Py_DECREF(closeresult);
1900         }
1901
1902 #ifdef Py_WIN_WIDE_FILENAMES
1903         if (GetVersion() < 0x80000000) {    /* On NT, so wide API available */
1904                 PyObject *po;
1905                 if (PyArg_ParseTupleAndKeywords(args, kwds, "U|si:file",
1906                                                 kwlist, &po, &mode, &bufsize)) {
1907                         wideargument = 1;
1908                         if (fill_file_fields(foself, NULL, po, mode,
1909                                              fclose) == NULL)
1910                                 goto Error;
1911                 } else {
1912                         /* Drop the argument parsing error as narrow
1913                            strings are also valid. */
1914                         PyErr_Clear();
1915                 }
1916         }
1917 #endif
1918
1919         if (!wideargument) {
1920                 PyObject *o_name;
1921
1922                 if (!PyArg_ParseTupleAndKeywords(args, kwds, "et|si:file", kwlist,
1923                                                  Py_FileSystemDefaultEncoding,
1924                                                  &name,
1925                                                  &mode, &bufsize))
1926                         return -1;
1927
1928                 /* We parse again to get the name as a PyObject */
1929                 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|si:file",
1930                                                  kwlist, &o_name, &mode,
1931                                                  &bufsize))
1932                         return -1;
1933
1934                 if (fill_file_fields(foself, NULL, o_name, mode,
1935                                      fclose) == NULL)
1936                         goto Error;
1937         }
1938         if (open_the_file(foself, name, mode) == NULL)
1939                 goto Error;
1940         foself->f_setbuf = NULL;
1941         PyFile_SetBufSize(self, bufsize);
1942         goto Done;
1943
1944 Error:
1945         ret = -1;
1946         /* fall through */
1947 Done:
1948         PyMem_Free(name); /* free the encoded string */
1949         return ret;
1950 }
1951
/* Docstring of the file type itself (used as tp_doc of PyFile_Type).
   The text is assembled from three adjacent PyDoc_STR() pieces: the basic
   call description, the universal-newline ('U') notes, and the closing
   remark about open(). */
PyDoc_VAR(file_doc) =
PyDoc_STR(
"file(name[, mode[, buffering]]) -> file object\n"
"\n"
"Open a file.  The mode can be 'r', 'w' or 'a' for reading (default),\n"
"writing or appending.  The file will be created if it doesn't exist\n"
"when opened for writing or appending; it will be truncated when\n"
"opened for writing.  Add a 'b' to the mode for binary files.\n"
"Add a '+' to the mode to allow simultaneous reading and writing.\n"
"If the buffering argument is given, 0 means unbuffered, 1 means line\n"
"buffered, and larger numbers specify the buffer size.\n"
)
PyDoc_STR(
"Add a 'U' to mode to open the file for input with universal newline\n"
"support.  Any line ending in the input file will be seen as a '\\n'\n"
"in Python.  Also, a file so opened gains the attribute 'newlines';\n"
"the value for this attribute is one of None (no newline read yet),\n"
"'\\r', '\\n', '\\r\\n' or a tuple containing all the newline types seen.\n"
"\n"
"'U' cannot be combined with 'w' or '+' mode.\n"
)
PyDoc_STR(
"\n"
"Note:  open() is an alias for file()."
);
1977
/* Type object for files.  The initializer is positional, so every slot
   up to tp_free must stay in place even when it is 0.  File objects are
   subclassable, support weak references (through the weakreflist field)
   and act as their own iterators (tp_iter / tp_iternext). */
PyTypeObject PyFile_Type = {
        PyObject_HEAD_INIT(&PyType_Type)
        0,                                      /* ob_size */
        "file",                                 /* tp_name */
        sizeof(PyFileObject),                   /* tp_basicsize */
        0,                                      /* tp_itemsize */
        (destructor)file_dealloc,               /* tp_dealloc */
        0,                                      /* tp_print */
        0,                                      /* tp_getattr */
        0,                                      /* tp_setattr */
        0,                                      /* tp_compare */
        (reprfunc)file_repr,                    /* tp_repr */
        0,                                      /* tp_as_number */
        0,                                      /* tp_as_sequence */
        0,                                      /* tp_as_mapping */
        0,                                      /* tp_hash */
        0,                                      /* tp_call */
        0,                                      /* tp_str */
        PyObject_GenericGetAttr,                /* tp_getattro */
        /* softspace is writable:  we must supply tp_setattro */
        PyObject_GenericSetAttr,                /* tp_setattro */
        0,                                      /* tp_as_buffer */
        Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_WEAKREFS, /* tp_flags */
        file_doc,                               /* tp_doc */
        0,                                      /* tp_traverse */
        0,                                      /* tp_clear */
        0,                                      /* tp_richcompare */
        offsetof(PyFileObject, weakreflist),    /* tp_weaklistoffset */
        (getiterfunc)file_getiter,              /* tp_iter */
        (iternextfunc)file_iternext,            /* tp_iternext */
        file_methods,                           /* tp_methods */
        file_memberlist,                        /* tp_members */
        file_getsetlist,                        /* tp_getset */
        0,                                      /* tp_base */
        0,                                      /* tp_dict */
        0,                                      /* tp_descr_get */
        0,                                      /* tp_descr_set */
        0,                                      /* tp_dictoffset */
        (initproc)file_init,                    /* tp_init */
        PyType_GenericAlloc,                    /* tp_alloc */
        file_new,                               /* tp_new */
        PyObject_Del,                           /* tp_free */
};
2021
2022 /* Interface for the 'soft space' between print items. */
2023
2024 int
2025 PyFile_SoftSpace(PyObject *f, int newflag)
2026 {
2027         int oldflag = 0;
2028         if (f == NULL) {
2029                 /* Do nothing */
2030         }
2031         else if (PyFile_Check(f)) {
2032                 oldflag = ((PyFileObject *)f)->f_softspace;
2033                 ((PyFileObject *)f)->f_softspace = newflag;
2034         }
2035         else {
2036                 PyObject *v;
2037                 v = PyObject_GetAttrString(f, "softspace");
2038                 if (v == NULL)
2039                         PyErr_Clear();
2040                 else {
2041                         if (PyInt_Check(v))
2042                                 oldflag = PyInt_AsLong(v);
2043                         Py_DECREF(v);
2044                 }
2045                 v = PyInt_FromLong((long)newflag);
2046                 if (v == NULL)
2047                         PyErr_Clear();
2048                 else {
2049                         if (PyObject_SetAttrString(f, "softspace", v) != 0)
2050                                 PyErr_Clear();
2051                         Py_DECREF(v);
2052                 }
2053         }
2054         return oldflag;
2055 }
2056
2057 /* Interfaces to write objects/strings to file-like objects */
2058
2059 int
2060 PyFile_WriteObject(PyObject *v, PyObject *f, int flags)
2061 {
2062         PyObject *writer, *value, *args, *result;
2063         if (f == NULL) {
2064                 PyErr_SetString(PyExc_TypeError, "writeobject with NULL file");
2065                 return -1;
2066         }
2067         else if (PyFile_Check(f)) {
2068                 FILE *fp = PyFile_AsFile(f);
2069 #ifdef Py_USING_UNICODE
2070                 PyObject *enc = ((PyFileObject*)f)->f_encoding;
2071                 int result;
2072 #endif
2073                 if (fp == NULL) {
2074                         err_closed();
2075                         return -1;
2076                 }
2077 #ifdef Py_USING_UNICODE
2078                 if ((flags & Py_PRINT_RAW) &&
2079                     PyUnicode_Check(v) && enc != Py_None) {
2080                         char *cenc = PyString_AS_STRING(enc);
2081                         value = PyUnicode_AsEncodedString(v, cenc, "strict");
2082                         if (value == NULL)
2083                                 return -1;
2084                 } else {
2085                         value = v;
2086                         Py_INCREF(value);
2087                 }
2088                 result = PyObject_Print(value, fp, flags);
2089                 Py_DECREF(value);
2090                 return result;
2091 #else
2092                 return PyObject_Print(v, fp, flags);
2093 #endif
2094         }
2095         writer = PyObject_GetAttrString(f, "write");
2096         if (writer == NULL)
2097                 return -1;
2098         if (flags & Py_PRINT_RAW) {
2099                 if (PyUnicode_Check(v)) {
2100                         value = v;
2101                         Py_INCREF(value);
2102                 } else
2103                         value = PyObject_Str(v);
2104         }
2105         else
2106                 value = PyObject_Repr(v);
2107         if (value == NULL) {
2108                 Py_DECREF(writer);
2109                 return -1;
2110         }
2111         args = PyTuple_Pack(1, value);
2112         if (args == NULL) {
2113                 Py_DECREF(value);
2114                 Py_DECREF(writer);
2115                 return -1;
2116         }
2117         result = PyEval_CallObject(writer, args);
2118         Py_DECREF(args);
2119         Py_DECREF(value);
2120         Py_DECREF(writer);
2121         if (result == NULL)
2122                 return -1;
2123         Py_DECREF(result);
2124         return 0;
2125 }
2126
2127 int
2128 PyFile_WriteString(const char *s, PyObject *f)
2129 {
2130         if (f == NULL) {
2131                 /* Should be caused by a pre-existing error */
2132                 if (!PyErr_Occurred())
2133                         PyErr_SetString(PyExc_SystemError,
2134                                         "null file for PyFile_WriteString");
2135                 return -1;
2136         }
2137         else if (PyFile_Check(f)) {
2138                 FILE *fp = PyFile_AsFile(f);
2139                 if (fp == NULL) {
2140                         err_closed();
2141                         return -1;
2142                 }
2143                 fputs(s, fp);
2144                 return 0;
2145         }
2146         else if (!PyErr_Occurred()) {
2147                 PyObject *v = PyString_FromString(s);
2148                 int err;
2149                 if (v == NULL)
2150                         return -1;
2151                 err = PyFile_WriteObject(v, f, Py_PRINT_RAW);
2152                 Py_DECREF(v);
2153                 return err;
2154         }
2155         else
2156                 return -1;
2157 }
2158
2159 /* Try to get a file-descriptor from a Python object.  If the object
2160    is an integer or long integer, its value is returned.  If not, the
2161    object's fileno() method is called if it exists; the method must return
2162    an integer or long integer, which is returned as the file descriptor value.
2163    -1 is returned on failure.
2164 */
2165
2166 int PyObject_AsFileDescriptor(PyObject *o)
2167 {
2168         int fd;
2169         PyObject *meth;
2170
2171         if (PyInt_Check(o)) {
2172                 fd = PyInt_AsLong(o);
2173         }
2174         else if (PyLong_Check(o)) {
2175                 fd = PyLong_AsLong(o);
2176         }
2177         else if ((meth = PyObject_GetAttrString(o, "fileno")) != NULL)
2178         {
2179                 PyObject *fno = PyEval_CallObject(meth, NULL);
2180                 Py_DECREF(meth);
2181                 if (fno == NULL)
2182                         return -1;
2183
2184                 if (PyInt_Check(fno)) {
2185                         fd = PyInt_AsLong(fno);
2186                         Py_DECREF(fno);
2187                 }
2188                 else if (PyLong_Check(fno)) {
2189                         fd = PyLong_AsLong(fno);
2190                         Py_DECREF(fno);
2191                 }
2192                 else {
2193                         PyErr_SetString(PyExc_TypeError,
2194                                         "fileno() returned a non-integer");
2195                         Py_DECREF(fno);
2196                         return -1;
2197                 }
2198         }
2199         else {
2200                 PyErr_SetString(PyExc_TypeError,
2201                                 "argument must be an int, or have a fileno() method.");
2202                 return -1;
2203         }
2204
2205         if (fd < 0) {
2206                 PyErr_Format(PyExc_ValueError,
2207                              "file descriptor cannot be a negative integer (%i)",
2208                              fd);
2209                 return -1;
2210         }
2211         return fd;
2212 }
2213
2214 /* From here on we need access to the real fgets and fread */
2215 #undef fgets
2216 #undef fread
2217
2218 /*
2219 ** Py_UniversalNewlineFgets is an fgets variation that understands
2220 ** all of \r, \n and \r\n conventions.
2221 ** The stream should be opened in binary mode.
2222 ** If fobj is NULL the routine always does newline conversion, and
2223 ** it may peek one char ahead to gobble the second char in \r\n.
2224 ** If fobj is non-NULL it must be a PyFileObject. In this case there
2225 ** is no readahead but in stead a flag is used to skip a following
2226 ** \n on the next read. Also, if the file is open in binary mode
2227 ** the whole conversion is skipped. Finally, the routine keeps track of
2228 ** the different types of newlines seen.
2229 ** Note that we need no error handling: fgets() treats error and eof
2230 ** identically.
2231 */
2232 char *
2233 Py_UniversalNewlineFgets(char *buf, int n, FILE *stream, PyObject *fobj)
2234 {
2235         char *p = buf;
2236         int c;
2237         int newlinetypes = 0;
2238         int skipnextlf = 0;
2239         int univ_newline = 1;
2240
2241         if (fobj) {
2242                 if (!PyFile_Check(fobj)) {
2243                         errno = ENXIO;  /* What can you do... */
2244                         return NULL;
2245                 }
2246                 univ_newline = ((PyFileObject *)fobj)->f_univ_newline;
2247                 if ( !univ_newline )
2248                         return fgets(buf, n, stream);
2249                 newlinetypes = ((PyFileObject *)fobj)->f_newlinetypes;
2250                 skipnextlf = ((PyFileObject *)fobj)->f_skipnextlf;
2251         }
2252         FLOCKFILE(stream);
2253         c = 'x'; /* Shut up gcc warning */
2254         while (--n > 0 && (c = GETC(stream)) != EOF ) {
2255                 if (skipnextlf ) {
2256                         skipnextlf = 0;
2257                         if (c == '\n') {
2258                                 /* Seeing a \n here with skipnextlf true
2259                                 ** means we saw a \r before.
2260                                 */
2261                                 newlinetypes |= NEWLINE_CRLF;
2262                                 c = GETC(stream);
2263                                 if (c == EOF) break;
2264                         } else {
2265                                 /*
2266                                 ** Note that c == EOF also brings us here,
2267                                 ** so we're okay if the last char in the file
2268                                 ** is a CR.
2269                                 */
2270                                 newlinetypes |= NEWLINE_CR;
2271                         }
2272                 }
2273                 if (c == '\r') {
2274                         /* A \r is translated into a \n, and we skip
2275                         ** an adjacent \n, if any. We don't set the
2276                         ** newlinetypes flag until we've seen the next char.
2277                         */
2278                         skipnextlf = 1;
2279                         c = '\n';
2280                 } else if ( c == '\n') {
2281                         newlinetypes |= NEWLINE_LF;
2282                 }
2283                 *p++ = c;
2284                 if (c == '\n') break;
2285         }
2286         if ( c == EOF && skipnextlf )
2287                 newlinetypes |= NEWLINE_CR;
2288         FUNLOCKFILE(stream);
2289         *p = '\0';
2290         if (fobj) {
2291                 ((PyFileObject *)fobj)->f_newlinetypes = newlinetypes;
2292                 ((PyFileObject *)fobj)->f_skipnextlf = skipnextlf;
2293         } else if ( skipnextlf ) {
2294                 /* If we have no file object we cannot save the
2295                 ** skipnextlf flag. We have to readahead, which
2296                 ** will cause a pause if we're reading from an
2297                 ** interactive stream, but that is very unlikely
2298                 ** unless we're doing something silly like
2299                 ** execfile("/dev/tty").
2300                 */
2301                 c = GETC(stream);
2302                 if ( c != '\n' )
2303                         ungetc(c, stream);
2304         }
2305         if (p == buf)
2306                 return NULL;
2307         return buf;
2308 }
2309
/*
** Py_UniversalNewlineFread is an fread variation that understands
** all of \r, \n and \r\n conventions.
** The stream should be opened in binary mode.
** fobj must be a PyFileObject. In this case there
** is no readahead but instead a flag is used to skip a following
** \n on the next read. Also, if the file is open in binary mode
** the whole conversion is skipped. Finally, the routine keeps track of
** the different types of newlines seen.
**
** buf receives at most n bytes; the return value is the number of bytes
** actually stored there (after translation).  If fobj is NULL or not a
** file object, errno is set to ENXIO and 0 is returned.
*/
size_t
Py_UniversalNewlineFread(char *buf, size_t n,
                         FILE *stream, PyObject *fobj)
{
        char *dst = buf;
        PyFileObject *f = (PyFileObject *)fobj;
        int newlinetypes, skipnextlf;

        assert(buf != NULL);
        assert(stream != NULL);

        if (!fobj || !PyFile_Check(fobj)) {
                errno = ENXIO;  /* What can you do... */
                return 0;
        }
        /* No universal newlines on this file: plain fread, no translation. */
        if (!f->f_univ_newline)
                return fread(buf, 1, n, stream);
        /* Pick up the state left behind by the previous read. */
        newlinetypes = f->f_newlinetypes;
        skipnextlf = f->f_skipnextlf;
        /* Invariant:  n is the number of bytes remaining to be filled
         * in the buffer.
         */
        while (n) {
                size_t nread;
                int shortread;
                /* Translation is done in place: raw bytes are read to dst
                 * and then compacted from src down to dst.  dst never
                 * gets ahead of src.
                 */
                char *src = dst;

                nread = fread(dst, 1, n, stream);
                assert(nread <= n);
                if (nread == 0)
                        break;

                n -= nread; /* assuming 1 byte out for each in; will adjust */
                shortread = n != 0;     /* true iff EOF or error */
                while (nread--) {
                        char c = *src++;
                        if (c == '\r') {
                                /* Save as LF and set flag to skip next LF. */
                                *dst++ = '\n';
                                skipnextlf = 1;
                        }
                        else if (skipnextlf && c == '\n') {
                                /* Skip LF, and remember we saw CR LF. */
                                skipnextlf = 0;
                                newlinetypes |= NEWLINE_CRLF;
                                /* The dropped LF frees one slot again. */
                                ++n;
                        }
                        else {
                                /* Normal char to be stored in buffer.  Also
                                 * update the newlinetypes flag if either this
                                 * is an LF or the previous char was a CR.
                                 */
                                if (c == '\n')
                                        newlinetypes |= NEWLINE_LF;
                                else if (skipnextlf)
                                        newlinetypes |= NEWLINE_CR;
                                *dst++ = c;
                                skipnextlf = 0;
                        }
                }
                if (shortread) {
                        /* If this is EOF, update type flags. */
                        if (skipnextlf && feof(stream))
                                newlinetypes |= NEWLINE_CR;
                        break;
                }
        }
        /* Save the state for the next read on this file. */
        f->f_newlinetypes = newlinetypes;
        f->f_skipnextlf = skipnextlf;
        return dst - buf;
}