Objects/fileobject.c

   1 /* File object implementation */
   2
   3 #define PY_SSIZE_T_CLEAN
   4 #include "Python.h"
   5 #include "structmember.h"
   6
   7 #ifndef DONT_HAVE_SYS_TYPES_H
   8 #include <sys/types.h>
   9 #endif /* DONT_HAVE_SYS_TYPES_H */
  10
  11 #ifdef MS_WINDOWS
  12 #define fileno _fileno
  13 /* can simulate truncate with Win32 API functions; see file_truncate */
  14 #define HAVE_FTRUNCATE
  15 #define WIN32_LEAN_AND_MEAN
  16 #include <windows.h>
  17 #endif
  18
  19 #ifdef _MSC_VER
  20 /* Need GetVersion to see if on NT so safe to use _wfopen */
  21 #define WIN32_LEAN_AND_MEAN
  22 #include <windows.h>
  23 #endif /* _MSC_VER */
  24
  25 #if defined(PYOS_OS2) && defined(PYCC_GCC)
  26 #include <io.h>
  27 #endif
  28
  29 #define BUF(v) PyString_AS_STRING((PyStringObject *)v)
  30
  31 #ifndef DONT_HAVE_ERRNO_H
  32 #include <errno.h>
  33 #endif
  34
  35 #ifdef HAVE_GETC_UNLOCKED
  36 #define GETC(f) getc_unlocked(f)
  37 #define FLOCKFILE(f) flockfile(f)
  38 #define FUNLOCKFILE(f) funlockfile(f)
  39 #else
  40 #define GETC(f) getc(f)
  41 #define FLOCKFILE(f)
  42 #define FUNLOCKFILE(f)
  43 #endif
  44
  45 /* Bits in f_newlinetypes */
  46 #define NEWLINE_UNKNOWN 0       /* No newline seen, yet */
  47 #define NEWLINE_CR 1            /* \r newline seen */
  48 #define NEWLINE_LF 2            /* \n newline seen */
  49 #define NEWLINE_CRLF 4          /* \r\n newline seen */
  50
  51 FILE *
  52 PyFile_AsFile(PyObject *f)
  53 {
  54         if (f == NULL || !PyFile_Check(f))
  55                 return NULL;
  56         else
  57                 return ((PyFileObject *)f)->f_fp;
  58 }
  59
  60 PyObject *
  61 PyFile_Name(PyObject *f)
  62 {
  63         if (f == NULL || !PyFile_Check(f))
  64                 return NULL;
  65         else
  66                 return ((PyFileObject *)f)->f_name;
  67 }
  68
  69 /* On Unix, fopen will succeed for directories.
  70    In Python, there should be no file objects referring to
  71    directories, so we need a check.  */
  72
  73 static PyFileObject*
  74 dircheck(PyFileObject* f)
  75 {
  76 #if defined(HAVE_FSTAT) && defined(S_IFDIR) && defined(EISDIR)
  77         struct stat buf;
  78         if (f->f_fp == NULL)
  79                 return f;
  80         if (fstat(fileno(f->f_fp), &buf) == 0 &&
  81             S_ISDIR(buf.st_mode)) {
  82 #ifdef HAVE_STRERROR
  83                 char *msg = strerror(EISDIR);
  84 #else
  85                 char *msg = "Is a directory";
  86 #endif
  87                 PyObject *exc = PyObject_CallFunction(PyExc_IOError, "(is)",
  88                                                       EISDIR, msg);
  89                 PyErr_SetObject(PyExc_IOError, exc);
  90                 Py_XDECREF(exc);
  91                 return NULL;
  92         }
  93 #endif
  94         return f;
  95 }
  96
  97
  98 static PyObject *
  99 fill_file_fields(PyFileObject *f, FILE *fp, PyObject *name, char *mode,
 100                  int (*close)(FILE *))
 101 {
 102         assert(f != NULL);
 103         assert(PyFile_Check(f));
 104         assert(f->f_fp == NULL);
 105
 106         Py_DECREF(f->f_name);
 107         Py_DECREF(f->f_mode);
 108         Py_DECREF(f->f_encoding);
 109
 110         Py_INCREF (name);
 111         f->f_name = name;
 112
 113         f->f_mode = PyString_FromString(mode);
 114
 115         f->f_close = close;
 116         f->f_softspace = 0;
 117         f->f_binary = strchr(mode,'b') != NULL;
 118         f->f_buf = NULL;
 119         f->f_univ_newline = (strchr(mode, 'U') != NULL);
 120         f->f_newlinetypes = NEWLINE_UNKNOWN;
 121         f->f_skipnextlf = 0;
 122         Py_INCREF(Py_None);
 123         f->f_encoding = Py_None;
 124
 125         if (f->f_name == NULL || f->f_mode == NULL)
 126                 return NULL;
 127         f->f_fp = fp;
 128         f = dircheck(f);
 129         return (PyObject *) f;
 130 }
 131
 132 /* check for known incorrect mode strings - problem is, platforms are
 133    free to accept any mode characters they like and are supposed to
 134    ignore stuff they don't understand... write or append mode with
 135    universal newline support is expressly forbidden by PEP 278. */
 136 /* zero return is kewl - one is un-kewl */
 137 static int
 138 check_the_mode(char *mode)
 139 {
 140         size_t len = strlen(mode);
 141
 142         switch (len) {
 143         case 0:
 144                 PyErr_SetString(PyExc_ValueError, "empty mode string");
 145                 return 1;
 146
 147         /* reject wU, aU */
 148         case 2:
 149                 switch (mode[0]) {
 150                 case 'w':
 151                 case 'a':
 152                         if (mode[1] == 'U') {
 153                                 PyErr_SetString(PyExc_ValueError,
 154                                                 "invalid mode string");
 155                                 return 1;
 156                         }
 157                         break;
 158                 }
 159                 break;
 160
 161         /* reject w+U, a+U, wU+, aU+ */
 162         case 3:
 163                 switch (mode[0]) {
 164                 case 'w':
 165                 case 'a':
 166                         if ((mode[1] == '+' && mode[2] == 'U') ||
 167                             (mode[1] == 'U' && mode[2] == '+')) {
 168                                 PyErr_SetString(PyExc_ValueError,
 169                                                 "invalid mode string");
 170                                 return 1;
 171                         }
 172                         break;
 173                 }
 174                 break;
 175         }
 176
 177         return 0;
 178 }
 179
 180 static PyObject *
 181 open_the_file(PyFileObject *f, char *name, char *mode)
 182 {
 183         assert(f != NULL);
 184         assert(PyFile_Check(f));
 185 #ifdef MS_WINDOWS
 186         /* windows ignores the passed name in order to support Unicode */
 187         assert(f->f_name != NULL);
 188 #else
 189         assert(name != NULL);
 190 #endif
 191         assert(mode != NULL);
 192         assert(f->f_fp == NULL);
 193
 194         if (check_the_mode(mode))
 195                 return NULL;
 196
 197         /* rexec.py can't stop a user from getting the file() constructor --
 198            all they have to do is get *any* file object f, and then do
 199            type(f).  Here we prevent them from doing damage with it. */
 200         if (PyEval_GetRestricted()) {
 201                 PyErr_SetString(PyExc_IOError,
 202                 "file() constructor not accessible in restricted mode");
 203                 return NULL;
 204         }
 205         errno = 0;
 206
 207         if (strcmp(mode, "U") == 0 || strcmp(mode, "rU") == 0)
 208                 mode = "rb";
 209 #ifdef MS_WINDOWS
 210         if (PyUnicode_Check(f->f_name)) {
 211                 PyObject *wmode;
 212                 wmode = PyUnicode_DecodeASCII(mode, strlen(mode), NULL);
 213                 if (f->f_name && wmode) {
 214                         Py_BEGIN_ALLOW_THREADS
 215                         /* PyUnicode_AS_UNICODE OK without thread
 216                            lock as it is a simple dereference. */
 217                         f->f_fp = _wfopen(PyUnicode_AS_UNICODE(f->f_name),
 218                                           PyUnicode_AS_UNICODE(wmode));
 219                         Py_END_ALLOW_THREADS
 220                 }
 221                 Py_XDECREF(wmode);
 222         }
 223 #endif
 224         if (NULL == f->f_fp && NULL != name) {
 225                 Py_BEGIN_ALLOW_THREADS
 226                 f->f_fp = fopen(name, mode);
 227                 Py_END_ALLOW_THREADS
 228         }
 229
 230         if (f->f_fp == NULL) {
 231 #ifdef _MSC_VER
 232                 /* MSVC 6 (Microsoft) leaves errno at 0 for bad mode strings,
 233                  * across all Windows flavors.  When it sets EINVAL varies
 234                  * across Windows flavors, the exact conditions aren't
 235                  * documented, and the answer lies in the OS's implementation
 236                  * of Win32's CreateFile function (whose source is secret).
 237                  * Seems the best we can do is map EINVAL to ENOENT.
 238                  */
 239                 if (errno == 0) /* bad mode string */
 240                         errno = EINVAL;
 241                 else if (errno == EINVAL) /* unknown, but not a mode string */
 242                         errno = ENOENT;
 243 #endif
 244                 if (errno == EINVAL)
 245                         PyErr_Format(PyExc_IOError, "invalid mode: %s",
 246                                      mode);
 247                 else
 248                         PyErr_SetFromErrnoWithFilenameObject(PyExc_IOError, f->f_name);
 249                 f = NULL;
 250         }
 251         if (f != NULL)
 252                 f = dircheck(f);
 253         return (PyObject *)f;
 254 }
 255
 256 PyObject *
 257 PyFile_FromFile(FILE *fp, char *name, char *mode, int (*close)(FILE *))
 258 {
 259         PyFileObject *f = (PyFileObject *)PyFile_Type.tp_new(&PyFile_Type,
 260                                                              NULL, NULL);
 261         if (f != NULL) {
 262                 PyObject *o_name = PyString_FromString(name);
 263                 if (fill_file_fields(f, fp, o_name, mode, close) == NULL) {
 264                         Py_DECREF(f);
 265                         f = NULL;
 266                 }
 267                 Py_DECREF(o_name);
 268         }
 269         return (PyObject *) f;
 270 }
 271
 272 PyObject *
 273 PyFile_FromString(char *name, char *mode)
 274 {
 275         extern int fclose(FILE *);
 276         PyFileObject *f;
 277
 278         f = (PyFileObject *)PyFile_FromFile((FILE *)NULL, name, mode, fclose);
 279         if (f != NULL) {
 280                 if (open_the_file(f, name, mode) == NULL) {
 281                         Py_DECREF(f);
 282                         f = NULL;
 283                 }
 284         }
 285         return (PyObject *)f;
 286 }
 287
 288 void
 289 PyFile_SetBufSize(PyObject *f, int bufsize)
 290 {
 291         PyFileObject *file = (PyFileObject *)f;
 292         if (bufsize >= 0) {
 293                 int type;
 294                 switch (bufsize) {
 295                 case 0:
 296                         type = _IONBF;
 297                         break;
 298 #ifdef HAVE_SETVBUF
 299                 case 1:
 300                         type = _IOLBF;
 301                         bufsize = BUFSIZ;
 302                         break;
 303 #endif
 304                 default:
 305                         type = _IOFBF;
 306 #ifndef HAVE_SETVBUF
 307                         bufsize = BUFSIZ;
 308 #endif
 309                         break;
 310                 }
 311                 fflush(file->f_fp);
 312                 if (type == _IONBF) {
 313                         PyMem_Free(file->f_setbuf);
 314                         file->f_setbuf = NULL;
 315                 } else {
 316                         file->f_setbuf = PyMem_Realloc(file->f_setbuf, bufsize);
 317                 }
 318 #ifdef HAVE_SETVBUF
 319                 setvbuf(file->f_fp, file->f_setbuf, type, bufsize);
 320 #else /* !HAVE_SETVBUF */
 321                 setbuf(file->f_fp, file->f_setbuf);
 322 #endif /* !HAVE_SETVBUF */
 323         }
 324 }
 325
 326 /* Set the encoding used to output Unicode strings.
 327    Returh 1 on success, 0 on failure. */
 328
 329 int
 330 PyFile_SetEncoding(PyObject *f, const char *enc)
 331 {
 332         PyFileObject *file = (PyFileObject*)f;
 333         PyObject *str = PyString_FromString(enc);
 334         if (!str)
 335                 return 0;
 336         Py_DECREF(file->f_encoding);
 337         file->f_encoding = str;
 338         return 1;
 339 }
 340
 341 static PyObject *
 342 err_closed(void)
 343 {
 344         PyErr_SetString(PyExc_ValueError, "I/O operation on closed file");
 345         return NULL;
 346 }
 347
 348 /* Refuse regular file I/O if there's data in the iteration-buffer.
 349  * Mixing them would cause data to arrive out of order, as the read*
 350  * methods don't use the iteration buffer. */
 351 static PyObject *
 352 err_iterbuffered(void)
 353 {
 354         PyErr_SetString(PyExc_ValueError,
 355                 "Mixing iteration and read methods would lose data");
 356         return NULL;
 357 }
 358
 359 static void drop_readahead(PyFileObject *);
 360
 361 /* Methods */
 362
 363 static void
 364 file_dealloc(PyFileObject *f)
 365 {
 366         int sts = 0;
 367         if (f->weakreflist != NULL)
 368                 PyObject_ClearWeakRefs((PyObject *) f);
 369         if (f->f_fp != NULL && f->f_close != NULL) {
 370                 Py_BEGIN_ALLOW_THREADS
 371                 sts = (*f->f_close)(f->f_fp);
 372                 Py_END_ALLOW_THREADS
 373                 if (sts == EOF)
 374 #ifdef HAVE_STRERROR
 375                         PySys_WriteStderr("close failed: [Errno %d] %s\n", errno, strerror(errno));
 376 #else
 377                         PySys_WriteStderr("close failed: [Errno %d]\n", errno);
 378 #endif
 379         }
 380         PyMem_Free(f->f_setbuf);
 381         Py_XDECREF(f->f_name);
 382         Py_XDECREF(f->f_mode);
 383         Py_XDECREF(f->f_encoding);
 384         drop_readahead(f);
 385         f->ob_type->tp_free((PyObject *)f);
 386 }
 387
 388 static PyObject *
 389 file_repr(PyFileObject *f)
 390 {
 391         if (PyUnicode_Check(f->f_name)) {
 392 #ifdef Py_USING_UNICODE
 393                 PyObject *ret = NULL;
 394                 PyObject *name;
 395                 name = PyUnicode_AsUnicodeEscapeString(f->f_name);
 396                 ret = PyString_FromFormat("<%s file u'%s', mode '%s' at %p>",
 397                                    f->f_fp == NULL ? "closed" : "open",
 398                                    PyString_AsString(name),
 399                                    PyString_AsString(f->f_mode),
 400                                    f);
 401                 Py_XDECREF(name);
 402                 return ret;
 403 #endif
 404         } else {
 405                 return PyString_FromFormat("<%s file '%s', mode '%s' at %p>",
 406                                    f->f_fp == NULL ? "closed" : "open",
 407                                    PyString_AsString(f->f_name),
 408                                    PyString_AsString(f->f_mode),
 409                                    f);
 410         }
 411 }
 412
 413 static PyObject *
 414 file_close(PyFileObject *f)
 415 {
 416         int sts = 0;
 417         if (f->f_fp != NULL) {
 418                 if (f->f_close != NULL) {
 419                         Py_BEGIN_ALLOW_THREADS
 420                         errno = 0;
 421                         sts = (*f->f_close)(f->f_fp);
 422                         Py_END_ALLOW_THREADS
 423                 }
 424                 f->f_fp = NULL;
 425         }
 426         PyMem_Free(f->f_setbuf);
 427         f->f_setbuf = NULL;
 428         if (sts == EOF)
 429                 return PyErr_SetFromErrno(PyExc_IOError);
 430         if (sts != 0)
 431                 return PyInt_FromLong((long)sts);
 432         Py_INCREF(Py_None);
 433         return Py_None;
 434 }
 435
 436
 437 /* Our very own off_t-like type, 64-bit if possible */
 438 #if !defined(HAVE_LARGEFILE_SUPPORT)
 439 typedef off_t Py_off_t;
 440 #elif SIZEOF_OFF_T >= 8
 441 typedef off_t Py_off_t;
 442 #elif SIZEOF_FPOS_T >= 8
 443 typedef fpos_t Py_off_t;
 444 #else
 445 #error "Large file support, but neither off_t nor fpos_t is large enough."
 446 #endif
 447
 448
 449 /* a portable fseek() function
 450    return 0 on success, non-zero on failure (with errno set) */
 451 static int
 452 _portable_fseek(FILE *fp, Py_off_t offset, int whence)
 453 {
 454 #if !defined(HAVE_LARGEFILE_SUPPORT)
 455         return fseek(fp, offset, whence);
 456 #elif defined(HAVE_FSEEKO) && SIZEOF_OFF_T >= 8
 457         return fseeko(fp, offset, whence);
 458 #elif defined(HAVE_FSEEK64)
 459         return fseek64(fp, offset, whence);
 460 #elif defined(__BEOS__)
 461         return _fseek(fp, offset, whence);
 462 #elif SIZEOF_FPOS_T >= 8
 463         /* lacking a 64-bit capable fseek(), use a 64-bit capable fsetpos()
 464            and fgetpos() to implement fseek()*/
 465         fpos_t pos;
 466         switch (whence) {
 467         case SEEK_END:
 468 #ifdef MS_WINDOWS
 469                 fflush(fp);
 470                 if (_lseeki64(fileno(fp), 0, 2) == -1)
 471                         return -1;
 472 #else
 473                 if (fseek(fp, 0, SEEK_END) != 0)
 474                         return -1;
 475 #endif
 476                 /* fall through */
 477         case SEEK_CUR:
 478                 if (fgetpos(fp, &pos) != 0)
 479                         return -1;
 480                 offset += pos;
 481                 break;
 482         /* case SEEK_SET: break; */
 483         }
 484         return fsetpos(fp, &offset);
 485 #else
 486 #error "Large file support, but no way to fseek."
 487 #endif
 488 }
 489
 490
 491 /* a portable ftell() function
 492    Return -1 on failure with errno set appropriately, current file
 493    position on success */
 494 static Py_off_t
 495 _portable_ftell(FILE* fp)
 496 {
 497 #if !defined(HAVE_LARGEFILE_SUPPORT)
 498         return ftell(fp);
 499 #elif defined(HAVE_FTELLO) && SIZEOF_OFF_T >= 8
 500         return ftello(fp);
 501 #elif defined(HAVE_FTELL64)
 502         return ftell64(fp);
 503 #elif SIZEOF_FPOS_T >= 8
 504         fpos_t pos;
 505         if (fgetpos(fp, &pos) != 0)
 506                 return -1;
 507         return pos;
 508 #else
 509 #error "Large file support, but no way to ftell."
 510 #endif
 511 }
 512
 513
 514 static PyObject *
 515 file_seek(PyFileObject *f, PyObject *args)
 516 {
 517         int whence;
 518         int ret;
 519         Py_off_t offset;
 520         PyObject *offobj;
 521
 522         if (f->f_fp == NULL)
 523                 return err_closed();
 524         drop_readahead(f);
 525         whence = 0;
 526         if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &whence))
 527                 return NULL;
 528 #if !defined(HAVE_LARGEFILE_SUPPORT)
 529         offset = PyInt_AsLong(offobj);
 530 #else
 531         offset = PyLong_Check(offobj) ?
 532                 PyLong_AsLongLong(offobj) : PyInt_AsLong(offobj);
 533 #endif
 534         if (PyErr_Occurred())
 535                 return NULL;
 536
 537         Py_BEGIN_ALLOW_THREADS
 538         errno = 0;
 539         ret = _portable_fseek(f->f_fp, offset, whence);
 540         Py_END_ALLOW_THREADS
 541
 542         if (ret != 0) {
 543                 PyErr_SetFromErrno(PyExc_IOError);
 544                 clearerr(f->f_fp);
 545                 return NULL;
 546         }
 547         f->f_skipnextlf = 0;
 548         Py_INCREF(Py_None);
 549         return Py_None;
 550 }
 551
 552
 553 #ifdef HAVE_FTRUNCATE
 554 static PyObject *
 555 file_truncate(PyFileObject *f, PyObject *args)
 556 {
 557         Py_off_t newsize;
 558         PyObject *newsizeobj = NULL;
 559         Py_off_t initialpos;
 560         int ret;
 561
 562         if (f->f_fp == NULL)
 563                 return err_closed();
 564         if (!PyArg_UnpackTuple(args, "truncate", 0, 1, &newsizeobj))
 565                 return NULL;
 566
 567         /* Get current file position.  If the file happens to be open for
 568          * update and the last operation was an input operation, C doesn't
 569          * define what the later fflush() will do, but we promise truncate()
 570          * won't change the current position (and fflush() *does* change it
 571          * then at least on Windows).  The easiest thing is to capture
 572          * current pos now and seek back to it at the end.
 573          */
 574         Py_BEGIN_ALLOW_THREADS
 575         errno = 0;
 576         initialpos = _portable_ftell(f->f_fp);
 577         Py_END_ALLOW_THREADS
 578         if (initialpos == -1)
 579                 goto onioerror;
 580
 581         /* Set newsize to current postion if newsizeobj NULL, else to the
 582          * specified value.
 583          */
 584         if (newsizeobj != NULL) {
 585 #if !defined(HAVE_LARGEFILE_SUPPORT)
 586                 newsize = PyInt_AsLong(newsizeobj);
 587 #else
 588                 newsize = PyLong_Check(newsizeobj) ?
 589                                 PyLong_AsLongLong(newsizeobj) :
 590                                 PyInt_AsLong(newsizeobj);
 591 #endif
 592                 if (PyErr_Occurred())
 593                         return NULL;
 594         }
 595         else /* default to current position */
 596                 newsize = initialpos;
 597
 598         /* Flush the stream.  We're mixing stream-level I/O with lower-level
 599          * I/O, and a flush may be necessary to synch both platform views
 600          * of the current file state.
 601          */
 602         Py_BEGIN_ALLOW_THREADS
 603         errno = 0;
 604         ret = fflush(f->f_fp);
 605         Py_END_ALLOW_THREADS
 606         if (ret != 0)
 607                 goto onioerror;
 608
 609 #ifdef MS_WINDOWS
 610         /* MS _chsize doesn't work if newsize doesn't fit in 32 bits,
 611            so don't even try using it. */
 612         {
 613                 HANDLE hFile;
 614
 615                 /* Have to move current pos to desired endpoint on Windows. */
 616                 Py_BEGIN_ALLOW_THREADS
 617                 errno = 0;
 618                 ret = _portable_fseek(f->f_fp, newsize, SEEK_SET) != 0;
 619                 Py_END_ALLOW_THREADS
 620                 if (ret)
 621                         goto onioerror;
 622
 623                 /* Truncate.  Note that this may grow the file! */
 624                 Py_BEGIN_ALLOW_THREADS
 625                 errno = 0;
 626                 hFile = (HANDLE)_get_osfhandle(fileno(f->f_fp));
 627                 ret = hFile == (HANDLE)-1;
 628                 if (ret == 0) {
 629                         ret = SetEndOfFile(hFile) == 0;
 630                         if (ret)
 631                                 errno = EACCES;
 632                 }
 633                 Py_END_ALLOW_THREADS
 634                 if (ret)
 635                         goto onioerror;
 636         }
 637 #else
 638         Py_BEGIN_ALLOW_THREADS
 639         errno = 0;
 640         ret = ftruncate(fileno(f->f_fp), newsize);
 641         Py_END_ALLOW_THREADS
 642         if (ret != 0)
 643                 goto onioerror;
 644 #endif /* !MS_WINDOWS */
 645
 646         /* Restore original file position. */
 647         Py_BEGIN_ALLOW_THREADS
 648         errno = 0;
 649         ret = _portable_fseek(f->f_fp, initialpos, SEEK_SET) != 0;
 650         Py_END_ALLOW_THREADS
 651         if (ret)
 652                 goto onioerror;
 653
 654         Py_INCREF(Py_None);
 655         return Py_None;
 656
 657 onioerror:
 658         PyErr_SetFromErrno(PyExc_IOError);
 659         clearerr(f->f_fp);
 660         return NULL;
 661 }
 662 #endif /* HAVE_FTRUNCATE */
 663
 664 static PyObject *
 665 file_tell(PyFileObject *f)
 666 {
 667         Py_off_t pos;
 668
 669         if (f->f_fp == NULL)
 670                 return err_closed();
 671         Py_BEGIN_ALLOW_THREADS
 672         errno = 0;
 673         pos = _portable_ftell(f->f_fp);
 674         Py_END_ALLOW_THREADS
 675         if (pos == -1) {
 676                 PyErr_SetFromErrno(PyExc_IOError);
 677                 clearerr(f->f_fp);
 678                 return NULL;
 679         }
 680         if (f->f_skipnextlf) {
 681                 int c;
 682                 c = GETC(f->f_fp);
 683                 if (c == '\n') {
 684                         pos++;
 685                         f->f_skipnextlf = 0;
 686                 } else if (c != EOF) ungetc(c, f->f_fp);
 687         }
 688 #if !defined(HAVE_LARGEFILE_SUPPORT)
 689         return PyInt_FromLong(pos);
 690 #else
 691         return PyLong_FromLongLong(pos);
 692 #endif
 693 }
 694
 695 static PyObject *
 696 file_fileno(PyFileObject *f)
 697 {
 698         if (f->f_fp == NULL)
 699                 return err_closed();
 700         return PyInt_FromLong((long) fileno(f->f_fp));
 701 }
 702
 703 static PyObject *
 704 file_flush(PyFileObject *f)
 705 {
 706         int res;
 707
 708         if (f->f_fp == NULL)
 709                 return err_closed();
 710         Py_BEGIN_ALLOW_THREADS
 711         errno = 0;
 712         res = fflush(f->f_fp);
 713         Py_END_ALLOW_THREADS
 714         if (res != 0) {
 715                 PyErr_SetFromErrno(PyExc_IOError);
 716                 clearerr(f->f_fp);
 717                 return NULL;
 718         }
 719         Py_INCREF(Py_None);
 720         return Py_None;
 721 }
 722
 723 static PyObject *
 724 file_isatty(PyFileObject *f)
 725 {
 726         long res;
 727         if (f->f_fp == NULL)
 728                 return err_closed();
 729         Py_BEGIN_ALLOW_THREADS
 730         res = isatty((int)fileno(f->f_fp));
 731         Py_END_ALLOW_THREADS
 732         return PyBool_FromLong(res);
 733 }
 734
 735
 736 #if BUFSIZ < 8192
 737 #define SMALLCHUNK 8192
 738 #else
 739 #define SMALLCHUNK BUFSIZ
 740 #endif
 741
 742 #if SIZEOF_INT < 4
 743 #define BIGCHUNK  (512 * 32)
 744 #else
 745 #define BIGCHUNK  (512 * 1024)
 746 #endif
 747
 748 static size_t
 749 new_buffersize(PyFileObject *f, size_t currentsize)
 750 {
 751 #ifdef HAVE_FSTAT
 752         off_t pos, end;
 753         struct stat st;
 754         if (fstat(fileno(f->f_fp), &st) == 0) {
 755                 end = st.st_size;
 756                 /* The following is not a bug: we really need to call lseek()
 757                    *and* ftell().  The reason is that some stdio libraries
 758                    mistakenly flush their buffer when ftell() is called and
 759                    the lseek() call it makes fails, thereby throwing away
 760                    data that cannot be recovered in any way.  To avoid this,
 761                    we first test lseek(), and only call ftell() if lseek()
 762                    works.  We can't use the lseek() value either, because we
 763                    need to take the amount of buffered data into account.
 764                    (Yet another reason why stdio stinks. :-) */
 765                 pos = lseek(fileno(f->f_fp), 0L, SEEK_CUR);
 766                 if (pos >= 0) {
 767                         pos = ftell(f->f_fp);
 768                 }
 769                 if (pos < 0)
 770                         clearerr(f->f_fp);
 771                 if (end > pos && pos >= 0)
 772                         return currentsize + end - pos + 1;
 773                 /* Add 1 so if the file were to grow we'd notice. */
 774         }
 775 #endif
 776         if (currentsize > SMALLCHUNK) {
 777                 /* Keep doubling until we reach BIGCHUNK;
 778                    then keep adding BIGCHUNK. */
 779                 if (currentsize <= BIGCHUNK)
 780                         return currentsize + currentsize;
 781                 else
 782                         return currentsize + BIGCHUNK;
 783         }
 784         return currentsize + SMALLCHUNK;
 785 }
 786
 787 #if defined(EWOULDBLOCK) && defined(EAGAIN) && EWOULDBLOCK != EAGAIN
 788 #define BLOCKED_ERRNO(x) ((x) == EWOULDBLOCK || (x) == EAGAIN)
 789 #else
 790 #ifdef EWOULDBLOCK
 791 #define BLOCKED_ERRNO(x) ((x) == EWOULDBLOCK)
 792 #else
 793 #ifdef EAGAIN
 794 #define BLOCKED_ERRNO(x) ((x) == EAGAIN)
 795 #else
 796 #define BLOCKED_ERRNO(x) 0
 797 #endif
 798 #endif
 799 #endif
 800
 801 static PyObject *
 802 file_read(PyFileObject *f, PyObject *args)
 803 {
 804         long bytesrequested = -1;
 805         size_t bytesread, buffersize, chunksize;
 806         PyObject *v;
 807
 808         if (f->f_fp == NULL)
 809                 return err_closed();
 810         /* refuse to mix with f.next() */
 811         if (f->f_buf != NULL &&
 812             (f->f_bufend - f->f_bufptr) > 0 &&
 813             f->f_buf[0] != '\0')
 814                 return err_iterbuffered();
 815         if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
 816                 return NULL;
 817         if (bytesrequested < 0)
 818                 buffersize = new_buffersize(f, (size_t)0);
 819         else
 820                 buffersize = bytesrequested;
 821         if (buffersize > INT_MAX) {
 822                 PyErr_SetString(PyExc_OverflowError,
 823         "requested number of bytes is more than a Python string can hold");
 824                 return NULL;
 825         }
 826         v = PyString_FromStringAndSize((char *)NULL, buffersize);
 827         if (v == NULL)
 828                 return NULL;
 829         bytesread = 0;
 830         for (;;) {
 831                 Py_BEGIN_ALLOW_THREADS
 832                 errno = 0;
 833                 chunksize = Py_UniversalNewlineFread(BUF(v) + bytesread,
 834                           buffersize - bytesread, f->f_fp, (PyObject *)f);
 835                 Py_END_ALLOW_THREADS
 836                 if (chunksize == 0) {
 837                         if (!ferror(f->f_fp))
 838                                 break;
 839                         clearerr(f->f_fp);
 840                         /* When in non-blocking mode, data shouldn't
 841                          * be discarded if a blocking signal was
 842                          * received. That will also happen if
 843                          * chunksize != 0, but bytesread < buffersize. */
 844                         if (bytesread > 0 && BLOCKED_ERRNO(errno))
 845                                 break;
 846                         PyErr_SetFromErrno(PyExc_IOError);
 847                         Py_DECREF(v);
 848                         return NULL;
 849                 }
 850                 bytesread += chunksize;
 851                 if (bytesread < buffersize) {
 852                         clearerr(f->f_fp);
 853                         break;
 854                 }
 855                 if (bytesrequested < 0) {
 856                         buffersize = new_buffersize(f, buffersize);
 857                         if (_PyString_Resize(&v, buffersize) < 0)
 858                                 return NULL;
 859                 } else {
 860                         /* Got what was requested. */
 861                         break;
 862                 }
 863         }
 864         if (bytesread != buffersize)
 865                 _PyString_Resize(&v, bytesread);
 866         return v;
 867 }
 868
 869 static PyObject *
 870 file_readinto(PyFileObject *f, PyObject *args)
 871 {
 872         char *ptr;
 873         Py_ssize_t ntodo;
 874         Py_ssize_t ndone, nnow;
 875
 876         if (f->f_fp == NULL)
 877                 return err_closed();
 878         /* refuse to mix with f.next() */
 879         if (f->f_buf != NULL &&
 880             (f->f_bufend - f->f_bufptr) > 0 &&
 881             f->f_buf[0] != '\0')
 882                 return err_iterbuffered();
 883         if (!PyArg_ParseTuple(args, "w#", &ptr, &ntodo))
 884                 return NULL;
 885         ndone = 0;
 886         while (ntodo > 0) {
 887                 Py_BEGIN_ALLOW_THREADS
 888                 errno = 0;
 889                 nnow = Py_UniversalNewlineFread(ptr+ndone, ntodo, f->f_fp,
 890                                                 (PyObject *)f);
 891                 Py_END_ALLOW_THREADS
 892                 if (nnow == 0) {
 893                         if (!ferror(f->f_fp))
 894                                 break;
 895                         PyErr_SetFromErrno(PyExc_IOError);
 896                         clearerr(f->f_fp);
 897                         return NULL;
 898                 }
 899                 ndone += nnow;
 900                 ntodo -= nnow;
 901         }
 902         return PyInt_FromLong((long)ndone);
 903 }
 904
 905 /**************************************************************************
 906 Routine to get next line using platform fgets().
 907
 908 Under MSVC 6:
 909
 910 + MS threadsafe getc is very slow (multiple layers of function calls before+
 911   after each character, to lock+unlock the stream).
 912 + The stream-locking functions are MS-internal -- can't access them from user
 913   code.
 914 + There's nothing Tim could find in the MS C or platform SDK libraries that
 915   can worm around this.
 916 + MS fgets locks/unlocks only once per line; it's the only hook we have.
 917
 918 So we use fgets for speed(!), despite that it's painful.
 919
 920 MS realloc is also slow.
 921
 922 Reports from other platforms on this method vs getc_unlocked (which MS doesn't
 923 have):
 924         Linux           a wash
 925         Solaris         a wash
 926         Tru64 Unix      getline_via_fgets significantly faster
 927
 928 CAUTION:  The C std isn't clear about this:  in those cases where fgets
 929 writes something into the buffer, can it write into any position beyond the
 930 required trailing null byte?  MSVC 6 fgets does not, and no platform is (yet)
 931 known on which it does; and it would be a strange way to code fgets. Still,
 932 getline_via_fgets may not work correctly if it does.  The std test
 933 test_bufio.py should fail if platform fgets() routinely writes beyond the
 934 trailing null byte.  #define DONT_USE_FGETS_IN_GETLINE to disable this code.
 935 **************************************************************************/
 936
 937 /* Use this routine if told to, or by default on non-get_unlocked()
 938  * platforms unless told not to.  Yikes!  Let's spell that out:
 939  * On a platform with getc_unlocked():
 940  *     By default, use getc_unlocked().
 941  *     If you want to use fgets() instead, #define USE_FGETS_IN_GETLINE.
 942  * On a platform without getc_unlocked():
 943  *     By default, use fgets().
 944  *     If you don't want to use fgets(), #define DONT_USE_FGETS_IN_GETLINE.
 945  */
 946 #if !defined(USE_FGETS_IN_GETLINE) && !defined(HAVE_GETC_UNLOCKED)
 947 #define USE_FGETS_IN_GETLINE
 948 #endif
 949
 950 #if defined(DONT_USE_FGETS_IN_GETLINE) && defined(USE_FGETS_IN_GETLINE)
 951 #undef USE_FGETS_IN_GETLINE
 952 #endif
 953
 954 #ifdef USE_FGETS_IN_GETLINE
 955 static PyObject*
 956 getline_via_fgets(FILE *fp)
 957 {
 958 /* INITBUFSIZE is the maximum line length that lets us get away with the fast
 959  * no-realloc, one-fgets()-call path.  Boosting it isn't free, because we have
 960  * to fill this much of the buffer with a known value in order to figure out
 961  * how much of the buffer fgets() overwrites.  So if INITBUFSIZE is larger
 962  * than "most" lines, we waste time filling unused buffer slots.  100 is
 963  * surely adequate for most peoples' email archives, chewing over source code,
 964  * etc -- "regular old text files".
 965  * MAXBUFSIZE is the maximum line length that lets us get away with the less
 966  * fast (but still zippy) no-realloc, two-fgets()-call path.  See above for
 967  * cautions about boosting that.  300 was chosen because the worst real-life
 968  * text-crunching job reported on Python-Dev was a mail-log crawler where over
 969  * half the lines were 254 chars.
 970  */
 971 #define INITBUFSIZE 100
 972 #define MAXBUFSIZE 300
 973         char* p;        /* temp */
 974         char buf[MAXBUFSIZE];
 975         PyObject* v;    /* the string object result */
 976         char* pvfree;   /* address of next free slot */
 977         char* pvend;    /* address one beyond last free slot */
 978         size_t nfree;   /* # of free buffer slots; pvend-pvfree */
 979         size_t total_v_size;  /* total # of slots in buffer */
 980         size_t increment;       /* amount to increment the buffer */
 981
 982         /* Optimize for normal case:  avoid _PyString_Resize if at all
 983          * possible via first reading into stack buffer "buf".
 984          */
 985         total_v_size = INITBUFSIZE;     /* start small and pray */
 986         pvfree = buf;
 987         for (;;) {
 988                 Py_BEGIN_ALLOW_THREADS
 989                 pvend = buf + total_v_size;
 990                 nfree = pvend - pvfree;
 991                 memset(pvfree, '\n', nfree);
 992                 assert(nfree < INT_MAX); /* Should be atmost MAXBUFSIZE */
 993                 p = fgets(pvfree, (int)nfree, fp);
 994                 Py_END_ALLOW_THREADS
 995
 996                 if (p == NULL) {
 997                         clearerr(fp);
 998                         if (PyErr_CheckSignals())
 999                                 return NULL;
1000                         v = PyString_FromStringAndSize(buf, pvfree - buf);
1001                         return v;
1002                 }
1003                 /* fgets read *something* */
1004                 p = memchr(pvfree, '\n', nfree);
1005                 if (p != NULL) {
1006                         /* Did the \n come from fgets or from us?
1007                          * Since fgets stops at the first \n, and then writes
1008                          * \0, if it's from fgets a \0 must be next.  But if
1009                          * that's so, it could not have come from us, since
1010                          * the \n's we filled the buffer with have only more
1011                          * \n's to the right.
1012                          */
1013                         if (p+1 < pvend && *(p+1) == '\0') {
1014                                 /* It's from fgets:  we win!  In particular,
1015                                  * we haven't done any mallocs yet, and can
1016                                  * build the final result on the first try.
1017                                  */
1018                                 ++p;    /* include \n from fgets */
1019                         }
1020                         else {
1021                                 /* Must be from us:  fgets didn't fill the
1022                                  * buffer and didn't find a newline, so it
1023                                  * must be the last and newline-free line of
1024                                  * the file.
1025                                  */
1026                                 assert(p > pvfree && *(p-1) == '\0');
1027                                 --p;    /* don't include \0 from fgets */
1028                         }
1029                         v = PyString_FromStringAndSize(buf, p - buf);
1030                         return v;
1031                 }
1032                 /* yuck:  fgets overwrote all the newlines, i.e. the entire
1033                  * buffer.  So this line isn't over yet, or maybe it is but
1034                  * we're exactly at EOF.  If we haven't already, try using the
1035                  * rest of the stack buffer.
1036                  */
1037                 assert(*(pvend-1) == '\0');
1038                 if (pvfree == buf) {
1039                         pvfree = pvend - 1;     /* overwrite trailing null */
1040                         total_v_size = MAXBUFSIZE;
1041                 }
1042                 else
1043                         break;
1044         }
1045
1046         /* The stack buffer isn't big enough; malloc a string object and read
1047          * into its buffer.
1048          */
1049         total_v_size = MAXBUFSIZE << 1;
1050         v = PyString_FromStringAndSize((char*)NULL, (int)total_v_size);
1051         if (v == NULL)
1052                 return v;
1053         /* copy over everything except the last null byte */
1054         memcpy(BUF(v), buf, MAXBUFSIZE-1);
1055         pvfree = BUF(v) + MAXBUFSIZE - 1;
1056
1057         /* Keep reading stuff into v; if it ever ends successfully, break
1058          * after setting p one beyond the end of the line.  The code here is
1059          * very much like the code above, except reads into v's buffer; see
1060          * the code above for detailed comments about the logic.
1061          */
1062         for (;;) {
1063                 Py_BEGIN_ALLOW_THREADS
1064                 pvend = BUF(v) + total_v_size;
1065                 nfree = pvend - pvfree;
1066                 memset(pvfree, '\n', nfree);
1067                 assert(nfree < INT_MAX);
1068                 p = fgets(pvfree, (int)nfree, fp);
1069                 Py_END_ALLOW_THREADS
1070
1071                 if (p == NULL) {
1072                         clearerr(fp);
1073                         if (PyErr_CheckSignals()) {
1074                                 Py_DECREF(v);
1075                                 return NULL;
1076                         }
1077                         p = pvfree;
1078                         break;
1079                 }
1080                 p = memchr(pvfree, '\n', nfree);
1081                 if (p != NULL) {
1082                         if (p+1 < pvend && *(p+1) == '\0') {
1083                                 /* \n came from fgets */
1084                                 ++p;
1085                                 break;
1086                         }
1087                         /* \n came from us; last line of file, no newline */
1088                         assert(p > pvfree && *(p-1) == '\0');
1089                         --p;
1090                         break;
1091                 }
1092                 /* expand buffer and try again */
1093                 assert(*(pvend-1) == '\0');
1094                 increment = total_v_size >> 2;  /* mild exponential growth */
1095                 total_v_size += increment;
1096                 if (total_v_size > INT_MAX) {
1097                         PyErr_SetString(PyExc_OverflowError,
1098                             "line is longer than a Python string can hold");
1099                         Py_DECREF(v);
1100                         return NULL;
1101                 }
1102                 if (_PyString_Resize(&v, (int)total_v_size) < 0)
1103                         return NULL;
1104                 /* overwrite the trailing null byte */
1105                 pvfree = BUF(v) + (total_v_size - increment - 1);
1106         }
1107         if (BUF(v) + total_v_size != p)
1108                 _PyString_Resize(&v, p - BUF(v));
1109         return v;
1110 #undef INITBUFSIZE
1111 #undef MAXBUFSIZE
1112 }
1113 #endif  /* ifdef USE_FGETS_IN_GETLINE */
1114
1115 /* Internal routine to get a line.
1116    Size argument interpretation:
1117    > 0: max length;
1118    <= 0: read arbitrary line
1119 */
1120
1121 static PyObject *
1122 get_line(PyFileObject *f, int n)
1123 {
1124         FILE *fp = f->f_fp;
1125         int c;
1126         char *buf, *end;
1127         size_t total_v_size;    /* total # of slots in buffer */
1128         size_t used_v_size;     /* # used slots in buffer */
1129         size_t increment;       /* amount to increment the buffer */
1130         PyObject *v;
1131         int newlinetypes = f->f_newlinetypes;
1132         int skipnextlf = f->f_skipnextlf;
1133         int univ_newline = f->f_univ_newline;
1134
1135 #if defined(USE_FGETS_IN_GETLINE)
1136         if (n <= 0 && !univ_newline )
1137                 return getline_via_fgets(fp);
1138 #endif
1139         total_v_size = n > 0 ? n : 100;
1140         v = PyString_FromStringAndSize((char *)NULL, total_v_size);
1141         if (v == NULL)
1142                 return NULL;
1143         buf = BUF(v);
1144         end = buf + total_v_size;
1145
1146         for (;;) {
1147                 Py_BEGIN_ALLOW_THREADS
1148                 FLOCKFILE(fp);
1149                 if (univ_newline) {
1150                         c = 'x'; /* Shut up gcc warning */
1151                         while ( buf != end && (c = GETC(fp)) != EOF ) {
1152                                 if (skipnextlf ) {
1153                                         skipnextlf = 0;
1154                                         if (c == '\n') {
1155                                                 /* Seeing a \n here with
1156                                                  * skipnextlf true means we
1157                                                  * saw a \r before.
1158                                                  */
1159                                                 newlinetypes |= NEWLINE_CRLF;
1160                                                 c = GETC(fp);
1161                                                 if (c == EOF) break;
1162                                         } else {
1163                                                 newlinetypes |= NEWLINE_CR;
1164                                         }
1165                                 }
1166                                 if (c == '\r') {
1167                                         skipnextlf = 1;
1168                                         c = '\n';
1169                                 } else if ( c == '\n')
1170                                         newlinetypes |= NEWLINE_LF;
1171                                 *buf++ = c;
1172                                 if (c == '\n') break;
1173                         }
1174                         if ( c == EOF && skipnextlf )
1175                                 newlinetypes |= NEWLINE_CR;
1176                 } else /* If not universal newlines use the normal loop */
1177                 while ((c = GETC(fp)) != EOF &&
1178                        (*buf++ = c) != '\n' &&
1179                         buf != end)
1180                         ;
1181                 FUNLOCKFILE(fp);
1182                 Py_END_ALLOW_THREADS
1183                 f->f_newlinetypes = newlinetypes;
1184                 f->f_skipnextlf = skipnextlf;
1185                 if (c == '\n')
1186                         break;
1187                 if (c == EOF) {
1188                         if (ferror(fp)) {
1189                                 PyErr_SetFromErrno(PyExc_IOError);
1190                                 clearerr(fp);
1191                                 Py_DECREF(v);
1192                                 return NULL;
1193                         }
1194                         clearerr(fp);
1195                         if (PyErr_CheckSignals()) {
1196                                 Py_DECREF(v);
1197                                 return NULL;
1198                         }
1199                         break;
1200                 }
1201                 /* Must be because buf == end */
1202                 if (n > 0)
1203                         break;
1204                 used_v_size = total_v_size;
1205                 increment = total_v_size >> 2; /* mild exponential growth */
1206                 total_v_size += increment;
1207                 if (total_v_size > INT_MAX) {
1208                         PyErr_SetString(PyExc_OverflowError,
1209                             "line is longer than a Python string can hold");
1210                         Py_DECREF(v);
1211                         return NULL;
1212                 }
1213                 if (_PyString_Resize(&v, total_v_size) < 0)
1214                         return NULL;
1215                 buf = BUF(v) + used_v_size;
1216                 end = BUF(v) + total_v_size;
1217         }
1218
1219         used_v_size = buf - BUF(v);
1220         if (used_v_size != total_v_size)
1221                 _PyString_Resize(&v, used_v_size);
1222         return v;
1223 }
1224
1225 /* External C interface */
1226
1227 PyObject *
1228 PyFile_GetLine(PyObject *f, int n)
1229 {
1230         PyObject *result;
1231
1232         if (f == NULL) {
1233                 PyErr_BadInternalCall();
1234                 return NULL;
1235         }
1236
1237         if (PyFile_Check(f)) {
1238                 PyFileObject *fo = (PyFileObject *)f;
1239                 if (fo->f_fp == NULL)
1240                         return err_closed();
1241                 /* refuse to mix with f.next() */
1242                 if (fo->f_buf != NULL &&
1243                     (fo->f_bufend - fo->f_bufptr) > 0 &&
1244                     fo->f_buf[0] != '\0')
1245                         return err_iterbuffered();
1246                 result = get_line(fo, n);
1247         }
1248         else {
1249                 PyObject *reader;
1250                 PyObject *args;
1251
1252                 reader = PyObject_GetAttrString(f, "readline");
1253                 if (reader == NULL)
1254                         return NULL;
1255                 if (n <= 0)
1256                         args = PyTuple_New(0);
1257                 else
1258                         args = Py_BuildValue("(i)", n);
1259                 if (args == NULL) {
1260                         Py_DECREF(reader);
1261                         return NULL;
1262                 }
1263                 result = PyEval_CallObject(reader, args);
1264                 Py_DECREF(reader);
1265                 Py_DECREF(args);
1266                 if (result != NULL && !PyString_Check(result) &&
1267                     !PyUnicode_Check(result)) {
1268                         Py_DECREF(result);
1269                         result = NULL;
1270                         PyErr_SetString(PyExc_TypeError,
1271                                    "object.readline() returned non-string");
1272                 }
1273         }
1274
1275         if (n < 0 && result != NULL && PyString_Check(result)) {
1276                 char *s = PyString_AS_STRING(result);
1277                 Py_ssize_t len = PyString_GET_SIZE(result);
1278                 if (len == 0) {
1279                         Py_DECREF(result);
1280                         result = NULL;
1281                         PyErr_SetString(PyExc_EOFError,
1282                                         "EOF when reading a line");
1283                 }
1284                 else if (s[len-1] == '\n') {
1285                         if (result->ob_refcnt == 1)
1286                                 _PyString_Resize(&result, len-1);
1287                         else {
1288                                 PyObject *v;
1289                                 v = PyString_FromStringAndSize(s, len-1);
1290                                 Py_DECREF(result);
1291                                 result = v;
1292                         }
1293                 }
1294         }
1295 #ifdef Py_USING_UNICODE
1296         if (n < 0 && result != NULL && PyUnicode_Check(result)) {
1297                 Py_UNICODE *s = PyUnicode_AS_UNICODE(result);
1298                 Py_ssize_t len = PyUnicode_GET_SIZE(result);
1299                 if (len == 0) {
1300                         Py_DECREF(result);
1301                         result = NULL;
1302                         PyErr_SetString(PyExc_EOFError,
1303                                         "EOF when reading a line");
1304                 }
1305                 else if (s[len-1] == '\n') {
1306                         if (result->ob_refcnt == 1)
1307                                 PyUnicode_Resize(&result, len-1);
1308                         else {
1309                                 PyObject *v;
1310                                 v = PyUnicode_FromUnicode(s, len-1);
1311                                 Py_DECREF(result);
1312                                 result = v;
1313                         }
1314                 }
1315         }
1316 #endif
1317         return result;
1318 }
1319
1320 /* Python method */
1321
1322 static PyObject *
1323 file_readline(PyFileObject *f, PyObject *args)
1324 {
1325         int n = -1;
1326
1327         if (f->f_fp == NULL)
1328                 return err_closed();
1329         /* refuse to mix with f.next() */
1330         if (f->f_buf != NULL &&
1331             (f->f_bufend - f->f_bufptr) > 0 &&
1332             f->f_buf[0] != '\0')
1333                 return err_iterbuffered();
1334         if (!PyArg_ParseTuple(args, "|i:readline", &n))
1335                 return NULL;
1336         if (n == 0)
1337                 return PyString_FromString("");
1338         if (n < 0)
1339                 n = 0;
1340         return get_line(f, n);
1341 }
1342
1343 static PyObject *
1344 file_readlines(PyFileObject *f, PyObject *args)
1345 {
1346         long sizehint = 0;
1347         PyObject *list;
1348         PyObject *line;
1349         char small_buffer[SMALLCHUNK];
1350         char *buffer = small_buffer;
1351         size_t buffersize = SMALLCHUNK;
1352         PyObject *big_buffer = NULL;
1353         size_t nfilled = 0;
1354         size_t nread;
1355         size_t totalread = 0;
1356         char *p, *q, *end;
1357         int err;
1358         int shortread = 0;
1359
1360         if (f->f_fp == NULL)
1361                 return err_closed();
1362         /* refuse to mix with f.next() */
1363         if (f->f_buf != NULL &&
1364             (f->f_bufend - f->f_bufptr) > 0 &&
1365             f->f_buf[0] != '\0')
1366                 return err_iterbuffered();
1367         if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
1368                 return NULL;
1369         if ((list = PyList_New(0)) == NULL)
1370                 return NULL;
1371         for (;;) {
1372                 if (shortread)
1373                         nread = 0;
1374                 else {
1375                         Py_BEGIN_ALLOW_THREADS
1376                         errno = 0;
1377                         nread = Py_UniversalNewlineFread(buffer+nfilled,
1378                                 buffersize-nfilled, f->f_fp, (PyObject *)f);
1379                         Py_END_ALLOW_THREADS
1380                         shortread = (nread < buffersize-nfilled);
1381                 }
1382                 if (nread == 0) {
1383                         sizehint = 0;
1384                         if (!ferror(f->f_fp))
1385                                 break;
1386                         PyErr_SetFromErrno(PyExc_IOError);
1387                         clearerr(f->f_fp);
1388                   error:
1389                         Py_DECREF(list);
1390                         list = NULL;
1391                         goto cleanup;
1392                 }
1393                 totalread += nread;
1394                 p = memchr(buffer+nfilled, '\n', nread);
1395                 if (p == NULL) {
1396                         /* Need a larger buffer to fit this line */
1397                         nfilled += nread;
1398                         buffersize *= 2;
1399                         if (buffersize > INT_MAX) {
1400                                 PyErr_SetString(PyExc_OverflowError,
1401                             "line is longer than a Python string can hold");
1402                                 goto error;
1403                         }
1404                         if (big_buffer == NULL) {
1405                                 /* Create the big buffer */
1406                                 big_buffer = PyString_FromStringAndSize(
1407                                         NULL, buffersize);
1408                                 if (big_buffer == NULL)
1409                                         goto error;
1410                                 buffer = PyString_AS_STRING(big_buffer);
1411                                 memcpy(buffer, small_buffer, nfilled);
1412                         }
1413                         else {
1414                                 /* Grow the big buffer */
1415                                 if ( _PyString_Resize(&big_buffer, buffersize) < 0 )
1416                                         goto error;
1417                                 buffer = PyString_AS_STRING(big_buffer);
1418                         }
1419                         continue;
1420                 }
1421                 end = buffer+nfilled+nread;
1422                 q = buffer;
1423                 do {
1424                         /* Process complete lines */
1425                         p++;
1426                         line = PyString_FromStringAndSize(q, p-q);
1427                         if (line == NULL)
1428                                 goto error;
1429                         err = PyList_Append(list, line);
1430                         Py_DECREF(line);
1431                         if (err != 0)
1432                                 goto error;
1433                         q = p;
1434                         p = memchr(q, '\n', end-q);
1435                 } while (p != NULL);
1436                 /* Move the remaining incomplete line to the start */
1437                 nfilled = end-q;
1438                 memmove(buffer, q, nfilled);
1439                 if (sizehint > 0)
1440                         if (totalread >= (size_t)sizehint)
1441                                 break;
1442         }
1443         if (nfilled != 0) {
1444                 /* Partial last line */
1445                 line = PyString_FromStringAndSize(buffer, nfilled);
1446                 if (line == NULL)
1447                         goto error;
1448                 if (sizehint > 0) {
1449                         /* Need to complete the last line */
1450                         PyObject *rest = get_line(f, 0);
1451                         if (rest == NULL) {
1452                                 Py_DECREF(line);
1453                                 goto error;
1454                         }
1455                         PyString_Concat(&line, rest);
1456                         Py_DECREF(rest);
1457                         if (line == NULL)
1458                                 goto error;
1459                 }
1460                 err = PyList_Append(list, line);
1461                 Py_DECREF(line);
1462                 if (err != 0)
1463                         goto error;
1464         }
1465   cleanup:
1466         Py_XDECREF(big_buffer);
1467         return list;
1468 }
1469
1470 static PyObject *
1471 file_write(PyFileObject *f, PyObject *args)
1472 {
1473         char *s;
1474         Py_ssize_t n, n2;
1475         if (f->f_fp == NULL)
1476                 return err_closed();
1477         if (!PyArg_ParseTuple(args, f->f_binary ? "s#" : "t#", &s, &n))
1478                 return NULL;
1479         f->f_softspace = 0;
1480         Py_BEGIN_ALLOW_THREADS
1481         errno = 0;
1482         n2 = fwrite(s, 1, n, f->f_fp);
1483         Py_END_ALLOW_THREADS
1484         if (n2 != n) {
1485                 PyErr_SetFromErrno(PyExc_IOError);
1486                 clearerr(f->f_fp);
1487                 return NULL;
1488         }
1489         Py_INCREF(Py_None);
1490         return Py_None;
1491 }
1492
1493 static PyObject *
1494 file_writelines(PyFileObject *f, PyObject *seq)
1495 {
1496 #define CHUNKSIZE 1000
1497         PyObject *list, *line;
1498         PyObject *it;   /* iter(seq) */
1499         PyObject *result;
1500         int index, islist;
1501         Py_ssize_t i, j, nwritten, len;
1502
1503         assert(seq != NULL);
1504         if (f->f_fp == NULL)
1505                 return err_closed();
1506
1507         result = NULL;
1508         list = NULL;
1509         islist = PyList_Check(seq);
1510         if  (islist)
1511                 it = NULL;
1512         else {
1513                 it = PyObject_GetIter(seq);
1514                 if (it == NULL) {
1515                         PyErr_SetString(PyExc_TypeError,
1516                                 "writelines() requires an iterable argument");
1517                         return NULL;
1518                 }
1519                 /* From here on, fail by going to error, to reclaim "it". */
1520                 list = PyList_New(CHUNKSIZE);
1521                 if (list == NULL)
1522                         goto error;
1523         }
1524
1525         /* Strategy: slurp CHUNKSIZE lines into a private list,
1526            checking that they are all strings, then write that list
1527            without holding the interpreter lock, then come back for more. */
1528         for (index = 0; ; index += CHUNKSIZE) {
1529                 if (islist) {
1530                         Py_XDECREF(list);
1531                         list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
1532                         if (list == NULL)
1533                                 goto error;
1534                         j = PyList_GET_SIZE(list);
1535                 }
1536                 else {
1537                         for (j = 0; j < CHUNKSIZE; j++) {
1538                                 line = PyIter_Next(it);
1539                                 if (line == NULL) {
1540                                         if (PyErr_Occurred())
1541                                                 goto error;
1542                                         break;
1543                                 }
1544                                 PyList_SetItem(list, j, line);
1545                         }
1546                 }
1547                 if (j == 0)
1548                         break;
1549
1550                 /* Check that all entries are indeed strings. If not,
1551                    apply the same rules as for file.write() and
1552                    convert the results to strings. This is slow, but
1553                    seems to be the only way since all conversion APIs
1554                    could potentially execute Python code. */
1555                 for (i = 0; i < j; i++) {
1556                         PyObject *v = PyList_GET_ITEM(list, i);
1557                         if (!PyString_Check(v)) {
1558                                 const char *buffer;
1559                                 if (((f->f_binary &&
1560                                       PyObject_AsReadBuffer(v,
1561                                               (const void**)&buffer,
1562                                                             &len)) ||
1563                                      PyObject_AsCharBuffer(v,
1564                                                            &buffer,
1565                                                            &len))) {
1566                                         PyErr_SetString(PyExc_TypeError,
1567                         "writelines() argument must be a sequence of strings");
1568                                         goto error;
1569                                 }
1570                                 line = PyString_FromStringAndSize(buffer,
1571                                                                   len);
1572                                 if (line == NULL)
1573                                         goto error;
1574                                 Py_DECREF(v);
1575                                 PyList_SET_ITEM(list, i, line);
1576                         }
1577                 }
1578
1579                 /* Since we are releasing the global lock, the
1580                    following code may *not* execute Python code. */
1581                 Py_BEGIN_ALLOW_THREADS
1582                 f->f_softspace = 0;
1583                 errno = 0;
1584                 for (i = 0; i < j; i++) {
1585                         line = PyList_GET_ITEM(list, i);
1586                         len = PyString_GET_SIZE(line);
1587                         nwritten = fwrite(PyString_AS_STRING(line),
1588                                           1, len, f->f_fp);
1589                         if (nwritten != len) {
1590                                 Py_BLOCK_THREADS
1591                                 PyErr_SetFromErrno(PyExc_IOError);
1592                                 clearerr(f->f_fp);
1593                                 goto error;
1594                         }
1595                 }
1596                 Py_END_ALLOW_THREADS
1597
1598                 if (j < CHUNKSIZE)
1599                         break;
1600         }
1601
1602         Py_INCREF(Py_None);
1603         result = Py_None;
1604   error:
1605         Py_XDECREF(list);
1606         Py_XDECREF(it);
1607         return result;
1608 #undef CHUNKSIZE
1609 }
1610
1611 static PyObject *
1612 file_self(PyFileObject *f)
1613 {
1614         if (f->f_fp == NULL)
1615                 return err_closed();
1616         Py_INCREF(f);
1617         return (PyObject *)f;
1618 }
1619
1620 PyDoc_STRVAR(readline_doc,
1621 "readline([size]) -> next line from the file, as a string.\n"
1622 "\n"
1623 "Retain newline.  A non-negative size argument limits the maximum\n"
1624 "number of bytes to return (an incomplete line may be returned then).\n"
1625 "Return an empty string at EOF.");
1626
1627 PyDoc_STRVAR(read_doc,
1628 "read([size]) -> read at most size bytes, returned as a string.\n"
1629 "\n"
1630 "If the size argument is negative or omitted, read until EOF is reached.\n"
1631 "Notice that when in non-blocking mode, less data than what was requested\n"
1632 "may be returned, even if no size parameter was given.");
1633
1634 PyDoc_STRVAR(write_doc,
1635 "write(str) -> None.  Write string str to file.\n"
1636 "\n"
1637 "Note that due to buffering, flush() or close() may be needed before\n"
1638 "the file on disk reflects the data written.");
1639
1640 PyDoc_STRVAR(fileno_doc,
1641 "fileno() -> integer \"file descriptor\".\n"
1642 "\n"
1643 "This is needed for lower-level file interfaces, such os.read().");
1644
1645 PyDoc_STRVAR(seek_doc,
1646 "seek(offset[, whence]) -> None.  Move to new file position.\n"
1647 "\n"
1648 "Argument offset is a byte count.  Optional argument whence defaults to\n"
1649 "0 (offset from start of file, offset should be >= 0); other values are 1\n"
1650 "(move relative to current position, positive or negative), and 2 (move\n"
1651 "relative to end of file, usually negative, although many platforms allow\n"
1652 "seeking beyond the end of a file).  If the file is opened in text mode,\n"
1653 "only offsets returned by tell() are legal.  Use of other offsets causes\n"
1654 "undefined behavior."
1655 "\n"
1656 "Note that not all file objects are seekable.");
1657
1658 #ifdef HAVE_FTRUNCATE
1659 PyDoc_STRVAR(truncate_doc,
1660 "truncate([size]) -> None.  Truncate the file to at most size bytes.\n"
1661 "\n"
1662 "Size defaults to the current file position, as returned by tell().");
1663 #endif
1664
1665 PyDoc_STRVAR(tell_doc,
1666 "tell() -> current file position, an integer (may be a long integer).");
1667
1668 PyDoc_STRVAR(readinto_doc,
1669 "readinto() -> Undocumented.  Don't use this; it may go away.");
1670
1671 PyDoc_STRVAR(readlines_doc,
1672 "readlines([size]) -> list of strings, each a line from the file.\n"
1673 "\n"
1674 "Call readline() repeatedly and return a list of the lines so read.\n"
1675 "The optional size argument, if given, is an approximate bound on the\n"
1676 "total number of bytes in the lines returned.");
1677
1678 PyDoc_STRVAR(xreadlines_doc,
1679 "xreadlines() -> returns self.\n"
1680 "\n"
1681 "For backward compatibility. File objects now include the performance\n"
1682 "optimizations previously implemented in the xreadlines module.");
1683
1684 PyDoc_STRVAR(writelines_doc,
1685 "writelines(sequence_of_strings) -> None.  Write the strings to the file.\n"
1686 "\n"
1687 "Note that newlines are not added.  The sequence can be any iterable object\n"
1688 "producing strings. This is equivalent to calling write() for each string.");
1689
1690 PyDoc_STRVAR(flush_doc,
1691 "flush() -> None.  Flush the internal I/O buffer.");
1692
1693 PyDoc_STRVAR(close_doc,
1694 "close() -> None or (perhaps) an integer.  Close the file.\n"
1695 "\n"
1696 "Sets data attribute .closed to True.  A closed file cannot be used for\n"
1697 "further I/O operations.  close() may be called more than once without\n"
1698 "error.  Some kinds of file objects (for example, opened by popen())\n"
1699 "may return an exit status upon closing.");
1700
1701 PyDoc_STRVAR(isatty_doc,
1702 "isatty() -> true or false.  True if the file is connected to a tty device.");
1703
1704 PyDoc_STRVAR(context_doc,
1705              "__context__() -> self.");
1706
1707 PyDoc_STRVAR(enter_doc,
1708              "__enter__() -> self.");
1709
1710 static PyMethodDef file_methods[] = {
1711         {"readline",  (PyCFunction)file_readline, METH_VARARGS, readline_doc},
1712         {"read",      (PyCFunction)file_read,     METH_VARARGS, read_doc},
1713         {"write",     (PyCFunction)file_write,    METH_VARARGS, write_doc},
1714         {"fileno",    (PyCFunction)file_fileno,   METH_NOARGS,  fileno_doc},
1715         {"seek",      (PyCFunction)file_seek,     METH_VARARGS, seek_doc},
1716 #ifdef HAVE_FTRUNCATE
1717         {"truncate",  (PyCFunction)file_truncate, METH_VARARGS, truncate_doc},
1718 #endif
1719         {"tell",      (PyCFunction)file_tell,     METH_NOARGS,  tell_doc},
1720         {"readinto",  (PyCFunction)file_readinto, METH_VARARGS, readinto_doc},
1721         {"readlines", (PyCFunction)file_readlines,METH_VARARGS, readlines_doc},
1722         {"xreadlines",(PyCFunction)file_self,     METH_NOARGS, xreadlines_doc},
1723         {"writelines",(PyCFunction)file_writelines, METH_O,    writelines_doc},
1724         {"flush",     (PyCFunction)file_flush,    METH_NOARGS,  flush_doc},
1725         {"close",     (PyCFunction)file_close,    METH_NOARGS,  close_doc},
1726         {"isatty",    (PyCFunction)file_isatty,   METH_NOARGS,  isatty_doc},
1727         {"__context__", (PyCFunction)file_self,   METH_NOARGS,  context_doc},
1728         {"__enter__", (PyCFunction)file_self,     METH_NOARGS,  enter_doc},
1729         {"__exit__",  (PyCFunction)file_close,    METH_VARARGS, close_doc},
1730         {NULL,        NULL}             /* sentinel */
1731 };
1732
1733 #define OFF(x) offsetof(PyFileObject, x)
1734
1735 static PyMemberDef file_memberlist[] = {
1736         {"softspace",   T_INT,          OFF(f_softspace), 0,
1737          "flag indicating that a space needs to be printed; used by print"},
1738         {"mode",        T_OBJECT,       OFF(f_mode),    RO,
1739          "file mode ('r', 'U', 'w', 'a', possibly with 'b' or '+' added)"},
1740         {"name",        T_OBJECT,       OFF(f_name),    RO,
1741          "file name"},
1742         {"encoding",    T_OBJECT,       OFF(f_encoding),        RO,
1743          "file encoding"},
1744         /* getattr(f, "closed") is implemented without this table */
1745         {NULL}  /* Sentinel */
1746 };
1747
1748 static PyObject *
1749 get_closed(PyFileObject *f, void *closure)
1750 {
1751         return PyBool_FromLong((long)(f->f_fp == 0));
1752 }
1753 static PyObject *
1754 get_newlines(PyFileObject *f, void *closure)
1755 {
1756         switch (f->f_newlinetypes) {
1757         case NEWLINE_UNKNOWN:
1758                 Py_INCREF(Py_None);
1759                 return Py_None;
1760         case NEWLINE_CR:
1761                 return PyString_FromString("\r");
1762         case NEWLINE_LF:
1763                 return PyString_FromString("\n");
1764         case NEWLINE_CR|NEWLINE_LF:
1765                 return Py_BuildValue("(ss)", "\r", "\n");
1766         case NEWLINE_CRLF:
1767                 return PyString_FromString("\r\n");
1768         case NEWLINE_CR|NEWLINE_CRLF:
1769                 return Py_BuildValue("(ss)", "\r", "\r\n");
1770         case NEWLINE_LF|NEWLINE_CRLF:
1771                 return Py_BuildValue("(ss)", "\n", "\r\n");
1772         case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF:
1773                 return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
1774         default:
1775                 PyErr_Format(PyExc_SystemError,
1776                              "Unknown newlines value 0x%x\n",
1777                              f->f_newlinetypes);
1778                 return NULL;
1779         }
1780 }
1781
1782 static PyGetSetDef file_getsetlist[] = {
1783         {"closed", (getter)get_closed, NULL, "True if the file is closed"},
1784         {"newlines", (getter)get_newlines, NULL,
1785          "end-of-line convention used in this file"},
1786         {0},
1787 };
1788
1789 static void
1790 drop_readahead(PyFileObject *f)
1791 {
1792         if (f->f_buf != NULL) {
1793                 PyMem_Free(f->f_buf);
1794                 f->f_buf = NULL;
1795         }
1796 }
1797
1798 /* Make sure that file has a readahead buffer with at least one byte
1799    (unless at EOF) and no more than bufsize.  Returns negative value on
1800    error, will set MemoryError if bufsize bytes cannot be allocated. */
1801 static int
1802 readahead(PyFileObject *f, int bufsize)
1803 {
1804         Py_ssize_t chunksize;
1805
1806         if (f->f_buf != NULL) {
1807                 if( (f->f_bufend - f->f_bufptr) >= 1)
1808                         return 0;
1809                 else
1810                         drop_readahead(f);
1811         }
1812         if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
1813                 PyErr_NoMemory();
1814                 return -1;
1815         }
1816         Py_BEGIN_ALLOW_THREADS
1817         errno = 0;
1818         chunksize = Py_UniversalNewlineFread(
1819                 f->f_buf, bufsize, f->f_fp, (PyObject *)f);
1820         Py_END_ALLOW_THREADS
1821         if (chunksize == 0) {
1822                 if (ferror(f->f_fp)) {
1823                         PyErr_SetFromErrno(PyExc_IOError);
1824                         clearerr(f->f_fp);
1825                         drop_readahead(f);
1826                         return -1;
1827                 }
1828         }
1829         f->f_bufptr = f->f_buf;
1830         f->f_bufend = f->f_buf + chunksize;
1831         return 0;
1832 }
1833
1834 /* Used by file_iternext.  The returned string will start with 'skip'
1835    uninitialized bytes followed by the remainder of the line. Don't be
1836    horrified by the recursive call: maximum recursion depth is limited by
1837    logarithmic buffer growth to about 50 even when reading a 1gb line. */
1838
1839 static PyStringObject *
1840 readahead_get_line_skip(PyFileObject *f, int skip, int bufsize)
1841 {
1842         PyStringObject* s;
1843         char *bufptr;
1844         char *buf;
1845         Py_ssize_t len;
1846
1847         if (f->f_buf == NULL)
1848                 if (readahead(f, bufsize) < 0)
1849                         return NULL;
1850
1851         len = f->f_bufend - f->f_bufptr;
1852         if (len == 0)
1853                 return (PyStringObject *)
1854                         PyString_FromStringAndSize(NULL, skip);
1855         bufptr = memchr(f->f_bufptr, '\n', len);
1856         if (bufptr != NULL) {
1857                 bufptr++;                       /* Count the '\n' */
1858                 len = bufptr - f->f_bufptr;
1859                 s = (PyStringObject *)
1860                         PyString_FromStringAndSize(NULL, skip+len);
1861                 if (s == NULL)
1862                         return NULL;
1863                 memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
1864                 f->f_bufptr = bufptr;
1865                 if (bufptr == f->f_bufend)
1866                         drop_readahead(f);
1867         } else {
1868                 bufptr = f->f_bufptr;
1869                 buf = f->f_buf;
1870                 f->f_buf = NULL;        /* Force new readahead buffer */
1871                 assert(skip+len < INT_MAX);
1872                 s = readahead_get_line_skip(
1873                         f, (int)(skip+len), bufsize + (bufsize>>2) );
1874                 if (s == NULL) {
1875                         PyMem_Free(buf);
1876                         return NULL;
1877                 }
1878                 memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
1879                 PyMem_Free(buf);
1880         }
1881         return s;
1882 }
1883
1884 /* A larger buffer size may actually decrease performance. */
1885 #define READAHEAD_BUFSIZE 8192
1886
1887 static PyObject *
1888 file_iternext(PyFileObject *f)
1889 {
1890         PyStringObject* l;
1891
1892         if (f->f_fp == NULL)
1893                 return err_closed();
1894
1895         l = readahead_get_line_skip(f, 0, READAHEAD_BUFSIZE);
1896         if (l == NULL || PyString_GET_SIZE(l) == 0) {
1897                 Py_XDECREF(l);
1898                 return NULL;
1899         }
1900         return (PyObject *)l;
1901 }
1902
1903
1904 static PyObject *
1905 file_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1906 {
1907         PyObject *self;
1908         static PyObject *not_yet_string;
1909
1910         assert(type != NULL && type->tp_alloc != NULL);
1911
1912         if (not_yet_string == NULL) {
1913                 not_yet_string = PyString_FromString("<uninitialized file>");
1914                 if (not_yet_string == NULL)
1915                         return NULL;
1916         }
1917
1918         self = type->tp_alloc(type, 0);
1919         if (self != NULL) {
1920                 /* Always fill in the name and mode, so that nobody else
1921                    needs to special-case NULLs there. */
1922                 Py_INCREF(not_yet_string);
1923                 ((PyFileObject *)self)->f_name = not_yet_string;
1924                 Py_INCREF(not_yet_string);
1925                 ((PyFileObject *)self)->f_mode = not_yet_string;
1926                 Py_INCREF(Py_None);
1927                 ((PyFileObject *)self)->f_encoding = Py_None;
1928                 ((PyFileObject *)self)->weakreflist = NULL;
1929         }
1930         return self;
1931 }
1932
1933 static int
1934 file_init(PyObject *self, PyObject *args, PyObject *kwds)
1935 {
1936         PyFileObject *foself = (PyFileObject *)self;
1937         int ret = 0;
1938         static char *kwlist[] = {"name", "mode", "buffering", 0};
1939         char *name = NULL;
1940         char *mode = "r";
1941         int bufsize = -1;
1942         int wideargument = 0;
1943
1944         assert(PyFile_Check(self));
1945         if (foself->f_fp != NULL) {
1946                 /* Have to close the existing file first. */
1947                 PyObject *closeresult = file_close(foself);
1948                 if (closeresult == NULL)
1949                         return -1;
1950                 Py_DECREF(closeresult);
1951         }
1952
1953 #ifdef Py_WIN_WIDE_FILENAMES
1954         if (GetVersion() < 0x80000000) {    /* On NT, so wide API available */
1955                 PyObject *po;
1956                 if (PyArg_ParseTupleAndKeywords(args, kwds, "U|si:file",
1957                                                 kwlist, &po, &mode, &bufsize)) {
1958                         wideargument = 1;
1959                         if (fill_file_fields(foself, NULL, po, mode,
1960                                              fclose) == NULL)
1961                                 goto Error;
1962                 } else {
1963                         /* Drop the argument parsing error as narrow
1964                            strings are also valid. */
1965                         PyErr_Clear();
1966                 }
1967         }
1968 #endif
1969
1970         if (!wideargument) {
1971                 PyObject *o_name;
1972
1973                 if (!PyArg_ParseTupleAndKeywords(args, kwds, "et|si:file", kwlist,
1974                                                  Py_FileSystemDefaultEncoding,
1975                                                  &name,
1976                                                  &mode, &bufsize))
1977                         return -1;
1978
1979                 /* We parse again to get the name as a PyObject */
1980                 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|si:file",
1981                                                  kwlist, &o_name, &mode,
1982                                                  &bufsize))
1983                         return -1;
1984
1985                 if (fill_file_fields(foself, NULL, o_name, mode,
1986                                      fclose) == NULL)
1987                         goto Error;
1988         }
1989         if (open_the_file(foself, name, mode) == NULL)
1990                 goto Error;
1991         foself->f_setbuf = NULL;
1992         PyFile_SetBufSize(self, bufsize);
1993         goto Done;
1994
1995 Error:
1996         ret = -1;
1997         /* fall through */
1998 Done:
1999         PyMem_Free(name); /* free the encoded string */
2000         return ret;
2001 }
2002
2003 PyDoc_VAR(file_doc) =
2004 PyDoc_STR(
2005 "file(name[, mode[, buffering]]) -> file object\n"
2006 "\n"
2007 "Open a file.  The mode can be 'r', 'w' or 'a' for reading (default),\n"
2008 "writing or appending.  The file will be created if it doesn't exist\n"
2009 "when opened for writing or appending; it will be truncated when\n"
2010 "opened for writing.  Add a 'b' to the mode for binary files.\n"
2011 "Add a '+' to the mode to allow simultaneous reading and writing.\n"
2012 "If the buffering argument is given, 0 means unbuffered, 1 means line\n"
2013 "buffered, and larger numbers specify the buffer size.\n"
2014 )
2015 PyDoc_STR(
2016 "Add a 'U' to mode to open the file for input with universal newline\n"
2017 "support.  Any line ending in the input file will be seen as a '\\n'\n"
2018 "in Python.  Also, a file so opened gains the attribute 'newlines';\n"
2019 "the value for this attribute is one of None (no newline read yet),\n"
2020 "'\\r', '\\n', '\\r\\n' or a tuple containing all the newline types seen.\n"
2021 "\n"
2022 "'U' cannot be combined with 'w' or '+' mode.\n"
2023 )
2024 PyDoc_STR(
2025 "\n"
2026 "Note:  open() is an alias for file()."
2027 );
2028
2029 PyTypeObject PyFile_Type = {
2030         PyObject_HEAD_INIT(&PyType_Type)
2031         0,
2032         "file",
2033         sizeof(PyFileObject),
2034         0,
2035         (destructor)file_dealloc,               /* tp_dealloc */
2036         0,                                      /* tp_print */
2037         0,                                      /* tp_getattr */
2038         0,                                      /* tp_setattr */
2039         0,                                      /* tp_compare */
2040         (reprfunc)file_repr,                    /* tp_repr */
2041         0,                                      /* tp_as_number */
2042         0,                                      /* tp_as_sequence */
2043         0,                                      /* tp_as_mapping */
2044         0,                                      /* tp_hash */
2045         0,                                      /* tp_call */
2046         0,                                      /* tp_str */
2047         PyObject_GenericGetAttr,                /* tp_getattro */
2048         /* softspace is writable:  we must supply tp_setattro */
2049         PyObject_GenericSetAttr,                /* tp_setattro */
2050         0,                                      /* tp_as_buffer */
2051         Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_WEAKREFS, /* tp_flags */
2052         file_doc,                               /* tp_doc */
2053         0,                                      /* tp_traverse */
2054         0,                                      /* tp_clear */
2055         0,                                      /* tp_richcompare */
2056         offsetof(PyFileObject, weakreflist),    /* tp_weaklistoffset */
2057         (getiterfunc)file_self,                 /* tp_iter */
2058         (iternextfunc)file_iternext,            /* tp_iternext */
2059         file_methods,                           /* tp_methods */
2060         file_memberlist,                        /* tp_members */
2061         file_getsetlist,                        /* tp_getset */
2062         0,                                      /* tp_base */
2063         0,                                      /* tp_dict */
2064         0,                                      /* tp_descr_get */
2065         0,                                      /* tp_descr_set */
2066         0,                                      /* tp_dictoffset */
2067         file_init,                              /* tp_init */
2068         PyType_GenericAlloc,                    /* tp_alloc */
2069         file_new,                               /* tp_new */
2070         PyObject_Del,                           /* tp_free */
2071 };
2072
2073 /* Interface for the 'soft space' between print items. */
2074
2075 int
2076 PyFile_SoftSpace(PyObject *f, int newflag)
2077 {
2078         long oldflag = 0;
2079         if (f == NULL) {
2080                 /* Do nothing */
2081         }
2082         else if (PyFile_Check(f)) {
2083                 oldflag = ((PyFileObject *)f)->f_softspace;
2084                 ((PyFileObject *)f)->f_softspace = newflag;
2085         }
2086         else {
2087                 PyObject *v;
2088                 v = PyObject_GetAttrString(f, "softspace");
2089                 if (v == NULL)
2090                         PyErr_Clear();
2091                 else {
2092                         if (PyInt_Check(v))
2093                                 oldflag = PyInt_AsLong(v);
2094                         assert(oldflag < INT_MAX);
2095                         Py_DECREF(v);
2096                 }
2097                 v = PyInt_FromLong((long)newflag);
2098                 if (v == NULL)
2099                         PyErr_Clear();
2100                 else {
2101                         if (PyObject_SetAttrString(f, "softspace", v) != 0)
2102                                 PyErr_Clear();
2103                         Py_DECREF(v);
2104                 }
2105         }
2106         return (int)oldflag;
2107 }
2108
2109 /* Interfaces to write objects/strings to file-like objects */
2110
2111 int
2112 PyFile_WriteObject(PyObject *v, PyObject *f, int flags)
2113 {
2114         PyObject *writer, *value, *args, *result;
2115         if (f == NULL) {
2116                 PyErr_SetString(PyExc_TypeError, "writeobject with NULL file");
2117                 return -1;
2118         }
2119         else if (PyFile_Check(f)) {
2120                 FILE *fp = PyFile_AsFile(f);
2121 #ifdef Py_USING_UNICODE
2122                 PyObject *enc = ((PyFileObject*)f)->f_encoding;
2123                 int result;
2124 #endif
2125                 if (fp == NULL) {
2126                         err_closed();
2127                         return -1;
2128                 }
2129 #ifdef Py_USING_UNICODE
2130                 if ((flags & Py_PRINT_RAW) &&
2131                     PyUnicode_Check(v) && enc != Py_None) {
2132                         char *cenc = PyString_AS_STRING(enc);
2133                         value = PyUnicode_AsEncodedString(v, cenc, "strict");
2134                         if (value == NULL)
2135                                 return -1;
2136                 } else {
2137                         value = v;
2138                         Py_INCREF(value);
2139                 }
2140                 result = PyObject_Print(value, fp, flags);
2141                 Py_DECREF(value);
2142                 return result;
2143 #else
2144                 return PyObject_Print(v, fp, flags);
2145 #endif
2146         }
2147         writer = PyObject_GetAttrString(f, "write");
2148         if (writer == NULL)
2149                 return -1;
2150         if (flags & Py_PRINT_RAW) {
2151                 if (PyUnicode_Check(v)) {
2152                         value = v;
2153                         Py_INCREF(value);
2154                 } else
2155                         value = PyObject_Str(v);
2156         }
2157         else
2158                 value = PyObject_Repr(v);
2159         if (value == NULL) {
2160                 Py_DECREF(writer);
2161                 return -1;
2162         }
2163         args = PyTuple_Pack(1, value);
2164         if (args == NULL) {
2165                 Py_DECREF(value);
2166                 Py_DECREF(writer);
2167                 return -1;
2168         }
2169         result = PyEval_CallObject(writer, args);
2170         Py_DECREF(args);
2171         Py_DECREF(value);
2172         Py_DECREF(writer);
2173         if (result == NULL)
2174                 return -1;
2175         Py_DECREF(result);
2176         return 0;
2177 }
2178
2179 int
2180 PyFile_WriteString(const char *s, PyObject *f)
2181 {
2182         if (f == NULL) {
2183                 /* Should be caused by a pre-existing error */
2184                 if (!PyErr_Occurred())
2185                         PyErr_SetString(PyExc_SystemError,
2186                                         "null file for PyFile_WriteString");
2187                 return -1;
2188         }
2189         else if (PyFile_Check(f)) {
2190                 FILE *fp = PyFile_AsFile(f);
2191                 if (fp == NULL) {
2192                         err_closed();
2193                         return -1;
2194                 }
2195                 fputs(s, fp);
2196                 return 0;
2197         }
2198         else if (!PyErr_Occurred()) {
2199                 PyObject *v = PyString_FromString(s);
2200                 int err;
2201                 if (v == NULL)
2202                         return -1;
2203                 err = PyFile_WriteObject(v, f, Py_PRINT_RAW);
2204                 Py_DECREF(v);
2205                 return err;
2206         }
2207         else
2208                 return -1;
2209 }
2210
2211 /* Try to get a file-descriptor from a Python object.  If the object
2212    is an integer or long integer, its value is returned.  If not, the
2213    object's fileno() method is called if it exists; the method must return
2214    an integer or long integer, which is returned as the file descriptor value.
2215    -1 is returned on failure.
2216 */
2217
2218 int PyObject_AsFileDescriptor(PyObject *o)
2219 {
2220         int fd;
2221         PyObject *meth;
2222
2223         if (PyInt_Check(o)) {
2224                 fd = PyInt_AsLong(o);
2225         }
2226         else if (PyLong_Check(o)) {
2227                 fd = PyLong_AsLong(o);
2228         }
2229         else if ((meth = PyObject_GetAttrString(o, "fileno")) != NULL)
2230         {
2231                 PyObject *fno = PyEval_CallObject(meth, NULL);
2232                 Py_DECREF(meth);
2233                 if (fno == NULL)
2234                         return -1;
2235
2236                 if (PyInt_Check(fno)) {
2237                         fd = PyInt_AsLong(fno);
2238                         Py_DECREF(fno);
2239                 }
2240                 else if (PyLong_Check(fno)) {
2241                         fd = PyLong_AsLong(fno);
2242                         Py_DECREF(fno);
2243                 }
2244                 else {
2245                         PyErr_SetString(PyExc_TypeError,
2246                                         "fileno() returned a non-integer");
2247                         Py_DECREF(fno);
2248                         return -1;
2249                 }
2250         }
2251         else {
2252                 PyErr_SetString(PyExc_TypeError,
2253                                 "argument must be an int, or have a fileno() method.");
2254                 return -1;
2255         }
2256
2257         if (fd < 0) {
2258                 PyErr_Format(PyExc_ValueError,
2259                              "file descriptor cannot be a negative integer (%i)",
2260                              fd);
2261                 return -1;
2262         }
2263         return fd;
2264 }
2265
2266 /* From here on we need access to the real fgets and fread */
2267 #undef fgets
2268 #undef fread
2269
2270 /*
2271 ** Py_UniversalNewlineFgets is an fgets variation that understands
2272 ** all of \r, \n and \r\n conventions.
2273 ** The stream should be opened in binary mode.
2274 ** If fobj is NULL the routine always does newline conversion, and
2275 ** it may peek one char ahead to gobble the second char in \r\n.
2276 ** If fobj is non-NULL it must be a PyFileObject. In this case there
2277 ** is no readahead but in stead a flag is used to skip a following
2278 ** \n on the next read. Also, if the file is open in binary mode
2279 ** the whole conversion is skipped. Finally, the routine keeps track of
2280 ** the different types of newlines seen.
2281 ** Note that we need no error handling: fgets() treats error and eof
2282 ** identically.
2283 */
2284 char *
2285 Py_UniversalNewlineFgets(char *buf, int n, FILE *stream, PyObject *fobj)
2286 {
2287         char *p = buf;
2288         int c;
2289         int newlinetypes = 0;
2290         int skipnextlf = 0;
2291         int univ_newline = 1;
2292
2293         if (fobj) {
2294                 if (!PyFile_Check(fobj)) {
2295                         errno = ENXIO;  /* What can you do... */
2296                         return NULL;
2297                 }
2298                 univ_newline = ((PyFileObject *)fobj)->f_univ_newline;
2299                 if ( !univ_newline )
2300                         return fgets(buf, n, stream);
2301                 newlinetypes = ((PyFileObject *)fobj)->f_newlinetypes;
2302                 skipnextlf = ((PyFileObject *)fobj)->f_skipnextlf;
2303         }
2304         FLOCKFILE(stream);
2305         c = 'x'; /* Shut up gcc warning */
2306         while (--n > 0 && (c = GETC(stream)) != EOF ) {
2307                 if (skipnextlf ) {
2308                         skipnextlf = 0;
2309                         if (c == '\n') {
2310                                 /* Seeing a \n here with skipnextlf true
2311                                 ** means we saw a \r before.
2312                                 */
2313                                 newlinetypes |= NEWLINE_CRLF;
2314                                 c = GETC(stream);
2315                                 if (c == EOF) break;
2316                         } else {
2317                                 /*
2318                                 ** Note that c == EOF also brings us here,
2319                                 ** so we're okay if the last char in the file
2320                                 ** is a CR.
2321                                 */
2322                                 newlinetypes |= NEWLINE_CR;
2323                         }
2324                 }
2325                 if (c == '\r') {
2326                         /* A \r is translated into a \n, and we skip
2327                         ** an adjacent \n, if any. We don't set the
2328                         ** newlinetypes flag until we've seen the next char.
2329                         */
2330                         skipnextlf = 1;
2331                         c = '\n';
2332                 } else if ( c == '\n') {
2333                         newlinetypes |= NEWLINE_LF;
2334                 }
2335                 *p++ = c;
2336                 if (c == '\n') break;
2337         }
2338         if ( c == EOF && skipnextlf )
2339                 newlinetypes |= NEWLINE_CR;
2340         FUNLOCKFILE(stream);
2341         *p = '\0';
2342         if (fobj) {
2343                 ((PyFileObject *)fobj)->f_newlinetypes = newlinetypes;
2344                 ((PyFileObject *)fobj)->f_skipnextlf = skipnextlf;
2345         } else if ( skipnextlf ) {
2346                 /* If we have no file object we cannot save the
2347                 ** skipnextlf flag. We have to readahead, which
2348                 ** will cause a pause if we're reading from an
2349                 ** interactive stream, but that is very unlikely
2350                 ** unless we're doing something silly like
2351                 ** execfile("/dev/tty").
2352                 */
2353                 c = GETC(stream);
2354                 if ( c != '\n' )
2355                         ungetc(c, stream);
2356         }
2357         if (p == buf)
2358                 return NULL;
2359         return buf;
2360 }
2361
2362 /*
2363 ** Py_UniversalNewlineFread is an fread variation that understands
2364 ** all of \r, \n and \r\n conventions.
2365 ** The stream should be opened in binary mode.
2366 ** fobj must be a PyFileObject. In this case there
2367 ** is no readahead but in stead a flag is used to skip a following
2368 ** \n on the next read. Also, if the file is open in binary mode
2369 ** the whole conversion is skipped. Finally, the routine keeps track of
2370 ** the different types of newlines seen.
2371 */
2372 size_t
2373 Py_UniversalNewlineFread(char *buf, size_t n,
2374                          FILE *stream, PyObject *fobj)
2375 {
2376         char *dst = buf;
2377         PyFileObject *f = (PyFileObject *)fobj;
2378         int newlinetypes, skipnextlf;
2379
2380         assert(buf != NULL);
2381         assert(stream != NULL);
2382
2383         if (!fobj || !PyFile_Check(fobj)) {
2384                 errno = ENXIO;  /* What can you do... */
2385                 return 0;
2386         }
2387         if (!f->f_univ_newline)
2388                 return fread(buf, 1, n, stream);
2389         newlinetypes = f->f_newlinetypes;
2390         skipnextlf = f->f_skipnextlf;
2391         /* Invariant:  n is the number of bytes remaining to be filled
2392          * in the buffer.
2393          */
2394         while (n) {
2395                 size_t nread;
2396                 int shortread;
2397                 char *src = dst;
2398
2399                 nread = fread(dst, 1, n, stream);
2400                 assert(nread <= n);
2401                 if (nread == 0)
2402                         break;
2403
2404                 n -= nread; /* assuming 1 byte out for each in; will adjust */
2405                 shortread = n != 0;     /* true iff EOF or error */
2406                 while (nread--) {
2407                         char c = *src++;
2408                         if (c == '\r') {
2409                                 /* Save as LF and set flag to skip next LF. */
2410                                 *dst++ = '\n';
2411                                 skipnextlf = 1;
2412                         }
2413                         else if (skipnextlf && c == '\n') {
2414                                 /* Skip LF, and remember we saw CR LF. */
2415                                 skipnextlf = 0;
2416                                 newlinetypes |= NEWLINE_CRLF;
2417                                 ++n;
2418                         }
2419                         else {
2420                                 /* Normal char to be stored in buffer.  Also
2421                                  * update the newlinetypes flag if either this
2422                                  * is an LF or the previous char was a CR.
2423                                  */
2424                                 if (c == '\n')
2425                                         newlinetypes |= NEWLINE_LF;
2426                                 else if (skipnextlf)
2427                                         newlinetypes |= NEWLINE_CR;
2428                                 *dst++ = c;
2429                                 skipnextlf = 0;
2430                         }
2431                 }
2432                 if (shortread) {
2433                         /* If this is EOF, update type flags. */
2434                         if (skipnextlf && feof(stream))
2435                                 newlinetypes |= NEWLINE_CR;
2436                         break;
2437                 }
2438         }
2439         f->f_newlinetypes = newlinetypes;
2440         f->f_skipnextlf = skipnextlf;
2441         return dst - buf;
2442 }