Add better error reporting for MemoryErrors caused by str->float conversions.
[python.git] / Objects / fileobject.c
blob32207027e0ce25d33691726c5bc4c78b8794000c
1 /* File object implementation */
3 #define PY_SSIZE_T_CLEAN
4 #include "Python.h"
5 #include "structmember.h"
7 #ifdef HAVE_SYS_TYPES_H
8 #include <sys/types.h>
9 #endif /* HAVE_SYS_TYPES_H */
11 #ifdef MS_WINDOWS
12 #define fileno _fileno
13 /* can simulate truncate with Win32 API functions; see file_truncate */
14 #define HAVE_FTRUNCATE
15 #define WIN32_LEAN_AND_MEAN
16 #include <windows.h>
17 #endif
19 #if defined(PYOS_OS2) && defined(PYCC_GCC)
20 #include <io.h>
21 #endif
23 #define BUF(v) PyString_AS_STRING((PyStringObject *)v)
25 #ifndef DONT_HAVE_ERRNO_H
26 #include <errno.h>
27 #endif
29 #ifdef HAVE_GETC_UNLOCKED
30 #define GETC(f) getc_unlocked(f)
31 #define FLOCKFILE(f) flockfile(f)
32 #define FUNLOCKFILE(f) funlockfile(f)
33 #else
34 #define GETC(f) getc(f)
35 #define FLOCKFILE(f)
36 #define FUNLOCKFILE(f)
37 #endif
39 /* Bits in f_newlinetypes */
40 #define NEWLINE_UNKNOWN 0 /* No newline seen, yet */
41 #define NEWLINE_CR 1 /* \r newline seen */
42 #define NEWLINE_LF 2 /* \n newline seen */
43 #define NEWLINE_CRLF 4 /* \r\n newline seen */
46 * These macros release the GIL while preventing the f_close() function being
47 * called in the interval between them. For that purpose, a running total of
48 * the number of currently running unlocked code sections is kept in
49 * the unlocked_count field of the PyFileObject. The close() method raises
50 * an IOError if that field is non-zero. See issue #815646, #595601.
53 #define FILE_BEGIN_ALLOW_THREADS(fobj) \
54 { \
55 fobj->unlocked_count++; \
56 Py_BEGIN_ALLOW_THREADS
58 #define FILE_END_ALLOW_THREADS(fobj) \
59 Py_END_ALLOW_THREADS \
60 fobj->unlocked_count--; \
61 assert(fobj->unlocked_count >= 0); \
64 #define FILE_ABORT_ALLOW_THREADS(fobj) \
65 Py_BLOCK_THREADS \
66 fobj->unlocked_count--; \
67 assert(fobj->unlocked_count >= 0);
69 #ifdef __cplusplus
70 extern "C" {
71 #endif
73 FILE *
74 PyFile_AsFile(PyObject *f)
76 if (f == NULL || !PyFile_Check(f))
77 return NULL;
78 else
79 return ((PyFileObject *)f)->f_fp;
82 void PyFile_IncUseCount(PyFileObject *fobj)
84 fobj->unlocked_count++;
87 void PyFile_DecUseCount(PyFileObject *fobj)
89 fobj->unlocked_count--;
90 assert(fobj->unlocked_count >= 0);
93 PyObject *
94 PyFile_Name(PyObject *f)
96 if (f == NULL || !PyFile_Check(f))
97 return NULL;
98 else
99 return ((PyFileObject *)f)->f_name;
102 /* This is a safe wrapper around PyObject_Print to print to the FILE
103 of a PyFileObject. PyObject_Print releases the GIL but knows nothing
104 about PyFileObject. */
105 static int
106 file_PyObject_Print(PyObject *op, PyFileObject *f, int flags)
108 int result;
109 PyFile_IncUseCount(f);
110 result = PyObject_Print(op, f->f_fp, flags);
111 PyFile_DecUseCount(f);
112 return result;
115 /* On Unix, fopen will succeed for directories.
116 In Python, there should be no file objects referring to
117 directories, so we need a check. */
119 static PyFileObject*
120 dircheck(PyFileObject* f)
122 #if defined(HAVE_FSTAT) && defined(S_IFDIR) && defined(EISDIR)
123 struct stat buf;
124 if (f->f_fp == NULL)
125 return f;
126 if (fstat(fileno(f->f_fp), &buf) == 0 &&
127 S_ISDIR(buf.st_mode)) {
128 char *msg = strerror(EISDIR);
129 PyObject *exc = PyObject_CallFunction(PyExc_IOError, "(isO)",
130 EISDIR, msg, f->f_name);
131 PyErr_SetObject(PyExc_IOError, exc);
132 Py_XDECREF(exc);
133 return NULL;
135 #endif
136 return f;
140 static PyObject *
141 fill_file_fields(PyFileObject *f, FILE *fp, PyObject *name, char *mode,
142 int (*close)(FILE *))
144 assert(name != NULL);
145 assert(f != NULL);
146 assert(PyFile_Check(f));
147 assert(f->f_fp == NULL);
149 Py_DECREF(f->f_name);
150 Py_DECREF(f->f_mode);
151 Py_DECREF(f->f_encoding);
152 Py_DECREF(f->f_errors);
154 Py_INCREF(name);
155 f->f_name = name;
157 f->f_mode = PyString_FromString(mode);
159 f->f_close = close;
160 f->f_softspace = 0;
161 f->f_binary = strchr(mode,'b') != NULL;
162 f->f_buf = NULL;
163 f->f_univ_newline = (strchr(mode, 'U') != NULL);
164 f->f_newlinetypes = NEWLINE_UNKNOWN;
165 f->f_skipnextlf = 0;
166 Py_INCREF(Py_None);
167 f->f_encoding = Py_None;
168 Py_INCREF(Py_None);
169 f->f_errors = Py_None;
171 if (f->f_mode == NULL)
172 return NULL;
173 f->f_fp = fp;
174 f = dircheck(f);
175 return (PyObject *) f;
178 #if defined _MSC_VER && _MSC_VER >= 1400 && defined(__STDC_SECURE_LIB__)
179 #define Py_VERIFY_WINNT
/* The CRT on Windows compiled with Visual Studio 2005 and higher may
 * assert if given invalid mode strings.  This is all fine and well
 * in static languages like C, where the mode string is typically
 * hard-coded.  But in Python, where we pass in the mode string from
 * the user, we need to verify it manually first.
 */
/* Check that a mode string has the shape the Windows CRT accepts, so
 * that fopen() is never handed a string that trips a hard assertion.
 *
 * Accepted shape:
 *   - optional leading spaces, then exactly one of 'r', 'w', 'a';
 *   - then any mix of: spaces and 'N' (ignored), each of '+', 'T', 'D'
 *     at most once, and at most one letter from each of the pairs
 *     b/t, c/n, S/R;
 *   - optionally ", ccs = ENC" where ENC is UTF-8, UTF-16LE or UNICODE,
 *     with optional spaces around the tokens and at the end.
 *
 * Returns 1 if the string is acceptable, 0 otherwise.  No Python
 * exception is set; that is left to the caller.
 */
static int _PyVerify_Mode_WINNT(const char *mode)
{
    int singles = 0;    /* bit i set: "+TD"[i] already seen */
    int pairs = 0;      /* bit i set: pair i of "btcnSR" already used */
    int encoding = 0;   /* set once a ',' introduces a ccs= clause */
    const char *s, *c;

    while (*mode == ' ')    /* strip leading spaces */
        ++mode;
    /* Must start with one of r/w/a.  The explicit '\0' test matters:
       strchr() also matches the terminator of its search string, which
       would let an empty mode through and make the loop below read
       past the end of the buffer. */
    if (*mode == '\0' || !strchr("rwa", *mode))
        return 0;
    while (*++mode) {
        if (*mode == ' ' || *mode == 'N')   /* ignore spaces and N */
            continue;
        s = "+TD";      /* each of these may appear only once */
        c = strchr(s, *mode);
        if (c) {
            /* c points into s, so c - s is the non-negative index. */
            int idx = (int)(c - s);
            if (singles & (1 << idx))
                return 0;
            singles |= (1 << idx);
            continue;
        }
        s = "btcnSR";   /* only one letter of each pair is allowed */
        c = strchr(s, *mode);
        if (c) {
            int idx = (int)((c - s) / 2);
            if (pairs & (1 << idx))
                return 0;
            pairs |= (1 << idx);
            continue;
        }
        if (*mode == ',') {
            encoding = 1;
            break;
        }
        return 0;       /* found an invalid char */
    }

    if (encoding) {
        static const char *const e[] = {"UTF-8", "UTF-16LE", "UNICODE"};
        const size_t count = sizeof(e) / sizeof(e[0]);
        size_t i;

        ++mode;         /* step over the ',' that ended the loop */
        while (*mode == ' ')
            ++mode;
        /* find 'ccs =' */
        if (strncmp(mode, "ccs", 3))
            return 0;
        mode += 3;
        while (*mode == ' ')
            ++mode;
        if (*mode != '=')
            return 0;
        ++mode;         /* step over the '=' */
        while (*mode == ' ')
            ++mode;
        for (i = 0; i < count; ++i) {
            size_t l = strlen(e[i]);
            if (!strncmp(mode, e[i], l)) {
                mode += l;      /* found a valid encoding */
                break;
            }
        }
        if (i == count)
            return 0;
    }
    /* skip trailing spaces */
    while (*mode == ' ')
        ++mode;

    return *mode == '\0';   /* must be at the end of the string */
}
257 #endif
259 /* check for known incorrect mode strings - problem is, platforms are
260 free to accept any mode characters they like and are supposed to
261 ignore stuff they don't understand... write or append mode with
262 universal newline support is expressly forbidden by PEP 278.
263 Additionally, remove the 'U' from the mode string as platforms
264 won't know what it is. Non-zero return signals an exception */
266 _PyFile_SanitizeMode(char *mode)
268 char *upos;
269 size_t len = strlen(mode);
271 if (!len) {
272 PyErr_SetString(PyExc_ValueError, "empty mode string");
273 return -1;
276 upos = strchr(mode, 'U');
277 if (upos) {
278 memmove(upos, upos+1, len-(upos-mode)); /* incl null char */
280 if (mode[0] == 'w' || mode[0] == 'a') {
281 PyErr_Format(PyExc_ValueError, "universal newline "
282 "mode can only be used with modes "
283 "starting with 'r'");
284 return -1;
287 if (mode[0] != 'r') {
288 memmove(mode+1, mode, strlen(mode)+1);
289 mode[0] = 'r';
292 if (!strchr(mode, 'b')) {
293 memmove(mode+2, mode+1, strlen(mode));
294 mode[1] = 'b';
296 } else if (mode[0] != 'r' && mode[0] != 'w' && mode[0] != 'a') {
297 PyErr_Format(PyExc_ValueError, "mode string must begin with "
298 "one of 'r', 'w', 'a' or 'U', not '%.200s'", mode);
299 return -1;
301 #ifdef Py_VERIFY_WINNT
302 /* additional checks on NT with visual studio 2005 and higher */
303 if (!_PyVerify_Mode_WINNT(mode)) {
304 PyErr_Format(PyExc_ValueError, "Invalid mode ('%.50s')", mode);
305 return -1;
307 #endif
308 return 0;
311 static PyObject *
312 open_the_file(PyFileObject *f, char *name, char *mode)
314 char *newmode;
315 assert(f != NULL);
316 assert(PyFile_Check(f));
317 #ifdef MS_WINDOWS
318 /* windows ignores the passed name in order to support Unicode */
319 assert(f->f_name != NULL);
320 #else
321 assert(name != NULL);
322 #endif
323 assert(mode != NULL);
324 assert(f->f_fp == NULL);
326 /* probably need to replace 'U' by 'rb' */
327 newmode = PyMem_MALLOC(strlen(mode) + 3);
328 if (!newmode) {
329 PyErr_NoMemory();
330 return NULL;
332 strcpy(newmode, mode);
334 if (_PyFile_SanitizeMode(newmode)) {
335 f = NULL;
336 goto cleanup;
339 /* rexec.py can't stop a user from getting the file() constructor --
340 all they have to do is get *any* file object f, and then do
341 type(f). Here we prevent them from doing damage with it. */
342 if (PyEval_GetRestricted()) {
343 PyErr_SetString(PyExc_IOError,
344 "file() constructor not accessible in restricted mode");
345 f = NULL;
346 goto cleanup;
348 errno = 0;
350 #ifdef MS_WINDOWS
351 if (PyUnicode_Check(f->f_name)) {
352 PyObject *wmode;
353 wmode = PyUnicode_DecodeASCII(newmode, strlen(newmode), NULL);
354 if (f->f_name && wmode) {
355 FILE_BEGIN_ALLOW_THREADS(f)
356 /* PyUnicode_AS_UNICODE OK without thread
357 lock as it is a simple dereference. */
358 f->f_fp = _wfopen(PyUnicode_AS_UNICODE(f->f_name),
359 PyUnicode_AS_UNICODE(wmode));
360 FILE_END_ALLOW_THREADS(f)
362 Py_XDECREF(wmode);
364 #endif
365 if (NULL == f->f_fp && NULL != name) {
366 FILE_BEGIN_ALLOW_THREADS(f)
367 f->f_fp = fopen(name, newmode);
368 FILE_END_ALLOW_THREADS(f)
371 if (f->f_fp == NULL) {
372 #if defined _MSC_VER && (_MSC_VER < 1400 || !defined(__STDC_SECURE_LIB__))
373 /* MSVC 6 (Microsoft) leaves errno at 0 for bad mode strings,
374 * across all Windows flavors. When it sets EINVAL varies
375 * across Windows flavors, the exact conditions aren't
376 * documented, and the answer lies in the OS's implementation
377 * of Win32's CreateFile function (whose source is secret).
378 * Seems the best we can do is map EINVAL to ENOENT.
379 * Starting with Visual Studio .NET 2005, EINVAL is correctly
380 * set by our CRT error handler (set in exceptions.c.)
382 if (errno == 0) /* bad mode string */
383 errno = EINVAL;
384 else if (errno == EINVAL) /* unknown, but not a mode string */
385 errno = ENOENT;
386 #endif
387 /* EINVAL is returned when an invalid filename or
388 * an invalid mode is supplied. */
389 if (errno == EINVAL) {
390 PyObject *v;
391 char message[100];
392 PyOS_snprintf(message, 100,
393 "invalid mode ('%.50s') or filename", mode);
394 v = Py_BuildValue("(isO)", errno, message, f->f_name);
395 if (v != NULL) {
396 PyErr_SetObject(PyExc_IOError, v);
397 Py_DECREF(v);
400 else
401 PyErr_SetFromErrnoWithFilenameObject(PyExc_IOError, f->f_name);
402 f = NULL;
404 if (f != NULL)
405 f = dircheck(f);
407 cleanup:
408 PyMem_FREE(newmode);
410 return (PyObject *)f;
413 static PyObject *
414 close_the_file(PyFileObject *f)
416 int sts = 0;
417 int (*local_close)(FILE *);
418 FILE *local_fp = f->f_fp;
419 if (local_fp != NULL) {
420 local_close = f->f_close;
421 if (local_close != NULL && f->unlocked_count > 0) {
422 if (f->ob_refcnt > 0) {
423 PyErr_SetString(PyExc_IOError,
424 "close() called during concurrent "
425 "operation on the same file object.");
426 } else {
427 /* This should not happen unless someone is
428 * carelessly playing with the PyFileObject
429 * struct fields and/or its associated FILE
430 * pointer. */
431 PyErr_SetString(PyExc_SystemError,
432 "PyFileObject locking error in "
433 "destructor (refcnt <= 0 at close).");
435 return NULL;
437 /* NULL out the FILE pointer before releasing the GIL, because
438 * it will not be valid anymore after the close() function is
439 * called. */
440 f->f_fp = NULL;
441 if (local_close != NULL) {
442 Py_BEGIN_ALLOW_THREADS
443 errno = 0;
444 sts = (*local_close)(local_fp);
445 Py_END_ALLOW_THREADS
446 if (sts == EOF)
447 return PyErr_SetFromErrno(PyExc_IOError);
448 if (sts != 0)
449 return PyInt_FromLong((long)sts);
452 Py_RETURN_NONE;
455 PyObject *
456 PyFile_FromFile(FILE *fp, char *name, char *mode, int (*close)(FILE *))
458 PyFileObject *f = (PyFileObject *)PyFile_Type.tp_new(&PyFile_Type,
459 NULL, NULL);
460 if (f != NULL) {
461 PyObject *o_name = PyString_FromString(name);
462 if (o_name == NULL)
463 return NULL;
464 if (fill_file_fields(f, fp, o_name, mode, close) == NULL) {
465 Py_DECREF(f);
466 f = NULL;
468 Py_DECREF(o_name);
470 return (PyObject *) f;
473 PyObject *
474 PyFile_FromString(char *name, char *mode)
476 extern int fclose(FILE *);
477 PyFileObject *f;
479 f = (PyFileObject *)PyFile_FromFile((FILE *)NULL, name, mode, fclose);
480 if (f != NULL) {
481 if (open_the_file(f, name, mode) == NULL) {
482 Py_DECREF(f);
483 f = NULL;
486 return (PyObject *)f;
489 void
490 PyFile_SetBufSize(PyObject *f, int bufsize)
492 PyFileObject *file = (PyFileObject *)f;
493 if (bufsize >= 0) {
494 int type;
495 switch (bufsize) {
496 case 0:
497 type = _IONBF;
498 break;
499 #ifdef HAVE_SETVBUF
500 case 1:
501 type = _IOLBF;
502 bufsize = BUFSIZ;
503 break;
504 #endif
505 default:
506 type = _IOFBF;
507 #ifndef HAVE_SETVBUF
508 bufsize = BUFSIZ;
509 #endif
510 break;
512 fflush(file->f_fp);
513 if (type == _IONBF) {
514 PyMem_Free(file->f_setbuf);
515 file->f_setbuf = NULL;
516 } else {
517 file->f_setbuf = (char *)PyMem_Realloc(file->f_setbuf,
518 bufsize);
520 #ifdef HAVE_SETVBUF
521 setvbuf(file->f_fp, file->f_setbuf, type, bufsize);
522 #else /* !HAVE_SETVBUF */
523 setbuf(file->f_fp, file->f_setbuf);
524 #endif /* !HAVE_SETVBUF */
528 /* Set the encoding used to output Unicode strings.
529 Return 1 on success, 0 on failure. */
532 PyFile_SetEncoding(PyObject *f, const char *enc)
534 return PyFile_SetEncodingAndErrors(f, enc, NULL);
538 PyFile_SetEncodingAndErrors(PyObject *f, const char *enc, char* errors)
540 PyFileObject *file = (PyFileObject*)f;
541 PyObject *str, *oerrors;
543 assert(PyFile_Check(f));
544 str = PyString_FromString(enc);
545 if (!str)
546 return 0;
547 if (errors) {
548 oerrors = PyString_FromString(errors);
549 if (!oerrors) {
550 Py_DECREF(str);
551 return 0;
553 } else {
554 oerrors = Py_None;
555 Py_INCREF(Py_None);
557 Py_DECREF(file->f_encoding);
558 file->f_encoding = str;
559 Py_DECREF(file->f_errors);
560 file->f_errors = oerrors;
561 return 1;
564 static PyObject *
565 err_closed(void)
567 PyErr_SetString(PyExc_ValueError, "I/O operation on closed file");
568 return NULL;
571 /* Refuse regular file I/O if there's data in the iteration-buffer.
572 * Mixing them would cause data to arrive out of order, as the read*
573 * methods don't use the iteration buffer. */
574 static PyObject *
575 err_iterbuffered(void)
577 PyErr_SetString(PyExc_ValueError,
578 "Mixing iteration and read methods would lose data");
579 return NULL;
582 static void drop_readahead(PyFileObject *);
584 /* Methods */
586 static void
587 file_dealloc(PyFileObject *f)
589 PyObject *ret;
590 if (f->weakreflist != NULL)
591 PyObject_ClearWeakRefs((PyObject *) f);
592 ret = close_the_file(f);
593 if (!ret) {
594 PySys_WriteStderr("close failed in file object destructor:\n");
595 PyErr_Print();
597 else {
598 Py_DECREF(ret);
600 PyMem_Free(f->f_setbuf);
601 Py_XDECREF(f->f_name);
602 Py_XDECREF(f->f_mode);
603 Py_XDECREF(f->f_encoding);
604 Py_XDECREF(f->f_errors);
605 drop_readahead(f);
606 Py_TYPE(f)->tp_free((PyObject *)f);
609 static PyObject *
610 file_repr(PyFileObject *f)
612 if (PyUnicode_Check(f->f_name)) {
613 #ifdef Py_USING_UNICODE
614 PyObject *ret = NULL;
615 PyObject *name = PyUnicode_AsUnicodeEscapeString(f->f_name);
616 const char *name_str = name ? PyString_AsString(name) : "?";
617 ret = PyString_FromFormat("<%s file u'%s', mode '%s' at %p>",
618 f->f_fp == NULL ? "closed" : "open",
619 name_str,
620 PyString_AsString(f->f_mode),
622 Py_XDECREF(name);
623 return ret;
624 #endif
625 } else {
626 return PyString_FromFormat("<%s file '%s', mode '%s' at %p>",
627 f->f_fp == NULL ? "closed" : "open",
628 PyString_AsString(f->f_name),
629 PyString_AsString(f->f_mode),
634 static PyObject *
635 file_close(PyFileObject *f)
637 PyObject *sts = close_the_file(f);
638 PyMem_Free(f->f_setbuf);
639 f->f_setbuf = NULL;
640 return sts;
644 /* Our very own off_t-like type, 64-bit if possible */
645 #if !defined(HAVE_LARGEFILE_SUPPORT)
646 typedef off_t Py_off_t;
647 #elif SIZEOF_OFF_T >= 8
648 typedef off_t Py_off_t;
649 #elif SIZEOF_FPOS_T >= 8
650 typedef fpos_t Py_off_t;
651 #else
652 #error "Large file support, but neither off_t nor fpos_t is large enough."
653 #endif
656 /* a portable fseek() function
657 return 0 on success, non-zero on failure (with errno set) */
658 static int
659 _portable_fseek(FILE *fp, Py_off_t offset, int whence)
661 #if !defined(HAVE_LARGEFILE_SUPPORT)
662 return fseek(fp, offset, whence);
663 #elif defined(HAVE_FSEEKO) && SIZEOF_OFF_T >= 8
664 return fseeko(fp, offset, whence);
665 #elif defined(HAVE_FSEEK64)
666 return fseek64(fp, offset, whence);
667 #elif defined(__BEOS__)
668 return _fseek(fp, offset, whence);
669 #elif SIZEOF_FPOS_T >= 8
670 /* lacking a 64-bit capable fseek(), use a 64-bit capable fsetpos()
671 and fgetpos() to implement fseek()*/
672 fpos_t pos;
673 switch (whence) {
674 case SEEK_END:
675 #ifdef MS_WINDOWS
676 fflush(fp);
677 if (_lseeki64(fileno(fp), 0, 2) == -1)
678 return -1;
679 #else
680 if (fseek(fp, 0, SEEK_END) != 0)
681 return -1;
682 #endif
683 /* fall through */
684 case SEEK_CUR:
685 if (fgetpos(fp, &pos) != 0)
686 return -1;
687 offset += pos;
688 break;
689 /* case SEEK_SET: break; */
691 return fsetpos(fp, &offset);
692 #else
693 #error "Large file support, but no way to fseek."
694 #endif
698 /* a portable ftell() function
699 Return -1 on failure with errno set appropriately, current file
700 position on success */
701 static Py_off_t
702 _portable_ftell(FILE* fp)
704 #if !defined(HAVE_LARGEFILE_SUPPORT)
705 return ftell(fp);
706 #elif defined(HAVE_FTELLO) && SIZEOF_OFF_T >= 8
707 return ftello(fp);
708 #elif defined(HAVE_FTELL64)
709 return ftell64(fp);
710 #elif SIZEOF_FPOS_T >= 8
711 fpos_t pos;
712 if (fgetpos(fp, &pos) != 0)
713 return -1;
714 return pos;
715 #else
716 #error "Large file support, but no way to ftell."
717 #endif
721 static PyObject *
722 file_seek(PyFileObject *f, PyObject *args)
724 int whence;
725 int ret;
726 Py_off_t offset;
727 PyObject *offobj, *off_index;
729 if (f->f_fp == NULL)
730 return err_closed();
731 drop_readahead(f);
732 whence = 0;
733 if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &whence))
734 return NULL;
735 off_index = PyNumber_Index(offobj);
736 if (!off_index) {
737 if (!PyFloat_Check(offobj))
738 return NULL;
739 /* Deprecated in 2.6 */
740 PyErr_Clear();
741 if (PyErr_WarnEx(PyExc_DeprecationWarning,
742 "integer argument expected, got float",
743 1) < 0)
744 return NULL;
745 off_index = offobj;
746 Py_INCREF(offobj);
748 #if !defined(HAVE_LARGEFILE_SUPPORT)
749 offset = PyInt_AsLong(off_index);
750 #else
751 offset = PyLong_Check(off_index) ?
752 PyLong_AsLongLong(off_index) : PyInt_AsLong(off_index);
753 #endif
754 Py_DECREF(off_index);
755 if (PyErr_Occurred())
756 return NULL;
758 FILE_BEGIN_ALLOW_THREADS(f)
759 errno = 0;
760 ret = _portable_fseek(f->f_fp, offset, whence);
761 FILE_END_ALLOW_THREADS(f)
763 if (ret != 0) {
764 PyErr_SetFromErrno(PyExc_IOError);
765 clearerr(f->f_fp);
766 return NULL;
768 f->f_skipnextlf = 0;
769 Py_INCREF(Py_None);
770 return Py_None;
#ifdef HAVE_FTRUNCATE
/* truncate([size]) method.

   Truncates (or, on some platforms, extends) the file to `size` bytes,
   defaulting to the current position, and leaves the file position
   where it was before the call.

   Returns None on success; NULL with an exception set on a closed
   file, bad arguments, or any failing stdio/OS call (IOError). */
static PyObject *
file_truncate(PyFileObject *f, PyObject *args)
{
    PyObject *newsizeobj = NULL;
    Py_off_t newsize;
    Py_off_t saved_pos;
    int ret;

    if (f->f_fp == NULL)
        return err_closed();
    if (!PyArg_UnpackTuple(args, "truncate", 0, 1, &newsizeobj))
        return NULL;

    /* Get current file position.  If the file happens to be open for
     * update and the last operation was an input operation, C doesn't
     * define what the later fflush() will do, but we promise truncate()
     * won't change the current position (and fflush() *does* change it
     * then at least on Windows).  The easiest thing is to capture
     * current pos now and seek back to it at the end.
     */
    FILE_BEGIN_ALLOW_THREADS(f)
    errno = 0;
    saved_pos = _portable_ftell(f->f_fp);
    FILE_END_ALLOW_THREADS(f)
    if (saved_pos == -1)
        goto onioerror;

    /* Set newsize to current position if newsizeobj NULL, else to the
     * specified value.
     */
    if (newsizeobj == NULL) {
        /* default to current position */
        newsize = saved_pos;
    }
    else {
#if !defined(HAVE_LARGEFILE_SUPPORT)
        newsize = PyInt_AsLong(newsizeobj);
#else
        newsize = PyLong_Check(newsizeobj) ?
                        PyLong_AsLongLong(newsizeobj) :
                        PyInt_AsLong(newsizeobj);
#endif
        if (PyErr_Occurred())
            return NULL;
    }

    /* Flush the stream.  We're mixing stream-level I/O with lower-level
     * I/O, and a flush may be necessary to synch both platform views
     * of the current file state.
     */
    FILE_BEGIN_ALLOW_THREADS(f)
    errno = 0;
    ret = fflush(f->f_fp);
    FILE_END_ALLOW_THREADS(f)
    if (ret != 0)
        goto onioerror;

#ifdef MS_WINDOWS
    /* MS _chsize doesn't work if newsize doesn't fit in 32 bits,
       so don't even try using it. */
    {
        HANDLE hFile;

        /* Have to move current pos to desired endpoint on Windows. */
        FILE_BEGIN_ALLOW_THREADS(f)
        errno = 0;
        ret = _portable_fseek(f->f_fp, newsize, SEEK_SET) != 0;
        FILE_END_ALLOW_THREADS(f)
        if (ret)
            goto onioerror;

        /* Truncate.  Note that this may grow the file! */
        FILE_BEGIN_ALLOW_THREADS(f)
        errno = 0;
        hFile = (HANDLE)_get_osfhandle(fileno(f->f_fp));
        ret = hFile == (HANDLE)-1;
        if (ret == 0) {
            ret = SetEndOfFile(hFile) == 0;
            if (ret)
                errno = EACCES;
        }
        FILE_END_ALLOW_THREADS(f)
        if (ret)
            goto onioerror;
    }
#else
    FILE_BEGIN_ALLOW_THREADS(f)
    errno = 0;
    ret = ftruncate(fileno(f->f_fp), newsize);
    FILE_END_ALLOW_THREADS(f)
    if (ret != 0)
        goto onioerror;
#endif /* !MS_WINDOWS */

    /* Restore original file position. */
    FILE_BEGIN_ALLOW_THREADS(f)
    errno = 0;
    ret = _portable_fseek(f->f_fp, saved_pos, SEEK_SET) != 0;
    FILE_END_ALLOW_THREADS(f)
    if (ret)
        goto onioerror;

    Py_RETURN_NONE;

onioerror:
    PyErr_SetFromErrno(PyExc_IOError);
    clearerr(f->f_fp);
    return NULL;
}
#endif /* HAVE_FTRUNCATE */
885 static PyObject *
886 file_tell(PyFileObject *f)
888 Py_off_t pos;
890 if (f->f_fp == NULL)
891 return err_closed();
892 FILE_BEGIN_ALLOW_THREADS(f)
893 errno = 0;
894 pos = _portable_ftell(f->f_fp);
895 FILE_END_ALLOW_THREADS(f)
897 if (pos == -1) {
898 PyErr_SetFromErrno(PyExc_IOError);
899 clearerr(f->f_fp);
900 return NULL;
902 if (f->f_skipnextlf) {
903 int c;
904 c = GETC(f->f_fp);
905 if (c == '\n') {
906 f->f_newlinetypes |= NEWLINE_CRLF;
907 pos++;
908 f->f_skipnextlf = 0;
909 } else if (c != EOF) ungetc(c, f->f_fp);
911 #if !defined(HAVE_LARGEFILE_SUPPORT)
912 return PyInt_FromLong(pos);
913 #else
914 return PyLong_FromLongLong(pos);
915 #endif
918 static PyObject *
919 file_fileno(PyFileObject *f)
921 if (f->f_fp == NULL)
922 return err_closed();
923 return PyInt_FromLong((long) fileno(f->f_fp));
926 static PyObject *
927 file_flush(PyFileObject *f)
929 int res;
931 if (f->f_fp == NULL)
932 return err_closed();
933 FILE_BEGIN_ALLOW_THREADS(f)
934 errno = 0;
935 res = fflush(f->f_fp);
936 FILE_END_ALLOW_THREADS(f)
937 if (res != 0) {
938 PyErr_SetFromErrno(PyExc_IOError);
939 clearerr(f->f_fp);
940 return NULL;
942 Py_INCREF(Py_None);
943 return Py_None;
946 static PyObject *
947 file_isatty(PyFileObject *f)
949 long res;
950 if (f->f_fp == NULL)
951 return err_closed();
952 FILE_BEGIN_ALLOW_THREADS(f)
953 res = isatty((int)fileno(f->f_fp));
954 FILE_END_ALLOW_THREADS(f)
955 return PyBool_FromLong(res);
959 #if BUFSIZ < 8192
960 #define SMALLCHUNK 8192
961 #else
962 #define SMALLCHUNK BUFSIZ
963 #endif
965 #if SIZEOF_INT < 4
966 #define BIGCHUNK (512 * 32)
967 #else
968 #define BIGCHUNK (512 * 1024)
969 #endif
971 static size_t
972 new_buffersize(PyFileObject *f, size_t currentsize)
974 #ifdef HAVE_FSTAT
975 off_t pos, end;
976 struct stat st;
977 if (fstat(fileno(f->f_fp), &st) == 0) {
978 end = st.st_size;
979 /* The following is not a bug: we really need to call lseek()
980 *and* ftell(). The reason is that some stdio libraries
981 mistakenly flush their buffer when ftell() is called and
982 the lseek() call it makes fails, thereby throwing away
983 data that cannot be recovered in any way. To avoid this,
984 we first test lseek(), and only call ftell() if lseek()
985 works. We can't use the lseek() value either, because we
986 need to take the amount of buffered data into account.
987 (Yet another reason why stdio stinks. :-) */
988 pos = lseek(fileno(f->f_fp), 0L, SEEK_CUR);
989 if (pos >= 0) {
990 pos = ftell(f->f_fp);
992 if (pos < 0)
993 clearerr(f->f_fp);
994 if (end > pos && pos >= 0)
995 return currentsize + end - pos + 1;
996 /* Add 1 so if the file were to grow we'd notice. */
998 #endif
999 if (currentsize > SMALLCHUNK) {
1000 /* Keep doubling until we reach BIGCHUNK;
1001 then keep adding BIGCHUNK. */
1002 if (currentsize <= BIGCHUNK)
1003 return currentsize + currentsize;
1004 else
1005 return currentsize + BIGCHUNK;
1007 return currentsize + SMALLCHUNK;
1010 #if defined(EWOULDBLOCK) && defined(EAGAIN) && EWOULDBLOCK != EAGAIN
1011 #define BLOCKED_ERRNO(x) ((x) == EWOULDBLOCK || (x) == EAGAIN)
1012 #else
1013 #ifdef EWOULDBLOCK
1014 #define BLOCKED_ERRNO(x) ((x) == EWOULDBLOCK)
1015 #else
1016 #ifdef EAGAIN
1017 #define BLOCKED_ERRNO(x) ((x) == EAGAIN)
1018 #else
1019 #define BLOCKED_ERRNO(x) 0
1020 #endif
1021 #endif
1022 #endif
1024 static PyObject *
1025 file_read(PyFileObject *f, PyObject *args)
1027 long bytesrequested = -1;
1028 size_t bytesread, buffersize, chunksize;
1029 PyObject *v;
1031 if (f->f_fp == NULL)
1032 return err_closed();
1033 /* refuse to mix with f.next() */
1034 if (f->f_buf != NULL &&
1035 (f->f_bufend - f->f_bufptr) > 0 &&
1036 f->f_buf[0] != '\0')
1037 return err_iterbuffered();
1038 if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
1039 return NULL;
1040 if (bytesrequested < 0)
1041 buffersize = new_buffersize(f, (size_t)0);
1042 else
1043 buffersize = bytesrequested;
1044 if (buffersize > PY_SSIZE_T_MAX) {
1045 PyErr_SetString(PyExc_OverflowError,
1046 "requested number of bytes is more than a Python string can hold");
1047 return NULL;
1049 v = PyString_FromStringAndSize((char *)NULL, buffersize);
1050 if (v == NULL)
1051 return NULL;
1052 bytesread = 0;
1053 for (;;) {
1054 FILE_BEGIN_ALLOW_THREADS(f)
1055 errno = 0;
1056 chunksize = Py_UniversalNewlineFread(BUF(v) + bytesread,
1057 buffersize - bytesread, f->f_fp, (PyObject *)f);
1058 FILE_END_ALLOW_THREADS(f)
1059 if (chunksize == 0) {
1060 if (!ferror(f->f_fp))
1061 break;
1062 clearerr(f->f_fp);
1063 /* When in non-blocking mode, data shouldn't
1064 * be discarded if a blocking signal was
1065 * received. That will also happen if
1066 * chunksize != 0, but bytesread < buffersize. */
1067 if (bytesread > 0 && BLOCKED_ERRNO(errno))
1068 break;
1069 PyErr_SetFromErrno(PyExc_IOError);
1070 Py_DECREF(v);
1071 return NULL;
1073 bytesread += chunksize;
1074 if (bytesread < buffersize) {
1075 clearerr(f->f_fp);
1076 break;
1078 if (bytesrequested < 0) {
1079 buffersize = new_buffersize(f, buffersize);
1080 if (_PyString_Resize(&v, buffersize) < 0)
1081 return NULL;
1082 } else {
1083 /* Got what was requested. */
1084 break;
1087 if (bytesread != buffersize)
1088 _PyString_Resize(&v, bytesread);
1089 return v;
1092 static PyObject *
1093 file_readinto(PyFileObject *f, PyObject *args)
1095 char *ptr;
1096 Py_ssize_t ntodo;
1097 Py_ssize_t ndone, nnow;
1098 Py_buffer pbuf;
1100 if (f->f_fp == NULL)
1101 return err_closed();
1102 /* refuse to mix with f.next() */
1103 if (f->f_buf != NULL &&
1104 (f->f_bufend - f->f_bufptr) > 0 &&
1105 f->f_buf[0] != '\0')
1106 return err_iterbuffered();
1107 if (!PyArg_ParseTuple(args, "w*", &pbuf))
1108 return NULL;
1109 ptr = pbuf.buf;
1110 ntodo = pbuf.len;
1111 ndone = 0;
1112 while (ntodo > 0) {
1113 FILE_BEGIN_ALLOW_THREADS(f)
1114 errno = 0;
1115 nnow = Py_UniversalNewlineFread(ptr+ndone, ntodo, f->f_fp,
1116 (PyObject *)f);
1117 FILE_END_ALLOW_THREADS(f)
1118 if (nnow == 0) {
1119 if (!ferror(f->f_fp))
1120 break;
1121 PyErr_SetFromErrno(PyExc_IOError);
1122 clearerr(f->f_fp);
1123 PyBuffer_Release(&pbuf);
1124 return NULL;
1126 ndone += nnow;
1127 ntodo -= nnow;
1129 PyBuffer_Release(&pbuf);
1130 return PyInt_FromSsize_t(ndone);
1133 /**************************************************************************
1134 Routine to get next line using platform fgets().
1136 Under MSVC 6:
1138 + MS threadsafe getc is very slow (multiple layers of function calls before+
1139 after each character, to lock+unlock the stream).
1140 + The stream-locking functions are MS-internal -- can't access them from user
1141 code.
1142 + There's nothing Tim could find in the MS C or platform SDK libraries that
1143 can worm around this.
1144 + MS fgets locks/unlocks only once per line; it's the only hook we have.
1146 So we use fgets for speed(!), despite that it's painful.
1148 MS realloc is also slow.
1150 Reports from other platforms on this method vs getc_unlocked (which MS doesn't
1151 have):
1152 Linux a wash
1153 Solaris a wash
1154 Tru64 Unix getline_via_fgets significantly faster
1156 CAUTION: The C std isn't clear about this: in those cases where fgets
1157 writes something into the buffer, can it write into any position beyond the
1158 required trailing null byte? MSVC 6 fgets does not, and no platform is (yet)
1159 known on which it does; and it would be a strange way to code fgets. Still,
1160 getline_via_fgets may not work correctly if it does. The std test
1161 test_bufio.py should fail if platform fgets() routinely writes beyond the
1162 trailing null byte. #define DONT_USE_FGETS_IN_GETLINE to disable this code.
1163 **************************************************************************/
1165 /* Use this routine if told to, or by default on non-get_unlocked()
1166 * platforms unless told not to. Yikes! Let's spell that out:
1167 * On a platform with getc_unlocked():
1168 * By default, use getc_unlocked().
1169 * If you want to use fgets() instead, #define USE_FGETS_IN_GETLINE.
1170 * On a platform without getc_unlocked():
1171 * By default, use fgets().
1172 * If you don't want to use fgets(), #define DONT_USE_FGETS_IN_GETLINE.
1174 #if !defined(USE_FGETS_IN_GETLINE) && !defined(HAVE_GETC_UNLOCKED)
1175 #define USE_FGETS_IN_GETLINE
1176 #endif
1178 #if defined(DONT_USE_FGETS_IN_GETLINE) && defined(USE_FGETS_IN_GETLINE)
1179 #undef USE_FGETS_IN_GETLINE
1180 #endif
1182 #ifdef USE_FGETS_IN_GETLINE
1183 static PyObject*
1184 getline_via_fgets(PyFileObject *f, FILE *fp)
1186 /* INITBUFSIZE is the maximum line length that lets us get away with the fast
1187 * no-realloc, one-fgets()-call path. Boosting it isn't free, because we have
1188 * to fill this much of the buffer with a known value in order to figure out
1189 * how much of the buffer fgets() overwrites. So if INITBUFSIZE is larger
1190 * than "most" lines, we waste time filling unused buffer slots. 100 is
1191 * surely adequate for most peoples' email archives, chewing over source code,
1192 * etc -- "regular old text files".
1193 * MAXBUFSIZE is the maximum line length that lets us get away with the less
1194 * fast (but still zippy) no-realloc, two-fgets()-call path. See above for
1195 * cautions about boosting that. 300 was chosen because the worst real-life
1196 * text-crunching job reported on Python-Dev was a mail-log crawler where over
1197 * half the lines were 254 chars.
1199 #define INITBUFSIZE 100
1200 #define MAXBUFSIZE 300
1201 char* p; /* temp */
1202 char buf[MAXBUFSIZE];
1203 PyObject* v; /* the string object result */
1204 char* pvfree; /* address of next free slot */
1205 char* pvend; /* address one beyond last free slot */
1206 size_t nfree; /* # of free buffer slots; pvend-pvfree */
1207 size_t total_v_size; /* total # of slots in buffer */
1208 size_t increment; /* amount to increment the buffer */
1209 size_t prev_v_size;
1211 /* Optimize for normal case: avoid _PyString_Resize if at all
1212 * possible via first reading into stack buffer "buf".
1214 total_v_size = INITBUFSIZE; /* start small and pray */
1215 pvfree = buf;
1216 for (;;) {
1217 FILE_BEGIN_ALLOW_THREADS(f)
1218 pvend = buf + total_v_size;
1219 nfree = pvend - pvfree;
1220 memset(pvfree, '\n', nfree);
1221 assert(nfree < INT_MAX); /* Should be atmost MAXBUFSIZE */
1222 p = fgets(pvfree, (int)nfree, fp);
1223 FILE_END_ALLOW_THREADS(f)
1225 if (p == NULL) {
1226 clearerr(fp);
1227 if (PyErr_CheckSignals())
1228 return NULL;
1229 v = PyString_FromStringAndSize(buf, pvfree - buf);
1230 return v;
1232 /* fgets read *something* */
1233 p = memchr(pvfree, '\n', nfree);
1234 if (p != NULL) {
1235 /* Did the \n come from fgets or from us?
1236 * Since fgets stops at the first \n, and then writes
1237 * \0, if it's from fgets a \0 must be next. But if
1238 * that's so, it could not have come from us, since
1239 * the \n's we filled the buffer with have only more
1240 * \n's to the right.
1242 if (p+1 < pvend && *(p+1) == '\0') {
1243 /* It's from fgets: we win! In particular,
1244 * we haven't done any mallocs yet, and can
1245 * build the final result on the first try.
1247 ++p; /* include \n from fgets */
1249 else {
1250 /* Must be from us: fgets didn't fill the
1251 * buffer and didn't find a newline, so it
1252 * must be the last and newline-free line of
1253 * the file.
1255 assert(p > pvfree && *(p-1) == '\0');
1256 --p; /* don't include \0 from fgets */
1258 v = PyString_FromStringAndSize(buf, p - buf);
1259 return v;
1261 /* yuck: fgets overwrote all the newlines, i.e. the entire
1262 * buffer. So this line isn't over yet, or maybe it is but
1263 * we're exactly at EOF. If we haven't already, try using the
1264 * rest of the stack buffer.
1266 assert(*(pvend-1) == '\0');
1267 if (pvfree == buf) {
1268 pvfree = pvend - 1; /* overwrite trailing null */
1269 total_v_size = MAXBUFSIZE;
1271 else
1272 break;
1275 /* The stack buffer isn't big enough; malloc a string object and read
1276 * into its buffer.
1278 total_v_size = MAXBUFSIZE << 1;
1279 v = PyString_FromStringAndSize((char*)NULL, (int)total_v_size);
1280 if (v == NULL)
1281 return v;
1282 /* copy over everything except the last null byte */
1283 memcpy(BUF(v), buf, MAXBUFSIZE-1);
1284 pvfree = BUF(v) + MAXBUFSIZE - 1;
1286 /* Keep reading stuff into v; if it ever ends successfully, break
1287 * after setting p one beyond the end of the line. The code here is
1288 * very much like the code above, except reads into v's buffer; see
1289 * the code above for detailed comments about the logic.
1291 for (;;) {
1292 FILE_BEGIN_ALLOW_THREADS(f)
1293 pvend = BUF(v) + total_v_size;
1294 nfree = pvend - pvfree;
1295 memset(pvfree, '\n', nfree);
1296 assert(nfree < INT_MAX);
1297 p = fgets(pvfree, (int)nfree, fp);
1298 FILE_END_ALLOW_THREADS(f)
1300 if (p == NULL) {
1301 clearerr(fp);
1302 if (PyErr_CheckSignals()) {
1303 Py_DECREF(v);
1304 return NULL;
1306 p = pvfree;
1307 break;
1309 p = memchr(pvfree, '\n', nfree);
1310 if (p != NULL) {
1311 if (p+1 < pvend && *(p+1) == '\0') {
1312 /* \n came from fgets */
1313 ++p;
1314 break;
1316 /* \n came from us; last line of file, no newline */
1317 assert(p > pvfree && *(p-1) == '\0');
1318 --p;
1319 break;
1321 /* expand buffer and try again */
1322 assert(*(pvend-1) == '\0');
1323 increment = total_v_size >> 2; /* mild exponential growth */
1324 prev_v_size = total_v_size;
1325 total_v_size += increment;
1326 /* check for overflow */
1327 if (total_v_size <= prev_v_size ||
1328 total_v_size > PY_SSIZE_T_MAX) {
1329 PyErr_SetString(PyExc_OverflowError,
1330 "line is longer than a Python string can hold");
1331 Py_DECREF(v);
1332 return NULL;
1334 if (_PyString_Resize(&v, (int)total_v_size) < 0)
1335 return NULL;
1336 /* overwrite the trailing null byte */
1337 pvfree = BUF(v) + (prev_v_size - 1);
1339 if (BUF(v) + total_v_size != p)
1340 _PyString_Resize(&v, p - BUF(v));
1341 return v;
1342 #undef INITBUFSIZE
1343 #undef MAXBUFSIZE
1345 #endif /* ifdef USE_FGETS_IN_GETLINE */
1347 /* Internal routine to get a line.
1348 Size argument interpretation:
1349 > 0: max length;
1350 <= 0: read arbitrary line
1353 static PyObject *
1354 get_line(PyFileObject *f, int n)
1356 FILE *fp = f->f_fp;
1357 int c;
1358 char *buf, *end;
1359 size_t total_v_size; /* total # of slots in buffer */
1360 size_t used_v_size; /* # used slots in buffer */
1361 size_t increment; /* amount to increment the buffer */
1362 PyObject *v;
1363 int newlinetypes = f->f_newlinetypes;
1364 int skipnextlf = f->f_skipnextlf;
1365 int univ_newline = f->f_univ_newline;
1367 #if defined(USE_FGETS_IN_GETLINE)
1368 if (n <= 0 && !univ_newline )
1369 return getline_via_fgets(f, fp);
1370 #endif
1371 total_v_size = n > 0 ? n : 100;
1372 v = PyString_FromStringAndSize((char *)NULL, total_v_size);
1373 if (v == NULL)
1374 return NULL;
1375 buf = BUF(v);
1376 end = buf + total_v_size;
1378 for (;;) {
1379 FILE_BEGIN_ALLOW_THREADS(f)
1380 FLOCKFILE(fp);
1381 if (univ_newline) {
1382 c = 'x'; /* Shut up gcc warning */
1383 while ( buf != end && (c = GETC(fp)) != EOF ) {
1384 if (skipnextlf ) {
1385 skipnextlf = 0;
1386 if (c == '\n') {
1387 /* Seeing a \n here with
1388 * skipnextlf true means we
1389 * saw a \r before.
1391 newlinetypes |= NEWLINE_CRLF;
1392 c = GETC(fp);
1393 if (c == EOF) break;
1394 } else {
1395 newlinetypes |= NEWLINE_CR;
1398 if (c == '\r') {
1399 skipnextlf = 1;
1400 c = '\n';
1401 } else if ( c == '\n')
1402 newlinetypes |= NEWLINE_LF;
1403 *buf++ = c;
1404 if (c == '\n') break;
1406 if ( c == EOF && skipnextlf )
1407 newlinetypes |= NEWLINE_CR;
1408 } else /* If not universal newlines use the normal loop */
1409 while ((c = GETC(fp)) != EOF &&
1410 (*buf++ = c) != '\n' &&
1411 buf != end)
1413 FUNLOCKFILE(fp);
1414 FILE_END_ALLOW_THREADS(f)
1415 f->f_newlinetypes = newlinetypes;
1416 f->f_skipnextlf = skipnextlf;
1417 if (c == '\n')
1418 break;
1419 if (c == EOF) {
1420 if (ferror(fp)) {
1421 PyErr_SetFromErrno(PyExc_IOError);
1422 clearerr(fp);
1423 Py_DECREF(v);
1424 return NULL;
1426 clearerr(fp);
1427 if (PyErr_CheckSignals()) {
1428 Py_DECREF(v);
1429 return NULL;
1431 break;
1433 /* Must be because buf == end */
1434 if (n > 0)
1435 break;
1436 used_v_size = total_v_size;
1437 increment = total_v_size >> 2; /* mild exponential growth */
1438 total_v_size += increment;
1439 if (total_v_size > PY_SSIZE_T_MAX) {
1440 PyErr_SetString(PyExc_OverflowError,
1441 "line is longer than a Python string can hold");
1442 Py_DECREF(v);
1443 return NULL;
1445 if (_PyString_Resize(&v, total_v_size) < 0)
1446 return NULL;
1447 buf = BUF(v) + used_v_size;
1448 end = BUF(v) + total_v_size;
1451 used_v_size = buf - BUF(v);
1452 if (used_v_size != total_v_size)
1453 _PyString_Resize(&v, used_v_size);
1454 return v;
1457 /* External C interface */
1459 PyObject *
1460 PyFile_GetLine(PyObject *f, int n)
1462 PyObject *result;
1464 if (f == NULL) {
1465 PyErr_BadInternalCall();
1466 return NULL;
1469 if (PyFile_Check(f)) {
1470 PyFileObject *fo = (PyFileObject *)f;
1471 if (fo->f_fp == NULL)
1472 return err_closed();
1473 /* refuse to mix with f.next() */
1474 if (fo->f_buf != NULL &&
1475 (fo->f_bufend - fo->f_bufptr) > 0 &&
1476 fo->f_buf[0] != '\0')
1477 return err_iterbuffered();
1478 result = get_line(fo, n);
1480 else {
1481 PyObject *reader;
1482 PyObject *args;
1484 reader = PyObject_GetAttrString(f, "readline");
1485 if (reader == NULL)
1486 return NULL;
1487 if (n <= 0)
1488 args = PyTuple_New(0);
1489 else
1490 args = Py_BuildValue("(i)", n);
1491 if (args == NULL) {
1492 Py_DECREF(reader);
1493 return NULL;
1495 result = PyEval_CallObject(reader, args);
1496 Py_DECREF(reader);
1497 Py_DECREF(args);
1498 if (result != NULL && !PyString_Check(result) &&
1499 !PyUnicode_Check(result)) {
1500 Py_DECREF(result);
1501 result = NULL;
1502 PyErr_SetString(PyExc_TypeError,
1503 "object.readline() returned non-string");
1507 if (n < 0 && result != NULL && PyString_Check(result)) {
1508 char *s = PyString_AS_STRING(result);
1509 Py_ssize_t len = PyString_GET_SIZE(result);
1510 if (len == 0) {
1511 Py_DECREF(result);
1512 result = NULL;
1513 PyErr_SetString(PyExc_EOFError,
1514 "EOF when reading a line");
1516 else if (s[len-1] == '\n') {
1517 if (result->ob_refcnt == 1)
1518 _PyString_Resize(&result, len-1);
1519 else {
1520 PyObject *v;
1521 v = PyString_FromStringAndSize(s, len-1);
1522 Py_DECREF(result);
1523 result = v;
1527 #ifdef Py_USING_UNICODE
1528 if (n < 0 && result != NULL && PyUnicode_Check(result)) {
1529 Py_UNICODE *s = PyUnicode_AS_UNICODE(result);
1530 Py_ssize_t len = PyUnicode_GET_SIZE(result);
1531 if (len == 0) {
1532 Py_DECREF(result);
1533 result = NULL;
1534 PyErr_SetString(PyExc_EOFError,
1535 "EOF when reading a line");
1537 else if (s[len-1] == '\n') {
1538 if (result->ob_refcnt == 1)
1539 PyUnicode_Resize(&result, len-1);
1540 else {
1541 PyObject *v;
1542 v = PyUnicode_FromUnicode(s, len-1);
1543 Py_DECREF(result);
1544 result = v;
1548 #endif
1549 return result;
1552 /* Python method */
1554 static PyObject *
1555 file_readline(PyFileObject *f, PyObject *args)
1557 int n = -1;
1559 if (f->f_fp == NULL)
1560 return err_closed();
1561 /* refuse to mix with f.next() */
1562 if (f->f_buf != NULL &&
1563 (f->f_bufend - f->f_bufptr) > 0 &&
1564 f->f_buf[0] != '\0')
1565 return err_iterbuffered();
1566 if (!PyArg_ParseTuple(args, "|i:readline", &n))
1567 return NULL;
1568 if (n == 0)
1569 return PyString_FromString("");
1570 if (n < 0)
1571 n = 0;
1572 return get_line(f, n);
1575 static PyObject *
1576 file_readlines(PyFileObject *f, PyObject *args)
1578 long sizehint = 0;
1579 PyObject *list = NULL;
1580 PyObject *line;
1581 char small_buffer[SMALLCHUNK];
1582 char *buffer = small_buffer;
1583 size_t buffersize = SMALLCHUNK;
1584 PyObject *big_buffer = NULL;
1585 size_t nfilled = 0;
1586 size_t nread;
1587 size_t totalread = 0;
1588 char *p, *q, *end;
1589 int err;
1590 int shortread = 0;
1592 if (f->f_fp == NULL)
1593 return err_closed();
1594 /* refuse to mix with f.next() */
1595 if (f->f_buf != NULL &&
1596 (f->f_bufend - f->f_bufptr) > 0 &&
1597 f->f_buf[0] != '\0')
1598 return err_iterbuffered();
1599 if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
1600 return NULL;
1601 if ((list = PyList_New(0)) == NULL)
1602 return NULL;
1603 for (;;) {
1604 if (shortread)
1605 nread = 0;
1606 else {
1607 FILE_BEGIN_ALLOW_THREADS(f)
1608 errno = 0;
1609 nread = Py_UniversalNewlineFread(buffer+nfilled,
1610 buffersize-nfilled, f->f_fp, (PyObject *)f);
1611 FILE_END_ALLOW_THREADS(f)
1612 shortread = (nread < buffersize-nfilled);
1614 if (nread == 0) {
1615 sizehint = 0;
1616 if (!ferror(f->f_fp))
1617 break;
1618 PyErr_SetFromErrno(PyExc_IOError);
1619 clearerr(f->f_fp);
1620 goto error;
1622 totalread += nread;
1623 p = (char *)memchr(buffer+nfilled, '\n', nread);
1624 if (p == NULL) {
1625 /* Need a larger buffer to fit this line */
1626 nfilled += nread;
1627 buffersize *= 2;
1628 if (buffersize > PY_SSIZE_T_MAX) {
1629 PyErr_SetString(PyExc_OverflowError,
1630 "line is longer than a Python string can hold");
1631 goto error;
1633 if (big_buffer == NULL) {
1634 /* Create the big buffer */
1635 big_buffer = PyString_FromStringAndSize(
1636 NULL, buffersize);
1637 if (big_buffer == NULL)
1638 goto error;
1639 buffer = PyString_AS_STRING(big_buffer);
1640 memcpy(buffer, small_buffer, nfilled);
1642 else {
1643 /* Grow the big buffer */
1644 if ( _PyString_Resize(&big_buffer, buffersize) < 0 )
1645 goto error;
1646 buffer = PyString_AS_STRING(big_buffer);
1648 continue;
1650 end = buffer+nfilled+nread;
1651 q = buffer;
1652 do {
1653 /* Process complete lines */
1654 p++;
1655 line = PyString_FromStringAndSize(q, p-q);
1656 if (line == NULL)
1657 goto error;
1658 err = PyList_Append(list, line);
1659 Py_DECREF(line);
1660 if (err != 0)
1661 goto error;
1662 q = p;
1663 p = (char *)memchr(q, '\n', end-q);
1664 } while (p != NULL);
1665 /* Move the remaining incomplete line to the start */
1666 nfilled = end-q;
1667 memmove(buffer, q, nfilled);
1668 if (sizehint > 0)
1669 if (totalread >= (size_t)sizehint)
1670 break;
1672 if (nfilled != 0) {
1673 /* Partial last line */
1674 line = PyString_FromStringAndSize(buffer, nfilled);
1675 if (line == NULL)
1676 goto error;
1677 if (sizehint > 0) {
1678 /* Need to complete the last line */
1679 PyObject *rest = get_line(f, 0);
1680 if (rest == NULL) {
1681 Py_DECREF(line);
1682 goto error;
1684 PyString_Concat(&line, rest);
1685 Py_DECREF(rest);
1686 if (line == NULL)
1687 goto error;
1689 err = PyList_Append(list, line);
1690 Py_DECREF(line);
1691 if (err != 0)
1692 goto error;
1695 cleanup:
1696 Py_XDECREF(big_buffer);
1697 return list;
1699 error:
1700 Py_CLEAR(list);
1701 goto cleanup;
1704 static PyObject *
1705 file_write(PyFileObject *f, PyObject *args)
1707 Py_buffer pbuf;
1708 char *s;
1709 Py_ssize_t n, n2;
1710 if (f->f_fp == NULL)
1711 return err_closed();
1712 if (f->f_binary) {
1713 if (!PyArg_ParseTuple(args, "s*", &pbuf))
1714 return NULL;
1715 s = pbuf.buf;
1716 n = pbuf.len;
1717 } else
1718 if (!PyArg_ParseTuple(args, "t#", &s, &n))
1719 return NULL;
1720 f->f_softspace = 0;
1721 FILE_BEGIN_ALLOW_THREADS(f)
1722 errno = 0;
1723 n2 = fwrite(s, 1, n, f->f_fp);
1724 FILE_END_ALLOW_THREADS(f)
1725 if (f->f_binary)
1726 PyBuffer_Release(&pbuf);
1727 if (n2 != n) {
1728 PyErr_SetFromErrno(PyExc_IOError);
1729 clearerr(f->f_fp);
1730 return NULL;
1732 Py_INCREF(Py_None);
1733 return Py_None;
1736 static PyObject *
1737 file_writelines(PyFileObject *f, PyObject *seq)
1739 #define CHUNKSIZE 1000
1740 PyObject *list, *line;
1741 PyObject *it; /* iter(seq) */
1742 PyObject *result;
1743 int index, islist;
1744 Py_ssize_t i, j, nwritten, len;
1746 assert(seq != NULL);
1747 if (f->f_fp == NULL)
1748 return err_closed();
1750 result = NULL;
1751 list = NULL;
1752 islist = PyList_Check(seq);
1753 if (islist)
1754 it = NULL;
1755 else {
1756 it = PyObject_GetIter(seq);
1757 if (it == NULL) {
1758 PyErr_SetString(PyExc_TypeError,
1759 "writelines() requires an iterable argument");
1760 return NULL;
1762 /* From here on, fail by going to error, to reclaim "it". */
1763 list = PyList_New(CHUNKSIZE);
1764 if (list == NULL)
1765 goto error;
1768 /* Strategy: slurp CHUNKSIZE lines into a private list,
1769 checking that they are all strings, then write that list
1770 without holding the interpreter lock, then come back for more. */
1771 for (index = 0; ; index += CHUNKSIZE) {
1772 if (islist) {
1773 Py_XDECREF(list);
1774 list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
1775 if (list == NULL)
1776 goto error;
1777 j = PyList_GET_SIZE(list);
1779 else {
1780 for (j = 0; j < CHUNKSIZE; j++) {
1781 line = PyIter_Next(it);
1782 if (line == NULL) {
1783 if (PyErr_Occurred())
1784 goto error;
1785 break;
1787 PyList_SetItem(list, j, line);
1790 if (j == 0)
1791 break;
1793 /* Check that all entries are indeed strings. If not,
1794 apply the same rules as for file.write() and
1795 convert the results to strings. This is slow, but
1796 seems to be the only way since all conversion APIs
1797 could potentially execute Python code. */
1798 for (i = 0; i < j; i++) {
1799 PyObject *v = PyList_GET_ITEM(list, i);
1800 if (!PyString_Check(v)) {
1801 const char *buffer;
1802 if (((f->f_binary &&
1803 PyObject_AsReadBuffer(v,
1804 (const void**)&buffer,
1805 &len)) ||
1806 PyObject_AsCharBuffer(v,
1807 &buffer,
1808 &len))) {
1809 PyErr_SetString(PyExc_TypeError,
1810 "writelines() argument must be a sequence of strings");
1811 goto error;
1813 line = PyString_FromStringAndSize(buffer,
1814 len);
1815 if (line == NULL)
1816 goto error;
1817 Py_DECREF(v);
1818 PyList_SET_ITEM(list, i, line);
1822 /* Since we are releasing the global lock, the
1823 following code may *not* execute Python code. */
1824 f->f_softspace = 0;
1825 FILE_BEGIN_ALLOW_THREADS(f)
1826 errno = 0;
1827 for (i = 0; i < j; i++) {
1828 line = PyList_GET_ITEM(list, i);
1829 len = PyString_GET_SIZE(line);
1830 nwritten = fwrite(PyString_AS_STRING(line),
1831 1, len, f->f_fp);
1832 if (nwritten != len) {
1833 FILE_ABORT_ALLOW_THREADS(f)
1834 PyErr_SetFromErrno(PyExc_IOError);
1835 clearerr(f->f_fp);
1836 goto error;
1839 FILE_END_ALLOW_THREADS(f)
1841 if (j < CHUNKSIZE)
1842 break;
1845 Py_INCREF(Py_None);
1846 result = Py_None;
1847 error:
1848 Py_XDECREF(list);
1849 Py_XDECREF(it);
1850 return result;
1851 #undef CHUNKSIZE
1854 static PyObject *
1855 file_self(PyFileObject *f)
1857 if (f->f_fp == NULL)
1858 return err_closed();
1859 Py_INCREF(f);
1860 return (PyObject *)f;
1863 static PyObject *
1864 file_xreadlines(PyFileObject *f)
1866 if (PyErr_WarnPy3k("f.xreadlines() not supported in 3.x, "
1867 "try 'for line in f' instead", 1) < 0)
1868 return NULL;
1869 return file_self(f);
1872 static PyObject *
1873 file_exit(PyObject *f, PyObject *args)
1875 PyObject *ret = PyObject_CallMethod(f, "close", NULL);
1876 if (!ret)
1877 /* If error occurred, pass through */
1878 return NULL;
1879 Py_DECREF(ret);
1880 /* We cannot return the result of close since a true
1881 * value will be interpreted as "yes, swallow the
1882 * exception if one was raised inside the with block". */
1883 Py_RETURN_NONE;
1886 PyDoc_STRVAR(readline_doc,
1887 "readline([size]) -> next line from the file, as a string.\n"
1888 "\n"
1889 "Retain newline. A non-negative size argument limits the maximum\n"
1890 "number of bytes to return (an incomplete line may be returned then).\n"
1891 "Return an empty string at EOF.");
1893 PyDoc_STRVAR(read_doc,
1894 "read([size]) -> read at most size bytes, returned as a string.\n"
1895 "\n"
1896 "If the size argument is negative or omitted, read until EOF is reached.\n"
1897 "Notice that when in non-blocking mode, less data than what was requested\n"
1898 "may be returned, even if no size parameter was given.");
1900 PyDoc_STRVAR(write_doc,
1901 "write(str) -> None. Write string str to file.\n"
1902 "\n"
1903 "Note that due to buffering, flush() or close() may be needed before\n"
1904 "the file on disk reflects the data written.");
1906 PyDoc_STRVAR(fileno_doc,
1907 "fileno() -> integer \"file descriptor\".\n"
1908 "\n"
1909 "This is needed for lower-level file interfaces, such os.read().");
1911 PyDoc_STRVAR(seek_doc,
1912 "seek(offset[, whence]) -> None. Move to new file position.\n"
1913 "\n"
1914 "Argument offset is a byte count. Optional argument whence defaults to\n"
1915 "0 (offset from start of file, offset should be >= 0); other values are 1\n"
1916 "(move relative to current position, positive or negative), and 2 (move\n"
1917 "relative to end of file, usually negative, although many platforms allow\n"
1918 "seeking beyond the end of a file). If the file is opened in text mode,\n"
1919 "only offsets returned by tell() are legal. Use of other offsets causes\n"
1920 "undefined behavior."
1921 "\n"
1922 "Note that not all file objects are seekable.");
1924 #ifdef HAVE_FTRUNCATE
1925 PyDoc_STRVAR(truncate_doc,
1926 "truncate([size]) -> None. Truncate the file to at most size bytes.\n"
1927 "\n"
1928 "Size defaults to the current file position, as returned by tell().");
1929 #endif
1931 PyDoc_STRVAR(tell_doc,
1932 "tell() -> current file position, an integer (may be a long integer).");
1934 PyDoc_STRVAR(readinto_doc,
1935 "readinto() -> Undocumented. Don't use this; it may go away.");
1937 PyDoc_STRVAR(readlines_doc,
1938 "readlines([size]) -> list of strings, each a line from the file.\n"
1939 "\n"
1940 "Call readline() repeatedly and return a list of the lines so read.\n"
1941 "The optional size argument, if given, is an approximate bound on the\n"
1942 "total number of bytes in the lines returned.");
1944 PyDoc_STRVAR(xreadlines_doc,
1945 "xreadlines() -> returns self.\n"
1946 "\n"
1947 "For backward compatibility. File objects now include the performance\n"
1948 "optimizations previously implemented in the xreadlines module.");
1950 PyDoc_STRVAR(writelines_doc,
1951 "writelines(sequence_of_strings) -> None. Write the strings to the file.\n"
1952 "\n"
1953 "Note that newlines are not added. The sequence can be any iterable object\n"
1954 "producing strings. This is equivalent to calling write() for each string.");
1956 PyDoc_STRVAR(flush_doc,
1957 "flush() -> None. Flush the internal I/O buffer.");
1959 PyDoc_STRVAR(close_doc,
1960 "close() -> None or (perhaps) an integer. Close the file.\n"
1961 "\n"
1962 "Sets data attribute .closed to True. A closed file cannot be used for\n"
1963 "further I/O operations. close() may be called more than once without\n"
1964 "error. Some kinds of file objects (for example, opened by popen())\n"
1965 "may return an exit status upon closing.");
1967 PyDoc_STRVAR(isatty_doc,
1968 "isatty() -> true or false. True if the file is connected to a tty device.");
1970 PyDoc_STRVAR(enter_doc,
1971 "__enter__() -> self.");
1973 PyDoc_STRVAR(exit_doc,
1974 "__exit__(*excinfo) -> None. Closes the file.");
1976 static PyMethodDef file_methods[] = {
1977 {"readline", (PyCFunction)file_readline, METH_VARARGS, readline_doc},
1978 {"read", (PyCFunction)file_read, METH_VARARGS, read_doc},
1979 {"write", (PyCFunction)file_write, METH_VARARGS, write_doc},
1980 {"fileno", (PyCFunction)file_fileno, METH_NOARGS, fileno_doc},
1981 {"seek", (PyCFunction)file_seek, METH_VARARGS, seek_doc},
1982 #ifdef HAVE_FTRUNCATE
1983 {"truncate", (PyCFunction)file_truncate, METH_VARARGS, truncate_doc},
1984 #endif
1985 {"tell", (PyCFunction)file_tell, METH_NOARGS, tell_doc},
1986 {"readinto", (PyCFunction)file_readinto, METH_VARARGS, readinto_doc},
1987 {"readlines", (PyCFunction)file_readlines, METH_VARARGS, readlines_doc},
1988 {"xreadlines",(PyCFunction)file_xreadlines, METH_NOARGS, xreadlines_doc},
1989 {"writelines",(PyCFunction)file_writelines, METH_O, writelines_doc},
1990 {"flush", (PyCFunction)file_flush, METH_NOARGS, flush_doc},
1991 {"close", (PyCFunction)file_close, METH_NOARGS, close_doc},
1992 {"isatty", (PyCFunction)file_isatty, METH_NOARGS, isatty_doc},
1993 {"__enter__", (PyCFunction)file_self, METH_NOARGS, enter_doc},
1994 {"__exit__", (PyCFunction)file_exit, METH_VARARGS, exit_doc},
1995 {NULL, NULL} /* sentinel */
1998 #define OFF(x) offsetof(PyFileObject, x)
2000 static PyMemberDef file_memberlist[] = {
2001 {"mode", T_OBJECT, OFF(f_mode), RO,
2002 "file mode ('r', 'U', 'w', 'a', possibly with 'b' or '+' added)"},
2003 {"name", T_OBJECT, OFF(f_name), RO,
2004 "file name"},
2005 {"encoding", T_OBJECT, OFF(f_encoding), RO,
2006 "file encoding"},
2007 {"errors", T_OBJECT, OFF(f_errors), RO,
2008 "Unicode error handler"},
2009 /* getattr(f, "closed") is implemented without this table */
2010 {NULL} /* Sentinel */
2013 static PyObject *
2014 get_closed(PyFileObject *f, void *closure)
2016 return PyBool_FromLong((long)(f->f_fp == 0));
2018 static PyObject *
2019 get_newlines(PyFileObject *f, void *closure)
2021 switch (f->f_newlinetypes) {
2022 case NEWLINE_UNKNOWN:
2023 Py_INCREF(Py_None);
2024 return Py_None;
2025 case NEWLINE_CR:
2026 return PyString_FromString("\r");
2027 case NEWLINE_LF:
2028 return PyString_FromString("\n");
2029 case NEWLINE_CR|NEWLINE_LF:
2030 return Py_BuildValue("(ss)", "\r", "\n");
2031 case NEWLINE_CRLF:
2032 return PyString_FromString("\r\n");
2033 case NEWLINE_CR|NEWLINE_CRLF:
2034 return Py_BuildValue("(ss)", "\r", "\r\n");
2035 case NEWLINE_LF|NEWLINE_CRLF:
2036 return Py_BuildValue("(ss)", "\n", "\r\n");
2037 case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF:
2038 return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
2039 default:
2040 PyErr_Format(PyExc_SystemError,
2041 "Unknown newlines value 0x%x\n",
2042 f->f_newlinetypes);
2043 return NULL;
2047 static PyObject *
2048 get_softspace(PyFileObject *f, void *closure)
2050 if (PyErr_WarnPy3k("file.softspace not supported in 3.x", 1) < 0)
2051 return NULL;
2052 return PyInt_FromLong(f->f_softspace);
2055 static int
2056 set_softspace(PyFileObject *f, PyObject *value)
2058 int new;
2059 if (PyErr_WarnPy3k("file.softspace not supported in 3.x", 1) < 0)
2060 return -1;
2062 if (value == NULL) {
2063 PyErr_SetString(PyExc_TypeError,
2064 "can't delete softspace attribute");
2065 return -1;
2068 new = PyInt_AsLong(value);
2069 if (new == -1 && PyErr_Occurred())
2070 return -1;
2071 f->f_softspace = new;
2072 return 0;
2075 static PyGetSetDef file_getsetlist[] = {
2076 {"closed", (getter)get_closed, NULL, "True if the file is closed"},
2077 {"newlines", (getter)get_newlines, NULL,
2078 "end-of-line convention used in this file"},
2079 {"softspace", (getter)get_softspace, (setter)set_softspace,
2080 "flag indicating that a space needs to be printed; used by print"},
2081 {0},
2084 static void
2085 drop_readahead(PyFileObject *f)
2087 if (f->f_buf != NULL) {
2088 PyMem_Free(f->f_buf);
2089 f->f_buf = NULL;
2093 /* Make sure that file has a readahead buffer with at least one byte
2094 (unless at EOF) and no more than bufsize. Returns negative value on
2095 error, will set MemoryError if bufsize bytes cannot be allocated. */
2096 static int
2097 readahead(PyFileObject *f, int bufsize)
2099 Py_ssize_t chunksize;
2101 if (f->f_buf != NULL) {
2102 if( (f->f_bufend - f->f_bufptr) >= 1)
2103 return 0;
2104 else
2105 drop_readahead(f);
2107 if ((f->f_buf = (char *)PyMem_Malloc(bufsize)) == NULL) {
2108 PyErr_NoMemory();
2109 return -1;
2111 FILE_BEGIN_ALLOW_THREADS(f)
2112 errno = 0;
2113 chunksize = Py_UniversalNewlineFread(
2114 f->f_buf, bufsize, f->f_fp, (PyObject *)f);
2115 FILE_END_ALLOW_THREADS(f)
2116 if (chunksize == 0) {
2117 if (ferror(f->f_fp)) {
2118 PyErr_SetFromErrno(PyExc_IOError);
2119 clearerr(f->f_fp);
2120 drop_readahead(f);
2121 return -1;
2124 f->f_bufptr = f->f_buf;
2125 f->f_bufend = f->f_buf + chunksize;
2126 return 0;
2129 /* Used by file_iternext. The returned string will start with 'skip'
2130 uninitialized bytes followed by the remainder of the line. Don't be
2131 horrified by the recursive call: maximum recursion depth is limited by
2132 logarithmic buffer growth to about 50 even when reading a 1gb line. */
2134 static PyStringObject *
2135 readahead_get_line_skip(PyFileObject *f, int skip, int bufsize)
2137 PyStringObject* s;
2138 char *bufptr;
2139 char *buf;
2140 Py_ssize_t len;
2142 if (f->f_buf == NULL)
2143 if (readahead(f, bufsize) < 0)
2144 return NULL;
2146 len = f->f_bufend - f->f_bufptr;
2147 if (len == 0)
2148 return (PyStringObject *)
2149 PyString_FromStringAndSize(NULL, skip);
2150 bufptr = (char *)memchr(f->f_bufptr, '\n', len);
2151 if (bufptr != NULL) {
2152 bufptr++; /* Count the '\n' */
2153 len = bufptr - f->f_bufptr;
2154 s = (PyStringObject *)
2155 PyString_FromStringAndSize(NULL, skip+len);
2156 if (s == NULL)
2157 return NULL;
2158 memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
2159 f->f_bufptr = bufptr;
2160 if (bufptr == f->f_bufend)
2161 drop_readahead(f);
2162 } else {
2163 bufptr = f->f_bufptr;
2164 buf = f->f_buf;
2165 f->f_buf = NULL; /* Force new readahead buffer */
2166 assert(skip+len < INT_MAX);
2167 s = readahead_get_line_skip(
2168 f, (int)(skip+len), bufsize + (bufsize>>2) );
2169 if (s == NULL) {
2170 PyMem_Free(buf);
2171 return NULL;
2173 memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
2174 PyMem_Free(buf);
2176 return s;
2179 /* A larger buffer size may actually decrease performance. */
2180 #define READAHEAD_BUFSIZE 8192
2182 static PyObject *
2183 file_iternext(PyFileObject *f)
2185 PyStringObject* l;
2187 if (f->f_fp == NULL)
2188 return err_closed();
2190 l = readahead_get_line_skip(f, 0, READAHEAD_BUFSIZE);
2191 if (l == NULL || PyString_GET_SIZE(l) == 0) {
2192 Py_XDECREF(l);
2193 return NULL;
2195 return (PyObject *)l;
2199 static PyObject *
2200 file_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2202 PyObject *self;
2203 static PyObject *not_yet_string;
2205 assert(type != NULL && type->tp_alloc != NULL);
2207 if (not_yet_string == NULL) {
2208 not_yet_string = PyString_InternFromString("<uninitialized file>");
2209 if (not_yet_string == NULL)
2210 return NULL;
2213 self = type->tp_alloc(type, 0);
2214 if (self != NULL) {
2215 /* Always fill in the name and mode, so that nobody else
2216 needs to special-case NULLs there. */
2217 Py_INCREF(not_yet_string);
2218 ((PyFileObject *)self)->f_name = not_yet_string;
2219 Py_INCREF(not_yet_string);
2220 ((PyFileObject *)self)->f_mode = not_yet_string;
2221 Py_INCREF(Py_None);
2222 ((PyFileObject *)self)->f_encoding = Py_None;
2223 Py_INCREF(Py_None);
2224 ((PyFileObject *)self)->f_errors = Py_None;
2225 ((PyFileObject *)self)->weakreflist = NULL;
2226 ((PyFileObject *)self)->unlocked_count = 0;
2228 return self;
2231 static int
2232 file_init(PyObject *self, PyObject *args, PyObject *kwds)
2234 PyFileObject *foself = (PyFileObject *)self;
2235 int ret = 0;
2236 static char *kwlist[] = {"name", "mode", "buffering", 0};
2237 char *name = NULL;
2238 char *mode = "r";
2239 int bufsize = -1;
2240 int wideargument = 0;
2241 #ifdef MS_WINDOWS
2242 PyObject *po;
2243 #endif
2245 assert(PyFile_Check(self));
2246 if (foself->f_fp != NULL) {
2247 /* Have to close the existing file first. */
2248 PyObject *closeresult = file_close(foself);
2249 if (closeresult == NULL)
2250 return -1;
2251 Py_DECREF(closeresult);
2254 #ifdef MS_WINDOWS
2255 if (PyArg_ParseTupleAndKeywords(args, kwds, "U|si:file",
2256 kwlist, &po, &mode, &bufsize)) {
2257 wideargument = 1;
2258 if (fill_file_fields(foself, NULL, po, mode,
2259 fclose) == NULL)
2260 goto Error;
2261 } else {
2262 /* Drop the argument parsing error as narrow
2263 strings are also valid. */
2264 PyErr_Clear();
2266 #endif
2268 if (!wideargument) {
2269 PyObject *o_name;
2271 if (!PyArg_ParseTupleAndKeywords(args, kwds, "et|si:file", kwlist,
2272 Py_FileSystemDefaultEncoding,
2273 &name,
2274 &mode, &bufsize))
2275 return -1;
2277 /* We parse again to get the name as a PyObject */
2278 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|si:file",
2279 kwlist, &o_name, &mode,
2280 &bufsize))
2281 goto Error;
2283 if (fill_file_fields(foself, NULL, o_name, mode,
2284 fclose) == NULL)
2285 goto Error;
2287 if (open_the_file(foself, name, mode) == NULL)
2288 goto Error;
2289 foself->f_setbuf = NULL;
2290 PyFile_SetBufSize(self, bufsize);
2291 goto Done;
2293 Error:
2294 ret = -1;
2295 /* fall through */
2296 Done:
2297 PyMem_Free(name); /* free the encoded string */
2298 return ret;
2301 PyDoc_VAR(file_doc) =
2302 PyDoc_STR(
2303 "file(name[, mode[, buffering]]) -> file object\n"
2304 "\n"
2305 "Open a file. The mode can be 'r', 'w' or 'a' for reading (default),\n"
2306 "writing or appending. The file will be created if it doesn't exist\n"
2307 "when opened for writing or appending; it will be truncated when\n"
2308 "opened for writing. Add a 'b' to the mode for binary files.\n"
2309 "Add a '+' to the mode to allow simultaneous reading and writing.\n"
2310 "If the buffering argument is given, 0 means unbuffered, 1 means line\n"
2311 "buffered, and larger numbers specify the buffer size. The preferred way\n"
2312 "to open a file is with the builtin open() function.\n"
2314 PyDoc_STR(
2315 "Add a 'U' to mode to open the file for input with universal newline\n"
2316 "support. Any line ending in the input file will be seen as a '\\n'\n"
2317 "in Python. Also, a file so opened gains the attribute 'newlines';\n"
2318 "the value for this attribute is one of None (no newline read yet),\n"
2319 "'\\r', '\\n', '\\r\\n' or a tuple containing all the newline types seen.\n"
2320 "\n"
2321 "'U' cannot be combined with 'w' or '+' mode.\n"
2324 PyTypeObject PyFile_Type = {
2325 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2326 "file",
2327 sizeof(PyFileObject),
2329 (destructor)file_dealloc, /* tp_dealloc */
2330 0, /* tp_print */
2331 0, /* tp_getattr */
2332 0, /* tp_setattr */
2333 0, /* tp_compare */
2334 (reprfunc)file_repr, /* tp_repr */
2335 0, /* tp_as_number */
2336 0, /* tp_as_sequence */
2337 0, /* tp_as_mapping */
2338 0, /* tp_hash */
2339 0, /* tp_call */
2340 0, /* tp_str */
2341 PyObject_GenericGetAttr, /* tp_getattro */
2342 /* softspace is writable: we must supply tp_setattro */
2343 PyObject_GenericSetAttr, /* tp_setattro */
2344 0, /* tp_as_buffer */
2345 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_WEAKREFS, /* tp_flags */
2346 file_doc, /* tp_doc */
2347 0, /* tp_traverse */
2348 0, /* tp_clear */
2349 0, /* tp_richcompare */
2350 offsetof(PyFileObject, weakreflist), /* tp_weaklistoffset */
2351 (getiterfunc)file_self, /* tp_iter */
2352 (iternextfunc)file_iternext, /* tp_iternext */
2353 file_methods, /* tp_methods */
2354 file_memberlist, /* tp_members */
2355 file_getsetlist, /* tp_getset */
2356 0, /* tp_base */
2357 0, /* tp_dict */
2358 0, /* tp_descr_get */
2359 0, /* tp_descr_set */
2360 0, /* tp_dictoffset */
2361 file_init, /* tp_init */
2362 PyType_GenericAlloc, /* tp_alloc */
2363 file_new, /* tp_new */
2364 PyObject_Del, /* tp_free */
2367 /* Interface for the 'soft space' between print items. */
2370 PyFile_SoftSpace(PyObject *f, int newflag)
2372 long oldflag = 0;
2373 if (f == NULL) {
2374 /* Do nothing */
2376 else if (PyFile_Check(f)) {
2377 oldflag = ((PyFileObject *)f)->f_softspace;
2378 ((PyFileObject *)f)->f_softspace = newflag;
2380 else {
2381 PyObject *v;
2382 v = PyObject_GetAttrString(f, "softspace");
2383 if (v == NULL)
2384 PyErr_Clear();
2385 else {
2386 if (PyInt_Check(v))
2387 oldflag = PyInt_AsLong(v);
2388 assert(oldflag < INT_MAX);
2389 Py_DECREF(v);
2391 v = PyInt_FromLong((long)newflag);
2392 if (v == NULL)
2393 PyErr_Clear();
2394 else {
2395 if (PyObject_SetAttrString(f, "softspace", v) != 0)
2396 PyErr_Clear();
2397 Py_DECREF(v);
2400 return (int)oldflag;
2403 /* Interfaces to write objects/strings to file-like objects */
2406 PyFile_WriteObject(PyObject *v, PyObject *f, int flags)
2408 PyObject *writer, *value, *args, *result;
2409 if (f == NULL) {
2410 PyErr_SetString(PyExc_TypeError, "writeobject with NULL file");
2411 return -1;
2413 else if (PyFile_Check(f)) {
2414 PyFileObject *fobj = (PyFileObject *) f;
2415 #ifdef Py_USING_UNICODE
2416 PyObject *enc = fobj->f_encoding;
2417 int result;
2418 #endif
2419 if (fobj->f_fp == NULL) {
2420 err_closed();
2421 return -1;
2423 #ifdef Py_USING_UNICODE
2424 if ((flags & Py_PRINT_RAW) &&
2425 PyUnicode_Check(v) && enc != Py_None) {
2426 char *cenc = PyString_AS_STRING(enc);
2427 char *errors = fobj->f_errors == Py_None ?
2428 "strict" : PyString_AS_STRING(fobj->f_errors);
2429 value = PyUnicode_AsEncodedString(v, cenc, errors);
2430 if (value == NULL)
2431 return -1;
2432 } else {
2433 value = v;
2434 Py_INCREF(value);
2436 result = file_PyObject_Print(value, fobj, flags);
2437 Py_DECREF(value);
2438 return result;
2439 #else
2440 return file_PyObject_Print(v, fobj, flags);
2441 #endif
2443 writer = PyObject_GetAttrString(f, "write");
2444 if (writer == NULL)
2445 return -1;
2446 if (flags & Py_PRINT_RAW) {
2447 if (PyUnicode_Check(v)) {
2448 value = v;
2449 Py_INCREF(value);
2450 } else
2451 value = PyObject_Str(v);
2453 else
2454 value = PyObject_Repr(v);
2455 if (value == NULL) {
2456 Py_DECREF(writer);
2457 return -1;
2459 args = PyTuple_Pack(1, value);
2460 if (args == NULL) {
2461 Py_DECREF(value);
2462 Py_DECREF(writer);
2463 return -1;
2465 result = PyEval_CallObject(writer, args);
2466 Py_DECREF(args);
2467 Py_DECREF(value);
2468 Py_DECREF(writer);
2469 if (result == NULL)
2470 return -1;
2471 Py_DECREF(result);
2472 return 0;
2476 PyFile_WriteString(const char *s, PyObject *f)
2479 if (f == NULL) {
2480 /* Should be caused by a pre-existing error */
2481 if (!PyErr_Occurred())
2482 PyErr_SetString(PyExc_SystemError,
2483 "null file for PyFile_WriteString");
2484 return -1;
2486 else if (PyFile_Check(f)) {
2487 PyFileObject *fobj = (PyFileObject *) f;
2488 FILE *fp = PyFile_AsFile(f);
2489 if (fp == NULL) {
2490 err_closed();
2491 return -1;
2493 FILE_BEGIN_ALLOW_THREADS(fobj)
2494 fputs(s, fp);
2495 FILE_END_ALLOW_THREADS(fobj)
2496 return 0;
2498 else if (!PyErr_Occurred()) {
2499 PyObject *v = PyString_FromString(s);
2500 int err;
2501 if (v == NULL)
2502 return -1;
2503 err = PyFile_WriteObject(v, f, Py_PRINT_RAW);
2504 Py_DECREF(v);
2505 return err;
2507 else
2508 return -1;
2511 /* Try to get a file-descriptor from a Python object. If the object
2512 is an integer or long integer, its value is returned. If not, the
2513 object's fileno() method is called if it exists; the method must return
2514 an integer or long integer, which is returned as the file descriptor value.
2515 -1 is returned on failure.
2518 int PyObject_AsFileDescriptor(PyObject *o)
2520 int fd;
2521 PyObject *meth;
2523 if (PyInt_Check(o)) {
2524 fd = PyInt_AsLong(o);
2526 else if (PyLong_Check(o)) {
2527 fd = PyLong_AsLong(o);
2529 else if ((meth = PyObject_GetAttrString(o, "fileno")) != NULL)
2531 PyObject *fno = PyEval_CallObject(meth, NULL);
2532 Py_DECREF(meth);
2533 if (fno == NULL)
2534 return -1;
2536 if (PyInt_Check(fno)) {
2537 fd = PyInt_AsLong(fno);
2538 Py_DECREF(fno);
2540 else if (PyLong_Check(fno)) {
2541 fd = PyLong_AsLong(fno);
2542 Py_DECREF(fno);
2544 else {
2545 PyErr_SetString(PyExc_TypeError,
2546 "fileno() returned a non-integer");
2547 Py_DECREF(fno);
2548 return -1;
2551 else {
2552 PyErr_SetString(PyExc_TypeError,
2553 "argument must be an int, or have a fileno() method.");
2554 return -1;
2557 if (fd < 0) {
2558 PyErr_Format(PyExc_ValueError,
2559 "file descriptor cannot be a negative integer (%i)",
2560 fd);
2561 return -1;
2563 return fd;
2566 /* From here on we need access to the real fgets and fread */
2567 #undef fgets
2568 #undef fread
2571 ** Py_UniversalNewlineFgets is an fgets variation that understands
2572 ** all of \r, \n and \r\n conventions.
2573 ** The stream should be opened in binary mode.
2574 ** If fobj is NULL the routine always does newline conversion, and
2575 ** it may peek one char ahead to gobble the second char in \r\n.
2576 ** If fobj is non-NULL it must be a PyFileObject. In this case there
2577 ** is no readahead but in stead a flag is used to skip a following
2578 ** \n on the next read. Also, if the file is open in binary mode
2579 ** the whole conversion is skipped. Finally, the routine keeps track of
2580 ** the different types of newlines seen.
2581 ** Note that we need no error handling: fgets() treats error and eof
2582 ** identically.
2584 char *
2585 Py_UniversalNewlineFgets(char *buf, int n, FILE *stream, PyObject *fobj)
2587 char *p = buf;
2588 int c;
2589 int newlinetypes = 0;
2590 int skipnextlf = 0;
2591 int univ_newline = 1;
2593 if (fobj) {
2594 if (!PyFile_Check(fobj)) {
2595 errno = ENXIO; /* What can you do... */
2596 return NULL;
2598 univ_newline = ((PyFileObject *)fobj)->f_univ_newline;
2599 if ( !univ_newline )
2600 return fgets(buf, n, stream);
2601 newlinetypes = ((PyFileObject *)fobj)->f_newlinetypes;
2602 skipnextlf = ((PyFileObject *)fobj)->f_skipnextlf;
2604 FLOCKFILE(stream);
2605 c = 'x'; /* Shut up gcc warning */
2606 while (--n > 0 && (c = GETC(stream)) != EOF ) {
2607 if (skipnextlf ) {
2608 skipnextlf = 0;
2609 if (c == '\n') {
2610 /* Seeing a \n here with skipnextlf true
2611 ** means we saw a \r before.
2613 newlinetypes |= NEWLINE_CRLF;
2614 c = GETC(stream);
2615 if (c == EOF) break;
2616 } else {
2618 ** Note that c == EOF also brings us here,
2619 ** so we're okay if the last char in the file
2620 ** is a CR.
2622 newlinetypes |= NEWLINE_CR;
2625 if (c == '\r') {
2626 /* A \r is translated into a \n, and we skip
2627 ** an adjacent \n, if any. We don't set the
2628 ** newlinetypes flag until we've seen the next char.
2630 skipnextlf = 1;
2631 c = '\n';
2632 } else if ( c == '\n') {
2633 newlinetypes |= NEWLINE_LF;
2635 *p++ = c;
2636 if (c == '\n') break;
2638 if ( c == EOF && skipnextlf )
2639 newlinetypes |= NEWLINE_CR;
2640 FUNLOCKFILE(stream);
2641 *p = '\0';
2642 if (fobj) {
2643 ((PyFileObject *)fobj)->f_newlinetypes = newlinetypes;
2644 ((PyFileObject *)fobj)->f_skipnextlf = skipnextlf;
2645 } else if ( skipnextlf ) {
2646 /* If we have no file object we cannot save the
2647 ** skipnextlf flag. We have to readahead, which
2648 ** will cause a pause if we're reading from an
2649 ** interactive stream, but that is very unlikely
2650 ** unless we're doing something silly like
2651 ** execfile("/dev/tty").
2653 c = GETC(stream);
2654 if ( c != '\n' )
2655 ungetc(c, stream);
2657 if (p == buf)
2658 return NULL;
2659 return buf;
2663 ** Py_UniversalNewlineFread is an fread variation that understands
2664 ** all of \r, \n and \r\n conventions.
2665 ** The stream should be opened in binary mode.
2666 ** fobj must be a PyFileObject. In this case there
2667 ** is no readahead but in stead a flag is used to skip a following
2668 ** \n on the next read. Also, if the file is open in binary mode
2669 ** the whole conversion is skipped. Finally, the routine keeps track of
2670 ** the different types of newlines seen.
2672 size_t
2673 Py_UniversalNewlineFread(char *buf, size_t n,
2674 FILE *stream, PyObject *fobj)
2676 char *dst = buf;
2677 PyFileObject *f = (PyFileObject *)fobj;
2678 int newlinetypes, skipnextlf;
2680 assert(buf != NULL);
2681 assert(stream != NULL);
2683 if (!fobj || !PyFile_Check(fobj)) {
2684 errno = ENXIO; /* What can you do... */
2685 return 0;
2687 if (!f->f_univ_newline)
2688 return fread(buf, 1, n, stream);
2689 newlinetypes = f->f_newlinetypes;
2690 skipnextlf = f->f_skipnextlf;
2691 /* Invariant: n is the number of bytes remaining to be filled
2692 * in the buffer.
2694 while (n) {
2695 size_t nread;
2696 int shortread;
2697 char *src = dst;
2699 nread = fread(dst, 1, n, stream);
2700 assert(nread <= n);
2701 if (nread == 0)
2702 break;
2704 n -= nread; /* assuming 1 byte out for each in; will adjust */
2705 shortread = n != 0; /* true iff EOF or error */
2706 while (nread--) {
2707 char c = *src++;
2708 if (c == '\r') {
2709 /* Save as LF and set flag to skip next LF. */
2710 *dst++ = '\n';
2711 skipnextlf = 1;
2713 else if (skipnextlf && c == '\n') {
2714 /* Skip LF, and remember we saw CR LF. */
2715 skipnextlf = 0;
2716 newlinetypes |= NEWLINE_CRLF;
2717 ++n;
2719 else {
2720 /* Normal char to be stored in buffer. Also
2721 * update the newlinetypes flag if either this
2722 * is an LF or the previous char was a CR.
2724 if (c == '\n')
2725 newlinetypes |= NEWLINE_LF;
2726 else if (skipnextlf)
2727 newlinetypes |= NEWLINE_CR;
2728 *dst++ = c;
2729 skipnextlf = 0;
2732 if (shortread) {
2733 /* If this is EOF, update type flags. */
2734 if (skipnextlf && feof(stream))
2735 newlinetypes |= NEWLINE_CR;
2736 break;
2739 f->f_newlinetypes = newlinetypes;
2740 f->f_skipnextlf = skipnextlf;
2741 return dst - buf;
2744 #ifdef __cplusplus
2746 #endif