1 /* File object implementation */
3 #define PY_SSIZE_T_CLEAN
5 #include "structmember.h"
7 #ifndef DONT_HAVE_SYS_TYPES_H
9 #endif /* DONT_HAVE_SYS_TYPES_H */
12 #define fileno _fileno
13 /* can simulate truncate with Win32 API functions; see file_truncate */
14 #define HAVE_FTRUNCATE
15 #define WIN32_LEAN_AND_MEAN
20 /* Need GetVersion to see if on NT so safe to use _wfopen */
21 #define WIN32_LEAN_AND_MEAN
25 #if defined(PYOS_OS2) && defined(PYCC_GCC)
29 #define BUF(v) PyString_AS_STRING((PyStringObject *)v)
31 #ifndef DONT_HAVE_ERRNO_H
35 #ifdef HAVE_GETC_UNLOCKED
36 #define GETC(f) getc_unlocked(f)
37 #define FLOCKFILE(f) flockfile(f)
38 #define FUNLOCKFILE(f) funlockfile(f)
40 #define GETC(f) getc(f)
42 #define FUNLOCKFILE(f)
45 /* Bits in f_newlinetypes */
46 #define NEWLINE_UNKNOWN 0 /* No newline seen, yet */
47 #define NEWLINE_CR 1 /* \r newline seen */
48 #define NEWLINE_LF 2 /* \n newline seen */
49 #define NEWLINE_CRLF 4 /* \r\n newline seen */
52 PyFile_AsFile(PyObject
*f
)
54 if (f
== NULL
|| !PyFile_Check(f
))
57 return ((PyFileObject
*)f
)->f_fp
;
61 PyFile_Name(PyObject
*f
)
63 if (f
== NULL
|| !PyFile_Check(f
))
66 return ((PyFileObject
*)f
)->f_name
;
69 /* On Unix, fopen will succeed for directories.
70 In Python, there should be no file objects referring to
71 directories, so we need a check. */
74 dircheck(PyFileObject
* f
)
76 #if defined(HAVE_FSTAT) && defined(S_IFDIR) && defined(EISDIR)
80 if (fstat(fileno(f
->f_fp
), &buf
) == 0 &&
81 S_ISDIR(buf
.st_mode
)) {
83 char *msg
= strerror(EISDIR
);
85 char *msg
= "Is a directory";
87 PyObject
*exc
= PyObject_CallFunction(PyExc_IOError
, "(is)",
89 PyErr_SetObject(PyExc_IOError
, exc
);
99 fill_file_fields(PyFileObject
*f
, FILE *fp
, PyObject
*name
, char *mode
,
100 int (*close
)(FILE *))
103 assert(PyFile_Check(f
));
104 assert(f
->f_fp
== NULL
);
106 Py_DECREF(f
->f_name
);
107 Py_DECREF(f
->f_mode
);
108 Py_DECREF(f
->f_encoding
);
113 f
->f_mode
= PyString_FromString(mode
);
117 f
->f_binary
= strchr(mode
,'b') != NULL
;
119 f
->f_univ_newline
= (strchr(mode
, 'U') != NULL
);
120 f
->f_newlinetypes
= NEWLINE_UNKNOWN
;
123 f
->f_encoding
= Py_None
;
125 if (f
->f_name
== NULL
|| f
->f_mode
== NULL
)
129 return (PyObject
*) f
;
132 /* check for known incorrect mode strings - problem is, platforms are
133 free to accept any mode characters they like and are supposed to
134 ignore stuff they don't understand... write or append mode with
135 universal newline support is expressly forbidden by PEP 278. */
136 /* zero return means the mode is acceptable - one means it is not */
138 check_the_mode(char *mode
)
140 size_t len
= strlen(mode
);
144 PyErr_SetString(PyExc_ValueError
, "empty mode string");
152 if (mode
[1] == 'U') {
153 PyErr_SetString(PyExc_ValueError
,
154 "invalid mode string");
161 /* reject w+U, a+U, wU+, aU+ */
166 if ((mode
[1] == '+' && mode
[2] == 'U') ||
167 (mode
[1] == 'U' && mode
[2] == '+')) {
168 PyErr_SetString(PyExc_ValueError
,
169 "invalid mode string");
181 open_the_file(PyFileObject
*f
, char *name
, char *mode
)
184 assert(PyFile_Check(f
));
186 /* windows ignores the passed name in order to support Unicode */
187 assert(f
->f_name
!= NULL
);
189 assert(name
!= NULL
);
191 assert(mode
!= NULL
);
192 assert(f
->f_fp
== NULL
);
194 if (check_the_mode(mode
))
197 /* rexec.py can't stop a user from getting the file() constructor --
198 all they have to do is get *any* file object f, and then do
199 type(f). Here we prevent them from doing damage with it. */
200 if (PyEval_GetRestricted()) {
201 PyErr_SetString(PyExc_IOError
,
202 "file() constructor not accessible in restricted mode");
207 if (strcmp(mode
, "U") == 0 || strcmp(mode
, "rU") == 0)
210 if (PyUnicode_Check(f
->f_name
)) {
212 wmode
= PyUnicode_DecodeASCII(mode
, strlen(mode
), NULL
);
213 if (f
->f_name
&& wmode
) {
214 Py_BEGIN_ALLOW_THREADS
215 /* PyUnicode_AS_UNICODE OK without thread
216 lock as it is a simple dereference. */
217 f
->f_fp
= _wfopen(PyUnicode_AS_UNICODE(f
->f_name
),
218 PyUnicode_AS_UNICODE(wmode
));
224 if (NULL
== f
->f_fp
&& NULL
!= name
) {
225 Py_BEGIN_ALLOW_THREADS
226 f
->f_fp
= fopen(name
, mode
);
230 if (f
->f_fp
== NULL
) {
232 /* MSVC 6 (Microsoft) leaves errno at 0 for bad mode strings,
233 * across all Windows flavors. When it sets EINVAL varies
234 * across Windows flavors, the exact conditions aren't
235 * documented, and the answer lies in the OS's implementation
236 * of Win32's CreateFile function (whose source is secret).
237 * Seems the best we can do is map EINVAL to ENOENT.
239 if (errno
== 0) /* bad mode string */
241 else if (errno
== EINVAL
) /* unknown, but not a mode string */
245 PyErr_Format(PyExc_IOError
, "invalid mode: %s",
248 PyErr_SetFromErrnoWithFilenameObject(PyExc_IOError
, f
->f_name
);
253 return (PyObject
*)f
;
257 PyFile_FromFile(FILE *fp
, char *name
, char *mode
, int (*close
)(FILE *))
259 PyFileObject
*f
= (PyFileObject
*)PyFile_Type
.tp_new(&PyFile_Type
,
262 PyObject
*o_name
= PyString_FromString(name
);
263 if (fill_file_fields(f
, fp
, o_name
, mode
, close
) == NULL
) {
269 return (PyObject
*) f
;
273 PyFile_FromString(char *name
, char *mode
)
275 extern int fclose(FILE *);
278 f
= (PyFileObject
*)PyFile_FromFile((FILE *)NULL
, name
, mode
, fclose
);
280 if (open_the_file(f
, name
, mode
) == NULL
) {
285 return (PyObject
*)f
;
289 PyFile_SetBufSize(PyObject
*f
, int bufsize
)
291 PyFileObject
*file
= (PyFileObject
*)f
;
312 if (type
== _IONBF
) {
313 PyMem_Free(file
->f_setbuf
);
314 file
->f_setbuf
= NULL
;
316 file
->f_setbuf
= PyMem_Realloc(file
->f_setbuf
, bufsize
);
319 setvbuf(file
->f_fp
, file
->f_setbuf
, type
, bufsize
);
320 #else /* !HAVE_SETVBUF */
321 setbuf(file
->f_fp
, file
->f_setbuf
);
322 #endif /* !HAVE_SETVBUF */
326 /* Set the encoding used to output Unicode strings.
327 Return 1 on success, 0 on failure. */
330 PyFile_SetEncoding(PyObject
*f
, const char *enc
)
332 PyFileObject
*file
= (PyFileObject
*)f
;
333 PyObject
*str
= PyString_FromString(enc
);
336 Py_DECREF(file
->f_encoding
);
337 file
->f_encoding
= str
;
344 PyErr_SetString(PyExc_ValueError
, "I/O operation on closed file");
348 /* Refuse regular file I/O if there's data in the iteration-buffer.
349 * Mixing them would cause data to arrive out of order, as the read*
350 * methods don't use the iteration buffer. */
352 err_iterbuffered(void)
354 PyErr_SetString(PyExc_ValueError
,
355 "Mixing iteration and read methods would lose data");
359 static void drop_readahead(PyFileObject
*);
364 file_dealloc(PyFileObject
*f
)
367 if (f
->weakreflist
!= NULL
)
368 PyObject_ClearWeakRefs((PyObject
*) f
);
369 if (f
->f_fp
!= NULL
&& f
->f_close
!= NULL
) {
370 Py_BEGIN_ALLOW_THREADS
371 sts
= (*f
->f_close
)(f
->f_fp
);
375 PySys_WriteStderr("close failed: [Errno %d] %s\n", errno
, strerror(errno
));
377 PySys_WriteStderr("close failed: [Errno %d]\n", errno
);
380 PyMem_Free(f
->f_setbuf
);
381 Py_XDECREF(f
->f_name
);
382 Py_XDECREF(f
->f_mode
);
383 Py_XDECREF(f
->f_encoding
);
385 f
->ob_type
->tp_free((PyObject
*)f
);
389 file_repr(PyFileObject
*f
)
391 if (PyUnicode_Check(f
->f_name
)) {
392 #ifdef Py_USING_UNICODE
393 PyObject
*ret
= NULL
;
395 name
= PyUnicode_AsUnicodeEscapeString(f
->f_name
);
396 ret
= PyString_FromFormat("<%s file u'%s', mode '%s' at %p>",
397 f
->f_fp
== NULL
? "closed" : "open",
398 PyString_AsString(name
),
399 PyString_AsString(f
->f_mode
),
405 return PyString_FromFormat("<%s file '%s', mode '%s' at %p>",
406 f
->f_fp
== NULL
? "closed" : "open",
407 PyString_AsString(f
->f_name
),
408 PyString_AsString(f
->f_mode
),
414 file_close(PyFileObject
*f
)
417 if (f
->f_fp
!= NULL
) {
418 if (f
->f_close
!= NULL
) {
419 Py_BEGIN_ALLOW_THREADS
421 sts
= (*f
->f_close
)(f
->f_fp
);
426 PyMem_Free(f
->f_setbuf
);
429 return PyErr_SetFromErrno(PyExc_IOError
);
431 return PyInt_FromLong((long)sts
);
437 /* Our very own off_t-like type, 64-bit if possible */
438 #if !defined(HAVE_LARGEFILE_SUPPORT)
439 typedef off_t Py_off_t
;
440 #elif SIZEOF_OFF_T >= 8
441 typedef off_t Py_off_t
;
442 #elif SIZEOF_FPOS_T >= 8
443 typedef fpos_t Py_off_t
;
445 #error "Large file support, but neither off_t nor fpos_t is large enough."
449 /* a portable fseek() function
450 return 0 on success, non-zero on failure (with errno set) */
452 _portable_fseek(FILE *fp
, Py_off_t offset
, int whence
)
454 #if !defined(HAVE_LARGEFILE_SUPPORT)
455 return fseek(fp
, offset
, whence
);
456 #elif defined(HAVE_FSEEKO) && SIZEOF_OFF_T >= 8
457 return fseeko(fp
, offset
, whence
);
458 #elif defined(HAVE_FSEEK64)
459 return fseek64(fp
, offset
, whence
);
460 #elif defined(__BEOS__)
461 return _fseek(fp
, offset
, whence
);
462 #elif SIZEOF_FPOS_T >= 8
463 /* lacking a 64-bit capable fseek(), use a 64-bit capable fsetpos()
464 and fgetpos() to implement fseek()*/
470 if (_lseeki64(fileno(fp
), 0, 2) == -1)
473 if (fseek(fp
, 0, SEEK_END
) != 0)
478 if (fgetpos(fp
, &pos
) != 0)
482 /* case SEEK_SET: break; */
484 return fsetpos(fp
, &offset
);
486 #error "Large file support, but no way to fseek."
491 /* a portable ftell() function
492 Return -1 on failure with errno set appropriately, current file
493 position on success */
495 _portable_ftell(FILE* fp
)
497 #if !defined(HAVE_LARGEFILE_SUPPORT)
499 #elif defined(HAVE_FTELLO) && SIZEOF_OFF_T >= 8
501 #elif defined(HAVE_FTELL64)
503 #elif SIZEOF_FPOS_T >= 8
505 if (fgetpos(fp
, &pos
) != 0)
509 #error "Large file support, but no way to ftell."
515 file_seek(PyFileObject
*f
, PyObject
*args
)
526 if (!PyArg_ParseTuple(args
, "O|i:seek", &offobj
, &whence
))
528 #if !defined(HAVE_LARGEFILE_SUPPORT)
529 offset
= PyInt_AsLong(offobj
);
531 offset
= PyLong_Check(offobj
) ?
532 PyLong_AsLongLong(offobj
) : PyInt_AsLong(offobj
);
534 if (PyErr_Occurred())
537 Py_BEGIN_ALLOW_THREADS
539 ret
= _portable_fseek(f
->f_fp
, offset
, whence
);
543 PyErr_SetFromErrno(PyExc_IOError
);
553 #ifdef HAVE_FTRUNCATE
555 file_truncate(PyFileObject
*f
, PyObject
*args
)
558 PyObject
*newsizeobj
= NULL
;
564 if (!PyArg_UnpackTuple(args
, "truncate", 0, 1, &newsizeobj
))
567 /* Get current file position. If the file happens to be open for
568 * update and the last operation was an input operation, C doesn't
569 * define what the later fflush() will do, but we promise truncate()
570 * won't change the current position (and fflush() *does* change it
571 * then at least on Windows). The easiest thing is to capture
572 * current pos now and seek back to it at the end.
574 Py_BEGIN_ALLOW_THREADS
576 initialpos
= _portable_ftell(f
->f_fp
);
578 if (initialpos
== -1)
581 /* Set newsize to current position if newsizeobj NULL, else to the
584 if (newsizeobj
!= NULL
) {
585 #if !defined(HAVE_LARGEFILE_SUPPORT)
586 newsize
= PyInt_AsLong(newsizeobj
);
588 newsize
= PyLong_Check(newsizeobj
) ?
589 PyLong_AsLongLong(newsizeobj
) :
590 PyInt_AsLong(newsizeobj
);
592 if (PyErr_Occurred())
595 else /* default to current position */
596 newsize
= initialpos
;
598 /* Flush the stream. We're mixing stream-level I/O with lower-level
599 * I/O, and a flush may be necessary to synch both platform views
600 * of the current file state.
602 Py_BEGIN_ALLOW_THREADS
604 ret
= fflush(f
->f_fp
);
610 /* MS _chsize doesn't work if newsize doesn't fit in 32 bits,
611 so don't even try using it. */
615 /* Have to move current pos to desired endpoint on Windows. */
616 Py_BEGIN_ALLOW_THREADS
618 ret
= _portable_fseek(f
->f_fp
, newsize
, SEEK_SET
) != 0;
623 /* Truncate. Note that this may grow the file! */
624 Py_BEGIN_ALLOW_THREADS
626 hFile
= (HANDLE
)_get_osfhandle(fileno(f
->f_fp
));
627 ret
= hFile
== (HANDLE
)-1;
629 ret
= SetEndOfFile(hFile
) == 0;
638 Py_BEGIN_ALLOW_THREADS
640 ret
= ftruncate(fileno(f
->f_fp
), newsize
);
644 #endif /* !MS_WINDOWS */
646 /* Restore original file position. */
647 Py_BEGIN_ALLOW_THREADS
649 ret
= _portable_fseek(f
->f_fp
, initialpos
, SEEK_SET
) != 0;
658 PyErr_SetFromErrno(PyExc_IOError
);
662 #endif /* HAVE_FTRUNCATE */
665 file_tell(PyFileObject
*f
)
671 Py_BEGIN_ALLOW_THREADS
673 pos
= _portable_ftell(f
->f_fp
);
676 PyErr_SetFromErrno(PyExc_IOError
);
680 if (f
->f_skipnextlf
) {
686 } else if (c
!= EOF
) ungetc(c
, f
->f_fp
);
688 #if !defined(HAVE_LARGEFILE_SUPPORT)
689 return PyInt_FromLong(pos
);
691 return PyLong_FromLongLong(pos
);
696 file_fileno(PyFileObject
*f
)
700 return PyInt_FromLong((long) fileno(f
->f_fp
));
704 file_flush(PyFileObject
*f
)
710 Py_BEGIN_ALLOW_THREADS
712 res
= fflush(f
->f_fp
);
715 PyErr_SetFromErrno(PyExc_IOError
);
724 file_isatty(PyFileObject
*f
)
729 Py_BEGIN_ALLOW_THREADS
730 res
= isatty((int)fileno(f
->f_fp
));
732 return PyBool_FromLong(res
);
737 #define SMALLCHUNK 8192
739 #define SMALLCHUNK BUFSIZ
743 #define BIGCHUNK (512 * 32)
745 #define BIGCHUNK (512 * 1024)
749 new_buffersize(PyFileObject
*f
, size_t currentsize
)
754 if (fstat(fileno(f
->f_fp
), &st
) == 0) {
756 /* The following is not a bug: we really need to call lseek()
757 *and* ftell(). The reason is that some stdio libraries
758 mistakenly flush their buffer when ftell() is called and
759 the lseek() call it makes fails, thereby throwing away
760 data that cannot be recovered in any way. To avoid this,
761 we first test lseek(), and only call ftell() if lseek()
762 works. We can't use the lseek() value either, because we
763 need to take the amount of buffered data into account.
764 (Yet another reason why stdio stinks. :-) */
765 pos
= lseek(fileno(f
->f_fp
), 0L, SEEK_CUR
);
767 pos
= ftell(f
->f_fp
);
771 if (end
> pos
&& pos
>= 0)
772 return currentsize
+ end
- pos
+ 1;
773 /* Add 1 so if the file were to grow we'd notice. */
776 if (currentsize
> SMALLCHUNK
) {
777 /* Keep doubling until we reach BIGCHUNK;
778 then keep adding BIGCHUNK. */
779 if (currentsize
<= BIGCHUNK
)
780 return currentsize
+ currentsize
;
782 return currentsize
+ BIGCHUNK
;
784 return currentsize
+ SMALLCHUNK
;
787 #if defined(EWOULDBLOCK) && defined(EAGAIN) && EWOULDBLOCK != EAGAIN
788 #define BLOCKED_ERRNO(x) ((x) == EWOULDBLOCK || (x) == EAGAIN)
791 #define BLOCKED_ERRNO(x) ((x) == EWOULDBLOCK)
794 #define BLOCKED_ERRNO(x) ((x) == EAGAIN)
796 #define BLOCKED_ERRNO(x) 0
802 file_read(PyFileObject
*f
, PyObject
*args
)
804 long bytesrequested
= -1;
805 size_t bytesread
, buffersize
, chunksize
;
810 /* refuse to mix with f.next() */
811 if (f
->f_buf
!= NULL
&&
812 (f
->f_bufend
- f
->f_bufptr
) > 0 &&
814 return err_iterbuffered();
815 if (!PyArg_ParseTuple(args
, "|l:read", &bytesrequested
))
817 if (bytesrequested
< 0)
818 buffersize
= new_buffersize(f
, (size_t)0);
820 buffersize
= bytesrequested
;
821 if (buffersize
> INT_MAX
) {
822 PyErr_SetString(PyExc_OverflowError
,
823 "requested number of bytes is more than a Python string can hold");
826 v
= PyString_FromStringAndSize((char *)NULL
, buffersize
);
831 Py_BEGIN_ALLOW_THREADS
833 chunksize
= Py_UniversalNewlineFread(BUF(v
) + bytesread
,
834 buffersize
- bytesread
, f
->f_fp
, (PyObject
*)f
);
836 if (chunksize
== 0) {
837 if (!ferror(f
->f_fp
))
840 /* When in non-blocking mode, data shouldn't
841 * be discarded if a blocking signal was
842 * received. That will also happen if
843 * chunksize != 0, but bytesread < buffersize. */
844 if (bytesread
> 0 && BLOCKED_ERRNO(errno
))
846 PyErr_SetFromErrno(PyExc_IOError
);
850 bytesread
+= chunksize
;
851 if (bytesread
< buffersize
) {
855 if (bytesrequested
< 0) {
856 buffersize
= new_buffersize(f
, buffersize
);
857 if (_PyString_Resize(&v
, buffersize
) < 0)
860 /* Got what was requested. */
864 if (bytesread
!= buffersize
)
865 _PyString_Resize(&v
, bytesread
);
870 file_readinto(PyFileObject
*f
, PyObject
*args
)
874 Py_ssize_t ndone
, nnow
;
878 /* refuse to mix with f.next() */
879 if (f
->f_buf
!= NULL
&&
880 (f
->f_bufend
- f
->f_bufptr
) > 0 &&
882 return err_iterbuffered();
883 if (!PyArg_ParseTuple(args
, "w#", &ptr
, &ntodo
))
887 Py_BEGIN_ALLOW_THREADS
889 nnow
= Py_UniversalNewlineFread(ptr
+ndone
, ntodo
, f
->f_fp
,
893 if (!ferror(f
->f_fp
))
895 PyErr_SetFromErrno(PyExc_IOError
);
902 return PyInt_FromLong((long)ndone
);
905 /**************************************************************************
906 Routine to get next line using platform fgets().
910 + MS threadsafe getc is very slow (multiple layers of function calls before+
911 after each character, to lock+unlock the stream).
912 + The stream-locking functions are MS-internal -- can't access them from user
914 + There's nothing Tim could find in the MS C or platform SDK libraries that
915 can worm around this.
916 + MS fgets locks/unlocks only once per line; it's the only hook we have.
918 So we use fgets for speed(!), despite that it's painful.
920 MS realloc is also slow.
922 Reports from other platforms on this method vs getc_unlocked (which MS doesn't
926 Tru64 Unix getline_via_fgets significantly faster
928 CAUTION: The C std isn't clear about this: in those cases where fgets
929 writes something into the buffer, can it write into any position beyond the
930 required trailing null byte? MSVC 6 fgets does not, and no platform is (yet)
931 known on which it does; and it would be a strange way to code fgets. Still,
932 getline_via_fgets may not work correctly if it does. The std test
933 test_bufio.py should fail if platform fgets() routinely writes beyond the
934 trailing null byte. #define DONT_USE_FGETS_IN_GETLINE to disable this code.
935 **************************************************************************/
937 /* Use this routine if told to, or by default on non-getc_unlocked()
938 * platforms unless told not to. Yikes! Let's spell that out:
939 * On a platform with getc_unlocked():
940 * By default, use getc_unlocked().
941 * If you want to use fgets() instead, #define USE_FGETS_IN_GETLINE.
942 * On a platform without getc_unlocked():
943 * By default, use fgets().
944 * If you don't want to use fgets(), #define DONT_USE_FGETS_IN_GETLINE.
946 #if !defined(USE_FGETS_IN_GETLINE) && !defined(HAVE_GETC_UNLOCKED)
947 #define USE_FGETS_IN_GETLINE
950 #if defined(DONT_USE_FGETS_IN_GETLINE) && defined(USE_FGETS_IN_GETLINE)
951 #undef USE_FGETS_IN_GETLINE
954 #ifdef USE_FGETS_IN_GETLINE
956 getline_via_fgets(FILE *fp
)
958 /* INITBUFSIZE is the maximum line length that lets us get away with the fast
959 * no-realloc, one-fgets()-call path. Boosting it isn't free, because we have
960 * to fill this much of the buffer with a known value in order to figure out
961 * how much of the buffer fgets() overwrites. So if INITBUFSIZE is larger
962 * than "most" lines, we waste time filling unused buffer slots. 100 is
963 * surely adequate for most peoples' email archives, chewing over source code,
964 * etc -- "regular old text files".
965 * MAXBUFSIZE is the maximum line length that lets us get away with the less
966 * fast (but still zippy) no-realloc, two-fgets()-call path. See above for
967 * cautions about boosting that. 300 was chosen because the worst real-life
968 * text-crunching job reported on Python-Dev was a mail-log crawler where over
969 * half the lines were 254 chars.
971 #define INITBUFSIZE 100
972 #define MAXBUFSIZE 300
974 char buf
[MAXBUFSIZE
];
975 PyObject
* v
; /* the string object result */
976 char* pvfree
; /* address of next free slot */
977 char* pvend
; /* address one beyond last free slot */
978 size_t nfree
; /* # of free buffer slots; pvend-pvfree */
979 size_t total_v_size
; /* total # of slots in buffer */
980 size_t increment
; /* amount to increment the buffer */
982 /* Optimize for normal case: avoid _PyString_Resize if at all
983 * possible via first reading into stack buffer "buf".
985 total_v_size
= INITBUFSIZE
; /* start small and pray */
988 Py_BEGIN_ALLOW_THREADS
989 pvend
= buf
+ total_v_size
;
990 nfree
= pvend
- pvfree
;
991 memset(pvfree
, '\n', nfree
);
992 assert(nfree
< INT_MAX
); /* Should be atmost MAXBUFSIZE */
993 p
= fgets(pvfree
, (int)nfree
, fp
);
998 if (PyErr_CheckSignals())
1000 v
= PyString_FromStringAndSize(buf
, pvfree
- buf
);
1003 /* fgets read *something* */
1004 p
= memchr(pvfree
, '\n', nfree
);
1006 /* Did the \n come from fgets or from us?
1007 * Since fgets stops at the first \n, and then writes
1008 * \0, if it's from fgets a \0 must be next. But if
1009 * that's so, it could not have come from us, since
1010 * the \n's we filled the buffer with have only more
1011 * \n's to the right.
1013 if (p
+1 < pvend
&& *(p
+1) == '\0') {
1014 /* It's from fgets: we win! In particular,
1015 * we haven't done any mallocs yet, and can
1016 * build the final result on the first try.
1018 ++p
; /* include \n from fgets */
1021 /* Must be from us: fgets didn't fill the
1022 * buffer and didn't find a newline, so it
1023 * must be the last and newline-free line of
1026 assert(p
> pvfree
&& *(p
-1) == '\0');
1027 --p
; /* don't include \0 from fgets */
1029 v
= PyString_FromStringAndSize(buf
, p
- buf
);
1032 /* yuck: fgets overwrote all the newlines, i.e. the entire
1033 * buffer. So this line isn't over yet, or maybe it is but
1034 * we're exactly at EOF. If we haven't already, try using the
1035 * rest of the stack buffer.
1037 assert(*(pvend
-1) == '\0');
1038 if (pvfree
== buf
) {
1039 pvfree
= pvend
- 1; /* overwrite trailing null */
1040 total_v_size
= MAXBUFSIZE
;
1046 /* The stack buffer isn't big enough; malloc a string object and read
1049 total_v_size
= MAXBUFSIZE
<< 1;
1050 v
= PyString_FromStringAndSize((char*)NULL
, (int)total_v_size
);
1053 /* copy over everything except the last null byte */
1054 memcpy(BUF(v
), buf
, MAXBUFSIZE
-1);
1055 pvfree
= BUF(v
) + MAXBUFSIZE
- 1;
1057 /* Keep reading stuff into v; if it ever ends successfully, break
1058 * after setting p one beyond the end of the line. The code here is
1059 * very much like the code above, except reads into v's buffer; see
1060 * the code above for detailed comments about the logic.
1063 Py_BEGIN_ALLOW_THREADS
1064 pvend
= BUF(v
) + total_v_size
;
1065 nfree
= pvend
- pvfree
;
1066 memset(pvfree
, '\n', nfree
);
1067 assert(nfree
< INT_MAX
);
1068 p
= fgets(pvfree
, (int)nfree
, fp
);
1069 Py_END_ALLOW_THREADS
1073 if (PyErr_CheckSignals()) {
1080 p
= memchr(pvfree
, '\n', nfree
);
1082 if (p
+1 < pvend
&& *(p
+1) == '\0') {
1083 /* \n came from fgets */
1087 /* \n came from us; last line of file, no newline */
1088 assert(p
> pvfree
&& *(p
-1) == '\0');
1092 /* expand buffer and try again */
1093 assert(*(pvend
-1) == '\0');
1094 increment
= total_v_size
>> 2; /* mild exponential growth */
1095 total_v_size
+= increment
;
1096 if (total_v_size
> INT_MAX
) {
1097 PyErr_SetString(PyExc_OverflowError
,
1098 "line is longer than a Python string can hold");
1102 if (_PyString_Resize(&v
, (int)total_v_size
) < 0)
1104 /* overwrite the trailing null byte */
1105 pvfree
= BUF(v
) + (total_v_size
- increment
- 1);
1107 if (BUF(v
) + total_v_size
!= p
)
1108 _PyString_Resize(&v
, p
- BUF(v
));
1113 #endif /* ifdef USE_FGETS_IN_GETLINE */
1115 /* Internal routine to get a line.
1116 Size argument interpretation:
1118 <= 0: read arbitrary line
1122 get_line(PyFileObject
*f
, int n
)
1127 size_t total_v_size
; /* total # of slots in buffer */
1128 size_t used_v_size
; /* # used slots in buffer */
1129 size_t increment
; /* amount to increment the buffer */
1131 int newlinetypes
= f
->f_newlinetypes
;
1132 int skipnextlf
= f
->f_skipnextlf
;
1133 int univ_newline
= f
->f_univ_newline
;
1135 #if defined(USE_FGETS_IN_GETLINE)
1136 if (n
<= 0 && !univ_newline
)
1137 return getline_via_fgets(fp
);
1139 total_v_size
= n
> 0 ? n
: 100;
1140 v
= PyString_FromStringAndSize((char *)NULL
, total_v_size
);
1144 end
= buf
+ total_v_size
;
1147 Py_BEGIN_ALLOW_THREADS
1150 c
= 'x'; /* Shut up gcc warning */
1151 while ( buf
!= end
&& (c
= GETC(fp
)) != EOF
) {
1155 /* Seeing a \n here with
1156 * skipnextlf true means we
1159 newlinetypes
|= NEWLINE_CRLF
;
1161 if (c
== EOF
) break;
1163 newlinetypes
|= NEWLINE_CR
;
1169 } else if ( c
== '\n')
1170 newlinetypes
|= NEWLINE_LF
;
1172 if (c
== '\n') break;
1174 if ( c
== EOF
&& skipnextlf
)
1175 newlinetypes
|= NEWLINE_CR
;
1176 } else /* If not universal newlines use the normal loop */
1177 while ((c
= GETC(fp
)) != EOF
&&
1178 (*buf
++ = c
) != '\n' &&
1182 Py_END_ALLOW_THREADS
1183 f
->f_newlinetypes
= newlinetypes
;
1184 f
->f_skipnextlf
= skipnextlf
;
1189 PyErr_SetFromErrno(PyExc_IOError
);
1195 if (PyErr_CheckSignals()) {
1201 /* Must be because buf == end */
1204 used_v_size
= total_v_size
;
1205 increment
= total_v_size
>> 2; /* mild exponential growth */
1206 total_v_size
+= increment
;
1207 if (total_v_size
> INT_MAX
) {
1208 PyErr_SetString(PyExc_OverflowError
,
1209 "line is longer than a Python string can hold");
1213 if (_PyString_Resize(&v
, total_v_size
) < 0)
1215 buf
= BUF(v
) + used_v_size
;
1216 end
= BUF(v
) + total_v_size
;
1219 used_v_size
= buf
- BUF(v
);
1220 if (used_v_size
!= total_v_size
)
1221 _PyString_Resize(&v
, used_v_size
);
1225 /* External C interface */
1228 PyFile_GetLine(PyObject
*f
, int n
)
1233 PyErr_BadInternalCall();
1237 if (PyFile_Check(f
)) {
1238 PyFileObject
*fo
= (PyFileObject
*)f
;
1239 if (fo
->f_fp
== NULL
)
1240 return err_closed();
1241 /* refuse to mix with f.next() */
1242 if (fo
->f_buf
!= NULL
&&
1243 (fo
->f_bufend
- fo
->f_bufptr
) > 0 &&
1244 fo
->f_buf
[0] != '\0')
1245 return err_iterbuffered();
1246 result
= get_line(fo
, n
);
1252 reader
= PyObject_GetAttrString(f
, "readline");
1256 args
= PyTuple_New(0);
1258 args
= Py_BuildValue("(i)", n
);
1263 result
= PyEval_CallObject(reader
, args
);
1266 if (result
!= NULL
&& !PyString_Check(result
) &&
1267 !PyUnicode_Check(result
)) {
1270 PyErr_SetString(PyExc_TypeError
,
1271 "object.readline() returned non-string");
1275 if (n
< 0 && result
!= NULL
&& PyString_Check(result
)) {
1276 char *s
= PyString_AS_STRING(result
);
1277 Py_ssize_t len
= PyString_GET_SIZE(result
);
1281 PyErr_SetString(PyExc_EOFError
,
1282 "EOF when reading a line");
1284 else if (s
[len
-1] == '\n') {
1285 if (result
->ob_refcnt
== 1)
1286 _PyString_Resize(&result
, len
-1);
1289 v
= PyString_FromStringAndSize(s
, len
-1);
1295 #ifdef Py_USING_UNICODE
1296 if (n
< 0 && result
!= NULL
&& PyUnicode_Check(result
)) {
1297 Py_UNICODE
*s
= PyUnicode_AS_UNICODE(result
);
1298 Py_ssize_t len
= PyUnicode_GET_SIZE(result
);
1302 PyErr_SetString(PyExc_EOFError
,
1303 "EOF when reading a line");
1305 else if (s
[len
-1] == '\n') {
1306 if (result
->ob_refcnt
== 1)
1307 PyUnicode_Resize(&result
, len
-1);
1310 v
= PyUnicode_FromUnicode(s
, len
-1);
1323 file_readline(PyFileObject
*f
, PyObject
*args
)
1327 if (f
->f_fp
== NULL
)
1328 return err_closed();
1329 /* refuse to mix with f.next() */
1330 if (f
->f_buf
!= NULL
&&
1331 (f
->f_bufend
- f
->f_bufptr
) > 0 &&
1332 f
->f_buf
[0] != '\0')
1333 return err_iterbuffered();
1334 if (!PyArg_ParseTuple(args
, "|i:readline", &n
))
1337 return PyString_FromString("");
1340 return get_line(f
, n
);
1344 file_readlines(PyFileObject
*f
, PyObject
*args
)
1349 char small_buffer
[SMALLCHUNK
];
1350 char *buffer
= small_buffer
;
1351 size_t buffersize
= SMALLCHUNK
;
1352 PyObject
*big_buffer
= NULL
;
1355 size_t totalread
= 0;
1360 if (f
->f_fp
== NULL
)
1361 return err_closed();
1362 /* refuse to mix with f.next() */
1363 if (f
->f_buf
!= NULL
&&
1364 (f
->f_bufend
- f
->f_bufptr
) > 0 &&
1365 f
->f_buf
[0] != '\0')
1366 return err_iterbuffered();
1367 if (!PyArg_ParseTuple(args
, "|l:readlines", &sizehint
))
1369 if ((list
= PyList_New(0)) == NULL
)
1375 Py_BEGIN_ALLOW_THREADS
1377 nread
= Py_UniversalNewlineFread(buffer
+nfilled
,
1378 buffersize
-nfilled
, f
->f_fp
, (PyObject
*)f
);
1379 Py_END_ALLOW_THREADS
1380 shortread
= (nread
< buffersize
-nfilled
);
1384 if (!ferror(f
->f_fp
))
1386 PyErr_SetFromErrno(PyExc_IOError
);
1394 p
= memchr(buffer
+nfilled
, '\n', nread
);
1396 /* Need a larger buffer to fit this line */
1399 if (buffersize
> INT_MAX
) {
1400 PyErr_SetString(PyExc_OverflowError
,
1401 "line is longer than a Python string can hold");
1404 if (big_buffer
== NULL
) {
1405 /* Create the big buffer */
1406 big_buffer
= PyString_FromStringAndSize(
1408 if (big_buffer
== NULL
)
1410 buffer
= PyString_AS_STRING(big_buffer
);
1411 memcpy(buffer
, small_buffer
, nfilled
);
1414 /* Grow the big buffer */
1415 if ( _PyString_Resize(&big_buffer
, buffersize
) < 0 )
1417 buffer
= PyString_AS_STRING(big_buffer
);
1421 end
= buffer
+nfilled
+nread
;
1424 /* Process complete lines */
1426 line
= PyString_FromStringAndSize(q
, p
-q
);
1429 err
= PyList_Append(list
, line
);
1434 p
= memchr(q
, '\n', end
-q
);
1435 } while (p
!= NULL
);
1436 /* Move the remaining incomplete line to the start */
1438 memmove(buffer
, q
, nfilled
);
1440 if (totalread
>= (size_t)sizehint
)
1444 /* Partial last line */
1445 line
= PyString_FromStringAndSize(buffer
, nfilled
);
1449 /* Need to complete the last line */
1450 PyObject
*rest
= get_line(f
, 0);
1455 PyString_Concat(&line
, rest
);
1460 err
= PyList_Append(list
, line
);
1466 Py_XDECREF(big_buffer
);
1471 file_write(PyFileObject
*f
, PyObject
*args
)
1475 if (f
->f_fp
== NULL
)
1476 return err_closed();
1477 if (!PyArg_ParseTuple(args
, f
->f_binary
? "s#" : "t#", &s
, &n
))
1480 Py_BEGIN_ALLOW_THREADS
1482 n2
= fwrite(s
, 1, n
, f
->f_fp
);
1483 Py_END_ALLOW_THREADS
1485 PyErr_SetFromErrno(PyExc_IOError
);
1494 file_writelines(PyFileObject
*f
, PyObject
*seq
)
1496 #define CHUNKSIZE 1000
1497 PyObject
*list
, *line
;
1498 PyObject
*it
; /* iter(seq) */
1501 Py_ssize_t i
, j
, nwritten
, len
;
1503 assert(seq
!= NULL
);
1504 if (f
->f_fp
== NULL
)
1505 return err_closed();
1509 islist
= PyList_Check(seq
);
1513 it
= PyObject_GetIter(seq
);
1515 PyErr_SetString(PyExc_TypeError
,
1516 "writelines() requires an iterable argument");
1519 /* From here on, fail by going to error, to reclaim "it". */
1520 list
= PyList_New(CHUNKSIZE
);
1525 /* Strategy: slurp CHUNKSIZE lines into a private list,
1526 checking that they are all strings, then write that list
1527 without holding the interpreter lock, then come back for more. */
1528 for (index
= 0; ; index
+= CHUNKSIZE
) {
1531 list
= PyList_GetSlice(seq
, index
, index
+CHUNKSIZE
);
1534 j
= PyList_GET_SIZE(list
);
1537 for (j
= 0; j
< CHUNKSIZE
; j
++) {
1538 line
= PyIter_Next(it
);
1540 if (PyErr_Occurred())
1544 PyList_SetItem(list
, j
, line
);
1550 /* Check that all entries are indeed strings. If not,
1551 apply the same rules as for file.write() and
1552 convert the results to strings. This is slow, but
1553 seems to be the only way since all conversion APIs
1554 could potentially execute Python code. */
1555 for (i
= 0; i
< j
; i
++) {
1556 PyObject
*v
= PyList_GET_ITEM(list
, i
);
1557 if (!PyString_Check(v
)) {
1559 if (((f
->f_binary
&&
1560 PyObject_AsReadBuffer(v
,
1561 (const void**)&buffer
,
1563 PyObject_AsCharBuffer(v
,
1566 PyErr_SetString(PyExc_TypeError
,
1567 "writelines() argument must be a sequence of strings");
1570 line
= PyString_FromStringAndSize(buffer
,
1575 PyList_SET_ITEM(list
, i
, line
);
1579 /* Since we are releasing the global lock, the
1580 following code may *not* execute Python code. */
1581 Py_BEGIN_ALLOW_THREADS
1584 for (i
= 0; i
< j
; i
++) {
1585 line
= PyList_GET_ITEM(list
, i
);
1586 len
= PyString_GET_SIZE(line
);
1587 nwritten
= fwrite(PyString_AS_STRING(line
),
1589 if (nwritten
!= len
) {
1591 PyErr_SetFromErrno(PyExc_IOError
);
1596 Py_END_ALLOW_THREADS
1612 file_self(PyFileObject
*f
)
1614 if (f
->f_fp
== NULL
)
1615 return err_closed();
1617 return (PyObject
*)f
;
1620 PyDoc_STRVAR(readline_doc
,
1621 "readline([size]) -> next line from the file, as a string.\n"
1623 "Retain newline. A non-negative size argument limits the maximum\n"
1624 "number of bytes to return (an incomplete line may be returned then).\n"
1625 "Return an empty string at EOF.");
1627 PyDoc_STRVAR(read_doc
,
1628 "read([size]) -> read at most size bytes, returned as a string.\n"
1630 "If the size argument is negative or omitted, read until EOF is reached.\n"
1631 "Notice that when in non-blocking mode, less data than what was requested\n"
1632 "may be returned, even if no size parameter was given.");
1634 PyDoc_STRVAR(write_doc
,
1635 "write(str) -> None. Write string str to file.\n"
1637 "Note that due to buffering, flush() or close() may be needed before\n"
1638 "the file on disk reflects the data written.");
1640 PyDoc_STRVAR(fileno_doc
,
1641 "fileno() -> integer \"file descriptor\".\n"
1643 "This is needed for lower-level file interfaces, such os.read().");
1645 PyDoc_STRVAR(seek_doc
,
1646 "seek(offset[, whence]) -> None. Move to new file position.\n"
1648 "Argument offset is a byte count. Optional argument whence defaults to\n"
1649 "0 (offset from start of file, offset should be >= 0); other values are 1\n"
1650 "(move relative to current position, positive or negative), and 2 (move\n"
1651 "relative to end of file, usually negative, although many platforms allow\n"
1652 "seeking beyond the end of a file). If the file is opened in text mode,\n"
1653 "only offsets returned by tell() are legal. Use of other offsets causes\n"
1654 "undefined behavior."
1656 "Note that not all file objects are seekable.");
1658 #ifdef HAVE_FTRUNCATE
1659 PyDoc_STRVAR(truncate_doc
,
1660 "truncate([size]) -> None. Truncate the file to at most size bytes.\n"
1662 "Size defaults to the current file position, as returned by tell().");
1665 PyDoc_STRVAR(tell_doc
,
1666 "tell() -> current file position, an integer (may be a long integer).");
1668 PyDoc_STRVAR(readinto_doc
,
1669 "readinto() -> Undocumented. Don't use this; it may go away.");
1671 PyDoc_STRVAR(readlines_doc
,
1672 "readlines([size]) -> list of strings, each a line from the file.\n"
1674 "Call readline() repeatedly and return a list of the lines so read.\n"
1675 "The optional size argument, if given, is an approximate bound on the\n"
1676 "total number of bytes in the lines returned.");
1678 PyDoc_STRVAR(xreadlines_doc
,
1679 "xreadlines() -> returns self.\n"
1681 "For backward compatibility. File objects now include the performance\n"
1682 "optimizations previously implemented in the xreadlines module.");
1684 PyDoc_STRVAR(writelines_doc
,
1685 "writelines(sequence_of_strings) -> None. Write the strings to the file.\n"
1687 "Note that newlines are not added. The sequence can be any iterable object\n"
1688 "producing strings. This is equivalent to calling write() for each string.");
1690 PyDoc_STRVAR(flush_doc
,
1691 "flush() -> None. Flush the internal I/O buffer.");
1693 PyDoc_STRVAR(close_doc
,
1694 "close() -> None or (perhaps) an integer. Close the file.\n"
1696 "Sets data attribute .closed to True. A closed file cannot be used for\n"
1697 "further I/O operations. close() may be called more than once without\n"
1698 "error. Some kinds of file objects (for example, opened by popen())\n"
1699 "may return an exit status upon closing.");
1701 PyDoc_STRVAR(isatty_doc
,
1702 "isatty() -> true or false. True if the file is connected to a tty device.");
1704 PyDoc_STRVAR(context_doc
,
1705 "__context__() -> self.");
1707 PyDoc_STRVAR(enter_doc
,
1708 "__enter__() -> self.");
1710 static PyMethodDef file_methods
[] = {
1711 {"readline", (PyCFunction
)file_readline
, METH_VARARGS
, readline_doc
},
1712 {"read", (PyCFunction
)file_read
, METH_VARARGS
, read_doc
},
1713 {"write", (PyCFunction
)file_write
, METH_VARARGS
, write_doc
},
1714 {"fileno", (PyCFunction
)file_fileno
, METH_NOARGS
, fileno_doc
},
1715 {"seek", (PyCFunction
)file_seek
, METH_VARARGS
, seek_doc
},
1716 #ifdef HAVE_FTRUNCATE
1717 {"truncate", (PyCFunction
)file_truncate
, METH_VARARGS
, truncate_doc
},
1719 {"tell", (PyCFunction
)file_tell
, METH_NOARGS
, tell_doc
},
1720 {"readinto", (PyCFunction
)file_readinto
, METH_VARARGS
, readinto_doc
},
1721 {"readlines", (PyCFunction
)file_readlines
,METH_VARARGS
, readlines_doc
},
1722 {"xreadlines",(PyCFunction
)file_self
, METH_NOARGS
, xreadlines_doc
},
1723 {"writelines",(PyCFunction
)file_writelines
, METH_O
, writelines_doc
},
1724 {"flush", (PyCFunction
)file_flush
, METH_NOARGS
, flush_doc
},
1725 {"close", (PyCFunction
)file_close
, METH_NOARGS
, close_doc
},
1726 {"isatty", (PyCFunction
)file_isatty
, METH_NOARGS
, isatty_doc
},
1727 {"__context__", (PyCFunction
)file_self
, METH_NOARGS
, context_doc
},
1728 {"__enter__", (PyCFunction
)file_self
, METH_NOARGS
, enter_doc
},
1729 {"__exit__", (PyCFunction
)file_close
, METH_VARARGS
, close_doc
},
1730 {NULL
, NULL
} /* sentinel */
1733 #define OFF(x) offsetof(PyFileObject, x)
1735 static PyMemberDef file_memberlist
[] = {
1736 {"softspace", T_INT
, OFF(f_softspace
), 0,
1737 "flag indicating that a space needs to be printed; used by print"},
1738 {"mode", T_OBJECT
, OFF(f_mode
), RO
,
1739 "file mode ('r', 'U', 'w', 'a', possibly with 'b' or '+' added)"},
1740 {"name", T_OBJECT
, OFF(f_name
), RO
,
1742 {"encoding", T_OBJECT
, OFF(f_encoding
), RO
,
1744 /* getattr(f, "closed") is implemented without this table */
1745 {NULL
} /* Sentinel */
1749 get_closed(PyFileObject
*f
, void *closure
)
1751 return PyBool_FromLong((long)(f
->f_fp
== 0));
1754 get_newlines(PyFileObject
*f
, void *closure
)
1756 switch (f
->f_newlinetypes
) {
1757 case NEWLINE_UNKNOWN
:
1761 return PyString_FromString("\r");
1763 return PyString_FromString("\n");
1764 case NEWLINE_CR
|NEWLINE_LF
:
1765 return Py_BuildValue("(ss)", "\r", "\n");
1767 return PyString_FromString("\r\n");
1768 case NEWLINE_CR
|NEWLINE_CRLF
:
1769 return Py_BuildValue("(ss)", "\r", "\r\n");
1770 case NEWLINE_LF
|NEWLINE_CRLF
:
1771 return Py_BuildValue("(ss)", "\n", "\r\n");
1772 case NEWLINE_CR
|NEWLINE_LF
|NEWLINE_CRLF
:
1773 return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
1775 PyErr_Format(PyExc_SystemError
,
1776 "Unknown newlines value 0x%x\n",
1782 static PyGetSetDef file_getsetlist
[] = {
1783 {"closed", (getter
)get_closed
, NULL
, "True if the file is closed"},
1784 {"newlines", (getter
)get_newlines
, NULL
,
1785 "end-of-line convention used in this file"},
1790 drop_readahead(PyFileObject
*f
)
1792 if (f
->f_buf
!= NULL
) {
1793 PyMem_Free(f
->f_buf
);
1798 /* Make sure that file has a readahead buffer with at least one byte
1799 (unless at EOF) and no more than bufsize. Returns negative value on
1800 error, will set MemoryError if bufsize bytes cannot be allocated. */
1802 readahead(PyFileObject
*f
, int bufsize
)
1804 Py_ssize_t chunksize
;
1806 if (f
->f_buf
!= NULL
) {
1807 if( (f
->f_bufend
- f
->f_bufptr
) >= 1)
1812 if ((f
->f_buf
= PyMem_Malloc(bufsize
)) == NULL
) {
1816 Py_BEGIN_ALLOW_THREADS
1818 chunksize
= Py_UniversalNewlineFread(
1819 f
->f_buf
, bufsize
, f
->f_fp
, (PyObject
*)f
);
1820 Py_END_ALLOW_THREADS
1821 if (chunksize
== 0) {
1822 if (ferror(f
->f_fp
)) {
1823 PyErr_SetFromErrno(PyExc_IOError
);
1829 f
->f_bufptr
= f
->f_buf
;
1830 f
->f_bufend
= f
->f_buf
+ chunksize
;
1834 /* Used by file_iternext. The returned string will start with 'skip'
1835 uninitialized bytes followed by the remainder of the line. Don't be
1836 horrified by the recursive call: maximum recursion depth is limited by
1837 logarithmic buffer growth to about 50 even when reading a 1gb line. */
1839 static PyStringObject
*
1840 readahead_get_line_skip(PyFileObject
*f
, int skip
, int bufsize
)
1847 if (f
->f_buf
== NULL
)
1848 if (readahead(f
, bufsize
) < 0)
1851 len
= f
->f_bufend
- f
->f_bufptr
;
1853 return (PyStringObject
*)
1854 PyString_FromStringAndSize(NULL
, skip
);
1855 bufptr
= memchr(f
->f_bufptr
, '\n', len
);
1856 if (bufptr
!= NULL
) {
1857 bufptr
++; /* Count the '\n' */
1858 len
= bufptr
- f
->f_bufptr
;
1859 s
= (PyStringObject
*)
1860 PyString_FromStringAndSize(NULL
, skip
+len
);
1863 memcpy(PyString_AS_STRING(s
)+skip
, f
->f_bufptr
, len
);
1864 f
->f_bufptr
= bufptr
;
1865 if (bufptr
== f
->f_bufend
)
1868 bufptr
= f
->f_bufptr
;
1870 f
->f_buf
= NULL
; /* Force new readahead buffer */
1871 assert(skip
+len
< INT_MAX
);
1872 s
= readahead_get_line_skip(
1873 f
, (int)(skip
+len
), bufsize
+ (bufsize
>>2) );
1878 memcpy(PyString_AS_STRING(s
)+skip
, bufptr
, len
);
1884 /* A larger buffer size may actually decrease performance. */
1885 #define READAHEAD_BUFSIZE 8192
1888 file_iternext(PyFileObject
*f
)
1892 if (f
->f_fp
== NULL
)
1893 return err_closed();
1895 l
= readahead_get_line_skip(f
, 0, READAHEAD_BUFSIZE
);
1896 if (l
== NULL
|| PyString_GET_SIZE(l
) == 0) {
1900 return (PyObject
*)l
;
1905 file_new(PyTypeObject
*type
, PyObject
*args
, PyObject
*kwds
)
1908 static PyObject
*not_yet_string
;
1910 assert(type
!= NULL
&& type
->tp_alloc
!= NULL
);
1912 if (not_yet_string
== NULL
) {
1913 not_yet_string
= PyString_FromString("<uninitialized file>");
1914 if (not_yet_string
== NULL
)
1918 self
= type
->tp_alloc(type
, 0);
1920 /* Always fill in the name and mode, so that nobody else
1921 needs to special-case NULLs there. */
1922 Py_INCREF(not_yet_string
);
1923 ((PyFileObject
*)self
)->f_name
= not_yet_string
;
1924 Py_INCREF(not_yet_string
);
1925 ((PyFileObject
*)self
)->f_mode
= not_yet_string
;
1927 ((PyFileObject
*)self
)->f_encoding
= Py_None
;
1928 ((PyFileObject
*)self
)->weakreflist
= NULL
;
1934 file_init(PyObject
*self
, PyObject
*args
, PyObject
*kwds
)
1936 PyFileObject
*foself
= (PyFileObject
*)self
;
1938 static char *kwlist
[] = {"name", "mode", "buffering", 0};
1942 int wideargument
= 0;
1944 assert(PyFile_Check(self
));
1945 if (foself
->f_fp
!= NULL
) {
1946 /* Have to close the existing file first. */
1947 PyObject
*closeresult
= file_close(foself
);
1948 if (closeresult
== NULL
)
1950 Py_DECREF(closeresult
);
1953 #ifdef Py_WIN_WIDE_FILENAMES
1954 if (GetVersion() < 0x80000000) { /* On NT, so wide API available */
1956 if (PyArg_ParseTupleAndKeywords(args
, kwds
, "U|si:file",
1957 kwlist
, &po
, &mode
, &bufsize
)) {
1959 if (fill_file_fields(foself
, NULL
, po
, mode
,
1963 /* Drop the argument parsing error as narrow
1964 strings are also valid. */
1970 if (!wideargument
) {
1973 if (!PyArg_ParseTupleAndKeywords(args
, kwds
, "et|si:file", kwlist
,
1974 Py_FileSystemDefaultEncoding
,
1979 /* We parse again to get the name as a PyObject */
1980 if (!PyArg_ParseTupleAndKeywords(args
, kwds
, "O|si:file",
1981 kwlist
, &o_name
, &mode
,
1985 if (fill_file_fields(foself
, NULL
, o_name
, mode
,
1989 if (open_the_file(foself
, name
, mode
) == NULL
)
1991 foself
->f_setbuf
= NULL
;
1992 PyFile_SetBufSize(self
, bufsize
);
1999 PyMem_Free(name
); /* free the encoded string */
2003 PyDoc_VAR(file_doc
) =
2005 "file(name[, mode[, buffering]]) -> file object\n"
2007 "Open a file. The mode can be 'r', 'w' or 'a' for reading (default),\n"
2008 "writing or appending. The file will be created if it doesn't exist\n"
2009 "when opened for writing or appending; it will be truncated when\n"
2010 "opened for writing. Add a 'b' to the mode for binary files.\n"
2011 "Add a '+' to the mode to allow simultaneous reading and writing.\n"
2012 "If the buffering argument is given, 0 means unbuffered, 1 means line\n"
2013 "buffered, and larger numbers specify the buffer size.\n"
2016 "Add a 'U' to mode to open the file for input with universal newline\n"
2017 "support. Any line ending in the input file will be seen as a '\\n'\n"
2018 "in Python. Also, a file so opened gains the attribute 'newlines';\n"
2019 "the value for this attribute is one of None (no newline read yet),\n"
2020 "'\\r', '\\n', '\\r\\n' or a tuple containing all the newline types seen.\n"
2022 "'U' cannot be combined with 'w' or '+' mode.\n"
2026 "Note: open() is an alias for file()."
2029 PyTypeObject PyFile_Type
= {
2030 PyObject_HEAD_INIT(&PyType_Type
)
2033 sizeof(PyFileObject
),
2035 (destructor
)file_dealloc
, /* tp_dealloc */
2040 (reprfunc
)file_repr
, /* tp_repr */
2041 0, /* tp_as_number */
2042 0, /* tp_as_sequence */
2043 0, /* tp_as_mapping */
2047 PyObject_GenericGetAttr
, /* tp_getattro */
2048 /* softspace is writable: we must supply tp_setattro */
2049 PyObject_GenericSetAttr
, /* tp_setattro */
2050 0, /* tp_as_buffer */
2051 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_BASETYPE
| Py_TPFLAGS_HAVE_WEAKREFS
, /* tp_flags */
2052 file_doc
, /* tp_doc */
2053 0, /* tp_traverse */
2055 0, /* tp_richcompare */
2056 offsetof(PyFileObject
, weakreflist
), /* tp_weaklistoffset */
2057 (getiterfunc
)file_self
, /* tp_iter */
2058 (iternextfunc
)file_iternext
, /* tp_iternext */
2059 file_methods
, /* tp_methods */
2060 file_memberlist
, /* tp_members */
2061 file_getsetlist
, /* tp_getset */
2064 0, /* tp_descr_get */
2065 0, /* tp_descr_set */
2066 0, /* tp_dictoffset */
2067 file_init
, /* tp_init */
2068 PyType_GenericAlloc
, /* tp_alloc */
2069 file_new
, /* tp_new */
2070 PyObject_Del
, /* tp_free */
2073 /* Interface for the 'soft space' between print items. */
2076 PyFile_SoftSpace(PyObject
*f
, int newflag
)
2082 else if (PyFile_Check(f
)) {
2083 oldflag
= ((PyFileObject
*)f
)->f_softspace
;
2084 ((PyFileObject
*)f
)->f_softspace
= newflag
;
2088 v
= PyObject_GetAttrString(f
, "softspace");
2093 oldflag
= PyInt_AsLong(v
);
2094 assert(oldflag
< INT_MAX
);
2097 v
= PyInt_FromLong((long)newflag
);
2101 if (PyObject_SetAttrString(f
, "softspace", v
) != 0)
2106 return (int)oldflag
;
2109 /* Interfaces to write objects/strings to file-like objects */
2112 PyFile_WriteObject(PyObject
*v
, PyObject
*f
, int flags
)
2114 PyObject
*writer
, *value
, *args
, *result
;
2116 PyErr_SetString(PyExc_TypeError
, "writeobject with NULL file");
2119 else if (PyFile_Check(f
)) {
2120 FILE *fp
= PyFile_AsFile(f
);
2121 #ifdef Py_USING_UNICODE
2122 PyObject
*enc
= ((PyFileObject
*)f
)->f_encoding
;
2129 #ifdef Py_USING_UNICODE
2130 if ((flags
& Py_PRINT_RAW
) &&
2131 PyUnicode_Check(v
) && enc
!= Py_None
) {
2132 char *cenc
= PyString_AS_STRING(enc
);
2133 value
= PyUnicode_AsEncodedString(v
, cenc
, "strict");
2140 result
= PyObject_Print(value
, fp
, flags
);
2144 return PyObject_Print(v
, fp
, flags
);
2147 writer
= PyObject_GetAttrString(f
, "write");
2150 if (flags
& Py_PRINT_RAW
) {
2151 if (PyUnicode_Check(v
)) {
2155 value
= PyObject_Str(v
);
2158 value
= PyObject_Repr(v
);
2159 if (value
== NULL
) {
2163 args
= PyTuple_Pack(1, value
);
2169 result
= PyEval_CallObject(writer
, args
);
2180 PyFile_WriteString(const char *s
, PyObject
*f
)
2183 /* Should be caused by a pre-existing error */
2184 if (!PyErr_Occurred())
2185 PyErr_SetString(PyExc_SystemError
,
2186 "null file for PyFile_WriteString");
2189 else if (PyFile_Check(f
)) {
2190 FILE *fp
= PyFile_AsFile(f
);
2198 else if (!PyErr_Occurred()) {
2199 PyObject
*v
= PyString_FromString(s
);
2203 err
= PyFile_WriteObject(v
, f
, Py_PRINT_RAW
);
2211 /* Try to get a file-descriptor from a Python object. If the object
2212 is an integer or long integer, its value is returned. If not, the
2213 object's fileno() method is called if it exists; the method must return
2214 an integer or long integer, which is returned as the file descriptor value.
2215 -1 is returned on failure.
2218 int PyObject_AsFileDescriptor(PyObject
*o
)
2223 if (PyInt_Check(o
)) {
2224 fd
= PyInt_AsLong(o
);
2226 else if (PyLong_Check(o
)) {
2227 fd
= PyLong_AsLong(o
);
2229 else if ((meth
= PyObject_GetAttrString(o
, "fileno")) != NULL
)
2231 PyObject
*fno
= PyEval_CallObject(meth
, NULL
);
2236 if (PyInt_Check(fno
)) {
2237 fd
= PyInt_AsLong(fno
);
2240 else if (PyLong_Check(fno
)) {
2241 fd
= PyLong_AsLong(fno
);
2245 PyErr_SetString(PyExc_TypeError
,
2246 "fileno() returned a non-integer");
2252 PyErr_SetString(PyExc_TypeError
,
2253 "argument must be an int, or have a fileno() method.");
2258 PyErr_Format(PyExc_ValueError
,
2259 "file descriptor cannot be a negative integer (%i)",
2266 /* From here on we need access to the real fgets and fread */
2271 ** Py_UniversalNewlineFgets is an fgets variation that understands
2272 ** all of \r, \n and \r\n conventions.
2273 ** The stream should be opened in binary mode.
2274 ** If fobj is NULL the routine always does newline conversion, and
2275 ** it may peek one char ahead to gobble the second char in \r\n.
2276 ** If fobj is non-NULL it must be a PyFileObject. In this case there
2277 ** is no readahead but in stead a flag is used to skip a following
2278 ** \n on the next read. Also, if the file is open in binary mode
2279 ** the whole conversion is skipped. Finally, the routine keeps track of
2280 ** the different types of newlines seen.
2281 ** Note that we need no error handling: fgets() treats error and eof
2285 Py_UniversalNewlineFgets(char *buf
, int n
, FILE *stream
, PyObject
*fobj
)
2289 int newlinetypes
= 0;
2291 int univ_newline
= 1;
2294 if (!PyFile_Check(fobj
)) {
2295 errno
= ENXIO
; /* What can you do... */
2298 univ_newline
= ((PyFileObject
*)fobj
)->f_univ_newline
;
2299 if ( !univ_newline
)
2300 return fgets(buf
, n
, stream
);
2301 newlinetypes
= ((PyFileObject
*)fobj
)->f_newlinetypes
;
2302 skipnextlf
= ((PyFileObject
*)fobj
)->f_skipnextlf
;
2305 c
= 'x'; /* Shut up gcc warning */
2306 while (--n
> 0 && (c
= GETC(stream
)) != EOF
) {
2310 /* Seeing a \n here with skipnextlf true
2311 ** means we saw a \r before.
2313 newlinetypes
|= NEWLINE_CRLF
;
2315 if (c
== EOF
) break;
2318 ** Note that c == EOF also brings us here,
2319 ** so we're okay if the last char in the file
2322 newlinetypes
|= NEWLINE_CR
;
2326 /* A \r is translated into a \n, and we skip
2327 ** an adjacent \n, if any. We don't set the
2328 ** newlinetypes flag until we've seen the next char.
2332 } else if ( c
== '\n') {
2333 newlinetypes
|= NEWLINE_LF
;
2336 if (c
== '\n') break;
2338 if ( c
== EOF
&& skipnextlf
)
2339 newlinetypes
|= NEWLINE_CR
;
2340 FUNLOCKFILE(stream
);
2343 ((PyFileObject
*)fobj
)->f_newlinetypes
= newlinetypes
;
2344 ((PyFileObject
*)fobj
)->f_skipnextlf
= skipnextlf
;
2345 } else if ( skipnextlf
) {
2346 /* If we have no file object we cannot save the
2347 ** skipnextlf flag. We have to readahead, which
2348 ** will cause a pause if we're reading from an
2349 ** interactive stream, but that is very unlikely
2350 ** unless we're doing something silly like
2351 ** execfile("/dev/tty").
/*
** Py_UniversalNewlineFread is an fread variation that understands
** all of \r, \n and \r\n conventions.
** The stream should be opened in binary mode.
** fobj must be a PyFileObject. There is no readahead; instead a flag
** is used to skip a following \n on the next read. Also, if the file
** is open in binary mode the whole conversion is skipped. Finally,
** the routine keeps track of the different types of newlines seen.
*/
2373 Py_UniversalNewlineFread(char *buf
, size_t n
,
2374 FILE *stream
, PyObject
*fobj
)
2377 PyFileObject
*f
= (PyFileObject
*)fobj
;
2378 int newlinetypes
, skipnextlf
;
2380 assert(buf
!= NULL
);
2381 assert(stream
!= NULL
);
2383 if (!fobj
|| !PyFile_Check(fobj
)) {
2384 errno
= ENXIO
; /* What can you do... */
2387 if (!f
->f_univ_newline
)
2388 return fread(buf
, 1, n
, stream
);
2389 newlinetypes
= f
->f_newlinetypes
;
2390 skipnextlf
= f
->f_skipnextlf
;
2391 /* Invariant: n is the number of bytes remaining to be filled
2399 nread
= fread(dst
, 1, n
, stream
);
2404 n
-= nread
; /* assuming 1 byte out for each in; will adjust */
2405 shortread
= n
!= 0; /* true iff EOF or error */
2409 /* Save as LF and set flag to skip next LF. */
2413 else if (skipnextlf
&& c
== '\n') {
2414 /* Skip LF, and remember we saw CR LF. */
2416 newlinetypes
|= NEWLINE_CRLF
;
2420 /* Normal char to be stored in buffer. Also
2421 * update the newlinetypes flag if either this
2422 * is an LF or the previous char was a CR.
2425 newlinetypes
|= NEWLINE_LF
;
2426 else if (skipnextlf
)
2427 newlinetypes
|= NEWLINE_CR
;
2433 /* If this is EOF, update type flags. */
2434 if (skipnextlf
&& feof(stream
))
2435 newlinetypes
|= NEWLINE_CR
;
2439 f
->f_newlinetypes
= newlinetypes
;
2440 f
->f_skipnextlf
= skipnextlf
;