1 /* File object implementation */
3 #define PY_SSIZE_T_CLEAN
5 #include "structmember.h"
7 #ifdef HAVE_SYS_TYPES_H
9 #endif /* HAVE_SYS_TYPES_H */
12 #define fileno _fileno
13 /* can simulate truncate with Win32 API functions; see file_truncate */
14 #define HAVE_FTRUNCATE
15 #define WIN32_LEAN_AND_MEAN
20 /* Need GetVersion to see if on NT so safe to use _wfopen */
21 #define WIN32_LEAN_AND_MEAN
25 #if defined(PYOS_OS2) && defined(PYCC_GCC)
29 #define BUF(v) PyString_AS_STRING((PyStringObject *)v)
31 #ifndef DONT_HAVE_ERRNO_H
35 #ifdef HAVE_GETC_UNLOCKED
36 #define GETC(f) getc_unlocked(f)
37 #define FLOCKFILE(f) flockfile(f)
38 #define FUNLOCKFILE(f) funlockfile(f)
40 #define GETC(f) getc(f)
42 #define FUNLOCKFILE(f)
45 /* Bits in f_newlinetypes */
46 #define NEWLINE_UNKNOWN 0 /* No newline seen, yet */
47 #define NEWLINE_CR 1 /* \r newline seen */
48 #define NEWLINE_LF 2 /* \n newline seen */
49 #define NEWLINE_CRLF 4 /* \r\n newline seen */
56 PyFile_AsFile(PyObject
*f
)
58 if (f
== NULL
|| !PyFile_Check(f
))
61 return ((PyFileObject
*)f
)->f_fp
;
65 PyFile_Name(PyObject
*f
)
67 if (f
== NULL
|| !PyFile_Check(f
))
70 return ((PyFileObject
*)f
)->f_name
;
73 /* On Unix, fopen will succeed for directories.
74 In Python, there should be no file objects referring to
75 directories, so we need a check. */
78 dircheck(PyFileObject
* f
)
80 #if defined(HAVE_FSTAT) && defined(S_IFDIR) && defined(EISDIR)
84 if (fstat(fileno(f
->f_fp
), &buf
) == 0 &&
85 S_ISDIR(buf
.st_mode
)) {
86 char *msg
= strerror(EISDIR
);
87 PyObject
*exc
= PyObject_CallFunction(PyExc_IOError
, "(is)",
89 PyErr_SetObject(PyExc_IOError
, exc
);
99 fill_file_fields(PyFileObject
*f
, FILE *fp
, PyObject
*name
, char *mode
,
100 int (*close
)(FILE *))
102 assert(name
!= NULL
);
104 assert(PyFile_Check(f
));
105 assert(f
->f_fp
== NULL
);
107 Py_DECREF(f
->f_name
);
108 Py_DECREF(f
->f_mode
);
109 Py_DECREF(f
->f_encoding
);
114 f
->f_mode
= PyString_FromString(mode
);
118 f
->f_binary
= strchr(mode
,'b') != NULL
;
120 f
->f_univ_newline
= (strchr(mode
, 'U') != NULL
);
121 f
->f_newlinetypes
= NEWLINE_UNKNOWN
;
124 f
->f_encoding
= Py_None
;
126 if (f
->f_mode
== NULL
)
130 return (PyObject
*) f
;
133 /* check for known incorrect mode strings - problem is, platforms are
134 free to accept any mode characters they like and are supposed to
135 ignore stuff they don't understand... write or append mode with
136 universal newline support is expressly forbidden by PEP 278.
137 Additionally, remove the 'U' from the mode string as platforms
138 won't know what it is. Non-zero return signals an exception */
140 _PyFile_SanitizeMode(char *mode
)
143 size_t len
= strlen(mode
);
146 PyErr_SetString(PyExc_ValueError
, "empty mode string");
150 upos
= strchr(mode
, 'U');
152 memmove(upos
, upos
+1, len
-(upos
-mode
)); /* incl null char */
154 if (mode
[0] == 'w' || mode
[0] == 'a') {
155 PyErr_Format(PyExc_ValueError
, "universal newline "
156 "mode can only be used with modes "
157 "starting with 'r'");
161 if (mode
[0] != 'r') {
162 memmove(mode
+1, mode
, strlen(mode
)+1);
166 if (!strchr(mode
, 'b')) {
167 memmove(mode
+2, mode
+1, strlen(mode
));
170 } else if (mode
[0] != 'r' && mode
[0] != 'w' && mode
[0] != 'a') {
171 PyErr_Format(PyExc_ValueError
, "mode string must begin with "
172 "one of 'r', 'w', 'a' or 'U', not '%.200s'", mode
);
180 open_the_file(PyFileObject
*f
, char *name
, char *mode
)
184 assert(PyFile_Check(f
));
186 /* windows ignores the passed name in order to support Unicode */
187 assert(f
->f_name
!= NULL
);
189 assert(name
!= NULL
);
191 assert(mode
!= NULL
);
192 assert(f
->f_fp
== NULL
);
194 /* probably need to replace 'U' by 'rb' */
195 newmode
= PyMem_MALLOC(strlen(mode
) + 3);
200 strcpy(newmode
, mode
);
202 if (_PyFile_SanitizeMode(newmode
)) {
207 /* rexec.py can't stop a user from getting the file() constructor --
208 all they have to do is get *any* file object f, and then do
209 type(f). Here we prevent them from doing damage with it. */
210 if (PyEval_GetRestricted()) {
211 PyErr_SetString(PyExc_IOError
,
212 "file() constructor not accessible in restricted mode");
219 if (PyUnicode_Check(f
->f_name
)) {
221 wmode
= PyUnicode_DecodeASCII(newmode
, strlen(newmode
), NULL
);
222 if (f
->f_name
&& wmode
) {
223 Py_BEGIN_ALLOW_THREADS
224 /* PyUnicode_AS_UNICODE OK without thread
225 lock as it is a simple dereference. */
226 f
->f_fp
= _wfopen(PyUnicode_AS_UNICODE(f
->f_name
),
227 PyUnicode_AS_UNICODE(wmode
));
233 if (NULL
== f
->f_fp
&& NULL
!= name
) {
234 Py_BEGIN_ALLOW_THREADS
235 f
->f_fp
= fopen(name
, newmode
);
239 if (f
->f_fp
== NULL
) {
240 #if defined _MSC_VER && (_MSC_VER < 1400 || !defined(__STDC_SECURE_LIB__))
241 /* MSVC 6 (Microsoft) leaves errno at 0 for bad mode strings,
242 * across all Windows flavors. When it sets EINVAL varies
243 * across Windows flavors, the exact conditions aren't
244 * documented, and the answer lies in the OS's implementation
245 * of Win32's CreateFile function (whose source is secret).
246 * Seems the best we can do is map EINVAL to ENOENT.
247 * Starting with Visual Studio .NET 2005, EINVAL is correctly
248 * set by our CRT error handler (set in exceptions.c.)
250 if (errno
== 0) /* bad mode string */
252 else if (errno
== EINVAL
) /* unknown, but not a mode string */
255 /* EINVAL is returned when an invalid filename or
256 * an invalid mode is supplied. */
258 PyErr_Format(PyExc_IOError
,
259 "invalid filename: %s or mode: %s",
262 PyErr_SetFromErrnoWithFilenameObject(PyExc_IOError
, f
->f_name
);
271 return (PyObject
*)f
;
275 PyFile_FromFile(FILE *fp
, char *name
, char *mode
, int (*close
)(FILE *))
277 PyFileObject
*f
= (PyFileObject
*)PyFile_Type
.tp_new(&PyFile_Type
,
280 PyObject
*o_name
= PyString_FromString(name
);
283 if (fill_file_fields(f
, fp
, o_name
, mode
, close
) == NULL
) {
289 return (PyObject
*) f
;
293 PyFile_FromString(char *name
, char *mode
)
295 extern int fclose(FILE *);
298 f
= (PyFileObject
*)PyFile_FromFile((FILE *)NULL
, name
, mode
, fclose
);
300 if (open_the_file(f
, name
, mode
) == NULL
) {
305 return (PyObject
*)f
;
309 PyFile_SetBufSize(PyObject
*f
, int bufsize
)
311 PyFileObject
*file
= (PyFileObject
*)f
;
332 if (type
== _IONBF
) {
333 PyMem_Free(file
->f_setbuf
);
334 file
->f_setbuf
= NULL
;
336 file
->f_setbuf
= (char *)PyMem_Realloc(file
->f_setbuf
,
340 setvbuf(file
->f_fp
, file
->f_setbuf
, type
, bufsize
);
341 #else /* !HAVE_SETVBUF */
342 setbuf(file
->f_fp
, file
->f_setbuf
);
343 #endif /* !HAVE_SETVBUF */
347 /* Set the encoding used to output Unicode strings.
348 Return 1 on success, 0 on failure. */
351 PyFile_SetEncoding(PyObject
*f
, const char *enc
)
353 PyFileObject
*file
= (PyFileObject
*)f
;
354 PyObject
*str
= PyString_FromString(enc
);
356 assert(PyFile_Check(f
));
359 Py_DECREF(file
->f_encoding
);
360 file
->f_encoding
= str
;
367 PyErr_SetString(PyExc_ValueError
, "I/O operation on closed file");
371 /* Refuse regular file I/O if there's data in the iteration-buffer.
372 * Mixing them would cause data to arrive out of order, as the read*
373 * methods don't use the iteration buffer. */
375 err_iterbuffered(void)
377 PyErr_SetString(PyExc_ValueError
,
378 "Mixing iteration and read methods would lose data");
382 static void drop_readahead(PyFileObject
*);
387 file_dealloc(PyFileObject
*f
)
390 if (f
->weakreflist
!= NULL
)
391 PyObject_ClearWeakRefs((PyObject
*) f
);
392 if (f
->f_fp
!= NULL
&& f
->f_close
!= NULL
) {
393 Py_BEGIN_ALLOW_THREADS
394 sts
= (*f
->f_close
)(f
->f_fp
);
397 PySys_WriteStderr("close failed: [Errno %d] %s\n", errno
, strerror(errno
));
399 PyMem_Free(f
->f_setbuf
);
400 Py_XDECREF(f
->f_name
);
401 Py_XDECREF(f
->f_mode
);
402 Py_XDECREF(f
->f_encoding
);
404 Py_TYPE(f
)->tp_free((PyObject
*)f
);
408 file_repr(PyFileObject
*f
)
410 if (PyUnicode_Check(f
->f_name
)) {
411 #ifdef Py_USING_UNICODE
412 PyObject
*ret
= NULL
;
413 PyObject
*name
= PyUnicode_AsUnicodeEscapeString(f
->f_name
);
414 const char *name_str
= name
? PyString_AsString(name
) : "?";
415 ret
= PyString_FromFormat("<%s file u'%s', mode '%s' at %p>",
416 f
->f_fp
== NULL
? "closed" : "open",
418 PyString_AsString(f
->f_mode
),
424 return PyString_FromFormat("<%s file '%s', mode '%s' at %p>",
425 f
->f_fp
== NULL
? "closed" : "open",
426 PyString_AsString(f
->f_name
),
427 PyString_AsString(f
->f_mode
),
433 file_close(PyFileObject
*f
)
436 if (f
->f_fp
!= NULL
) {
437 if (f
->f_close
!= NULL
) {
438 Py_BEGIN_ALLOW_THREADS
440 sts
= (*f
->f_close
)(f
->f_fp
);
445 PyMem_Free(f
->f_setbuf
);
448 return PyErr_SetFromErrno(PyExc_IOError
);
450 return PyInt_FromLong((long)sts
);
456 /* Our very own off_t-like type, 64-bit if possible */
457 #if !defined(HAVE_LARGEFILE_SUPPORT)
458 typedef off_t Py_off_t
;
459 #elif SIZEOF_OFF_T >= 8
460 typedef off_t Py_off_t
;
461 #elif SIZEOF_FPOS_T >= 8
462 typedef fpos_t Py_off_t
;
464 #error "Large file support, but neither off_t nor fpos_t is large enough."
468 /* a portable fseek() function
469 return 0 on success, non-zero on failure (with errno set) */
471 _portable_fseek(FILE *fp
, Py_off_t offset
, int whence
)
473 #if !defined(HAVE_LARGEFILE_SUPPORT)
474 return fseek(fp
, offset
, whence
);
475 #elif defined(HAVE_FSEEKO) && SIZEOF_OFF_T >= 8
476 return fseeko(fp
, offset
, whence
);
477 #elif defined(HAVE_FSEEK64)
478 return fseek64(fp
, offset
, whence
);
479 #elif defined(__BEOS__)
480 return _fseek(fp
, offset
, whence
);
481 #elif SIZEOF_FPOS_T >= 8
482 /* lacking a 64-bit capable fseek(), use a 64-bit capable fsetpos()
483 and fgetpos() to implement fseek()*/
489 if (_lseeki64(fileno(fp
), 0, 2) == -1)
492 if (fseek(fp
, 0, SEEK_END
) != 0)
497 if (fgetpos(fp
, &pos
) != 0)
501 /* case SEEK_SET: break; */
503 return fsetpos(fp
, &offset
);
505 #error "Large file support, but no way to fseek."
510 /* a portable ftell() function
511 Return -1 on failure with errno set appropriately, current file
512 position on success */
514 _portable_ftell(FILE* fp
)
516 #if !defined(HAVE_LARGEFILE_SUPPORT)
518 #elif defined(HAVE_FTELLO) && SIZEOF_OFF_T >= 8
520 #elif defined(HAVE_FTELL64)
522 #elif SIZEOF_FPOS_T >= 8
524 if (fgetpos(fp
, &pos
) != 0)
528 #error "Large file support, but no way to ftell."
534 file_seek(PyFileObject
*f
, PyObject
*args
)
539 PyObject
*offobj
, *off_index
;
545 if (!PyArg_ParseTuple(args
, "O|i:seek", &offobj
, &whence
))
547 off_index
= PyNumber_Index(offobj
);
549 if (!PyFloat_Check(offobj
))
551 /* Deprecated in 2.6 */
553 if (PyErr_Warn(PyExc_DeprecationWarning
,
554 "integer argument expected, got float"))
559 #if !defined(HAVE_LARGEFILE_SUPPORT)
560 offset
= PyInt_AsLong(off_index
);
562 offset
= PyLong_Check(off_index
) ?
563 PyLong_AsLongLong(off_index
) : PyInt_AsLong(off_index
);
565 Py_DECREF(off_index
);
566 if (PyErr_Occurred())
569 Py_BEGIN_ALLOW_THREADS
571 ret
= _portable_fseek(f
->f_fp
, offset
, whence
);
575 PyErr_SetFromErrno(PyExc_IOError
);
585 #ifdef HAVE_FTRUNCATE
587 file_truncate(PyFileObject
*f
, PyObject
*args
)
590 PyObject
*newsizeobj
= NULL
;
596 if (!PyArg_UnpackTuple(args
, "truncate", 0, 1, &newsizeobj
))
599 /* Get current file position. If the file happens to be open for
600 * update and the last operation was an input operation, C doesn't
601 * define what the later fflush() will do, but we promise truncate()
602 * won't change the current position (and fflush() *does* change it
603 * then at least on Windows). The easiest thing is to capture
604 * current pos now and seek back to it at the end.
606 Py_BEGIN_ALLOW_THREADS
608 initialpos
= _portable_ftell(f
->f_fp
);
610 if (initialpos
== -1)
613 /* Set newsize to current position if newsizeobj NULL, else to the
616 if (newsizeobj
!= NULL
) {
617 #if !defined(HAVE_LARGEFILE_SUPPORT)
618 newsize
= PyInt_AsLong(newsizeobj
);
620 newsize
= PyLong_Check(newsizeobj
) ?
621 PyLong_AsLongLong(newsizeobj
) :
622 PyInt_AsLong(newsizeobj
);
624 if (PyErr_Occurred())
627 else /* default to current position */
628 newsize
= initialpos
;
630 /* Flush the stream. We're mixing stream-level I/O with lower-level
631 * I/O, and a flush may be necessary to synch both platform views
632 * of the current file state.
634 Py_BEGIN_ALLOW_THREADS
636 ret
= fflush(f
->f_fp
);
642 /* MS _chsize doesn't work if newsize doesn't fit in 32 bits,
643 so don't even try using it. */
647 /* Have to move current pos to desired endpoint on Windows. */
648 Py_BEGIN_ALLOW_THREADS
650 ret
= _portable_fseek(f
->f_fp
, newsize
, SEEK_SET
) != 0;
655 /* Truncate. Note that this may grow the file! */
656 Py_BEGIN_ALLOW_THREADS
658 hFile
= (HANDLE
)_get_osfhandle(fileno(f
->f_fp
));
659 ret
= hFile
== (HANDLE
)-1;
661 ret
= SetEndOfFile(hFile
) == 0;
670 Py_BEGIN_ALLOW_THREADS
672 ret
= ftruncate(fileno(f
->f_fp
), newsize
);
676 #endif /* !MS_WINDOWS */
678 /* Restore original file position. */
679 Py_BEGIN_ALLOW_THREADS
681 ret
= _portable_fseek(f
->f_fp
, initialpos
, SEEK_SET
) != 0;
690 PyErr_SetFromErrno(PyExc_IOError
);
694 #endif /* HAVE_FTRUNCATE */
697 file_tell(PyFileObject
*f
)
703 Py_BEGIN_ALLOW_THREADS
705 pos
= _portable_ftell(f
->f_fp
);
708 PyErr_SetFromErrno(PyExc_IOError
);
712 if (f
->f_skipnextlf
) {
716 f
->f_newlinetypes
|= NEWLINE_CRLF
;
719 } else if (c
!= EOF
) ungetc(c
, f
->f_fp
);
721 #if !defined(HAVE_LARGEFILE_SUPPORT)
722 return PyInt_FromLong(pos
);
724 return PyLong_FromLongLong(pos
);
729 file_fileno(PyFileObject
*f
)
733 return PyInt_FromLong((long) fileno(f
->f_fp
));
737 file_flush(PyFileObject
*f
)
743 Py_BEGIN_ALLOW_THREADS
745 res
= fflush(f
->f_fp
);
748 PyErr_SetFromErrno(PyExc_IOError
);
757 file_isatty(PyFileObject
*f
)
762 Py_BEGIN_ALLOW_THREADS
763 res
= isatty((int)fileno(f
->f_fp
));
765 return PyBool_FromLong(res
);
770 #define SMALLCHUNK 8192
772 #define SMALLCHUNK BUFSIZ
776 #define BIGCHUNK (512 * 32)
778 #define BIGCHUNK (512 * 1024)
782 new_buffersize(PyFileObject
*f
, size_t currentsize
)
787 if (fstat(fileno(f
->f_fp
), &st
) == 0) {
789 /* The following is not a bug: we really need to call lseek()
790 *and* ftell(). The reason is that some stdio libraries
791 mistakenly flush their buffer when ftell() is called and
792 the lseek() call it makes fails, thereby throwing away
793 data that cannot be recovered in any way. To avoid this,
794 we first test lseek(), and only call ftell() if lseek()
795 works. We can't use the lseek() value either, because we
796 need to take the amount of buffered data into account.
797 (Yet another reason why stdio stinks. :-) */
798 pos
= lseek(fileno(f
->f_fp
), 0L, SEEK_CUR
);
800 pos
= ftell(f
->f_fp
);
804 if (end
> pos
&& pos
>= 0)
805 return currentsize
+ end
- pos
+ 1;
806 /* Add 1 so if the file were to grow we'd notice. */
809 if (currentsize
> SMALLCHUNK
) {
810 /* Keep doubling until we reach BIGCHUNK;
811 then keep adding BIGCHUNK. */
812 if (currentsize
<= BIGCHUNK
)
813 return currentsize
+ currentsize
;
815 return currentsize
+ BIGCHUNK
;
817 return currentsize
+ SMALLCHUNK
;
820 #if defined(EWOULDBLOCK) && defined(EAGAIN) && EWOULDBLOCK != EAGAIN
821 #define BLOCKED_ERRNO(x) ((x) == EWOULDBLOCK || (x) == EAGAIN)
824 #define BLOCKED_ERRNO(x) ((x) == EWOULDBLOCK)
827 #define BLOCKED_ERRNO(x) ((x) == EAGAIN)
829 #define BLOCKED_ERRNO(x) 0
835 file_read(PyFileObject
*f
, PyObject
*args
)
837 long bytesrequested
= -1;
838 size_t bytesread
, buffersize
, chunksize
;
843 /* refuse to mix with f.next() */
844 if (f
->f_buf
!= NULL
&&
845 (f
->f_bufend
- f
->f_bufptr
) > 0 &&
847 return err_iterbuffered();
848 if (!PyArg_ParseTuple(args
, "|l:read", &bytesrequested
))
850 if (bytesrequested
< 0)
851 buffersize
= new_buffersize(f
, (size_t)0);
853 buffersize
= bytesrequested
;
854 if (buffersize
> PY_SSIZE_T_MAX
) {
855 PyErr_SetString(PyExc_OverflowError
,
856 "requested number of bytes is more than a Python string can hold");
859 v
= PyString_FromStringAndSize((char *)NULL
, buffersize
);
864 Py_BEGIN_ALLOW_THREADS
866 chunksize
= Py_UniversalNewlineFread(BUF(v
) + bytesread
,
867 buffersize
- bytesread
, f
->f_fp
, (PyObject
*)f
);
869 if (chunksize
== 0) {
870 if (!ferror(f
->f_fp
))
873 /* When in non-blocking mode, data shouldn't
874 * be discarded if a blocking signal was
875 * received. That will also happen if
876 * chunksize != 0, but bytesread < buffersize. */
877 if (bytesread
> 0 && BLOCKED_ERRNO(errno
))
879 PyErr_SetFromErrno(PyExc_IOError
);
883 bytesread
+= chunksize
;
884 if (bytesread
< buffersize
) {
888 if (bytesrequested
< 0) {
889 buffersize
= new_buffersize(f
, buffersize
);
890 if (_PyString_Resize(&v
, buffersize
) < 0)
893 /* Got what was requested. */
897 if (bytesread
!= buffersize
)
898 _PyString_Resize(&v
, bytesread
);
903 file_readinto(PyFileObject
*f
, PyObject
*args
)
907 Py_ssize_t ndone
, nnow
;
911 /* refuse to mix with f.next() */
912 if (f
->f_buf
!= NULL
&&
913 (f
->f_bufend
- f
->f_bufptr
) > 0 &&
915 return err_iterbuffered();
916 if (!PyArg_ParseTuple(args
, "w#", &ptr
, &ntodo
))
920 Py_BEGIN_ALLOW_THREADS
922 nnow
= Py_UniversalNewlineFread(ptr
+ndone
, ntodo
, f
->f_fp
,
926 if (!ferror(f
->f_fp
))
928 PyErr_SetFromErrno(PyExc_IOError
);
935 return PyInt_FromSsize_t(ndone
);
938 /**************************************************************************
939 Routine to get next line using platform fgets().
943 + MS threadsafe getc is very slow (multiple layers of function calls before+
944 after each character, to lock+unlock the stream).
945 + The stream-locking functions are MS-internal -- can't access them from user
947 + There's nothing Tim could find in the MS C or platform SDK libraries that
948 can worm around this.
949 + MS fgets locks/unlocks only once per line; it's the only hook we have.
951 So we use fgets for speed(!), despite that it's painful.
953 MS realloc is also slow.
955 Reports from other platforms on this method vs getc_unlocked (which MS doesn't
959 Tru64 Unix getline_via_fgets significantly faster
961 CAUTION: The C std isn't clear about this: in those cases where fgets
962 writes something into the buffer, can it write into any position beyond the
963 required trailing null byte? MSVC 6 fgets does not, and no platform is (yet)
964 known on which it does; and it would be a strange way to code fgets. Still,
965 getline_via_fgets may not work correctly if it does. The std test
966 test_bufio.py should fail if platform fgets() routinely writes beyond the
967 trailing null byte. #define DONT_USE_FGETS_IN_GETLINE to disable this code.
968 **************************************************************************/
970 /* Use this routine if told to, or by default on non-getc_unlocked()
971 * platforms unless told not to. Yikes! Let's spell that out:
972 * On a platform with getc_unlocked():
973 * By default, use getc_unlocked().
974 * If you want to use fgets() instead, #define USE_FGETS_IN_GETLINE.
975 * On a platform without getc_unlocked():
976 * By default, use fgets().
977 * If you don't want to use fgets(), #define DONT_USE_FGETS_IN_GETLINE.
979 #if !defined(USE_FGETS_IN_GETLINE) && !defined(HAVE_GETC_UNLOCKED)
980 #define USE_FGETS_IN_GETLINE
983 #if defined(DONT_USE_FGETS_IN_GETLINE) && defined(USE_FGETS_IN_GETLINE)
984 #undef USE_FGETS_IN_GETLINE
987 #ifdef USE_FGETS_IN_GETLINE
989 getline_via_fgets(FILE *fp
)
991 /* INITBUFSIZE is the maximum line length that lets us get away with the fast
992 * no-realloc, one-fgets()-call path. Boosting it isn't free, because we have
993 * to fill this much of the buffer with a known value in order to figure out
994 * how much of the buffer fgets() overwrites. So if INITBUFSIZE is larger
995 * than "most" lines, we waste time filling unused buffer slots. 100 is
996 * surely adequate for most peoples' email archives, chewing over source code,
997 * etc -- "regular old text files".
998 * MAXBUFSIZE is the maximum line length that lets us get away with the less
999 * fast (but still zippy) no-realloc, two-fgets()-call path. See above for
1000 * cautions about boosting that. 300 was chosen because the worst real-life
1001 * text-crunching job reported on Python-Dev was a mail-log crawler where over
1002 * half the lines were 254 chars.
1004 #define INITBUFSIZE 100
1005 #define MAXBUFSIZE 300
1007 char buf
[MAXBUFSIZE
];
1008 PyObject
* v
; /* the string object result */
1009 char* pvfree
; /* address of next free slot */
1010 char* pvend
; /* address one beyond last free slot */
1011 size_t nfree
; /* # of free buffer slots; pvend-pvfree */
1012 size_t total_v_size
; /* total # of slots in buffer */
1013 size_t increment
; /* amount to increment the buffer */
1016 /* Optimize for normal case: avoid _PyString_Resize if at all
1017 * possible via first reading into stack buffer "buf".
1019 total_v_size
= INITBUFSIZE
; /* start small and pray */
1022 Py_BEGIN_ALLOW_THREADS
1023 pvend
= buf
+ total_v_size
;
1024 nfree
= pvend
- pvfree
;
1025 memset(pvfree
, '\n', nfree
);
1026 assert(nfree
< INT_MAX
); /* Should be at most MAXBUFSIZE */
1027 p
= fgets(pvfree
, (int)nfree
, fp
);
1028 Py_END_ALLOW_THREADS
1032 if (PyErr_CheckSignals())
1034 v
= PyString_FromStringAndSize(buf
, pvfree
- buf
);
1037 /* fgets read *something* */
1038 p
= memchr(pvfree
, '\n', nfree
);
1040 /* Did the \n come from fgets or from us?
1041 * Since fgets stops at the first \n, and then writes
1042 * \0, if it's from fgets a \0 must be next. But if
1043 * that's so, it could not have come from us, since
1044 * the \n's we filled the buffer with have only more
1045 * \n's to the right.
1047 if (p
+1 < pvend
&& *(p
+1) == '\0') {
1048 /* It's from fgets: we win! In particular,
1049 * we haven't done any mallocs yet, and can
1050 * build the final result on the first try.
1052 ++p
; /* include \n from fgets */
1055 /* Must be from us: fgets didn't fill the
1056 * buffer and didn't find a newline, so it
1057 * must be the last and newline-free line of
1060 assert(p
> pvfree
&& *(p
-1) == '\0');
1061 --p
; /* don't include \0 from fgets */
1063 v
= PyString_FromStringAndSize(buf
, p
- buf
);
1066 /* yuck: fgets overwrote all the newlines, i.e. the entire
1067 * buffer. So this line isn't over yet, or maybe it is but
1068 * we're exactly at EOF. If we haven't already, try using the
1069 * rest of the stack buffer.
1071 assert(*(pvend
-1) == '\0');
1072 if (pvfree
== buf
) {
1073 pvfree
= pvend
- 1; /* overwrite trailing null */
1074 total_v_size
= MAXBUFSIZE
;
1080 /* The stack buffer isn't big enough; malloc a string object and read
1083 total_v_size
= MAXBUFSIZE
<< 1;
1084 v
= PyString_FromStringAndSize((char*)NULL
, (int)total_v_size
);
1087 /* copy over everything except the last null byte */
1088 memcpy(BUF(v
), buf
, MAXBUFSIZE
-1);
1089 pvfree
= BUF(v
) + MAXBUFSIZE
- 1;
1091 /* Keep reading stuff into v; if it ever ends successfully, break
1092 * after setting p one beyond the end of the line. The code here is
1093 * very much like the code above, except reads into v's buffer; see
1094 * the code above for detailed comments about the logic.
1097 Py_BEGIN_ALLOW_THREADS
1098 pvend
= BUF(v
) + total_v_size
;
1099 nfree
= pvend
- pvfree
;
1100 memset(pvfree
, '\n', nfree
);
1101 assert(nfree
< INT_MAX
);
1102 p
= fgets(pvfree
, (int)nfree
, fp
);
1103 Py_END_ALLOW_THREADS
1107 if (PyErr_CheckSignals()) {
1114 p
= memchr(pvfree
, '\n', nfree
);
1116 if (p
+1 < pvend
&& *(p
+1) == '\0') {
1117 /* \n came from fgets */
1121 /* \n came from us; last line of file, no newline */
1122 assert(p
> pvfree
&& *(p
-1) == '\0');
1126 /* expand buffer and try again */
1127 assert(*(pvend
-1) == '\0');
1128 increment
= total_v_size
>> 2; /* mild exponential growth */
1129 prev_v_size
= total_v_size
;
1130 total_v_size
+= increment
;
1131 /* check for overflow */
1132 if (total_v_size
<= prev_v_size
||
1133 total_v_size
> PY_SSIZE_T_MAX
) {
1134 PyErr_SetString(PyExc_OverflowError
,
1135 "line is longer than a Python string can hold");
1139 if (_PyString_Resize(&v
, (int)total_v_size
) < 0)
1141 /* overwrite the trailing null byte */
1142 pvfree
= BUF(v
) + (prev_v_size
- 1);
1144 if (BUF(v
) + total_v_size
!= p
)
1145 _PyString_Resize(&v
, p
- BUF(v
));
1150 #endif /* ifdef USE_FGETS_IN_GETLINE */
1152 /* Internal routine to get a line.
1153 Size argument interpretation:
1155 <= 0: read arbitrary line
1159 get_line(PyFileObject
*f
, int n
)
1164 size_t total_v_size
; /* total # of slots in buffer */
1165 size_t used_v_size
; /* # used slots in buffer */
1166 size_t increment
; /* amount to increment the buffer */
1168 int newlinetypes
= f
->f_newlinetypes
;
1169 int skipnextlf
= f
->f_skipnextlf
;
1170 int univ_newline
= f
->f_univ_newline
;
1172 #if defined(USE_FGETS_IN_GETLINE)
1173 if (n
<= 0 && !univ_newline
)
1174 return getline_via_fgets(fp
);
1176 total_v_size
= n
> 0 ? n
: 100;
1177 v
= PyString_FromStringAndSize((char *)NULL
, total_v_size
);
1181 end
= buf
+ total_v_size
;
1184 Py_BEGIN_ALLOW_THREADS
1187 c
= 'x'; /* Shut up gcc warning */
1188 while ( buf
!= end
&& (c
= GETC(fp
)) != EOF
) {
1192 /* Seeing a \n here with
1193 * skipnextlf true means we
1196 newlinetypes
|= NEWLINE_CRLF
;
1198 if (c
== EOF
) break;
1200 newlinetypes
|= NEWLINE_CR
;
1206 } else if ( c
== '\n')
1207 newlinetypes
|= NEWLINE_LF
;
1209 if (c
== '\n') break;
1211 if ( c
== EOF
&& skipnextlf
)
1212 newlinetypes
|= NEWLINE_CR
;
1213 } else /* If not universal newlines use the normal loop */
1214 while ((c
= GETC(fp
)) != EOF
&&
1215 (*buf
++ = c
) != '\n' &&
1219 Py_END_ALLOW_THREADS
1220 f
->f_newlinetypes
= newlinetypes
;
1221 f
->f_skipnextlf
= skipnextlf
;
1226 PyErr_SetFromErrno(PyExc_IOError
);
1232 if (PyErr_CheckSignals()) {
1238 /* Must be because buf == end */
1241 used_v_size
= total_v_size
;
1242 increment
= total_v_size
>> 2; /* mild exponential growth */
1243 total_v_size
+= increment
;
1244 if (total_v_size
> PY_SSIZE_T_MAX
) {
1245 PyErr_SetString(PyExc_OverflowError
,
1246 "line is longer than a Python string can hold");
1250 if (_PyString_Resize(&v
, total_v_size
) < 0)
1252 buf
= BUF(v
) + used_v_size
;
1253 end
= BUF(v
) + total_v_size
;
1256 used_v_size
= buf
- BUF(v
);
1257 if (used_v_size
!= total_v_size
)
1258 _PyString_Resize(&v
, used_v_size
);
1262 /* External C interface */
1265 PyFile_GetLine(PyObject
*f
, int n
)
1270 PyErr_BadInternalCall();
1274 if (PyFile_Check(f
)) {
1275 PyFileObject
*fo
= (PyFileObject
*)f
;
1276 if (fo
->f_fp
== NULL
)
1277 return err_closed();
1278 /* refuse to mix with f.next() */
1279 if (fo
->f_buf
!= NULL
&&
1280 (fo
->f_bufend
- fo
->f_bufptr
) > 0 &&
1281 fo
->f_buf
[0] != '\0')
1282 return err_iterbuffered();
1283 result
= get_line(fo
, n
);
1289 reader
= PyObject_GetAttrString(f
, "readline");
1293 args
= PyTuple_New(0);
1295 args
= Py_BuildValue("(i)", n
);
1300 result
= PyEval_CallObject(reader
, args
);
1303 if (result
!= NULL
&& !PyString_Check(result
) &&
1304 !PyUnicode_Check(result
)) {
1307 PyErr_SetString(PyExc_TypeError
,
1308 "object.readline() returned non-string");
1312 if (n
< 0 && result
!= NULL
&& PyString_Check(result
)) {
1313 char *s
= PyString_AS_STRING(result
);
1314 Py_ssize_t len
= PyString_GET_SIZE(result
);
1318 PyErr_SetString(PyExc_EOFError
,
1319 "EOF when reading a line");
1321 else if (s
[len
-1] == '\n') {
1322 if (result
->ob_refcnt
== 1)
1323 _PyString_Resize(&result
, len
-1);
1326 v
= PyString_FromStringAndSize(s
, len
-1);
1332 #ifdef Py_USING_UNICODE
1333 if (n
< 0 && result
!= NULL
&& PyUnicode_Check(result
)) {
1334 Py_UNICODE
*s
= PyUnicode_AS_UNICODE(result
);
1335 Py_ssize_t len
= PyUnicode_GET_SIZE(result
);
1339 PyErr_SetString(PyExc_EOFError
,
1340 "EOF when reading a line");
1342 else if (s
[len
-1] == '\n') {
1343 if (result
->ob_refcnt
== 1)
1344 PyUnicode_Resize(&result
, len
-1);
1347 v
= PyUnicode_FromUnicode(s
, len
-1);
1360 file_readline(PyFileObject
*f
, PyObject
*args
)
1364 if (f
->f_fp
== NULL
)
1365 return err_closed();
1366 /* refuse to mix with f.next() */
1367 if (f
->f_buf
!= NULL
&&
1368 (f
->f_bufend
- f
->f_bufptr
) > 0 &&
1369 f
->f_buf
[0] != '\0')
1370 return err_iterbuffered();
1371 if (!PyArg_ParseTuple(args
, "|i:readline", &n
))
1374 return PyString_FromString("");
1377 return get_line(f
, n
);
1381 file_readlines(PyFileObject
*f
, PyObject
*args
)
1386 char small_buffer
[SMALLCHUNK
];
1387 char *buffer
= small_buffer
;
1388 size_t buffersize
= SMALLCHUNK
;
1389 PyObject
*big_buffer
= NULL
;
1392 size_t totalread
= 0;
1397 if (f
->f_fp
== NULL
)
1398 return err_closed();
1399 /* refuse to mix with f.next() */
1400 if (f
->f_buf
!= NULL
&&
1401 (f
->f_bufend
- f
->f_bufptr
) > 0 &&
1402 f
->f_buf
[0] != '\0')
1403 return err_iterbuffered();
1404 if (!PyArg_ParseTuple(args
, "|l:readlines", &sizehint
))
1406 if ((list
= PyList_New(0)) == NULL
)
1412 Py_BEGIN_ALLOW_THREADS
1414 nread
= Py_UniversalNewlineFread(buffer
+nfilled
,
1415 buffersize
-nfilled
, f
->f_fp
, (PyObject
*)f
);
1416 Py_END_ALLOW_THREADS
1417 shortread
= (nread
< buffersize
-nfilled
);
1421 if (!ferror(f
->f_fp
))
1423 PyErr_SetFromErrno(PyExc_IOError
);
1431 p
= (char *)memchr(buffer
+nfilled
, '\n', nread
);
1433 /* Need a larger buffer to fit this line */
1436 if (buffersize
> PY_SSIZE_T_MAX
) {
1437 PyErr_SetString(PyExc_OverflowError
,
1438 "line is longer than a Python string can hold");
1441 if (big_buffer
== NULL
) {
1442 /* Create the big buffer */
1443 big_buffer
= PyString_FromStringAndSize(
1445 if (big_buffer
== NULL
)
1447 buffer
= PyString_AS_STRING(big_buffer
);
1448 memcpy(buffer
, small_buffer
, nfilled
);
1451 /* Grow the big buffer */
1452 if ( _PyString_Resize(&big_buffer
, buffersize
) < 0 )
1454 buffer
= PyString_AS_STRING(big_buffer
);
1458 end
= buffer
+nfilled
+nread
;
1461 /* Process complete lines */
1463 line
= PyString_FromStringAndSize(q
, p
-q
);
1466 err
= PyList_Append(list
, line
);
1471 p
= (char *)memchr(q
, '\n', end
-q
);
1472 } while (p
!= NULL
);
1473 /* Move the remaining incomplete line to the start */
1475 memmove(buffer
, q
, nfilled
);
1477 if (totalread
>= (size_t)sizehint
)
1481 /* Partial last line */
1482 line
= PyString_FromStringAndSize(buffer
, nfilled
);
1486 /* Need to complete the last line */
1487 PyObject
*rest
= get_line(f
, 0);
1492 PyString_Concat(&line
, rest
);
1497 err
= PyList_Append(list
, line
);
1503 Py_XDECREF(big_buffer
);
1508 file_write(PyFileObject
*f
, PyObject
*args
)
1512 if (f
->f_fp
== NULL
)
1513 return err_closed();
1514 if (!PyArg_ParseTuple(args
, f
->f_binary
? "s#" : "t#", &s
, &n
))
1517 Py_BEGIN_ALLOW_THREADS
1519 n2
= fwrite(s
, 1, n
, f
->f_fp
);
1520 Py_END_ALLOW_THREADS
1522 PyErr_SetFromErrno(PyExc_IOError
);
1531 file_writelines(PyFileObject
*f
, PyObject
*seq
)
1533 #define CHUNKSIZE 1000
1534 PyObject
*list
, *line
;
1535 PyObject
*it
; /* iter(seq) */
1538 Py_ssize_t i
, j
, nwritten
, len
;
1540 assert(seq
!= NULL
);
1541 if (f
->f_fp
== NULL
)
1542 return err_closed();
1546 islist
= PyList_Check(seq
);
1550 it
= PyObject_GetIter(seq
);
1552 PyErr_SetString(PyExc_TypeError
,
1553 "writelines() requires an iterable argument");
1556 /* From here on, fail by going to error, to reclaim "it". */
1557 list
= PyList_New(CHUNKSIZE
);
1562 /* Strategy: slurp CHUNKSIZE lines into a private list,
1563 checking that they are all strings, then write that list
1564 without holding the interpreter lock, then come back for more. */
1565 for (index
= 0; ; index
+= CHUNKSIZE
) {
1568 list
= PyList_GetSlice(seq
, index
, index
+CHUNKSIZE
);
1571 j
= PyList_GET_SIZE(list
);
1574 for (j
= 0; j
< CHUNKSIZE
; j
++) {
1575 line
= PyIter_Next(it
);
1577 if (PyErr_Occurred())
1581 PyList_SetItem(list
, j
, line
);
1587 /* Check that all entries are indeed strings. If not,
1588 apply the same rules as for file.write() and
1589 convert the results to strings. This is slow, but
1590 seems to be the only way since all conversion APIs
1591 could potentially execute Python code. */
1592 for (i
= 0; i
< j
; i
++) {
1593 PyObject
*v
= PyList_GET_ITEM(list
, i
);
1594 if (!PyString_Check(v
)) {
1596 if (((f
->f_binary
&&
1597 PyObject_AsReadBuffer(v
,
1598 (const void**)&buffer
,
1600 PyObject_AsCharBuffer(v
,
1603 PyErr_SetString(PyExc_TypeError
,
1604 "writelines() argument must be a sequence of strings");
1607 line
= PyString_FromStringAndSize(buffer
,
1612 PyList_SET_ITEM(list
, i
, line
);
1616 /* Since we are releasing the global lock, the
1617 following code may *not* execute Python code. */
1618 Py_BEGIN_ALLOW_THREADS
1621 for (i
= 0; i
< j
; i
++) {
1622 line
= PyList_GET_ITEM(list
, i
);
1623 len
= PyString_GET_SIZE(line
);
1624 nwritten
= fwrite(PyString_AS_STRING(line
),
1626 if (nwritten
!= len
) {
1628 PyErr_SetFromErrno(PyExc_IOError
);
1633 Py_END_ALLOW_THREADS
1649 file_self(PyFileObject
*f
)
1651 if (f
->f_fp
== NULL
)
1652 return err_closed();
1654 return (PyObject
*)f
;
1658 file_exit(PyObject
*f
, PyObject
*args
)
1660 PyObject
*ret
= PyObject_CallMethod(f
, "close", NULL
);
1662 /* If error occurred, pass through */
1665 /* We cannot return the result of close since a true
1666 * value will be interpreted as "yes, swallow the
1667 * exception if one was raised inside the with block". */
1671 PyDoc_STRVAR(readline_doc
,
1672 "readline([size]) -> next line from the file, as a string.\n"
1674 "Retain newline. A non-negative size argument limits the maximum\n"
1675 "number of bytes to return (an incomplete line may be returned then).\n"
1676 "Return an empty string at EOF.");
1678 PyDoc_STRVAR(read_doc
,
1679 "read([size]) -> read at most size bytes, returned as a string.\n"
1681 "If the size argument is negative or omitted, read until EOF is reached.\n"
1682 "Notice that when in non-blocking mode, less data than what was requested\n"
1683 "may be returned, even if no size parameter was given.");
1685 PyDoc_STRVAR(write_doc
,
1686 "write(str) -> None. Write string str to file.\n"
1688 "Note that due to buffering, flush() or close() may be needed before\n"
1689 "the file on disk reflects the data written.");
1691 PyDoc_STRVAR(fileno_doc
,
1692 "fileno() -> integer \"file descriptor\".\n"
1694 "This is needed for lower-level file interfaces, such os.read().");
1696 PyDoc_STRVAR(seek_doc
,
1697 "seek(offset[, whence]) -> None. Move to new file position.\n"
1699 "Argument offset is a byte count. Optional argument whence defaults to\n"
1700 "0 (offset from start of file, offset should be >= 0); other values are 1\n"
1701 "(move relative to current position, positive or negative), and 2 (move\n"
1702 "relative to end of file, usually negative, although many platforms allow\n"
1703 "seeking beyond the end of a file). If the file is opened in text mode,\n"
1704 "only offsets returned by tell() are legal. Use of other offsets causes\n"
1705 "undefined behavior."
1707 "Note that not all file objects are seekable.");
1709 #ifdef HAVE_FTRUNCATE
1710 PyDoc_STRVAR(truncate_doc
,
1711 "truncate([size]) -> None. Truncate the file to at most size bytes.\n"
1713 "Size defaults to the current file position, as returned by tell().");
1716 PyDoc_STRVAR(tell_doc
,
1717 "tell() -> current file position, an integer (may be a long integer).");
1719 PyDoc_STRVAR(readinto_doc
,
1720 "readinto() -> Undocumented. Don't use this; it may go away.");
1722 PyDoc_STRVAR(readlines_doc
,
1723 "readlines([size]) -> list of strings, each a line from the file.\n"
1725 "Call readline() repeatedly and return a list of the lines so read.\n"
1726 "The optional size argument, if given, is an approximate bound on the\n"
1727 "total number of bytes in the lines returned.");
1729 PyDoc_STRVAR(xreadlines_doc
,
1730 "xreadlines() -> returns self.\n"
1732 "For backward compatibility. File objects now include the performance\n"
1733 "optimizations previously implemented in the xreadlines module.");
1735 PyDoc_STRVAR(writelines_doc
,
1736 "writelines(sequence_of_strings) -> None. Write the strings to the file.\n"
1738 "Note that newlines are not added. The sequence can be any iterable object\n"
1739 "producing strings. This is equivalent to calling write() for each string.");
1741 PyDoc_STRVAR(flush_doc
,
1742 "flush() -> None. Flush the internal I/O buffer.");
1744 PyDoc_STRVAR(close_doc
,
1745 "close() -> None or (perhaps) an integer. Close the file.\n"
1747 "Sets data attribute .closed to True. A closed file cannot be used for\n"
1748 "further I/O operations. close() may be called more than once without\n"
1749 "error. Some kinds of file objects (for example, opened by popen())\n"
1750 "may return an exit status upon closing.");
1752 PyDoc_STRVAR(isatty_doc
,
1753 "isatty() -> true or false. True if the file is connected to a tty device.");
1755 PyDoc_STRVAR(enter_doc
,
1756 "__enter__() -> self.");
1758 PyDoc_STRVAR(exit_doc
,
1759 "__exit__(*excinfo) -> None. Closes the file.");
1761 static PyMethodDef file_methods
[] = {
1762 {"readline", (PyCFunction
)file_readline
, METH_VARARGS
, readline_doc
},
1763 {"read", (PyCFunction
)file_read
, METH_VARARGS
, read_doc
},
1764 {"write", (PyCFunction
)file_write
, METH_VARARGS
, write_doc
},
1765 {"fileno", (PyCFunction
)file_fileno
, METH_NOARGS
, fileno_doc
},
1766 {"seek", (PyCFunction
)file_seek
, METH_VARARGS
, seek_doc
},
1767 #ifdef HAVE_FTRUNCATE
1768 {"truncate", (PyCFunction
)file_truncate
, METH_VARARGS
, truncate_doc
},
1770 {"tell", (PyCFunction
)file_tell
, METH_NOARGS
, tell_doc
},
1771 {"readinto", (PyCFunction
)file_readinto
, METH_VARARGS
, readinto_doc
},
1772 {"readlines", (PyCFunction
)file_readlines
,METH_VARARGS
, readlines_doc
},
1773 {"xreadlines",(PyCFunction
)file_self
, METH_NOARGS
, xreadlines_doc
},
1774 {"writelines",(PyCFunction
)file_writelines
, METH_O
, writelines_doc
},
1775 {"flush", (PyCFunction
)file_flush
, METH_NOARGS
, flush_doc
},
1776 {"close", (PyCFunction
)file_close
, METH_NOARGS
, close_doc
},
1777 {"isatty", (PyCFunction
)file_isatty
, METH_NOARGS
, isatty_doc
},
1778 {"__enter__", (PyCFunction
)file_self
, METH_NOARGS
, enter_doc
},
1779 {"__exit__", (PyCFunction
)file_exit
, METH_VARARGS
, exit_doc
},
1780 {NULL
, NULL
} /* sentinel */
1783 #define OFF(x) offsetof(PyFileObject, x)
1785 static PyMemberDef file_memberlist
[] = {
1786 {"softspace", T_INT
, OFF(f_softspace
), 0,
1787 "flag indicating that a space needs to be printed; used by print"},
1788 {"mode", T_OBJECT
, OFF(f_mode
), RO
,
1789 "file mode ('r', 'U', 'w', 'a', possibly with 'b' or '+' added)"},
1790 {"name", T_OBJECT
, OFF(f_name
), RO
,
1792 {"encoding", T_OBJECT
, OFF(f_encoding
), RO
,
1794 /* getattr(f, "closed") is implemented without this table */
1795 {NULL
} /* Sentinel */
1799 get_closed(PyFileObject
*f
, void *closure
)
1801 return PyBool_FromLong((long)(f
->f_fp
== 0));
1804 get_newlines(PyFileObject
*f
, void *closure
)
1806 switch (f
->f_newlinetypes
) {
1807 case NEWLINE_UNKNOWN
:
1811 return PyString_FromString("\r");
1813 return PyString_FromString("\n");
1814 case NEWLINE_CR
|NEWLINE_LF
:
1815 return Py_BuildValue("(ss)", "\r", "\n");
1817 return PyString_FromString("\r\n");
1818 case NEWLINE_CR
|NEWLINE_CRLF
:
1819 return Py_BuildValue("(ss)", "\r", "\r\n");
1820 case NEWLINE_LF
|NEWLINE_CRLF
:
1821 return Py_BuildValue("(ss)", "\n", "\r\n");
1822 case NEWLINE_CR
|NEWLINE_LF
|NEWLINE_CRLF
:
1823 return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
1825 PyErr_Format(PyExc_SystemError
,
1826 "Unknown newlines value 0x%x\n",
1832 static PyGetSetDef file_getsetlist
[] = {
1833 {"closed", (getter
)get_closed
, NULL
, "True if the file is closed"},
1834 {"newlines", (getter
)get_newlines
, NULL
,
1835 "end-of-line convention used in this file"},
1840 drop_readahead(PyFileObject
*f
)
1842 if (f
->f_buf
!= NULL
) {
1843 PyMem_Free(f
->f_buf
);
1848 /* Make sure that file has a readahead buffer with at least one byte
1849 (unless at EOF) and no more than bufsize. Returns negative value on
1850 error, will set MemoryError if bufsize bytes cannot be allocated. */
1852 readahead(PyFileObject
*f
, int bufsize
)
1854 Py_ssize_t chunksize
;
1856 if (f
->f_buf
!= NULL
) {
1857 if( (f
->f_bufend
- f
->f_bufptr
) >= 1)
1862 if ((f
->f_buf
= (char *)PyMem_Malloc(bufsize
)) == NULL
) {
1866 Py_BEGIN_ALLOW_THREADS
1868 chunksize
= Py_UniversalNewlineFread(
1869 f
->f_buf
, bufsize
, f
->f_fp
, (PyObject
*)f
);
1870 Py_END_ALLOW_THREADS
1871 if (chunksize
== 0) {
1872 if (ferror(f
->f_fp
)) {
1873 PyErr_SetFromErrno(PyExc_IOError
);
1879 f
->f_bufptr
= f
->f_buf
;
1880 f
->f_bufend
= f
->f_buf
+ chunksize
;
1884 /* Used by file_iternext. The returned string will start with 'skip'
1885 uninitialized bytes followed by the remainder of the line. Don't be
1886 horrified by the recursive call: maximum recursion depth is limited by
1887 logarithmic buffer growth to about 50 even when reading a 1gb line. */
1889 static PyStringObject
*
1890 readahead_get_line_skip(PyFileObject
*f
, int skip
, int bufsize
)
1897 if (f
->f_buf
== NULL
)
1898 if (readahead(f
, bufsize
) < 0)
1901 len
= f
->f_bufend
- f
->f_bufptr
;
1903 return (PyStringObject
*)
1904 PyString_FromStringAndSize(NULL
, skip
);
1905 bufptr
= (char *)memchr(f
->f_bufptr
, '\n', len
);
1906 if (bufptr
!= NULL
) {
1907 bufptr
++; /* Count the '\n' */
1908 len
= bufptr
- f
->f_bufptr
;
1909 s
= (PyStringObject
*)
1910 PyString_FromStringAndSize(NULL
, skip
+len
);
1913 memcpy(PyString_AS_STRING(s
)+skip
, f
->f_bufptr
, len
);
1914 f
->f_bufptr
= bufptr
;
1915 if (bufptr
== f
->f_bufend
)
1918 bufptr
= f
->f_bufptr
;
1920 f
->f_buf
= NULL
; /* Force new readahead buffer */
1921 assert(skip
+len
< INT_MAX
);
1922 s
= readahead_get_line_skip(
1923 f
, (int)(skip
+len
), bufsize
+ (bufsize
>>2) );
1928 memcpy(PyString_AS_STRING(s
)+skip
, bufptr
, len
);
1934 /* A larger buffer size may actually decrease performance. */
1935 #define READAHEAD_BUFSIZE 8192
1938 file_iternext(PyFileObject
*f
)
1942 if (f
->f_fp
== NULL
)
1943 return err_closed();
1945 l
= readahead_get_line_skip(f
, 0, READAHEAD_BUFSIZE
);
1946 if (l
== NULL
|| PyString_GET_SIZE(l
) == 0) {
1950 return (PyObject
*)l
;
1955 file_new(PyTypeObject
*type
, PyObject
*args
, PyObject
*kwds
)
1958 static PyObject
*not_yet_string
;
1960 assert(type
!= NULL
&& type
->tp_alloc
!= NULL
);
1962 if (not_yet_string
== NULL
) {
1963 not_yet_string
= PyString_InternFromString("<uninitialized file>");
1964 if (not_yet_string
== NULL
)
1968 self
= type
->tp_alloc(type
, 0);
1970 /* Always fill in the name and mode, so that nobody else
1971 needs to special-case NULLs there. */
1972 Py_INCREF(not_yet_string
);
1973 ((PyFileObject
*)self
)->f_name
= not_yet_string
;
1974 Py_INCREF(not_yet_string
);
1975 ((PyFileObject
*)self
)->f_mode
= not_yet_string
;
1977 ((PyFileObject
*)self
)->f_encoding
= Py_None
;
1978 ((PyFileObject
*)self
)->weakreflist
= NULL
;
1984 file_init(PyObject
*self
, PyObject
*args
, PyObject
*kwds
)
1986 PyFileObject
*foself
= (PyFileObject
*)self
;
1988 static char *kwlist
[] = {"name", "mode", "buffering", 0};
1992 int wideargument
= 0;
1994 assert(PyFile_Check(self
));
1995 if (foself
->f_fp
!= NULL
) {
1996 /* Have to close the existing file first. */
1997 PyObject
*closeresult
= file_close(foself
);
1998 if (closeresult
== NULL
)
2000 Py_DECREF(closeresult
);
2003 #ifdef Py_WIN_WIDE_FILENAMES
2004 if (GetVersion() < 0x80000000) { /* On NT, so wide API available */
2006 if (PyArg_ParseTupleAndKeywords(args
, kwds
, "U|si:file",
2007 kwlist
, &po
, &mode
, &bufsize
)) {
2009 if (fill_file_fields(foself
, NULL
, po
, mode
,
2013 /* Drop the argument parsing error as narrow
2014 strings are also valid. */
2020 if (!wideargument
) {
2023 if (!PyArg_ParseTupleAndKeywords(args
, kwds
, "et|si:file", kwlist
,
2024 Py_FileSystemDefaultEncoding
,
2029 /* We parse again to get the name as a PyObject */
2030 if (!PyArg_ParseTupleAndKeywords(args
, kwds
, "O|si:file",
2031 kwlist
, &o_name
, &mode
,
2035 if (fill_file_fields(foself
, NULL
, o_name
, mode
,
2039 if (open_the_file(foself
, name
, mode
) == NULL
)
2041 foself
->f_setbuf
= NULL
;
2042 PyFile_SetBufSize(self
, bufsize
);
2049 PyMem_Free(name
); /* free the encoded string */
2053 PyDoc_VAR(file_doc
) =
2055 "file(name[, mode[, buffering]]) -> file object\n"
2057 "Open a file. The mode can be 'r', 'w' or 'a' for reading (default),\n"
2058 "writing or appending. The file will be created if it doesn't exist\n"
2059 "when opened for writing or appending; it will be truncated when\n"
2060 "opened for writing. Add a 'b' to the mode for binary files.\n"
2061 "Add a '+' to the mode to allow simultaneous reading and writing.\n"
2062 "If the buffering argument is given, 0 means unbuffered, 1 means line\n"
2063 "buffered, and larger numbers specify the buffer size. The preferred way\n"
2064 "to open a file is with the builtin open() function.\n"
2067 "Add a 'U' to mode to open the file for input with universal newline\n"
2068 "support. Any line ending in the input file will be seen as a '\\n'\n"
2069 "in Python. Also, a file so opened gains the attribute 'newlines';\n"
2070 "the value for this attribute is one of None (no newline read yet),\n"
2071 "'\\r', '\\n', '\\r\\n' or a tuple containing all the newline types seen.\n"
2073 "'U' cannot be combined with 'w' or '+' mode.\n"
2076 PyTypeObject PyFile_Type
= {
2077 PyVarObject_HEAD_INIT(&PyType_Type
, 0)
2079 sizeof(PyFileObject
),
2081 (destructor
)file_dealloc
, /* tp_dealloc */
2086 (reprfunc
)file_repr
, /* tp_repr */
2087 0, /* tp_as_number */
2088 0, /* tp_as_sequence */
2089 0, /* tp_as_mapping */
2093 PyObject_GenericGetAttr
, /* tp_getattro */
2094 /* softspace is writable: we must supply tp_setattro */
2095 PyObject_GenericSetAttr
, /* tp_setattro */
2096 0, /* tp_as_buffer */
2097 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_BASETYPE
| Py_TPFLAGS_HAVE_WEAKREFS
, /* tp_flags */
2098 file_doc
, /* tp_doc */
2099 0, /* tp_traverse */
2101 0, /* tp_richcompare */
2102 offsetof(PyFileObject
, weakreflist
), /* tp_weaklistoffset */
2103 (getiterfunc
)file_self
, /* tp_iter */
2104 (iternextfunc
)file_iternext
, /* tp_iternext */
2105 file_methods
, /* tp_methods */
2106 file_memberlist
, /* tp_members */
2107 file_getsetlist
, /* tp_getset */
2110 0, /* tp_descr_get */
2111 0, /* tp_descr_set */
2112 0, /* tp_dictoffset */
2113 file_init
, /* tp_init */
2114 PyType_GenericAlloc
, /* tp_alloc */
2115 file_new
, /* tp_new */
2116 PyObject_Del
, /* tp_free */
2119 /* Interface for the 'soft space' between print items. */
2122 PyFile_SoftSpace(PyObject
*f
, int newflag
)
2128 else if (PyFile_Check(f
)) {
2129 oldflag
= ((PyFileObject
*)f
)->f_softspace
;
2130 ((PyFileObject
*)f
)->f_softspace
= newflag
;
2134 v
= PyObject_GetAttrString(f
, "softspace");
2139 oldflag
= PyInt_AsLong(v
);
2140 assert(oldflag
< INT_MAX
);
2143 v
= PyInt_FromLong((long)newflag
);
2147 if (PyObject_SetAttrString(f
, "softspace", v
) != 0)
2152 return (int)oldflag
;
2155 /* Interfaces to write objects/strings to file-like objects */
2158 PyFile_WriteObject(PyObject
*v
, PyObject
*f
, int flags
)
2160 PyObject
*writer
, *value
, *args
, *result
;
2162 PyErr_SetString(PyExc_TypeError
, "writeobject with NULL file");
2165 else if (PyFile_Check(f
)) {
2166 FILE *fp
= PyFile_AsFile(f
);
2167 #ifdef Py_USING_UNICODE
2168 PyObject
*enc
= ((PyFileObject
*)f
)->f_encoding
;
2175 #ifdef Py_USING_UNICODE
2176 if ((flags
& Py_PRINT_RAW
) &&
2177 PyUnicode_Check(v
) && enc
!= Py_None
) {
2178 char *cenc
= PyString_AS_STRING(enc
);
2179 value
= PyUnicode_AsEncodedString(v
, cenc
, "strict");
2186 result
= PyObject_Print(value
, fp
, flags
);
2190 return PyObject_Print(v
, fp
, flags
);
2193 writer
= PyObject_GetAttrString(f
, "write");
2196 if (flags
& Py_PRINT_RAW
) {
2197 if (PyUnicode_Check(v
)) {
2201 value
= PyObject_Str(v
);
2204 value
= PyObject_Repr(v
);
2205 if (value
== NULL
) {
2209 args
= PyTuple_Pack(1, value
);
2215 result
= PyEval_CallObject(writer
, args
);
2226 PyFile_WriteString(const char *s
, PyObject
*f
)
2229 /* Should be caused by a pre-existing error */
2230 if (!PyErr_Occurred())
2231 PyErr_SetString(PyExc_SystemError
,
2232 "null file for PyFile_WriteString");
2235 else if (PyFile_Check(f
)) {
2236 FILE *fp
= PyFile_AsFile(f
);
2241 Py_BEGIN_ALLOW_THREADS
2243 Py_END_ALLOW_THREADS
2246 else if (!PyErr_Occurred()) {
2247 PyObject
*v
= PyString_FromString(s
);
2251 err
= PyFile_WriteObject(v
, f
, Py_PRINT_RAW
);
2259 /* Try to get a file-descriptor from a Python object. If the object
2260 is an integer or long integer, its value is returned. If not, the
2261 object's fileno() method is called if it exists; the method must return
2262 an integer or long integer, which is returned as the file descriptor value.
2263 -1 is returned on failure.
2266 int PyObject_AsFileDescriptor(PyObject
*o
)
2271 if (PyInt_Check(o
)) {
2272 fd
= PyInt_AsLong(o
);
2274 else if (PyLong_Check(o
)) {
2275 fd
= PyLong_AsLong(o
);
2277 else if ((meth
= PyObject_GetAttrString(o
, "fileno")) != NULL
)
2279 PyObject
*fno
= PyEval_CallObject(meth
, NULL
);
2284 if (PyInt_Check(fno
)) {
2285 fd
= PyInt_AsLong(fno
);
2288 else if (PyLong_Check(fno
)) {
2289 fd
= PyLong_AsLong(fno
);
2293 PyErr_SetString(PyExc_TypeError
,
2294 "fileno() returned a non-integer");
2300 PyErr_SetString(PyExc_TypeError
,
2301 "argument must be an int, or have a fileno() method.");
2306 PyErr_Format(PyExc_ValueError
,
2307 "file descriptor cannot be a negative integer (%i)",
2314 /* From here on we need access to the real fgets and fread */
2319 ** Py_UniversalNewlineFgets is an fgets variation that understands
2320 ** all of \r, \n and \r\n conventions.
2321 ** The stream should be opened in binary mode.
2322 ** If fobj is NULL the routine always does newline conversion, and
2323 ** it may peek one char ahead to gobble the second char in \r\n.
2324 ** If fobj is non-NULL it must be a PyFileObject. In this case there
2325 ** is no readahead but instead a flag is used to skip a following
2326 ** \n on the next read. Also, if the file is open in binary mode
2327 ** the whole conversion is skipped. Finally, the routine keeps track of
2328 ** the different types of newlines seen.
2329 ** Note that we need no error handling: fgets() treats error and eof
2333 Py_UniversalNewlineFgets(char *buf
, int n
, FILE *stream
, PyObject
*fobj
)
2337 int newlinetypes
= 0;
2339 int univ_newline
= 1;
2342 if (!PyFile_Check(fobj
)) {
2343 errno
= ENXIO
; /* What can you do... */
2346 univ_newline
= ((PyFileObject
*)fobj
)->f_univ_newline
;
2347 if ( !univ_newline
)
2348 return fgets(buf
, n
, stream
);
2349 newlinetypes
= ((PyFileObject
*)fobj
)->f_newlinetypes
;
2350 skipnextlf
= ((PyFileObject
*)fobj
)->f_skipnextlf
;
2353 c
= 'x'; /* Shut up gcc warning */
2354 while (--n
> 0 && (c
= GETC(stream
)) != EOF
) {
2358 /* Seeing a \n here with skipnextlf true
2359 ** means we saw a \r before.
2361 newlinetypes
|= NEWLINE_CRLF
;
2363 if (c
== EOF
) break;
2366 ** Note that c == EOF also brings us here,
2367 ** so we're okay if the last char in the file
2370 newlinetypes
|= NEWLINE_CR
;
2374 /* A \r is translated into a \n, and we skip
2375 ** an adjacent \n, if any. We don't set the
2376 ** newlinetypes flag until we've seen the next char.
2380 } else if ( c
== '\n') {
2381 newlinetypes
|= NEWLINE_LF
;
2384 if (c
== '\n') break;
2386 if ( c
== EOF
&& skipnextlf
)
2387 newlinetypes
|= NEWLINE_CR
;
2388 FUNLOCKFILE(stream
);
2391 ((PyFileObject
*)fobj
)->f_newlinetypes
= newlinetypes
;
2392 ((PyFileObject
*)fobj
)->f_skipnextlf
= skipnextlf
;
2393 } else if ( skipnextlf
) {
2394 /* If we have no file object we cannot save the
2395 ** skipnextlf flag. We have to readahead, which
2396 ** will cause a pause if we're reading from an
2397 ** interactive stream, but that is very unlikely
2398 ** unless we're doing something silly like
2399 ** execfile("/dev/tty").
2411 ** Py_UniversalNewlineFread is an fread variation that understands
2412 ** all of \r, \n and \r\n conventions.
2413 ** The stream should be opened in binary mode.
2414 ** fobj must be a PyFileObject. In this case there
2415 ** is no readahead but instead a flag is used to skip a following
2416 ** \n on the next read. Also, if the file is open in binary mode
2417 ** the whole conversion is skipped. Finally, the routine keeps track of
2418 ** the different types of newlines seen.
2421 Py_UniversalNewlineFread(char *buf
, size_t n
,
2422 FILE *stream
, PyObject
*fobj
)
2425 PyFileObject
*f
= (PyFileObject
*)fobj
;
2426 int newlinetypes
, skipnextlf
;
2428 assert(buf
!= NULL
);
2429 assert(stream
!= NULL
);
2431 if (!fobj
|| !PyFile_Check(fobj
)) {
2432 errno
= ENXIO
; /* What can you do... */
2435 if (!f
->f_univ_newline
)
2436 return fread(buf
, 1, n
, stream
);
2437 newlinetypes
= f
->f_newlinetypes
;
2438 skipnextlf
= f
->f_skipnextlf
;
2439 /* Invariant: n is the number of bytes remaining to be filled
2447 nread
= fread(dst
, 1, n
, stream
);
2452 n
-= nread
; /* assuming 1 byte out for each in; will adjust */
2453 shortread
= n
!= 0; /* true iff EOF or error */
2457 /* Save as LF and set flag to skip next LF. */
2461 else if (skipnextlf
&& c
== '\n') {
2462 /* Skip LF, and remember we saw CR LF. */
2464 newlinetypes
|= NEWLINE_CRLF
;
2468 /* Normal char to be stored in buffer. Also
2469 * update the newlinetypes flag if either this
2470 * is an LF or the previous char was a CR.
2473 newlinetypes
|= NEWLINE_LF
;
2474 else if (skipnextlf
)
2475 newlinetypes
|= NEWLINE_CR
;
2481 /* If this is EOF, update type flags. */
2482 if (skipnextlf
&& feof(stream
))
2483 newlinetypes
|= NEWLINE_CR
;
2487 f
->f_newlinetypes
= newlinetypes
;
2488 f
->f_skipnextlf
= skipnextlf
;