1 /* File object implementation */
3 #define PY_SSIZE_T_CLEAN
5 #include "structmember.h"
7 #ifdef HAVE_SYS_TYPES_H
9 #endif /* HAVE_SYS_TYPES_H */
12 #define fileno _fileno
13 /* can simulate truncate with Win32 API functions; see file_truncate */
14 #define HAVE_FTRUNCATE
15 #define WIN32_LEAN_AND_MEAN
20 /* Need GetVersion to see if on NT so safe to use _wfopen */
21 #define WIN32_LEAN_AND_MEAN
25 #if defined(PYOS_OS2) && defined(PYCC_GCC)
29 #define BUF(v) PyString_AS_STRING((PyStringObject *)v)
31 #ifndef DONT_HAVE_ERRNO_H
35 #ifdef HAVE_GETC_UNLOCKED
36 #define GETC(f) getc_unlocked(f)
37 #define FLOCKFILE(f) flockfile(f)
38 #define FUNLOCKFILE(f) funlockfile(f)
40 #define GETC(f) getc(f)
42 #define FUNLOCKFILE(f)
45 /* Bits in f_newlinetypes */
46 #define NEWLINE_UNKNOWN 0 /* No newline seen, yet */
47 #define NEWLINE_CR 1 /* \r newline seen */
48 #define NEWLINE_LF 2 /* \n newline seen */
49 #define NEWLINE_CRLF 4 /* \r\n newline seen */
56 PyFile_AsFile(PyObject
*f
)
58 if (f
== NULL
|| !PyFile_Check(f
))
61 return ((PyFileObject
*)f
)->f_fp
;
65 PyFile_Name(PyObject
*f
)
67 if (f
== NULL
|| !PyFile_Check(f
))
70 return ((PyFileObject
*)f
)->f_name
;
73 /* On Unix, fopen will succeed for directories.
74 In Python, there should be no file objects referring to
75 directories, so we need a check. */
78 dircheck(PyFileObject
* f
)
80 #if defined(HAVE_FSTAT) && defined(S_IFDIR) && defined(EISDIR)
84 if (fstat(fileno(f
->f_fp
), &buf
) == 0 &&
85 S_ISDIR(buf
.st_mode
)) {
87 char *msg
= strerror(EISDIR
);
89 char *msg
= "Is a directory";
91 PyObject
*exc
= PyObject_CallFunction(PyExc_IOError
, "(is)",
93 PyErr_SetObject(PyExc_IOError
, exc
);
103 fill_file_fields(PyFileObject
*f
, FILE *fp
, PyObject
*name
, char *mode
,
104 int (*close
)(FILE *))
106 assert(name
!= NULL
);
108 assert(PyFile_Check(f
));
109 assert(f
->f_fp
== NULL
);
111 Py_DECREF(f
->f_name
);
112 Py_DECREF(f
->f_mode
);
113 Py_DECREF(f
->f_encoding
);
118 f
->f_mode
= PyString_FromString(mode
);
122 f
->f_binary
= strchr(mode
,'b') != NULL
;
124 f
->f_univ_newline
= (strchr(mode
, 'U') != NULL
);
125 f
->f_newlinetypes
= NEWLINE_UNKNOWN
;
128 f
->f_encoding
= Py_None
;
130 if (f
->f_mode
== NULL
)
134 return (PyObject
*) f
;
137 /* check for known incorrect mode strings - problem is, platforms are
138 free to accept any mode characters they like and are supposed to
139 ignore stuff they don't understand... write or append mode with
140 universal newline support is expressly forbidden by PEP 278.
141 Additionally, remove the 'U' from the mode string as platforms
142 won't know what it is. Non-zero return signals an exception */
144 _PyFile_SanitizeMode(char *mode
)
147 size_t len
= strlen(mode
);
150 PyErr_SetString(PyExc_ValueError
, "empty mode string");
154 upos
= strchr(mode
, 'U');
156 memmove(upos
, upos
+1, len
-(upos
-mode
)); /* incl null char */
158 if (mode
[0] == 'w' || mode
[0] == 'a') {
159 PyErr_Format(PyExc_ValueError
, "universal newline "
160 "mode can only be used with modes "
161 "starting with 'r'");
165 if (mode
[0] != 'r') {
166 memmove(mode
+1, mode
, strlen(mode
)+1);
170 if (!strchr(mode
, 'b')) {
171 memmove(mode
+2, mode
+1, strlen(mode
));
174 } else if (mode
[0] != 'r' && mode
[0] != 'w' && mode
[0] != 'a') {
175 PyErr_Format(PyExc_ValueError
, "mode string must begin with "
176 "one of 'r', 'w', 'a' or 'U', not '%.200s'", mode
);
184 open_the_file(PyFileObject
*f
, char *name
, char *mode
)
188 assert(PyFile_Check(f
));
190 /* Windows ignores the passed name in order to support Unicode */
191 assert(f
->f_name
!= NULL
);
193 assert(name
!= NULL
);
195 assert(mode
!= NULL
);
196 assert(f
->f_fp
== NULL
);
198 /* probably need to replace 'U' by 'rb' */
199 newmode
= PyMem_MALLOC(strlen(mode
) + 3);
204 strcpy(newmode
, mode
);
206 if (_PyFile_SanitizeMode(newmode
)) {
211 /* rexec.py can't stop a user from getting the file() constructor --
212 all they have to do is get *any* file object f, and then do
213 type(f). Here we prevent them from doing damage with it. */
214 if (PyEval_GetRestricted()) {
215 PyErr_SetString(PyExc_IOError
,
216 "file() constructor not accessible in restricted mode");
223 if (PyUnicode_Check(f
->f_name
)) {
225 wmode
= PyUnicode_DecodeASCII(newmode
, strlen(newmode
), NULL
);
226 if (f
->f_name
&& wmode
) {
227 Py_BEGIN_ALLOW_THREADS
228 /* PyUnicode_AS_UNICODE OK without thread
229 lock as it is a simple dereference. */
230 f
->f_fp
= _wfopen(PyUnicode_AS_UNICODE(f
->f_name
),
231 PyUnicode_AS_UNICODE(wmode
));
237 if (NULL
== f
->f_fp
&& NULL
!= name
) {
238 Py_BEGIN_ALLOW_THREADS
239 f
->f_fp
= fopen(name
, newmode
);
243 if (f
->f_fp
== NULL
) {
244 #if defined _MSC_VER && (_MSC_VER < 1400 || !defined(__STDC_SECURE_LIB__))
245 /* MSVC 6 (Microsoft) leaves errno at 0 for bad mode strings,
246 * across all Windows flavors. When it sets EINVAL varies
247 * across Windows flavors, the exact conditions aren't
248 * documented, and the answer lies in the OS's implementation
249 * of Win32's CreateFile function (whose source is secret).
250 * Seems the best we can do is map EINVAL to ENOENT.
251 * Starting with Visual Studio .NET 2005, EINVAL is correctly
252 * set by our CRT error handler (set in exceptions.c.)
254 if (errno
== 0) /* bad mode string */
256 else if (errno
== EINVAL
) /* unknown, but not a mode string */
260 PyErr_Format(PyExc_IOError
, "invalid mode: %s",
263 PyErr_SetFromErrnoWithFilenameObject(PyExc_IOError
, f
->f_name
);
272 return (PyObject
*)f
;
276 PyFile_FromFile(FILE *fp
, char *name
, char *mode
, int (*close
)(FILE *))
278 PyFileObject
*f
= (PyFileObject
*)PyFile_Type
.tp_new(&PyFile_Type
,
281 PyObject
*o_name
= PyString_FromString(name
);
284 if (fill_file_fields(f
, fp
, o_name
, mode
, close
) == NULL
) {
290 return (PyObject
*) f
;
294 PyFile_FromString(char *name
, char *mode
)
296 extern int fclose(FILE *);
299 f
= (PyFileObject
*)PyFile_FromFile((FILE *)NULL
, name
, mode
, fclose
);
301 if (open_the_file(f
, name
, mode
) == NULL
) {
306 return (PyObject
*)f
;
310 PyFile_SetBufSize(PyObject
*f
, int bufsize
)
312 PyFileObject
*file
= (PyFileObject
*)f
;
333 if (type
== _IONBF
) {
334 PyMem_Free(file
->f_setbuf
);
335 file
->f_setbuf
= NULL
;
337 file
->f_setbuf
= (char *)PyMem_Realloc(file
->f_setbuf
,
341 setvbuf(file
->f_fp
, file
->f_setbuf
, type
, bufsize
);
342 #else /* !HAVE_SETVBUF */
343 setbuf(file
->f_fp
, file
->f_setbuf
);
344 #endif /* !HAVE_SETVBUF */
348 /* Set the encoding used to output Unicode strings.
349 Return 1 on success, 0 on failure. */
352 PyFile_SetEncoding(PyObject
*f
, const char *enc
)
354 PyFileObject
*file
= (PyFileObject
*)f
;
355 PyObject
*str
= PyString_FromString(enc
);
357 assert(PyFile_Check(f
));
360 Py_DECREF(file
->f_encoding
);
361 file
->f_encoding
= str
;
368 PyErr_SetString(PyExc_ValueError
, "I/O operation on closed file");
372 /* Refuse regular file I/O if there's data in the iteration-buffer.
373 * Mixing them would cause data to arrive out of order, as the read*
374 * methods don't use the iteration buffer. */
376 err_iterbuffered(void)
378 PyErr_SetString(PyExc_ValueError
,
379 "Mixing iteration and read methods would lose data");
383 static void drop_readahead(PyFileObject
*);
388 file_dealloc(PyFileObject
*f
)
391 if (f
->weakreflist
!= NULL
)
392 PyObject_ClearWeakRefs((PyObject
*) f
);
393 if (f
->f_fp
!= NULL
&& f
->f_close
!= NULL
) {
394 Py_BEGIN_ALLOW_THREADS
395 sts
= (*f
->f_close
)(f
->f_fp
);
399 PySys_WriteStderr("close failed: [Errno %d] %s\n", errno
, strerror(errno
));
401 PySys_WriteStderr("close failed: [Errno %d]\n", errno
);
404 PyMem_Free(f
->f_setbuf
);
405 Py_XDECREF(f
->f_name
);
406 Py_XDECREF(f
->f_mode
);
407 Py_XDECREF(f
->f_encoding
);
409 Py_TYPE(f
)->tp_free((PyObject
*)f
);
413 file_repr(PyFileObject
*f
)
415 if (PyUnicode_Check(f
->f_name
)) {
416 #ifdef Py_USING_UNICODE
417 PyObject
*ret
= NULL
;
418 PyObject
*name
= PyUnicode_AsUnicodeEscapeString(f
->f_name
);
419 const char *name_str
= name
? PyString_AsString(name
) : "?";
420 ret
= PyString_FromFormat("<%s file u'%s', mode '%s' at %p>",
421 f
->f_fp
== NULL
? "closed" : "open",
423 PyString_AsString(f
->f_mode
),
429 return PyString_FromFormat("<%s file '%s', mode '%s' at %p>",
430 f
->f_fp
== NULL
? "closed" : "open",
431 PyString_AsString(f
->f_name
),
432 PyString_AsString(f
->f_mode
),
438 file_close(PyFileObject
*f
)
441 if (f
->f_fp
!= NULL
) {
442 if (f
->f_close
!= NULL
) {
443 Py_BEGIN_ALLOW_THREADS
445 sts
= (*f
->f_close
)(f
->f_fp
);
450 PyMem_Free(f
->f_setbuf
);
453 return PyErr_SetFromErrno(PyExc_IOError
);
455 return PyInt_FromLong((long)sts
);
461 /* Our very own off_t-like type, 64-bit if possible */
462 #if !defined(HAVE_LARGEFILE_SUPPORT)
463 typedef off_t Py_off_t
;
464 #elif SIZEOF_OFF_T >= 8
465 typedef off_t Py_off_t
;
466 #elif SIZEOF_FPOS_T >= 8
467 typedef fpos_t Py_off_t
;
469 #error "Large file support, but neither off_t nor fpos_t is large enough."
473 /* a portable fseek() function
474 return 0 on success, non-zero on failure (with errno set) */
476 _portable_fseek(FILE *fp
, Py_off_t offset
, int whence
)
478 #if !defined(HAVE_LARGEFILE_SUPPORT)
479 return fseek(fp
, offset
, whence
);
480 #elif defined(HAVE_FSEEKO) && SIZEOF_OFF_T >= 8
481 return fseeko(fp
, offset
, whence
);
482 #elif defined(HAVE_FSEEK64)
483 return fseek64(fp
, offset
, whence
);
484 #elif defined(__BEOS__)
485 return _fseek(fp
, offset
, whence
);
486 #elif SIZEOF_FPOS_T >= 8
487 /* lacking a 64-bit capable fseek(), use a 64-bit capable fsetpos()
488 and fgetpos() to implement fseek()*/
494 if (_lseeki64(fileno(fp
), 0, 2) == -1)
497 if (fseek(fp
, 0, SEEK_END
) != 0)
502 if (fgetpos(fp
, &pos
) != 0)
506 /* case SEEK_SET: break; */
508 return fsetpos(fp
, &offset
);
510 #error "Large file support, but no way to fseek."
515 /* a portable ftell() function
516 Return -1 on failure with errno set appropriately, current file
517 position on success */
519 _portable_ftell(FILE* fp
)
521 #if !defined(HAVE_LARGEFILE_SUPPORT)
523 #elif defined(HAVE_FTELLO) && SIZEOF_OFF_T >= 8
525 #elif defined(HAVE_FTELL64)
527 #elif SIZEOF_FPOS_T >= 8
529 if (fgetpos(fp
, &pos
) != 0)
533 #error "Large file support, but no way to ftell."
539 file_seek(PyFileObject
*f
, PyObject
*args
)
544 PyObject
*offobj
, *off_index
;
550 if (!PyArg_ParseTuple(args
, "O|i:seek", &offobj
, &whence
))
552 off_index
= PyNumber_Index(offobj
);
554 if (!PyFloat_Check(offobj
))
556 /* Deprecated in 2.6 */
558 if (PyErr_Warn(PyExc_DeprecationWarning
,
559 "integer argument expected, got float"))
564 #if !defined(HAVE_LARGEFILE_SUPPORT)
565 offset
= PyInt_AsLong(off_index
);
567 offset
= PyLong_Check(off_index
) ?
568 PyLong_AsLongLong(off_index
) : PyInt_AsLong(off_index
);
570 Py_DECREF(off_index
);
571 if (PyErr_Occurred())
574 Py_BEGIN_ALLOW_THREADS
576 ret
= _portable_fseek(f
->f_fp
, offset
, whence
);
580 PyErr_SetFromErrno(PyExc_IOError
);
590 #ifdef HAVE_FTRUNCATE
592 file_truncate(PyFileObject
*f
, PyObject
*args
)
595 PyObject
*newsizeobj
= NULL
;
601 if (!PyArg_UnpackTuple(args
, "truncate", 0, 1, &newsizeobj
))
604 /* Get current file position. If the file happens to be open for
605 * update and the last operation was an input operation, C doesn't
606 * define what the later fflush() will do, but we promise truncate()
607 * won't change the current position (and fflush() *does* change it
608 * then at least on Windows). The easiest thing is to capture
609 * current pos now and seek back to it at the end.
611 Py_BEGIN_ALLOW_THREADS
613 initialpos
= _portable_ftell(f
->f_fp
);
615 if (initialpos
== -1)
618 /* Set newsize to current position if newsizeobj NULL, else to the
621 if (newsizeobj
!= NULL
) {
622 #if !defined(HAVE_LARGEFILE_SUPPORT)
623 newsize
= PyInt_AsLong(newsizeobj
);
625 newsize
= PyLong_Check(newsizeobj
) ?
626 PyLong_AsLongLong(newsizeobj
) :
627 PyInt_AsLong(newsizeobj
);
629 if (PyErr_Occurred())
632 else /* default to current position */
633 newsize
= initialpos
;
635 /* Flush the stream. We're mixing stream-level I/O with lower-level
636 * I/O, and a flush may be necessary to synch both platform views
637 * of the current file state.
639 Py_BEGIN_ALLOW_THREADS
641 ret
= fflush(f
->f_fp
);
647 /* MS _chsize doesn't work if newsize doesn't fit in 32 bits,
648 so don't even try using it. */
652 /* Have to move current pos to desired endpoint on Windows. */
653 Py_BEGIN_ALLOW_THREADS
655 ret
= _portable_fseek(f
->f_fp
, newsize
, SEEK_SET
) != 0;
660 /* Truncate. Note that this may grow the file! */
661 Py_BEGIN_ALLOW_THREADS
663 hFile
= (HANDLE
)_get_osfhandle(fileno(f
->f_fp
));
664 ret
= hFile
== (HANDLE
)-1;
666 ret
= SetEndOfFile(hFile
) == 0;
675 Py_BEGIN_ALLOW_THREADS
677 ret
= ftruncate(fileno(f
->f_fp
), newsize
);
681 #endif /* !MS_WINDOWS */
683 /* Restore original file position. */
684 Py_BEGIN_ALLOW_THREADS
686 ret
= _portable_fseek(f
->f_fp
, initialpos
, SEEK_SET
) != 0;
695 PyErr_SetFromErrno(PyExc_IOError
);
699 #endif /* HAVE_FTRUNCATE */
702 file_tell(PyFileObject
*f
)
708 Py_BEGIN_ALLOW_THREADS
710 pos
= _portable_ftell(f
->f_fp
);
713 PyErr_SetFromErrno(PyExc_IOError
);
717 if (f
->f_skipnextlf
) {
721 f
->f_newlinetypes
|= NEWLINE_CRLF
;
724 } else if (c
!= EOF
) ungetc(c
, f
->f_fp
);
726 #if !defined(HAVE_LARGEFILE_SUPPORT)
727 return PyInt_FromLong(pos
);
729 return PyLong_FromLongLong(pos
);
734 file_fileno(PyFileObject
*f
)
738 return PyInt_FromLong((long) fileno(f
->f_fp
));
742 file_flush(PyFileObject
*f
)
748 Py_BEGIN_ALLOW_THREADS
750 res
= fflush(f
->f_fp
);
753 PyErr_SetFromErrno(PyExc_IOError
);
762 file_isatty(PyFileObject
*f
)
767 Py_BEGIN_ALLOW_THREADS
768 res
= isatty((int)fileno(f
->f_fp
));
770 return PyBool_FromLong(res
);
775 #define SMALLCHUNK 8192
777 #define SMALLCHUNK BUFSIZ
781 #define BIGCHUNK (512 * 32)
783 #define BIGCHUNK (512 * 1024)
787 new_buffersize(PyFileObject
*f
, size_t currentsize
)
792 if (fstat(fileno(f
->f_fp
), &st
) == 0) {
794 /* The following is not a bug: we really need to call lseek()
795 *and* ftell(). The reason is that some stdio libraries
796 mistakenly flush their buffer when ftell() is called and
797 the lseek() call it makes fails, thereby throwing away
798 data that cannot be recovered in any way. To avoid this,
799 we first test lseek(), and only call ftell() if lseek()
800 works. We can't use the lseek() value either, because we
801 need to take the amount of buffered data into account.
802 (Yet another reason why stdio stinks. :-) */
803 pos
= lseek(fileno(f
->f_fp
), 0L, SEEK_CUR
);
805 pos
= ftell(f
->f_fp
);
809 if (end
> pos
&& pos
>= 0)
810 return currentsize
+ end
- pos
+ 1;
811 /* Add 1 so if the file were to grow we'd notice. */
814 if (currentsize
> SMALLCHUNK
) {
815 /* Keep doubling until we reach BIGCHUNK;
816 then keep adding BIGCHUNK. */
817 if (currentsize
<= BIGCHUNK
)
818 return currentsize
+ currentsize
;
820 return currentsize
+ BIGCHUNK
;
822 return currentsize
+ SMALLCHUNK
;
825 #if defined(EWOULDBLOCK) && defined(EAGAIN) && EWOULDBLOCK != EAGAIN
826 #define BLOCKED_ERRNO(x) ((x) == EWOULDBLOCK || (x) == EAGAIN)
829 #define BLOCKED_ERRNO(x) ((x) == EWOULDBLOCK)
832 #define BLOCKED_ERRNO(x) ((x) == EAGAIN)
834 #define BLOCKED_ERRNO(x) 0
840 file_read(PyFileObject
*f
, PyObject
*args
)
842 long bytesrequested
= -1;
843 size_t bytesread
, buffersize
, chunksize
;
848 /* refuse to mix with f.next() */
849 if (f
->f_buf
!= NULL
&&
850 (f
->f_bufend
- f
->f_bufptr
) > 0 &&
852 return err_iterbuffered();
853 if (!PyArg_ParseTuple(args
, "|l:read", &bytesrequested
))
855 if (bytesrequested
< 0)
856 buffersize
= new_buffersize(f
, (size_t)0);
858 buffersize
= bytesrequested
;
859 if (buffersize
> PY_SSIZE_T_MAX
) {
860 PyErr_SetString(PyExc_OverflowError
,
861 "requested number of bytes is more than a Python string can hold");
864 v
= PyString_FromStringAndSize((char *)NULL
, buffersize
);
869 Py_BEGIN_ALLOW_THREADS
871 chunksize
= Py_UniversalNewlineFread(BUF(v
) + bytesread
,
872 buffersize
- bytesread
, f
->f_fp
, (PyObject
*)f
);
874 if (chunksize
== 0) {
875 if (!ferror(f
->f_fp
))
878 /* When in non-blocking mode, data shouldn't
879 * be discarded if a blocking signal was
880 * received. That will also happen if
881 * chunksize != 0, but bytesread < buffersize. */
882 if (bytesread
> 0 && BLOCKED_ERRNO(errno
))
884 PyErr_SetFromErrno(PyExc_IOError
);
888 bytesread
+= chunksize
;
889 if (bytesread
< buffersize
) {
893 if (bytesrequested
< 0) {
894 buffersize
= new_buffersize(f
, buffersize
);
895 if (_PyString_Resize(&v
, buffersize
) < 0)
898 /* Got what was requested. */
902 if (bytesread
!= buffersize
)
903 _PyString_Resize(&v
, bytesread
);
908 file_readinto(PyFileObject
*f
, PyObject
*args
)
912 Py_ssize_t ndone
, nnow
;
916 /* refuse to mix with f.next() */
917 if (f
->f_buf
!= NULL
&&
918 (f
->f_bufend
- f
->f_bufptr
) > 0 &&
920 return err_iterbuffered();
921 if (!PyArg_ParseTuple(args
, "w#", &ptr
, &ntodo
))
925 Py_BEGIN_ALLOW_THREADS
927 nnow
= Py_UniversalNewlineFread(ptr
+ndone
, ntodo
, f
->f_fp
,
931 if (!ferror(f
->f_fp
))
933 PyErr_SetFromErrno(PyExc_IOError
);
940 return PyInt_FromSsize_t(ndone
);
943 /**************************************************************************
944 Routine to get next line using platform fgets().
948 + MS threadsafe getc is very slow (multiple layers of function calls before+
949 after each character, to lock+unlock the stream).
950 + The stream-locking functions are MS-internal -- can't access them from user
952 + There's nothing Tim could find in the MS C or platform SDK libraries that
953 can worm around this.
954 + MS fgets locks/unlocks only once per line; it's the only hook we have.
956 So we use fgets for speed(!), despite that it's painful.
958 MS realloc is also slow.
960 Reports from other platforms on this method vs getc_unlocked (which MS doesn't
964 Tru64 Unix getline_via_fgets significantly faster
966 CAUTION: The C std isn't clear about this: in those cases where fgets
967 writes something into the buffer, can it write into any position beyond the
968 required trailing null byte? MSVC 6 fgets does not, and no platform is (yet)
969 known on which it does; and it would be a strange way to code fgets. Still,
970 getline_via_fgets may not work correctly if it does. The std test
971 test_bufio.py should fail if platform fgets() routinely writes beyond the
972 trailing null byte. #define DONT_USE_FGETS_IN_GETLINE to disable this code.
973 **************************************************************************/
975 /* Use this routine if told to, or by default on non-getc_unlocked()
976 * platforms unless told not to. Yikes! Let's spell that out:
977 * On a platform with getc_unlocked():
978 * By default, use getc_unlocked().
979 * If you want to use fgets() instead, #define USE_FGETS_IN_GETLINE.
980 * On a platform without getc_unlocked():
981 * By default, use fgets().
982 * If you don't want to use fgets(), #define DONT_USE_FGETS_IN_GETLINE.
984 #if !defined(USE_FGETS_IN_GETLINE) && !defined(HAVE_GETC_UNLOCKED)
985 #define USE_FGETS_IN_GETLINE
988 #if defined(DONT_USE_FGETS_IN_GETLINE) && defined(USE_FGETS_IN_GETLINE)
989 #undef USE_FGETS_IN_GETLINE
992 #ifdef USE_FGETS_IN_GETLINE
994 getline_via_fgets(FILE *fp
)
996 /* INITBUFSIZE is the maximum line length that lets us get away with the fast
997 * no-realloc, one-fgets()-call path. Boosting it isn't free, because we have
998 * to fill this much of the buffer with a known value in order to figure out
999 * how much of the buffer fgets() overwrites. So if INITBUFSIZE is larger
1000 * than "most" lines, we waste time filling unused buffer slots. 100 is
1001 * surely adequate for most people's email archives, chewing over source code,
1002 * etc -- "regular old text files".
1003 * MAXBUFSIZE is the maximum line length that lets us get away with the less
1004 * fast (but still zippy) no-realloc, two-fgets()-call path. See above for
1005 * cautions about boosting that. 300 was chosen because the worst real-life
1006 * text-crunching job reported on Python-Dev was a mail-log crawler where over
1007 * half the lines were 254 chars.
1009 #define INITBUFSIZE 100
1010 #define MAXBUFSIZE 300
1012 char buf
[MAXBUFSIZE
];
1013 PyObject
* v
; /* the string object result */
1014 char* pvfree
; /* address of next free slot */
1015 char* pvend
; /* address one beyond last free slot */
1016 size_t nfree
; /* # of free buffer slots; pvend-pvfree */
1017 size_t total_v_size
; /* total # of slots in buffer */
1018 size_t increment
; /* amount to increment the buffer */
1021 /* Optimize for normal case: avoid _PyString_Resize if at all
1022 * possible via first reading into stack buffer "buf".
1024 total_v_size
= INITBUFSIZE
; /* start small and pray */
1027 Py_BEGIN_ALLOW_THREADS
1028 pvend
= buf
+ total_v_size
;
1029 nfree
= pvend
- pvfree
;
1030 memset(pvfree
, '\n', nfree
);
1031 assert(nfree
< INT_MAX
); /* Should be atmost MAXBUFSIZE */
1032 p
= fgets(pvfree
, (int)nfree
, fp
);
1033 Py_END_ALLOW_THREADS
1037 if (PyErr_CheckSignals())
1039 v
= PyString_FromStringAndSize(buf
, pvfree
- buf
);
1042 /* fgets read *something* */
1043 p
= memchr(pvfree
, '\n', nfree
);
1045 /* Did the \n come from fgets or from us?
1046 * Since fgets stops at the first \n, and then writes
1047 * \0, if it's from fgets a \0 must be next. But if
1048 * that's so, it could not have come from us, since
1049 * the \n's we filled the buffer with have only more
1050 * \n's to the right.
1052 if (p
+1 < pvend
&& *(p
+1) == '\0') {
1053 /* It's from fgets: we win! In particular,
1054 * we haven't done any mallocs yet, and can
1055 * build the final result on the first try.
1057 ++p
; /* include \n from fgets */
1060 /* Must be from us: fgets didn't fill the
1061 * buffer and didn't find a newline, so it
1062 * must be the last and newline-free line of
1065 assert(p
> pvfree
&& *(p
-1) == '\0');
1066 --p
; /* don't include \0 from fgets */
1068 v
= PyString_FromStringAndSize(buf
, p
- buf
);
1071 /* yuck: fgets overwrote all the newlines, i.e. the entire
1072 * buffer. So this line isn't over yet, or maybe it is but
1073 * we're exactly at EOF. If we haven't already, try using the
1074 * rest of the stack buffer.
1076 assert(*(pvend
-1) == '\0');
1077 if (pvfree
== buf
) {
1078 pvfree
= pvend
- 1; /* overwrite trailing null */
1079 total_v_size
= MAXBUFSIZE
;
1085 /* The stack buffer isn't big enough; malloc a string object and read
1088 total_v_size
= MAXBUFSIZE
<< 1;
1089 v
= PyString_FromStringAndSize((char*)NULL
, (int)total_v_size
);
1092 /* copy over everything except the last null byte */
1093 memcpy(BUF(v
), buf
, MAXBUFSIZE
-1);
1094 pvfree
= BUF(v
) + MAXBUFSIZE
- 1;
1096 /* Keep reading stuff into v; if it ever ends successfully, break
1097 * after setting p one beyond the end of the line. The code here is
1098 * very much like the code above, except reads into v's buffer; see
1099 * the code above for detailed comments about the logic.
1102 Py_BEGIN_ALLOW_THREADS
1103 pvend
= BUF(v
) + total_v_size
;
1104 nfree
= pvend
- pvfree
;
1105 memset(pvfree
, '\n', nfree
);
1106 assert(nfree
< INT_MAX
);
1107 p
= fgets(pvfree
, (int)nfree
, fp
);
1108 Py_END_ALLOW_THREADS
1112 if (PyErr_CheckSignals()) {
1119 p
= memchr(pvfree
, '\n', nfree
);
1121 if (p
+1 < pvend
&& *(p
+1) == '\0') {
1122 /* \n came from fgets */
1126 /* \n came from us; last line of file, no newline */
1127 assert(p
> pvfree
&& *(p
-1) == '\0');
1131 /* expand buffer and try again */
1132 assert(*(pvend
-1) == '\0');
1133 increment
= total_v_size
>> 2; /* mild exponential growth */
1134 prev_v_size
= total_v_size
;
1135 total_v_size
+= increment
;
1136 /* check for overflow */
1137 if (total_v_size
<= prev_v_size
||
1138 total_v_size
> PY_SSIZE_T_MAX
) {
1139 PyErr_SetString(PyExc_OverflowError
,
1140 "line is longer than a Python string can hold");
1144 if (_PyString_Resize(&v
, (int)total_v_size
) < 0)
1146 /* overwrite the trailing null byte */
1147 pvfree
= BUF(v
) + (prev_v_size
- 1);
1149 if (BUF(v
) + total_v_size
!= p
)
1150 _PyString_Resize(&v
, p
- BUF(v
));
1155 #endif /* ifdef USE_FGETS_IN_GETLINE */
1157 /* Internal routine to get a line.
1158 Size argument interpretation:
1160 <= 0: read arbitrary line
1164 get_line(PyFileObject
*f
, int n
)
1169 size_t total_v_size
; /* total # of slots in buffer */
1170 size_t used_v_size
; /* # used slots in buffer */
1171 size_t increment
; /* amount to increment the buffer */
1173 int newlinetypes
= f
->f_newlinetypes
;
1174 int skipnextlf
= f
->f_skipnextlf
;
1175 int univ_newline
= f
->f_univ_newline
;
1177 #if defined(USE_FGETS_IN_GETLINE)
1178 if (n
<= 0 && !univ_newline
)
1179 return getline_via_fgets(fp
);
1181 total_v_size
= n
> 0 ? n
: 100;
1182 v
= PyString_FromStringAndSize((char *)NULL
, total_v_size
);
1186 end
= buf
+ total_v_size
;
1189 Py_BEGIN_ALLOW_THREADS
1192 c
= 'x'; /* Shut up gcc warning */
1193 while ( buf
!= end
&& (c
= GETC(fp
)) != EOF
) {
1197 /* Seeing a \n here with
1198 * skipnextlf true means we
1201 newlinetypes
|= NEWLINE_CRLF
;
1203 if (c
== EOF
) break;
1205 newlinetypes
|= NEWLINE_CR
;
1211 } else if ( c
== '\n')
1212 newlinetypes
|= NEWLINE_LF
;
1214 if (c
== '\n') break;
1216 if ( c
== EOF
&& skipnextlf
)
1217 newlinetypes
|= NEWLINE_CR
;
1218 } else /* If not universal newlines use the normal loop */
1219 while ((c
= GETC(fp
)) != EOF
&&
1220 (*buf
++ = c
) != '\n' &&
1224 Py_END_ALLOW_THREADS
1225 f
->f_newlinetypes
= newlinetypes
;
1226 f
->f_skipnextlf
= skipnextlf
;
1231 PyErr_SetFromErrno(PyExc_IOError
);
1237 if (PyErr_CheckSignals()) {
1243 /* Must be because buf == end */
1246 used_v_size
= total_v_size
;
1247 increment
= total_v_size
>> 2; /* mild exponential growth */
1248 total_v_size
+= increment
;
1249 if (total_v_size
> PY_SSIZE_T_MAX
) {
1250 PyErr_SetString(PyExc_OverflowError
,
1251 "line is longer than a Python string can hold");
1255 if (_PyString_Resize(&v
, total_v_size
) < 0)
1257 buf
= BUF(v
) + used_v_size
;
1258 end
= BUF(v
) + total_v_size
;
1261 used_v_size
= buf
- BUF(v
);
1262 if (used_v_size
!= total_v_size
)
1263 _PyString_Resize(&v
, used_v_size
);
1267 /* External C interface */
1270 PyFile_GetLine(PyObject
*f
, int n
)
1275 PyErr_BadInternalCall();
1279 if (PyFile_Check(f
)) {
1280 PyFileObject
*fo
= (PyFileObject
*)f
;
1281 if (fo
->f_fp
== NULL
)
1282 return err_closed();
1283 /* refuse to mix with f.next() */
1284 if (fo
->f_buf
!= NULL
&&
1285 (fo
->f_bufend
- fo
->f_bufptr
) > 0 &&
1286 fo
->f_buf
[0] != '\0')
1287 return err_iterbuffered();
1288 result
= get_line(fo
, n
);
1294 reader
= PyObject_GetAttrString(f
, "readline");
1298 args
= PyTuple_New(0);
1300 args
= Py_BuildValue("(i)", n
);
1305 result
= PyEval_CallObject(reader
, args
);
1308 if (result
!= NULL
&& !PyString_Check(result
) &&
1309 !PyUnicode_Check(result
)) {
1312 PyErr_SetString(PyExc_TypeError
,
1313 "object.readline() returned non-string");
1317 if (n
< 0 && result
!= NULL
&& PyString_Check(result
)) {
1318 char *s
= PyString_AS_STRING(result
);
1319 Py_ssize_t len
= PyString_GET_SIZE(result
);
1323 PyErr_SetString(PyExc_EOFError
,
1324 "EOF when reading a line");
1326 else if (s
[len
-1] == '\n') {
1327 if (result
->ob_refcnt
== 1)
1328 _PyString_Resize(&result
, len
-1);
1331 v
= PyString_FromStringAndSize(s
, len
-1);
1337 #ifdef Py_USING_UNICODE
1338 if (n
< 0 && result
!= NULL
&& PyUnicode_Check(result
)) {
1339 Py_UNICODE
*s
= PyUnicode_AS_UNICODE(result
);
1340 Py_ssize_t len
= PyUnicode_GET_SIZE(result
);
1344 PyErr_SetString(PyExc_EOFError
,
1345 "EOF when reading a line");
1347 else if (s
[len
-1] == '\n') {
1348 if (result
->ob_refcnt
== 1)
1349 PyUnicode_Resize(&result
, len
-1);
1352 v
= PyUnicode_FromUnicode(s
, len
-1);
1365 file_readline(PyFileObject
*f
, PyObject
*args
)
1369 if (f
->f_fp
== NULL
)
1370 return err_closed();
1371 /* refuse to mix with f.next() */
1372 if (f
->f_buf
!= NULL
&&
1373 (f
->f_bufend
- f
->f_bufptr
) > 0 &&
1374 f
->f_buf
[0] != '\0')
1375 return err_iterbuffered();
1376 if (!PyArg_ParseTuple(args
, "|i:readline", &n
))
1379 return PyString_FromString("");
1382 return get_line(f
, n
);
1386 file_readlines(PyFileObject
*f
, PyObject
*args
)
1391 char small_buffer
[SMALLCHUNK
];
1392 char *buffer
= small_buffer
;
1393 size_t buffersize
= SMALLCHUNK
;
1394 PyObject
*big_buffer
= NULL
;
1397 size_t totalread
= 0;
1402 if (f
->f_fp
== NULL
)
1403 return err_closed();
1404 /* refuse to mix with f.next() */
1405 if (f
->f_buf
!= NULL
&&
1406 (f
->f_bufend
- f
->f_bufptr
) > 0 &&
1407 f
->f_buf
[0] != '\0')
1408 return err_iterbuffered();
1409 if (!PyArg_ParseTuple(args
, "|l:readlines", &sizehint
))
1411 if ((list
= PyList_New(0)) == NULL
)
1417 Py_BEGIN_ALLOW_THREADS
1419 nread
= Py_UniversalNewlineFread(buffer
+nfilled
,
1420 buffersize
-nfilled
, f
->f_fp
, (PyObject
*)f
);
1421 Py_END_ALLOW_THREADS
1422 shortread
= (nread
< buffersize
-nfilled
);
1426 if (!ferror(f
->f_fp
))
1428 PyErr_SetFromErrno(PyExc_IOError
);
1436 p
= (char *)memchr(buffer
+nfilled
, '\n', nread
);
1438 /* Need a larger buffer to fit this line */
1441 if (buffersize
> PY_SSIZE_T_MAX
) {
1442 PyErr_SetString(PyExc_OverflowError
,
1443 "line is longer than a Python string can hold");
1446 if (big_buffer
== NULL
) {
1447 /* Create the big buffer */
1448 big_buffer
= PyString_FromStringAndSize(
1450 if (big_buffer
== NULL
)
1452 buffer
= PyString_AS_STRING(big_buffer
);
1453 memcpy(buffer
, small_buffer
, nfilled
);
1456 /* Grow the big buffer */
1457 if ( _PyString_Resize(&big_buffer
, buffersize
) < 0 )
1459 buffer
= PyString_AS_STRING(big_buffer
);
1463 end
= buffer
+nfilled
+nread
;
1466 /* Process complete lines */
1468 line
= PyString_FromStringAndSize(q
, p
-q
);
1471 err
= PyList_Append(list
, line
);
1476 p
= (char *)memchr(q
, '\n', end
-q
);
1477 } while (p
!= NULL
);
1478 /* Move the remaining incomplete line to the start */
1480 memmove(buffer
, q
, nfilled
);
1482 if (totalread
>= (size_t)sizehint
)
1486 /* Partial last line */
1487 line
= PyString_FromStringAndSize(buffer
, nfilled
);
1491 /* Need to complete the last line */
1492 PyObject
*rest
= get_line(f
, 0);
1497 PyString_Concat(&line
, rest
);
1502 err
= PyList_Append(list
, line
);
1508 Py_XDECREF(big_buffer
);
1513 file_write(PyFileObject
*f
, PyObject
*args
)
1517 if (f
->f_fp
== NULL
)
1518 return err_closed();
1519 if (!PyArg_ParseTuple(args
, f
->f_binary
? "s#" : "t#", &s
, &n
))
1522 Py_BEGIN_ALLOW_THREADS
1524 n2
= fwrite(s
, 1, n
, f
->f_fp
);
1525 Py_END_ALLOW_THREADS
1527 PyErr_SetFromErrno(PyExc_IOError
);
/* file.writelines() implementation: write every item produced by seq to
   the file. Items are gathered into a private list in chunks of
   CHUNKSIZE, items that are not strings are converted to strings, and
   each chunk is then written with fwrite() between
   Py_BEGIN_ALLOW_THREADS and Py_END_ALLOW_THREADS. */
1536 file_writelines(PyFileObject
*f
, PyObject
*seq
)
/* Number of items gathered per pass of the outer loop. */
1538 #define CHUNKSIZE 1000
1539 PyObject
*list
, *line
;
1540 PyObject
*it
; /* iter(seq) */
1543 Py_ssize_t i
, j
, nwritten
, len
;
1545 assert(seq
!= NULL
);
/* A NULL f_fp is reported through err_closed(). */
1546 if (f
->f_fp
== NULL
)
1547 return err_closed();
/* Record whether seq is a list.
   NOTE(review): presumably this selects between the PyList_GetSlice
   path and the PyIter_Next path below; the branch itself is not
   visible in this excerpt. */
1551 islist
= PyList_Check(seq
);
1555 it
= PyObject_GetIter(seq
);
1557 PyErr_SetString(PyExc_TypeError
,
1558 "writelines() requires an iterable argument");
1561 /* From here on, fail by going to error, to reclaim "it". */
1562 list
= PyList_New(CHUNKSIZE
);
1567 /* Strategy: slurp CHUNKSIZE lines into a private list,
1568 checking that they are all strings, then write that list
1569 without holding the interpreter lock, then come back for more. */
1570 for (index
= 0; ; index
+= CHUNKSIZE
) {
/* List path: take the next slice of at most CHUNKSIZE items; j is
   the number of items actually obtained. */
1573 list
= PyList_GetSlice(seq
, index
, index
+CHUNKSIZE
);
1576 j
= PyList_GET_SIZE(list
);
/* Iterator path: pull up to CHUNKSIZE items and store them in the
   private list; j ends up as the number of items stored. */
1579 for (j
= 0; j
< CHUNKSIZE
; j
++) {
1580 line
= PyIter_Next(it
);
1582 if (PyErr_Occurred())
1586 PyList_SetItem(list
, j
, line
);
1592 /* Check that all entries are indeed strings. If not,
1593 apply the same rules as for file.write() and
1594 convert the results to strings. This is slow, but
1595 seems to be the only way since all conversion APIs
1596 could potentially execute Python code. */
1597 for (i
= 0; i
< j
; i
++) {
1598 PyObject
*v
= PyList_GET_ITEM(list
, i
);
1599 if (!PyString_Check(v
)) {
/* Items that are not strings are read through the buffer interface
   (PyObject_AsReadBuffer when f_binary is set, PyObject_AsCharBuffer
   otherwise); failure raises TypeError. */
1601 if (((f
->f_binary
&&
1602 PyObject_AsReadBuffer(v
,
1603 (const void**)&buffer
,
1605 PyObject_AsCharBuffer(v
,
1608 PyErr_SetString(PyExc_TypeError
,
1609 "writelines() argument must be a sequence of strings");
/* Replace the item in the private list with a string copy of the
   buffer contents. */
1612 line
= PyString_FromStringAndSize(buffer
,
1617 PyList_SET_ITEM(list
, i
, line
);
1621 /* Since we are releasing the global lock, the
1622 following code may *not* execute Python code. */
1623 Py_BEGIN_ALLOW_THREADS
/* Write the j gathered strings; a short fwrite() (nwritten != len)
   leads to IOError built from errno. */
1626 for (i
= 0; i
< j
; i
++) {
1627 line
= PyList_GET_ITEM(list
, i
);
1628 len
= PyString_GET_SIZE(line
);
1629 nwritten
= fwrite(PyString_AS_STRING(line
),
1631 if (nwritten
!= len
) {
1633 PyErr_SetFromErrno(PyExc_IOError
);
1638 Py_END_ALLOW_THREADS
/* Return the file object itself. Registered in file_methods for
   xreadlines() and __enter__(), and in PyFile_Type as tp_iter.
   A NULL f_fp is reported through err_closed(). */
1654 file_self(PyFileObject
*f
)
1656 if (f
->f_fp
== NULL
)
1657 return err_closed();
1659 return (PyObject
*)f
;
/* __exit__() implementation (see file_methods): closes the file by
   calling file_close(). The args tuple is not used in the visible
   lines. */
1663 file_exit(PyFileObject
*f
, PyObject
*args
)
1665 PyObject
*ret
= file_close(f
);
1667 /* If error occurred, pass through */
1670 /* We cannot return the result of close since a true
1671 * value will be interpreted as "yes, swallow the
1672 * exception if one was raised inside the with block". */
/* Docstrings for readline(), read() and write(); each one is attached
   to its method through the file_methods table. */
1676 PyDoc_STRVAR(readline_doc
,
1677 "readline([size]) -> next line from the file, as a string.\n"
1679 "Retain newline. A non-negative size argument limits the maximum\n"
1680 "number of bytes to return (an incomplete line may be returned then).\n"
1681 "Return an empty string at EOF.");
1683 PyDoc_STRVAR(read_doc
,
1684 "read([size]) -> read at most size bytes, returned as a string.\n"
1686 "If the size argument is negative or omitted, read until EOF is reached.\n"
1687 "Notice that when in non-blocking mode, less data than what was requested\n"
1688 "may be returned, even if no size parameter was given.");
1690 PyDoc_STRVAR(write_doc
,
1691 "write(str) -> None. Write string str to file.\n"
1693 "Note that due to buffering, flush() or close() may be needed before\n"
1694 "the file on disk reflects the data written.");
/* Docstring for fileno(); attached through the file_methods table. */
1696 PyDoc_STRVAR(fileno_doc
,
1697 "fileno() -> integer \"file descriptor\".\n"
1699 "This is needed for lower-level file interfaces, such as os.read().");
/* Docstrings for the remaining file methods; each one is attached to
   its method through the file_methods table. truncate_doc is only
   defined when HAVE_FTRUNCATE is set. */
1701 PyDoc_STRVAR(seek_doc
,
1702 "seek(offset[, whence]) -> None. Move to new file position.\n"
1704 "Argument offset is a byte count. Optional argument whence defaults to\n"
1705 "0 (offset from start of file, offset should be >= 0); other values are 1\n"
1706 "(move relative to current position, positive or negative), and 2 (move\n"
1707 "relative to end of file, usually negative, although many platforms allow\n"
1708 "seeking beyond the end of a file). If the file is opened in text mode,\n"
1709 "only offsets returned by tell() are legal. Use of other offsets causes\n"
1710 "undefined behavior."
1712 "Note that not all file objects are seekable.");
1714 #ifdef HAVE_FTRUNCATE
1715 PyDoc_STRVAR(truncate_doc
,
1716 "truncate([size]) -> None. Truncate the file to at most size bytes.\n"
1718 "Size defaults to the current file position, as returned by tell().");
1721 PyDoc_STRVAR(tell_doc
,
1722 "tell() -> current file position, an integer (may be a long integer).");
1724 PyDoc_STRVAR(readinto_doc
,
1725 "readinto() -> Undocumented. Don't use this; it may go away.");
1727 PyDoc_STRVAR(readlines_doc
,
1728 "readlines([size]) -> list of strings, each a line from the file.\n"
1730 "Call readline() repeatedly and return a list of the lines so read.\n"
1731 "The optional size argument, if given, is an approximate bound on the\n"
1732 "total number of bytes in the lines returned.");
1734 PyDoc_STRVAR(xreadlines_doc
,
1735 "xreadlines() -> returns self.\n"
1737 "For backward compatibility. File objects now include the performance\n"
1738 "optimizations previously implemented in the xreadlines module.");
1740 PyDoc_STRVAR(writelines_doc
,
1741 "writelines(sequence_of_strings) -> None. Write the strings to the file.\n"
1743 "Note that newlines are not added. The sequence can be any iterable object\n"
1744 "producing strings. This is equivalent to calling write() for each string.");
1746 PyDoc_STRVAR(flush_doc
,
1747 "flush() -> None. Flush the internal I/O buffer.");
1749 PyDoc_STRVAR(close_doc
,
1750 "close() -> None or (perhaps) an integer. Close the file.\n"
1752 "Sets data attribute .closed to True. A closed file cannot be used for\n"
1753 "further I/O operations. close() may be called more than once without\n"
1754 "error. Some kinds of file objects (for example, opened by popen())\n"
1755 "may return an exit status upon closing.");
1757 PyDoc_STRVAR(isatty_doc
,
1758 "isatty() -> true or false. True if the file is connected to a tty device.");
1760 PyDoc_STRVAR(enter_doc
,
1761 "__enter__() -> self.");
1763 PyDoc_STRVAR(exit_doc
,
1764 "__exit__(*excinfo) -> None. Closes the file.");
/* Method table for file objects. Each entry lists the Python-visible
   name, the C function implementing it, the calling convention flag
   (METH_VARARGS, METH_NOARGS or METH_O) and the docstring. The
   truncate entry is only present when HAVE_FTRUNCATE is defined.
   xreadlines and __enter__ both map to file_self. */
1766 static PyMethodDef file_methods
[] = {
1767 {"readline", (PyCFunction
)file_readline
, METH_VARARGS
, readline_doc
},
1768 {"read", (PyCFunction
)file_read
, METH_VARARGS
, read_doc
},
1769 {"write", (PyCFunction
)file_write
, METH_VARARGS
, write_doc
},
1770 {"fileno", (PyCFunction
)file_fileno
, METH_NOARGS
, fileno_doc
},
1771 {"seek", (PyCFunction
)file_seek
, METH_VARARGS
, seek_doc
},
1772 #ifdef HAVE_FTRUNCATE
1773 {"truncate", (PyCFunction
)file_truncate
, METH_VARARGS
, truncate_doc
},
1775 {"tell", (PyCFunction
)file_tell
, METH_NOARGS
, tell_doc
},
1776 {"readinto", (PyCFunction
)file_readinto
, METH_VARARGS
, readinto_doc
},
1777 {"readlines", (PyCFunction
)file_readlines
,METH_VARARGS
, readlines_doc
},
1778 {"xreadlines",(PyCFunction
)file_self
, METH_NOARGS
, xreadlines_doc
},
1779 {"writelines",(PyCFunction
)file_writelines
, METH_O
, writelines_doc
},
1780 {"flush", (PyCFunction
)file_flush
, METH_NOARGS
, flush_doc
},
1781 {"close", (PyCFunction
)file_close
, METH_NOARGS
, close_doc
},
1782 {"isatty", (PyCFunction
)file_isatty
, METH_NOARGS
, isatty_doc
},
1783 {"__enter__", (PyCFunction
)file_self
, METH_NOARGS
, enter_doc
},
1784 {"__exit__", (PyCFunction
)file_exit
, METH_VARARGS
, exit_doc
},
1785 {NULL
, NULL
} /* sentinel */
/* OFF(x) is the byte offset of field x inside PyFileObject. */
1788 #define OFF(x) offsetof(PyFileObject, x)
/* Member table exposing PyFileObject fields as attributes: softspace
   is a T_INT with flags 0, while mode, name and encoding are T_OBJECT
   entries flagged RO. */
1790 static PyMemberDef file_memberlist
[] = {
1791 {"softspace", T_INT
, OFF(f_softspace
), 0,
1792 "flag indicating that a space needs to be printed; used by print"},
1793 {"mode", T_OBJECT
, OFF(f_mode
), RO
,
1794 "file mode ('r', 'U', 'w', 'a', possibly with 'b' or '+' added)"},
1795 {"name", T_OBJECT
, OFF(f_name
), RO
,
1797 {"encoding", T_OBJECT
, OFF(f_encoding
), RO
,
1799 /* getattr(f, "closed") is implemented without this table */
1800 {NULL
} /* Sentinel */
/* Getter for the closed attribute (see file_getsetlist): returns a
   bool that is true when f_fp compares equal to 0. The closure
   argument is not used. */
1804 get_closed(PyFileObject
*f
, void *closure
)
1806 return PyBool_FromLong((long)(f
->f_fp
== 0));
/* Getter for the newlines attribute (see file_getsetlist): translates
   the f_newlinetypes bit mask into Python values. A single newline
   kind yields one string, a combination of kinds yields a tuple of
   strings, and an unrecognised mask raises SystemError.
   NOTE(review): the value returned for NEWLINE_UNKNOWN is not visible
   in this excerpt. */
1809 get_newlines(PyFileObject
*f
, void *closure
)
1811 switch (f
->f_newlinetypes
) {
1812 case NEWLINE_UNKNOWN
:
1816 return PyString_FromString("\r");
1818 return PyString_FromString("\n");
1819 case NEWLINE_CR
|NEWLINE_LF
:
1820 return Py_BuildValue("(ss)", "\r", "\n");
1822 return PyString_FromString("\r\n");
1823 case NEWLINE_CR
|NEWLINE_CRLF
:
1824 return Py_BuildValue("(ss)", "\r", "\r\n");
1825 case NEWLINE_LF
|NEWLINE_CRLF
:
1826 return Py_BuildValue("(ss)", "\n", "\r\n");
1827 case NEWLINE_CR
|NEWLINE_LF
|NEWLINE_CRLF
:
1828 return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
1830 PyErr_Format(PyExc_SystemError
,
1831 "Unknown newlines value 0x%x\n",
/* Computed attributes of file objects. Both entries supply a getter
   and pass NULL in the setter slot. */
1837 static PyGetSetDef file_getsetlist
[] = {
1838 {"closed", (getter
)get_closed
, NULL
, "True if the file is closed"},
1839 {"newlines", (getter
)get_newlines
, NULL
,
1840 "end-of-line convention used in this file"},
/* Release the readahead buffer: when f_buf is not NULL it is freed
   with PyMem_Free(). */
1845 drop_readahead(PyFileObject
*f
)
1847 if (f
->f_buf
!= NULL
) {
1848 PyMem_Free(f
->f_buf
);
1853 /* Make sure that file has a readahead buffer with at least one byte
1854 (unless at EOF) and no more than bufsize. Returns negative value on
1855 error, will set MemoryError if bufsize bytes cannot be allocated. */
1857 readahead(PyFileObject
*f
, int bufsize
)
1859 Py_ssize_t chunksize
;
/* An existing buffer is checked first: it still holds data when
   f_bufend - f_bufptr is at least 1. */
1861 if (f
->f_buf
!= NULL
) {
1862 if( (f
->f_bufend
- f
->f_bufptr
) >= 1)
/* Allocate a fresh buffer of bufsize bytes. */
1867 if ((f
->f_buf
= (char *)PyMem_Malloc(bufsize
)) == NULL
) {
/* Fill the buffer through Py_UniversalNewlineFread(); the call sits
   between Py_BEGIN_ALLOW_THREADS and Py_END_ALLOW_THREADS. */
1871 Py_BEGIN_ALLOW_THREADS
1873 chunksize
= Py_UniversalNewlineFread(
1874 f
->f_buf
, bufsize
, f
->f_fp
, (PyObject
*)f
);
1875 Py_END_ALLOW_THREADS
/* Nothing was read: a stream error becomes IOError built from errno. */
1876 if (chunksize
== 0) {
1877 if (ferror(f
->f_fp
)) {
1878 PyErr_SetFromErrno(PyExc_IOError
);
/* The valid data spans from f_bufptr (buffer start) up to f_bufend
   (buffer start plus the number of bytes read). */
1884 f
->f_bufptr
= f
->f_buf
;
1885 f
->f_bufend
= f
->f_buf
+ chunksize
;
1889 /* Used by file_iternext. The returned string will start with 'skip'
1890 uninitialized bytes followed by the remainder of the line. Don't be
1891 horrified by the recursive call: maximum recursion depth is limited by
1892 logarithmic buffer growth to about 50 even when reading a 1gb line. */
1894 static PyStringObject
*
1895 readahead_get_line_skip(PyFileObject
*f
, int skip
, int bufsize
)
/* Make sure a readahead buffer exists before scanning it. */
1902 if (f
->f_buf
== NULL
)
1903 if (readahead(f
, bufsize
) < 0)
/* len is the number of unread bytes left in the buffer. */
1906 len
= f
->f_bufend
- f
->f_bufptr
;
/* This return yields a string holding only the skip prefix.
   NOTE(review): the guarding condition is not visible in this
   excerpt; presumably it is the no-data (EOF) case. */
1908 return (PyStringObject
*)
1909 PyString_FromStringAndSize(NULL
, skip
);
/* Search the unread bytes for a newline. */
1910 bufptr
= (char *)memchr(f
->f_bufptr
, '\n', len
);
1911 if (bufptr
!= NULL
) {
/* Newline found: the line ends inside the buffer. Build a string of
   skip + len bytes, copy the line in after the skip prefix, and
   advance f_bufptr past the line. */
1912 bufptr
++; /* Count the '\n' */
1913 len
= bufptr
- f
->f_bufptr
;
1914 s
= (PyStringObject
*)
1915 PyString_FromStringAndSize(NULL
, skip
+len
);
1918 memcpy(PyString_AS_STRING(s
)+skip
, f
->f_bufptr
, len
);
1919 f
->f_bufptr
= bufptr
;
1920 if (bufptr
== f
->f_bufend
)
/* No newline in the buffer: keep a pointer to the pending bytes,
   detach the buffer from the file, and recurse with the skip enlarged
   by len and a buffer size grown by a quarter. */
1923 bufptr
= f
->f_bufptr
;
1925 f
->f_buf
= NULL
; /* Force new readahead buffer */
1926 assert(skip
+len
< INT_MAX
);
1927 s
= readahead_get_line_skip(
1928 f
, (int)(skip
+len
), bufsize
+ (bufsize
>>2) );
/* Copy the pending bytes into the result after the skip prefix. */
1933 memcpy(PyString_AS_STRING(s
)+skip
, bufptr
, len
);
1939 /* A larger buffer size may actually decrease performance. */
1940 #define READAHEAD_BUFSIZE 8192
/* tp_iternext implementation (see PyFile_Type): fetch the next line
   through readahead_get_line_skip() with no skip prefix and a buffer
   of READAHEAD_BUFSIZE bytes. A NULL f_fp is reported through
   err_closed(); a NULL or empty result is handled separately from the
   normal return of the line. */
1943 file_iternext(PyFileObject
*f
)
1947 if (f
->f_fp
== NULL
)
1948 return err_closed();
1950 l
= readahead_get_line_skip(f
, 0, READAHEAD_BUFSIZE
);
1951 if (l
== NULL
|| PyString_GET_SIZE(l
) == 0) {
1955 return (PyObject
*)l
;
/* tp_new implementation (see PyFile_Type): allocate a file object via
   type->tp_alloc and give it placeholder field values. The args and
   kwds parameters are not used in the visible lines. */
1960 file_new(PyTypeObject
*type
, PyObject
*args
, PyObject
*kwds
)
/* Placeholder string shared by all calls; created on first use. */
1963 static PyObject
*not_yet_string
;
1965 assert(type
!= NULL
&& type
->tp_alloc
!= NULL
);
1967 if (not_yet_string
== NULL
) {
1968 not_yet_string
= PyString_FromString("<uninitialized file>");
1969 if (not_yet_string
== NULL
)
1973 self
= type
->tp_alloc(type
, 0);
1975 /* Always fill in the name and mode, so that nobody else
1976 needs to special-case NULLs there. */
/* One new reference to the placeholder is taken per field. */
1977 Py_INCREF(not_yet_string
);
1978 ((PyFileObject
*)self
)->f_name
= not_yet_string
;
1979 Py_INCREF(not_yet_string
);
1980 ((PyFileObject
*)self
)->f_mode
= not_yet_string
;
/* The encoding starts as Py_None and the weak reference list as NULL. */
1982 ((PyFileObject
*)self
)->f_encoding
= Py_None
;
1983 ((PyFileObject
*)self
)->weakreflist
= NULL
;
/* tp_init implementation (see PyFile_Type): handles the keyword
   arguments name, mode and buffering, fills in the file fields, opens
   the file and applies the buffer size. A file that is already open on
   this object is closed first. */
1989 file_init(PyObject
*self
, PyObject
*args
, PyObject
*kwds
)
1991 PyFileObject
*foself
= (PyFileObject
*)self
;
1993 static char *kwlist
[] = {"name", "mode", "buffering", 0};
1997 int wideargument
= 0;
1999 assert(PyFile_Check(self
));
2000 if (foself
->f_fp
!= NULL
) {
2001 /* Have to close the existing file first. */
2002 PyObject
*closeresult
= file_close(foself
);
2003 if (closeresult
== NULL
)
2005 Py_DECREF(closeresult
);
/* With Py_WIN_WIDE_FILENAMES, first try to parse the name with the U
   format code and fill the file fields from that object. */
2008 #ifdef Py_WIN_WIDE_FILENAMES
2009 if (GetVersion() < 0x80000000) { /* On NT, so wide API available */
2011 if (PyArg_ParseTupleAndKeywords(args
, kwds
, "U|si:file",
2012 kwlist
, &po
, &mode
, &bufsize
)) {
2014 if (fill_file_fields(foself
, NULL
, po
, mode
,
2018 /* Drop the argument parsing error as narrow
2019 strings are also valid. */
/* Narrow path: parse the name with the et format code using
   Py_FileSystemDefaultEncoding, then parse again with O to obtain the
   name as an object for fill_file_fields(). */
2025 if (!wideargument
) {
2028 if (!PyArg_ParseTupleAndKeywords(args
, kwds
, "et|si:file", kwlist
,
2029 Py_FileSystemDefaultEncoding
,
2034 /* We parse again to get the name as a PyObject */
2035 if (!PyArg_ParseTupleAndKeywords(args
, kwds
, "O|si:file",
2036 kwlist
, &o_name
, &mode
,
2040 if (fill_file_fields(foself
, NULL
, o_name
, mode
,
2044 if (open_the_file(foself
, name
, mode
) == NULL
)
/* Reset f_setbuf and apply the requested buffer size. */
2046 foself
->f_setbuf
= NULL
;
2047 PyFile_SetBufSize(self
, bufsize
);
2054 PyMem_Free(name
); /* free the encoded string */
/* Docstring of the file type; installed as tp_doc in PyFile_Type. */
2058 PyDoc_VAR(file_doc
) =
2060 "file(name[, mode[, buffering]]) -> file object\n"
2062 "Open a file. The mode can be 'r', 'w' or 'a' for reading (default),\n"
2063 "writing or appending. The file will be created if it doesn't exist\n"
2064 "when opened for writing or appending; it will be truncated when\n"
2065 "opened for writing. Add a 'b' to the mode for binary files.\n"
2066 "Add a '+' to the mode to allow simultaneous reading and writing.\n"
2067 "If the buffering argument is given, 0 means unbuffered, 1 means line\n"
2068 "buffered, and larger numbers specify the buffer size. The preferred way\n"
2069 "to open a file is with the builtin open() function.\n"
2072 "Add a 'U' to mode to open the file for input with universal newline\n"
2073 "support. Any line ending in the input file will be seen as a '\\n'\n"
2074 "in Python. Also, a file so opened gains the attribute 'newlines';\n"
2075 "the value for this attribute is one of None (no newline read yet),\n"
2076 "'\\r', '\\n', '\\r\\n' or a tuple containing all the newline types seen.\n"
2078 "'U' cannot be combined with 'w' or '+' mode.\n"
/* Type object for file objects. It wires together the pieces defined
   in this file: file_dealloc and file_repr, the generic attribute
   getter and setter, the method, member and getset tables, iteration
   through file_self and file_iternext, and construction through
   file_new and file_init. The flags allow subclassing and weak
   references. */
2081 PyTypeObject PyFile_Type
= {
2082 PyVarObject_HEAD_INIT(&PyType_Type
, 0)
2084 sizeof(PyFileObject
),
2086 (destructor
)file_dealloc
, /* tp_dealloc */
2091 (reprfunc
)file_repr
, /* tp_repr */
2092 0, /* tp_as_number */
2093 0, /* tp_as_sequence */
2094 0, /* tp_as_mapping */
2098 PyObject_GenericGetAttr
, /* tp_getattro */
2099 /* softspace is writable: we must supply tp_setattro */
2100 PyObject_GenericSetAttr
, /* tp_setattro */
2101 0, /* tp_as_buffer */
2102 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_BASETYPE
| Py_TPFLAGS_HAVE_WEAKREFS
, /* tp_flags */
2103 file_doc
, /* tp_doc */
2104 0, /* tp_traverse */
2106 0, /* tp_richcompare */
2107 offsetof(PyFileObject
, weakreflist
), /* tp_weaklistoffset */
2108 (getiterfunc
)file_self
, /* tp_iter */
2109 (iternextfunc
)file_iternext
, /* tp_iternext */
2110 file_methods
, /* tp_methods */
2111 file_memberlist
, /* tp_members */
2112 file_getsetlist
, /* tp_getset */
2115 0, /* tp_descr_get */
2116 0, /* tp_descr_set */
2117 0, /* tp_dictoffset */
2118 file_init
, /* tp_init */
2119 PyType_GenericAlloc
, /* tp_alloc */
2120 file_new
, /* tp_new */
2121 PyObject_Del
, /* tp_free */
2124 /* Interface for the 'soft space' between print items. */
/* Store newflag as the softspace flag of f and return the previous
   value. Real file objects are handled directly through f_softspace;
   any other object goes through its softspace attribute. */
2127 PyFile_SoftSpace(PyObject
*f
, int newflag
)
/* Fast path: exchange the field in place. */
2133 else if (PyFile_Check(f
)) {
2134 oldflag
= ((PyFileObject
*)f
)->f_softspace
;
2135 ((PyFileObject
*)f
)->f_softspace
= newflag
;
/* Generic path: read the old value with getattr, then store the new
   value with setattr. */
2139 v
= PyObject_GetAttrString(f
, "softspace");
2144 oldflag
= PyInt_AsLong(v
);
2145 assert(oldflag
< INT_MAX
);
2148 v
= PyInt_FromLong((long)newflag
);
2152 if (PyObject_SetAttrString(f
, "softspace", v
) != 0)
2157 return (int)oldflag
;
2160 /* Interfaces to write objects/strings to file-like objects */
/* Write object v to f. A NULL f raises TypeError. A real file object
   is written with PyObject_Print() on its FILE pointer; any other
   object is written by calling its write attribute with a string made
   from v. */
2163 PyFile_WriteObject(PyObject
*v
, PyObject
*f
, int flags
)
2165 PyObject
*writer
, *value
, *args
, *result
;
2167 PyErr_SetString(PyExc_TypeError
, "writeobject with NULL file");
2170 else if (PyFile_Check(f
)) {
2171 FILE *fp
= PyFile_AsFile(f
);
2172 #ifdef Py_USING_UNICODE
2173 PyObject
*enc
= ((PyFileObject
*)f
)->f_encoding
;
/* With Py_PRINT_RAW, a unicode object and an encoding other than
   Py_None, the object is first encoded with that encoding (strict
   error handling) and the encoded value is printed. */
2180 #ifdef Py_USING_UNICODE
2181 if ((flags
& Py_PRINT_RAW
) &&
2182 PyUnicode_Check(v
) && enc
!= Py_None
) {
2183 char *cenc
= PyString_AS_STRING(enc
);
2184 value
= PyUnicode_AsEncodedString(v
, cenc
, "strict");
2191 result
= PyObject_Print(value
, fp
, flags
);
2195 return PyObject_Print(v
, fp
, flags
);
/* Generic path: look up the write attribute of f. */
2198 writer
= PyObject_GetAttrString(f
, "write");
/* Py_PRINT_RAW selects PyObject_Str(); otherwise PyObject_Repr() is
   used. NOTE(review): the handling of unicode objects in the raw case
   is not visible in this excerpt. */
2201 if (flags
& Py_PRINT_RAW
) {
2202 if (PyUnicode_Check(v
)) {
2206 value
= PyObject_Str(v
);
2209 value
= PyObject_Repr(v
);
2210 if (value
== NULL
) {
/* Call writer with the value packed into a one-element tuple. */
2214 args
= PyTuple_Pack(1, value
);
2220 result
= PyEval_CallObject(writer
, args
);
/* Write the C string s to f. A NULL f raises SystemError unless an
   exception is already set. For a real file object the write happens
   between Py_BEGIN_ALLOW_THREADS and Py_END_ALLOW_THREADS (the write
   call itself is not visible in this excerpt). For any other object,
   and only when no exception is pending, s is turned into a string
   object and passed to PyFile_WriteObject() with Py_PRINT_RAW. */
2231 PyFile_WriteString(const char *s
, PyObject
*f
)
2234 /* Should be caused by a pre-existing error */
2235 if (!PyErr_Occurred())
2236 PyErr_SetString(PyExc_SystemError
,
2237 "null file for PyFile_WriteString");
2240 else if (PyFile_Check(f
)) {
2241 FILE *fp
= PyFile_AsFile(f
);
2246 Py_BEGIN_ALLOW_THREADS
2248 Py_END_ALLOW_THREADS
2251 else if (!PyErr_Occurred()) {
2252 PyObject
*v
= PyString_FromString(s
);
2256 err
= PyFile_WriteObject(v
, f
, Py_PRINT_RAW
);
/* Summary of the error paths visible in the function that follows:
   TypeError when fileno() returns a non-integer, TypeError when the
   argument is neither an int nor has a fileno attribute, and ValueError
   for a negative descriptor. */
2264 /* Try to get a file-descriptor from a Python object. If the object
2265 is an integer or long integer, its value is returned. If not, the
2266 object's fileno() method is called if it exists; the method must return
2267 an integer or long integer, which is returned as the file descriptor value.
2268 -1 is returned on failure.
2271 int PyObject_AsFileDescriptor(PyObject
*o
)
2276 if (PyInt_Check(o
)) {
2277 fd
= PyInt_AsLong(o
);
2279 else if (PyLong_Check(o
)) {
2280 fd
= PyLong_AsLong(o
);
2282 else if ((meth
= PyObject_GetAttrString(o
, "fileno")) != NULL
)
2284 PyObject
*fno
= PyEval_CallObject(meth
, NULL
);
2289 if (PyInt_Check(fno
)) {
2290 fd
= PyInt_AsLong(fno
);
2293 else if (PyLong_Check(fno
)) {
2294 fd
= PyLong_AsLong(fno
);
2298 PyErr_SetString(PyExc_TypeError
,
2299 "fileno() returned a non-integer");
2305 PyErr_SetString(PyExc_TypeError
,
2306 "argument must be an int, or have a fileno() method.");
2311 PyErr_Format(PyExc_ValueError
,
2312 "file descriptor cannot be a negative integer (%i)",
2319 /* From here on we need access to the real fgets and fread */
2324 ** Py_UniversalNewlineFgets is an fgets variation that understands
2325 ** all of \r, \n and \r\n conventions.
2326 ** The stream should be opened in binary mode.
2327 ** If fobj is NULL the routine always does newline conversion, and
2328 ** it may peek one char ahead to gobble the second char in \r\n.
2329 ** If fobj is non-NULL it must be a PyFileObject. In this case there
2330 ** is no readahead but instead a flag is used to skip a following
2331 ** \n on the next read. Also, if the file is open in binary mode
2332 ** the whole conversion is skipped. Finally, the routine keeps track of
2333 ** the different types of newlines seen.
2334 ** Note that we need no error handling: fgets() treats error and eof
/* fgets() variant with universal newline handling. Reads at most n-1
   characters from stream into buf with GETC(). When fobj is given it
   must pass PyFile_Check(); its f_univ_newline, f_newlinetypes and
   f_skipnextlf fields supply the starting state and receive the
   updated state after the read. */
2338 Py_UniversalNewlineFgets(char *buf
, int n
, FILE *stream
, PyObject
*fobj
)
2342 int newlinetypes
= 0;
2344 int univ_newline
= 1;
/* An fobj that is not a file object sets errno to ENXIO. */
2347 if (!PyFile_Check(fobj
)) {
2348 errno
= ENXIO
; /* What can you do... */
/* Without universal newlines the call is a plain fgets(). */
2351 univ_newline
= ((PyFileObject
*)fobj
)->f_univ_newline
;
2352 if ( !univ_newline
)
2353 return fgets(buf
, n
, stream
);
/* Load the newline state saved on the file object. */
2354 newlinetypes
= ((PyFileObject
*)fobj
)->f_newlinetypes
;
2355 skipnextlf
= ((PyFileObject
*)fobj
)->f_skipnextlf
;
2358 c
= 'x'; /* Shut up gcc warning */
2359 while (--n
> 0 && (c
= GETC(stream
)) != EOF
) {
2363 /* Seeing a \n here with skipnextlf true
2364 ** means we saw a \r before.
2366 newlinetypes
|= NEWLINE_CRLF
;
2368 if (c
== EOF
) break;
2371 ** Note that c == EOF also brings us here,
2372 ** so we're okay if the last char in the file
2375 newlinetypes
|= NEWLINE_CR
;
2379 /* A \r is translated into a \n, and we skip
2380 ** an adjacent \n, if any. We don't set the
2381 ** newlinetypes flag until we've seen the next char.
2385 } else if ( c
== '\n') {
2386 newlinetypes
|= NEWLINE_LF
;
2389 if (c
== '\n') break;
2391 if ( c
== EOF
&& skipnextlf
)
2392 newlinetypes
|= NEWLINE_CR
;
2393 FUNLOCKFILE(stream
);
2396 ((PyFileObject
*)fobj
)->f_newlinetypes
= newlinetypes
;
2397 ((PyFileObject
*)fobj
)->f_skipnextlf
= skipnextlf
;
2398 } else if ( skipnextlf
) {
2399 /* If we have no file object we cannot save the
2400 ** skipnextlf flag. We have to readahead, which
2401 ** will cause a pause if we're reading from an
2402 ** interactive stream, but that is very unlikely
2403 ** unless we're doing something silly like
2404 ** execfile("/dev/tty").
2416 ** Py_UniversalNewlineFread is an fread variation that understands
2417 ** all of \r, \n and \r\n conventions.
2418 ** The stream should be opened in binary mode.
2419 ** fobj must be a PyFileObject. In this case there
2420 ** is no readahead but instead a flag is used to skip a following
2421 ** \n on the next read. Also, if the file is open in binary mode
2422 ** the whole conversion is skipped. Finally, the routine keeps track of
2423 ** the different types of newlines seen.
2426 Py_UniversalNewlineFread(char *buf
, size_t n
,
2427 FILE *stream
, PyObject
*fobj
)
2430 PyFileObject
*f
= (PyFileObject
*)fobj
;
2431 int newlinetypes
, skipnextlf
;
2433 assert(buf
!= NULL
);
2434 assert(stream
!= NULL
);
2436 if (!fobj
|| !PyFile_Check(fobj
)) {
2437 errno
= ENXIO
; /* What can you do... */
2440 if (!f
->f_univ_newline
)
2441 return fread(buf
, 1, n
, stream
);
2442 newlinetypes
= f
->f_newlinetypes
;
2443 skipnextlf
= f
->f_skipnextlf
;
2444 /* Invariant: n is the number of bytes remaining to be filled
2452 nread
= fread(dst
, 1, n
, stream
);
2457 n
-= nread
; /* assuming 1 byte out for each in; will adjust */
2458 shortread
= n
!= 0; /* true iff EOF or error */
2462 /* Save as LF and set flag to skip next LF. */
2466 else if (skipnextlf
&& c
== '\n') {
2467 /* Skip LF, and remember we saw CR LF. */
2469 newlinetypes
|= NEWLINE_CRLF
;
2473 /* Normal char to be stored in buffer. Also
2474 * update the newlinetypes flag if either this
2475 * is an LF or the previous char was a CR.
2478 newlinetypes
|= NEWLINE_LF
;
2479 else if (skipnextlf
)
2480 newlinetypes
|= NEWLINE_CR
;
2486 /* If this is EOF, update type flags. */
2487 if (skipnextlf
&& feof(stream
))
2488 newlinetypes
|= NEWLINE_CR
;
2492 f
->f_newlinetypes
= newlinetypes
;
2493 f
->f_skipnextlf
= skipnextlf
;