Issue #2143: Fix embedded readline() hang on SSL socket EOF.
[python.git] / Objects / fileobject.c
blob9195a24ea113940acee78e6d18efdb4038555c29
1 /* File object implementation */
3 #define PY_SSIZE_T_CLEAN
4 #include "Python.h"
5 #include "structmember.h"
7 #ifdef HAVE_SYS_TYPES_H
8 #include <sys/types.h>
9 #endif /* HAVE_SYS_TYPES_H */
11 #ifdef MS_WINDOWS
12 #define fileno _fileno
13 /* can simulate truncate with Win32 API functions; see file_truncate */
14 #define HAVE_FTRUNCATE
15 #define WIN32_LEAN_AND_MEAN
16 #include <windows.h>
17 #endif
19 #ifdef _MSC_VER
20 /* Need GetVersion to see if on NT so safe to use _wfopen */
21 #define WIN32_LEAN_AND_MEAN
22 #include <windows.h>
23 #endif /* _MSC_VER */
25 #if defined(PYOS_OS2) && defined(PYCC_GCC)
26 #include <io.h>
27 #endif
29 #define BUF(v) PyString_AS_STRING((PyStringObject *)v)
31 #ifndef DONT_HAVE_ERRNO_H
32 #include <errno.h>
33 #endif
35 #ifdef HAVE_GETC_UNLOCKED
36 #define GETC(f) getc_unlocked(f)
37 #define FLOCKFILE(f) flockfile(f)
38 #define FUNLOCKFILE(f) funlockfile(f)
39 #else
40 #define GETC(f) getc(f)
41 #define FLOCKFILE(f)
42 #define FUNLOCKFILE(f)
43 #endif
45 /* Bits in f_newlinetypes */
46 #define NEWLINE_UNKNOWN 0 /* No newline seen, yet */
47 #define NEWLINE_CR 1 /* \r newline seen */
48 #define NEWLINE_LF 2 /* \n newline seen */
49 #define NEWLINE_CRLF 4 /* \r\n newline seen */
51 #ifdef __cplusplus
52 extern "C" {
53 #endif
55 FILE *
56 PyFile_AsFile(PyObject *f)
58 if (f == NULL || !PyFile_Check(f))
59 return NULL;
60 else
61 return ((PyFileObject *)f)->f_fp;
64 PyObject *
65 PyFile_Name(PyObject *f)
67 if (f == NULL || !PyFile_Check(f))
68 return NULL;
69 else
70 return ((PyFileObject *)f)->f_name;
73 /* On Unix, fopen will succeed for directories.
74 In Python, there should be no file objects referring to
75 directories, so we need a check. */
77 static PyFileObject*
78 dircheck(PyFileObject* f)
80 #if defined(HAVE_FSTAT) && defined(S_IFDIR) && defined(EISDIR)
81 struct stat buf;
82 if (f->f_fp == NULL)
83 return f;
84 if (fstat(fileno(f->f_fp), &buf) == 0 &&
85 S_ISDIR(buf.st_mode)) {
86 char *msg = strerror(EISDIR);
87 PyObject *exc = PyObject_CallFunction(PyExc_IOError, "(is)",
88 EISDIR, msg);
89 PyErr_SetObject(PyExc_IOError, exc);
90 Py_XDECREF(exc);
91 return NULL;
93 #endif
94 return f;
98 static PyObject *
99 fill_file_fields(PyFileObject *f, FILE *fp, PyObject *name, char *mode,
100 int (*close)(FILE *))
102 assert(name != NULL);
103 assert(f != NULL);
104 assert(PyFile_Check(f));
105 assert(f->f_fp == NULL);
107 Py_DECREF(f->f_name);
108 Py_DECREF(f->f_mode);
109 Py_DECREF(f->f_encoding);
111 Py_INCREF(name);
112 f->f_name = name;
114 f->f_mode = PyString_FromString(mode);
116 f->f_close = close;
117 f->f_softspace = 0;
118 f->f_binary = strchr(mode,'b') != NULL;
119 f->f_buf = NULL;
120 f->f_univ_newline = (strchr(mode, 'U') != NULL);
121 f->f_newlinetypes = NEWLINE_UNKNOWN;
122 f->f_skipnextlf = 0;
123 Py_INCREF(Py_None);
124 f->f_encoding = Py_None;
126 if (f->f_mode == NULL)
127 return NULL;
128 f->f_fp = fp;
129 f = dircheck(f);
130 return (PyObject *) f;
133 /* check for known incorrect mode strings - problem is, platforms are
134 free to accept any mode characters they like and are supposed to
135 ignore stuff they don't understand... write or append mode with
136 universal newline support is expressly forbidden by PEP 278.
137 Additionally, remove the 'U' from the mode string as platforms
138 won't know what it is. Non-zero return signals an exception */
140 _PyFile_SanitizeMode(char *mode)
142 char *upos;
143 size_t len = strlen(mode);
145 if (!len) {
146 PyErr_SetString(PyExc_ValueError, "empty mode string");
147 return -1;
150 upos = strchr(mode, 'U');
151 if (upos) {
152 memmove(upos, upos+1, len-(upos-mode)); /* incl null char */
154 if (mode[0] == 'w' || mode[0] == 'a') {
155 PyErr_Format(PyExc_ValueError, "universal newline "
156 "mode can only be used with modes "
157 "starting with 'r'");
158 return -1;
161 if (mode[0] != 'r') {
162 memmove(mode+1, mode, strlen(mode)+1);
163 mode[0] = 'r';
166 if (!strchr(mode, 'b')) {
167 memmove(mode+2, mode+1, strlen(mode));
168 mode[1] = 'b';
170 } else if (mode[0] != 'r' && mode[0] != 'w' && mode[0] != 'a') {
171 PyErr_Format(PyExc_ValueError, "mode string must begin with "
172 "one of 'r', 'w', 'a' or 'U', not '%.200s'", mode);
173 return -1;
176 return 0;
179 static PyObject *
180 open_the_file(PyFileObject *f, char *name, char *mode)
182 char *newmode;
183 assert(f != NULL);
184 assert(PyFile_Check(f));
185 #ifdef MS_WINDOWS
186 /* windows ignores the passed name in order to support Unicode */
187 assert(f->f_name != NULL);
188 #else
189 assert(name != NULL);
190 #endif
191 assert(mode != NULL);
192 assert(f->f_fp == NULL);
194 /* probably need to replace 'U' by 'rb' */
195 newmode = PyMem_MALLOC(strlen(mode) + 3);
196 if (!newmode) {
197 PyErr_NoMemory();
198 return NULL;
200 strcpy(newmode, mode);
202 if (_PyFile_SanitizeMode(newmode)) {
203 f = NULL;
204 goto cleanup;
207 /* rexec.py can't stop a user from getting the file() constructor --
208 all they have to do is get *any* file object f, and then do
209 type(f). Here we prevent them from doing damage with it. */
210 if (PyEval_GetRestricted()) {
211 PyErr_SetString(PyExc_IOError,
212 "file() constructor not accessible in restricted mode");
213 f = NULL;
214 goto cleanup;
216 errno = 0;
218 #ifdef MS_WINDOWS
219 if (PyUnicode_Check(f->f_name)) {
220 PyObject *wmode;
221 wmode = PyUnicode_DecodeASCII(newmode, strlen(newmode), NULL);
222 if (f->f_name && wmode) {
223 Py_BEGIN_ALLOW_THREADS
224 /* PyUnicode_AS_UNICODE OK without thread
225 lock as it is a simple dereference. */
226 f->f_fp = _wfopen(PyUnicode_AS_UNICODE(f->f_name),
227 PyUnicode_AS_UNICODE(wmode));
228 Py_END_ALLOW_THREADS
230 Py_XDECREF(wmode);
232 #endif
233 if (NULL == f->f_fp && NULL != name) {
234 Py_BEGIN_ALLOW_THREADS
235 f->f_fp = fopen(name, newmode);
236 Py_END_ALLOW_THREADS
239 if (f->f_fp == NULL) {
240 #if defined _MSC_VER && (_MSC_VER < 1400 || !defined(__STDC_SECURE_LIB__))
241 /* MSVC 6 (Microsoft) leaves errno at 0 for bad mode strings,
242 * across all Windows flavors. When it sets EINVAL varies
243 * across Windows flavors, the exact conditions aren't
244 * documented, and the answer lies in the OS's implementation
245 * of Win32's CreateFile function (whose source is secret).
246 * Seems the best we can do is map EINVAL to ENOENT.
247 * Starting with Visual Studio .NET 2005, EINVAL is correctly
248 * set by our CRT error handler (set in exceptions.c.)
250 if (errno == 0) /* bad mode string */
251 errno = EINVAL;
252 else if (errno == EINVAL) /* unknown, but not a mode string */
253 errno = ENOENT;
254 #endif
255 /* EINVAL is returned when an invalid filename or
256 * an invalid mode is supplied. */
257 if (errno == EINVAL)
258 PyErr_Format(PyExc_IOError,
259 "invalid filename: %s or mode: %s",
260 name, mode);
261 else
262 PyErr_SetFromErrnoWithFilenameObject(PyExc_IOError, f->f_name);
263 f = NULL;
265 if (f != NULL)
266 f = dircheck(f);
268 cleanup:
269 PyMem_FREE(newmode);
271 return (PyObject *)f;
274 PyObject *
275 PyFile_FromFile(FILE *fp, char *name, char *mode, int (*close)(FILE *))
277 PyFileObject *f = (PyFileObject *)PyFile_Type.tp_new(&PyFile_Type,
278 NULL, NULL);
279 if (f != NULL) {
280 PyObject *o_name = PyString_FromString(name);
281 if (o_name == NULL)
282 return NULL;
283 if (fill_file_fields(f, fp, o_name, mode, close) == NULL) {
284 Py_DECREF(f);
285 f = NULL;
287 Py_DECREF(o_name);
289 return (PyObject *) f;
292 PyObject *
293 PyFile_FromString(char *name, char *mode)
295 extern int fclose(FILE *);
296 PyFileObject *f;
298 f = (PyFileObject *)PyFile_FromFile((FILE *)NULL, name, mode, fclose);
299 if (f != NULL) {
300 if (open_the_file(f, name, mode) == NULL) {
301 Py_DECREF(f);
302 f = NULL;
305 return (PyObject *)f;
308 void
309 PyFile_SetBufSize(PyObject *f, int bufsize)
311 PyFileObject *file = (PyFileObject *)f;
312 if (bufsize >= 0) {
313 int type;
314 switch (bufsize) {
315 case 0:
316 type = _IONBF;
317 break;
318 #ifdef HAVE_SETVBUF
319 case 1:
320 type = _IOLBF;
321 bufsize = BUFSIZ;
322 break;
323 #endif
324 default:
325 type = _IOFBF;
326 #ifndef HAVE_SETVBUF
327 bufsize = BUFSIZ;
328 #endif
329 break;
331 fflush(file->f_fp);
332 if (type == _IONBF) {
333 PyMem_Free(file->f_setbuf);
334 file->f_setbuf = NULL;
335 } else {
336 file->f_setbuf = (char *)PyMem_Realloc(file->f_setbuf,
337 bufsize);
339 #ifdef HAVE_SETVBUF
340 setvbuf(file->f_fp, file->f_setbuf, type, bufsize);
341 #else /* !HAVE_SETVBUF */
342 setbuf(file->f_fp, file->f_setbuf);
343 #endif /* !HAVE_SETVBUF */
347 /* Set the encoding used to output Unicode strings.
348 Returh 1 on success, 0 on failure. */
351 PyFile_SetEncoding(PyObject *f, const char *enc)
353 PyFileObject *file = (PyFileObject*)f;
354 PyObject *str = PyString_FromString(enc);
356 assert(PyFile_Check(f));
357 if (!str)
358 return 0;
359 Py_DECREF(file->f_encoding);
360 file->f_encoding = str;
361 return 1;
364 static PyObject *
365 err_closed(void)
367 PyErr_SetString(PyExc_ValueError, "I/O operation on closed file");
368 return NULL;
371 /* Refuse regular file I/O if there's data in the iteration-buffer.
372 * Mixing them would cause data to arrive out of order, as the read*
373 * methods don't use the iteration buffer. */
374 static PyObject *
375 err_iterbuffered(void)
377 PyErr_SetString(PyExc_ValueError,
378 "Mixing iteration and read methods would lose data");
379 return NULL;
382 static void drop_readahead(PyFileObject *);
384 /* Methods */
386 static void
387 file_dealloc(PyFileObject *f)
389 int sts = 0;
390 if (f->weakreflist != NULL)
391 PyObject_ClearWeakRefs((PyObject *) f);
392 if (f->f_fp != NULL && f->f_close != NULL) {
393 Py_BEGIN_ALLOW_THREADS
394 sts = (*f->f_close)(f->f_fp);
395 Py_END_ALLOW_THREADS
396 if (sts == EOF)
397 PySys_WriteStderr("close failed: [Errno %d] %s\n", errno, strerror(errno));
399 PyMem_Free(f->f_setbuf);
400 Py_XDECREF(f->f_name);
401 Py_XDECREF(f->f_mode);
402 Py_XDECREF(f->f_encoding);
403 drop_readahead(f);
404 Py_TYPE(f)->tp_free((PyObject *)f);
407 static PyObject *
408 file_repr(PyFileObject *f)
410 if (PyUnicode_Check(f->f_name)) {
411 #ifdef Py_USING_UNICODE
412 PyObject *ret = NULL;
413 PyObject *name = PyUnicode_AsUnicodeEscapeString(f->f_name);
414 const char *name_str = name ? PyString_AsString(name) : "?";
415 ret = PyString_FromFormat("<%s file u'%s', mode '%s' at %p>",
416 f->f_fp == NULL ? "closed" : "open",
417 name_str,
418 PyString_AsString(f->f_mode),
420 Py_XDECREF(name);
421 return ret;
422 #endif
423 } else {
424 return PyString_FromFormat("<%s file '%s', mode '%s' at %p>",
425 f->f_fp == NULL ? "closed" : "open",
426 PyString_AsString(f->f_name),
427 PyString_AsString(f->f_mode),
432 static PyObject *
433 file_close(PyFileObject *f)
435 int sts = 0;
436 if (f->f_fp != NULL) {
437 if (f->f_close != NULL) {
438 Py_BEGIN_ALLOW_THREADS
439 errno = 0;
440 sts = (*f->f_close)(f->f_fp);
441 Py_END_ALLOW_THREADS
443 f->f_fp = NULL;
445 PyMem_Free(f->f_setbuf);
446 f->f_setbuf = NULL;
447 if (sts == EOF)
448 return PyErr_SetFromErrno(PyExc_IOError);
449 if (sts != 0)
450 return PyInt_FromLong((long)sts);
451 Py_INCREF(Py_None);
452 return Py_None;
/* Our very own off_t-like type, 64-bit if possible.
   off_t is used when large file support is off or off_t is already at
   least 8 bytes; otherwise fpos_t is used if it is at least 8 bytes. */
#if !defined(HAVE_LARGEFILE_SUPPORT) || SIZEOF_OFF_T >= 8
typedef off_t Py_off_t;
#elif SIZEOF_FPOS_T >= 8
typedef fpos_t Py_off_t;
#else
#error "Large file support, but neither off_t nor fpos_t is large enough."
#endif
468 /* a portable fseek() function
469 return 0 on success, non-zero on failure (with errno set) */
470 static int
471 _portable_fseek(FILE *fp, Py_off_t offset, int whence)
473 #if !defined(HAVE_LARGEFILE_SUPPORT)
474 return fseek(fp, offset, whence);
475 #elif defined(HAVE_FSEEKO) && SIZEOF_OFF_T >= 8
476 return fseeko(fp, offset, whence);
477 #elif defined(HAVE_FSEEK64)
478 return fseek64(fp, offset, whence);
479 #elif defined(__BEOS__)
480 return _fseek(fp, offset, whence);
481 #elif SIZEOF_FPOS_T >= 8
482 /* lacking a 64-bit capable fseek(), use a 64-bit capable fsetpos()
483 and fgetpos() to implement fseek()*/
484 fpos_t pos;
485 switch (whence) {
486 case SEEK_END:
487 #ifdef MS_WINDOWS
488 fflush(fp);
489 if (_lseeki64(fileno(fp), 0, 2) == -1)
490 return -1;
491 #else
492 if (fseek(fp, 0, SEEK_END) != 0)
493 return -1;
494 #endif
495 /* fall through */
496 case SEEK_CUR:
497 if (fgetpos(fp, &pos) != 0)
498 return -1;
499 offset += pos;
500 break;
501 /* case SEEK_SET: break; */
503 return fsetpos(fp, &offset);
504 #else
505 #error "Large file support, but no way to fseek."
506 #endif
510 /* a portable ftell() function
511 Return -1 on failure with errno set appropriately, current file
512 position on success */
513 static Py_off_t
514 _portable_ftell(FILE* fp)
516 #if !defined(HAVE_LARGEFILE_SUPPORT)
517 return ftell(fp);
518 #elif defined(HAVE_FTELLO) && SIZEOF_OFF_T >= 8
519 return ftello(fp);
520 #elif defined(HAVE_FTELL64)
521 return ftell64(fp);
522 #elif SIZEOF_FPOS_T >= 8
523 fpos_t pos;
524 if (fgetpos(fp, &pos) != 0)
525 return -1;
526 return pos;
527 #else
528 #error "Large file support, but no way to ftell."
529 #endif
533 static PyObject *
534 file_seek(PyFileObject *f, PyObject *args)
536 int whence;
537 int ret;
538 Py_off_t offset;
539 PyObject *offobj, *off_index;
541 if (f->f_fp == NULL)
542 return err_closed();
543 drop_readahead(f);
544 whence = 0;
545 if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &whence))
546 return NULL;
547 off_index = PyNumber_Index(offobj);
548 if (!off_index) {
549 if (!PyFloat_Check(offobj))
550 return NULL;
551 /* Deprecated in 2.6 */
552 PyErr_Clear();
553 if (PyErr_Warn(PyExc_DeprecationWarning,
554 "integer argument expected, got float"))
555 return NULL;
556 off_index = offobj;
557 Py_INCREF(offobj);
559 #if !defined(HAVE_LARGEFILE_SUPPORT)
560 offset = PyInt_AsLong(off_index);
561 #else
562 offset = PyLong_Check(off_index) ?
563 PyLong_AsLongLong(off_index) : PyInt_AsLong(off_index);
564 #endif
565 Py_DECREF(off_index);
566 if (PyErr_Occurred())
567 return NULL;
569 Py_BEGIN_ALLOW_THREADS
570 errno = 0;
571 ret = _portable_fseek(f->f_fp, offset, whence);
572 Py_END_ALLOW_THREADS
574 if (ret != 0) {
575 PyErr_SetFromErrno(PyExc_IOError);
576 clearerr(f->f_fp);
577 return NULL;
579 f->f_skipnextlf = 0;
580 Py_INCREF(Py_None);
581 return Py_None;
#ifdef HAVE_FTRUNCATE
/* file.truncate([size]): truncate the file to `size` bytes, defaulting to
   the current position.  The current position is preserved.
   Returns None, or NULL with an exception set (ValueError for a closed
   file, IOError when any of the underlying calls fails). */
static PyObject *
file_truncate(PyFileObject *f, PyObject *args)
{
    PyObject *newsizeobj = NULL;
    Py_off_t newsize;
    Py_off_t initialpos;
    int ret;

    if (f->f_fp == NULL)
        return err_closed();
    if (!PyArg_UnpackTuple(args, "truncate", 0, 1, &newsizeobj))
        return NULL;

    /* Get current file position.  If the file happens to be open for
     * update and the last operation was an input operation, C doesn't
     * define what the later fflush() will do, but we promise truncate()
     * won't change the current position (and fflush() *does* change it
     * then at least on Windows).  The easiest thing is to capture
     * current pos now and seek back to it at the end.
     */
    Py_BEGIN_ALLOW_THREADS
    errno = 0;
    initialpos = _portable_ftell(f->f_fp);
    Py_END_ALLOW_THREADS
    if (initialpos == -1)
        goto onioerror;

    /* Use the caller's size if one was given, else the current position. */
    if (newsizeobj == NULL)
        newsize = initialpos;
    else {
#if !defined(HAVE_LARGEFILE_SUPPORT)
        newsize = PyInt_AsLong(newsizeobj);
#else
        if (PyLong_Check(newsizeobj))
            newsize = PyLong_AsLongLong(newsizeobj);
        else
            newsize = PyInt_AsLong(newsizeobj);
#endif
        if (PyErr_Occurred())
            return NULL;
    }

    /* Flush the stream.  We're mixing stream-level I/O with lower-level
     * I/O, and a flush may be necessary to synch both platform views
     * of the current file state.
     */
    Py_BEGIN_ALLOW_THREADS
    errno = 0;
    ret = fflush(f->f_fp);
    Py_END_ALLOW_THREADS
    if (ret != 0)
        goto onioerror;

#ifdef MS_WINDOWS
    /* MS _chsize doesn't work if newsize doesn't fit in 32 bits,
       so don't even try using it. */
    {
        HANDLE hFile;

        /* Have to move current pos to desired endpoint on Windows. */
        Py_BEGIN_ALLOW_THREADS
        errno = 0;
        ret = _portable_fseek(f->f_fp, newsize, SEEK_SET) != 0;
        Py_END_ALLOW_THREADS
        if (ret)
            goto onioerror;

        /* Truncate.  Note that this may grow the file! */
        Py_BEGIN_ALLOW_THREADS
        errno = 0;
        hFile = (HANDLE)_get_osfhandle(fileno(f->f_fp));
        ret = hFile == (HANDLE)-1;
        if (ret == 0) {
            ret = SetEndOfFile(hFile) == 0;
            if (ret)
                errno = EACCES;
        }
        Py_END_ALLOW_THREADS
        if (ret)
            goto onioerror;
    }
#else
    Py_BEGIN_ALLOW_THREADS
    errno = 0;
    ret = ftruncate(fileno(f->f_fp), newsize);
    Py_END_ALLOW_THREADS
    if (ret != 0)
        goto onioerror;
#endif /* !MS_WINDOWS */

    /* Restore original file position. */
    Py_BEGIN_ALLOW_THREADS
    errno = 0;
    ret = _portable_fseek(f->f_fp, initialpos, SEEK_SET) != 0;
    Py_END_ALLOW_THREADS
    if (ret)
        goto onioerror;

    Py_INCREF(Py_None);
    return Py_None;

onioerror:
    PyErr_SetFromErrno(PyExc_IOError);
    clearerr(f->f_fp);
    return NULL;
}
#endif /* HAVE_FTRUNCATE */
696 static PyObject *
697 file_tell(PyFileObject *f)
699 Py_off_t pos;
701 if (f->f_fp == NULL)
702 return err_closed();
703 Py_BEGIN_ALLOW_THREADS
704 errno = 0;
705 pos = _portable_ftell(f->f_fp);
706 Py_END_ALLOW_THREADS
707 if (pos == -1) {
708 PyErr_SetFromErrno(PyExc_IOError);
709 clearerr(f->f_fp);
710 return NULL;
712 if (f->f_skipnextlf) {
713 int c;
714 c = GETC(f->f_fp);
715 if (c == '\n') {
716 f->f_newlinetypes |= NEWLINE_CRLF;
717 pos++;
718 f->f_skipnextlf = 0;
719 } else if (c != EOF) ungetc(c, f->f_fp);
721 #if !defined(HAVE_LARGEFILE_SUPPORT)
722 return PyInt_FromLong(pos);
723 #else
724 return PyLong_FromLongLong(pos);
725 #endif
728 static PyObject *
729 file_fileno(PyFileObject *f)
731 if (f->f_fp == NULL)
732 return err_closed();
733 return PyInt_FromLong((long) fileno(f->f_fp));
736 static PyObject *
737 file_flush(PyFileObject *f)
739 int res;
741 if (f->f_fp == NULL)
742 return err_closed();
743 Py_BEGIN_ALLOW_THREADS
744 errno = 0;
745 res = fflush(f->f_fp);
746 Py_END_ALLOW_THREADS
747 if (res != 0) {
748 PyErr_SetFromErrno(PyExc_IOError);
749 clearerr(f->f_fp);
750 return NULL;
752 Py_INCREF(Py_None);
753 return Py_None;
756 static PyObject *
757 file_isatty(PyFileObject *f)
759 long res;
760 if (f->f_fp == NULL)
761 return err_closed();
762 Py_BEGIN_ALLOW_THREADS
763 res = isatty((int)fileno(f->f_fp));
764 Py_END_ALLOW_THREADS
765 return PyBool_FromLong(res);
769 #if BUFSIZ < 8192
770 #define SMALLCHUNK 8192
771 #else
772 #define SMALLCHUNK BUFSIZ
773 #endif
775 #if SIZEOF_INT < 4
776 #define BIGCHUNK (512 * 32)
777 #else
778 #define BIGCHUNK (512 * 1024)
779 #endif
781 static size_t
782 new_buffersize(PyFileObject *f, size_t currentsize)
784 #ifdef HAVE_FSTAT
785 off_t pos, end;
786 struct stat st;
787 if (fstat(fileno(f->f_fp), &st) == 0) {
788 end = st.st_size;
789 /* The following is not a bug: we really need to call lseek()
790 *and* ftell(). The reason is that some stdio libraries
791 mistakenly flush their buffer when ftell() is called and
792 the lseek() call it makes fails, thereby throwing away
793 data that cannot be recovered in any way. To avoid this,
794 we first test lseek(), and only call ftell() if lseek()
795 works. We can't use the lseek() value either, because we
796 need to take the amount of buffered data into account.
797 (Yet another reason why stdio stinks. :-) */
798 pos = lseek(fileno(f->f_fp), 0L, SEEK_CUR);
799 if (pos >= 0) {
800 pos = ftell(f->f_fp);
802 if (pos < 0)
803 clearerr(f->f_fp);
804 if (end > pos && pos >= 0)
805 return currentsize + end - pos + 1;
806 /* Add 1 so if the file were to grow we'd notice. */
808 #endif
809 if (currentsize > SMALLCHUNK) {
810 /* Keep doubling until we reach BIGCHUNK;
811 then keep adding BIGCHUNK. */
812 if (currentsize <= BIGCHUNK)
813 return currentsize + currentsize;
814 else
815 return currentsize + BIGCHUNK;
817 return currentsize + SMALLCHUNK;
820 #if defined(EWOULDBLOCK) && defined(EAGAIN) && EWOULDBLOCK != EAGAIN
821 #define BLOCKED_ERRNO(x) ((x) == EWOULDBLOCK || (x) == EAGAIN)
822 #else
823 #ifdef EWOULDBLOCK
824 #define BLOCKED_ERRNO(x) ((x) == EWOULDBLOCK)
825 #else
826 #ifdef EAGAIN
827 #define BLOCKED_ERRNO(x) ((x) == EAGAIN)
828 #else
829 #define BLOCKED_ERRNO(x) 0
830 #endif
831 #endif
832 #endif
834 static PyObject *
835 file_read(PyFileObject *f, PyObject *args)
837 long bytesrequested = -1;
838 size_t bytesread, buffersize, chunksize;
839 PyObject *v;
841 if (f->f_fp == NULL)
842 return err_closed();
843 /* refuse to mix with f.next() */
844 if (f->f_buf != NULL &&
845 (f->f_bufend - f->f_bufptr) > 0 &&
846 f->f_buf[0] != '\0')
847 return err_iterbuffered();
848 if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
849 return NULL;
850 if (bytesrequested < 0)
851 buffersize = new_buffersize(f, (size_t)0);
852 else
853 buffersize = bytesrequested;
854 if (buffersize > PY_SSIZE_T_MAX) {
855 PyErr_SetString(PyExc_OverflowError,
856 "requested number of bytes is more than a Python string can hold");
857 return NULL;
859 v = PyString_FromStringAndSize((char *)NULL, buffersize);
860 if (v == NULL)
861 return NULL;
862 bytesread = 0;
863 for (;;) {
864 Py_BEGIN_ALLOW_THREADS
865 errno = 0;
866 chunksize = Py_UniversalNewlineFread(BUF(v) + bytesread,
867 buffersize - bytesread, f->f_fp, (PyObject *)f);
868 Py_END_ALLOW_THREADS
869 if (chunksize == 0) {
870 if (!ferror(f->f_fp))
871 break;
872 clearerr(f->f_fp);
873 /* When in non-blocking mode, data shouldn't
874 * be discarded if a blocking signal was
875 * received. That will also happen if
876 * chunksize != 0, but bytesread < buffersize. */
877 if (bytesread > 0 && BLOCKED_ERRNO(errno))
878 break;
879 PyErr_SetFromErrno(PyExc_IOError);
880 Py_DECREF(v);
881 return NULL;
883 bytesread += chunksize;
884 if (bytesread < buffersize) {
885 clearerr(f->f_fp);
886 break;
888 if (bytesrequested < 0) {
889 buffersize = new_buffersize(f, buffersize);
890 if (_PyString_Resize(&v, buffersize) < 0)
891 return NULL;
892 } else {
893 /* Got what was requested. */
894 break;
897 if (bytesread != buffersize)
898 _PyString_Resize(&v, bytesread);
899 return v;
902 static PyObject *
903 file_readinto(PyFileObject *f, PyObject *args)
905 char *ptr;
906 Py_ssize_t ntodo;
907 Py_ssize_t ndone, nnow;
909 if (f->f_fp == NULL)
910 return err_closed();
911 /* refuse to mix with f.next() */
912 if (f->f_buf != NULL &&
913 (f->f_bufend - f->f_bufptr) > 0 &&
914 f->f_buf[0] != '\0')
915 return err_iterbuffered();
916 if (!PyArg_ParseTuple(args, "w#", &ptr, &ntodo))
917 return NULL;
918 ndone = 0;
919 while (ntodo > 0) {
920 Py_BEGIN_ALLOW_THREADS
921 errno = 0;
922 nnow = Py_UniversalNewlineFread(ptr+ndone, ntodo, f->f_fp,
923 (PyObject *)f);
924 Py_END_ALLOW_THREADS
925 if (nnow == 0) {
926 if (!ferror(f->f_fp))
927 break;
928 PyErr_SetFromErrno(PyExc_IOError);
929 clearerr(f->f_fp);
930 return NULL;
932 ndone += nnow;
933 ntodo -= nnow;
935 return PyInt_FromSsize_t(ndone);
938 /**************************************************************************
939 Routine to get next line using platform fgets().
941 Under MSVC 6:
943 + MS threadsafe getc is very slow (multiple layers of function calls before+
944 after each character, to lock+unlock the stream).
945 + The stream-locking functions are MS-internal -- can't access them from user
946 code.
947 + There's nothing Tim could find in the MS C or platform SDK libraries that
948 can worm around this.
949 + MS fgets locks/unlocks only once per line; it's the only hook we have.
951 So we use fgets for speed(!), despite that it's painful.
953 MS realloc is also slow.
955 Reports from other platforms on this method vs getc_unlocked (which MS doesn't
956 have):
957 Linux a wash
958 Solaris a wash
959 Tru64 Unix getline_via_fgets significantly faster
961 CAUTION: The C std isn't clear about this: in those cases where fgets
962 writes something into the buffer, can it write into any position beyond the
963 required trailing null byte? MSVC 6 fgets does not, and no platform is (yet)
964 known on which it does; and it would be a strange way to code fgets. Still,
965 getline_via_fgets may not work correctly if it does. The std test
966 test_bufio.py should fail if platform fgets() routinely writes beyond the
967 trailing null byte. #define DONT_USE_FGETS_IN_GETLINE to disable this code.
968 **************************************************************************/
970 /* Use this routine if told to, or by default on non-getc_unlocked()
971 * platforms unless told not to. Yikes! Let's spell that out:
972 * On a platform with getc_unlocked():
973 * By default, use getc_unlocked().
974 * If you want to use fgets() instead, #define USE_FGETS_IN_GETLINE.
975 * On a platform without getc_unlocked():
976 * By default, use fgets().
977 * If you don't want to use fgets(), #define DONT_USE_FGETS_IN_GETLINE.
979 #if !defined(USE_FGETS_IN_GETLINE) && !defined(HAVE_GETC_UNLOCKED)
980 #define USE_FGETS_IN_GETLINE
981 #endif
983 #if defined(DONT_USE_FGETS_IN_GETLINE) && defined(USE_FGETS_IN_GETLINE)
984 #undef USE_FGETS_IN_GETLINE
985 #endif
#ifdef USE_FGETS_IN_GETLINE
/* Read one line from fp using the platform fgets().

   The free part of the buffer is pre-filled with '\n' before each fgets()
   call; how far fgets() wrote is then worked out from where the first '\n'
   is and whether a '\0' follows it.

   Returns a new string holding the line (including its trailing newline
   when there is one; empty at EOF), or NULL with an exception set
   (a pending signal handler raised, the line is too long for a Python
   string, or memory ran out). */
static PyObject*
getline_via_fgets(FILE *fp)
{
/* INITBUFSIZE is the maximum line length that lets us get away with the fast
 * no-realloc, one-fgets()-call path.  Boosting it isn't free, because we have
 * to fill this much of the buffer with a known value in order to figure out
 * how much of the buffer fgets() overwrites.  So if INITBUFSIZE is larger
 * than "most" lines, we waste time filling unused buffer slots.  100 is
 * surely adequate for most peoples' email archives, chewing over source code,
 * etc -- "regular old text files".
 * MAXBUFSIZE is the maximum line length that lets us get away with the less
 * fast (but still zippy) no-realloc, two-fgets()-call path.  See above for
 * cautions about boosting that.  300 was chosen because the worst real-life
 * text-crunching job reported on Python-Dev was a mail-log crawler where over
 * half the lines were 254 chars.
 */
#define INITBUFSIZE 100
#define MAXBUFSIZE 300
    char* p;                /* temp */
    char buf[MAXBUFSIZE];
    PyObject* v;            /* the string object result */
    char* pvfree;           /* address of next free slot */
    char* pvend;            /* address one beyond last free slot */
    size_t nfree;           /* # of free buffer slots; pvend-pvfree */
    size_t total_v_size;    /* total # of slots in buffer */
    size_t increment;       /* amount to increment the buffer */
    size_t prev_v_size;

    /* Optimize for normal case:  avoid _PyString_Resize if at all
     * possible via first reading into stack buffer "buf".
     */
    total_v_size = INITBUFSIZE;     /* start small and pray */
    pvfree = buf;
    for (;;) {
        Py_BEGIN_ALLOW_THREADS
        pvend = buf + total_v_size;
        nfree = pvend - pvfree;
        memset(pvfree, '\n', nfree);
        assert(nfree < INT_MAX); /* Should be at most MAXBUFSIZE */
        p = fgets(pvfree, (int)nfree, fp);
        Py_END_ALLOW_THREADS

        if (p == NULL) {
            clearerr(fp);
            if (PyErr_CheckSignals())
                return NULL;
            v = PyString_FromStringAndSize(buf, pvfree - buf);
            return v;
        }
        /* fgets read *something* */
        p = memchr(pvfree, '\n', nfree);
        if (p != NULL) {
            /* Did the \n come from fgets or from us?
             * Since fgets stops at the first \n, and then writes
             * \0, if it's from fgets a \0 must be next.  But if
             * that's so, it could not have come from us, since
             * the \n's we filled the buffer with have only more
             * \n's to the right.
             */
            if (p+1 < pvend && *(p+1) == '\0') {
                /* It's from fgets:  we win!  In particular,
                 * we haven't done any mallocs yet, and can
                 * build the final result on the first try.
                 */
                ++p;    /* include \n from fgets */
            }
            else {
                /* Must be from us:  fgets didn't fill the
                 * buffer and didn't find a newline, so it
                 * must be the last and newline-free line of
                 * the file.
                 */
                assert(p > pvfree && *(p-1) == '\0');
                --p;    /* don't include \0 from fgets */
            }
            v = PyString_FromStringAndSize(buf, p - buf);
            return v;
        }
        /* yuck:  fgets overwrote all the newlines, i.e. the entire
         * buffer.  So this line isn't over yet, or maybe it is but
         * we're exactly at EOF.  If we haven't already, try using the
         * rest of the stack buffer.
         */
        assert(*(pvend-1) == '\0');
        if (pvfree == buf) {
            pvfree = pvend - 1;     /* overwrite trailing null */
            total_v_size = MAXBUFSIZE;
        }
        else
            break;
    }

    /* The stack buffer isn't big enough; malloc a string object and read
     * into its buffer.
     */
    total_v_size = MAXBUFSIZE << 1;
    v = PyString_FromStringAndSize((char*)NULL, (Py_ssize_t)total_v_size);
    if (v == NULL)
        return v;
    /* copy over everything except the last null byte */
    memcpy(BUF(v), buf, MAXBUFSIZE-1);
    pvfree = BUF(v) + MAXBUFSIZE - 1;

    /* Keep reading stuff into v; if it ever ends successfully, break
     * after setting p one beyond the end of the line.  The code here is
     * very much like the code above, except reads into v's buffer; see
     * the code above for detailed comments about the logic.
     */
    for (;;) {
        Py_BEGIN_ALLOW_THREADS
        pvend = BUF(v) + total_v_size;
        nfree = pvend - pvfree;
        memset(pvfree, '\n', nfree);
        assert(nfree < INT_MAX);
        p = fgets(pvfree, (int)nfree, fp);
        Py_END_ALLOW_THREADS

        if (p == NULL) {
            clearerr(fp);
            if (PyErr_CheckSignals()) {
                Py_DECREF(v);
                return NULL;
            }
            p = pvfree;
            break;
        }
        p = memchr(pvfree, '\n', nfree);
        if (p != NULL) {
            if (p+1 < pvend && *(p+1) == '\0') {
                /* \n came from fgets */
                ++p;
                break;
            }
            /* \n came from us; last line of file, no newline */
            assert(p > pvfree && *(p-1) == '\0');
            --p;
            break;
        }
        /* expand buffer and try again */
        assert(*(pvend-1) == '\0');
        increment = total_v_size >> 2;  /* mild exponential growth */
        /* The next fgets() gets increment+1 free slots and takes an int
         * count, so keep that below INT_MAX however large the buffer has
         * become.
         */
        if (increment > (size_t)INT_MAX - 2)
            increment = (size_t)INT_MAX - 2;
        prev_v_size = total_v_size;
        total_v_size += increment;
        /* check for overflow */
        if (total_v_size <= prev_v_size ||
            total_v_size > PY_SSIZE_T_MAX) {
            PyErr_SetString(PyExc_OverflowError,
                "line is longer than a Python string can hold");
            Py_DECREF(v);
            return NULL;
        }
        /* Sizes are Py_ssize_t here; casting to int would truncate once
         * the buffer passes INT_MAX bytes.
         */
        if (_PyString_Resize(&v, (Py_ssize_t)total_v_size) < 0)
            return NULL;
        /* overwrite the trailing null byte */
        pvfree = BUF(v) + (prev_v_size - 1);
    }
    if (BUF(v) + total_v_size != p)
        _PyString_Resize(&v, p - BUF(v));
    return v;
#undef INITBUFSIZE
#undef MAXBUFSIZE
}
#endif /* ifdef USE_FGETS_IN_GETLINE */
1152 /* Internal routine to get a line.
1153 Size argument interpretation:
1154 > 0: max length;
1155 <= 0: read arbitrary line
1158 static PyObject *
1159 get_line(PyFileObject *f, int n)
1161 FILE *fp = f->f_fp;
1162 int c;
1163 char *buf, *end;
1164 size_t total_v_size; /* total # of slots in buffer */
1165 size_t used_v_size; /* # used slots in buffer */
1166 size_t increment; /* amount to increment the buffer */
1167 PyObject *v;
1168 int newlinetypes = f->f_newlinetypes;
1169 int skipnextlf = f->f_skipnextlf;
1170 int univ_newline = f->f_univ_newline;
1172 #if defined(USE_FGETS_IN_GETLINE)
1173 if (n <= 0 && !univ_newline )
1174 return getline_via_fgets(fp);
1175 #endif
1176 total_v_size = n > 0 ? n : 100;
1177 v = PyString_FromStringAndSize((char *)NULL, total_v_size);
1178 if (v == NULL)
1179 return NULL;
1180 buf = BUF(v);
1181 end = buf + total_v_size;
1183 for (;;) {
1184 Py_BEGIN_ALLOW_THREADS
1185 FLOCKFILE(fp);
1186 if (univ_newline) {
1187 c = 'x'; /* Shut up gcc warning */
1188 while ( buf != end && (c = GETC(fp)) != EOF ) {
1189 if (skipnextlf ) {
1190 skipnextlf = 0;
1191 if (c == '\n') {
1192 /* Seeing a \n here with
1193 * skipnextlf true means we
1194 * saw a \r before.
1196 newlinetypes |= NEWLINE_CRLF;
1197 c = GETC(fp);
1198 if (c == EOF) break;
1199 } else {
1200 newlinetypes |= NEWLINE_CR;
1203 if (c == '\r') {
1204 skipnextlf = 1;
1205 c = '\n';
1206 } else if ( c == '\n')
1207 newlinetypes |= NEWLINE_LF;
1208 *buf++ = c;
1209 if (c == '\n') break;
1211 if ( c == EOF && skipnextlf )
1212 newlinetypes |= NEWLINE_CR;
1213 } else /* If not universal newlines use the normal loop */
1214 while ((c = GETC(fp)) != EOF &&
1215 (*buf++ = c) != '\n' &&
1216 buf != end)
1218 FUNLOCKFILE(fp);
1219 Py_END_ALLOW_THREADS
1220 f->f_newlinetypes = newlinetypes;
1221 f->f_skipnextlf = skipnextlf;
1222 if (c == '\n')
1223 break;
1224 if (c == EOF) {
1225 if (ferror(fp)) {
1226 PyErr_SetFromErrno(PyExc_IOError);
1227 clearerr(fp);
1228 Py_DECREF(v);
1229 return NULL;
1231 clearerr(fp);
1232 if (PyErr_CheckSignals()) {
1233 Py_DECREF(v);
1234 return NULL;
1236 break;
1238 /* Must be because buf == end */
1239 if (n > 0)
1240 break;
1241 used_v_size = total_v_size;
1242 increment = total_v_size >> 2; /* mild exponential growth */
1243 total_v_size += increment;
1244 if (total_v_size > PY_SSIZE_T_MAX) {
1245 PyErr_SetString(PyExc_OverflowError,
1246 "line is longer than a Python string can hold");
1247 Py_DECREF(v);
1248 return NULL;
1250 if (_PyString_Resize(&v, total_v_size) < 0)
1251 return NULL;
1252 buf = BUF(v) + used_v_size;
1253 end = BUF(v) + total_v_size;
1256 used_v_size = buf - BUF(v);
1257 if (used_v_size != total_v_size)
1258 _PyString_Resize(&v, used_v_size);
1259 return v;
1262 /* External C interface */
1264 PyObject *
1265 PyFile_GetLine(PyObject *f, int n)
1267 PyObject *result;
1269 if (f == NULL) {
1270 PyErr_BadInternalCall();
1271 return NULL;
1274 if (PyFile_Check(f)) {
1275 PyFileObject *fo = (PyFileObject *)f;
1276 if (fo->f_fp == NULL)
1277 return err_closed();
1278 /* refuse to mix with f.next() */
1279 if (fo->f_buf != NULL &&
1280 (fo->f_bufend - fo->f_bufptr) > 0 &&
1281 fo->f_buf[0] != '\0')
1282 return err_iterbuffered();
1283 result = get_line(fo, n);
1285 else {
1286 PyObject *reader;
1287 PyObject *args;
1289 reader = PyObject_GetAttrString(f, "readline");
1290 if (reader == NULL)
1291 return NULL;
1292 if (n <= 0)
1293 args = PyTuple_New(0);
1294 else
1295 args = Py_BuildValue("(i)", n);
1296 if (args == NULL) {
1297 Py_DECREF(reader);
1298 return NULL;
1300 result = PyEval_CallObject(reader, args);
1301 Py_DECREF(reader);
1302 Py_DECREF(args);
1303 if (result != NULL && !PyString_Check(result) &&
1304 !PyUnicode_Check(result)) {
1305 Py_DECREF(result);
1306 result = NULL;
1307 PyErr_SetString(PyExc_TypeError,
1308 "object.readline() returned non-string");
1312 if (n < 0 && result != NULL && PyString_Check(result)) {
1313 char *s = PyString_AS_STRING(result);
1314 Py_ssize_t len = PyString_GET_SIZE(result);
1315 if (len == 0) {
1316 Py_DECREF(result);
1317 result = NULL;
1318 PyErr_SetString(PyExc_EOFError,
1319 "EOF when reading a line");
1321 else if (s[len-1] == '\n') {
1322 if (result->ob_refcnt == 1)
1323 _PyString_Resize(&result, len-1);
1324 else {
1325 PyObject *v;
1326 v = PyString_FromStringAndSize(s, len-1);
1327 Py_DECREF(result);
1328 result = v;
1332 #ifdef Py_USING_UNICODE
1333 if (n < 0 && result != NULL && PyUnicode_Check(result)) {
1334 Py_UNICODE *s = PyUnicode_AS_UNICODE(result);
1335 Py_ssize_t len = PyUnicode_GET_SIZE(result);
1336 if (len == 0) {
1337 Py_DECREF(result);
1338 result = NULL;
1339 PyErr_SetString(PyExc_EOFError,
1340 "EOF when reading a line");
1342 else if (s[len-1] == '\n') {
1343 if (result->ob_refcnt == 1)
1344 PyUnicode_Resize(&result, len-1);
1345 else {
1346 PyObject *v;
1347 v = PyUnicode_FromUnicode(s, len-1);
1348 Py_DECREF(result);
1349 result = v;
1353 #endif
1354 return result;
1357 /* Python method */
1359 static PyObject *
1360 file_readline(PyFileObject *f, PyObject *args)
1362 int n = -1;
1364 if (f->f_fp == NULL)
1365 return err_closed();
1366 /* refuse to mix with f.next() */
1367 if (f->f_buf != NULL &&
1368 (f->f_bufend - f->f_bufptr) > 0 &&
1369 f->f_buf[0] != '\0')
1370 return err_iterbuffered();
1371 if (!PyArg_ParseTuple(args, "|i:readline", &n))
1372 return NULL;
1373 if (n == 0)
1374 return PyString_FromString("");
1375 if (n < 0)
1376 n = 0;
1377 return get_line(f, n);
1380 static PyObject *
1381 file_readlines(PyFileObject *f, PyObject *args)
1383 long sizehint = 0;
1384 PyObject *list;
1385 PyObject *line;
1386 char small_buffer[SMALLCHUNK];
1387 char *buffer = small_buffer;
1388 size_t buffersize = SMALLCHUNK;
1389 PyObject *big_buffer = NULL;
1390 size_t nfilled = 0;
1391 size_t nread;
1392 size_t totalread = 0;
1393 char *p, *q, *end;
1394 int err;
1395 int shortread = 0;
1397 if (f->f_fp == NULL)
1398 return err_closed();
1399 /* refuse to mix with f.next() */
1400 if (f->f_buf != NULL &&
1401 (f->f_bufend - f->f_bufptr) > 0 &&
1402 f->f_buf[0] != '\0')
1403 return err_iterbuffered();
1404 if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
1405 return NULL;
1406 if ((list = PyList_New(0)) == NULL)
1407 return NULL;
1408 for (;;) {
1409 if (shortread)
1410 nread = 0;
1411 else {
1412 Py_BEGIN_ALLOW_THREADS
1413 errno = 0;
1414 nread = Py_UniversalNewlineFread(buffer+nfilled,
1415 buffersize-nfilled, f->f_fp, (PyObject *)f);
1416 Py_END_ALLOW_THREADS
1417 shortread = (nread < buffersize-nfilled);
1419 if (nread == 0) {
1420 sizehint = 0;
1421 if (!ferror(f->f_fp))
1422 break;
1423 PyErr_SetFromErrno(PyExc_IOError);
1424 clearerr(f->f_fp);
1425 error:
1426 Py_DECREF(list);
1427 list = NULL;
1428 goto cleanup;
1430 totalread += nread;
1431 p = (char *)memchr(buffer+nfilled, '\n', nread);
1432 if (p == NULL) {
1433 /* Need a larger buffer to fit this line */
1434 nfilled += nread;
1435 buffersize *= 2;
1436 if (buffersize > PY_SSIZE_T_MAX) {
1437 PyErr_SetString(PyExc_OverflowError,
1438 "line is longer than a Python string can hold");
1439 goto error;
1441 if (big_buffer == NULL) {
1442 /* Create the big buffer */
1443 big_buffer = PyString_FromStringAndSize(
1444 NULL, buffersize);
1445 if (big_buffer == NULL)
1446 goto error;
1447 buffer = PyString_AS_STRING(big_buffer);
1448 memcpy(buffer, small_buffer, nfilled);
1450 else {
1451 /* Grow the big buffer */
1452 if ( _PyString_Resize(&big_buffer, buffersize) < 0 )
1453 goto error;
1454 buffer = PyString_AS_STRING(big_buffer);
1456 continue;
1458 end = buffer+nfilled+nread;
1459 q = buffer;
1460 do {
1461 /* Process complete lines */
1462 p++;
1463 line = PyString_FromStringAndSize(q, p-q);
1464 if (line == NULL)
1465 goto error;
1466 err = PyList_Append(list, line);
1467 Py_DECREF(line);
1468 if (err != 0)
1469 goto error;
1470 q = p;
1471 p = (char *)memchr(q, '\n', end-q);
1472 } while (p != NULL);
1473 /* Move the remaining incomplete line to the start */
1474 nfilled = end-q;
1475 memmove(buffer, q, nfilled);
1476 if (sizehint > 0)
1477 if (totalread >= (size_t)sizehint)
1478 break;
1480 if (nfilled != 0) {
1481 /* Partial last line */
1482 line = PyString_FromStringAndSize(buffer, nfilled);
1483 if (line == NULL)
1484 goto error;
1485 if (sizehint > 0) {
1486 /* Need to complete the last line */
1487 PyObject *rest = get_line(f, 0);
1488 if (rest == NULL) {
1489 Py_DECREF(line);
1490 goto error;
1492 PyString_Concat(&line, rest);
1493 Py_DECREF(rest);
1494 if (line == NULL)
1495 goto error;
1497 err = PyList_Append(list, line);
1498 Py_DECREF(line);
1499 if (err != 0)
1500 goto error;
1502 cleanup:
1503 Py_XDECREF(big_buffer);
1504 return list;
1507 static PyObject *
1508 file_write(PyFileObject *f, PyObject *args)
1510 char *s;
1511 Py_ssize_t n, n2;
1512 if (f->f_fp == NULL)
1513 return err_closed();
1514 if (!PyArg_ParseTuple(args, f->f_binary ? "s#" : "t#", &s, &n))
1515 return NULL;
1516 f->f_softspace = 0;
1517 Py_BEGIN_ALLOW_THREADS
1518 errno = 0;
1519 n2 = fwrite(s, 1, n, f->f_fp);
1520 Py_END_ALLOW_THREADS
1521 if (n2 != n) {
1522 PyErr_SetFromErrno(PyExc_IOError);
1523 clearerr(f->f_fp);
1524 return NULL;
1526 Py_INCREF(Py_None);
1527 return Py_None;
1530 static PyObject *
1531 file_writelines(PyFileObject *f, PyObject *seq)
1533 #define CHUNKSIZE 1000
1534 PyObject *list, *line;
1535 PyObject *it; /* iter(seq) */
1536 PyObject *result;
1537 int index, islist;
1538 Py_ssize_t i, j, nwritten, len;
1540 assert(seq != NULL);
1541 if (f->f_fp == NULL)
1542 return err_closed();
1544 result = NULL;
1545 list = NULL;
1546 islist = PyList_Check(seq);
1547 if (islist)
1548 it = NULL;
1549 else {
1550 it = PyObject_GetIter(seq);
1551 if (it == NULL) {
1552 PyErr_SetString(PyExc_TypeError,
1553 "writelines() requires an iterable argument");
1554 return NULL;
1556 /* From here on, fail by going to error, to reclaim "it". */
1557 list = PyList_New(CHUNKSIZE);
1558 if (list == NULL)
1559 goto error;
1562 /* Strategy: slurp CHUNKSIZE lines into a private list,
1563 checking that they are all strings, then write that list
1564 without holding the interpreter lock, then come back for more. */
1565 for (index = 0; ; index += CHUNKSIZE) {
1566 if (islist) {
1567 Py_XDECREF(list);
1568 list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
1569 if (list == NULL)
1570 goto error;
1571 j = PyList_GET_SIZE(list);
1573 else {
1574 for (j = 0; j < CHUNKSIZE; j++) {
1575 line = PyIter_Next(it);
1576 if (line == NULL) {
1577 if (PyErr_Occurred())
1578 goto error;
1579 break;
1581 PyList_SetItem(list, j, line);
1584 if (j == 0)
1585 break;
1587 /* Check that all entries are indeed strings. If not,
1588 apply the same rules as for file.write() and
1589 convert the results to strings. This is slow, but
1590 seems to be the only way since all conversion APIs
1591 could potentially execute Python code. */
1592 for (i = 0; i < j; i++) {
1593 PyObject *v = PyList_GET_ITEM(list, i);
1594 if (!PyString_Check(v)) {
1595 const char *buffer;
1596 if (((f->f_binary &&
1597 PyObject_AsReadBuffer(v,
1598 (const void**)&buffer,
1599 &len)) ||
1600 PyObject_AsCharBuffer(v,
1601 &buffer,
1602 &len))) {
1603 PyErr_SetString(PyExc_TypeError,
1604 "writelines() argument must be a sequence of strings");
1605 goto error;
1607 line = PyString_FromStringAndSize(buffer,
1608 len);
1609 if (line == NULL)
1610 goto error;
1611 Py_DECREF(v);
1612 PyList_SET_ITEM(list, i, line);
1616 /* Since we are releasing the global lock, the
1617 following code may *not* execute Python code. */
1618 Py_BEGIN_ALLOW_THREADS
1619 f->f_softspace = 0;
1620 errno = 0;
1621 for (i = 0; i < j; i++) {
1622 line = PyList_GET_ITEM(list, i);
1623 len = PyString_GET_SIZE(line);
1624 nwritten = fwrite(PyString_AS_STRING(line),
1625 1, len, f->f_fp);
1626 if (nwritten != len) {
1627 Py_BLOCK_THREADS
1628 PyErr_SetFromErrno(PyExc_IOError);
1629 clearerr(f->f_fp);
1630 goto error;
1633 Py_END_ALLOW_THREADS
1635 if (j < CHUNKSIZE)
1636 break;
1639 Py_INCREF(Py_None);
1640 result = Py_None;
1641 error:
1642 Py_XDECREF(list);
1643 Py_XDECREF(it);
1644 return result;
1645 #undef CHUNKSIZE
1648 static PyObject *
1649 file_self(PyFileObject *f)
1651 if (f->f_fp == NULL)
1652 return err_closed();
1653 Py_INCREF(f);
1654 return (PyObject *)f;
1657 static PyObject *
1658 file_exit(PyObject *f, PyObject *args)
1660 PyObject *ret = PyObject_CallMethod(f, "close", NULL);
1661 if (!ret)
1662 /* If error occurred, pass through */
1663 return NULL;
1664 Py_DECREF(ret);
1665 /* We cannot return the result of close since a true
1666 * value will be interpreted as "yes, swallow the
1667 * exception if one was raised inside the with block". */
1668 Py_RETURN_NONE;
1671 PyDoc_STRVAR(readline_doc,
1672 "readline([size]) -> next line from the file, as a string.\n"
1673 "\n"
1674 "Retain newline. A non-negative size argument limits the maximum\n"
1675 "number of bytes to return (an incomplete line may be returned then).\n"
1676 "Return an empty string at EOF.");
1678 PyDoc_STRVAR(read_doc,
1679 "read([size]) -> read at most size bytes, returned as a string.\n"
1680 "\n"
1681 "If the size argument is negative or omitted, read until EOF is reached.\n"
1682 "Notice that when in non-blocking mode, less data than what was requested\n"
1683 "may be returned, even if no size parameter was given.");
1685 PyDoc_STRVAR(write_doc,
1686 "write(str) -> None. Write string str to file.\n"
1687 "\n"
1688 "Note that due to buffering, flush() or close() may be needed before\n"
1689 "the file on disk reflects the data written.");
1691 PyDoc_STRVAR(fileno_doc,
1692 "fileno() -> integer \"file descriptor\".\n"
1693 "\n"
1694 "This is needed for lower-level file interfaces, such os.read().");
1696 PyDoc_STRVAR(seek_doc,
1697 "seek(offset[, whence]) -> None. Move to new file position.\n"
1698 "\n"
1699 "Argument offset is a byte count. Optional argument whence defaults to\n"
1700 "0 (offset from start of file, offset should be >= 0); other values are 1\n"
1701 "(move relative to current position, positive or negative), and 2 (move\n"
1702 "relative to end of file, usually negative, although many platforms allow\n"
1703 "seeking beyond the end of a file). If the file is opened in text mode,\n"
1704 "only offsets returned by tell() are legal. Use of other offsets causes\n"
1705 "undefined behavior."
1706 "\n"
1707 "Note that not all file objects are seekable.");
1709 #ifdef HAVE_FTRUNCATE
1710 PyDoc_STRVAR(truncate_doc,
1711 "truncate([size]) -> None. Truncate the file to at most size bytes.\n"
1712 "\n"
1713 "Size defaults to the current file position, as returned by tell().");
1714 #endif
1716 PyDoc_STRVAR(tell_doc,
1717 "tell() -> current file position, an integer (may be a long integer).");
1719 PyDoc_STRVAR(readinto_doc,
1720 "readinto() -> Undocumented. Don't use this; it may go away.");
1722 PyDoc_STRVAR(readlines_doc,
1723 "readlines([size]) -> list of strings, each a line from the file.\n"
1724 "\n"
1725 "Call readline() repeatedly and return a list of the lines so read.\n"
1726 "The optional size argument, if given, is an approximate bound on the\n"
1727 "total number of bytes in the lines returned.");
1729 PyDoc_STRVAR(xreadlines_doc,
1730 "xreadlines() -> returns self.\n"
1731 "\n"
1732 "For backward compatibility. File objects now include the performance\n"
1733 "optimizations previously implemented in the xreadlines module.");
1735 PyDoc_STRVAR(writelines_doc,
1736 "writelines(sequence_of_strings) -> None. Write the strings to the file.\n"
1737 "\n"
1738 "Note that newlines are not added. The sequence can be any iterable object\n"
1739 "producing strings. This is equivalent to calling write() for each string.");
1741 PyDoc_STRVAR(flush_doc,
1742 "flush() -> None. Flush the internal I/O buffer.");
1744 PyDoc_STRVAR(close_doc,
1745 "close() -> None or (perhaps) an integer. Close the file.\n"
1746 "\n"
1747 "Sets data attribute .closed to True. A closed file cannot be used for\n"
1748 "further I/O operations. close() may be called more than once without\n"
1749 "error. Some kinds of file objects (for example, opened by popen())\n"
1750 "may return an exit status upon closing.");
1752 PyDoc_STRVAR(isatty_doc,
1753 "isatty() -> true or false. True if the file is connected to a tty device.");
1755 PyDoc_STRVAR(enter_doc,
1756 "__enter__() -> self.");
1758 PyDoc_STRVAR(exit_doc,
1759 "__exit__(*excinfo) -> None. Closes the file.");
1761 static PyMethodDef file_methods[] = {
1762 {"readline", (PyCFunction)file_readline, METH_VARARGS, readline_doc},
1763 {"read", (PyCFunction)file_read, METH_VARARGS, read_doc},
1764 {"write", (PyCFunction)file_write, METH_VARARGS, write_doc},
1765 {"fileno", (PyCFunction)file_fileno, METH_NOARGS, fileno_doc},
1766 {"seek", (PyCFunction)file_seek, METH_VARARGS, seek_doc},
1767 #ifdef HAVE_FTRUNCATE
1768 {"truncate", (PyCFunction)file_truncate, METH_VARARGS, truncate_doc},
1769 #endif
1770 {"tell", (PyCFunction)file_tell, METH_NOARGS, tell_doc},
1771 {"readinto", (PyCFunction)file_readinto, METH_VARARGS, readinto_doc},
1772 {"readlines", (PyCFunction)file_readlines,METH_VARARGS, readlines_doc},
1773 {"xreadlines",(PyCFunction)file_self, METH_NOARGS, xreadlines_doc},
1774 {"writelines",(PyCFunction)file_writelines, METH_O, writelines_doc},
1775 {"flush", (PyCFunction)file_flush, METH_NOARGS, flush_doc},
1776 {"close", (PyCFunction)file_close, METH_NOARGS, close_doc},
1777 {"isatty", (PyCFunction)file_isatty, METH_NOARGS, isatty_doc},
1778 {"__enter__", (PyCFunction)file_self, METH_NOARGS, enter_doc},
1779 {"__exit__", (PyCFunction)file_exit, METH_VARARGS, exit_doc},
1780 {NULL, NULL} /* sentinel */
1783 #define OFF(x) offsetof(PyFileObject, x)
1785 static PyMemberDef file_memberlist[] = {
1786 {"softspace", T_INT, OFF(f_softspace), 0,
1787 "flag indicating that a space needs to be printed; used by print"},
1788 {"mode", T_OBJECT, OFF(f_mode), RO,
1789 "file mode ('r', 'U', 'w', 'a', possibly with 'b' or '+' added)"},
1790 {"name", T_OBJECT, OFF(f_name), RO,
1791 "file name"},
1792 {"encoding", T_OBJECT, OFF(f_encoding), RO,
1793 "file encoding"},
1794 /* getattr(f, "closed") is implemented without this table */
1795 {NULL} /* Sentinel */
1798 static PyObject *
1799 get_closed(PyFileObject *f, void *closure)
1801 return PyBool_FromLong((long)(f->f_fp == 0));
1803 static PyObject *
1804 get_newlines(PyFileObject *f, void *closure)
1806 switch (f->f_newlinetypes) {
1807 case NEWLINE_UNKNOWN:
1808 Py_INCREF(Py_None);
1809 return Py_None;
1810 case NEWLINE_CR:
1811 return PyString_FromString("\r");
1812 case NEWLINE_LF:
1813 return PyString_FromString("\n");
1814 case NEWLINE_CR|NEWLINE_LF:
1815 return Py_BuildValue("(ss)", "\r", "\n");
1816 case NEWLINE_CRLF:
1817 return PyString_FromString("\r\n");
1818 case NEWLINE_CR|NEWLINE_CRLF:
1819 return Py_BuildValue("(ss)", "\r", "\r\n");
1820 case NEWLINE_LF|NEWLINE_CRLF:
1821 return Py_BuildValue("(ss)", "\n", "\r\n");
1822 case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF:
1823 return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
1824 default:
1825 PyErr_Format(PyExc_SystemError,
1826 "Unknown newlines value 0x%x\n",
1827 f->f_newlinetypes);
1828 return NULL;
1832 static PyGetSetDef file_getsetlist[] = {
1833 {"closed", (getter)get_closed, NULL, "True if the file is closed"},
1834 {"newlines", (getter)get_newlines, NULL,
1835 "end-of-line convention used in this file"},
1836 {0},
1839 static void
1840 drop_readahead(PyFileObject *f)
1842 if (f->f_buf != NULL) {
1843 PyMem_Free(f->f_buf);
1844 f->f_buf = NULL;
1848 /* Make sure that file has a readahead buffer with at least one byte
1849 (unless at EOF) and no more than bufsize. Returns negative value on
1850 error, will set MemoryError if bufsize bytes cannot be allocated. */
1851 static int
1852 readahead(PyFileObject *f, int bufsize)
1854 Py_ssize_t chunksize;
1856 if (f->f_buf != NULL) {
1857 if( (f->f_bufend - f->f_bufptr) >= 1)
1858 return 0;
1859 else
1860 drop_readahead(f);
1862 if ((f->f_buf = (char *)PyMem_Malloc(bufsize)) == NULL) {
1863 PyErr_NoMemory();
1864 return -1;
1866 Py_BEGIN_ALLOW_THREADS
1867 errno = 0;
1868 chunksize = Py_UniversalNewlineFread(
1869 f->f_buf, bufsize, f->f_fp, (PyObject *)f);
1870 Py_END_ALLOW_THREADS
1871 if (chunksize == 0) {
1872 if (ferror(f->f_fp)) {
1873 PyErr_SetFromErrno(PyExc_IOError);
1874 clearerr(f->f_fp);
1875 drop_readahead(f);
1876 return -1;
1879 f->f_bufptr = f->f_buf;
1880 f->f_bufend = f->f_buf + chunksize;
1881 return 0;
1884 /* Used by file_iternext. The returned string will start with 'skip'
1885 uninitialized bytes followed by the remainder of the line. Don't be
1886 horrified by the recursive call: maximum recursion depth is limited by
1887 logarithmic buffer growth to about 50 even when reading a 1gb line. */
1889 static PyStringObject *
1890 readahead_get_line_skip(PyFileObject *f, int skip, int bufsize)
1892 PyStringObject* s;
1893 char *bufptr;
1894 char *buf;
1895 Py_ssize_t len;
1897 if (f->f_buf == NULL)
1898 if (readahead(f, bufsize) < 0)
1899 return NULL;
1901 len = f->f_bufend - f->f_bufptr;
1902 if (len == 0)
1903 return (PyStringObject *)
1904 PyString_FromStringAndSize(NULL, skip);
1905 bufptr = (char *)memchr(f->f_bufptr, '\n', len);
1906 if (bufptr != NULL) {
1907 bufptr++; /* Count the '\n' */
1908 len = bufptr - f->f_bufptr;
1909 s = (PyStringObject *)
1910 PyString_FromStringAndSize(NULL, skip+len);
1911 if (s == NULL)
1912 return NULL;
1913 memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
1914 f->f_bufptr = bufptr;
1915 if (bufptr == f->f_bufend)
1916 drop_readahead(f);
1917 } else {
1918 bufptr = f->f_bufptr;
1919 buf = f->f_buf;
1920 f->f_buf = NULL; /* Force new readahead buffer */
1921 assert(skip+len < INT_MAX);
1922 s = readahead_get_line_skip(
1923 f, (int)(skip+len), bufsize + (bufsize>>2) );
1924 if (s == NULL) {
1925 PyMem_Free(buf);
1926 return NULL;
1928 memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
1929 PyMem_Free(buf);
1931 return s;
1934 /* A larger buffer size may actually decrease performance. */
1935 #define READAHEAD_BUFSIZE 8192
1937 static PyObject *
1938 file_iternext(PyFileObject *f)
1940 PyStringObject* l;
1942 if (f->f_fp == NULL)
1943 return err_closed();
1945 l = readahead_get_line_skip(f, 0, READAHEAD_BUFSIZE);
1946 if (l == NULL || PyString_GET_SIZE(l) == 0) {
1947 Py_XDECREF(l);
1948 return NULL;
1950 return (PyObject *)l;
1954 static PyObject *
1955 file_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1957 PyObject *self;
1958 static PyObject *not_yet_string;
1960 assert(type != NULL && type->tp_alloc != NULL);
1962 if (not_yet_string == NULL) {
1963 not_yet_string = PyString_InternFromString("<uninitialized file>");
1964 if (not_yet_string == NULL)
1965 return NULL;
1968 self = type->tp_alloc(type, 0);
1969 if (self != NULL) {
1970 /* Always fill in the name and mode, so that nobody else
1971 needs to special-case NULLs there. */
1972 Py_INCREF(not_yet_string);
1973 ((PyFileObject *)self)->f_name = not_yet_string;
1974 Py_INCREF(not_yet_string);
1975 ((PyFileObject *)self)->f_mode = not_yet_string;
1976 Py_INCREF(Py_None);
1977 ((PyFileObject *)self)->f_encoding = Py_None;
1978 ((PyFileObject *)self)->weakreflist = NULL;
1980 return self;
1983 static int
1984 file_init(PyObject *self, PyObject *args, PyObject *kwds)
1986 PyFileObject *foself = (PyFileObject *)self;
1987 int ret = 0;
1988 static char *kwlist[] = {"name", "mode", "buffering", 0};
1989 char *name = NULL;
1990 char *mode = "r";
1991 int bufsize = -1;
1992 int wideargument = 0;
1994 assert(PyFile_Check(self));
1995 if (foself->f_fp != NULL) {
1996 /* Have to close the existing file first. */
1997 PyObject *closeresult = file_close(foself);
1998 if (closeresult == NULL)
1999 return -1;
2000 Py_DECREF(closeresult);
2003 #ifdef Py_WIN_WIDE_FILENAMES
2004 if (GetVersion() < 0x80000000) { /* On NT, so wide API available */
2005 PyObject *po;
2006 if (PyArg_ParseTupleAndKeywords(args, kwds, "U|si:file",
2007 kwlist, &po, &mode, &bufsize)) {
2008 wideargument = 1;
2009 if (fill_file_fields(foself, NULL, po, mode,
2010 fclose) == NULL)
2011 goto Error;
2012 } else {
2013 /* Drop the argument parsing error as narrow
2014 strings are also valid. */
2015 PyErr_Clear();
2018 #endif
2020 if (!wideargument) {
2021 PyObject *o_name;
2023 if (!PyArg_ParseTupleAndKeywords(args, kwds, "et|si:file", kwlist,
2024 Py_FileSystemDefaultEncoding,
2025 &name,
2026 &mode, &bufsize))
2027 return -1;
2029 /* We parse again to get the name as a PyObject */
2030 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|si:file",
2031 kwlist, &o_name, &mode,
2032 &bufsize))
2033 goto Error;
2035 if (fill_file_fields(foself, NULL, o_name, mode,
2036 fclose) == NULL)
2037 goto Error;
2039 if (open_the_file(foself, name, mode) == NULL)
2040 goto Error;
2041 foself->f_setbuf = NULL;
2042 PyFile_SetBufSize(self, bufsize);
2043 goto Done;
2045 Error:
2046 ret = -1;
2047 /* fall through */
2048 Done:
2049 PyMem_Free(name); /* free the encoded string */
2050 return ret;
2053 PyDoc_VAR(file_doc) =
2054 PyDoc_STR(
2055 "file(name[, mode[, buffering]]) -> file object\n"
2056 "\n"
2057 "Open a file. The mode can be 'r', 'w' or 'a' for reading (default),\n"
2058 "writing or appending. The file will be created if it doesn't exist\n"
2059 "when opened for writing or appending; it will be truncated when\n"
2060 "opened for writing. Add a 'b' to the mode for binary files.\n"
2061 "Add a '+' to the mode to allow simultaneous reading and writing.\n"
2062 "If the buffering argument is given, 0 means unbuffered, 1 means line\n"
2063 "buffered, and larger numbers specify the buffer size. The preferred way\n"
2064 "to open a file is with the builtin open() function.\n"
2066 PyDoc_STR(
2067 "Add a 'U' to mode to open the file for input with universal newline\n"
2068 "support. Any line ending in the input file will be seen as a '\\n'\n"
2069 "in Python. Also, a file so opened gains the attribute 'newlines';\n"
2070 "the value for this attribute is one of None (no newline read yet),\n"
2071 "'\\r', '\\n', '\\r\\n' or a tuple containing all the newline types seen.\n"
2072 "\n"
2073 "'U' cannot be combined with 'w' or '+' mode.\n"
2076 PyTypeObject PyFile_Type = {
2077 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2078 "file",
2079 sizeof(PyFileObject),
2081 (destructor)file_dealloc, /* tp_dealloc */
2082 0, /* tp_print */
2083 0, /* tp_getattr */
2084 0, /* tp_setattr */
2085 0, /* tp_compare */
2086 (reprfunc)file_repr, /* tp_repr */
2087 0, /* tp_as_number */
2088 0, /* tp_as_sequence */
2089 0, /* tp_as_mapping */
2090 0, /* tp_hash */
2091 0, /* tp_call */
2092 0, /* tp_str */
2093 PyObject_GenericGetAttr, /* tp_getattro */
2094 /* softspace is writable: we must supply tp_setattro */
2095 PyObject_GenericSetAttr, /* tp_setattro */
2096 0, /* tp_as_buffer */
2097 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_WEAKREFS, /* tp_flags */
2098 file_doc, /* tp_doc */
2099 0, /* tp_traverse */
2100 0, /* tp_clear */
2101 0, /* tp_richcompare */
2102 offsetof(PyFileObject, weakreflist), /* tp_weaklistoffset */
2103 (getiterfunc)file_self, /* tp_iter */
2104 (iternextfunc)file_iternext, /* tp_iternext */
2105 file_methods, /* tp_methods */
2106 file_memberlist, /* tp_members */
2107 file_getsetlist, /* tp_getset */
2108 0, /* tp_base */
2109 0, /* tp_dict */
2110 0, /* tp_descr_get */
2111 0, /* tp_descr_set */
2112 0, /* tp_dictoffset */
2113 file_init, /* tp_init */
2114 PyType_GenericAlloc, /* tp_alloc */
2115 file_new, /* tp_new */
2116 PyObject_Del, /* tp_free */
2119 /* Interface for the 'soft space' between print items. */
2122 PyFile_SoftSpace(PyObject *f, int newflag)
2124 long oldflag = 0;
2125 if (f == NULL) {
2126 /* Do nothing */
2128 else if (PyFile_Check(f)) {
2129 oldflag = ((PyFileObject *)f)->f_softspace;
2130 ((PyFileObject *)f)->f_softspace = newflag;
2132 else {
2133 PyObject *v;
2134 v = PyObject_GetAttrString(f, "softspace");
2135 if (v == NULL)
2136 PyErr_Clear();
2137 else {
2138 if (PyInt_Check(v))
2139 oldflag = PyInt_AsLong(v);
2140 assert(oldflag < INT_MAX);
2141 Py_DECREF(v);
2143 v = PyInt_FromLong((long)newflag);
2144 if (v == NULL)
2145 PyErr_Clear();
2146 else {
2147 if (PyObject_SetAttrString(f, "softspace", v) != 0)
2148 PyErr_Clear();
2149 Py_DECREF(v);
2152 return (int)oldflag;
2155 /* Interfaces to write objects/strings to file-like objects */
2158 PyFile_WriteObject(PyObject *v, PyObject *f, int flags)
2160 PyObject *writer, *value, *args, *result;
2161 if (f == NULL) {
2162 PyErr_SetString(PyExc_TypeError, "writeobject with NULL file");
2163 return -1;
2165 else if (PyFile_Check(f)) {
2166 FILE *fp = PyFile_AsFile(f);
2167 #ifdef Py_USING_UNICODE
2168 PyObject *enc = ((PyFileObject*)f)->f_encoding;
2169 int result;
2170 #endif
2171 if (fp == NULL) {
2172 err_closed();
2173 return -1;
2175 #ifdef Py_USING_UNICODE
2176 if ((flags & Py_PRINT_RAW) &&
2177 PyUnicode_Check(v) && enc != Py_None) {
2178 char *cenc = PyString_AS_STRING(enc);
2179 value = PyUnicode_AsEncodedString(v, cenc, "strict");
2180 if (value == NULL)
2181 return -1;
2182 } else {
2183 value = v;
2184 Py_INCREF(value);
2186 result = PyObject_Print(value, fp, flags);
2187 Py_DECREF(value);
2188 return result;
2189 #else
2190 return PyObject_Print(v, fp, flags);
2191 #endif
2193 writer = PyObject_GetAttrString(f, "write");
2194 if (writer == NULL)
2195 return -1;
2196 if (flags & Py_PRINT_RAW) {
2197 if (PyUnicode_Check(v)) {
2198 value = v;
2199 Py_INCREF(value);
2200 } else
2201 value = PyObject_Str(v);
2203 else
2204 value = PyObject_Repr(v);
2205 if (value == NULL) {
2206 Py_DECREF(writer);
2207 return -1;
2209 args = PyTuple_Pack(1, value);
2210 if (args == NULL) {
2211 Py_DECREF(value);
2212 Py_DECREF(writer);
2213 return -1;
2215 result = PyEval_CallObject(writer, args);
2216 Py_DECREF(args);
2217 Py_DECREF(value);
2218 Py_DECREF(writer);
2219 if (result == NULL)
2220 return -1;
2221 Py_DECREF(result);
2222 return 0;
2226 PyFile_WriteString(const char *s, PyObject *f)
2228 if (f == NULL) {
2229 /* Should be caused by a pre-existing error */
2230 if (!PyErr_Occurred())
2231 PyErr_SetString(PyExc_SystemError,
2232 "null file for PyFile_WriteString");
2233 return -1;
2235 else if (PyFile_Check(f)) {
2236 FILE *fp = PyFile_AsFile(f);
2237 if (fp == NULL) {
2238 err_closed();
2239 return -1;
2241 Py_BEGIN_ALLOW_THREADS
2242 fputs(s, fp);
2243 Py_END_ALLOW_THREADS
2244 return 0;
2246 else if (!PyErr_Occurred()) {
2247 PyObject *v = PyString_FromString(s);
2248 int err;
2249 if (v == NULL)
2250 return -1;
2251 err = PyFile_WriteObject(v, f, Py_PRINT_RAW);
2252 Py_DECREF(v);
2253 return err;
2255 else
2256 return -1;
2259 /* Try to get a file-descriptor from a Python object. If the object
2260 is an integer or long integer, its value is returned. If not, the
2261 object's fileno() method is called if it exists; the method must return
2262 an integer or long integer, which is returned as the file descriptor value.
2263 -1 is returned on failure.
2266 int PyObject_AsFileDescriptor(PyObject *o)
2268 int fd;
2269 PyObject *meth;
2271 if (PyInt_Check(o)) {
2272 fd = PyInt_AsLong(o);
2274 else if (PyLong_Check(o)) {
2275 fd = PyLong_AsLong(o);
2277 else if ((meth = PyObject_GetAttrString(o, "fileno")) != NULL)
2279 PyObject *fno = PyEval_CallObject(meth, NULL);
2280 Py_DECREF(meth);
2281 if (fno == NULL)
2282 return -1;
2284 if (PyInt_Check(fno)) {
2285 fd = PyInt_AsLong(fno);
2286 Py_DECREF(fno);
2288 else if (PyLong_Check(fno)) {
2289 fd = PyLong_AsLong(fno);
2290 Py_DECREF(fno);
2292 else {
2293 PyErr_SetString(PyExc_TypeError,
2294 "fileno() returned a non-integer");
2295 Py_DECREF(fno);
2296 return -1;
2299 else {
2300 PyErr_SetString(PyExc_TypeError,
2301 "argument must be an int, or have a fileno() method.");
2302 return -1;
2305 if (fd < 0) {
2306 PyErr_Format(PyExc_ValueError,
2307 "file descriptor cannot be a negative integer (%i)",
2308 fd);
2309 return -1;
2311 return fd;
2314 /* From here on we need access to the real fgets and fread */
2315 #undef fgets
2316 #undef fread
2319 ** Py_UniversalNewlineFgets is an fgets variation that understands
2320 ** all of \r, \n and \r\n conventions.
2321 ** The stream should be opened in binary mode.
2322 ** If fobj is NULL the routine always does newline conversion, and
2323 ** it may peek one char ahead to gobble the second char in \r\n.
2324 ** If fobj is non-NULL it must be a PyFileObject. In this case there
2325 ** is no readahead but in stead a flag is used to skip a following
2326 ** \n on the next read. Also, if the file is open in binary mode
2327 ** the whole conversion is skipped. Finally, the routine keeps track of
2328 ** the different types of newlines seen.
2329 ** Note that we need no error handling: fgets() treats error and eof
2330 ** identically.
2332 char *
2333 Py_UniversalNewlineFgets(char *buf, int n, FILE *stream, PyObject *fobj)
2335 char *p = buf;
2336 int c;
2337 int newlinetypes = 0;
2338 int skipnextlf = 0;
2339 int univ_newline = 1;
2341 if (fobj) {
2342 if (!PyFile_Check(fobj)) {
2343 errno = ENXIO; /* What can you do... */
2344 return NULL;
2346 univ_newline = ((PyFileObject *)fobj)->f_univ_newline;
2347 if ( !univ_newline )
2348 return fgets(buf, n, stream);
2349 newlinetypes = ((PyFileObject *)fobj)->f_newlinetypes;
2350 skipnextlf = ((PyFileObject *)fobj)->f_skipnextlf;
2352 FLOCKFILE(stream);
2353 c = 'x'; /* Shut up gcc warning */
2354 while (--n > 0 && (c = GETC(stream)) != EOF ) {
2355 if (skipnextlf ) {
2356 skipnextlf = 0;
2357 if (c == '\n') {
2358 /* Seeing a \n here with skipnextlf true
2359 ** means we saw a \r before.
2361 newlinetypes |= NEWLINE_CRLF;
2362 c = GETC(stream);
2363 if (c == EOF) break;
2364 } else {
2366 ** Note that c == EOF also brings us here,
2367 ** so we're okay if the last char in the file
2368 ** is a CR.
2370 newlinetypes |= NEWLINE_CR;
2373 if (c == '\r') {
2374 /* A \r is translated into a \n, and we skip
2375 ** an adjacent \n, if any. We don't set the
2376 ** newlinetypes flag until we've seen the next char.
2378 skipnextlf = 1;
2379 c = '\n';
2380 } else if ( c == '\n') {
2381 newlinetypes |= NEWLINE_LF;
2383 *p++ = c;
2384 if (c == '\n') break;
2386 if ( c == EOF && skipnextlf )
2387 newlinetypes |= NEWLINE_CR;
2388 FUNLOCKFILE(stream);
2389 *p = '\0';
2390 if (fobj) {
2391 ((PyFileObject *)fobj)->f_newlinetypes = newlinetypes;
2392 ((PyFileObject *)fobj)->f_skipnextlf = skipnextlf;
2393 } else if ( skipnextlf ) {
2394 /* If we have no file object we cannot save the
2395 ** skipnextlf flag. We have to readahead, which
2396 ** will cause a pause if we're reading from an
2397 ** interactive stream, but that is very unlikely
2398 ** unless we're doing something silly like
2399 ** execfile("/dev/tty").
2401 c = GETC(stream);
2402 if ( c != '\n' )
2403 ungetc(c, stream);
2405 if (p == buf)
2406 return NULL;
2407 return buf;
2411 ** Py_UniversalNewlineFread is an fread variation that understands
2412 ** all of \r, \n and \r\n conventions.
2413 ** The stream should be opened in binary mode.
2414 ** fobj must be a PyFileObject. In this case there
2415 ** is no readahead but in stead a flag is used to skip a following
2416 ** \n on the next read. Also, if the file is open in binary mode
2417 ** the whole conversion is skipped. Finally, the routine keeps track of
2418 ** the different types of newlines seen.
2420 size_t
2421 Py_UniversalNewlineFread(char *buf, size_t n,
2422 FILE *stream, PyObject *fobj)
2424 char *dst = buf;
2425 PyFileObject *f = (PyFileObject *)fobj;
2426 int newlinetypes, skipnextlf;
2428 assert(buf != NULL);
2429 assert(stream != NULL);
2431 if (!fobj || !PyFile_Check(fobj)) {
2432 errno = ENXIO; /* What can you do... */
2433 return 0;
2435 if (!f->f_univ_newline)
2436 return fread(buf, 1, n, stream);
2437 newlinetypes = f->f_newlinetypes;
2438 skipnextlf = f->f_skipnextlf;
2439 /* Invariant: n is the number of bytes remaining to be filled
2440 * in the buffer.
2442 while (n) {
2443 size_t nread;
2444 int shortread;
2445 char *src = dst;
2447 nread = fread(dst, 1, n, stream);
2448 assert(nread <= n);
2449 if (nread == 0)
2450 break;
2452 n -= nread; /* assuming 1 byte out for each in; will adjust */
2453 shortread = n != 0; /* true iff EOF or error */
2454 while (nread--) {
2455 char c = *src++;
2456 if (c == '\r') {
2457 /* Save as LF and set flag to skip next LF. */
2458 *dst++ = '\n';
2459 skipnextlf = 1;
2461 else if (skipnextlf && c == '\n') {
2462 /* Skip LF, and remember we saw CR LF. */
2463 skipnextlf = 0;
2464 newlinetypes |= NEWLINE_CRLF;
2465 ++n;
2467 else {
2468 /* Normal char to be stored in buffer. Also
2469 * update the newlinetypes flag if either this
2470 * is an LF or the previous char was a CR.
2472 if (c == '\n')
2473 newlinetypes |= NEWLINE_LF;
2474 else if (skipnextlf)
2475 newlinetypes |= NEWLINE_CR;
2476 *dst++ = c;
2477 skipnextlf = 0;
2480 if (shortread) {
2481 /* If this is EOF, update type flags. */
2482 if (skipnextlf && feof(stream))
2483 newlinetypes |= NEWLINE_CR;
2484 break;
2487 f->f_newlinetypes = newlinetypes;
2488 f->f_skipnextlf = skipnextlf;
2489 return dst - buf;
2492 #ifdef __cplusplus
2494 #endif