Exceptions raised during renaming in rotating file handlers are now passed to handleE...
[python.git] / Objects / fileobject.c
blobb34dd5259269f6f9539c150f13041dcc45b5736e
1 /* File object implementation */
3 #include "Python.h"
4 #include "structmember.h"
6 #ifndef DONT_HAVE_SYS_TYPES_H
7 #include <sys/types.h>
8 #endif /* DONT_HAVE_SYS_TYPES_H */
10 #ifdef MS_WINDOWS
11 #define fileno _fileno
12 /* can simulate truncate with Win32 API functions; see file_truncate */
13 #define HAVE_FTRUNCATE
14 #define WIN32_LEAN_AND_MEAN
15 #include <windows.h>
16 #endif
18 #ifdef _MSC_VER
19 /* Need GetVersion to see if on NT so safe to use _wfopen */
20 #define WIN32_LEAN_AND_MEAN
21 #include <windows.h>
22 #endif /* _MSC_VER */
24 #if defined(PYOS_OS2) && defined(PYCC_GCC)
25 #include <io.h>
26 #endif
/* BUF(v): character buffer of the string object v.  The argument is
   parenthesized so that an expression argument is cast as a whole. */
#define BUF(v) PyString_AS_STRING((PyStringObject *)(v))
30 #ifndef DONT_HAVE_ERRNO_H
31 #include <errno.h>
32 #endif
/* Single-character input and stream-locking wrappers.  Without
   getc_unlocked() the plain getc() is used and the lock/unlock macros
   expand to nothing. */
#if !defined(HAVE_GETC_UNLOCKED)
#define GETC(f) getc(f)
#define FLOCKFILE(f)
#define FUNLOCKFILE(f)
#else
#define GETC(f) getc_unlocked(f)
#define FLOCKFILE(f) flockfile(f)
#define FUNLOCKFILE(f) funlockfile(f)
#endif
/* Flag bits accumulated in f_newlinetypes; the three non-zero values
   can be OR-ed together. */
#define NEWLINE_UNKNOWN 0x0 /* no newline encountered so far */
#define NEWLINE_CR      0x1 /* a lone \r was seen */
#define NEWLINE_LF      0x2 /* a lone \n was seen */
#define NEWLINE_CRLF    0x4 /* a \r\n pair was seen */
50 FILE *
51 PyFile_AsFile(PyObject *f)
53 if (f == NULL || !PyFile_Check(f))
54 return NULL;
55 else
56 return ((PyFileObject *)f)->f_fp;
59 PyObject *
60 PyFile_Name(PyObject *f)
62 if (f == NULL || !PyFile_Check(f))
63 return NULL;
64 else
65 return ((PyFileObject *)f)->f_name;
68 /* On Unix, fopen will succeed for directories.
69 In Python, there should be no file objects referring to
70 directories, so we need a check. */
72 static PyFileObject*
73 dircheck(PyFileObject* f)
75 #if defined(HAVE_FSTAT) && defined(S_IFDIR) && defined(EISDIR)
76 struct stat buf;
77 if (f->f_fp == NULL)
78 return f;
79 if (fstat(fileno(f->f_fp), &buf) == 0 &&
80 S_ISDIR(buf.st_mode)) {
81 #ifdef HAVE_STRERROR
82 char *msg = strerror(EISDIR);
83 #else
84 char *msg = "Is a directory";
85 #endif
86 PyObject *exc = PyObject_CallFunction(PyExc_IOError, "(is)",
87 EISDIR, msg);
88 PyErr_SetObject(PyExc_IOError, exc);
89 Py_XDECREF(exc);
90 return NULL;
92 #endif
93 return f;
97 static PyObject *
98 fill_file_fields(PyFileObject *f, FILE *fp, PyObject *name, char *mode,
99 int (*close)(FILE *))
101 assert(f != NULL);
102 assert(PyFile_Check(f));
103 assert(f->f_fp == NULL);
105 Py_DECREF(f->f_name);
106 Py_DECREF(f->f_mode);
107 Py_DECREF(f->f_encoding);
109 Py_INCREF (name);
110 f->f_name = name;
112 f->f_mode = PyString_FromString(mode);
114 f->f_close = close;
115 f->f_softspace = 0;
116 f->f_binary = strchr(mode,'b') != NULL;
117 f->f_buf = NULL;
118 f->f_univ_newline = (strchr(mode, 'U') != NULL);
119 f->f_newlinetypes = NEWLINE_UNKNOWN;
120 f->f_skipnextlf = 0;
121 Py_INCREF(Py_None);
122 f->f_encoding = Py_None;
124 if (f->f_name == NULL || f->f_mode == NULL)
125 return NULL;
126 f->f_fp = fp;
127 f = dircheck(f);
128 return (PyObject *) f;
131 /* check for known incorrect mode strings - problem is, platforms are
132 free to accept any mode characters they like and are supposed to
133 ignore stuff they don't understand... write or append mode with
134 universal newline support is expressly forbidden by PEP 278. */
135 /* zero return is kewl - one is un-kewl */
136 static int
137 check_the_mode(char *mode)
139 size_t len = strlen(mode);
141 switch (len) {
142 case 0:
143 PyErr_SetString(PyExc_ValueError, "empty mode string");
144 return 1;
146 /* reject wU, aU */
147 case 2:
148 switch (mode[0]) {
149 case 'w':
150 case 'a':
151 if (mode[1] == 'U') {
152 PyErr_SetString(PyExc_ValueError,
153 "invalid mode string");
154 return 1;
156 break;
158 break;
160 /* reject w+U, a+U, wU+, aU+ */
161 case 3:
162 switch (mode[0]) {
163 case 'w':
164 case 'a':
165 if ((mode[1] == '+' && mode[2] == 'U') ||
166 (mode[1] == 'U' && mode[2] == '+')) {
167 PyErr_SetString(PyExc_ValueError,
168 "invalid mode string");
169 return 1;
171 break;
173 break;
176 return 0;
179 static PyObject *
180 open_the_file(PyFileObject *f, char *name, char *mode)
182 assert(f != NULL);
183 assert(PyFile_Check(f));
184 #ifdef MS_WINDOWS
185 /* windows ignores the passed name in order to support Unicode */
186 assert(f->f_name != NULL);
187 #else
188 assert(name != NULL);
189 #endif
190 assert(mode != NULL);
191 assert(f->f_fp == NULL);
193 if (check_the_mode(mode))
194 return NULL;
196 /* rexec.py can't stop a user from getting the file() constructor --
197 all they have to do is get *any* file object f, and then do
198 type(f). Here we prevent them from doing damage with it. */
199 if (PyEval_GetRestricted()) {
200 PyErr_SetString(PyExc_IOError,
201 "file() constructor not accessible in restricted mode");
202 return NULL;
204 errno = 0;
206 if (strcmp(mode, "U") == 0 || strcmp(mode, "rU") == 0)
207 mode = "rb";
208 #ifdef MS_WINDOWS
209 if (PyUnicode_Check(f->f_name)) {
210 PyObject *wmode;
211 wmode = PyUnicode_DecodeASCII(mode, strlen(mode), NULL);
212 if (f->f_name && wmode) {
213 Py_BEGIN_ALLOW_THREADS
214 /* PyUnicode_AS_UNICODE OK without thread
215 lock as it is a simple dereference. */
216 f->f_fp = _wfopen(PyUnicode_AS_UNICODE(f->f_name),
217 PyUnicode_AS_UNICODE(wmode));
218 Py_END_ALLOW_THREADS
220 Py_XDECREF(wmode);
222 #endif
223 if (NULL == f->f_fp && NULL != name) {
224 Py_BEGIN_ALLOW_THREADS
225 f->f_fp = fopen(name, mode);
226 Py_END_ALLOW_THREADS
229 if (f->f_fp == NULL) {
230 #ifdef _MSC_VER
231 /* MSVC 6 (Microsoft) leaves errno at 0 for bad mode strings,
232 * across all Windows flavors. When it sets EINVAL varies
233 * across Windows flavors, the exact conditions aren't
234 * documented, and the answer lies in the OS's implementation
235 * of Win32's CreateFile function (whose source is secret).
236 * Seems the best we can do is map EINVAL to ENOENT.
238 if (errno == 0) /* bad mode string */
239 errno = EINVAL;
240 else if (errno == EINVAL) /* unknown, but not a mode string */
241 errno = ENOENT;
242 #endif
243 if (errno == EINVAL)
244 PyErr_Format(PyExc_IOError, "invalid mode: %s",
245 mode);
246 else
247 PyErr_SetFromErrnoWithFilenameObject(PyExc_IOError, f->f_name);
248 f = NULL;
250 if (f != NULL)
251 f = dircheck(f);
252 return (PyObject *)f;
255 PyObject *
256 PyFile_FromFile(FILE *fp, char *name, char *mode, int (*close)(FILE *))
258 PyFileObject *f = (PyFileObject *)PyFile_Type.tp_new(&PyFile_Type,
259 NULL, NULL);
260 if (f != NULL) {
261 PyObject *o_name = PyString_FromString(name);
262 if (fill_file_fields(f, fp, o_name, mode, close) == NULL) {
263 Py_DECREF(f);
264 f = NULL;
266 Py_DECREF(o_name);
268 return (PyObject *) f;
271 PyObject *
272 PyFile_FromString(char *name, char *mode)
274 extern int fclose(FILE *);
275 PyFileObject *f;
277 f = (PyFileObject *)PyFile_FromFile((FILE *)NULL, name, mode, fclose);
278 if (f != NULL) {
279 if (open_the_file(f, name, mode) == NULL) {
280 Py_DECREF(f);
281 f = NULL;
284 return (PyObject *)f;
287 void
288 PyFile_SetBufSize(PyObject *f, int bufsize)
290 PyFileObject *file = (PyFileObject *)f;
291 if (bufsize >= 0) {
292 int type;
293 switch (bufsize) {
294 case 0:
295 type = _IONBF;
296 break;
297 #ifdef HAVE_SETVBUF
298 case 1:
299 type = _IOLBF;
300 bufsize = BUFSIZ;
301 break;
302 #endif
303 default:
304 type = _IOFBF;
305 #ifndef HAVE_SETVBUF
306 bufsize = BUFSIZ;
307 #endif
308 break;
310 fflush(file->f_fp);
311 if (type == _IONBF) {
312 PyMem_Free(file->f_setbuf);
313 file->f_setbuf = NULL;
314 } else {
315 file->f_setbuf = PyMem_Realloc(file->f_setbuf, bufsize);
317 #ifdef HAVE_SETVBUF
318 setvbuf(file->f_fp, file->f_setbuf, type, bufsize);
319 #else /* !HAVE_SETVBUF */
320 setbuf(file->f_fp, file->f_setbuf);
321 #endif /* !HAVE_SETVBUF */
325 /* Set the encoding used to output Unicode strings.
326 Returh 1 on success, 0 on failure. */
329 PyFile_SetEncoding(PyObject *f, const char *enc)
331 PyFileObject *file = (PyFileObject*)f;
332 PyObject *str = PyString_FromString(enc);
333 if (!str)
334 return 0;
335 Py_DECREF(file->f_encoding);
336 file->f_encoding = str;
337 return 1;
340 static PyObject *
341 err_closed(void)
343 PyErr_SetString(PyExc_ValueError, "I/O operation on closed file");
344 return NULL;
347 static void drop_readahead(PyFileObject *);
349 /* Methods */
351 static void
352 file_dealloc(PyFileObject *f)
354 int sts = 0;
355 if (f->weakreflist != NULL)
356 PyObject_ClearWeakRefs((PyObject *) f);
357 if (f->f_fp != NULL && f->f_close != NULL) {
358 Py_BEGIN_ALLOW_THREADS
359 sts = (*f->f_close)(f->f_fp);
360 Py_END_ALLOW_THREADS
361 if (sts == EOF)
362 #ifdef HAVE_STRERROR
363 PySys_WriteStderr("close failed: [Errno %d] %s\n", errno, strerror(errno));
364 #else
365 PySys_WriteStderr("close failed: [Errno %d]\n", errno);
366 #endif
368 PyMem_Free(f->f_setbuf);
369 Py_XDECREF(f->f_name);
370 Py_XDECREF(f->f_mode);
371 Py_XDECREF(f->f_encoding);
372 drop_readahead(f);
373 f->ob_type->tp_free((PyObject *)f);
376 static PyObject *
377 file_repr(PyFileObject *f)
379 if (PyUnicode_Check(f->f_name)) {
380 #ifdef Py_USING_UNICODE
381 PyObject *ret = NULL;
382 PyObject *name;
383 name = PyUnicode_AsUnicodeEscapeString(f->f_name);
384 ret = PyString_FromFormat("<%s file u'%s', mode '%s' at %p>",
385 f->f_fp == NULL ? "closed" : "open",
386 PyString_AsString(name),
387 PyString_AsString(f->f_mode),
389 Py_XDECREF(name);
390 return ret;
391 #endif
392 } else {
393 return PyString_FromFormat("<%s file '%s', mode '%s' at %p>",
394 f->f_fp == NULL ? "closed" : "open",
395 PyString_AsString(f->f_name),
396 PyString_AsString(f->f_mode),
401 static PyObject *
402 file_close(PyFileObject *f)
404 int sts = 0;
405 if (f->f_fp != NULL) {
406 if (f->f_close != NULL) {
407 Py_BEGIN_ALLOW_THREADS
408 errno = 0;
409 sts = (*f->f_close)(f->f_fp);
410 Py_END_ALLOW_THREADS
412 f->f_fp = NULL;
414 PyMem_Free(f->f_setbuf);
415 f->f_setbuf = NULL;
416 if (sts == EOF)
417 return PyErr_SetFromErrno(PyExc_IOError);
418 if (sts != 0)
419 return PyInt_FromLong((long)sts);
420 Py_INCREF(Py_None);
421 return Py_None;
/* File-offset type used throughout this file; 64 bits wide whenever
   large file support is enabled.  off_t is used unless large file
   support is on and off_t is too small, in which case fpos_t must be
   at least 8 bytes. */
#if !defined(HAVE_LARGEFILE_SUPPORT) || SIZEOF_OFF_T >= 8
typedef off_t Py_off_t;
#elif SIZEOF_FPOS_T >= 8
typedef fpos_t Py_off_t;
#else
#error "Large file support, but neither off_t nor fpos_t is large enough."
#endif
437 /* a portable fseek() function
438 return 0 on success, non-zero on failure (with errno set) */
439 static int
440 _portable_fseek(FILE *fp, Py_off_t offset, int whence)
442 #if !defined(HAVE_LARGEFILE_SUPPORT)
443 return fseek(fp, offset, whence);
444 #elif defined(HAVE_FSEEKO) && SIZEOF_OFF_T >= 8
445 return fseeko(fp, offset, whence);
446 #elif defined(HAVE_FSEEK64)
447 return fseek64(fp, offset, whence);
448 #elif defined(__BEOS__)
449 return _fseek(fp, offset, whence);
450 #elif SIZEOF_FPOS_T >= 8
451 /* lacking a 64-bit capable fseek(), use a 64-bit capable fsetpos()
452 and fgetpos() to implement fseek()*/
453 fpos_t pos;
454 switch (whence) {
455 case SEEK_END:
456 #ifdef MS_WINDOWS
457 fflush(fp);
458 if (_lseeki64(fileno(fp), 0, 2) == -1)
459 return -1;
460 #else
461 if (fseek(fp, 0, SEEK_END) != 0)
462 return -1;
463 #endif
464 /* fall through */
465 case SEEK_CUR:
466 if (fgetpos(fp, &pos) != 0)
467 return -1;
468 offset += pos;
469 break;
470 /* case SEEK_SET: break; */
472 return fsetpos(fp, &offset);
473 #else
474 #error "Large file support, but no way to fseek."
475 #endif
479 /* a portable ftell() function
480 Return -1 on failure with errno set appropriately, current file
481 position on success */
482 static Py_off_t
483 _portable_ftell(FILE* fp)
485 #if !defined(HAVE_LARGEFILE_SUPPORT)
486 return ftell(fp);
487 #elif defined(HAVE_FTELLO) && SIZEOF_OFF_T >= 8
488 return ftello(fp);
489 #elif defined(HAVE_FTELL64)
490 return ftell64(fp);
491 #elif SIZEOF_FPOS_T >= 8
492 fpos_t pos;
493 if (fgetpos(fp, &pos) != 0)
494 return -1;
495 return pos;
496 #else
497 #error "Large file support, but no way to ftell."
498 #endif
502 static PyObject *
503 file_seek(PyFileObject *f, PyObject *args)
505 int whence;
506 int ret;
507 Py_off_t offset;
508 PyObject *offobj;
510 if (f->f_fp == NULL)
511 return err_closed();
512 drop_readahead(f);
513 whence = 0;
514 if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &whence))
515 return NULL;
516 #if !defined(HAVE_LARGEFILE_SUPPORT)
517 offset = PyInt_AsLong(offobj);
518 #else
519 offset = PyLong_Check(offobj) ?
520 PyLong_AsLongLong(offobj) : PyInt_AsLong(offobj);
521 #endif
522 if (PyErr_Occurred())
523 return NULL;
525 Py_BEGIN_ALLOW_THREADS
526 errno = 0;
527 ret = _portable_fseek(f->f_fp, offset, whence);
528 Py_END_ALLOW_THREADS
530 if (ret != 0) {
531 PyErr_SetFromErrno(PyExc_IOError);
532 clearerr(f->f_fp);
533 return NULL;
535 f->f_skipnextlf = 0;
536 Py_INCREF(Py_None);
537 return Py_None;
#ifdef HAVE_FTRUNCATE
/* truncate([size]) method.  size defaults to the current position.
   The stream position is recorded first and restored at the end, so
   the call does not move the file position.
   Returns None, or NULL with ValueError (closed file), an argument
   conversion error, or IOError from errno. */
static PyObject *
file_truncate(PyFileObject *f, PyObject *args)
{
    Py_off_t newsize;
    PyObject *newsizeobj = NULL;
    Py_off_t startpos;
    int failed;

    if (f->f_fp == NULL)
        return err_closed();
    if (!PyArg_UnpackTuple(args, "truncate", 0, 1, &newsizeobj))
        return NULL;

    /* Remember where we are.  If the file is open for update and the
     * last operation was input, C leaves the effect of the fflush()
     * below undefined, yet truncate() promises not to move the current
     * position (and fflush() does move it, at least on Windows).
     * Capturing the position now and seeking back at the end is the
     * simplest way to keep that promise.
     */
    Py_BEGIN_ALLOW_THREADS
    errno = 0;
    startpos = _portable_ftell(f->f_fp);
    Py_END_ALLOW_THREADS
    if (startpos == -1)
        goto onioerror;

    /* New size: the explicit argument if given, else the current
     * position.
     */
    if (newsizeobj == NULL) {
        newsize = startpos;
    }
    else {
#if !defined(HAVE_LARGEFILE_SUPPORT)
        newsize = PyInt_AsLong(newsizeobj);
#else
        newsize = PyLong_Check(newsizeobj) ?
                        PyLong_AsLongLong(newsizeobj) :
                        PyInt_AsLong(newsizeobj);
#endif
        if (PyErr_Occurred())
            return NULL;
    }

    /* Stream-level and lower-level I/O are mixed here; flushing keeps
     * both views of the file state in step.
     */
    Py_BEGIN_ALLOW_THREADS
    errno = 0;
    failed = fflush(f->f_fp);
    Py_END_ALLOW_THREADS
    if (failed != 0)
        goto onioerror;

#ifdef MS_WINDOWS
    /* MS _chsize cannot handle sizes that do not fit in 32 bits,
       so it is not used at all. */
    {
        HANDLE hFile;

        /* On Windows the file is cut at the current position, so
           move there first. */
        Py_BEGIN_ALLOW_THREADS
        errno = 0;
        failed = _portable_fseek(f->f_fp, newsize, SEEK_SET) != 0;
        Py_END_ALLOW_THREADS
        if (failed)
            goto onioerror;

        /* Truncate.  Note that this may grow the file! */
        Py_BEGIN_ALLOW_THREADS
        errno = 0;
        hFile = (HANDLE)_get_osfhandle(fileno(f->f_fp));
        failed = hFile == (HANDLE)-1;
        if (failed == 0) {
            failed = SetEndOfFile(hFile) == 0;
            if (failed)
                errno = EACCES;
        }
        Py_END_ALLOW_THREADS
        if (failed)
            goto onioerror;
    }
#else
    Py_BEGIN_ALLOW_THREADS
    errno = 0;
    failed = ftruncate(fileno(f->f_fp), newsize);
    Py_END_ALLOW_THREADS
    if (failed != 0)
        goto onioerror;
#endif /* !MS_WINDOWS */

    /* Go back to where the caller was. */
    Py_BEGIN_ALLOW_THREADS
    errno = 0;
    failed = _portable_fseek(f->f_fp, startpos, SEEK_SET) != 0;
    Py_END_ALLOW_THREADS
    if (failed)
        goto onioerror;

    Py_INCREF(Py_None);
    return Py_None;

onioerror:
    PyErr_SetFromErrno(PyExc_IOError);
    clearerr(f->f_fp);
    return NULL;
}
#endif /* HAVE_FTRUNCATE */
652 static PyObject *
653 file_tell(PyFileObject *f)
655 Py_off_t pos;
657 if (f->f_fp == NULL)
658 return err_closed();
659 Py_BEGIN_ALLOW_THREADS
660 errno = 0;
661 pos = _portable_ftell(f->f_fp);
662 Py_END_ALLOW_THREADS
663 if (pos == -1) {
664 PyErr_SetFromErrno(PyExc_IOError);
665 clearerr(f->f_fp);
666 return NULL;
668 if (f->f_skipnextlf) {
669 int c;
670 c = GETC(f->f_fp);
671 if (c == '\n') {
672 pos++;
673 f->f_skipnextlf = 0;
674 } else if (c != EOF) ungetc(c, f->f_fp);
676 #if !defined(HAVE_LARGEFILE_SUPPORT)
677 return PyInt_FromLong(pos);
678 #else
679 return PyLong_FromLongLong(pos);
680 #endif
683 static PyObject *
684 file_fileno(PyFileObject *f)
686 if (f->f_fp == NULL)
687 return err_closed();
688 return PyInt_FromLong((long) fileno(f->f_fp));
691 static PyObject *
692 file_flush(PyFileObject *f)
694 int res;
696 if (f->f_fp == NULL)
697 return err_closed();
698 Py_BEGIN_ALLOW_THREADS
699 errno = 0;
700 res = fflush(f->f_fp);
701 Py_END_ALLOW_THREADS
702 if (res != 0) {
703 PyErr_SetFromErrno(PyExc_IOError);
704 clearerr(f->f_fp);
705 return NULL;
707 Py_INCREF(Py_None);
708 return Py_None;
711 static PyObject *
712 file_isatty(PyFileObject *f)
714 long res;
715 if (f->f_fp == NULL)
716 return err_closed();
717 Py_BEGIN_ALLOW_THREADS
718 res = isatty((int)fileno(f->f_fp));
719 Py_END_ALLOW_THREADS
720 return PyBool_FromLong(res);
/* Buffer growth steps used by new_buffersize(): SMALLCHUNK is BUFSIZ
   but never less than 8192; BIGCHUNK is 512K, or 16K when int is
   narrower than 4 bytes. */
#if BUFSIZ >= 8192
#define SMALLCHUNK BUFSIZ
#else
#define SMALLCHUNK 8192
#endif

#if SIZEOF_INT >= 4
#define BIGCHUNK (512 * 1024)
#else
#define BIGCHUNK (512 * 32)
#endif
736 static size_t
737 new_buffersize(PyFileObject *f, size_t currentsize)
739 #ifdef HAVE_FSTAT
740 off_t pos, end;
741 struct stat st;
742 if (fstat(fileno(f->f_fp), &st) == 0) {
743 end = st.st_size;
744 /* The following is not a bug: we really need to call lseek()
745 *and* ftell(). The reason is that some stdio libraries
746 mistakenly flush their buffer when ftell() is called and
747 the lseek() call it makes fails, thereby throwing away
748 data that cannot be recovered in any way. To avoid this,
749 we first test lseek(), and only call ftell() if lseek()
750 works. We can't use the lseek() value either, because we
751 need to take the amount of buffered data into account.
752 (Yet another reason why stdio stinks. :-) */
753 pos = lseek(fileno(f->f_fp), 0L, SEEK_CUR);
754 if (pos >= 0) {
755 pos = ftell(f->f_fp);
757 if (pos < 0)
758 clearerr(f->f_fp);
759 if (end > pos && pos >= 0)
760 return currentsize + end - pos + 1;
761 /* Add 1 so if the file were to grow we'd notice. */
763 #endif
764 if (currentsize > SMALLCHUNK) {
765 /* Keep doubling until we reach BIGCHUNK;
766 then keep adding BIGCHUNK. */
767 if (currentsize <= BIGCHUNK)
768 return currentsize + currentsize;
769 else
770 return currentsize + BIGCHUNK;
772 return currentsize + SMALLCHUNK;
/* BLOCKED_ERRNO(x): true when errno value x is EWOULDBLOCK or EAGAIN,
   using whichever of the two the platform defines; always 0 when it
   defines neither. */
#if defined(EWOULDBLOCK) && defined(EAGAIN) && EWOULDBLOCK != EAGAIN
#define BLOCKED_ERRNO(x) ((x) == EWOULDBLOCK || (x) == EAGAIN)
#elif defined(EWOULDBLOCK)
#define BLOCKED_ERRNO(x) ((x) == EWOULDBLOCK)
#elif defined(EAGAIN)
#define BLOCKED_ERRNO(x) ((x) == EAGAIN)
#else
#define BLOCKED_ERRNO(x) 0
#endif
789 static PyObject *
790 file_read(PyFileObject *f, PyObject *args)
792 long bytesrequested = -1;
793 size_t bytesread, buffersize, chunksize;
794 PyObject *v;
796 if (f->f_fp == NULL)
797 return err_closed();
798 if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
799 return NULL;
800 if (bytesrequested < 0)
801 buffersize = new_buffersize(f, (size_t)0);
802 else
803 buffersize = bytesrequested;
804 if (buffersize > INT_MAX) {
805 PyErr_SetString(PyExc_OverflowError,
806 "requested number of bytes is more than a Python string can hold");
807 return NULL;
809 v = PyString_FromStringAndSize((char *)NULL, buffersize);
810 if (v == NULL)
811 return NULL;
812 bytesread = 0;
813 for (;;) {
814 Py_BEGIN_ALLOW_THREADS
815 errno = 0;
816 chunksize = Py_UniversalNewlineFread(BUF(v) + bytesread,
817 buffersize - bytesread, f->f_fp, (PyObject *)f);
818 Py_END_ALLOW_THREADS
819 if (chunksize == 0) {
820 if (!ferror(f->f_fp))
821 break;
822 clearerr(f->f_fp);
823 /* When in non-blocking mode, data shouldn't
824 * be discarded if a blocking signal was
825 * received. That will also happen if
826 * chunksize != 0, but bytesread < buffersize. */
827 if (bytesread > 0 && BLOCKED_ERRNO(errno))
828 break;
829 PyErr_SetFromErrno(PyExc_IOError);
830 Py_DECREF(v);
831 return NULL;
833 bytesread += chunksize;
834 if (bytesread < buffersize) {
835 clearerr(f->f_fp);
836 break;
838 if (bytesrequested < 0) {
839 buffersize = new_buffersize(f, buffersize);
840 if (_PyString_Resize(&v, buffersize) < 0)
841 return NULL;
842 } else {
843 /* Got what was requested. */
844 break;
847 if (bytesread != buffersize)
848 _PyString_Resize(&v, bytesread);
849 return v;
852 static PyObject *
853 file_readinto(PyFileObject *f, PyObject *args)
855 char *ptr;
856 int ntodo;
857 size_t ndone, nnow;
859 if (f->f_fp == NULL)
860 return err_closed();
861 if (!PyArg_ParseTuple(args, "w#", &ptr, &ntodo))
862 return NULL;
863 ndone = 0;
864 while (ntodo > 0) {
865 Py_BEGIN_ALLOW_THREADS
866 errno = 0;
867 nnow = Py_UniversalNewlineFread(ptr+ndone, ntodo, f->f_fp,
868 (PyObject *)f);
869 Py_END_ALLOW_THREADS
870 if (nnow == 0) {
871 if (!ferror(f->f_fp))
872 break;
873 PyErr_SetFromErrno(PyExc_IOError);
874 clearerr(f->f_fp);
875 return NULL;
877 ndone += nnow;
878 ntodo -= nnow;
880 return PyInt_FromLong((long)ndone);
883 /**************************************************************************
884 Routine to get next line using platform fgets().
886 Under MSVC 6:
888 + MS threadsafe getc is very slow (multiple layers of function calls before+
889 after each character, to lock+unlock the stream).
890 + The stream-locking functions are MS-internal -- can't access them from user
891 code.
892 + There's nothing Tim could find in the MS C or platform SDK libraries that
893 can worm around this.
894 + MS fgets locks/unlocks only once per line; it's the only hook we have.
896 So we use fgets for speed(!), despite that it's painful.
898 MS realloc is also slow.
900 Reports from other platforms on this method vs getc_unlocked (which MS doesn't
901 have):
902 Linux a wash
903 Solaris a wash
904 Tru64 Unix getline_via_fgets significantly faster
906 CAUTION: The C std isn't clear about this: in those cases where fgets
907 writes something into the buffer, can it write into any position beyond the
908 required trailing null byte? MSVC 6 fgets does not, and no platform is (yet)
909 known on which it does; and it would be a strange way to code fgets. Still,
910 getline_via_fgets may not work correctly if it does. The std test
911 test_bufio.py should fail if platform fgets() routinely writes beyond the
912 trailing null byte. #define DONT_USE_FGETS_IN_GETLINE to disable this code.
913 **************************************************************************/
/* Decide whether getline_via_fgets() is compiled in.
 * With getc_unlocked():    off by default;
 *                          #define USE_FGETS_IN_GETLINE turns it on.
 * Without getc_unlocked(): on by default.
 * #define DONT_USE_FGETS_IN_GETLINE turns it off in either case.
 */
#if defined(DONT_USE_FGETS_IN_GETLINE)
#undef USE_FGETS_IN_GETLINE
#elif !defined(USE_FGETS_IN_GETLINE) && !defined(HAVE_GETC_UNLOCKED)
#define USE_FGETS_IN_GETLINE
#endif
#ifdef USE_FGETS_IN_GETLINE
/* Read one line from fp with fgets() and return it as a new string
   (including the trailing newline if the line has one).  An empty
   string is returned when nothing could be read.  Returns NULL with an
   exception set when a signal handler raised, when memory runs out, or
   when the line would exceed INT_MAX bytes.

   Technique: the free part of the buffer is filled with '\n' before
   each fgets() call.  Afterwards the first '\n' found is either the
   one fgets() stored (then a '\0' follows it) or one of ours (then the
   byte before it is the '\0' fgets() wrote at the end of a final,
   newline-less line). */
static PyObject*
getline_via_fgets(FILE *fp)
{
/* INITBUFSIZE is the longest line that can be handled by the fast path:
 * one fgets() call, no realloc.  Raising it has a cost, because that
 * much of the buffer is pre-filled with a known value on every call in
 * order to find out how much fgets() overwrote.  100 covers most
 * "regular old text files".
 * MAXBUFSIZE is the longest line that can be handled by the slower but
 * still allocation-free path using two fgets() calls.  300 was picked
 * because the worst real-life job reported on Python-Dev was a mail-log
 * crawler where over half the lines were 254 chars.
 */
#define INITBUFSIZE 100
#define MAXBUFSIZE 300
    char *hit;               /* scratch: fgets()/memchr() result */
    char stackbuf[MAXBUFSIZE];
    PyObject *line;          /* the string object result */
    char *dst;               /* next free slot */
    char *limit;             /* one beyond the last free slot */
    size_t room;             /* number of free slots; limit - dst */
    size_t capacity;         /* total number of slots in the buffer */
    size_t growth;           /* amount the buffer grows by */

    /* Common case first: read into the stack buffer so that no
     * _PyString_Resize is needed.
     */
    capacity = INITBUFSIZE;  /* start small and pray */
    dst = stackbuf;
    for (;;) {
        Py_BEGIN_ALLOW_THREADS
        limit = stackbuf + capacity;
        room = limit - dst;
        memset(dst, '\n', room);
        hit = fgets(dst, room, fp);
        Py_END_ALLOW_THREADS

        if (hit == NULL) {
            clearerr(fp);
            if (PyErr_CheckSignals())
                return NULL;
            line = PyString_FromStringAndSize(stackbuf, dst - stackbuf);
            return line;
        }
        /* fgets stored at least one character */
        hit = memchr(dst, '\n', room);
        if (hit != NULL) {
            /* Whose \n is it?  fgets stops at the first \n and
             * then writes \0, so a \n of fgets is followed by
             * \0.  One of ours cannot be, since our \n's only
             * have more \n's to their right.
             */
            if (hit+1 < limit && *(hit+1) == '\0') {
                /* From fgets: the whole line is here and no
                 * allocation has happened yet.
                 */
                ++hit;  /* include \n from fgets */
            }
            else {
                /* Ours: fgets neither filled the buffer nor
                 * found a newline, so this is the last line
                 * of the file and it has no newline.
                 */
                assert(hit > dst && *(hit-1) == '\0');
                --hit;  /* don't include \0 from fgets */
            }
            line = PyString_FromStringAndSize(stackbuf, hit - stackbuf);
            return line;
        }
        /* fgets overwrote every \n, i.e. the whole buffer.  Either
         * the line continues or we are exactly at EOF.  Use the rest
         * of the stack buffer if that has not been done yet.
         */
        assert(*(limit-1) == '\0');
        if (dst != stackbuf)
            break;
        dst = limit - 1;    /* overwrite trailing null */
        capacity = MAXBUFSIZE;
    }

    /* The stack buffer is too small; continue in a string object.
     */
    capacity = MAXBUFSIZE << 1;
    line = PyString_FromStringAndSize((char*)NULL, (int)capacity);
    if (line == NULL)
        return line;
    /* copy over everything except the last null byte */
    memcpy(BUF(line), stackbuf, MAXBUFSIZE-1);
    dst = BUF(line) + MAXBUFSIZE - 1;

    /* Keep reading into the string object.  On a successful end the
     * loop is left with hit pointing one beyond the end of the line.
     * The logic mirrors the stack-buffer loop above.
     */
    for (;;) {
        Py_BEGIN_ALLOW_THREADS
        limit = BUF(line) + capacity;
        room = limit - dst;
        memset(dst, '\n', room);
        hit = fgets(dst, room, fp);
        Py_END_ALLOW_THREADS

        if (hit == NULL) {
            clearerr(fp);
            if (PyErr_CheckSignals()) {
                Py_DECREF(line);
                return NULL;
            }
            hit = dst;
            break;
        }
        hit = memchr(dst, '\n', room);
        if (hit != NULL) {
            if (hit+1 < limit && *(hit+1) == '\0') {
                /* \n came from fgets */
                ++hit;
                break;
            }
            /* \n came from us; last line of file, no newline */
            assert(hit > dst && *(hit-1) == '\0');
            --hit;
            break;
        }
        /* grow the buffer and go round again */
        assert(*(limit-1) == '\0');
        growth = capacity >> 2;  /* mild exponential growth */
        capacity += growth;
        if (capacity > INT_MAX) {
            PyErr_SetString(PyExc_OverflowError,
                "line is longer than a Python string can hold");
            Py_DECREF(line);
            return NULL;
        }
        if (_PyString_Resize(&line, (int)capacity) < 0)
            return NULL;
        /* overwrite the trailing null byte */
        dst = BUF(line) + (capacity - growth - 1);
    }
    if (BUF(line) + capacity != hit)
        _PyString_Resize(&line, hit - BUF(line));
    return line;
#undef INITBUFSIZE
#undef MAXBUFSIZE
}
#endif /* ifdef USE_FGETS_IN_GETLINE */
1091 /* Internal routine to get a line.
1092 Size argument interpretation:
1093 > 0: max length;
1094 <= 0: read arbitrary line
1097 static PyObject *
1098 get_line(PyFileObject *f, int n)
1100 FILE *fp = f->f_fp;
1101 int c;
1102 char *buf, *end;
1103 size_t total_v_size; /* total # of slots in buffer */
1104 size_t used_v_size; /* # used slots in buffer */
1105 size_t increment; /* amount to increment the buffer */
1106 PyObject *v;
1107 int newlinetypes = f->f_newlinetypes;
1108 int skipnextlf = f->f_skipnextlf;
1109 int univ_newline = f->f_univ_newline;
1111 #if defined(USE_FGETS_IN_GETLINE)
1112 if (n <= 0 && !univ_newline )
1113 return getline_via_fgets(fp);
1114 #endif
1115 total_v_size = n > 0 ? n : 100;
1116 v = PyString_FromStringAndSize((char *)NULL, total_v_size);
1117 if (v == NULL)
1118 return NULL;
1119 buf = BUF(v);
1120 end = buf + total_v_size;
1122 for (;;) {
1123 Py_BEGIN_ALLOW_THREADS
1124 FLOCKFILE(fp);
1125 if (univ_newline) {
1126 c = 'x'; /* Shut up gcc warning */
1127 while ( buf != end && (c = GETC(fp)) != EOF ) {
1128 if (skipnextlf ) {
1129 skipnextlf = 0;
1130 if (c == '\n') {
1131 /* Seeing a \n here with
1132 * skipnextlf true means we
1133 * saw a \r before.
1135 newlinetypes |= NEWLINE_CRLF;
1136 c = GETC(fp);
1137 if (c == EOF) break;
1138 } else {
1139 newlinetypes |= NEWLINE_CR;
1142 if (c == '\r') {
1143 skipnextlf = 1;
1144 c = '\n';
1145 } else if ( c == '\n')
1146 newlinetypes |= NEWLINE_LF;
1147 *buf++ = c;
1148 if (c == '\n') break;
1150 if ( c == EOF && skipnextlf )
1151 newlinetypes |= NEWLINE_CR;
1152 } else /* If not universal newlines use the normal loop */
1153 while ((c = GETC(fp)) != EOF &&
1154 (*buf++ = c) != '\n' &&
1155 buf != end)
1157 FUNLOCKFILE(fp);
1158 Py_END_ALLOW_THREADS
1159 f->f_newlinetypes = newlinetypes;
1160 f->f_skipnextlf = skipnextlf;
1161 if (c == '\n')
1162 break;
1163 if (c == EOF) {
1164 if (ferror(fp)) {
1165 PyErr_SetFromErrno(PyExc_IOError);
1166 clearerr(fp);
1167 Py_DECREF(v);
1168 return NULL;
1170 clearerr(fp);
1171 if (PyErr_CheckSignals()) {
1172 Py_DECREF(v);
1173 return NULL;
1175 break;
1177 /* Must be because buf == end */
1178 if (n > 0)
1179 break;
1180 used_v_size = total_v_size;
1181 increment = total_v_size >> 2; /* mild exponential growth */
1182 total_v_size += increment;
1183 if (total_v_size > INT_MAX) {
1184 PyErr_SetString(PyExc_OverflowError,
1185 "line is longer than a Python string can hold");
1186 Py_DECREF(v);
1187 return NULL;
1189 if (_PyString_Resize(&v, total_v_size) < 0)
1190 return NULL;
1191 buf = BUF(v) + used_v_size;
1192 end = BUF(v) + total_v_size;
1195 used_v_size = buf - BUF(v);
1196 if (used_v_size != total_v_size)
1197 _PyString_Resize(&v, used_v_size);
1198 return v;
1201 /* External C interface */
1203 PyObject *
1204 PyFile_GetLine(PyObject *f, int n)
1206 PyObject *result;
1208 if (f == NULL) {
1209 PyErr_BadInternalCall();
1210 return NULL;
1213 if (PyFile_Check(f)) {
1214 if (((PyFileObject*)f)->f_fp == NULL)
1215 return err_closed();
1216 result = get_line((PyFileObject *)f, n);
1218 else {
1219 PyObject *reader;
1220 PyObject *args;
1222 reader = PyObject_GetAttrString(f, "readline");
1223 if (reader == NULL)
1224 return NULL;
1225 if (n <= 0)
1226 args = PyTuple_New(0);
1227 else
1228 args = Py_BuildValue("(i)", n);
1229 if (args == NULL) {
1230 Py_DECREF(reader);
1231 return NULL;
1233 result = PyEval_CallObject(reader, args);
1234 Py_DECREF(reader);
1235 Py_DECREF(args);
1236 if (result != NULL && !PyString_Check(result) &&
1237 !PyUnicode_Check(result)) {
1238 Py_DECREF(result);
1239 result = NULL;
1240 PyErr_SetString(PyExc_TypeError,
1241 "object.readline() returned non-string");
1245 if (n < 0 && result != NULL && PyString_Check(result)) {
1246 char *s = PyString_AS_STRING(result);
1247 int len = PyString_GET_SIZE(result);
1248 if (len == 0) {
1249 Py_DECREF(result);
1250 result = NULL;
1251 PyErr_SetString(PyExc_EOFError,
1252 "EOF when reading a line");
1254 else if (s[len-1] == '\n') {
1255 if (result->ob_refcnt == 1)
1256 _PyString_Resize(&result, len-1);
1257 else {
1258 PyObject *v;
1259 v = PyString_FromStringAndSize(s, len-1);
1260 Py_DECREF(result);
1261 result = v;
1265 #ifdef Py_USING_UNICODE
1266 if (n < 0 && result != NULL && PyUnicode_Check(result)) {
1267 Py_UNICODE *s = PyUnicode_AS_UNICODE(result);
1268 int len = PyUnicode_GET_SIZE(result);
1269 if (len == 0) {
1270 Py_DECREF(result);
1271 result = NULL;
1272 PyErr_SetString(PyExc_EOFError,
1273 "EOF when reading a line");
1275 else if (s[len-1] == '\n') {
1276 if (result->ob_refcnt == 1)
1277 PyUnicode_Resize(&result, len-1);
1278 else {
1279 PyObject *v;
1280 v = PyUnicode_FromUnicode(s, len-1);
1281 Py_DECREF(result);
1282 result = v;
1286 #endif
1287 return result;
1290 /* Python method */
1292 static PyObject *
1293 file_readline(PyFileObject *f, PyObject *args)
1295 int n = -1;
1297 if (f->f_fp == NULL)
1298 return err_closed();
1299 if (!PyArg_ParseTuple(args, "|i:readline", &n))
1300 return NULL;
1301 if (n == 0)
1302 return PyString_FromString("");
1303 if (n < 0)
1304 n = 0;
1305 return get_line(f, n);
1308 static PyObject *
1309 file_readlines(PyFileObject *f, PyObject *args)
1311 long sizehint = 0;
1312 PyObject *list;
1313 PyObject *line;
1314 char small_buffer[SMALLCHUNK];
1315 char *buffer = small_buffer;
1316 size_t buffersize = SMALLCHUNK;
1317 PyObject *big_buffer = NULL;
1318 size_t nfilled = 0;
1319 size_t nread;
1320 size_t totalread = 0;
1321 char *p, *q, *end;
1322 int err;
1323 int shortread = 0;
1325 if (f->f_fp == NULL)
1326 return err_closed();
1327 if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
1328 return NULL;
1329 if ((list = PyList_New(0)) == NULL)
1330 return NULL;
1331 for (;;) {
1332 if (shortread)
1333 nread = 0;
1334 else {
1335 Py_BEGIN_ALLOW_THREADS
1336 errno = 0;
1337 nread = Py_UniversalNewlineFread(buffer+nfilled,
1338 buffersize-nfilled, f->f_fp, (PyObject *)f);
1339 Py_END_ALLOW_THREADS
1340 shortread = (nread < buffersize-nfilled);
1342 if (nread == 0) {
1343 sizehint = 0;
1344 if (!ferror(f->f_fp))
1345 break;
1346 PyErr_SetFromErrno(PyExc_IOError);
1347 clearerr(f->f_fp);
1348 error:
1349 Py_DECREF(list);
1350 list = NULL;
1351 goto cleanup;
1353 totalread += nread;
1354 p = memchr(buffer+nfilled, '\n', nread);
1355 if (p == NULL) {
1356 /* Need a larger buffer to fit this line */
1357 nfilled += nread;
1358 buffersize *= 2;
1359 if (buffersize > INT_MAX) {
1360 PyErr_SetString(PyExc_OverflowError,
1361 "line is longer than a Python string can hold");
1362 goto error;
1364 if (big_buffer == NULL) {
1365 /* Create the big buffer */
1366 big_buffer = PyString_FromStringAndSize(
1367 NULL, buffersize);
1368 if (big_buffer == NULL)
1369 goto error;
1370 buffer = PyString_AS_STRING(big_buffer);
1371 memcpy(buffer, small_buffer, nfilled);
1373 else {
1374 /* Grow the big buffer */
1375 if ( _PyString_Resize(&big_buffer, buffersize) < 0 )
1376 goto error;
1377 buffer = PyString_AS_STRING(big_buffer);
1379 continue;
1381 end = buffer+nfilled+nread;
1382 q = buffer;
1383 do {
1384 /* Process complete lines */
1385 p++;
1386 line = PyString_FromStringAndSize(q, p-q);
1387 if (line == NULL)
1388 goto error;
1389 err = PyList_Append(list, line);
1390 Py_DECREF(line);
1391 if (err != 0)
1392 goto error;
1393 q = p;
1394 p = memchr(q, '\n', end-q);
1395 } while (p != NULL);
1396 /* Move the remaining incomplete line to the start */
1397 nfilled = end-q;
1398 memmove(buffer, q, nfilled);
1399 if (sizehint > 0)
1400 if (totalread >= (size_t)sizehint)
1401 break;
1403 if (nfilled != 0) {
1404 /* Partial last line */
1405 line = PyString_FromStringAndSize(buffer, nfilled);
1406 if (line == NULL)
1407 goto error;
1408 if (sizehint > 0) {
1409 /* Need to complete the last line */
1410 PyObject *rest = get_line(f, 0);
1411 if (rest == NULL) {
1412 Py_DECREF(line);
1413 goto error;
1415 PyString_Concat(&line, rest);
1416 Py_DECREF(rest);
1417 if (line == NULL)
1418 goto error;
1420 err = PyList_Append(list, line);
1421 Py_DECREF(line);
1422 if (err != 0)
1423 goto error;
1425 cleanup:
1426 Py_XDECREF(big_buffer);
1427 return list;
1430 static PyObject *
1431 file_write(PyFileObject *f, PyObject *args)
1433 char *s;
1434 int n, n2;
1435 if (f->f_fp == NULL)
1436 return err_closed();
1437 if (!PyArg_ParseTuple(args, f->f_binary ? "s#" : "t#", &s, &n))
1438 return NULL;
1439 f->f_softspace = 0;
1440 Py_BEGIN_ALLOW_THREADS
1441 errno = 0;
1442 n2 = fwrite(s, 1, n, f->f_fp);
1443 Py_END_ALLOW_THREADS
1444 if (n2 != n) {
1445 PyErr_SetFromErrno(PyExc_IOError);
1446 clearerr(f->f_fp);
1447 return NULL;
1449 Py_INCREF(Py_None);
1450 return Py_None;
1453 static PyObject *
1454 file_writelines(PyFileObject *f, PyObject *seq)
1456 #define CHUNKSIZE 1000
1457 PyObject *list, *line;
1458 PyObject *it; /* iter(seq) */
1459 PyObject *result;
1460 int i, j, index, len, nwritten, islist;
1462 assert(seq != NULL);
1463 if (f->f_fp == NULL)
1464 return err_closed();
1466 result = NULL;
1467 list = NULL;
1468 islist = PyList_Check(seq);
1469 if (islist)
1470 it = NULL;
1471 else {
1472 it = PyObject_GetIter(seq);
1473 if (it == NULL) {
1474 PyErr_SetString(PyExc_TypeError,
1475 "writelines() requires an iterable argument");
1476 return NULL;
1478 /* From here on, fail by going to error, to reclaim "it". */
1479 list = PyList_New(CHUNKSIZE);
1480 if (list == NULL)
1481 goto error;
1484 /* Strategy: slurp CHUNKSIZE lines into a private list,
1485 checking that they are all strings, then write that list
1486 without holding the interpreter lock, then come back for more. */
1487 for (index = 0; ; index += CHUNKSIZE) {
1488 if (islist) {
1489 Py_XDECREF(list);
1490 list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
1491 if (list == NULL)
1492 goto error;
1493 j = PyList_GET_SIZE(list);
1495 else {
1496 for (j = 0; j < CHUNKSIZE; j++) {
1497 line = PyIter_Next(it);
1498 if (line == NULL) {
1499 if (PyErr_Occurred())
1500 goto error;
1501 break;
1503 PyList_SetItem(list, j, line);
1506 if (j == 0)
1507 break;
1509 /* Check that all entries are indeed strings. If not,
1510 apply the same rules as for file.write() and
1511 convert the results to strings. This is slow, but
1512 seems to be the only way since all conversion APIs
1513 could potentially execute Python code. */
1514 for (i = 0; i < j; i++) {
1515 PyObject *v = PyList_GET_ITEM(list, i);
1516 if (!PyString_Check(v)) {
1517 const char *buffer;
1518 int len;
1519 if (((f->f_binary &&
1520 PyObject_AsReadBuffer(v,
1521 (const void**)&buffer,
1522 &len)) ||
1523 PyObject_AsCharBuffer(v,
1524 &buffer,
1525 &len))) {
1526 PyErr_SetString(PyExc_TypeError,
1527 "writelines() argument must be a sequence of strings");
1528 goto error;
1530 line = PyString_FromStringAndSize(buffer,
1531 len);
1532 if (line == NULL)
1533 goto error;
1534 Py_DECREF(v);
1535 PyList_SET_ITEM(list, i, line);
1539 /* Since we are releasing the global lock, the
1540 following code may *not* execute Python code. */
1541 Py_BEGIN_ALLOW_THREADS
1542 f->f_softspace = 0;
1543 errno = 0;
1544 for (i = 0; i < j; i++) {
1545 line = PyList_GET_ITEM(list, i);
1546 len = PyString_GET_SIZE(line);
1547 nwritten = fwrite(PyString_AS_STRING(line),
1548 1, len, f->f_fp);
1549 if (nwritten != len) {
1550 Py_BLOCK_THREADS
1551 PyErr_SetFromErrno(PyExc_IOError);
1552 clearerr(f->f_fp);
1553 goto error;
1556 Py_END_ALLOW_THREADS
1558 if (j < CHUNKSIZE)
1559 break;
1562 Py_INCREF(Py_None);
1563 result = Py_None;
1564 error:
1565 Py_XDECREF(list);
1566 Py_XDECREF(it);
1567 return result;
1568 #undef CHUNKSIZE
1571 static PyObject *
1572 file_getiter(PyFileObject *f)
1574 if (f->f_fp == NULL)
1575 return err_closed();
1576 Py_INCREF(f);
1577 return (PyObject *)f;
1580 PyDoc_STRVAR(readline_doc,
1581 "readline([size]) -> next line from the file, as a string.\n"
1582 "\n"
1583 "Retain newline. A non-negative size argument limits the maximum\n"
1584 "number of bytes to return (an incomplete line may be returned then).\n"
1585 "Return an empty string at EOF.");
1587 PyDoc_STRVAR(read_doc,
1588 "read([size]) -> read at most size bytes, returned as a string.\n"
1589 "\n"
1590 "If the size argument is negative or omitted, read until EOF is reached.\n"
1591 "Notice that when in non-blocking mode, less data than what was requested\n"
1592 "may be returned, even if no size parameter was given.");
1594 PyDoc_STRVAR(write_doc,
1595 "write(str) -> None. Write string str to file.\n"
1596 "\n"
1597 "Note that due to buffering, flush() or close() may be needed before\n"
1598 "the file on disk reflects the data written.");
1600 PyDoc_STRVAR(fileno_doc,
1601 "fileno() -> integer \"file descriptor\".\n"
1602 "\n"
1603 "This is needed for lower-level file interfaces, such os.read().");
1605 PyDoc_STRVAR(seek_doc,
1606 "seek(offset[, whence]) -> None. Move to new file position.\n"
1607 "\n"
1608 "Argument offset is a byte count. Optional argument whence defaults to\n"
1609 "0 (offset from start of file, offset should be >= 0); other values are 1\n"
1610 "(move relative to current position, positive or negative), and 2 (move\n"
1611 "relative to end of file, usually negative, although many platforms allow\n"
1612 "seeking beyond the end of a file). If the file is opened in text mode,\n"
1613 "only offsets returned by tell() are legal. Use of other offsets causes\n"
1614 "undefined behavior."
1615 "\n"
1616 "Note that not all file objects are seekable.");
1618 #ifdef HAVE_FTRUNCATE
1619 PyDoc_STRVAR(truncate_doc,
1620 "truncate([size]) -> None. Truncate the file to at most size bytes.\n"
1621 "\n"
1622 "Size defaults to the current file position, as returned by tell().");
1623 #endif
1625 PyDoc_STRVAR(tell_doc,
1626 "tell() -> current file position, an integer (may be a long integer).");
1628 PyDoc_STRVAR(readinto_doc,
1629 "readinto() -> Undocumented. Don't use this; it may go away.");
1631 PyDoc_STRVAR(readlines_doc,
1632 "readlines([size]) -> list of strings, each a line from the file.\n"
1633 "\n"
1634 "Call readline() repeatedly and return a list of the lines so read.\n"
1635 "The optional size argument, if given, is an approximate bound on the\n"
1636 "total number of bytes in the lines returned.");
1638 PyDoc_STRVAR(xreadlines_doc,
1639 "xreadlines() -> returns self.\n"
1640 "\n"
1641 "For backward compatibility. File objects now include the performance\n"
1642 "optimizations previously implemented in the xreadlines module.");
1644 PyDoc_STRVAR(writelines_doc,
1645 "writelines(sequence_of_strings) -> None. Write the strings to the file.\n"
1646 "\n"
1647 "Note that newlines are not added. The sequence can be any iterable object\n"
1648 "producing strings. This is equivalent to calling write() for each string.");
1650 PyDoc_STRVAR(flush_doc,
1651 "flush() -> None. Flush the internal I/O buffer.");
1653 PyDoc_STRVAR(close_doc,
1654 "close() -> None or (perhaps) an integer. Close the file.\n"
1655 "\n"
1656 "Sets data attribute .closed to True. A closed file cannot be used for\n"
1657 "further I/O operations. close() may be called more than once without\n"
1658 "error. Some kinds of file objects (for example, opened by popen())\n"
1659 "may return an exit status upon closing.");
1661 PyDoc_STRVAR(isatty_doc,
1662 "isatty() -> true or false. True if the file is connected to a tty device.");
1664 static PyMethodDef file_methods[] = {
1665 {"readline", (PyCFunction)file_readline, METH_VARARGS, readline_doc},
1666 {"read", (PyCFunction)file_read, METH_VARARGS, read_doc},
1667 {"write", (PyCFunction)file_write, METH_VARARGS, write_doc},
1668 {"fileno", (PyCFunction)file_fileno, METH_NOARGS, fileno_doc},
1669 {"seek", (PyCFunction)file_seek, METH_VARARGS, seek_doc},
1670 #ifdef HAVE_FTRUNCATE
1671 {"truncate", (PyCFunction)file_truncate, METH_VARARGS, truncate_doc},
1672 #endif
1673 {"tell", (PyCFunction)file_tell, METH_NOARGS, tell_doc},
1674 {"readinto", (PyCFunction)file_readinto, METH_VARARGS, readinto_doc},
1675 {"readlines", (PyCFunction)file_readlines,METH_VARARGS, readlines_doc},
1676 {"xreadlines",(PyCFunction)file_getiter, METH_NOARGS, xreadlines_doc},
1677 {"writelines",(PyCFunction)file_writelines, METH_O, writelines_doc},
1678 {"flush", (PyCFunction)file_flush, METH_NOARGS, flush_doc},
1679 {"close", (PyCFunction)file_close, METH_NOARGS, close_doc},
1680 {"isatty", (PyCFunction)file_isatty, METH_NOARGS, isatty_doc},
1681 {NULL, NULL} /* sentinel */
1684 #define OFF(x) offsetof(PyFileObject, x)
1686 static PyMemberDef file_memberlist[] = {
1687 {"softspace", T_INT, OFF(f_softspace), 0,
1688 "flag indicating that a space needs to be printed; used by print"},
1689 {"mode", T_OBJECT, OFF(f_mode), RO,
1690 "file mode ('r', 'U', 'w', 'a', possibly with 'b' or '+' added)"},
1691 {"name", T_OBJECT, OFF(f_name), RO,
1692 "file name"},
1693 {"encoding", T_OBJECT, OFF(f_encoding), RO,
1694 "file encoding"},
1695 /* getattr(f, "closed") is implemented without this table */
1696 {NULL} /* Sentinel */
1699 static PyObject *
1700 get_closed(PyFileObject *f, void *closure)
1702 return PyBool_FromLong((long)(f->f_fp == 0));
1704 static PyObject *
1705 get_newlines(PyFileObject *f, void *closure)
1707 switch (f->f_newlinetypes) {
1708 case NEWLINE_UNKNOWN:
1709 Py_INCREF(Py_None);
1710 return Py_None;
1711 case NEWLINE_CR:
1712 return PyString_FromString("\r");
1713 case NEWLINE_LF:
1714 return PyString_FromString("\n");
1715 case NEWLINE_CR|NEWLINE_LF:
1716 return Py_BuildValue("(ss)", "\r", "\n");
1717 case NEWLINE_CRLF:
1718 return PyString_FromString("\r\n");
1719 case NEWLINE_CR|NEWLINE_CRLF:
1720 return Py_BuildValue("(ss)", "\r", "\r\n");
1721 case NEWLINE_LF|NEWLINE_CRLF:
1722 return Py_BuildValue("(ss)", "\n", "\r\n");
1723 case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF:
1724 return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
1725 default:
1726 PyErr_Format(PyExc_SystemError,
1727 "Unknown newlines value 0x%x\n",
1728 f->f_newlinetypes);
1729 return NULL;
1733 static PyGetSetDef file_getsetlist[] = {
1734 {"closed", (getter)get_closed, NULL, "True if the file is closed"},
1735 {"newlines", (getter)get_newlines, NULL,
1736 "end-of-line convention used in this file"},
1737 {0},
1740 static void
1741 drop_readahead(PyFileObject *f)
1743 if (f->f_buf != NULL) {
1744 PyMem_Free(f->f_buf);
1745 f->f_buf = NULL;
1749 /* Make sure that file has a readahead buffer with at least one byte
1750 (unless at EOF) and no more than bufsize. Returns negative value on
1751 error */
1752 static int
1753 readahead(PyFileObject *f, int bufsize)
1755 int chunksize;
1757 if (f->f_buf != NULL) {
1758 if( (f->f_bufend - f->f_bufptr) >= 1)
1759 return 0;
1760 else
1761 drop_readahead(f);
1763 if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
1764 return -1;
1766 Py_BEGIN_ALLOW_THREADS
1767 errno = 0;
1768 chunksize = Py_UniversalNewlineFread(
1769 f->f_buf, bufsize, f->f_fp, (PyObject *)f);
1770 Py_END_ALLOW_THREADS
1771 if (chunksize == 0) {
1772 if (ferror(f->f_fp)) {
1773 PyErr_SetFromErrno(PyExc_IOError);
1774 clearerr(f->f_fp);
1775 drop_readahead(f);
1776 return -1;
1779 f->f_bufptr = f->f_buf;
1780 f->f_bufend = f->f_buf + chunksize;
1781 return 0;
1784 /* Used by file_iternext. The returned string will start with 'skip'
1785 uninitialized bytes followed by the remainder of the line. Don't be
1786 horrified by the recursive call: maximum recursion depth is limited by
1787 logarithmic buffer growth to about 50 even when reading a 1gb line. */
1789 static PyStringObject *
1790 readahead_get_line_skip(PyFileObject *f, int skip, int bufsize)
1792 PyStringObject* s;
1793 char *bufptr;
1794 char *buf;
1795 int len;
1797 if (f->f_buf == NULL)
1798 if (readahead(f, bufsize) < 0)
1799 return NULL;
1801 len = f->f_bufend - f->f_bufptr;
1802 if (len == 0)
1803 return (PyStringObject *)
1804 PyString_FromStringAndSize(NULL, skip);
1805 bufptr = memchr(f->f_bufptr, '\n', len);
1806 if (bufptr != NULL) {
1807 bufptr++; /* Count the '\n' */
1808 len = bufptr - f->f_bufptr;
1809 s = (PyStringObject *)
1810 PyString_FromStringAndSize(NULL, skip+len);
1811 if (s == NULL)
1812 return NULL;
1813 memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
1814 f->f_bufptr = bufptr;
1815 if (bufptr == f->f_bufend)
1816 drop_readahead(f);
1817 } else {
1818 bufptr = f->f_bufptr;
1819 buf = f->f_buf;
1820 f->f_buf = NULL; /* Force new readahead buffer */
1821 s = readahead_get_line_skip(
1822 f, skip+len, bufsize + (bufsize>>2) );
1823 if (s == NULL) {
1824 PyMem_Free(buf);
1825 return NULL;
1827 memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
1828 PyMem_Free(buf);
1830 return s;
1833 /* A larger buffer size may actually decrease performance. */
1834 #define READAHEAD_BUFSIZE 8192
1836 static PyObject *
1837 file_iternext(PyFileObject *f)
1839 PyStringObject* l;
1841 if (f->f_fp == NULL)
1842 return err_closed();
1844 l = readahead_get_line_skip(f, 0, READAHEAD_BUFSIZE);
1845 if (l == NULL || PyString_GET_SIZE(l) == 0) {
1846 Py_XDECREF(l);
1847 return NULL;
1849 return (PyObject *)l;
1853 static PyObject *
1854 file_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1856 PyObject *self;
1857 static PyObject *not_yet_string;
1859 assert(type != NULL && type->tp_alloc != NULL);
1861 if (not_yet_string == NULL) {
1862 not_yet_string = PyString_FromString("<uninitialized file>");
1863 if (not_yet_string == NULL)
1864 return NULL;
1867 self = type->tp_alloc(type, 0);
1868 if (self != NULL) {
1869 /* Always fill in the name and mode, so that nobody else
1870 needs to special-case NULLs there. */
1871 Py_INCREF(not_yet_string);
1872 ((PyFileObject *)self)->f_name = not_yet_string;
1873 Py_INCREF(not_yet_string);
1874 ((PyFileObject *)self)->f_mode = not_yet_string;
1875 Py_INCREF(Py_None);
1876 ((PyFileObject *)self)->f_encoding = Py_None;
1877 ((PyFileObject *)self)->weakreflist = NULL;
1879 return self;
1882 static int
1883 file_init(PyObject *self, PyObject *args, PyObject *kwds)
1885 PyFileObject *foself = (PyFileObject *)self;
1886 int ret = 0;
1887 static const char *kwlist[] = {"name", "mode", "buffering", 0};
1888 char *name = NULL;
1889 char *mode = "r";
1890 int bufsize = -1;
1891 int wideargument = 0;
1893 assert(PyFile_Check(self));
1894 if (foself->f_fp != NULL) {
1895 /* Have to close the existing file first. */
1896 PyObject *closeresult = file_close(foself);
1897 if (closeresult == NULL)
1898 return -1;
1899 Py_DECREF(closeresult);
1902 #ifdef Py_WIN_WIDE_FILENAMES
1903 if (GetVersion() < 0x80000000) { /* On NT, so wide API available */
1904 PyObject *po;
1905 if (PyArg_ParseTupleAndKeywords(args, kwds, "U|si:file",
1906 kwlist, &po, &mode, &bufsize)) {
1907 wideargument = 1;
1908 if (fill_file_fields(foself, NULL, po, mode,
1909 fclose) == NULL)
1910 goto Error;
1911 } else {
1912 /* Drop the argument parsing error as narrow
1913 strings are also valid. */
1914 PyErr_Clear();
1917 #endif
1919 if (!wideargument) {
1920 PyObject *o_name;
1922 if (!PyArg_ParseTupleAndKeywords(args, kwds, "et|si:file", kwlist,
1923 Py_FileSystemDefaultEncoding,
1924 &name,
1925 &mode, &bufsize))
1926 return -1;
1928 /* We parse again to get the name as a PyObject */
1929 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|si:file",
1930 kwlist, &o_name, &mode,
1931 &bufsize))
1932 return -1;
1934 if (fill_file_fields(foself, NULL, o_name, mode,
1935 fclose) == NULL)
1936 goto Error;
1938 if (open_the_file(foself, name, mode) == NULL)
1939 goto Error;
1940 foself->f_setbuf = NULL;
1941 PyFile_SetBufSize(self, bufsize);
1942 goto Done;
1944 Error:
1945 ret = -1;
1946 /* fall through */
1947 Done:
1948 PyMem_Free(name); /* free the encoded string */
1949 return ret;
1952 PyDoc_VAR(file_doc) =
1953 PyDoc_STR(
1954 "file(name[, mode[, buffering]]) -> file object\n"
1955 "\n"
1956 "Open a file. The mode can be 'r', 'w' or 'a' for reading (default),\n"
1957 "writing or appending. The file will be created if it doesn't exist\n"
1958 "when opened for writing or appending; it will be truncated when\n"
1959 "opened for writing. Add a 'b' to the mode for binary files.\n"
1960 "Add a '+' to the mode to allow simultaneous reading and writing.\n"
1961 "If the buffering argument is given, 0 means unbuffered, 1 means line\n"
1962 "buffered, and larger numbers specify the buffer size.\n"
1964 PyDoc_STR(
1965 "Add a 'U' to mode to open the file for input with universal newline\n"
1966 "support. Any line ending in the input file will be seen as a '\\n'\n"
1967 "in Python. Also, a file so opened gains the attribute 'newlines';\n"
1968 "the value for this attribute is one of None (no newline read yet),\n"
1969 "'\\r', '\\n', '\\r\\n' or a tuple containing all the newline types seen.\n"
1970 "\n"
1971 "'U' cannot be combined with 'w' or '+' mode.\n"
1973 PyDoc_STR(
1974 "\n"
1975 "Note: open() is an alias for file()."
1978 PyTypeObject PyFile_Type = {
1979 PyObject_HEAD_INIT(&PyType_Type)
1981 "file",
1982 sizeof(PyFileObject),
1984 (destructor)file_dealloc, /* tp_dealloc */
1985 0, /* tp_print */
1986 0, /* tp_getattr */
1987 0, /* tp_setattr */
1988 0, /* tp_compare */
1989 (reprfunc)file_repr, /* tp_repr */
1990 0, /* tp_as_number */
1991 0, /* tp_as_sequence */
1992 0, /* tp_as_mapping */
1993 0, /* tp_hash */
1994 0, /* tp_call */
1995 0, /* tp_str */
1996 PyObject_GenericGetAttr, /* tp_getattro */
1997 /* softspace is writable: we must supply tp_setattro */
1998 PyObject_GenericSetAttr, /* tp_setattro */
1999 0, /* tp_as_buffer */
2000 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_WEAKREFS, /* tp_flags */
2001 file_doc, /* tp_doc */
2002 0, /* tp_traverse */
2003 0, /* tp_clear */
2004 0, /* tp_richcompare */
2005 offsetof(PyFileObject, weakreflist), /* tp_weaklistoffset */
2006 (getiterfunc)file_getiter, /* tp_iter */
2007 (iternextfunc)file_iternext, /* tp_iternext */
2008 file_methods, /* tp_methods */
2009 file_memberlist, /* tp_members */
2010 file_getsetlist, /* tp_getset */
2011 0, /* tp_base */
2012 0, /* tp_dict */
2013 0, /* tp_descr_get */
2014 0, /* tp_descr_set */
2015 0, /* tp_dictoffset */
2016 (initproc)file_init, /* tp_init */
2017 PyType_GenericAlloc, /* tp_alloc */
2018 file_new, /* tp_new */
2019 PyObject_Del, /* tp_free */
2022 /* Interface for the 'soft space' between print items. */
2025 PyFile_SoftSpace(PyObject *f, int newflag)
2027 int oldflag = 0;
2028 if (f == NULL) {
2029 /* Do nothing */
2031 else if (PyFile_Check(f)) {
2032 oldflag = ((PyFileObject *)f)->f_softspace;
2033 ((PyFileObject *)f)->f_softspace = newflag;
2035 else {
2036 PyObject *v;
2037 v = PyObject_GetAttrString(f, "softspace");
2038 if (v == NULL)
2039 PyErr_Clear();
2040 else {
2041 if (PyInt_Check(v))
2042 oldflag = PyInt_AsLong(v);
2043 Py_DECREF(v);
2045 v = PyInt_FromLong((long)newflag);
2046 if (v == NULL)
2047 PyErr_Clear();
2048 else {
2049 if (PyObject_SetAttrString(f, "softspace", v) != 0)
2050 PyErr_Clear();
2051 Py_DECREF(v);
2054 return oldflag;
2057 /* Interfaces to write objects/strings to file-like objects */
2060 PyFile_WriteObject(PyObject *v, PyObject *f, int flags)
2062 PyObject *writer, *value, *args, *result;
2063 if (f == NULL) {
2064 PyErr_SetString(PyExc_TypeError, "writeobject with NULL file");
2065 return -1;
2067 else if (PyFile_Check(f)) {
2068 FILE *fp = PyFile_AsFile(f);
2069 #ifdef Py_USING_UNICODE
2070 PyObject *enc = ((PyFileObject*)f)->f_encoding;
2071 int result;
2072 #endif
2073 if (fp == NULL) {
2074 err_closed();
2075 return -1;
2077 #ifdef Py_USING_UNICODE
2078 if ((flags & Py_PRINT_RAW) &&
2079 PyUnicode_Check(v) && enc != Py_None) {
2080 char *cenc = PyString_AS_STRING(enc);
2081 value = PyUnicode_AsEncodedString(v, cenc, "strict");
2082 if (value == NULL)
2083 return -1;
2084 } else {
2085 value = v;
2086 Py_INCREF(value);
2088 result = PyObject_Print(value, fp, flags);
2089 Py_DECREF(value);
2090 return result;
2091 #else
2092 return PyObject_Print(v, fp, flags);
2093 #endif
2095 writer = PyObject_GetAttrString(f, "write");
2096 if (writer == NULL)
2097 return -1;
2098 if (flags & Py_PRINT_RAW) {
2099 if (PyUnicode_Check(v)) {
2100 value = v;
2101 Py_INCREF(value);
2102 } else
2103 value = PyObject_Str(v);
2105 else
2106 value = PyObject_Repr(v);
2107 if (value == NULL) {
2108 Py_DECREF(writer);
2109 return -1;
2111 args = PyTuple_Pack(1, value);
2112 if (args == NULL) {
2113 Py_DECREF(value);
2114 Py_DECREF(writer);
2115 return -1;
2117 result = PyEval_CallObject(writer, args);
2118 Py_DECREF(args);
2119 Py_DECREF(value);
2120 Py_DECREF(writer);
2121 if (result == NULL)
2122 return -1;
2123 Py_DECREF(result);
2124 return 0;
2128 PyFile_WriteString(const char *s, PyObject *f)
2130 if (f == NULL) {
2131 /* Should be caused by a pre-existing error */
2132 if (!PyErr_Occurred())
2133 PyErr_SetString(PyExc_SystemError,
2134 "null file for PyFile_WriteString");
2135 return -1;
2137 else if (PyFile_Check(f)) {
2138 FILE *fp = PyFile_AsFile(f);
2139 if (fp == NULL) {
2140 err_closed();
2141 return -1;
2143 fputs(s, fp);
2144 return 0;
2146 else if (!PyErr_Occurred()) {
2147 PyObject *v = PyString_FromString(s);
2148 int err;
2149 if (v == NULL)
2150 return -1;
2151 err = PyFile_WriteObject(v, f, Py_PRINT_RAW);
2152 Py_DECREF(v);
2153 return err;
2155 else
2156 return -1;
2159 /* Try to get a file-descriptor from a Python object. If the object
2160 is an integer or long integer, its value is returned. If not, the
2161 object's fileno() method is called if it exists; the method must return
2162 an integer or long integer, which is returned as the file descriptor value.
2163 -1 is returned on failure.
2166 int PyObject_AsFileDescriptor(PyObject *o)
2168 int fd;
2169 PyObject *meth;
2171 if (PyInt_Check(o)) {
2172 fd = PyInt_AsLong(o);
2174 else if (PyLong_Check(o)) {
2175 fd = PyLong_AsLong(o);
2177 else if ((meth = PyObject_GetAttrString(o, "fileno")) != NULL)
2179 PyObject *fno = PyEval_CallObject(meth, NULL);
2180 Py_DECREF(meth);
2181 if (fno == NULL)
2182 return -1;
2184 if (PyInt_Check(fno)) {
2185 fd = PyInt_AsLong(fno);
2186 Py_DECREF(fno);
2188 else if (PyLong_Check(fno)) {
2189 fd = PyLong_AsLong(fno);
2190 Py_DECREF(fno);
2192 else {
2193 PyErr_SetString(PyExc_TypeError,
2194 "fileno() returned a non-integer");
2195 Py_DECREF(fno);
2196 return -1;
2199 else {
2200 PyErr_SetString(PyExc_TypeError,
2201 "argument must be an int, or have a fileno() method.");
2202 return -1;
2205 if (fd < 0) {
2206 PyErr_Format(PyExc_ValueError,
2207 "file descriptor cannot be a negative integer (%i)",
2208 fd);
2209 return -1;
2211 return fd;
2214 /* From here on we need access to the real fgets and fread */
2215 #undef fgets
2216 #undef fread
2219 ** Py_UniversalNewlineFgets is an fgets variation that understands
2220 ** all of \r, \n and \r\n conventions.
2221 ** The stream should be opened in binary mode.
2222 ** If fobj is NULL the routine always does newline conversion, and
2223 ** it may peek one char ahead to gobble the second char in \r\n.
2224 ** If fobj is non-NULL it must be a PyFileObject. In this case there
2225 ** is no readahead but in stead a flag is used to skip a following
2226 ** \n on the next read. Also, if the file is open in binary mode
2227 ** the whole conversion is skipped. Finally, the routine keeps track of
2228 ** the different types of newlines seen.
2229 ** Note that we need no error handling: fgets() treats error and eof
2230 ** identically.
2232 char *
2233 Py_UniversalNewlineFgets(char *buf, int n, FILE *stream, PyObject *fobj)
2235 char *p = buf;
2236 int c;
2237 int newlinetypes = 0;
2238 int skipnextlf = 0;
2239 int univ_newline = 1;
2241 if (fobj) {
2242 if (!PyFile_Check(fobj)) {
2243 errno = ENXIO; /* What can you do... */
2244 return NULL;
2246 univ_newline = ((PyFileObject *)fobj)->f_univ_newline;
2247 if ( !univ_newline )
2248 return fgets(buf, n, stream);
2249 newlinetypes = ((PyFileObject *)fobj)->f_newlinetypes;
2250 skipnextlf = ((PyFileObject *)fobj)->f_skipnextlf;
2252 FLOCKFILE(stream);
2253 c = 'x'; /* Shut up gcc warning */
2254 while (--n > 0 && (c = GETC(stream)) != EOF ) {
2255 if (skipnextlf ) {
2256 skipnextlf = 0;
2257 if (c == '\n') {
2258 /* Seeing a \n here with skipnextlf true
2259 ** means we saw a \r before.
2261 newlinetypes |= NEWLINE_CRLF;
2262 c = GETC(stream);
2263 if (c == EOF) break;
2264 } else {
2266 ** Note that c == EOF also brings us here,
2267 ** so we're okay if the last char in the file
2268 ** is a CR.
2270 newlinetypes |= NEWLINE_CR;
2273 if (c == '\r') {
2274 /* A \r is translated into a \n, and we skip
2275 ** an adjacent \n, if any. We don't set the
2276 ** newlinetypes flag until we've seen the next char.
2278 skipnextlf = 1;
2279 c = '\n';
2280 } else if ( c == '\n') {
2281 newlinetypes |= NEWLINE_LF;
2283 *p++ = c;
2284 if (c == '\n') break;
2286 if ( c == EOF && skipnextlf )
2287 newlinetypes |= NEWLINE_CR;
2288 FUNLOCKFILE(stream);
2289 *p = '\0';
2290 if (fobj) {
2291 ((PyFileObject *)fobj)->f_newlinetypes = newlinetypes;
2292 ((PyFileObject *)fobj)->f_skipnextlf = skipnextlf;
2293 } else if ( skipnextlf ) {
2294 /* If we have no file object we cannot save the
2295 ** skipnextlf flag. We have to readahead, which
2296 ** will cause a pause if we're reading from an
2297 ** interactive stream, but that is very unlikely
2298 ** unless we're doing something silly like
2299 ** execfile("/dev/tty").
2301 c = GETC(stream);
2302 if ( c != '\n' )
2303 ungetc(c, stream);
2305 if (p == buf)
2306 return NULL;
2307 return buf;
2311 ** Py_UniversalNewlineFread is an fread variation that understands
2312 ** all of \r, \n and \r\n conventions.
2313 ** The stream should be opened in binary mode.
2314 ** fobj must be a PyFileObject. In this case there
2315 ** is no readahead but in stead a flag is used to skip a following
2316 ** \n on the next read. Also, if the file is open in binary mode
2317 ** the whole conversion is skipped. Finally, the routine keeps track of
2318 ** the different types of newlines seen.
2320 size_t
2321 Py_UniversalNewlineFread(char *buf, size_t n,
2322 FILE *stream, PyObject *fobj)
2324 char *dst = buf;
2325 PyFileObject *f = (PyFileObject *)fobj;
2326 int newlinetypes, skipnextlf;
2328 assert(buf != NULL);
2329 assert(stream != NULL);
2331 if (!fobj || !PyFile_Check(fobj)) {
2332 errno = ENXIO; /* What can you do... */
2333 return 0;
2335 if (!f->f_univ_newline)
2336 return fread(buf, 1, n, stream);
2337 newlinetypes = f->f_newlinetypes;
2338 skipnextlf = f->f_skipnextlf;
2339 /* Invariant: n is the number of bytes remaining to be filled
2340 * in the buffer.
2342 while (n) {
2343 size_t nread;
2344 int shortread;
2345 char *src = dst;
2347 nread = fread(dst, 1, n, stream);
2348 assert(nread <= n);
2349 if (nread == 0)
2350 break;
2352 n -= nread; /* assuming 1 byte out for each in; will adjust */
2353 shortread = n != 0; /* true iff EOF or error */
2354 while (nread--) {
2355 char c = *src++;
2356 if (c == '\r') {
2357 /* Save as LF and set flag to skip next LF. */
2358 *dst++ = '\n';
2359 skipnextlf = 1;
2361 else if (skipnextlf && c == '\n') {
2362 /* Skip LF, and remember we saw CR LF. */
2363 skipnextlf = 0;
2364 newlinetypes |= NEWLINE_CRLF;
2365 ++n;
2367 else {
2368 /* Normal char to be stored in buffer. Also
2369 * update the newlinetypes flag if either this
2370 * is an LF or the previous char was a CR.
2372 if (c == '\n')
2373 newlinetypes |= NEWLINE_LF;
2374 else if (skipnextlf)
2375 newlinetypes |= NEWLINE_CR;
2376 *dst++ = c;
2377 skipnextlf = 0;
2380 if (shortread) {
2381 /* If this is EOF, update type flags. */
2382 if (skipnextlf && feof(stream))
2383 newlinetypes |= NEWLINE_CR;
2384 break;
2387 f->f_newlinetypes = newlinetypes;
2388 f->f_skipnextlf = skipnextlf;
2389 return dst - buf;