Convert from long to Py_ssize_t.
[python.git] / Modules / bz2module.c
blob16201bd6486c5bfbe66df646ea99553fa0452514
1 /*
3 python-bz2 - python bz2 library interface
5 Copyright (c) 2002 Gustavo Niemeyer <niemeyer@conectiva.com>
6 Copyright (c) 2002 Python Software Foundation; All Rights Reserved
8 */
10 #include "Python.h"
11 #include <stdio.h>
12 #include <bzlib.h>
13 #include "structmember.h"
15 #ifdef WITH_THREAD
16 #include "pythread.h"
17 #endif
19 static char __author__[] =
20 "The bz2 python module was written by:\n\
21 \n\
22 Gustavo Niemeyer <niemeyer@conectiva.com>\n\
/* File-offset type used for stream positions, 64-bit if possible.
 * Selection logic copied from Objects/fileobject.c: off_t is used when
 * large-file support is not configured or when off_t is already at least
 * 8 bytes wide; otherwise fpos_t is used if it is wide enough. */
#if !defined(HAVE_LARGEFILE_SUPPORT) || SIZEOF_OFF_T >= 8
typedef off_t Py_off_t;
#elif SIZEOF_FPOS_T >= 8
typedef fpos_t Py_off_t;
#else
#error "Large file support, but neither off_t nor fpos_t is large enough."
#endif
/* Pointer to the character data of string object v (no type check).
 * The argument is parenthesized so that expressions such as BUF(a ? b : c)
 * are cast as a whole. */
#define BUF(v) PyString_AS_STRING((PyStringObject *)(v))
39 #define MODE_CLOSED 0
40 #define MODE_READ 1
41 #define MODE_READ_EOF 2
42 #define MODE_WRITE 3
44 #define BZ2FileObject_Check(v) (Py_TYPE(v) == &BZ2File_Type)
/* BZ_CONFIG_ERROR is used as a feature test.  When bzlib.h defines it, the
 * BZ2_-prefixed API is used directly and the total-output counter is read
 * from the split total_out_hi32/total_out_lo32 fields.  Otherwise the
 * BZ2_ names are mapped onto the unprefixed functions and the counter is
 * the single total_out field.
 *
 * Every expansion is fully parenthesized (argument and result) so the macro
 * can be used inside larger expressions. */
#ifdef BZ_CONFIG_ERROR

#if SIZEOF_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
	(((long)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#elif SIZEOF_LONG_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
	(((PY_LONG_LONG)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#else
/* No 64-bit integer type available: only the low 32 bits are reported. */
#define BZS_TOTAL_OUT(bzs) \
	((bzs)->total_out_lo32)
#endif

#else /* ! BZ_CONFIG_ERROR */

#define BZ2_bzRead bzRead
#define BZ2_bzReadOpen bzReadOpen
#define BZ2_bzReadClose bzReadClose
#define BZ2_bzWrite bzWrite
#define BZ2_bzWriteOpen bzWriteOpen
#define BZ2_bzWriteClose bzWriteClose
#define BZ2_bzCompress bzCompress
#define BZ2_bzCompressInit bzCompressInit
#define BZ2_bzCompressEnd bzCompressEnd
#define BZ2_bzDecompress bzDecompress
#define BZ2_bzDecompressInit bzDecompressInit
#define BZ2_bzDecompressEnd bzDecompressEnd

#define BZS_TOTAL_OUT(bzs) ((bzs)->total_out)

#endif /* ! BZ_CONFIG_ERROR */
#ifdef WITH_THREAD
/* Take the per-object lock.
 *
 * The methods in this file hold the object lock while they release the GIL
 * around libbz2 calls.  If a second thread then blocked on the object lock
 * while still holding the GIL, the first thread could never re-acquire the
 * GIL to finish and release the lock.  To avoid that deadlock, try a
 * non-blocking acquire first and, only if the lock is contended, drop the
 * GIL for the duration of the blocking wait.
 *
 * Must be used as a statement, with the GIL held. */
#define ACQUIRE_LOCK(obj) do { \
	if (!PyThread_acquire_lock((obj)->lock, 0)) { \
		Py_BEGIN_ALLOW_THREADS \
		PyThread_acquire_lock((obj)->lock, 1); \
		Py_END_ALLOW_THREADS \
	} } while (0)
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
#else
/* Without thread support the lock operations are no-ops. */
#define ACQUIRE_LOCK(obj)
#define RELEASE_LOCK(obj)
#endif
/* Bit flags OR-ed into f_newlinetypes as line terminators are seen. */
#define NEWLINE_UNKNOWN	0		/* No newline seen, yet */
#define NEWLINE_CR	(1 << 0)	/* \r newline seen */
#define NEWLINE_LF	(1 << 1)	/* \n newline seen */
#define NEWLINE_CRLF	(1 << 2)	/* \r\n newline seen */
94 /* ===================================================================== */
95 /* Structure definitions. */
97 typedef struct {
98 PyObject_HEAD
99 PyObject *file;
101 char* f_buf; /* Allocated readahead buffer */
102 char* f_bufend; /* Points after last occupied position */
103 char* f_bufptr; /* Current buffer position */
105 int f_softspace; /* Flag used by 'print' command */
107 int f_univ_newline; /* Handle any newline convention */
108 int f_newlinetypes; /* Types of newlines seen */
109 int f_skipnextlf; /* Skip next \n */
111 BZFILE *fp;
112 int mode;
113 Py_off_t pos;
114 Py_off_t size;
115 #ifdef WITH_THREAD
116 PyThread_type_lock lock;
117 #endif
118 } BZ2FileObject;
120 typedef struct {
121 PyObject_HEAD
122 bz_stream bzs;
123 int running;
124 #ifdef WITH_THREAD
125 PyThread_type_lock lock;
126 #endif
127 } BZ2CompObject;
129 typedef struct {
130 PyObject_HEAD
131 bz_stream bzs;
132 int running;
133 PyObject *unused_data;
134 #ifdef WITH_THREAD
135 PyThread_type_lock lock;
136 #endif
137 } BZ2DecompObject;
139 /* ===================================================================== */
140 /* Utility functions. */
142 static int
143 Util_CatchBZ2Error(int bzerror)
145 int ret = 0;
146 switch(bzerror) {
147 case BZ_OK:
148 case BZ_STREAM_END:
149 break;
151 #ifdef BZ_CONFIG_ERROR
152 case BZ_CONFIG_ERROR:
153 PyErr_SetString(PyExc_SystemError,
154 "the bz2 library was not compiled "
155 "correctly");
156 ret = 1;
157 break;
158 #endif
160 case BZ_PARAM_ERROR:
161 PyErr_SetString(PyExc_ValueError,
162 "the bz2 library has received wrong "
163 "parameters");
164 ret = 1;
165 break;
167 case BZ_MEM_ERROR:
168 PyErr_NoMemory();
169 ret = 1;
170 break;
172 case BZ_DATA_ERROR:
173 case BZ_DATA_ERROR_MAGIC:
174 PyErr_SetString(PyExc_IOError, "invalid data stream");
175 ret = 1;
176 break;
178 case BZ_IO_ERROR:
179 PyErr_SetString(PyExc_IOError, "unknown IO error");
180 ret = 1;
181 break;
183 case BZ_UNEXPECTED_EOF:
184 PyErr_SetString(PyExc_EOFError,
185 "compressed file ended before the "
186 "logical end-of-stream was detected");
187 ret = 1;
188 break;
190 case BZ_SEQUENCE_ERROR:
191 PyErr_SetString(PyExc_RuntimeError,
192 "wrong sequence of bz2 library "
193 "commands used");
194 ret = 1;
195 break;
197 return ret;
/* Growth step for small buffers: BUFSIZ, but never less than 8192. */
#if BUFSIZ < 8192
#define SMALLCHUNK 8192
#else
#define SMALLCHUNK BUFSIZ
#endif

/* Size above which buffers stop doubling and grow linearly instead. */
#if SIZEOF_INT < 4
#define BIGCHUNK  (512 * 32)
#else
#define BIGCHUNK  (512 * 1024)
#endif

/* This is a hacked version of Python's fileobject.c:new_buffersize().
 *
 * Return the next buffer size to try after `currentsize`:
 *   - up to SMALLCHUNK:            grow by SMALLCHUNK;
 *   - above that, up to BIGCHUNK:  double;
 *   - beyond BIGCHUNK:             grow by BIGCHUNK. */
static size_t
Util_NewBufferSize(size_t currentsize)
{
	size_t step;

	if (currentsize <= SMALLCHUNK)
		step = SMALLCHUNK;
	else if (currentsize <= BIGCHUNK)
		step = currentsize;
	else
		step = BIGCHUNK;
	return currentsize + step;
}
227 /* This is a hacked version of Python's fileobject.c:get_line(). */
228 static PyObject *
229 Util_GetLine(BZ2FileObject *f, int n)
231 char c;
232 char *buf, *end;
233 size_t total_v_size; /* total # of slots in buffer */
234 size_t used_v_size; /* # used slots in buffer */
235 size_t increment; /* amount to increment the buffer */
236 PyObject *v;
237 int bzerror;
238 int bytes_read;
239 int newlinetypes = f->f_newlinetypes;
240 int skipnextlf = f->f_skipnextlf;
241 int univ_newline = f->f_univ_newline;
243 total_v_size = n > 0 ? n : 100;
244 v = PyString_FromStringAndSize((char *)NULL, total_v_size);
245 if (v == NULL)
246 return NULL;
248 buf = BUF(v);
249 end = buf + total_v_size;
251 for (;;) {
252 Py_BEGIN_ALLOW_THREADS
253 while (buf != end) {
254 bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1);
255 f->pos++;
256 if (bytes_read == 0) break;
257 if (univ_newline) {
258 if (skipnextlf) {
259 skipnextlf = 0;
260 if (c == '\n') {
261 /* Seeing a \n here with skipnextlf true means we
262 * saw a \r before.
264 newlinetypes |= NEWLINE_CRLF;
265 if (bzerror != BZ_OK) break;
266 bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1);
267 f->pos++;
268 if (bytes_read == 0) break;
269 } else {
270 newlinetypes |= NEWLINE_CR;
273 if (c == '\r') {
274 skipnextlf = 1;
275 c = '\n';
276 } else if (c == '\n')
277 newlinetypes |= NEWLINE_LF;
279 *buf++ = c;
280 if (bzerror != BZ_OK || c == '\n') break;
282 if (univ_newline && bzerror == BZ_STREAM_END && skipnextlf)
283 newlinetypes |= NEWLINE_CR;
284 Py_END_ALLOW_THREADS
285 f->f_newlinetypes = newlinetypes;
286 f->f_skipnextlf = skipnextlf;
287 if (bzerror == BZ_STREAM_END) {
288 f->size = f->pos;
289 f->mode = MODE_READ_EOF;
290 break;
291 } else if (bzerror != BZ_OK) {
292 Util_CatchBZ2Error(bzerror);
293 Py_DECREF(v);
294 return NULL;
296 if (c == '\n')
297 break;
298 /* Must be because buf == end */
299 if (n > 0)
300 break;
301 used_v_size = total_v_size;
302 increment = total_v_size >> 2; /* mild exponential growth */
303 total_v_size += increment;
304 if (total_v_size > INT_MAX) {
305 PyErr_SetString(PyExc_OverflowError,
306 "line is longer than a Python string can hold");
307 Py_DECREF(v);
308 return NULL;
310 if (_PyString_Resize(&v, total_v_size) < 0)
311 return NULL;
312 buf = BUF(v) + used_v_size;
313 end = BUF(v) + total_v_size;
316 used_v_size = buf - BUF(v);
317 if (used_v_size != total_v_size)
318 _PyString_Resize(&v, used_v_size);
319 return v;
322 /* This is a hacked version of Python's
323 * fileobject.c:Py_UniversalNewlineFread(). */
324 size_t
325 Util_UnivNewlineRead(int *bzerror, BZFILE *stream,
326 char* buf, size_t n, BZ2FileObject *f)
328 char *dst = buf;
329 int newlinetypes, skipnextlf;
331 assert(buf != NULL);
332 assert(stream != NULL);
334 if (!f->f_univ_newline)
335 return BZ2_bzRead(bzerror, stream, buf, n);
337 newlinetypes = f->f_newlinetypes;
338 skipnextlf = f->f_skipnextlf;
340 /* Invariant: n is the number of bytes remaining to be filled
341 * in the buffer.
343 while (n) {
344 size_t nread;
345 int shortread;
346 char *src = dst;
348 nread = BZ2_bzRead(bzerror, stream, dst, n);
349 assert(nread <= n);
350 n -= nread; /* assuming 1 byte out for each in; will adjust */
351 shortread = n != 0; /* true iff EOF or error */
352 while (nread--) {
353 char c = *src++;
354 if (c == '\r') {
355 /* Save as LF and set flag to skip next LF. */
356 *dst++ = '\n';
357 skipnextlf = 1;
359 else if (skipnextlf && c == '\n') {
360 /* Skip LF, and remember we saw CR LF. */
361 skipnextlf = 0;
362 newlinetypes |= NEWLINE_CRLF;
363 ++n;
365 else {
366 /* Normal char to be stored in buffer. Also
367 * update the newlinetypes flag if either this
368 * is an LF or the previous char was a CR.
370 if (c == '\n')
371 newlinetypes |= NEWLINE_LF;
372 else if (skipnextlf)
373 newlinetypes |= NEWLINE_CR;
374 *dst++ = c;
375 skipnextlf = 0;
378 if (shortread) {
379 /* If this is EOF, update type flags. */
380 if (skipnextlf && *bzerror == BZ_STREAM_END)
381 newlinetypes |= NEWLINE_CR;
382 break;
385 f->f_newlinetypes = newlinetypes;
386 f->f_skipnextlf = skipnextlf;
387 return dst - buf;
390 /* This is a hacked version of Python's fileobject.c:drop_readahead(). */
391 static void
392 Util_DropReadAhead(BZ2FileObject *f)
394 if (f->f_buf != NULL) {
395 PyMem_Free(f->f_buf);
396 f->f_buf = NULL;
400 /* This is a hacked version of Python's fileobject.c:readahead(). */
401 static int
402 Util_ReadAhead(BZ2FileObject *f, int bufsize)
404 int chunksize;
405 int bzerror;
407 if (f->f_buf != NULL) {
408 if((f->f_bufend - f->f_bufptr) >= 1)
409 return 0;
410 else
411 Util_DropReadAhead(f);
413 if (f->mode == MODE_READ_EOF) {
414 f->f_bufptr = f->f_buf;
415 f->f_bufend = f->f_buf;
416 return 0;
418 if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
419 return -1;
421 Py_BEGIN_ALLOW_THREADS
422 chunksize = Util_UnivNewlineRead(&bzerror, f->fp, f->f_buf,
423 bufsize, f);
424 Py_END_ALLOW_THREADS
425 f->pos += chunksize;
426 if (bzerror == BZ_STREAM_END) {
427 f->size = f->pos;
428 f->mode = MODE_READ_EOF;
429 } else if (bzerror != BZ_OK) {
430 Util_CatchBZ2Error(bzerror);
431 Util_DropReadAhead(f);
432 return -1;
434 f->f_bufptr = f->f_buf;
435 f->f_bufend = f->f_buf + chunksize;
436 return 0;
439 /* This is a hacked version of Python's
440 * fileobject.c:readahead_get_line_skip(). */
441 static PyStringObject *
442 Util_ReadAheadGetLineSkip(BZ2FileObject *f, int skip, int bufsize)
444 PyStringObject* s;
445 char *bufptr;
446 char *buf;
447 int len;
449 if (f->f_buf == NULL)
450 if (Util_ReadAhead(f, bufsize) < 0)
451 return NULL;
453 len = f->f_bufend - f->f_bufptr;
454 if (len == 0)
455 return (PyStringObject *)
456 PyString_FromStringAndSize(NULL, skip);
457 bufptr = memchr(f->f_bufptr, '\n', len);
458 if (bufptr != NULL) {
459 bufptr++; /* Count the '\n' */
460 len = bufptr - f->f_bufptr;
461 s = (PyStringObject *)
462 PyString_FromStringAndSize(NULL, skip+len);
463 if (s == NULL)
464 return NULL;
465 memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
466 f->f_bufptr = bufptr;
467 if (bufptr == f->f_bufend)
468 Util_DropReadAhead(f);
469 } else {
470 bufptr = f->f_bufptr;
471 buf = f->f_buf;
472 f->f_buf = NULL; /* Force new readahead buffer */
473 s = Util_ReadAheadGetLineSkip(f, skip+len,
474 bufsize + (bufsize>>2));
475 if (s == NULL) {
476 PyMem_Free(buf);
477 return NULL;
479 memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
480 PyMem_Free(buf);
482 return s;
485 /* ===================================================================== */
486 /* Methods of BZ2File. */
488 PyDoc_STRVAR(BZ2File_read__doc__,
489 "read([size]) -> string\n\
491 Read at most size uncompressed bytes, returned as a string. If the size\n\
492 argument is negative or omitted, read until EOF is reached.\n\
495 /* This is a hacked version of Python's fileobject.c:file_read(). */
496 static PyObject *
497 BZ2File_read(BZ2FileObject *self, PyObject *args)
499 long bytesrequested = -1;
500 size_t bytesread, buffersize, chunksize;
501 int bzerror;
502 PyObject *ret = NULL;
504 if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
505 return NULL;
507 ACQUIRE_LOCK(self);
508 switch (self->mode) {
509 case MODE_READ:
510 break;
511 case MODE_READ_EOF:
512 ret = PyString_FromString("");
513 goto cleanup;
514 case MODE_CLOSED:
515 PyErr_SetString(PyExc_ValueError,
516 "I/O operation on closed file");
517 goto cleanup;
518 default:
519 PyErr_SetString(PyExc_IOError,
520 "file is not ready for reading");
521 goto cleanup;
524 if (bytesrequested < 0)
525 buffersize = Util_NewBufferSize((size_t)0);
526 else
527 buffersize = bytesrequested;
528 if (buffersize > INT_MAX) {
529 PyErr_SetString(PyExc_OverflowError,
530 "requested number of bytes is "
531 "more than a Python string can hold");
532 goto cleanup;
534 ret = PyString_FromStringAndSize((char *)NULL, buffersize);
535 if (ret == NULL)
536 goto cleanup;
537 bytesread = 0;
539 for (;;) {
540 Py_BEGIN_ALLOW_THREADS
541 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
542 BUF(ret)+bytesread,
543 buffersize-bytesread,
544 self);
545 self->pos += chunksize;
546 Py_END_ALLOW_THREADS
547 bytesread += chunksize;
548 if (bzerror == BZ_STREAM_END) {
549 self->size = self->pos;
550 self->mode = MODE_READ_EOF;
551 break;
552 } else if (bzerror != BZ_OK) {
553 Util_CatchBZ2Error(bzerror);
554 Py_DECREF(ret);
555 ret = NULL;
556 goto cleanup;
558 if (bytesrequested < 0) {
559 buffersize = Util_NewBufferSize(buffersize);
560 if (_PyString_Resize(&ret, buffersize) < 0)
561 goto cleanup;
562 } else {
563 break;
566 if (bytesread != buffersize)
567 _PyString_Resize(&ret, bytesread);
569 cleanup:
570 RELEASE_LOCK(self);
571 return ret;
574 PyDoc_STRVAR(BZ2File_readline__doc__,
575 "readline([size]) -> string\n\
577 Return the next line from the file, as a string, retaining newline.\n\
578 A non-negative size argument will limit the maximum number of bytes to\n\
579 return (an incomplete line may be returned then). Return an empty\n\
580 string at EOF.\n\
583 static PyObject *
584 BZ2File_readline(BZ2FileObject *self, PyObject *args)
586 PyObject *ret = NULL;
587 int sizehint = -1;
589 if (!PyArg_ParseTuple(args, "|i:readline", &sizehint))
590 return NULL;
592 ACQUIRE_LOCK(self);
593 switch (self->mode) {
594 case MODE_READ:
595 break;
596 case MODE_READ_EOF:
597 ret = PyString_FromString("");
598 goto cleanup;
599 case MODE_CLOSED:
600 PyErr_SetString(PyExc_ValueError,
601 "I/O operation on closed file");
602 goto cleanup;
603 default:
604 PyErr_SetString(PyExc_IOError,
605 "file is not ready for reading");
606 goto cleanup;
609 if (sizehint == 0)
610 ret = PyString_FromString("");
611 else
612 ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint);
614 cleanup:
615 RELEASE_LOCK(self);
616 return ret;
619 PyDoc_STRVAR(BZ2File_readlines__doc__,
620 "readlines([size]) -> list\n\
622 Call readline() repeatedly and return a list of lines read.\n\
623 The optional size argument, if given, is an approximate bound on the\n\
624 total number of bytes in the lines returned.\n\
627 /* This is a hacked version of Python's fileobject.c:file_readlines(). */
628 static PyObject *
629 BZ2File_readlines(BZ2FileObject *self, PyObject *args)
631 long sizehint = 0;
632 PyObject *list = NULL;
633 PyObject *line;
634 char small_buffer[SMALLCHUNK];
635 char *buffer = small_buffer;
636 size_t buffersize = SMALLCHUNK;
637 PyObject *big_buffer = NULL;
638 size_t nfilled = 0;
639 size_t nread;
640 size_t totalread = 0;
641 char *p, *q, *end;
642 int err;
643 int shortread = 0;
644 int bzerror;
646 if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
647 return NULL;
649 ACQUIRE_LOCK(self);
650 switch (self->mode) {
651 case MODE_READ:
652 break;
653 case MODE_READ_EOF:
654 list = PyList_New(0);
655 goto cleanup;
656 case MODE_CLOSED:
657 PyErr_SetString(PyExc_ValueError,
658 "I/O operation on closed file");
659 goto cleanup;
660 default:
661 PyErr_SetString(PyExc_IOError,
662 "file is not ready for reading");
663 goto cleanup;
666 if ((list = PyList_New(0)) == NULL)
667 goto cleanup;
669 for (;;) {
670 Py_BEGIN_ALLOW_THREADS
671 nread = Util_UnivNewlineRead(&bzerror, self->fp,
672 buffer+nfilled,
673 buffersize-nfilled, self);
674 self->pos += nread;
675 Py_END_ALLOW_THREADS
676 if (bzerror == BZ_STREAM_END) {
677 self->size = self->pos;
678 self->mode = MODE_READ_EOF;
679 if (nread == 0) {
680 sizehint = 0;
681 break;
683 shortread = 1;
684 } else if (bzerror != BZ_OK) {
685 Util_CatchBZ2Error(bzerror);
686 error:
687 Py_DECREF(list);
688 list = NULL;
689 goto cleanup;
691 totalread += nread;
692 p = memchr(buffer+nfilled, '\n', nread);
693 if (!shortread && p == NULL) {
694 /* Need a larger buffer to fit this line */
695 nfilled += nread;
696 buffersize *= 2;
697 if (buffersize > INT_MAX) {
698 PyErr_SetString(PyExc_OverflowError,
699 "line is longer than a Python string can hold");
700 goto error;
702 if (big_buffer == NULL) {
703 /* Create the big buffer */
704 big_buffer = PyString_FromStringAndSize(
705 NULL, buffersize);
706 if (big_buffer == NULL)
707 goto error;
708 buffer = PyString_AS_STRING(big_buffer);
709 memcpy(buffer, small_buffer, nfilled);
711 else {
712 /* Grow the big buffer */
713 _PyString_Resize(&big_buffer, buffersize);
714 buffer = PyString_AS_STRING(big_buffer);
716 continue;
718 end = buffer+nfilled+nread;
719 q = buffer;
720 while (p != NULL) {
721 /* Process complete lines */
722 p++;
723 line = PyString_FromStringAndSize(q, p-q);
724 if (line == NULL)
725 goto error;
726 err = PyList_Append(list, line);
727 Py_DECREF(line);
728 if (err != 0)
729 goto error;
730 q = p;
731 p = memchr(q, '\n', end-q);
733 /* Move the remaining incomplete line to the start */
734 nfilled = end-q;
735 memmove(buffer, q, nfilled);
736 if (sizehint > 0)
737 if (totalread >= (size_t)sizehint)
738 break;
739 if (shortread) {
740 sizehint = 0;
741 break;
744 if (nfilled != 0) {
745 /* Partial last line */
746 line = PyString_FromStringAndSize(buffer, nfilled);
747 if (line == NULL)
748 goto error;
749 if (sizehint > 0) {
750 /* Need to complete the last line */
751 PyObject *rest = Util_GetLine(self, 0);
752 if (rest == NULL) {
753 Py_DECREF(line);
754 goto error;
756 PyString_Concat(&line, rest);
757 Py_DECREF(rest);
758 if (line == NULL)
759 goto error;
761 err = PyList_Append(list, line);
762 Py_DECREF(line);
763 if (err != 0)
764 goto error;
767 cleanup:
768 RELEASE_LOCK(self);
769 if (big_buffer) {
770 Py_DECREF(big_buffer);
772 return list;
775 PyDoc_STRVAR(BZ2File_xreadlines__doc__,
776 "xreadlines() -> self\n\
778 For backward compatibility. BZ2File objects now include the performance\n\
779 optimizations previously implemented in the xreadlines module.\n\
782 PyDoc_STRVAR(BZ2File_write__doc__,
783 "write(data) -> None\n\
785 Write the 'data' string to file. Note that due to buffering, close() may\n\
786 be needed before the file on disk reflects the data written.\n\
789 /* This is a hacked version of Python's fileobject.c:file_write(). */
790 static PyObject *
791 BZ2File_write(BZ2FileObject *self, PyObject *args)
793 PyObject *ret = NULL;
794 char *buf;
795 int len;
796 int bzerror;
798 if (!PyArg_ParseTuple(args, "s#:write", &buf, &len))
799 return NULL;
801 ACQUIRE_LOCK(self);
802 switch (self->mode) {
803 case MODE_WRITE:
804 break;
806 case MODE_CLOSED:
807 PyErr_SetString(PyExc_ValueError,
808 "I/O operation on closed file");
809 goto cleanup;
811 default:
812 PyErr_SetString(PyExc_IOError,
813 "file is not ready for writing");
814 goto cleanup;
817 self->f_softspace = 0;
819 Py_BEGIN_ALLOW_THREADS
820 BZ2_bzWrite (&bzerror, self->fp, buf, len);
821 self->pos += len;
822 Py_END_ALLOW_THREADS
824 if (bzerror != BZ_OK) {
825 Util_CatchBZ2Error(bzerror);
826 goto cleanup;
829 Py_INCREF(Py_None);
830 ret = Py_None;
832 cleanup:
833 RELEASE_LOCK(self);
834 return ret;
837 PyDoc_STRVAR(BZ2File_writelines__doc__,
838 "writelines(sequence_of_strings) -> None\n\
840 Write the sequence of strings to the file. Note that newlines are not\n\
841 added. The sequence can be any iterable object producing strings. This is\n\
842 equivalent to calling write() for each string.\n\
845 /* This is a hacked version of Python's fileobject.c:file_writelines(). */
846 static PyObject *
847 BZ2File_writelines(BZ2FileObject *self, PyObject *seq)
849 #define CHUNKSIZE 1000
850 PyObject *list = NULL;
851 PyObject *iter = NULL;
852 PyObject *ret = NULL;
853 PyObject *line;
854 int i, j, index, len, islist;
855 int bzerror;
857 ACQUIRE_LOCK(self);
858 switch (self->mode) {
859 case MODE_WRITE:
860 break;
862 case MODE_CLOSED:
863 PyErr_SetString(PyExc_ValueError,
864 "I/O operation on closed file");
865 goto error;
867 default:
868 PyErr_SetString(PyExc_IOError,
869 "file is not ready for writing");
870 goto error;
873 islist = PyList_Check(seq);
874 if (!islist) {
875 iter = PyObject_GetIter(seq);
876 if (iter == NULL) {
877 PyErr_SetString(PyExc_TypeError,
878 "writelines() requires an iterable argument");
879 goto error;
881 list = PyList_New(CHUNKSIZE);
882 if (list == NULL)
883 goto error;
886 /* Strategy: slurp CHUNKSIZE lines into a private list,
887 checking that they are all strings, then write that list
888 without holding the interpreter lock, then come back for more. */
889 for (index = 0; ; index += CHUNKSIZE) {
890 if (islist) {
891 Py_XDECREF(list);
892 list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
893 if (list == NULL)
894 goto error;
895 j = PyList_GET_SIZE(list);
897 else {
898 for (j = 0; j < CHUNKSIZE; j++) {
899 line = PyIter_Next(iter);
900 if (line == NULL) {
901 if (PyErr_Occurred())
902 goto error;
903 break;
905 PyList_SetItem(list, j, line);
908 if (j == 0)
909 break;
911 /* Check that all entries are indeed strings. If not,
912 apply the same rules as for file.write() and
913 convert the rets to strings. This is slow, but
914 seems to be the only way since all conversion APIs
915 could potentially execute Python code. */
916 for (i = 0; i < j; i++) {
917 PyObject *v = PyList_GET_ITEM(list, i);
918 if (!PyString_Check(v)) {
919 const char *buffer;
920 Py_ssize_t len;
921 if (PyObject_AsCharBuffer(v, &buffer, &len)) {
922 PyErr_SetString(PyExc_TypeError,
923 "writelines() "
924 "argument must be "
925 "a sequence of "
926 "strings");
927 goto error;
929 line = PyString_FromStringAndSize(buffer,
930 len);
931 if (line == NULL)
932 goto error;
933 Py_DECREF(v);
934 PyList_SET_ITEM(list, i, line);
938 self->f_softspace = 0;
940 /* Since we are releasing the global lock, the
941 following code may *not* execute Python code. */
942 Py_BEGIN_ALLOW_THREADS
943 for (i = 0; i < j; i++) {
944 line = PyList_GET_ITEM(list, i);
945 len = PyString_GET_SIZE(line);
946 BZ2_bzWrite (&bzerror, self->fp,
947 PyString_AS_STRING(line), len);
948 if (bzerror != BZ_OK) {
949 Py_BLOCK_THREADS
950 Util_CatchBZ2Error(bzerror);
951 goto error;
954 Py_END_ALLOW_THREADS
956 if (j < CHUNKSIZE)
957 break;
960 Py_INCREF(Py_None);
961 ret = Py_None;
963 error:
964 RELEASE_LOCK(self);
965 Py_XDECREF(list);
966 Py_XDECREF(iter);
967 return ret;
968 #undef CHUNKSIZE
971 PyDoc_STRVAR(BZ2File_seek__doc__,
972 "seek(offset [, whence]) -> None\n\
974 Move to new file position. Argument offset is a byte count. Optional\n\
975 argument whence defaults to 0 (offset from start of file, offset\n\
976 should be >= 0); other values are 1 (move relative to current position,\n\
977 positive or negative), and 2 (move relative to end of file, usually\n\
978 negative, although many platforms allow seeking beyond the end of a file).\n\
980 Note that seeking of bz2 files is emulated, and depending on the parameters\n\
981 the operation may be extremely slow.\n\
984 static PyObject *
985 BZ2File_seek(BZ2FileObject *self, PyObject *args)
987 int where = 0;
988 PyObject *offobj;
989 Py_off_t offset;
990 char small_buffer[SMALLCHUNK];
991 char *buffer = small_buffer;
992 size_t buffersize = SMALLCHUNK;
993 Py_off_t bytesread = 0;
994 size_t readsize;
995 int chunksize;
996 int bzerror;
997 PyObject *ret = NULL;
999 if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &where))
1000 return NULL;
1001 #if !defined(HAVE_LARGEFILE_SUPPORT)
1002 offset = PyInt_AsLong(offobj);
1003 #else
1004 offset = PyLong_Check(offobj) ?
1005 PyLong_AsLongLong(offobj) : PyInt_AsLong(offobj);
1006 #endif
1007 if (PyErr_Occurred())
1008 return NULL;
1010 ACQUIRE_LOCK(self);
1011 Util_DropReadAhead(self);
1012 switch (self->mode) {
1013 case MODE_READ:
1014 case MODE_READ_EOF:
1015 break;
1017 case MODE_CLOSED:
1018 PyErr_SetString(PyExc_ValueError,
1019 "I/O operation on closed file");
1020 goto cleanup;
1022 default:
1023 PyErr_SetString(PyExc_IOError,
1024 "seek works only while reading");
1025 goto cleanup;
1028 if (where == 2) {
1029 if (self->size == -1) {
1030 assert(self->mode != MODE_READ_EOF);
1031 for (;;) {
1032 Py_BEGIN_ALLOW_THREADS
1033 chunksize = Util_UnivNewlineRead(
1034 &bzerror, self->fp,
1035 buffer, buffersize,
1036 self);
1037 self->pos += chunksize;
1038 Py_END_ALLOW_THREADS
1040 bytesread += chunksize;
1041 if (bzerror == BZ_STREAM_END) {
1042 break;
1043 } else if (bzerror != BZ_OK) {
1044 Util_CatchBZ2Error(bzerror);
1045 goto cleanup;
1048 self->mode = MODE_READ_EOF;
1049 self->size = self->pos;
1050 bytesread = 0;
1052 offset = self->size + offset;
1053 } else if (where == 1) {
1054 offset = self->pos + offset;
1057 /* Before getting here, offset must be the absolute position the file
1058 * pointer should be set to. */
1060 if (offset >= self->pos) {
1061 /* we can move forward */
1062 offset -= self->pos;
1063 } else {
1064 /* we cannot move back, so rewind the stream */
1065 BZ2_bzReadClose(&bzerror, self->fp);
1066 if (self->fp) {
1067 PyFile_DecUseCount((PyFileObject *)self->file);
1068 self->fp = NULL;
1070 if (bzerror != BZ_OK) {
1071 Util_CatchBZ2Error(bzerror);
1072 goto cleanup;
1074 ret = PyObject_CallMethod(self->file, "seek", "(i)", 0);
1075 if (!ret)
1076 goto cleanup;
1077 Py_DECREF(ret);
1078 ret = NULL;
1079 self->pos = 0;
1080 self->fp = BZ2_bzReadOpen(&bzerror, PyFile_AsFile(self->file),
1081 0, 0, NULL, 0);
1082 if (self->fp)
1083 PyFile_IncUseCount((PyFileObject *)self->file);
1084 if (bzerror != BZ_OK) {
1085 Util_CatchBZ2Error(bzerror);
1086 goto cleanup;
1088 self->mode = MODE_READ;
1091 if (offset <= 0 || self->mode == MODE_READ_EOF)
1092 goto exit;
1094 /* Before getting here, offset must be set to the number of bytes
1095 * to walk forward. */
1096 for (;;) {
1097 if (offset-bytesread > buffersize)
1098 readsize = buffersize;
1099 else
1100 /* offset might be wider that readsize, but the result
1101 * of the subtraction is bound by buffersize (see the
1102 * condition above). buffersize is 8192. */
1103 readsize = (size_t)(offset-bytesread);
1104 Py_BEGIN_ALLOW_THREADS
1105 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
1106 buffer, readsize, self);
1107 self->pos += chunksize;
1108 Py_END_ALLOW_THREADS
1109 bytesread += chunksize;
1110 if (bzerror == BZ_STREAM_END) {
1111 self->size = self->pos;
1112 self->mode = MODE_READ_EOF;
1113 break;
1114 } else if (bzerror != BZ_OK) {
1115 Util_CatchBZ2Error(bzerror);
1116 goto cleanup;
1118 if (bytesread == offset)
1119 break;
1122 exit:
1123 Py_INCREF(Py_None);
1124 ret = Py_None;
1126 cleanup:
1127 RELEASE_LOCK(self);
1128 return ret;
1131 PyDoc_STRVAR(BZ2File_tell__doc__,
1132 "tell() -> int\n\
1134 Return the current file position, an integer (may be a long integer).\n\
1137 static PyObject *
1138 BZ2File_tell(BZ2FileObject *self, PyObject *args)
1140 PyObject *ret = NULL;
1142 if (self->mode == MODE_CLOSED) {
1143 PyErr_SetString(PyExc_ValueError,
1144 "I/O operation on closed file");
1145 goto cleanup;
1148 #if !defined(HAVE_LARGEFILE_SUPPORT)
1149 ret = PyInt_FromLong(self->pos);
1150 #else
1151 ret = PyLong_FromLongLong(self->pos);
1152 #endif
1154 cleanup:
1155 return ret;
1158 PyDoc_STRVAR(BZ2File_close__doc__,
1159 "close() -> None or (perhaps) an integer\n\
1161 Close the file. Sets data attribute .closed to true. A closed file\n\
1162 cannot be used for further I/O operations. close() may be called more\n\
1163 than once without error.\n\
1166 static PyObject *
1167 BZ2File_close(BZ2FileObject *self)
1169 PyObject *ret = NULL;
1170 int bzerror = BZ_OK;
1172 ACQUIRE_LOCK(self);
1173 switch (self->mode) {
1174 case MODE_READ:
1175 case MODE_READ_EOF:
1176 BZ2_bzReadClose(&bzerror, self->fp);
1177 break;
1178 case MODE_WRITE:
1179 BZ2_bzWriteClose(&bzerror, self->fp,
1180 0, NULL, NULL);
1181 break;
1183 if (self->fp) {
1184 PyFile_DecUseCount((PyFileObject *)self->file);
1185 self->fp = NULL;
1187 self->mode = MODE_CLOSED;
1188 ret = PyObject_CallMethod(self->file, "close", NULL);
1189 if (bzerror != BZ_OK) {
1190 Util_CatchBZ2Error(bzerror);
1191 Py_XDECREF(ret);
1192 ret = NULL;
1195 RELEASE_LOCK(self);
1196 return ret;
1199 static PyObject *BZ2File_getiter(BZ2FileObject *self);
1201 static PyMethodDef BZ2File_methods[] = {
1202 {"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__},
1203 {"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__doc__},
1204 {"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readlines__doc__},
1205 {"xreadlines", (PyCFunction)BZ2File_getiter, METH_VARARGS, BZ2File_xreadlines__doc__},
1206 {"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__},
1207 {"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__doc__},
1208 {"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__},
1209 {"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__},
1210 {"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__},
1211 {NULL, NULL} /* sentinel */
1215 /* ===================================================================== */
1216 /* Getters and setters of BZ2File. */
1218 /* This is a hacked version of Python's fileobject.c:get_newlines(). */
1219 static PyObject *
1220 BZ2File_get_newlines(BZ2FileObject *self, void *closure)
1222 switch (self->f_newlinetypes) {
1223 case NEWLINE_UNKNOWN:
1224 Py_INCREF(Py_None);
1225 return Py_None;
1226 case NEWLINE_CR:
1227 return PyString_FromString("\r");
1228 case NEWLINE_LF:
1229 return PyString_FromString("\n");
1230 case NEWLINE_CR|NEWLINE_LF:
1231 return Py_BuildValue("(ss)", "\r", "\n");
1232 case NEWLINE_CRLF:
1233 return PyString_FromString("\r\n");
1234 case NEWLINE_CR|NEWLINE_CRLF:
1235 return Py_BuildValue("(ss)", "\r", "\r\n");
1236 case NEWLINE_LF|NEWLINE_CRLF:
1237 return Py_BuildValue("(ss)", "\n", "\r\n");
1238 case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF:
1239 return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
1240 default:
1241 PyErr_Format(PyExc_SystemError,
1242 "Unknown newlines value 0x%x\n",
1243 self->f_newlinetypes);
1244 return NULL;
1248 static PyObject *
1249 BZ2File_get_closed(BZ2FileObject *self, void *closure)
1251 return PyInt_FromLong(self->mode == MODE_CLOSED);
1254 static PyObject *
1255 BZ2File_get_mode(BZ2FileObject *self, void *closure)
1257 return PyObject_GetAttrString(self->file, "mode");
1260 static PyObject *
1261 BZ2File_get_name(BZ2FileObject *self, void *closure)
1263 return PyObject_GetAttrString(self->file, "name");
1266 static PyGetSetDef BZ2File_getset[] = {
1267 {"closed", (getter)BZ2File_get_closed, NULL,
1268 "True if the file is closed"},
1269 {"newlines", (getter)BZ2File_get_newlines, NULL,
1270 "end-of-line convention used in this file"},
1271 {"mode", (getter)BZ2File_get_mode, NULL,
1272 "file mode ('r', 'w', or 'U')"},
1273 {"name", (getter)BZ2File_get_name, NULL,
1274 "file name"},
1275 {NULL} /* Sentinel */
1279 /* ===================================================================== */
1280 /* Members of BZ2File_Type. */
1282 #undef OFF
1283 #define OFF(x) offsetof(BZ2FileObject, x)
1285 static PyMemberDef BZ2File_members[] = {
1286 {"softspace", T_INT, OFF(f_softspace), 0,
1287 "flag indicating that a space needs to be printed; used by print"},
1288 {NULL} /* Sentinel */
1291 /* ===================================================================== */
1292 /* Slot definitions for BZ2File_Type. */
1294 static int
1295 BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
1297 static char *kwlist[] = {"filename", "mode", "buffering",
1298 "compresslevel", 0};
1299 PyObject *name;
1300 char *mode = "r";
1301 int buffering = -1;
1302 int compresslevel = 9;
1303 int bzerror;
1304 int mode_char = 0;
1306 self->size = -1;
1308 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|sii:BZ2File",
1309 kwlist, &name, &mode, &buffering,
1310 &compresslevel))
1311 return -1;
1313 if (compresslevel < 1 || compresslevel > 9) {
1314 PyErr_SetString(PyExc_ValueError,
1315 "compresslevel must be between 1 and 9");
1316 return -1;
1319 for (;;) {
1320 int error = 0;
1321 switch (*mode) {
1322 case 'r':
1323 case 'w':
1324 if (mode_char)
1325 error = 1;
1326 mode_char = *mode;
1327 break;
1329 case 'b':
1330 break;
1332 case 'U':
1333 #ifdef __VMS
1334 self->f_univ_newline = 0;
1335 #else
1336 self->f_univ_newline = 1;
1337 #endif
1338 break;
1340 default:
1341 error = 1;
1342 break;
1344 if (error) {
1345 PyErr_Format(PyExc_ValueError,
1346 "invalid mode char %c", *mode);
1347 return -1;
1349 mode++;
1350 if (*mode == '\0')
1351 break;
1354 if (mode_char == 0) {
1355 mode_char = 'r';
1358 mode = (mode_char == 'r') ? "rb" : "wb";
1360 self->file = PyObject_CallFunction((PyObject*)&PyFile_Type, "(Osi)",
1361 name, mode, buffering);
1362 if (self->file == NULL)
1363 return -1;
1365 /* From now on, we have stuff to dealloc, so jump to error label
1366 * instead of returning */
1368 #ifdef WITH_THREAD
1369 self->lock = PyThread_allocate_lock();
1370 if (!self->lock) {
1371 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1372 goto error;
1374 #endif
1376 if (mode_char == 'r')
1377 self->fp = BZ2_bzReadOpen(&bzerror,
1378 PyFile_AsFile(self->file),
1379 0, 0, NULL, 0);
1380 else
1381 self->fp = BZ2_bzWriteOpen(&bzerror,
1382 PyFile_AsFile(self->file),
1383 compresslevel, 0, 0);
1385 if (bzerror != BZ_OK) {
1386 Util_CatchBZ2Error(bzerror);
1387 goto error;
1389 PyFile_IncUseCount((PyFileObject *)self->file);
1391 self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE;
1393 return 0;
1395 error:
1396 Py_CLEAR(self->file);
1397 #ifdef WITH_THREAD
1398 if (self->lock) {
1399 PyThread_free_lock(self->lock);
1400 self->lock = NULL;
1402 #endif
1403 return -1;
1406 static void
1407 BZ2File_dealloc(BZ2FileObject *self)
1409 int bzerror;
1410 #ifdef WITH_THREAD
1411 if (self->lock)
1412 PyThread_free_lock(self->lock);
1413 #endif
1414 switch (self->mode) {
1415 case MODE_READ:
1416 case MODE_READ_EOF:
1417 BZ2_bzReadClose(&bzerror, self->fp);
1418 break;
1419 case MODE_WRITE:
1420 BZ2_bzWriteClose(&bzerror, self->fp,
1421 0, NULL, NULL);
1422 break;
1424 if (self->fp) {
1425 PyFile_DecUseCount((PyFileObject *)self->file);
1426 self->fp = NULL;
1428 Util_DropReadAhead(self);
1429 Py_XDECREF(self->file);
1430 Py_TYPE(self)->tp_free((PyObject *)self);
1433 /* This is a hacked version of Python's fileobject.c:file_getiter(). */
1434 static PyObject *
1435 BZ2File_getiter(BZ2FileObject *self)
1437 if (self->mode == MODE_CLOSED) {
1438 PyErr_SetString(PyExc_ValueError,
1439 "I/O operation on closed file");
1440 return NULL;
1442 Py_INCREF((PyObject*)self);
1443 return (PyObject *)self;
1446 /* This is a hacked version of Python's fileobject.c:file_iternext(). */
1447 #define READAHEAD_BUFSIZE 8192
1448 static PyObject *
1449 BZ2File_iternext(BZ2FileObject *self)
1451 PyStringObject* ret;
1452 ACQUIRE_LOCK(self);
1453 if (self->mode == MODE_CLOSED) {
1454 RELEASE_LOCK(self);
1455 PyErr_SetString(PyExc_ValueError,
1456 "I/O operation on closed file");
1457 return NULL;
1459 ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE);
1460 RELEASE_LOCK(self);
1461 if (ret == NULL || PyString_GET_SIZE(ret) == 0) {
1462 Py_XDECREF(ret);
1463 return NULL;
1465 return (PyObject *)ret;
1468 /* ===================================================================== */
1469 /* BZ2File_Type definition. */
/* Docstring of the BZ2File type (installed as tp_doc of BZ2File_Type).
 * It is assembled from two adjacent PyDoc_STR() pieces: the constructor
 * description and the universal-newline notes. */
1471 PyDoc_VAR(BZ2File__doc__) =
1472 PyDoc_STR(
1473 "BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
1475 Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
1476 writing. When opened for writing, the file will be created if it doesn't\n\
1477 exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
1478 unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
1479 is given, must be a number between 1 and 9.\n\
1481 PyDoc_STR(
1482 "\n\
1483 Add a 'U' to mode to open the file for input with universal newline\n\
1484 support. Any line ending in the input file will be seen as a '\\n' in\n\
1485 Python. Also, a file so opened gains the attribute 'newlines'; the value\n\
1486 for this attribute is one of None (no newline read yet), '\\r', '\\n',\n\
1487 '\\r\\n' or a tuple containing all the newline types seen. Universal\n\
1488 newlines are available only when reading.\n\
/* Type object for bz2.BZ2File.  The initializer is positional: each
 * entry fills the slot named in its trailing comment.  The type is
 * subclassable (Py_TPFLAGS_BASETYPE), is its own iterator (tp_iter /
 * tp_iternext), and uses generic attribute access, allocation and
 * construction, with BZ2File_init as its initializer. */
1492 static PyTypeObject BZ2File_Type = {
1493 PyVarObject_HEAD_INIT(NULL, 0)
1494 "bz2.BZ2File", /*tp_name*/
1495 sizeof(BZ2FileObject), /*tp_basicsize*/
1496 0, /*tp_itemsize*/
1497 (destructor)BZ2File_dealloc, /*tp_dealloc*/
1498 0, /*tp_print*/
1499 0, /*tp_getattr*/
1500 0, /*tp_setattr*/
1501 0, /*tp_compare*/
1502 0, /*tp_repr*/
1503 0, /*tp_as_number*/
1504 0, /*tp_as_sequence*/
1505 0, /*tp_as_mapping*/
1506 0, /*tp_hash*/
1507 0, /*tp_call*/
1508 0, /*tp_str*/
1509 PyObject_GenericGetAttr,/*tp_getattro*/
1510 PyObject_GenericSetAttr,/*tp_setattro*/
1511 0, /*tp_as_buffer*/
1512 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1513 BZ2File__doc__, /*tp_doc*/
1514 0, /*tp_traverse*/
1515 0, /*tp_clear*/
1516 0, /*tp_richcompare*/
1517 0, /*tp_weaklistoffset*/
1518 (getiterfunc)BZ2File_getiter, /*tp_iter*/
1519 (iternextfunc)BZ2File_iternext, /*tp_iternext*/
1520 BZ2File_methods, /*tp_methods*/
1521 BZ2File_members, /*tp_members*/
1522 BZ2File_getset, /*tp_getset*/
1523 0, /*tp_base*/
1524 0, /*tp_dict*/
1525 0, /*tp_descr_get*/
1526 0, /*tp_descr_set*/
1527 0, /*tp_dictoffset*/
1528 (initproc)BZ2File_init, /*tp_init*/
1529 PyType_GenericAlloc, /*tp_alloc*/
1530 PyType_GenericNew, /*tp_new*/
1531 _PyObject_Del, /*tp_free*/
1532 0, /*tp_is_gc*/
1536 /* ===================================================================== */
1537 /* Methods of BZ2Comp. */
/* Docstring of BZ2Compressor.compress() (referenced from
 * BZ2Comp_methods). */
1539 PyDoc_STRVAR(BZ2Comp_compress__doc__,
1540 "compress(data) -> string\n\
1542 Provide more data to the compressor object. It will return chunks of\n\
1543 compressed data whenever possible. When you've finished providing data\n\
1544 to compress, call the flush() method to finish the compression process,\n\
1545 and return what is left in the internal buffers.\n\
1548 static PyObject *
1549 BZ2Comp_compress(BZ2CompObject *self, PyObject *args)
1551 char *data;
1552 int datasize;
1553 int bufsize = SMALLCHUNK;
1554 PY_LONG_LONG totalout;
1555 PyObject *ret = NULL;
1556 bz_stream *bzs = &self->bzs;
1557 int bzerror;
1559 if (!PyArg_ParseTuple(args, "s#:compress", &data, &datasize))
1560 return NULL;
1562 if (datasize == 0)
1563 return PyString_FromString("");
1565 ACQUIRE_LOCK(self);
1566 if (!self->running) {
1567 PyErr_SetString(PyExc_ValueError,
1568 "this object was already flushed");
1569 goto error;
1572 ret = PyString_FromStringAndSize(NULL, bufsize);
1573 if (!ret)
1574 goto error;
1576 bzs->next_in = data;
1577 bzs->avail_in = datasize;
1578 bzs->next_out = BUF(ret);
1579 bzs->avail_out = bufsize;
1581 totalout = BZS_TOTAL_OUT(bzs);
1583 for (;;) {
1584 Py_BEGIN_ALLOW_THREADS
1585 bzerror = BZ2_bzCompress(bzs, BZ_RUN);
1586 Py_END_ALLOW_THREADS
1587 if (bzerror != BZ_RUN_OK) {
1588 Util_CatchBZ2Error(bzerror);
1589 goto error;
1591 if (bzs->avail_in == 0)
1592 break; /* no more input data */
1593 if (bzs->avail_out == 0) {
1594 bufsize = Util_NewBufferSize(bufsize);
1595 if (_PyString_Resize(&ret, bufsize) < 0) {
1596 BZ2_bzCompressEnd(bzs);
1597 goto error;
1599 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1600 - totalout);
1601 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1605 _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
1607 RELEASE_LOCK(self);
1608 return ret;
1610 error:
1611 RELEASE_LOCK(self);
1612 Py_XDECREF(ret);
1613 return NULL;
/* Docstring of BZ2Compressor.flush() (referenced from BZ2Comp_methods). */
1616 PyDoc_STRVAR(BZ2Comp_flush__doc__,
1617 "flush() -> string\n\
1619 Finish the compression process and return what is left in internal buffers.\n\
1620 You must not use the compressor object after calling this method.\n\
1623 static PyObject *
1624 BZ2Comp_flush(BZ2CompObject *self)
1626 int bufsize = SMALLCHUNK;
1627 PyObject *ret = NULL;
1628 bz_stream *bzs = &self->bzs;
1629 PY_LONG_LONG totalout;
1630 int bzerror;
1632 ACQUIRE_LOCK(self);
1633 if (!self->running) {
1634 PyErr_SetString(PyExc_ValueError, "object was already "
1635 "flushed");
1636 goto error;
1638 self->running = 0;
1640 ret = PyString_FromStringAndSize(NULL, bufsize);
1641 if (!ret)
1642 goto error;
1644 bzs->next_out = BUF(ret);
1645 bzs->avail_out = bufsize;
1647 totalout = BZS_TOTAL_OUT(bzs);
1649 for (;;) {
1650 Py_BEGIN_ALLOW_THREADS
1651 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1652 Py_END_ALLOW_THREADS
1653 if (bzerror == BZ_STREAM_END) {
1654 break;
1655 } else if (bzerror != BZ_FINISH_OK) {
1656 Util_CatchBZ2Error(bzerror);
1657 goto error;
1659 if (bzs->avail_out == 0) {
1660 bufsize = Util_NewBufferSize(bufsize);
1661 if (_PyString_Resize(&ret, bufsize) < 0)
1662 goto error;
1663 bzs->next_out = BUF(ret);
1664 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1665 - totalout);
1666 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1670 if (bzs->avail_out != 0)
1671 _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
1673 RELEASE_LOCK(self);
1674 return ret;
1676 error:
1677 RELEASE_LOCK(self);
1678 Py_XDECREF(ret);
1679 return NULL;
1682 static PyMethodDef BZ2Comp_methods[] = {
1683 {"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS,
1684 BZ2Comp_compress__doc__},
1685 {"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS,
1686 BZ2Comp_flush__doc__},
1687 {NULL, NULL} /* sentinel */
1691 /* ===================================================================== */
1692 /* Slot definitions for BZ2Comp_Type. */
1694 static int
1695 BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs)
1697 int compresslevel = 9;
1698 int bzerror;
1699 static char *kwlist[] = {"compresslevel", 0};
1701 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor",
1702 kwlist, &compresslevel))
1703 return -1;
1705 if (compresslevel < 1 || compresslevel > 9) {
1706 PyErr_SetString(PyExc_ValueError,
1707 "compresslevel must be between 1 and 9");
1708 goto error;
1711 #ifdef WITH_THREAD
1712 self->lock = PyThread_allocate_lock();
1713 if (!self->lock) {
1714 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1715 goto error;
1717 #endif
1719 memset(&self->bzs, 0, sizeof(bz_stream));
1720 bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
1721 if (bzerror != BZ_OK) {
1722 Util_CatchBZ2Error(bzerror);
1723 goto error;
1726 self->running = 1;
1728 return 0;
1729 error:
1730 #ifdef WITH_THREAD
1731 if (self->lock) {
1732 PyThread_free_lock(self->lock);
1733 self->lock = NULL;
1735 #endif
1736 return -1;
1739 static void
1740 BZ2Comp_dealloc(BZ2CompObject *self)
1742 #ifdef WITH_THREAD
1743 if (self->lock)
1744 PyThread_free_lock(self->lock);
1745 #endif
1746 BZ2_bzCompressEnd(&self->bzs);
1747 Py_TYPE(self)->tp_free((PyObject *)self);
1751 /* ===================================================================== */
1752 /* BZ2Comp_Type definition. */
/* Docstring of the BZ2Compressor type (installed as tp_doc of
 * BZ2Comp_Type). */
1754 PyDoc_STRVAR(BZ2Comp__doc__,
1755 "BZ2Compressor([compresslevel=9]) -> compressor object\n\
1757 Create a new compressor object. This object may be used to compress\n\
1758 data sequentially. If you want to compress data in one shot, use the\n\
1759 compress() function instead. The compresslevel parameter, if given,\n\
1760 must be a number between 1 and 9.\n\
/* Type object for bz2.BZ2Compressor.  The initializer is positional:
 * each entry fills the slot named in its trailing comment.  The type is
 * subclassable (Py_TPFLAGS_BASETYPE), exposes only methods (no members
 * or getset entries), and uses BZ2Comp_init as its initializer. */
1763 static PyTypeObject BZ2Comp_Type = {
1764 PyVarObject_HEAD_INIT(NULL, 0)
1765 "bz2.BZ2Compressor", /*tp_name*/
1766 sizeof(BZ2CompObject), /*tp_basicsize*/
1767 0, /*tp_itemsize*/
1768 (destructor)BZ2Comp_dealloc, /*tp_dealloc*/
1769 0, /*tp_print*/
1770 0, /*tp_getattr*/
1771 0, /*tp_setattr*/
1772 0, /*tp_compare*/
1773 0, /*tp_repr*/
1774 0, /*tp_as_number*/
1775 0, /*tp_as_sequence*/
1776 0, /*tp_as_mapping*/
1777 0, /*tp_hash*/
1778 0, /*tp_call*/
1779 0, /*tp_str*/
1780 PyObject_GenericGetAttr,/*tp_getattro*/
1781 PyObject_GenericSetAttr,/*tp_setattro*/
1782 0, /*tp_as_buffer*/
1783 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1784 BZ2Comp__doc__, /*tp_doc*/
1785 0, /*tp_traverse*/
1786 0, /*tp_clear*/
1787 0, /*tp_richcompare*/
1788 0, /*tp_weaklistoffset*/
1789 0, /*tp_iter*/
1790 0, /*tp_iternext*/
1791 BZ2Comp_methods, /*tp_methods*/
1792 0, /*tp_members*/
1793 0, /*tp_getset*/
1794 0, /*tp_base*/
1795 0, /*tp_dict*/
1796 0, /*tp_descr_get*/
1797 0, /*tp_descr_set*/
1798 0, /*tp_dictoffset*/
1799 (initproc)BZ2Comp_init, /*tp_init*/
1800 PyType_GenericAlloc, /*tp_alloc*/
1801 PyType_GenericNew, /*tp_new*/
1802 _PyObject_Del, /*tp_free*/
1803 0, /*tp_is_gc*/
1807 /* ===================================================================== */
1808 /* Members of BZ2Decomp. */
1810 #undef OFF
1811 #define OFF(x) offsetof(BZ2DecompObject, x)
1813 static PyMemberDef BZ2Decomp_members[] = {
1814 {"unused_data", T_OBJECT, OFF(unused_data), RO},
1815 {NULL} /* Sentinel */
1819 /* ===================================================================== */
1820 /* Methods of BZ2Decomp. */
/* Docstring of BZ2Decompressor.decompress() (referenced from
 * BZ2Decomp_methods). */
1822 PyDoc_STRVAR(BZ2Decomp_decompress__doc__,
1823 "decompress(data) -> string\n\
1825 Provide more data to the decompressor object. It will return chunks\n\
1826 of decompressed data whenever possible. If you try to decompress data\n\
1827 after the end of stream is found, EOFError will be raised. If any data\n\
1828 was found after the end of stream, it'll be ignored and saved in\n\
1829 unused_data attribute.\n\
1832 static PyObject *
1833 BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args)
1835 char *data;
1836 int datasize;
1837 int bufsize = SMALLCHUNK;
1838 PY_LONG_LONG totalout;
1839 PyObject *ret = NULL;
1840 bz_stream *bzs = &self->bzs;
1841 int bzerror;
1843 if (!PyArg_ParseTuple(args, "s#:decompress", &data, &datasize))
1844 return NULL;
1846 ACQUIRE_LOCK(self);
1847 if (!self->running) {
1848 PyErr_SetString(PyExc_EOFError, "end of stream was "
1849 "already found");
1850 goto error;
1853 ret = PyString_FromStringAndSize(NULL, bufsize);
1854 if (!ret)
1855 goto error;
1857 bzs->next_in = data;
1858 bzs->avail_in = datasize;
1859 bzs->next_out = BUF(ret);
1860 bzs->avail_out = bufsize;
1862 totalout = BZS_TOTAL_OUT(bzs);
1864 for (;;) {
1865 Py_BEGIN_ALLOW_THREADS
1866 bzerror = BZ2_bzDecompress(bzs);
1867 Py_END_ALLOW_THREADS
1868 if (bzerror == BZ_STREAM_END) {
1869 if (bzs->avail_in != 0) {
1870 Py_DECREF(self->unused_data);
1871 self->unused_data =
1872 PyString_FromStringAndSize(bzs->next_in,
1873 bzs->avail_in);
1875 self->running = 0;
1876 break;
1878 if (bzerror != BZ_OK) {
1879 Util_CatchBZ2Error(bzerror);
1880 goto error;
1882 if (bzs->avail_in == 0)
1883 break; /* no more input data */
1884 if (bzs->avail_out == 0) {
1885 bufsize = Util_NewBufferSize(bufsize);
1886 if (_PyString_Resize(&ret, bufsize) < 0) {
1887 BZ2_bzDecompressEnd(bzs);
1888 goto error;
1890 bzs->next_out = BUF(ret);
1891 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1892 - totalout);
1893 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1897 if (bzs->avail_out != 0)
1898 _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
1900 RELEASE_LOCK(self);
1901 return ret;
1903 error:
1904 RELEASE_LOCK(self);
1905 Py_XDECREF(ret);
1906 return NULL;
1909 static PyMethodDef BZ2Decomp_methods[] = {
1910 {"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_decompress__doc__},
1911 {NULL, NULL} /* sentinel */
1915 /* ===================================================================== */
1916 /* Slot definitions for BZ2Decomp_Type. */
1918 static int
1919 BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs)
1921 int bzerror;
1923 if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
1924 return -1;
1926 #ifdef WITH_THREAD
1927 self->lock = PyThread_allocate_lock();
1928 if (!self->lock) {
1929 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1930 goto error;
1932 #endif
1934 self->unused_data = PyString_FromString("");
1935 if (!self->unused_data)
1936 goto error;
1938 memset(&self->bzs, 0, sizeof(bz_stream));
1939 bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
1940 if (bzerror != BZ_OK) {
1941 Util_CatchBZ2Error(bzerror);
1942 goto error;
1945 self->running = 1;
1947 return 0;
1949 error:
1950 #ifdef WITH_THREAD
1951 if (self->lock) {
1952 PyThread_free_lock(self->lock);
1953 self->lock = NULL;
1955 #endif
1956 Py_CLEAR(self->unused_data);
1957 return -1;
1960 static void
1961 BZ2Decomp_dealloc(BZ2DecompObject *self)
1963 #ifdef WITH_THREAD
1964 if (self->lock)
1965 PyThread_free_lock(self->lock);
1966 #endif
1967 Py_XDECREF(self->unused_data);
1968 BZ2_bzDecompressEnd(&self->bzs);
1969 Py_TYPE(self)->tp_free((PyObject *)self);
1973 /* ===================================================================== */
1974 /* BZ2Decomp_Type definition. */
/* Docstring of the BZ2Decompressor type (installed as tp_doc of
 * BZ2Decomp_Type). */
1976 PyDoc_STRVAR(BZ2Decomp__doc__,
1977 "BZ2Decompressor() -> decompressor object\n\
1979 Create a new decompressor object. This object may be used to decompress\n\
1980 data sequentially. If you want to decompress data in one shot, use the\n\
1981 decompress() function instead.\n\
/* Type object for bz2.BZ2Decompressor.  The initializer is positional:
 * each entry fills the slot named in its trailing comment.  The type is
 * subclassable (Py_TPFLAGS_BASETYPE), exposes methods and members (no
 * getset entries), and uses BZ2Decomp_init as its initializer. */
1984 static PyTypeObject BZ2Decomp_Type = {
1985 PyVarObject_HEAD_INIT(NULL, 0)
1986 "bz2.BZ2Decompressor", /*tp_name*/
1987 sizeof(BZ2DecompObject), /*tp_basicsize*/
1988 0, /*tp_itemsize*/
1989 (destructor)BZ2Decomp_dealloc, /*tp_dealloc*/
1990 0, /*tp_print*/
1991 0, /*tp_getattr*/
1992 0, /*tp_setattr*/
1993 0, /*tp_compare*/
1994 0, /*tp_repr*/
1995 0, /*tp_as_number*/
1996 0, /*tp_as_sequence*/
1997 0, /*tp_as_mapping*/
1998 0, /*tp_hash*/
1999 0, /*tp_call*/
2000 0, /*tp_str*/
2001 PyObject_GenericGetAttr,/*tp_getattro*/
2002 PyObject_GenericSetAttr,/*tp_setattro*/
2003 0, /*tp_as_buffer*/
2004 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
2005 BZ2Decomp__doc__, /*tp_doc*/
2006 0, /*tp_traverse*/
2007 0, /*tp_clear*/
2008 0, /*tp_richcompare*/
2009 0, /*tp_weaklistoffset*/
2010 0, /*tp_iter*/
2011 0, /*tp_iternext*/
2012 BZ2Decomp_methods, /*tp_methods*/
2013 BZ2Decomp_members, /*tp_members*/
2014 0, /*tp_getset*/
2015 0, /*tp_base*/
2016 0, /*tp_dict*/
2017 0, /*tp_descr_get*/
2018 0, /*tp_descr_set*/
2019 0, /*tp_dictoffset*/
2020 (initproc)BZ2Decomp_init, /*tp_init*/
2021 PyType_GenericAlloc, /*tp_alloc*/
2022 PyType_GenericNew, /*tp_new*/
2023 _PyObject_Del, /*tp_free*/
2024 0, /*tp_is_gc*/
2028 /* ===================================================================== */
2029 /* Module functions. */
/* Docstring of the module-level compress() function (referenced from
 * bz2_methods). */
2031 PyDoc_STRVAR(bz2_compress__doc__,
2032 "compress(data [, compresslevel=9]) -> string\n\
2034 Compress data in one shot. If you want to compress data sequentially,\n\
2035 use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
2036 given, must be a number between 1 and 9.\n\
2039 static PyObject *
2040 bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs)
2042 int compresslevel=9;
2043 char *data;
2044 int datasize;
2045 int bufsize;
2046 PyObject *ret = NULL;
2047 bz_stream _bzs;
2048 bz_stream *bzs = &_bzs;
2049 int bzerror;
2050 static char *kwlist[] = {"data", "compresslevel", 0};
2052 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|i",
2053 kwlist, &data, &datasize,
2054 &compresslevel))
2055 return NULL;
2057 if (compresslevel < 1 || compresslevel > 9) {
2058 PyErr_SetString(PyExc_ValueError,
2059 "compresslevel must be between 1 and 9");
2060 return NULL;
2063 /* Conforming to bz2 manual, this is large enough to fit compressed
2064 * data in one shot. We will check it later anyway. */
2065 bufsize = datasize + (datasize/100+1) + 600;
2067 ret = PyString_FromStringAndSize(NULL, bufsize);
2068 if (!ret)
2069 return NULL;
2071 memset(bzs, 0, sizeof(bz_stream));
2073 bzs->next_in = data;
2074 bzs->avail_in = datasize;
2075 bzs->next_out = BUF(ret);
2076 bzs->avail_out = bufsize;
2078 bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0);
2079 if (bzerror != BZ_OK) {
2080 Util_CatchBZ2Error(bzerror);
2081 Py_DECREF(ret);
2082 return NULL;
2085 for (;;) {
2086 Py_BEGIN_ALLOW_THREADS
2087 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
2088 Py_END_ALLOW_THREADS
2089 if (bzerror == BZ_STREAM_END) {
2090 break;
2091 } else if (bzerror != BZ_FINISH_OK) {
2092 BZ2_bzCompressEnd(bzs);
2093 Util_CatchBZ2Error(bzerror);
2094 Py_DECREF(ret);
2095 return NULL;
2097 if (bzs->avail_out == 0) {
2098 bufsize = Util_NewBufferSize(bufsize);
2099 if (_PyString_Resize(&ret, bufsize) < 0) {
2100 BZ2_bzCompressEnd(bzs);
2101 Py_DECREF(ret);
2102 return NULL;
2104 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2105 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2109 if (bzs->avail_out != 0)
2110 _PyString_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs));
2111 BZ2_bzCompressEnd(bzs);
2113 return ret;
/* Docstring of the module-level decompress() function (referenced from
 * bz2_methods). */
2116 PyDoc_STRVAR(bz2_decompress__doc__,
2117 "decompress(data) -> decompressed data\n\
2119 Decompress data in one shot. If you want to decompress data sequentially,\n\
2120 use an instance of BZ2Decompressor instead.\n\
2123 static PyObject *
2124 bz2_decompress(PyObject *self, PyObject *args)
2126 char *data;
2127 int datasize;
2128 int bufsize = SMALLCHUNK;
2129 PyObject *ret;
2130 bz_stream _bzs;
2131 bz_stream *bzs = &_bzs;
2132 int bzerror;
2134 if (!PyArg_ParseTuple(args, "s#:decompress", &data, &datasize))
2135 return NULL;
2137 if (datasize == 0)
2138 return PyString_FromString("");
2140 ret = PyString_FromStringAndSize(NULL, bufsize);
2141 if (!ret)
2142 return NULL;
2144 memset(bzs, 0, sizeof(bz_stream));
2146 bzs->next_in = data;
2147 bzs->avail_in = datasize;
2148 bzs->next_out = BUF(ret);
2149 bzs->avail_out = bufsize;
2151 bzerror = BZ2_bzDecompressInit(bzs, 0, 0);
2152 if (bzerror != BZ_OK) {
2153 Util_CatchBZ2Error(bzerror);
2154 Py_DECREF(ret);
2155 return NULL;
2158 for (;;) {
2159 Py_BEGIN_ALLOW_THREADS
2160 bzerror = BZ2_bzDecompress(bzs);
2161 Py_END_ALLOW_THREADS
2162 if (bzerror == BZ_STREAM_END) {
2163 break;
2164 } else if (bzerror != BZ_OK) {
2165 BZ2_bzDecompressEnd(bzs);
2166 Util_CatchBZ2Error(bzerror);
2167 Py_DECREF(ret);
2168 return NULL;
2170 if (bzs->avail_in == 0) {
2171 BZ2_bzDecompressEnd(bzs);
2172 PyErr_SetString(PyExc_ValueError,
2173 "couldn't find end of stream");
2174 Py_DECREF(ret);
2175 return NULL;
2177 if (bzs->avail_out == 0) {
2178 bufsize = Util_NewBufferSize(bufsize);
2179 if (_PyString_Resize(&ret, bufsize) < 0) {
2180 BZ2_bzDecompressEnd(bzs);
2181 Py_DECREF(ret);
2182 return NULL;
2184 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2185 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2189 if (bzs->avail_out != 0)
2190 _PyString_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs));
2191 BZ2_bzDecompressEnd(bzs);
2193 return ret;
2196 static PyMethodDef bz2_methods[] = {
2197 {"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS,
2198 bz2_compress__doc__},
2199 {"decompress", (PyCFunction) bz2_decompress, METH_VARARGS,
2200 bz2_decompress__doc__},
2201 {NULL, NULL} /* sentinel */
2204 /* ===================================================================== */
2205 /* Initialization function. */
/* Docstring of the bz2 module itself (passed to Py_InitModule3() in
 * initbz2()). */
2207 PyDoc_STRVAR(bz2__doc__,
2208 "The python bz2 module provides a comprehensive interface for\n\
2209 the bz2 compression library. It implements a complete file\n\
2210 interface, one shot (de)compression functions, and types for\n\
2211 sequential (de)compression.\n\
2214 PyMODINIT_FUNC
2215 initbz2(void)
2217 PyObject *m;
2219 Py_TYPE(&BZ2File_Type) = &PyType_Type;
2220 Py_TYPE(&BZ2Comp_Type) = &PyType_Type;
2221 Py_TYPE(&BZ2Decomp_Type) = &PyType_Type;
2223 m = Py_InitModule3("bz2", bz2_methods, bz2__doc__);
2224 if (m == NULL)
2225 return;
2227 PyModule_AddObject(m, "__author__", PyString_FromString(__author__));
2229 Py_INCREF(&BZ2File_Type);
2230 PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type);
2232 Py_INCREF(&BZ2Comp_Type);
2233 PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type);
2235 Py_INCREF(&BZ2Decomp_Type);
2236 PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type);