3 python-bz2 - python bz2 library interface
5 Copyright (c) 2002 Gustavo Niemeyer <niemeyer@conectiva.com>
6 Copyright (c) 2002 Python Software Foundation; All Rights Reserved
13 #include "structmember.h"
19 static char __author__
[] =
20 "The bz2 python module was written by:\n\
22 Gustavo Niemeyer <niemeyer@conectiva.com>\n\
25 /* Our very own off_t-like type, 64-bit if possible */
26 /* copied from Objects/fileobject.c */
27 #if !defined(HAVE_LARGEFILE_SUPPORT)
28 typedef off_t Py_off_t
;
29 #elif SIZEOF_OFF_T >= 8
30 typedef off_t Py_off_t
;
31 #elif SIZEOF_FPOS_T >= 8
32 typedef fpos_t Py_off_t
;
34 #error "Large file support, but neither off_t nor fpos_t is large enough."
/* Return the character storage of a Python string object.  The argument is
 * parenthesized so that an expression argument is cast as a whole rather
 * than having the cast bind only to its first operand. */
#define BUF(v) PyString_AS_STRING((PyStringObject *)(v))
41 #define MODE_READ_EOF 2
44 #define BZ2FileObject_Check(v) ((v)->ob_type == &BZ2File_Type)
47 #ifdef BZ_CONFIG_ERROR
/* Total output byte count of a bz_stream, built by combining the split
 * total_out_hi32 / total_out_lo32 counters into one long.  The parameter is
 * parenthesized so pointer expressions such as `p + 1` or `&s` expand
 * correctly. */
#define BZS_TOTAL_OUT(bzs) \
	(((long)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
52 #elif SIZEOF_LONG_LONG >= 8
/* Total output byte count of a bz_stream, built by combining the split
 * total_out_hi32 / total_out_lo32 counters into one PY_LONG_LONG.  The
 * parameter is parenthesized so pointer expressions such as `p + 1` or `&s`
 * expand correctly. */
#define BZS_TOTAL_OUT(bzs) \
	(((PY_LONG_LONG)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
56 #define BZS_TOTAL_OUT(bzs) \
60 #else /* ! BZ_CONFIG_ERROR */
/* Compatibility aliases used when BZ_CONFIG_ERROR is not defined: each
 * BZ2_-prefixed name used in this file is mapped onto the unprefixed name
 * (presumably the entry points of an older libbz2 -- confirm against the
 * library headers). */
#define BZ2_bzRead bzRead
#define BZ2_bzReadOpen bzReadOpen
#define BZ2_bzReadClose bzReadClose
#define BZ2_bzWrite bzWrite
#define BZ2_bzWriteOpen bzWriteOpen
#define BZ2_bzWriteClose bzWriteClose
#define BZ2_bzCompress bzCompress
#define BZ2_bzCompressInit bzCompressInit
#define BZ2_bzCompressEnd bzCompressEnd
#define BZ2_bzDecompress bzDecompress
#define BZ2_bzDecompressInit bzDecompressInit
#define BZ2_bzDecompressEnd bzDecompressEnd

/* Total output byte count, read from the stream's single total_out field.
 * Both the parameter and the whole expansion are parenthesized so the macro
 * is safe with expression arguments and inside larger expressions. */
#define BZS_TOTAL_OUT(bzs) ((bzs)->total_out)
77 #endif /* ! BZ_CONFIG_ERROR */
/* Acquire / release the per-object lock stored in obj->lock.  The acquire
 * call passes 1 as its second argument (presumably the "wait" flag of
 * PyThread_acquire_lock -- confirm against pythread.h).  The parameter is
 * parenthesized so expression arguments such as `&holder` expand
 * correctly. */
#define ACQUIRE_LOCK(obj) PyThread_acquire_lock((obj)->lock, 1)
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
84 #define ACQUIRE_LOCK(obj)
85 #define RELEASE_LOCK(obj)
88 /* Bits in f_newlinetypes */
89 #define NEWLINE_UNKNOWN 0 /* No newline seen, yet */
90 #define NEWLINE_CR 1 /* \r newline seen */
91 #define NEWLINE_LF 2 /* \n newline seen */
92 #define NEWLINE_CRLF 4 /* \r\n newline seen */
94 /* ===================================================================== */
95 /* Structure definitions. */
101 char* f_buf
; /* Allocated readahead buffer */
102 char* f_bufend
; /* Points after last occupied position */
103 char* f_bufptr
; /* Current buffer position */
105 int f_softspace
; /* Flag used by 'print' command */
107 int f_univ_newline
; /* Handle any newline convention */
108 int f_newlinetypes
; /* Types of newlines seen */
109 int f_skipnextlf
; /* Skip next \n */
116 PyThread_type_lock lock
;
125 PyThread_type_lock lock
;
133 PyObject
*unused_data
;
135 PyThread_type_lock lock
;
139 /* ===================================================================== */
140 /* Utility functions. */
143 Util_CatchBZ2Error(int bzerror
)
151 #ifdef BZ_CONFIG_ERROR
152 case BZ_CONFIG_ERROR
:
153 PyErr_SetString(PyExc_SystemError
,
154 "the bz2 library was not compiled "
161 PyErr_SetString(PyExc_ValueError
,
162 "the bz2 library has received wrong "
173 case BZ_DATA_ERROR_MAGIC
:
174 PyErr_SetString(PyExc_IOError
, "invalid data stream");
179 PyErr_SetString(PyExc_IOError
, "unknown IO error");
183 case BZ_UNEXPECTED_EOF
:
184 PyErr_SetString(PyExc_EOFError
,
185 "compressed file ended before the "
186 "logical end-of-stream was detected");
190 case BZ_SEQUENCE_ERROR
:
191 PyErr_SetString(PyExc_RuntimeError
,
192 "wrong sequence of bz2 library "
201 #define SMALLCHUNK 8192
203 #define SMALLCHUNK BUFSIZ
207 #define BIGCHUNK (512 * 32)
209 #define BIGCHUNK (512 * 1024)
212 /* This is a hacked version of Python's fileobject.c:new_buffersize(). */
/* Compute the size a growing read buffer should be enlarged to.
 *
 *   currentsize: the buffer's present size in bytes.
 *
 * Growth policy, as visible below:
 *   - currentsize <= SMALLCHUNK            -> currentsize + SMALLCHUNK
 *   - SMALLCHUNK < currentsize <= BIGCHUNK -> currentsize doubled
 *   - currentsize > BIGCHUNK               -> currentsize + BIGCHUNK
 *
 * No overflow check is made here; the callers visible in this file compare
 * the resulting size against INT_MAX themselves. */
214 Util_NewBufferSize(size_t currentsize
)
216 if (currentsize
> SMALLCHUNK
) {
217 /* Keep doubling until we reach BIGCHUNK;
218 then keep adding BIGCHUNK. */
219 if (currentsize
<= BIGCHUNK
)
220 return currentsize
+ currentsize
;
222 return currentsize
+ BIGCHUNK
;
224 return currentsize
+ SMALLCHUNK
;
227 /* This is a hacked version of Python's fileobject.c:get_line(). */
229 Util_GetLine(BZ2FileObject
*f
, int n
)
233 size_t total_v_size
; /* total # of slots in buffer */
234 size_t used_v_size
; /* # used slots in buffer */
235 size_t increment
; /* amount to increment the buffer */
238 int newlinetypes
= f
->f_newlinetypes
;
239 int skipnextlf
= f
->f_skipnextlf
;
240 int univ_newline
= f
->f_univ_newline
;
242 total_v_size
= n
> 0 ? n
: 100;
243 v
= PyString_FromStringAndSize((char *)NULL
, total_v_size
);
248 end
= buf
+ total_v_size
;
251 Py_BEGIN_ALLOW_THREADS
254 BZ2_bzRead(&bzerror
, f
->fp
, &c
, 1);
256 if (bzerror
!= BZ_OK
|| buf
== end
)
261 /* Seeing a \n here with
262 * skipnextlf true means we
265 newlinetypes
|= NEWLINE_CRLF
;
266 BZ2_bzRead(&bzerror
, f
->fp
,
268 if (bzerror
!= BZ_OK
)
271 newlinetypes
|= NEWLINE_CR
;
277 } else if ( c
== '\n')
278 newlinetypes
|= NEWLINE_LF
;
280 if (c
== '\n') break;
282 if (bzerror
== BZ_STREAM_END
&& skipnextlf
)
283 newlinetypes
|= NEWLINE_CR
;
284 } else /* If not universal newlines use the normal loop */
286 BZ2_bzRead(&bzerror
, f
->fp
, &c
, 1);
289 } while (bzerror
== BZ_OK
&& c
!= '\n' && buf
!= end
);
291 f
->f_newlinetypes
= newlinetypes
;
292 f
->f_skipnextlf
= skipnextlf
;
293 if (bzerror
== BZ_STREAM_END
) {
295 f
->mode
= MODE_READ_EOF
;
297 } else if (bzerror
!= BZ_OK
) {
298 Util_CatchBZ2Error(bzerror
);
304 /* Must be because buf == end */
307 used_v_size
= total_v_size
;
308 increment
= total_v_size
>> 2; /* mild exponential growth */
309 total_v_size
+= increment
;
310 if (total_v_size
> INT_MAX
) {
311 PyErr_SetString(PyExc_OverflowError
,
312 "line is longer than a Python string can hold");
316 if (_PyString_Resize(&v
, total_v_size
) < 0)
318 buf
= BUF(v
) + used_v_size
;
319 end
= BUF(v
) + total_v_size
;
322 used_v_size
= buf
- BUF(v
);
323 if (used_v_size
!= total_v_size
)
324 _PyString_Resize(&v
, used_v_size
);
328 /* This is a hacked version of Python's
329 * fileobject.c:Py_UniversalNewlineFread(). */
331 Util_UnivNewlineRead(int *bzerror
, BZFILE
*stream
,
332 char* buf
, size_t n
, BZ2FileObject
*f
)
335 int newlinetypes
, skipnextlf
;
338 assert(stream
!= NULL
);
340 if (!f
->f_univ_newline
)
341 return BZ2_bzRead(bzerror
, stream
, buf
, n
);
343 newlinetypes
= f
->f_newlinetypes
;
344 skipnextlf
= f
->f_skipnextlf
;
346 /* Invariant: n is the number of bytes remaining to be filled
354 nread
= BZ2_bzRead(bzerror
, stream
, dst
, n
);
356 n
-= nread
; /* assuming 1 byte out for each in; will adjust */
357 shortread
= n
!= 0; /* true iff EOF or error */
361 /* Save as LF and set flag to skip next LF. */
365 else if (skipnextlf
&& c
== '\n') {
366 /* Skip LF, and remember we saw CR LF. */
368 newlinetypes
|= NEWLINE_CRLF
;
372 /* Normal char to be stored in buffer. Also
373 * update the newlinetypes flag if either this
374 * is an LF or the previous char was a CR.
377 newlinetypes
|= NEWLINE_LF
;
379 newlinetypes
|= NEWLINE_CR
;
385 /* If this is EOF, update type flags. */
386 if (skipnextlf
&& *bzerror
== BZ_STREAM_END
)
387 newlinetypes
|= NEWLINE_CR
;
391 f
->f_newlinetypes
= newlinetypes
;
392 f
->f_skipnextlf
= skipnextlf
;
396 /* This is a hacked version of Python's fileobject.c:drop_readahead(). */
/* Release the readahead buffer of f, if one is allocated.
 *
 * When f->f_buf is non-NULL it is handed to PyMem_Free(); when it is NULL
 * nothing is done, so the call is safe on a file with no readahead buffer.
 *
 * NOTE(review): other code in this file tests f->f_buf against NULL after
 * calling this helper, so the pointer is presumably reset to NULL here as
 * well -- that statement is not visible in this view; confirm. */
398 Util_DropReadAhead(BZ2FileObject
*f
)
400 if (f
->f_buf
!= NULL
) {
401 PyMem_Free(f
->f_buf
);
406 /* This is a hacked version of Python's fileobject.c:readahead(). */
408 Util_ReadAhead(BZ2FileObject
*f
, int bufsize
)
413 if (f
->f_buf
!= NULL
) {
414 if((f
->f_bufend
- f
->f_bufptr
) >= 1)
417 Util_DropReadAhead(f
);
419 if (f
->mode
== MODE_READ_EOF
) {
420 f
->f_bufptr
= f
->f_buf
;
421 f
->f_bufend
= f
->f_buf
;
424 if ((f
->f_buf
= PyMem_Malloc(bufsize
)) == NULL
) {
427 Py_BEGIN_ALLOW_THREADS
428 chunksize
= Util_UnivNewlineRead(&bzerror
, f
->fp
, f
->f_buf
,
432 if (bzerror
== BZ_STREAM_END
) {
434 f
->mode
= MODE_READ_EOF
;
435 } else if (bzerror
!= BZ_OK
) {
436 Util_CatchBZ2Error(bzerror
);
437 Util_DropReadAhead(f
);
440 f
->f_bufptr
= f
->f_buf
;
441 f
->f_bufend
= f
->f_buf
+ chunksize
;
445 /* This is a hacked version of Python's
446 * fileobject.c:readahead_get_line_skip(). */
447 static PyStringObject
*
448 Util_ReadAheadGetLineSkip(BZ2FileObject
*f
, int skip
, int bufsize
)
455 if (f
->f_buf
== NULL
)
456 if (Util_ReadAhead(f
, bufsize
) < 0)
459 len
= f
->f_bufend
- f
->f_bufptr
;
461 return (PyStringObject
*)
462 PyString_FromStringAndSize(NULL
, skip
);
463 bufptr
= memchr(f
->f_bufptr
, '\n', len
);
464 if (bufptr
!= NULL
) {
465 bufptr
++; /* Count the '\n' */
466 len
= bufptr
- f
->f_bufptr
;
467 s
= (PyStringObject
*)
468 PyString_FromStringAndSize(NULL
, skip
+len
);
471 memcpy(PyString_AS_STRING(s
)+skip
, f
->f_bufptr
, len
);
472 f
->f_bufptr
= bufptr
;
473 if (bufptr
== f
->f_bufend
)
474 Util_DropReadAhead(f
);
476 bufptr
= f
->f_bufptr
;
478 f
->f_buf
= NULL
; /* Force new readahead buffer */
479 s
= Util_ReadAheadGetLineSkip(f
, skip
+len
,
480 bufsize
+ (bufsize
>>2));
485 memcpy(PyString_AS_STRING(s
)+skip
, bufptr
, len
);
491 /* ===================================================================== */
492 /* Methods of BZ2File. */
494 PyDoc_STRVAR(BZ2File_read__doc__
,
495 "read([size]) -> string\n\
497 Read at most size uncompressed bytes, returned as a string. If the size\n\
498 argument is negative or omitted, read until EOF is reached.\n\
501 /* This is a hacked version of Python's fileobject.c:file_read(). */
503 BZ2File_read(BZ2FileObject
*self
, PyObject
*args
)
505 long bytesrequested
= -1;
506 size_t bytesread
, buffersize
, chunksize
;
508 PyObject
*ret
= NULL
;
510 if (!PyArg_ParseTuple(args
, "|l:read", &bytesrequested
))
514 switch (self
->mode
) {
518 ret
= PyString_FromString("");
521 PyErr_SetString(PyExc_ValueError
,
522 "I/O operation on closed file");
525 PyErr_SetString(PyExc_IOError
,
526 "file is not ready for reading");
530 if (bytesrequested
< 0)
531 buffersize
= Util_NewBufferSize((size_t)0);
533 buffersize
= bytesrequested
;
534 if (buffersize
> INT_MAX
) {
535 PyErr_SetString(PyExc_OverflowError
,
536 "requested number of bytes is "
537 "more than a Python string can hold");
540 ret
= PyString_FromStringAndSize((char *)NULL
, buffersize
);
546 Py_BEGIN_ALLOW_THREADS
547 chunksize
= Util_UnivNewlineRead(&bzerror
, self
->fp
,
549 buffersize
-bytesread
,
551 self
->pos
+= chunksize
;
553 bytesread
+= chunksize
;
554 if (bzerror
== BZ_STREAM_END
) {
555 self
->size
= self
->pos
;
556 self
->mode
= MODE_READ_EOF
;
558 } else if (bzerror
!= BZ_OK
) {
559 Util_CatchBZ2Error(bzerror
);
564 if (bytesrequested
< 0) {
565 buffersize
= Util_NewBufferSize(buffersize
);
566 if (_PyString_Resize(&ret
, buffersize
) < 0)
572 if (bytesread
!= buffersize
)
573 _PyString_Resize(&ret
, bytesread
);
580 PyDoc_STRVAR(BZ2File_readline__doc__
,
581 "readline([size]) -> string\n\
583 Return the next line from the file, as a string, retaining newline.\n\
584 A non-negative size argument will limit the maximum number of bytes to\n\
585 return (an incomplete line may be returned then). Return an empty\n\
590 BZ2File_readline(BZ2FileObject
*self
, PyObject
*args
)
592 PyObject
*ret
= NULL
;
595 if (!PyArg_ParseTuple(args
, "|i:readline", &sizehint
))
599 switch (self
->mode
) {
603 ret
= PyString_FromString("");
606 PyErr_SetString(PyExc_ValueError
,
607 "I/O operation on closed file");
610 PyErr_SetString(PyExc_IOError
,
611 "file is not ready for reading");
616 ret
= PyString_FromString("");
618 ret
= Util_GetLine(self
, (sizehint
< 0) ? 0 : sizehint
);
625 PyDoc_STRVAR(BZ2File_readlines__doc__
,
626 "readlines([size]) -> list\n\
628 Call readline() repeatedly and return a list of lines read.\n\
629 The optional size argument, if given, is an approximate bound on the\n\
630 total number of bytes in the lines returned.\n\
633 /* This is a hacked version of Python's fileobject.c:file_readlines(). */
635 BZ2File_readlines(BZ2FileObject
*self
, PyObject
*args
)
638 PyObject
*list
= NULL
;
640 char small_buffer
[SMALLCHUNK
];
641 char *buffer
= small_buffer
;
642 size_t buffersize
= SMALLCHUNK
;
643 PyObject
*big_buffer
= NULL
;
646 size_t totalread
= 0;
652 if (!PyArg_ParseTuple(args
, "|l:readlines", &sizehint
))
656 switch (self
->mode
) {
660 list
= PyList_New(0);
663 PyErr_SetString(PyExc_ValueError
,
664 "I/O operation on closed file");
667 PyErr_SetString(PyExc_IOError
,
668 "file is not ready for reading");
672 if ((list
= PyList_New(0)) == NULL
)
676 Py_BEGIN_ALLOW_THREADS
677 nread
= Util_UnivNewlineRead(&bzerror
, self
->fp
,
679 buffersize
-nfilled
, self
);
682 if (bzerror
== BZ_STREAM_END
) {
683 self
->size
= self
->pos
;
684 self
->mode
= MODE_READ_EOF
;
690 } else if (bzerror
!= BZ_OK
) {
691 Util_CatchBZ2Error(bzerror
);
698 p
= memchr(buffer
+nfilled
, '\n', nread
);
699 if (!shortread
&& p
== NULL
) {
700 /* Need a larger buffer to fit this line */
703 if (buffersize
> INT_MAX
) {
704 PyErr_SetString(PyExc_OverflowError
,
705 "line is longer than a Python string can hold");
708 if (big_buffer
== NULL
) {
709 /* Create the big buffer */
710 big_buffer
= PyString_FromStringAndSize(
712 if (big_buffer
== NULL
)
714 buffer
= PyString_AS_STRING(big_buffer
);
715 memcpy(buffer
, small_buffer
, nfilled
);
718 /* Grow the big buffer */
719 _PyString_Resize(&big_buffer
, buffersize
);
720 buffer
= PyString_AS_STRING(big_buffer
);
724 end
= buffer
+nfilled
+nread
;
727 /* Process complete lines */
729 line
= PyString_FromStringAndSize(q
, p
-q
);
732 err
= PyList_Append(list
, line
);
737 p
= memchr(q
, '\n', end
-q
);
739 /* Move the remaining incomplete line to the start */
741 memmove(buffer
, q
, nfilled
);
743 if (totalread
>= (size_t)sizehint
)
751 /* Partial last line */
752 line
= PyString_FromStringAndSize(buffer
, nfilled
);
756 /* Need to complete the last line */
757 PyObject
*rest
= Util_GetLine(self
, 0);
762 PyString_Concat(&line
, rest
);
767 err
= PyList_Append(list
, line
);
776 Py_DECREF(big_buffer
);
781 PyDoc_STRVAR(BZ2File_xreadlines__doc__
,
782 "xreadlines() -> self\n\
784 For backward compatibility. BZ2File objects now include the performance\n\
785 optimizations previously implemented in the xreadlines module.\n\
788 PyDoc_STRVAR(BZ2File_write__doc__
,
789 "write(data) -> None\n\
791 Write the 'data' string to file. Note that due to buffering, close() may\n\
792 be needed before the file on disk reflects the data written.\n\
795 /* This is a hacked version of Python's fileobject.c:file_write(). */
797 BZ2File_write(BZ2FileObject
*self
, PyObject
*args
)
799 PyObject
*ret
= NULL
;
804 if (!PyArg_ParseTuple(args
, "s#:write", &buf
, &len
))
808 switch (self
->mode
) {
813 PyErr_SetString(PyExc_ValueError
,
814 "I/O operation on closed file");
818 PyErr_SetString(PyExc_IOError
,
819 "file is not ready for writing");
823 self
->f_softspace
= 0;
825 Py_BEGIN_ALLOW_THREADS
826 BZ2_bzWrite (&bzerror
, self
->fp
, buf
, len
);
830 if (bzerror
!= BZ_OK
) {
831 Util_CatchBZ2Error(bzerror
);
843 PyDoc_STRVAR(BZ2File_writelines__doc__
,
844 "writelines(sequence_of_strings) -> None\n\
846 Write the sequence of strings to the file. Note that newlines are not\n\
847 added. The sequence can be any iterable object producing strings. This is\n\
848 equivalent to calling write() for each string.\n\
851 /* This is a hacked version of Python's fileobject.c:file_writelines(). */
853 BZ2File_writelines(BZ2FileObject
*self
, PyObject
*seq
)
855 #define CHUNKSIZE 1000
856 PyObject
*list
= NULL
;
857 PyObject
*iter
= NULL
;
858 PyObject
*ret
= NULL
;
860 int i
, j
, index
, len
, islist
;
864 islist
= PyList_Check(seq
);
866 iter
= PyObject_GetIter(seq
);
868 PyErr_SetString(PyExc_TypeError
,
869 "writelines() requires an iterable argument");
872 list
= PyList_New(CHUNKSIZE
);
877 /* Strategy: slurp CHUNKSIZE lines into a private list,
878 checking that they are all strings, then write that list
879 without holding the interpreter lock, then come back for more. */
880 for (index
= 0; ; index
+= CHUNKSIZE
) {
883 list
= PyList_GetSlice(seq
, index
, index
+CHUNKSIZE
);
886 j
= PyList_GET_SIZE(list
);
889 for (j
= 0; j
< CHUNKSIZE
; j
++) {
890 line
= PyIter_Next(iter
);
892 if (PyErr_Occurred())
896 PyList_SetItem(list
, j
, line
);
902 /* Check that all entries are indeed strings. If not,
903 apply the same rules as for file.write() and
904 convert the rets to strings. This is slow, but
905 seems to be the only way since all conversion APIs
906 could potentially execute Python code. */
907 for (i
= 0; i
< j
; i
++) {
908 PyObject
*v
= PyList_GET_ITEM(list
, i
);
909 if (!PyString_Check(v
)) {
912 if (PyObject_AsCharBuffer(v
, &buffer
, &len
)) {
913 PyErr_SetString(PyExc_TypeError
,
920 line
= PyString_FromStringAndSize(buffer
,
925 PyList_SET_ITEM(list
, i
, line
);
929 self
->f_softspace
= 0;
931 /* Since we are releasing the global lock, the
932 following code may *not* execute Python code. */
933 Py_BEGIN_ALLOW_THREADS
934 for (i
= 0; i
< j
; i
++) {
935 line
= PyList_GET_ITEM(list
, i
);
936 len
= PyString_GET_SIZE(line
);
937 BZ2_bzWrite (&bzerror
, self
->fp
,
938 PyString_AS_STRING(line
), len
);
939 if (bzerror
!= BZ_OK
) {
941 Util_CatchBZ2Error(bzerror
);
962 PyDoc_STRVAR(BZ2File_seek__doc__
,
963 "seek(offset [, whence]) -> None\n\
965 Move to new file position. Argument offset is a byte count. Optional\n\
966 argument whence defaults to 0 (offset from start of file, offset\n\
967 should be >= 0); other values are 1 (move relative to current position,\n\
968 positive or negative), and 2 (move relative to end of file, usually\n\
969 negative, although many platforms allow seeking beyond the end of a file).\n\
971 Note that seeking of bz2 files is emulated, and depending on the parameters\n\
972 the operation may be extremely slow.\n\
976 BZ2File_seek(BZ2FileObject
*self
, PyObject
*args
)
981 char small_buffer
[SMALLCHUNK
];
982 char *buffer
= small_buffer
;
983 size_t buffersize
= SMALLCHUNK
;
988 PyObject
*ret
= NULL
;
990 if (!PyArg_ParseTuple(args
, "O|i:seek", &offobj
, &where
))
992 #if !defined(HAVE_LARGEFILE_SUPPORT)
993 offset
= PyInt_AsLong(offobj
);
995 offset
= PyLong_Check(offobj
) ?
996 PyLong_AsLongLong(offobj
) : PyInt_AsLong(offobj
);
998 if (PyErr_Occurred())
1002 Util_DropReadAhead(self
);
1003 switch (self
->mode
) {
1009 PyErr_SetString(PyExc_ValueError
,
1010 "I/O operation on closed file");
1014 PyErr_SetString(PyExc_IOError
,
1015 "seek works only while reading");
1020 if (self
->size
== -1) {
1021 assert(self
->mode
!= MODE_READ_EOF
);
1023 Py_BEGIN_ALLOW_THREADS
1024 chunksize
= Util_UnivNewlineRead(
1028 self
->pos
+= chunksize
;
1029 Py_END_ALLOW_THREADS
1031 bytesread
+= chunksize
;
1032 if (bzerror
== BZ_STREAM_END
) {
1034 } else if (bzerror
!= BZ_OK
) {
1035 Util_CatchBZ2Error(bzerror
);
1039 self
->mode
= MODE_READ_EOF
;
1040 self
->size
= self
->pos
;
1043 offset
= self
->size
+ offset
;
1044 } else if (where
== 1) {
1045 offset
= self
->pos
+ offset
;
1048 /* Before getting here, offset must be the absolute position the file
1049 * pointer should be set to. */
1051 if (offset
>= self
->pos
) {
1052 /* we can move forward */
1053 offset
-= self
->pos
;
1055 /* we cannot move back, so rewind the stream */
1056 BZ2_bzReadClose(&bzerror
, self
->fp
);
1057 if (bzerror
!= BZ_OK
) {
1058 Util_CatchBZ2Error(bzerror
);
1061 ret
= PyObject_CallMethod(self
->file
, "seek", "(i)", 0);
1067 self
->fp
= BZ2_bzReadOpen(&bzerror
, PyFile_AsFile(self
->file
),
1069 if (bzerror
!= BZ_OK
) {
1070 Util_CatchBZ2Error(bzerror
);
1073 self
->mode
= MODE_READ
;
1076 if (offset
<= 0 || self
->mode
== MODE_READ_EOF
)
1079 /* Before getting here, offset must be set to the number of bytes
1080 * to walk forward. */
1082 if (offset
-bytesread
> buffersize
)
1083 readsize
= buffersize
;
1085 /* offset might be wider than readsize, but the result
1086 * of the subtraction is bound by buffersize (see the
1087 * condition above). buffersize is 8192. */
1088 readsize
= (size_t)(offset
-bytesread
);
1089 Py_BEGIN_ALLOW_THREADS
1090 chunksize
= Util_UnivNewlineRead(&bzerror
, self
->fp
,
1091 buffer
, readsize
, self
);
1092 self
->pos
+= chunksize
;
1093 Py_END_ALLOW_THREADS
1094 bytesread
+= chunksize
;
1095 if (bzerror
== BZ_STREAM_END
) {
1096 self
->size
= self
->pos
;
1097 self
->mode
= MODE_READ_EOF
;
1099 } else if (bzerror
!= BZ_OK
) {
1100 Util_CatchBZ2Error(bzerror
);
1103 if (bytesread
== offset
)
1116 PyDoc_STRVAR(BZ2File_tell__doc__
,
1119 Return the current file position, an integer (may be a long integer).\n\
/* Implementation of BZ2File.tell().
 *
 *   self: the BZ2File object whose position is reported.
 *   args: not referenced by the code visible here.
 *
 * If the file is closed (self->mode == MODE_CLOSED) a ValueError with the
 * message "I/O operation on closed file" is set.  Otherwise the tracked
 * position self->pos is converted to a Python object: PyInt_FromLong() when
 * HAVE_LARGEFILE_SUPPORT is not defined, PyLong_FromLongLong() in the other
 * preprocessor branch. */
1123 BZ2File_tell(BZ2FileObject
*self
, PyObject
*args
)
1125 PyObject
*ret
= NULL
;
1127 if (self
->mode
== MODE_CLOSED
) {
1128 PyErr_SetString(PyExc_ValueError
,
1129 "I/O operation on closed file");
1133 #if !defined(HAVE_LARGEFILE_SUPPORT)
1134 ret
= PyInt_FromLong(self
->pos
);
1136 ret
= PyLong_FromLongLong(self
->pos
);
1143 PyDoc_STRVAR(BZ2File_close__doc__
,
1144 "close() -> None or (perhaps) an integer\n\
1146 Close the file. Sets data attribute .closed to true. A closed file\n\
1147 cannot be used for further I/O operations. close() may be called more\n\
1148 than once without error.\n\
1152 BZ2File_close(BZ2FileObject
*self
)
1154 PyObject
*ret
= NULL
;
1155 int bzerror
= BZ_OK
;
1158 switch (self
->mode
) {
1161 BZ2_bzReadClose(&bzerror
, self
->fp
);
1164 BZ2_bzWriteClose(&bzerror
, self
->fp
,
1168 self
->mode
= MODE_CLOSED
;
1169 ret
= PyObject_CallMethod(self
->file
, "close", NULL
);
1170 if (bzerror
!= BZ_OK
) {
1171 Util_CatchBZ2Error(bzerror
);
1180 static PyObject
*BZ2File_getiter(BZ2FileObject
*self
);
1182 static PyMethodDef BZ2File_methods
[] = {
1183 {"read", (PyCFunction
)BZ2File_read
, METH_VARARGS
, BZ2File_read__doc__
},
1184 {"readline", (PyCFunction
)BZ2File_readline
, METH_VARARGS
, BZ2File_readline__doc__
},
1185 {"readlines", (PyCFunction
)BZ2File_readlines
, METH_VARARGS
, BZ2File_readlines__doc__
},
1186 {"xreadlines", (PyCFunction
)BZ2File_getiter
, METH_VARARGS
, BZ2File_xreadlines__doc__
},
1187 {"write", (PyCFunction
)BZ2File_write
, METH_VARARGS
, BZ2File_write__doc__
},
1188 {"writelines", (PyCFunction
)BZ2File_writelines
, METH_O
, BZ2File_writelines__doc__
},
1189 {"seek", (PyCFunction
)BZ2File_seek
, METH_VARARGS
, BZ2File_seek__doc__
},
1190 {"tell", (PyCFunction
)BZ2File_tell
, METH_NOARGS
, BZ2File_tell__doc__
},
1191 {"close", (PyCFunction
)BZ2File_close
, METH_NOARGS
, BZ2File_close__doc__
},
1192 {NULL
, NULL
} /* sentinel */
1196 /* ===================================================================== */
1197 /* Getters and setters of BZ2File. */
1199 /* This is a hacked version of Python's fileobject.c:get_newlines(). */
1201 BZ2File_get_newlines(BZ2FileObject
*self
, void *closure
)
1203 switch (self
->f_newlinetypes
) {
1204 case NEWLINE_UNKNOWN
:
1208 return PyString_FromString("\r");
1210 return PyString_FromString("\n");
1211 case NEWLINE_CR
|NEWLINE_LF
:
1212 return Py_BuildValue("(ss)", "\r", "\n");
1214 return PyString_FromString("\r\n");
1215 case NEWLINE_CR
|NEWLINE_CRLF
:
1216 return Py_BuildValue("(ss)", "\r", "\r\n");
1217 case NEWLINE_LF
|NEWLINE_CRLF
:
1218 return Py_BuildValue("(ss)", "\n", "\r\n");
1219 case NEWLINE_CR
|NEWLINE_LF
|NEWLINE_CRLF
:
1220 return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
1222 PyErr_Format(PyExc_SystemError
,
1223 "Unknown newlines value 0x%x\n",
1224 self
->f_newlinetypes
);
/* Getter registered for the "closed" attribute.
 *
 *   self:    the BZ2File object being queried.
 *   closure: not referenced.
 *
 * Returns a Python int built from the comparison self->mode == MODE_CLOSED,
 * i.e. 1 when the file is closed and 0 otherwise. */
1230 BZ2File_get_closed(BZ2FileObject
*self
, void *closure
)
1232 return PyInt_FromLong(self
->mode
== MODE_CLOSED
);
/* Getter registered for the "mode" attribute.
 *
 *   self:    the BZ2File object being queried.
 *   closure: not referenced.
 *
 * Delegates to the wrapped file object: returns whatever
 * PyObject_GetAttrString(self->file, "mode") yields. */
1236 BZ2File_get_mode(BZ2FileObject
*self
, void *closure
)
1238 return PyObject_GetAttrString(self
->file
, "mode");
/* Getter registered for the "name" attribute.
 *
 *   self:    the BZ2File object being queried.
 *   closure: not referenced.
 *
 * Delegates to the wrapped file object: returns whatever
 * PyObject_GetAttrString(self->file, "name") yields. */
1242 BZ2File_get_name(BZ2FileObject
*self
, void *closure
)
1244 return PyObject_GetAttrString(self
->file
, "name");
1247 static PyGetSetDef BZ2File_getset
[] = {
1248 {"closed", (getter
)BZ2File_get_closed
, NULL
,
1249 "True if the file is closed"},
1250 {"newlines", (getter
)BZ2File_get_newlines
, NULL
,
1251 "end-of-line convention used in this file"},
1252 {"mode", (getter
)BZ2File_get_mode
, NULL
,
1253 "file mode ('r', 'w', or 'U')"},
1254 {"name", (getter
)BZ2File_get_name
, NULL
,
1256 {NULL
} /* Sentinel */
1260 /* ===================================================================== */
1261 /* Members of BZ2File_Type. */
1264 #define OFF(x) offsetof(BZ2FileObject, x)
1266 static PyMemberDef BZ2File_members
[] = {
1267 {"softspace", T_INT
, OFF(f_softspace
), 0,
1268 "flag indicating that a space needs to be printed; used by print"},
1269 {NULL
} /* Sentinel */
1272 /* ===================================================================== */
1273 /* Slot definitions for BZ2File_Type. */
1276 BZ2File_init(BZ2FileObject
*self
, PyObject
*args
, PyObject
*kwargs
)
1278 static char *kwlist
[] = {"filename", "mode", "buffering",
1279 "compresslevel", 0};
1283 int compresslevel
= 9;
1289 if (!PyArg_ParseTupleAndKeywords(args
, kwargs
, "O|sii:BZ2File",
1290 kwlist
, &name
, &mode
, &buffering
,
1294 if (compresslevel
< 1 || compresslevel
> 9) {
1295 PyErr_SetString(PyExc_ValueError
,
1296 "compresslevel must be between 1 and 9");
1315 self
->f_univ_newline
= 0;
1317 self
->f_univ_newline
= 1;
1326 PyErr_Format(PyExc_ValueError
,
1327 "invalid mode char %c", *mode
);
1335 if (mode_char
== 0) {
1339 mode
= (mode_char
== 'r') ? "rb" : "wb";
1341 self
->file
= PyObject_CallFunction((PyObject
*)&PyFile_Type
, "(Osi)",
1342 name
, mode
, buffering
);
1343 if (self
->file
== NULL
)
1346 /* From now on, we have stuff to dealloc, so jump to error label
1347 * instead of returning */
1350 self
->lock
= PyThread_allocate_lock();
1352 PyErr_SetString(PyExc_MemoryError
, "unable to allocate lock");
1357 if (mode_char
== 'r')
1358 self
->fp
= BZ2_bzReadOpen(&bzerror
,
1359 PyFile_AsFile(self
->file
),
1362 self
->fp
= BZ2_bzWriteOpen(&bzerror
,
1363 PyFile_AsFile(self
->file
),
1364 compresslevel
, 0, 0);
1366 if (bzerror
!= BZ_OK
) {
1367 Util_CatchBZ2Error(bzerror
);
1371 self
->mode
= (mode_char
== 'r') ? MODE_READ
: MODE_WRITE
;
1376 Py_CLEAR(self
->file
);
1379 PyThread_free_lock(self
->lock
);
1387 BZ2File_dealloc(BZ2FileObject
*self
)
1392 PyThread_free_lock(self
->lock
);
1394 switch (self
->mode
) {
1397 BZ2_bzReadClose(&bzerror
, self
->fp
);
1400 BZ2_bzWriteClose(&bzerror
, self
->fp
,
1404 Util_DropReadAhead(self
);
1405 Py_XDECREF(self
->file
);
1406 self
->ob_type
->tp_free((PyObject
*)self
);
1409 /* This is a hacked version of Python's fileobject.c:file_getiter(). */
/* Iterator hook for BZ2File; it is installed as tp_iter and also exposed as
 * the xreadlines() method in the method table.
 *
 * If the file is closed (self->mode == MODE_CLOSED) a ValueError with the
 * message "I/O operation on closed file" is set.  Otherwise the object acts
 * as its own iterator: its reference count is incremented and self is
 * returned. */
1411 BZ2File_getiter(BZ2FileObject
*self
)
1413 if (self
->mode
== MODE_CLOSED
) {
1414 PyErr_SetString(PyExc_ValueError
,
1415 "I/O operation on closed file");
1418 Py_INCREF((PyObject
*)self
);
1419 return (PyObject
*)self
;
1422 /* This is a hacked version of Python's fileobject.c:file_iternext(). */
1423 #define READAHEAD_BUFSIZE 8192
1425 BZ2File_iternext(BZ2FileObject
*self
)
1427 PyStringObject
* ret
;
1429 if (self
->mode
== MODE_CLOSED
) {
1430 PyErr_SetString(PyExc_ValueError
,
1431 "I/O operation on closed file");
1434 ret
= Util_ReadAheadGetLineSkip(self
, 0, READAHEAD_BUFSIZE
);
1436 if (ret
== NULL
|| PyString_GET_SIZE(ret
) == 0) {
1440 return (PyObject
*)ret
;
1443 /* ===================================================================== */
1444 /* BZ2File_Type definition. */
1446 PyDoc_VAR(BZ2File__doc__
) =
1448 "BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
1450 Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
1451 writing. When opened for writing, the file will be created if it doesn't\n\
1452 exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
1453 unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
1454 is given, must be a number between 1 and 9.\n\
1458 Add a 'U' to mode to open the file for input with universal newline\n\
1459 support. Any line ending in the input file will be seen as a '\\n' in\n\
1460 Python. Also, a file so opened gains the attribute 'newlines'; the value\n\
1461 for this attribute is one of None (no newline read yet), '\\r', '\\n',\n\
1462 '\\r\\n' or a tuple containing all the newline types seen. Universal\n\
1463 newlines are available only when reading.\n\
1467 static PyTypeObject BZ2File_Type
= {
1468 PyObject_HEAD_INIT(NULL
)
1470 "bz2.BZ2File", /*tp_name*/
1471 sizeof(BZ2FileObject
), /*tp_basicsize*/
1473 (destructor
)BZ2File_dealloc
, /*tp_dealloc*/
1480 0, /*tp_as_sequence*/
1481 0, /*tp_as_mapping*/
1485 PyObject_GenericGetAttr
,/*tp_getattro*/
1486 PyObject_GenericSetAttr
,/*tp_setattro*/
1488 Py_TPFLAGS_DEFAULT
|Py_TPFLAGS_BASETYPE
, /*tp_flags*/
1489 BZ2File__doc__
, /*tp_doc*/
1492 0, /*tp_richcompare*/
1493 0, /*tp_weaklistoffset*/
1494 (getiterfunc
)BZ2File_getiter
, /*tp_iter*/
1495 (iternextfunc
)BZ2File_iternext
, /*tp_iternext*/
1496 BZ2File_methods
, /*tp_methods*/
1497 BZ2File_members
, /*tp_members*/
1498 BZ2File_getset
, /*tp_getset*/
1503 0, /*tp_dictoffset*/
1504 (initproc
)BZ2File_init
, /*tp_init*/
1505 PyType_GenericAlloc
, /*tp_alloc*/
1506 PyType_GenericNew
, /*tp_new*/
1507 _PyObject_Del
, /*tp_free*/
1512 /* ===================================================================== */
1513 /* Methods of BZ2Comp. */
1515 PyDoc_STRVAR(BZ2Comp_compress__doc__
,
1516 "compress(data) -> string\n\
1518 Provide more data to the compressor object. It will return chunks of\n\
1519 compressed data whenever possible. When you've finished providing data\n\
1520 to compress, call the flush() method to finish the compression process,\n\
1521 and return what is left in the internal buffers.\n\
1525 BZ2Comp_compress(BZ2CompObject
*self
, PyObject
*args
)
1529 int bufsize
= SMALLCHUNK
;
1530 PY_LONG_LONG totalout
;
1531 PyObject
*ret
= NULL
;
1532 bz_stream
*bzs
= &self
->bzs
;
1535 if (!PyArg_ParseTuple(args
, "s#:compress", &data
, &datasize
))
1539 return PyString_FromString("");
1542 if (!self
->running
) {
1543 PyErr_SetString(PyExc_ValueError
,
1544 "this object was already flushed");
1548 ret
= PyString_FromStringAndSize(NULL
, bufsize
);
1552 bzs
->next_in
= data
;
1553 bzs
->avail_in
= datasize
;
1554 bzs
->next_out
= BUF(ret
);
1555 bzs
->avail_out
= bufsize
;
1557 totalout
= BZS_TOTAL_OUT(bzs
);
1560 Py_BEGIN_ALLOW_THREADS
1561 bzerror
= BZ2_bzCompress(bzs
, BZ_RUN
);
1562 Py_END_ALLOW_THREADS
1563 if (bzerror
!= BZ_RUN_OK
) {
1564 Util_CatchBZ2Error(bzerror
);
1567 if (bzs
->avail_out
== 0) {
1568 bufsize
= Util_NewBufferSize(bufsize
);
1569 if (_PyString_Resize(&ret
, bufsize
) < 0) {
1570 BZ2_bzCompressEnd(bzs
);
1573 bzs
->next_out
= BUF(ret
) + (BZS_TOTAL_OUT(bzs
)
1575 bzs
->avail_out
= bufsize
- (bzs
->next_out
- BUF(ret
));
1576 } else if (bzs
->avail_in
== 0) {
1581 _PyString_Resize(&ret
, (Py_ssize_t
)(BZS_TOTAL_OUT(bzs
) - totalout
));
1592 PyDoc_STRVAR(BZ2Comp_flush__doc__
,
1593 "flush() -> string\n\
1595 Finish the compression process and return what is left in internal buffers.\n\
1596 You must not use the compressor object after calling this method.\n\
1600 BZ2Comp_flush(BZ2CompObject
*self
)
1602 int bufsize
= SMALLCHUNK
;
1603 PyObject
*ret
= NULL
;
1604 bz_stream
*bzs
= &self
->bzs
;
1605 PY_LONG_LONG totalout
;
1609 if (!self
->running
) {
1610 PyErr_SetString(PyExc_ValueError
, "object was already "
1616 ret
= PyString_FromStringAndSize(NULL
, bufsize
);
1620 bzs
->next_out
= BUF(ret
);
1621 bzs
->avail_out
= bufsize
;
1623 totalout
= BZS_TOTAL_OUT(bzs
);
1626 Py_BEGIN_ALLOW_THREADS
1627 bzerror
= BZ2_bzCompress(bzs
, BZ_FINISH
);
1628 Py_END_ALLOW_THREADS
1629 if (bzerror
== BZ_STREAM_END
) {
1631 } else if (bzerror
!= BZ_FINISH_OK
) {
1632 Util_CatchBZ2Error(bzerror
);
1635 if (bzs
->avail_out
== 0) {
1636 bufsize
= Util_NewBufferSize(bufsize
);
1637 if (_PyString_Resize(&ret
, bufsize
) < 0)
1639 bzs
->next_out
= BUF(ret
);
1640 bzs
->next_out
= BUF(ret
) + (BZS_TOTAL_OUT(bzs
)
1642 bzs
->avail_out
= bufsize
- (bzs
->next_out
- BUF(ret
));
1646 if (bzs
->avail_out
!= 0)
1647 _PyString_Resize(&ret
, (Py_ssize_t
)(BZS_TOTAL_OUT(bzs
) - totalout
));
/*
 * Method table of the compressor type (referenced from the tp_methods
 * slot of BZ2Comp_Type).  compress is declared METH_VARARGS, flush is
 * declared METH_NOARGS; each entry also carries its docstring.
 */
1658 static PyMethodDef BZ2Comp_methods
[] = {
1659 {"compress", (PyCFunction
)BZ2Comp_compress
, METH_VARARGS
,
1660 BZ2Comp_compress__doc__
},
1661 {"flush", (PyCFunction
)BZ2Comp_flush
, METH_NOARGS
,
1662 BZ2Comp_flush__doc__
},
1663 {NULL
, NULL
} /* sentinel */
1667 /* ===================================================================== */
1668 /* Slot definitions for BZ2Comp_Type. */
/*
 * tp_init of the compressor type (installed in BZ2Comp_Type).
 *
 * Accepts one optional integer argument, compresslevel (default 9), which
 * must lie in 1..9, allocates the lock of the object and initialises the
 * embedded bz_stream for compression.
 *
 * NOTE(review): the embedded source numbers skip in several places, so
 * the return statements, the guards around the lock handling and the
 * error label are absent from this copy.
 */
1671 BZ2Comp_init(BZ2CompObject
*self
, PyObject
*args
, PyObject
*kwargs
)
/* Default used when the caller does not pass compresslevel. */
1673 int compresslevel
= 9;
1675 static char *kwlist
[] = {"compresslevel", 0};
/* Format |i: a single optional int, positional or by keyword. */
1677 if (!PyArg_ParseTupleAndKeywords(args
, kwargs
, "|i:BZ2Compressor",
1678 kwlist
, &compresslevel
))
/* Range check before the value is handed to libbz2. */
1681 if (compresslevel
< 1 || compresslevel
> 9) {
1682 PyErr_SetString(PyExc_ValueError
,
1683 "compresslevel must be between 1 and 9");
1688 self
->lock
= PyThread_allocate_lock();
/* Reported as MemoryError; the guarding condition is on a line not shown. */
1690 PyErr_SetString(PyExc_MemoryError
, "unable to allocate lock");
/* Start from an all-zero stream structure before initialising it. */
1695 memset(&self
->bzs
, 0, sizeof(bz_stream
));
/* Per the libbzip2 manual the two trailing arguments are verbosity and
 * workFactor; both are passed as 0 here. */
1696 bzerror
= BZ2_bzCompressInit(&self
->bzs
, compresslevel
, 0, 0);
/* Any result other than BZ_OK is converted into a Python exception. */
1697 if (bzerror
!= BZ_OK
) {
1698 Util_CatchBZ2Error(bzerror
);
/* Presumably part of the failure cleanup path -- the surrounding label
 * and condition are not shown; TODO confirm. */
1708 PyThread_free_lock(self
->lock
);
/*
 * tp_dealloc of the compressor type (installed in BZ2Comp_Type): frees
 * the lock, ends the compression stream and hands the memory back through
 * the tp_free slot of the object type.
 * NOTE(review): lines between the embedded numbers are not shown, so any
 * guards around these calls are not visible here.
 */
1716 BZ2Comp_dealloc(BZ2CompObject
*self
)
1720 PyThread_free_lock(self
->lock
);
/* Release the resources held by libbz2 for this stream. */
1722 BZ2_bzCompressEnd(&self
->bzs
);
/* Go through ob_type so that a subclass tp_free is honoured. */
1723 self
->ob_type
->tp_free((PyObject
*)self
);
1727 /* ===================================================================== */
1728 /* BZ2Comp_Type definition. */
/* Class docstring of the compressor type; used as tp_doc in BZ2Comp_Type. */
1730 PyDoc_STRVAR(BZ2Comp__doc__
,
1731 "BZ2Compressor([compresslevel=9]) -> compressor object\n\
1733 Create a new compressor object. This object may be used to compress\n\
1734 data sequentially. If you want to compress data in one shot, use the\n\
1735 compress() function instead. The compresslevel parameter, if given,\n\
1736 must be a number between 1 and 9.\n\
/*
 * Type object for bz2.BZ2Compressor.  It wires together BZ2Comp_dealloc,
 * BZ2Comp_methods, BZ2Comp_init and the class docstring, uses the generic
 * attribute access, allocation and construction helpers, and sets
 * Py_TPFLAGS_BASETYPE in tp_flags.
 * NOTE(review): slots whose embedded source numbers are skipped are not
 * shown in this copy.
 */
1739 static PyTypeObject BZ2Comp_Type
= {
1740 PyObject_HEAD_INIT(NULL
)
1742 "bz2.BZ2Compressor", /*tp_name*/
1743 sizeof(BZ2CompObject
), /*tp_basicsize*/
1745 (destructor
)BZ2Comp_dealloc
, /*tp_dealloc*/
1752 0, /*tp_as_sequence*/
1753 0, /*tp_as_mapping*/
1757 PyObject_GenericGetAttr
,/*tp_getattro*/
1758 PyObject_GenericSetAttr
,/*tp_setattro*/
1760 Py_TPFLAGS_DEFAULT
|Py_TPFLAGS_BASETYPE
, /*tp_flags*/
1761 BZ2Comp__doc__
, /*tp_doc*/
1764 0, /*tp_richcompare*/
1765 0, /*tp_weaklistoffset*/
1768 BZ2Comp_methods
, /*tp_methods*/
1775 0, /*tp_dictoffset*/
1776 (initproc
)BZ2Comp_init
, /*tp_init*/
1777 PyType_GenericAlloc
, /*tp_alloc*/
1778 PyType_GenericNew
, /*tp_new*/
1779 _PyObject_Del
, /*tp_free*/
1784 /* ===================================================================== */
1785 /* Members of BZ2Decomp. */
/* Byte offset of field x inside BZ2DecompObject, for the member table below. */
1788 #define OFF(x) offsetof(BZ2DecompObject, x)
/*
 * Member table of the decompressor type (referenced from the tp_members
 * slot of BZ2Decomp_Type).  It exposes the unused_data field as an object
 * attribute flagged RO.
 */
1790 static PyMemberDef BZ2Decomp_members
[] = {
1791 {"unused_data", T_OBJECT
, OFF(unused_data
), RO
},
1792 {NULL
} /* Sentinel */
1796 /* ===================================================================== */
1797 /* Methods of BZ2Decomp. */
/* Docstring for the decompress method; BZ2Decomp_methods attaches it to that entry. */
1799 PyDoc_STRVAR(BZ2Decomp_decompress__doc__
,
1800 "decompress(data) -> string\n\
1802 Provide more data to the decompressor object. It will return chunks\n\
1803 of decompressed data whenever possible. If you try to decompress data\n\
1804 after the end of stream is found, EOFError will be raised. If any data\n\
1805 was found after the end of stream, it'll be ignored and saved in\n\
1806 unused_data attribute.\n\
/*
 * Implementation of the decompress method of the decompressor object (it
 * is the function registered for decompress in BZ2Decomp_methods).
 *
 * Parses one string argument, feeds it to BZ2_bzDecompress() on the
 * stream embedded in self and collects the output in a Python string that
 * starts at SMALLCHUNK bytes and is enlarged whenever it fills up.  The
 * result is trimmed to the number of bytes produced by this call.  Calling
 * it on an object that is no longer running raises EOFError.
 *
 * NOTE(review): the embedded source numbers skip in several places, so
 * some original lines (declarations, loop header, error exits, return
 * statements, any locking) are absent from this copy.
 */
1810 BZ2Decomp_decompress(BZ2DecompObject
*self
, PyObject
*args
)
/* Initial size of the output string; grown later via Util_NewBufferSize(). */
1814 int bufsize
= SMALLCHUNK
;
/* Stream output total at entry, used to compute this call's byte count. */
1815 PY_LONG_LONG totalout
;
1816 PyObject
*ret
= NULL
;
/* Work directly on the stream state stored inside the object. */
1817 bz_stream
*bzs
= &self
->bzs
;
/* s# yields a pointer and a length for the input bytes. */
1820 if (!PyArg_ParseTuple(args
, "s#:decompress", &data
, &datasize
))
/* Rejected with EOFError once the object is no longer running; the rest
 * of the message and the exit are on lines not shown here. */
1824 if (!self
->running
) {
1825 PyErr_SetString(PyExc_EOFError
, "end of stream was "
/* Uninitialised string of bufsize bytes; libbz2 writes straight into it. */
1830 ret
= PyString_FromStringAndSize(NULL
, bufsize
);
/* Input comes from the caller data, output goes to the new string. */
1834 bzs
->next_in
= data
;
1835 bzs
->avail_in
= datasize
;
1836 bzs
->next_out
= BUF(ret
);
1837 bzs
->avail_out
= bufsize
;
/* BZS_TOTAL_OUT is cumulative for the stream, so remember the baseline. */
1839 totalout
= BZS_TOTAL_OUT(bzs
);
/* Let other Python threads run while libbz2 does the work. */
1842 Py_BEGIN_ALLOW_THREADS
1843 bzerror
= BZ2_bzDecompress(bzs
);
1844 Py_END_ALLOW_THREADS
1845 if (bzerror
== BZ_STREAM_END
) {
/* Input bytes remain after the end of the compressed stream. */
1846 if (bzs
->avail_in
!= 0) {
/* The old unused_data reference is dropped before its replacement is
 * built from the remaining input.  The assignment target and the length
 * argument are on lines not shown here.
 * NOTE(review): no check of the new string for NULL is visible -- confirm
 * that an allocation failure cannot leave a stale or NULL unused_data. */
1847 Py_DECREF(self
->unused_data
);
1849 PyString_FromStringAndSize(bzs
->next_in
,
/* Any result other than BZ_OK here is converted into a Python exception. */
1855 if (bzerror
!= BZ_OK
) {
1856 Util_CatchBZ2Error(bzerror
);
/* Output buffer exhausted: enlarge the string and continue after the
 * bytes already written by this call. */
1859 if (bzs
->avail_out
== 0) {
1860 bufsize
= Util_NewBufferSize(bufsize
);
/* On resize failure the stream is torn down before bailing out. */
1861 if (_PyString_Resize(&ret
, bufsize
) < 0) {
1862 BZ2_bzDecompressEnd(bzs
);
/* NOTE(review): this store to next_out is immediately overwritten by the
 * following statement and looks redundant. */
1865 bzs
->next_out
= BUF(ret
);
/* The string may have moved on resize, so rebase next_out on the new
 * buffer; the remainder of this expression is on a line not shown. */
1866 bzs
->next_out
= BUF(ret
) + (BZS_TOTAL_OUT(bzs
)
/* Space left = new size minus what is already occupied. */
1868 bzs
->avail_out
= bufsize
- (bzs
->next_out
- BUF(ret
));
/* Output space remains and the input is used up; the branch body is on a
 * line not shown here. */
1869 } else if (bzs
->avail_in
== 0) {
/* Trim the string to the bytes produced by this call; skipped when the
 * buffer is exactly full.  The resize result is not checked here. */
1874 if (bzs
->avail_out
!= 0)
1875 _PyString_Resize(&ret
, (Py_ssize_t
)(BZS_TOTAL_OUT(bzs
) - totalout
));
/*
 * Method table of the decompressor type (referenced from the tp_methods
 * slot of BZ2Decomp_Type).  Its only method is decompress, declared
 * METH_VARARGS.
 */
1886 static PyMethodDef BZ2Decomp_methods
[] = {
1887 {"decompress", (PyCFunction
)BZ2Decomp_decompress
, METH_VARARGS
, BZ2Decomp_decompress__doc__
},
1888 {NULL
, NULL
} /* sentinel */
1892 /* ===================================================================== */
1893 /* Slot definitions for BZ2Decomp_Type. */
/*
 * tp_init of the decompressor type (installed in BZ2Decomp_Type).
 *
 * Takes no arguments.  Allocates the lock of the object, sets unused_data
 * to an empty string and initialises the embedded bz_stream for
 * decompression.
 *
 * NOTE(review): the embedded source numbers skip in several places, so
 * the return statements, the guards around the lock handling and the
 * error label are absent from this copy.
 */
1896 BZ2Decomp_init(BZ2DecompObject
*self
, PyObject
*args
, PyObject
*kwargs
)
/* Empty format: no positional arguments are accepted.  kwargs is not
 * consulted in the lines visible here. */
1900 if (!PyArg_ParseTuple(args
, ":BZ2Decompressor"))
1904 self
->lock
= PyThread_allocate_lock();
/* Reported as MemoryError; the guarding condition is on a line not shown. */
1906 PyErr_SetString(PyExc_MemoryError
, "unable to allocate lock");
/* unused_data starts out as an empty string and is checked for NULL. */
1911 self
->unused_data
= PyString_FromString("");
1912 if (!self
->unused_data
)
/* Start from an all-zero stream structure before initialising it. */
1915 memset(&self
->bzs
, 0, sizeof(bz_stream
));
/* Per the libbzip2 manual the two trailing arguments are verbosity and
 * small; both are passed as 0 here. */
1916 bzerror
= BZ2_bzDecompressInit(&self
->bzs
, 0, 0);
/* Any result other than BZ_OK is converted into a Python exception. */
1917 if (bzerror
!= BZ_OK
) {
1918 Util_CatchBZ2Error(bzerror
);
/* Presumably the failure cleanup path -- the surrounding label and
 * conditions are not shown; TODO confirm. */
1929 PyThread_free_lock(self
->lock
);
/* Py_CLEAR drops the reference and resets the field to NULL. */
1933 Py_CLEAR(self
->unused_data
);
/*
 * tp_dealloc of the decompressor type (installed in BZ2Decomp_Type):
 * frees the lock, drops the unused_data reference, ends the decompression
 * stream and hands the memory back through the tp_free slot of the object
 * type.
 * NOTE(review): lines between the embedded numbers are not shown, so any
 * guards around these calls are not visible here.
 */
1938 BZ2Decomp_dealloc(BZ2DecompObject
*self
)
1942 PyThread_free_lock(self
->lock
);
/* XDECREF tolerates a NULL unused_data. */
1944 Py_XDECREF(self
->unused_data
);
/* Release the resources held by libbz2 for this stream. */
1945 BZ2_bzDecompressEnd(&self
->bzs
);
/* Go through ob_type so that a subclass tp_free is honoured. */
1946 self
->ob_type
->tp_free((PyObject
*)self
);
1950 /* ===================================================================== */
1951 /* BZ2Decomp_Type definition. */
/* Class docstring of the decompressor type; used as tp_doc in BZ2Decomp_Type. */
1953 PyDoc_STRVAR(BZ2Decomp__doc__
,
1954 "BZ2Decompressor() -> decompressor object\n\
1956 Create a new decompressor object. This object may be used to decompress\n\
1957 data sequentially. If you want to decompress data in one shot, use the\n\
1958 decompress() function instead.\n\
/*
 * Type object for bz2.BZ2Decompressor.  It wires together
 * BZ2Decomp_dealloc, BZ2Decomp_methods, BZ2Decomp_members, BZ2Decomp_init
 * and the class docstring, uses the generic attribute access, allocation
 * and construction helpers, and sets Py_TPFLAGS_BASETYPE in tp_flags.
 * NOTE(review): slots whose embedded source numbers are skipped are not
 * shown in this copy.
 */
1961 static PyTypeObject BZ2Decomp_Type
= {
1962 PyObject_HEAD_INIT(NULL
)
1964 "bz2.BZ2Decompressor", /*tp_name*/
1965 sizeof(BZ2DecompObject
), /*tp_basicsize*/
1967 (destructor
)BZ2Decomp_dealloc
, /*tp_dealloc*/
1974 0, /*tp_as_sequence*/
1975 0, /*tp_as_mapping*/
1979 PyObject_GenericGetAttr
,/*tp_getattro*/
1980 PyObject_GenericSetAttr
,/*tp_setattro*/
1982 Py_TPFLAGS_DEFAULT
|Py_TPFLAGS_BASETYPE
, /*tp_flags*/
1983 BZ2Decomp__doc__
, /*tp_doc*/
1986 0, /*tp_richcompare*/
1987 0, /*tp_weaklistoffset*/
1990 BZ2Decomp_methods
, /*tp_methods*/
1991 BZ2Decomp_members
, /*tp_members*/
1997 0, /*tp_dictoffset*/
1998 (initproc
)BZ2Decomp_init
, /*tp_init*/
1999 PyType_GenericAlloc
, /*tp_alloc*/
2000 PyType_GenericNew
, /*tp_new*/
2001 _PyObject_Del
, /*tp_free*/
2006 /* ===================================================================== */
2007 /* Module functions. */
/* Docstring for the module-level compress function; bz2_methods attaches it to that entry. */
2009 PyDoc_STRVAR(bz2_compress__doc__
,
2010 "compress(data [, compresslevel=9]) -> string\n\
2012 Compress data in one shot. If you want to compress data sequentially,\n\
2013 use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
2014 given, must be a number between 1 and 9.\n\
/*
 * Module-level compress function (registered for compress in
 * bz2_methods, METH_VARARGS|METH_KEYWORDS).
 *
 * Arguments: data (string, required) and compresslevel (int, optional,
 * default 9, must lie in 1..9).  A fresh bz_stream is initialised, the
 * whole input is compressed with BZ_FINISH into a Python string sized
 * from an up-front estimate (and enlarged if that turns out too small),
 * the string is trimmed to the compressed size and the stream is ended.
 *
 * NOTE(review): the embedded source numbers skip in several places, so
 * some original lines (declarations, loop header, error exits, return
 * statements) are absent from this copy.
 */
2018 bz2_compress(PyObject
*self
, PyObject
*args
, PyObject
*kwargs
)
/* Default used when the caller does not pass compresslevel. */
2020 int compresslevel
=9;
2024 PyObject
*ret
= NULL
;
/* _bzs is declared on a line not shown here. */
2026 bz_stream
*bzs
= &_bzs
;
2028 static char *kwlist
[] = {"data", "compresslevel", 0};
/* s# yields pointer and length of data; |i makes compresslevel optional. */
2030 if (!PyArg_ParseTupleAndKeywords(args
, kwargs
, "s#|i",
2031 kwlist
, &data
, &datasize
,
/* Range check before the value is handed to libbz2. */
2035 if (compresslevel
< 1 || compresslevel
> 9) {
2036 PyErr_SetString(PyExc_ValueError
,
2037 "compresslevel must be between 1 and 9");
2041 /* Conforming to bz2 manual, this is large enough to fit compressed
2042 * data in one shot. We will check it later anyway. */
/* NOTE(review): if datasize and bufsize are plain int (their declarations
 * are not shown), this sum could overflow for inputs close to INT_MAX --
 * confirm. */
2043 bufsize
= datasize
+ (datasize
/100+1) + 600;
/* Uninitialised string of bufsize bytes; libbz2 writes straight into it. */
2045 ret
= PyString_FromStringAndSize(NULL
, bufsize
);
/* Start from an all-zero stream structure. */
2049 memset(bzs
, 0, sizeof(bz_stream
));
/* Input is the caller data, output is the new string. */
2051 bzs
->next_in
= data
;
2052 bzs
->avail_in
= datasize
;
2053 bzs
->next_out
= BUF(ret
);
2054 bzs
->avail_out
= bufsize
;
/* Per the libbzip2 manual the two trailing arguments are verbosity and
 * workFactor; both are passed as 0 here. */
2056 bzerror
= BZ2_bzCompressInit(bzs
, compresslevel
, 0, 0);
2057 if (bzerror
!= BZ_OK
) {
2058 Util_CatchBZ2Error(bzerror
);
/* Let other Python threads run while libbz2 does the work. */
2064 Py_BEGIN_ALLOW_THREADS
2065 bzerror
= BZ2_bzCompress(bzs
, BZ_FINISH
);
2066 Py_END_ALLOW_THREADS
/* BZ_STREAM_END: branch body is on a line not shown here. */
2067 if (bzerror
== BZ_STREAM_END
) {
/* Any other result than BZ_FINISH_OK: end the stream, then raise. */
2069 } else if (bzerror
!= BZ_FINISH_OK
) {
2070 BZ2_bzCompressEnd(bzs
);
2071 Util_CatchBZ2Error(bzerror
);
/* The estimate was too small after all: enlarge the string. */
2075 if (bzs
->avail_out
== 0) {
2076 bufsize
= Util_NewBufferSize(bufsize
);
/* On resize failure the stream is ended before bailing out. */
2077 if (_PyString_Resize(&ret
, bufsize
) < 0) {
2078 BZ2_bzCompressEnd(bzs
);
/* The stream was created in this call, so its running total is also the
 * offset into the output string; no baseline has to be subtracted. */
2082 bzs
->next_out
= BUF(ret
) + BZS_TOTAL_OUT(bzs
);
2083 bzs
->avail_out
= bufsize
- (bzs
->next_out
- BUF(ret
));
/* Trim to the compressed size; skipped when the buffer is exactly full.
 * The resize result is not checked here. */
2087 if (bzs
->avail_out
!= 0)
2088 _PyString_Resize(&ret
, (Py_ssize_t
)BZS_TOTAL_OUT(bzs
));
/* Release the libbz2 resources of the stream. */
2089 BZ2_bzCompressEnd(bzs
);
/* Docstring for the module-level decompress function; bz2_methods attaches it to that entry. */
2094 PyDoc_STRVAR(bz2_decompress__doc__
,
2095 "decompress(data) -> decompressed data\n\
2097 Decompress data in one shot. If you want to decompress data sequentially,\n\
2098 use an instance of BZ2Decompressor instead.\n\
/*
 * Module-level decompress function (registered for decompress in
 * bz2_methods, METH_VARARGS).
 *
 * Parses one string argument, initialises a fresh bz_stream and runs
 * BZ2_bzDecompress() on the input, collecting the output in a Python
 * string that starts at SMALLCHUNK bytes and is enlarged whenever it
 * fills up.  If the input is used up before the library reports
 * BZ_STREAM_END, ValueError is raised.  On success the string is trimmed
 * to the decompressed size and the stream is ended.
 *
 * NOTE(review): the embedded source numbers skip in several places, so
 * some original lines (declarations, loop header, error exits, return
 * statements) are absent from this copy.
 */
2102 bz2_decompress(PyObject
*self
, PyObject
*args
)
/* Initial size of the output string; grown later via Util_NewBufferSize(). */
2106 int bufsize
= SMALLCHUNK
;
/* _bzs is declared on a line not shown here. */
2109 bz_stream
*bzs
= &_bzs
;
/* s# yields a pointer and a length for the input bytes. */
2112 if (!PyArg_ParseTuple(args
, "s#:decompress", &data
, &datasize
))
/* Early exit with an empty string; its guard is on a line not shown
 * (presumably an empty-input check -- TODO confirm). */
2116 return PyString_FromString("");
/* Uninitialised string of bufsize bytes; libbz2 writes straight into it. */
2118 ret
= PyString_FromStringAndSize(NULL
, bufsize
);
/* Start from an all-zero stream structure. */
2122 memset(bzs
, 0, sizeof(bz_stream
));
/* Input is the caller data, output is the new string. */
2124 bzs
->next_in
= data
;
2125 bzs
->avail_in
= datasize
;
2126 bzs
->next_out
= BUF(ret
);
2127 bzs
->avail_out
= bufsize
;
/* Per the libbzip2 manual the two trailing arguments are verbosity and
 * small; both are passed as 0 here. */
2129 bzerror
= BZ2_bzDecompressInit(bzs
, 0, 0);
2130 if (bzerror
!= BZ_OK
) {
2131 Util_CatchBZ2Error(bzerror
);
/* Let other Python threads run while libbz2 does the work. */
2137 Py_BEGIN_ALLOW_THREADS
2138 bzerror
= BZ2_bzDecompress(bzs
);
2139 Py_END_ALLOW_THREADS
/* BZ_STREAM_END: branch body is on a line not shown here. */
2140 if (bzerror
== BZ_STREAM_END
) {
/* Any other result than BZ_OK: end the stream, then raise. */
2142 } else if (bzerror
!= BZ_OK
) {
2143 BZ2_bzDecompressEnd(bzs
);
2144 Util_CatchBZ2Error(bzerror
);
/* Output buffer exhausted: enlarge the string. */
2148 if (bzs
->avail_out
== 0) {
2149 bufsize
= Util_NewBufferSize(bufsize
);
/* On resize failure the stream is ended before bailing out. */
2150 if (_PyString_Resize(&ret
, bufsize
) < 0) {
2151 BZ2_bzDecompressEnd(bzs
);
/* The stream was created in this call, so its running total is also the
 * offset into the output string; no baseline has to be subtracted. */
2155 bzs
->next_out
= BUF(ret
) + BZS_TOTAL_OUT(bzs
);
2156 bzs
->avail_out
= bufsize
- (bzs
->next_out
- BUF(ret
));
/* Output space remains but the input is used up without BZ_STREAM_END:
 * the data is incomplete, so end the stream and raise ValueError. */
2157 } else if (bzs
->avail_in
== 0) {
2158 BZ2_bzDecompressEnd(bzs
);
2159 PyErr_SetString(PyExc_ValueError
,
2160 "couldn't find end of stream");
/* Trim to the decompressed size; skipped when the buffer is exactly full.
 * The resize result is not checked here. */
2166 if (bzs
->avail_out
!= 0)
2167 _PyString_Resize(&ret
, (Py_ssize_t
)BZS_TOTAL_OUT(bzs
));
/* Release the libbz2 resources of the stream. */
2168 BZ2_bzDecompressEnd(bzs
);
/*
 * Module-level function table, passed to Py_InitModule3() when the bz2
 * module is created.  compress accepts positional and keyword arguments;
 * decompress accepts positional arguments only.
 */
2173 static PyMethodDef bz2_methods
[] = {
2174 {"compress", (PyCFunction
) bz2_compress
, METH_VARARGS
|METH_KEYWORDS
,
2175 bz2_compress__doc__
},
2176 {"decompress", (PyCFunction
) bz2_decompress
, METH_VARARGS
,
2177 bz2_decompress__doc__
},
2178 {NULL
, NULL
} /* sentinel */
2181 /* ===================================================================== */
2182 /* Initialization function. */
/* Module docstring; passed as the last argument of Py_InitModule3() for the bz2 module. */
2184 PyDoc_STRVAR(bz2__doc__
,
2185 "The python bz2 module provides a comprehensive interface for\n\
2186 the bz2 compression library. It implements a complete file\n\
2187 interface, one shot (de)compression functions, and types for\n\
2188 sequential (de)compression.\n\
2196 BZ2File_Type
.ob_type
= &PyType_Type
;
2197 BZ2Comp_Type
.ob_type
= &PyType_Type
;
2198 BZ2Decomp_Type
.ob_type
= &PyType_Type
;
2200 m
= Py_InitModule3("bz2", bz2_methods
, bz2__doc__
);
2204 PyModule_AddObject(m
, "__author__", PyString_FromString(__author__
));
2206 Py_INCREF(&BZ2File_Type
);
2207 PyModule_AddObject(m
, "BZ2File", (PyObject
*)&BZ2File_Type
);
2209 Py_INCREF(&BZ2Comp_Type
);
2210 PyModule_AddObject(m
, "BZ2Compressor", (PyObject
*)&BZ2Comp_Type
);
2212 Py_INCREF(&BZ2Decomp_Type
);
2213 PyModule_AddObject(m
, "BZ2Decompressor", (PyObject
*)&BZ2Decomp_Type
);