3 python-bz2 - python bz2 library interface
5 Copyright (c) 2002 Gustavo Niemeyer <niemeyer@conectiva.com>
6 Copyright (c) 2002 Python Software Foundation; All Rights Reserved
13 #include "structmember.h"
19 static char __author__
[] =
20 "The bz2 python module was written by:\n\
22 Gustavo Niemeyer <niemeyer@conectiva.com>\n\
25 /* Our very own off_t-like type, 64-bit if possible */
26 /* copied from Objects/fileobject.c */
27 #if !defined(HAVE_LARGEFILE_SUPPORT)
28 typedef off_t Py_off_t
;
29 #elif SIZEOF_OFF_T >= 8
30 typedef off_t Py_off_t
;
31 #elif SIZEOF_FPOS_T >= 8
32 typedef fpos_t Py_off_t
;
34 #error "Large file support, but neither off_t nor fpos_t is large enough."
/* Character buffer of the string object v.  The argument is parenthesized
   so that an expression argument (e.g. `p + n`) is cast as a whole rather
   than having the cast bind only to its first operand. */
#define BUF(v) PyString_AS_STRING((PyStringObject *)(v))
41 #define MODE_READ_EOF 2
44 #define BZ2FileObject_Check(v) (Py_TYPE(v) == &BZ2File_Type)
47 #ifdef BZ_CONFIG_ERROR
/* Total output byte count of a bz_stream, combined from its 32-bit high
   and low halves into a long.  The argument is parenthesized so that
   expression arguments work with `->`. */
#define BZS_TOTAL_OUT(bzs) \
    (((long)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
52 #elif SIZEOF_LONG_LONG >= 8
/* Total output byte count of a bz_stream, combined from its 32-bit high
   and low halves into a PY_LONG_LONG.  The argument is parenthesized so
   that expression arguments work with `->`. */
#define BZS_TOTAL_OUT(bzs) \
    (((PY_LONG_LONG)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
56 #define BZS_TOTAL_OUT(bzs) \
60 #else /* ! BZ_CONFIG_ERROR */
62 #define BZ2_bzRead bzRead
63 #define BZ2_bzReadOpen bzReadOpen
64 #define BZ2_bzReadClose bzReadClose
65 #define BZ2_bzWrite bzWrite
66 #define BZ2_bzWriteOpen bzWriteOpen
67 #define BZ2_bzWriteClose bzWriteClose
68 #define BZ2_bzCompress bzCompress
69 #define BZ2_bzCompressInit bzCompressInit
70 #define BZ2_bzCompressEnd bzCompressEnd
71 #define BZ2_bzDecompress bzDecompress
72 #define BZ2_bzDecompressInit bzDecompressInit
73 #define BZ2_bzDecompressEnd bzDecompressEnd
/* Total output byte count of a bz_stream that exposes a single total_out
   field.  Both the argument and the whole expansion are parenthesized so
   the macro is safe inside larger expressions. */
#define BZS_TOTAL_OUT(bzs) ((bzs)->total_out)
77 #endif /* ! BZ_CONFIG_ERROR */
/* Acquire / release the `lock` member of obj.  ACQUIRE_LOCK passes 1 as the
   second argument of PyThread_acquire_lock (presumably the blocking wait
   flag -- confirm against the thread API).  The argument is parenthesized
   so that expression arguments work with `->`. */
#define ACQUIRE_LOCK(obj) PyThread_acquire_lock((obj)->lock, 1)
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
84 #define ACQUIRE_LOCK(obj)
85 #define RELEASE_LOCK(obj)
88 /* Bits in f_newlinetypes */
89 #define NEWLINE_UNKNOWN 0 /* No newline seen, yet */
90 #define NEWLINE_CR 1 /* \r newline seen */
91 #define NEWLINE_LF 2 /* \n newline seen */
92 #define NEWLINE_CRLF 4 /* \r\n newline seen */
94 /* ===================================================================== */
95 /* Structure definitions. */
101 char* f_buf
; /* Allocated readahead buffer */
102 char* f_bufend
; /* Points after last occupied position */
103 char* f_bufptr
; /* Current buffer position */
105 int f_softspace
; /* Flag used by 'print' command */
107 int f_univ_newline
; /* Handle any newline convention */
108 int f_newlinetypes
; /* Types of newlines seen */
109 int f_skipnextlf
; /* Skip next \n */
116 PyThread_type_lock lock
;
125 PyThread_type_lock lock
;
133 PyObject
*unused_data
;
135 PyThread_type_lock lock
;
139 /* ===================================================================== */
140 /* Utility functions. */
143 Util_CatchBZ2Error(int bzerror
)
151 #ifdef BZ_CONFIG_ERROR
152 case BZ_CONFIG_ERROR
:
153 PyErr_SetString(PyExc_SystemError
,
154 "the bz2 library was not compiled "
161 PyErr_SetString(PyExc_ValueError
,
162 "the bz2 library has received wrong "
173 case BZ_DATA_ERROR_MAGIC
:
174 PyErr_SetString(PyExc_IOError
, "invalid data stream");
179 PyErr_SetString(PyExc_IOError
, "unknown IO error");
183 case BZ_UNEXPECTED_EOF
:
184 PyErr_SetString(PyExc_EOFError
,
185 "compressed file ended before the "
186 "logical end-of-stream was detected");
190 case BZ_SEQUENCE_ERROR
:
191 PyErr_SetString(PyExc_RuntimeError
,
192 "wrong sequence of bz2 library "
201 #define SMALLCHUNK 8192
203 #define SMALLCHUNK BUFSIZ
207 #define BIGCHUNK (512 * 32)
209 #define BIGCHUNK (512 * 1024)
212 /* This is a hacked version of Python's fileobject.c:new_buffersize(). */
214 Util_NewBufferSize(size_t currentsize
)
216 if (currentsize
> SMALLCHUNK
) {
217 /* Keep doubling until we reach BIGCHUNK;
218 then keep adding BIGCHUNK. */
219 if (currentsize
<= BIGCHUNK
)
220 return currentsize
+ currentsize
;
222 return currentsize
+ BIGCHUNK
;
224 return currentsize
+ SMALLCHUNK
;
227 /* This is a hacked version of Python's fileobject.c:get_line(). */
229 Util_GetLine(BZ2FileObject
*f
, int n
)
233 size_t total_v_size
; /* total # of slots in buffer */
234 size_t used_v_size
; /* # used slots in buffer */
235 size_t increment
; /* amount to increment the buffer */
239 int newlinetypes
= f
->f_newlinetypes
;
240 int skipnextlf
= f
->f_skipnextlf
;
241 int univ_newline
= f
->f_univ_newline
;
243 total_v_size
= n
> 0 ? n
: 100;
244 v
= PyString_FromStringAndSize((char *)NULL
, total_v_size
);
249 end
= buf
+ total_v_size
;
252 Py_BEGIN_ALLOW_THREADS
254 bytes_read
= BZ2_bzRead(&bzerror
, f
->fp
, &c
, 1);
256 if (bytes_read
== 0) break;
261 /* Seeing a \n here with skipnextlf true means we
264 newlinetypes
|= NEWLINE_CRLF
;
265 if (bzerror
!= BZ_OK
) break;
266 bytes_read
= BZ2_bzRead(&bzerror
, f
->fp
, &c
, 1);
268 if (bytes_read
== 0) break;
270 newlinetypes
|= NEWLINE_CR
;
276 } else if (c
== '\n')
277 newlinetypes
|= NEWLINE_LF
;
280 if (bzerror
!= BZ_OK
|| c
== '\n') break;
282 if (univ_newline
&& bzerror
== BZ_STREAM_END
&& skipnextlf
)
283 newlinetypes
|= NEWLINE_CR
;
285 f
->f_newlinetypes
= newlinetypes
;
286 f
->f_skipnextlf
= skipnextlf
;
287 if (bzerror
== BZ_STREAM_END
) {
289 f
->mode
= MODE_READ_EOF
;
291 } else if (bzerror
!= BZ_OK
) {
292 Util_CatchBZ2Error(bzerror
);
298 /* Must be because buf == end */
301 used_v_size
= total_v_size
;
302 increment
= total_v_size
>> 2; /* mild exponential growth */
303 total_v_size
+= increment
;
304 if (total_v_size
> INT_MAX
) {
305 PyErr_SetString(PyExc_OverflowError
,
306 "line is longer than a Python string can hold");
310 if (_PyString_Resize(&v
, total_v_size
) < 0)
312 buf
= BUF(v
) + used_v_size
;
313 end
= BUF(v
) + total_v_size
;
316 used_v_size
= buf
- BUF(v
);
317 if (used_v_size
!= total_v_size
)
318 _PyString_Resize(&v
, used_v_size
);
322 /* This is a hacked version of Python's
323 * fileobject.c:Py_UniversalNewlineFread(). */
325 Util_UnivNewlineRead(int *bzerror
, BZFILE
*stream
,
326 char* buf
, size_t n
, BZ2FileObject
*f
)
329 int newlinetypes
, skipnextlf
;
332 assert(stream
!= NULL
);
334 if (!f
->f_univ_newline
)
335 return BZ2_bzRead(bzerror
, stream
, buf
, n
);
337 newlinetypes
= f
->f_newlinetypes
;
338 skipnextlf
= f
->f_skipnextlf
;
340 /* Invariant: n is the number of bytes remaining to be filled
348 nread
= BZ2_bzRead(bzerror
, stream
, dst
, n
);
350 n
-= nread
; /* assuming 1 byte out for each in; will adjust */
351 shortread
= n
!= 0; /* true iff EOF or error */
355 /* Save as LF and set flag to skip next LF. */
359 else if (skipnextlf
&& c
== '\n') {
360 /* Skip LF, and remember we saw CR LF. */
362 newlinetypes
|= NEWLINE_CRLF
;
366 /* Normal char to be stored in buffer. Also
367 * update the newlinetypes flag if either this
368 * is an LF or the previous char was a CR.
371 newlinetypes
|= NEWLINE_LF
;
373 newlinetypes
|= NEWLINE_CR
;
379 /* If this is EOF, update type flags. */
380 if (skipnextlf
&& *bzerror
== BZ_STREAM_END
)
381 newlinetypes
|= NEWLINE_CR
;
385 f
->f_newlinetypes
= newlinetypes
;
386 f
->f_skipnextlf
= skipnextlf
;
390 /* This is a hacked version of Python's fileobject.c:drop_readahead(). */
392 Util_DropReadAhead(BZ2FileObject
*f
)
394 if (f
->f_buf
!= NULL
) {
395 PyMem_Free(f
->f_buf
);
400 /* This is a hacked version of Python's fileobject.c:readahead(). */
402 Util_ReadAhead(BZ2FileObject
*f
, int bufsize
)
407 if (f
->f_buf
!= NULL
) {
408 if((f
->f_bufend
- f
->f_bufptr
) >= 1)
411 Util_DropReadAhead(f
);
413 if (f
->mode
== MODE_READ_EOF
) {
414 f
->f_bufptr
= f
->f_buf
;
415 f
->f_bufend
= f
->f_buf
;
418 if ((f
->f_buf
= PyMem_Malloc(bufsize
)) == NULL
) {
421 Py_BEGIN_ALLOW_THREADS
422 chunksize
= Util_UnivNewlineRead(&bzerror
, f
->fp
, f
->f_buf
,
426 if (bzerror
== BZ_STREAM_END
) {
428 f
->mode
= MODE_READ_EOF
;
429 } else if (bzerror
!= BZ_OK
) {
430 Util_CatchBZ2Error(bzerror
);
431 Util_DropReadAhead(f
);
434 f
->f_bufptr
= f
->f_buf
;
435 f
->f_bufend
= f
->f_buf
+ chunksize
;
439 /* This is a hacked version of Python's
440 * fileobject.c:readahead_get_line_skip(). */
441 static PyStringObject
*
442 Util_ReadAheadGetLineSkip(BZ2FileObject
*f
, int skip
, int bufsize
)
449 if (f
->f_buf
== NULL
)
450 if (Util_ReadAhead(f
, bufsize
) < 0)
453 len
= f
->f_bufend
- f
->f_bufptr
;
455 return (PyStringObject
*)
456 PyString_FromStringAndSize(NULL
, skip
);
457 bufptr
= memchr(f
->f_bufptr
, '\n', len
);
458 if (bufptr
!= NULL
) {
459 bufptr
++; /* Count the '\n' */
460 len
= bufptr
- f
->f_bufptr
;
461 s
= (PyStringObject
*)
462 PyString_FromStringAndSize(NULL
, skip
+len
);
465 memcpy(PyString_AS_STRING(s
)+skip
, f
->f_bufptr
, len
);
466 f
->f_bufptr
= bufptr
;
467 if (bufptr
== f
->f_bufend
)
468 Util_DropReadAhead(f
);
470 bufptr
= f
->f_bufptr
;
472 f
->f_buf
= NULL
; /* Force new readahead buffer */
473 s
= Util_ReadAheadGetLineSkip(f
, skip
+len
,
474 bufsize
+ (bufsize
>>2));
479 memcpy(PyString_AS_STRING(s
)+skip
, bufptr
, len
);
485 /* ===================================================================== */
486 /* Methods of BZ2File. */
488 PyDoc_STRVAR(BZ2File_read__doc__
,
489 "read([size]) -> string\n\
491 Read at most size uncompressed bytes, returned as a string. If the size\n\
492 argument is negative or omitted, read until EOF is reached.\n\
495 /* This is a hacked version of Python's fileobject.c:file_read(). */
497 BZ2File_read(BZ2FileObject
*self
, PyObject
*args
)
499 long bytesrequested
= -1;
500 size_t bytesread
, buffersize
, chunksize
;
502 PyObject
*ret
= NULL
;
504 if (!PyArg_ParseTuple(args
, "|l:read", &bytesrequested
))
508 switch (self
->mode
) {
512 ret
= PyString_FromString("");
515 PyErr_SetString(PyExc_ValueError
,
516 "I/O operation on closed file");
519 PyErr_SetString(PyExc_IOError
,
520 "file is not ready for reading");
524 if (bytesrequested
< 0)
525 buffersize
= Util_NewBufferSize((size_t)0);
527 buffersize
= bytesrequested
;
528 if (buffersize
> INT_MAX
) {
529 PyErr_SetString(PyExc_OverflowError
,
530 "requested number of bytes is "
531 "more than a Python string can hold");
534 ret
= PyString_FromStringAndSize((char *)NULL
, buffersize
);
540 Py_BEGIN_ALLOW_THREADS
541 chunksize
= Util_UnivNewlineRead(&bzerror
, self
->fp
,
543 buffersize
-bytesread
,
545 self
->pos
+= chunksize
;
547 bytesread
+= chunksize
;
548 if (bzerror
== BZ_STREAM_END
) {
549 self
->size
= self
->pos
;
550 self
->mode
= MODE_READ_EOF
;
552 } else if (bzerror
!= BZ_OK
) {
553 Util_CatchBZ2Error(bzerror
);
558 if (bytesrequested
< 0) {
559 buffersize
= Util_NewBufferSize(buffersize
);
560 if (_PyString_Resize(&ret
, buffersize
) < 0)
566 if (bytesread
!= buffersize
)
567 _PyString_Resize(&ret
, bytesread
);
574 PyDoc_STRVAR(BZ2File_readline__doc__
,
575 "readline([size]) -> string\n\
577 Return the next line from the file, as a string, retaining newline.\n\
578 A non-negative size argument will limit the maximum number of bytes to\n\
579 return (an incomplete line may be returned then). Return an empty\n\
584 BZ2File_readline(BZ2FileObject
*self
, PyObject
*args
)
586 PyObject
*ret
= NULL
;
589 if (!PyArg_ParseTuple(args
, "|i:readline", &sizehint
))
593 switch (self
->mode
) {
597 ret
= PyString_FromString("");
600 PyErr_SetString(PyExc_ValueError
,
601 "I/O operation on closed file");
604 PyErr_SetString(PyExc_IOError
,
605 "file is not ready for reading");
610 ret
= PyString_FromString("");
612 ret
= Util_GetLine(self
, (sizehint
< 0) ? 0 : sizehint
);
619 PyDoc_STRVAR(BZ2File_readlines__doc__
,
620 "readlines([size]) -> list\n\
622 Call readline() repeatedly and return a list of lines read.\n\
623 The optional size argument, if given, is an approximate bound on the\n\
624 total number of bytes in the lines returned.\n\
627 /* This is a hacked version of Python's fileobject.c:file_readlines(). */
629 BZ2File_readlines(BZ2FileObject
*self
, PyObject
*args
)
632 PyObject
*list
= NULL
;
634 char small_buffer
[SMALLCHUNK
];
635 char *buffer
= small_buffer
;
636 size_t buffersize
= SMALLCHUNK
;
637 PyObject
*big_buffer
= NULL
;
640 size_t totalread
= 0;
646 if (!PyArg_ParseTuple(args
, "|l:readlines", &sizehint
))
650 switch (self
->mode
) {
654 list
= PyList_New(0);
657 PyErr_SetString(PyExc_ValueError
,
658 "I/O operation on closed file");
661 PyErr_SetString(PyExc_IOError
,
662 "file is not ready for reading");
666 if ((list
= PyList_New(0)) == NULL
)
670 Py_BEGIN_ALLOW_THREADS
671 nread
= Util_UnivNewlineRead(&bzerror
, self
->fp
,
673 buffersize
-nfilled
, self
);
676 if (bzerror
== BZ_STREAM_END
) {
677 self
->size
= self
->pos
;
678 self
->mode
= MODE_READ_EOF
;
684 } else if (bzerror
!= BZ_OK
) {
685 Util_CatchBZ2Error(bzerror
);
692 p
= memchr(buffer
+nfilled
, '\n', nread
);
693 if (!shortread
&& p
== NULL
) {
694 /* Need a larger buffer to fit this line */
697 if (buffersize
> INT_MAX
) {
698 PyErr_SetString(PyExc_OverflowError
,
699 "line is longer than a Python string can hold");
702 if (big_buffer
== NULL
) {
703 /* Create the big buffer */
704 big_buffer
= PyString_FromStringAndSize(
706 if (big_buffer
== NULL
)
708 buffer
= PyString_AS_STRING(big_buffer
);
709 memcpy(buffer
, small_buffer
, nfilled
);
712 /* Grow the big buffer */
713 _PyString_Resize(&big_buffer
, buffersize
);
714 buffer
= PyString_AS_STRING(big_buffer
);
718 end
= buffer
+nfilled
+nread
;
721 /* Process complete lines */
723 line
= PyString_FromStringAndSize(q
, p
-q
);
726 err
= PyList_Append(list
, line
);
731 p
= memchr(q
, '\n', end
-q
);
733 /* Move the remaining incomplete line to the start */
735 memmove(buffer
, q
, nfilled
);
737 if (totalread
>= (size_t)sizehint
)
745 /* Partial last line */
746 line
= PyString_FromStringAndSize(buffer
, nfilled
);
750 /* Need to complete the last line */
751 PyObject
*rest
= Util_GetLine(self
, 0);
756 PyString_Concat(&line
, rest
);
761 err
= PyList_Append(list
, line
);
770 Py_DECREF(big_buffer
);
775 PyDoc_STRVAR(BZ2File_xreadlines__doc__
,
776 "xreadlines() -> self\n\
778 For backward compatibility. BZ2File objects now include the performance\n\
779 optimizations previously implemented in the xreadlines module.\n\
782 PyDoc_STRVAR(BZ2File_write__doc__
,
783 "write(data) -> None\n\
785 Write the 'data' string to file. Note that due to buffering, close() may\n\
786 be needed before the file on disk reflects the data written.\n\
789 /* This is a hacked version of Python's fileobject.c:file_write(). */
791 BZ2File_write(BZ2FileObject
*self
, PyObject
*args
)
793 PyObject
*ret
= NULL
;
798 if (!PyArg_ParseTuple(args
, "s#:write", &buf
, &len
))
802 switch (self
->mode
) {
807 PyErr_SetString(PyExc_ValueError
,
808 "I/O operation on closed file");
812 PyErr_SetString(PyExc_IOError
,
813 "file is not ready for writing");
817 self
->f_softspace
= 0;
819 Py_BEGIN_ALLOW_THREADS
820 BZ2_bzWrite (&bzerror
, self
->fp
, buf
, len
);
824 if (bzerror
!= BZ_OK
) {
825 Util_CatchBZ2Error(bzerror
);
837 PyDoc_STRVAR(BZ2File_writelines__doc__
,
838 "writelines(sequence_of_strings) -> None\n\
840 Write the sequence of strings to the file. Note that newlines are not\n\
841 added. The sequence can be any iterable object producing strings. This is\n\
842 equivalent to calling write() for each string.\n\
845 /* This is a hacked version of Python's fileobject.c:file_writelines(). */
847 BZ2File_writelines(BZ2FileObject
*self
, PyObject
*seq
)
849 #define CHUNKSIZE 1000
850 PyObject
*list
= NULL
;
851 PyObject
*iter
= NULL
;
852 PyObject
*ret
= NULL
;
854 int i
, j
, index
, len
, islist
;
858 switch (self
->mode
) {
863 PyErr_SetString(PyExc_ValueError
,
864 "I/O operation on closed file");
868 PyErr_SetString(PyExc_IOError
,
869 "file is not ready for writing");
873 islist
= PyList_Check(seq
);
875 iter
= PyObject_GetIter(seq
);
877 PyErr_SetString(PyExc_TypeError
,
878 "writelines() requires an iterable argument");
881 list
= PyList_New(CHUNKSIZE
);
886 /* Strategy: slurp CHUNKSIZE lines into a private list,
887 checking that they are all strings, then write that list
888 without holding the interpreter lock, then come back for more. */
889 for (index
= 0; ; index
+= CHUNKSIZE
) {
892 list
= PyList_GetSlice(seq
, index
, index
+CHUNKSIZE
);
895 j
= PyList_GET_SIZE(list
);
898 for (j
= 0; j
< CHUNKSIZE
; j
++) {
899 line
= PyIter_Next(iter
);
901 if (PyErr_Occurred())
905 PyList_SetItem(list
, j
, line
);
911 /* Check that all entries are indeed strings. If not,
912 apply the same rules as for file.write() and
913 convert the rets to strings. This is slow, but
914 seems to be the only way since all conversion APIs
915 could potentially execute Python code. */
916 for (i
= 0; i
< j
; i
++) {
917 PyObject
*v
= PyList_GET_ITEM(list
, i
);
918 if (!PyString_Check(v
)) {
921 if (PyObject_AsCharBuffer(v
, &buffer
, &len
)) {
922 PyErr_SetString(PyExc_TypeError
,
929 line
= PyString_FromStringAndSize(buffer
,
934 PyList_SET_ITEM(list
, i
, line
);
938 self
->f_softspace
= 0;
940 /* Since we are releasing the global lock, the
941 following code may *not* execute Python code. */
942 Py_BEGIN_ALLOW_THREADS
943 for (i
= 0; i
< j
; i
++) {
944 line
= PyList_GET_ITEM(list
, i
);
945 len
= PyString_GET_SIZE(line
);
946 BZ2_bzWrite (&bzerror
, self
->fp
,
947 PyString_AS_STRING(line
), len
);
948 if (bzerror
!= BZ_OK
) {
950 Util_CatchBZ2Error(bzerror
);
971 PyDoc_STRVAR(BZ2File_seek__doc__
,
972 "seek(offset [, whence]) -> None\n\
974 Move to new file position. Argument offset is a byte count. Optional\n\
975 argument whence defaults to 0 (offset from start of file, offset\n\
976 should be >= 0); other values are 1 (move relative to current position,\n\
977 positive or negative), and 2 (move relative to end of file, usually\n\
978 negative, although many platforms allow seeking beyond the end of a file).\n\
980 Note that seeking of bz2 files is emulated, and depending on the parameters\n\
981 the operation may be extremely slow.\n\
985 BZ2File_seek(BZ2FileObject
*self
, PyObject
*args
)
990 char small_buffer
[SMALLCHUNK
];
991 char *buffer
= small_buffer
;
992 size_t buffersize
= SMALLCHUNK
;
993 Py_off_t bytesread
= 0;
997 PyObject
*ret
= NULL
;
999 if (!PyArg_ParseTuple(args
, "O|i:seek", &offobj
, &where
))
1001 #if !defined(HAVE_LARGEFILE_SUPPORT)
1002 offset
= PyInt_AsLong(offobj
);
1004 offset
= PyLong_Check(offobj
) ?
1005 PyLong_AsLongLong(offobj
) : PyInt_AsLong(offobj
);
1007 if (PyErr_Occurred())
1011 Util_DropReadAhead(self
);
1012 switch (self
->mode
) {
1018 PyErr_SetString(PyExc_ValueError
,
1019 "I/O operation on closed file");
1023 PyErr_SetString(PyExc_IOError
,
1024 "seek works only while reading");
1029 if (self
->size
== -1) {
1030 assert(self
->mode
!= MODE_READ_EOF
);
1032 Py_BEGIN_ALLOW_THREADS
1033 chunksize
= Util_UnivNewlineRead(
1037 self
->pos
+= chunksize
;
1038 Py_END_ALLOW_THREADS
1040 bytesread
+= chunksize
;
1041 if (bzerror
== BZ_STREAM_END
) {
1043 } else if (bzerror
!= BZ_OK
) {
1044 Util_CatchBZ2Error(bzerror
);
1048 self
->mode
= MODE_READ_EOF
;
1049 self
->size
= self
->pos
;
1052 offset
= self
->size
+ offset
;
1053 } else if (where
== 1) {
1054 offset
= self
->pos
+ offset
;
1057 /* Before getting here, offset must be the absolute position the file
1058 * pointer should be set to. */
1060 if (offset
>= self
->pos
) {
1061 /* we can move forward */
1062 offset
-= self
->pos
;
1064 /* we cannot move back, so rewind the stream */
1065 BZ2_bzReadClose(&bzerror
, self
->fp
);
1067 PyFile_DecUseCount((PyFileObject
*)self
->file
);
1070 if (bzerror
!= BZ_OK
) {
1071 Util_CatchBZ2Error(bzerror
);
1074 ret
= PyObject_CallMethod(self
->file
, "seek", "(i)", 0);
1080 self
->fp
= BZ2_bzReadOpen(&bzerror
, PyFile_AsFile(self
->file
),
1083 PyFile_IncUseCount((PyFileObject
*)self
->file
);
1084 if (bzerror
!= BZ_OK
) {
1085 Util_CatchBZ2Error(bzerror
);
1088 self
->mode
= MODE_READ
;
1091 if (offset
<= 0 || self
->mode
== MODE_READ_EOF
)
1094 /* Before getting here, offset must be set to the number of bytes
1095 * to walk forward. */
1097 if (offset
-bytesread
> buffersize
)
1098 readsize
= buffersize
;
1100 /* offset might be wider than readsize, but the result
1101 * of the subtraction is bound by buffersize (see the
1102 * condition above). buffersize is 8192. */
1103 readsize
= (size_t)(offset
-bytesread
);
1104 Py_BEGIN_ALLOW_THREADS
1105 chunksize
= Util_UnivNewlineRead(&bzerror
, self
->fp
,
1106 buffer
, readsize
, self
);
1107 self
->pos
+= chunksize
;
1108 Py_END_ALLOW_THREADS
1109 bytesread
+= chunksize
;
1110 if (bzerror
== BZ_STREAM_END
) {
1111 self
->size
= self
->pos
;
1112 self
->mode
= MODE_READ_EOF
;
1114 } else if (bzerror
!= BZ_OK
) {
1115 Util_CatchBZ2Error(bzerror
);
1118 if (bytesread
== offset
)
1131 PyDoc_STRVAR(BZ2File_tell__doc__
,
1134 Return the current file position, an integer (may be a long integer).\n\
1138 BZ2File_tell(BZ2FileObject
*self
, PyObject
*args
)
1140 PyObject
*ret
= NULL
;
1142 if (self
->mode
== MODE_CLOSED
) {
1143 PyErr_SetString(PyExc_ValueError
,
1144 "I/O operation on closed file");
1148 #if !defined(HAVE_LARGEFILE_SUPPORT)
1149 ret
= PyInt_FromLong(self
->pos
);
1151 ret
= PyLong_FromLongLong(self
->pos
);
1158 PyDoc_STRVAR(BZ2File_close__doc__
,
1159 "close() -> None or (perhaps) an integer\n\
1161 Close the file. Sets data attribute .closed to true. A closed file\n\
1162 cannot be used for further I/O operations. close() may be called more\n\
1163 than once without error.\n\
1167 BZ2File_close(BZ2FileObject
*self
)
1169 PyObject
*ret
= NULL
;
1170 int bzerror
= BZ_OK
;
1173 switch (self
->mode
) {
1176 BZ2_bzReadClose(&bzerror
, self
->fp
);
1179 BZ2_bzWriteClose(&bzerror
, self
->fp
,
1184 PyFile_DecUseCount((PyFileObject
*)self
->file
);
1187 self
->mode
= MODE_CLOSED
;
1188 ret
= PyObject_CallMethod(self
->file
, "close", NULL
);
1189 if (bzerror
!= BZ_OK
) {
1190 Util_CatchBZ2Error(bzerror
);
1199 static PyObject
*BZ2File_getiter(BZ2FileObject
*self
);
1201 static PyMethodDef BZ2File_methods
[] = {
1202 {"read", (PyCFunction
)BZ2File_read
, METH_VARARGS
, BZ2File_read__doc__
},
1203 {"readline", (PyCFunction
)BZ2File_readline
, METH_VARARGS
, BZ2File_readline__doc__
},
1204 {"readlines", (PyCFunction
)BZ2File_readlines
, METH_VARARGS
, BZ2File_readlines__doc__
},
1205 {"xreadlines", (PyCFunction
)BZ2File_getiter
, METH_VARARGS
, BZ2File_xreadlines__doc__
},
1206 {"write", (PyCFunction
)BZ2File_write
, METH_VARARGS
, BZ2File_write__doc__
},
1207 {"writelines", (PyCFunction
)BZ2File_writelines
, METH_O
, BZ2File_writelines__doc__
},
1208 {"seek", (PyCFunction
)BZ2File_seek
, METH_VARARGS
, BZ2File_seek__doc__
},
1209 {"tell", (PyCFunction
)BZ2File_tell
, METH_NOARGS
, BZ2File_tell__doc__
},
1210 {"close", (PyCFunction
)BZ2File_close
, METH_NOARGS
, BZ2File_close__doc__
},
1211 {NULL
, NULL
} /* sentinel */
1215 /* ===================================================================== */
1216 /* Getters and setters of BZ2File. */
1218 /* This is a hacked version of Python's fileobject.c:get_newlines(). */
1220 BZ2File_get_newlines(BZ2FileObject
*self
, void *closure
)
1222 switch (self
->f_newlinetypes
) {
1223 case NEWLINE_UNKNOWN
:
1227 return PyString_FromString("\r");
1229 return PyString_FromString("\n");
1230 case NEWLINE_CR
|NEWLINE_LF
:
1231 return Py_BuildValue("(ss)", "\r", "\n");
1233 return PyString_FromString("\r\n");
1234 case NEWLINE_CR
|NEWLINE_CRLF
:
1235 return Py_BuildValue("(ss)", "\r", "\r\n");
1236 case NEWLINE_LF
|NEWLINE_CRLF
:
1237 return Py_BuildValue("(ss)", "\n", "\r\n");
1238 case NEWLINE_CR
|NEWLINE_LF
|NEWLINE_CRLF
:
1239 return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
1241 PyErr_Format(PyExc_SystemError
,
1242 "Unknown newlines value 0x%x\n",
1243 self
->f_newlinetypes
);
1249 BZ2File_get_closed(BZ2FileObject
*self
, void *closure
)
1251 return PyInt_FromLong(self
->mode
== MODE_CLOSED
);
1255 BZ2File_get_mode(BZ2FileObject
*self
, void *closure
)
1257 return PyObject_GetAttrString(self
->file
, "mode");
1261 BZ2File_get_name(BZ2FileObject
*self
, void *closure
)
1263 return PyObject_GetAttrString(self
->file
, "name");
1266 static PyGetSetDef BZ2File_getset
[] = {
1267 {"closed", (getter
)BZ2File_get_closed
, NULL
,
1268 "True if the file is closed"},
1269 {"newlines", (getter
)BZ2File_get_newlines
, NULL
,
1270 "end-of-line convention used in this file"},
1271 {"mode", (getter
)BZ2File_get_mode
, NULL
,
1272 "file mode ('r', 'w', or 'U')"},
1273 {"name", (getter
)BZ2File_get_name
, NULL
,
1275 {NULL
} /* Sentinel */
1279 /* ===================================================================== */
1280 /* Members of BZ2File_Type. */
1283 #define OFF(x) offsetof(BZ2FileObject, x)
1285 static PyMemberDef BZ2File_members
[] = {
1286 {"softspace", T_INT
, OFF(f_softspace
), 0,
1287 "flag indicating that a space needs to be printed; used by print"},
1288 {NULL
} /* Sentinel */
1291 /* ===================================================================== */
1292 /* Slot definitions for BZ2File_Type. */
1295 BZ2File_init(BZ2FileObject
*self
, PyObject
*args
, PyObject
*kwargs
)
1297 static char *kwlist
[] = {"filename", "mode", "buffering",
1298 "compresslevel", 0};
1302 int compresslevel
= 9;
1308 if (!PyArg_ParseTupleAndKeywords(args
, kwargs
, "O|sii:BZ2File",
1309 kwlist
, &name
, &mode
, &buffering
,
1313 if (compresslevel
< 1 || compresslevel
> 9) {
1314 PyErr_SetString(PyExc_ValueError
,
1315 "compresslevel must be between 1 and 9");
1334 self
->f_univ_newline
= 0;
1336 self
->f_univ_newline
= 1;
1345 PyErr_Format(PyExc_ValueError
,
1346 "invalid mode char %c", *mode
);
1354 if (mode_char
== 0) {
1358 mode
= (mode_char
== 'r') ? "rb" : "wb";
1360 self
->file
= PyObject_CallFunction((PyObject
*)&PyFile_Type
, "(Osi)",
1361 name
, mode
, buffering
);
1362 if (self
->file
== NULL
)
1365 /* From now on, we have stuff to dealloc, so jump to error label
1366 * instead of returning */
1369 self
->lock
= PyThread_allocate_lock();
1371 PyErr_SetString(PyExc_MemoryError
, "unable to allocate lock");
1376 if (mode_char
== 'r')
1377 self
->fp
= BZ2_bzReadOpen(&bzerror
,
1378 PyFile_AsFile(self
->file
),
1381 self
->fp
= BZ2_bzWriteOpen(&bzerror
,
1382 PyFile_AsFile(self
->file
),
1383 compresslevel
, 0, 0);
1385 if (bzerror
!= BZ_OK
) {
1386 Util_CatchBZ2Error(bzerror
);
1389 PyFile_IncUseCount((PyFileObject
*)self
->file
);
1391 self
->mode
= (mode_char
== 'r') ? MODE_READ
: MODE_WRITE
;
1396 Py_CLEAR(self
->file
);
1399 PyThread_free_lock(self
->lock
);
1407 BZ2File_dealloc(BZ2FileObject
*self
)
1412 PyThread_free_lock(self
->lock
);
1414 switch (self
->mode
) {
1417 BZ2_bzReadClose(&bzerror
, self
->fp
);
1420 BZ2_bzWriteClose(&bzerror
, self
->fp
,
1425 PyFile_DecUseCount((PyFileObject
*)self
->file
);
1428 Util_DropReadAhead(self
);
1429 Py_XDECREF(self
->file
);
1430 Py_TYPE(self
)->tp_free((PyObject
*)self
);
1433 /* This is a hacked version of Python's fileobject.c:file_getiter(). */
1435 BZ2File_getiter(BZ2FileObject
*self
)
1437 if (self
->mode
== MODE_CLOSED
) {
1438 PyErr_SetString(PyExc_ValueError
,
1439 "I/O operation on closed file");
1442 Py_INCREF((PyObject
*)self
);
1443 return (PyObject
*)self
;
1446 /* This is a hacked version of Python's fileobject.c:file_iternext(). */
1447 #define READAHEAD_BUFSIZE 8192
1449 BZ2File_iternext(BZ2FileObject
*self
)
1451 PyStringObject
* ret
;
1453 if (self
->mode
== MODE_CLOSED
) {
1454 PyErr_SetString(PyExc_ValueError
,
1455 "I/O operation on closed file");
1458 ret
= Util_ReadAheadGetLineSkip(self
, 0, READAHEAD_BUFSIZE
);
1460 if (ret
== NULL
|| PyString_GET_SIZE(ret
) == 0) {
1464 return (PyObject
*)ret
;
1467 /* ===================================================================== */
1468 /* BZ2File_Type definition. */
1470 PyDoc_VAR(BZ2File__doc__
) =
1472 "BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
1474 Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
1475 writing. When opened for writing, the file will be created if it doesn't\n\
1476 exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
1477 unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
1478 is given, must be a number between 1 and 9.\n\
1482 Add a 'U' to mode to open the file for input with universal newline\n\
1483 support. Any line ending in the input file will be seen as a '\\n' in\n\
1484 Python. Also, a file so opened gains the attribute 'newlines'; the value\n\
1485 for this attribute is one of None (no newline read yet), '\\r', '\\n',\n\
1486 '\\r\\n' or a tuple containing all the newline types seen. Universal\n\
1487 newlines are available only when reading.\n\
1491 static PyTypeObject BZ2File_Type
= {
1492 PyVarObject_HEAD_INIT(NULL
, 0)
1493 "bz2.BZ2File", /*tp_name*/
1494 sizeof(BZ2FileObject
), /*tp_basicsize*/
1496 (destructor
)BZ2File_dealloc
, /*tp_dealloc*/
1503 0, /*tp_as_sequence*/
1504 0, /*tp_as_mapping*/
1508 PyObject_GenericGetAttr
,/*tp_getattro*/
1509 PyObject_GenericSetAttr
,/*tp_setattro*/
1511 Py_TPFLAGS_DEFAULT
|Py_TPFLAGS_BASETYPE
, /*tp_flags*/
1512 BZ2File__doc__
, /*tp_doc*/
1515 0, /*tp_richcompare*/
1516 0, /*tp_weaklistoffset*/
1517 (getiterfunc
)BZ2File_getiter
, /*tp_iter*/
1518 (iternextfunc
)BZ2File_iternext
, /*tp_iternext*/
1519 BZ2File_methods
, /*tp_methods*/
1520 BZ2File_members
, /*tp_members*/
1521 BZ2File_getset
, /*tp_getset*/
1526 0, /*tp_dictoffset*/
1527 (initproc
)BZ2File_init
, /*tp_init*/
1528 PyType_GenericAlloc
, /*tp_alloc*/
1529 PyType_GenericNew
, /*tp_new*/
1530 _PyObject_Del
, /*tp_free*/
1535 /* ===================================================================== */
1536 /* Methods of BZ2Comp. */
1538 PyDoc_STRVAR(BZ2Comp_compress__doc__
,
1539 "compress(data) -> string\n\
1541 Provide more data to the compressor object. It will return chunks of\n\
1542 compressed data whenever possible. When you've finished providing data\n\
1543 to compress, call the flush() method to finish the compression process,\n\
1544 and return what is left in the internal buffers.\n\
1548 BZ2Comp_compress(BZ2CompObject
*self
, PyObject
*args
)
1552 int bufsize
= SMALLCHUNK
;
1553 PY_LONG_LONG totalout
;
1554 PyObject
*ret
= NULL
;
1555 bz_stream
*bzs
= &self
->bzs
;
1558 if (!PyArg_ParseTuple(args
, "s#:compress", &data
, &datasize
))
1562 return PyString_FromString("");
1565 if (!self
->running
) {
1566 PyErr_SetString(PyExc_ValueError
,
1567 "this object was already flushed");
1571 ret
= PyString_FromStringAndSize(NULL
, bufsize
);
1575 bzs
->next_in
= data
;
1576 bzs
->avail_in
= datasize
;
1577 bzs
->next_out
= BUF(ret
);
1578 bzs
->avail_out
= bufsize
;
1580 totalout
= BZS_TOTAL_OUT(bzs
);
1583 Py_BEGIN_ALLOW_THREADS
1584 bzerror
= BZ2_bzCompress(bzs
, BZ_RUN
);
1585 Py_END_ALLOW_THREADS
1586 if (bzerror
!= BZ_RUN_OK
) {
1587 Util_CatchBZ2Error(bzerror
);
1590 if (bzs
->avail_in
== 0)
1591 break; /* no more input data */
1592 if (bzs
->avail_out
== 0) {
1593 bufsize
= Util_NewBufferSize(bufsize
);
1594 if (_PyString_Resize(&ret
, bufsize
) < 0) {
1595 BZ2_bzCompressEnd(bzs
);
1598 bzs
->next_out
= BUF(ret
) + (BZS_TOTAL_OUT(bzs
)
1600 bzs
->avail_out
= bufsize
- (bzs
->next_out
- BUF(ret
));
1604 _PyString_Resize(&ret
, (Py_ssize_t
)(BZS_TOTAL_OUT(bzs
) - totalout
));
1615 PyDoc_STRVAR(BZ2Comp_flush__doc__
,
1616 "flush() -> string\n\
1618 Finish the compression process and return what is left in internal buffers.\n\
1619 You must not use the compressor object after calling this method.\n\
/*
 * BZ2Comp_flush -- C implementation behind the flush() method of the
 * compressor object (registered as METH_NOARGS in BZ2Comp_methods).
 *
 * Visible behaviour:
 *   - sets ValueError when self->running is false;
 *   - allocates an output string of SMALLCHUNK bytes and points the
 *     bz_stream output at it;
 *   - calls BZ2_bzCompress(bzs, BZ_FINISH) with the GIL released, treating
 *     BZ_STREAM_END as completion and anything other than BZ_FINISH_OK as
 *     an error reported through Util_CatchBZ2Error;
 *   - enlarges the string via Util_NewBufferSize whenever avail_out hits 0;
 *   - finally trims the string to the number of bytes produced by this call.
 *
 * NOTE(review): this listing has gaps (the embedded line numbers skip and
 * bzerror is used without a visible declaration), so the return type, the
 * loop header, the error path and any locking are not shown here -- confirm
 * against the complete file.
 */
1623 BZ2Comp_flush(BZ2CompObject
*self
)
1625 int bufsize
= SMALLCHUNK
;
1626 PyObject
*ret
= NULL
;
1627 bz_stream
*bzs
= &self
->bzs
;
1628 PY_LONG_LONG totalout
;
/* Flushing is refused unless the object is still marked as running. */
1632 if (!self
->running
) {
1633 PyErr_SetString(PyExc_ValueError
, "object was already "
/* Start with a SMALLCHUNK-sized output string; it is grown on demand below. */
1639 ret
= PyString_FromStringAndSize(NULL
, bufsize
);
1643 bzs
->next_out
= BUF(ret
);
1644 bzs
->avail_out
= bufsize
;
/* Snapshot the stream output counter so that only the bytes produced by
 * this call are counted when positioning and trimming the result. */
1646 totalout
= BZS_TOTAL_OUT(bzs
);
/* The library call runs between Py_BEGIN/END_ALLOW_THREADS, i.e. with the
 * GIL released. */
1649 Py_BEGIN_ALLOW_THREADS
1650 bzerror
= BZ2_bzCompress(bzs
, BZ_FINISH
);
1651 Py_END_ALLOW_THREADS
1652 if (bzerror
== BZ_STREAM_END
) {
1654 } else if (bzerror
!= BZ_FINISH_OK
) {
1655 Util_CatchBZ2Error(bzerror
);
/* Output space exhausted: grow the string and re-point the stream into it. */
1658 if (bzs
->avail_out
== 0) {
1659 bufsize
= Util_NewBufferSize(bufsize
);
1660 if (_PyString_Resize(&ret
, bufsize
) < 0)
/* NOTE(review): this plain reset of next_out is overwritten by the very
 * next statement and looks redundant. */
1662 bzs
->next_out
= BUF(ret
);
/* Resume writing right after the bytes already produced by this call. */
1663 bzs
->next_out
= BUF(ret
) + (BZS_TOTAL_OUT(bzs
)
1665 bzs
->avail_out
= bufsize
- (bzs
->next_out
- BUF(ret
));
/* Trim unused tail space; skipped when the buffer is exactly full. */
1669 if (bzs
->avail_out
!= 0)
1670 _PyString_Resize(&ret
, (Py_ssize_t
)(BZS_TOTAL_OUT(bzs
) - totalout
));
/* Method table of the compressor type: compress takes positional arguments
 * (METH_VARARGS), flush takes none (METH_NOARGS); each entry also carries
 * its docstring. The table ends with a NULL sentinel entry. */
1681 static PyMethodDef BZ2Comp_methods
[] = {
1682 {"compress", (PyCFunction
)BZ2Comp_compress
, METH_VARARGS
,
1683 BZ2Comp_compress__doc__
},
1684 {"flush", (PyCFunction
)BZ2Comp_flush
, METH_NOARGS
,
1685 BZ2Comp_flush__doc__
},
1686 {NULL
, NULL
} /* sentinel */
1690 /* ===================================================================== */
1691 /* Slot definitions for BZ2Comp_Type. */
/*
 * BZ2Comp_init -- tp_init slot of the compressor type (see BZ2Comp_Type).
 *
 * Accepts one optional integer argument, compresslevel (positional or
 * keyword, default 9), and sets ValueError unless it lies in 1..9. It then
 * allocates the per-object lock, zeroes the embedded bz_stream and
 * initialises it with BZ2_bzCompressInit; a result other than BZ_OK is
 * reported through Util_CatchBZ2Error.
 *
 * NOTE(review): the listing omits lines here (return type, the declaration
 * of bzerror, the return statements, the failure test after the lock
 * allocation and the error label). The trailing PyThread_free_lock call is
 * presumably the error-path cleanup -- confirm against the complete file.
 */
1694 BZ2Comp_init(BZ2CompObject
*self
, PyObject
*args
, PyObject
*kwargs
)
1696 int compresslevel
= 9;
1698 static char *kwlist
[] = {"compresslevel", 0};
1700 if (!PyArg_ParseTupleAndKeywords(args
, kwargs
, "|i:BZ2Compressor",
1701 kwlist
, &compresslevel
))
/* Reject levels outside the 1..9 range. */
1704 if (compresslevel
< 1 || compresslevel
> 9) {
1705 PyErr_SetString(PyExc_ValueError
,
1706 "compresslevel must be between 1 and 9");
1711 self
->lock
= PyThread_allocate_lock();
1713 PyErr_SetString(PyExc_MemoryError
, "unable to allocate lock");
/* Zero the stream before handing it to the library initialiser. */
1718 memset(&self
->bzs
, 0, sizeof(bz_stream
));
1719 bzerror
= BZ2_bzCompressInit(&self
->bzs
, compresslevel
, 0, 0);
1720 if (bzerror
!= BZ_OK
) {
1721 Util_CatchBZ2Error(bzerror
);
1731 PyThread_free_lock(self
->lock
);
/*
 * BZ2Comp_dealloc -- tp_dealloc slot of the compressor type.
 *
 * Frees the per-object lock, releases the library compression state with
 * BZ2_bzCompressEnd, then returns the object memory through the tp_free
 * slot of its type.
 *
 * NOTE(review): lines are missing around the lock release in this listing,
 * so any guard around it is not visible.
 */
1739 BZ2Comp_dealloc(BZ2CompObject
*self
)
1743 PyThread_free_lock(self
->lock
);
1745 BZ2_bzCompressEnd(&self
->bzs
);
1746 Py_TYPE(self
)->tp_free((PyObject
*)self
);
1750 /* ===================================================================== */
1751 /* BZ2Comp_Type definition. */
/* Docstring text of the BZ2Compressor type; BZ2Comp_Type installs it in
 * its tp_doc slot. */
1753 PyDoc_STRVAR(BZ2Comp__doc__
,
1754 "BZ2Compressor([compresslevel=9]) -> compressor object\n\
1756 Create a new compressor object. This object may be used to compress\n\
1757 data sequentially. If you want to compress data in one shot, use the\n\
1758 compress() function instead. The compresslevel parameter, if given,\n\
1759 must be a number between 1 and 9.\n\
/*
 * Static type object for bz2.BZ2Compressor.
 *
 * Slots visible here: instance size of BZ2CompObject, BZ2Comp_dealloc as
 * destructor, the generic attribute getter and setter, the default flags
 * plus Py_TPFLAGS_BASETYPE (the type may be subclassed), the type
 * docstring, the BZ2Comp_methods table, BZ2Comp_init as initialiser, and
 * the generic alloc / new / free functions.
 *
 * NOTE(review): slots whose lines are missing from this listing are not
 * described; the initializer is positional, so check the complete file
 * before editing slot order.
 */
1762 static PyTypeObject BZ2Comp_Type
= {
1763 PyVarObject_HEAD_INIT(NULL
, 0)
1764 "bz2.BZ2Compressor", /*tp_name*/
1765 sizeof(BZ2CompObject
), /*tp_basicsize*/
1767 (destructor
)BZ2Comp_dealloc
, /*tp_dealloc*/
1774 0, /*tp_as_sequence*/
1775 0, /*tp_as_mapping*/
1779 PyObject_GenericGetAttr
,/*tp_getattro*/
1780 PyObject_GenericSetAttr
,/*tp_setattro*/
1782 Py_TPFLAGS_DEFAULT
|Py_TPFLAGS_BASETYPE
, /*tp_flags*/
1783 BZ2Comp__doc__
, /*tp_doc*/
1786 0, /*tp_richcompare*/
1787 0, /*tp_weaklistoffset*/
1790 BZ2Comp_methods
, /*tp_methods*/
1797 0, /*tp_dictoffset*/
1798 (initproc
)BZ2Comp_init
, /*tp_init*/
1799 PyType_GenericAlloc
, /*tp_alloc*/
1800 PyType_GenericNew
, /*tp_new*/
1801 _PyObject_Del
, /*tp_free*/
1806 /* ===================================================================== */
1807 /* Members of BZ2Decomp. */
/* Member table of the decompressor type. OFF(x) is shorthand for the byte
 * offset of field x inside BZ2DecompObject. The single entry exposes the
 * unused_data field as a read-only (RO) object attribute; the table ends
 * with a NULL sentinel entry. */
1810 #define OFF(x) offsetof(BZ2DecompObject, x)
1812 static PyMemberDef BZ2Decomp_members
[] = {
1813 {"unused_data", T_OBJECT
, OFF(unused_data
), RO
},
1814 {NULL
} /* Sentinel */
1818 /* ===================================================================== */
1819 /* Methods of BZ2Decomp. */
/* Docstring text for the decompress() method of the decompressor type; the
 * BZ2Decomp_methods table attaches it to the decompress entry. */
1821 PyDoc_STRVAR(BZ2Decomp_decompress__doc__
,
1822 "decompress(data) -> string\n\
1824 Provide more data to the decompressor object. It will return chunks\n\
1825 of decompressed data whenever possible. If you try to decompress data\n\
1826 after the end of stream is found, EOFError will be raised. If any data\n\
1827 was found after the end of stream, it'll be ignored and saved in\n\
1828 unused_data attribute.\n\
/*
 * BZ2Decomp_decompress -- C implementation behind the decompress(data)
 * method of the decompressor object (METH_VARARGS in BZ2Decomp_methods).
 *
 * Visible behaviour:
 *   - parses one string/buffer argument into data and datasize;
 *   - sets EOFError when self->running is false;
 *   - allocates an output string of SMALLCHUNK bytes, feeds data to the
 *     stream and calls BZ2_bzDecompress with the GIL released;
 *   - on BZ_STREAM_END, input left over after the end of the stream is
 *     copied into a new string and the old unused_data is released;
 *   - any result other than BZ_OK is reported through Util_CatchBZ2Error;
 *   - stops once all input is consumed, growing the output string via
 *     Util_NewBufferSize whenever avail_out reaches 0;
 *   - finally trims the string to the bytes produced by this call.
 *
 * NOTE(review): this listing has gaps (declarations of data, datasize and
 * bzerror, the loop header, the assignment target of the unused-data
 * string, the error path and any locking are not visible) -- confirm
 * against the complete file.
 */
1832 BZ2Decomp_decompress(BZ2DecompObject
*self
, PyObject
*args
)
1836 int bufsize
= SMALLCHUNK
;
1837 PY_LONG_LONG totalout
;
1838 PyObject
*ret
= NULL
;
1839 bz_stream
*bzs
= &self
->bzs
;
1842 if (!PyArg_ParseTuple(args
, "s#:decompress", &data
, &datasize
))
/* Feeding data after the end of the stream was reached is an error. */
1846 if (!self
->running
) {
1847 PyErr_SetString(PyExc_EOFError
, "end of stream was "
/* Start with a SMALLCHUNK-sized output string; it is grown on demand below. */
1852 ret
= PyString_FromStringAndSize(NULL
, bufsize
);
1856 bzs
->next_in
= data
;
1857 bzs
->avail_in
= datasize
;
1858 bzs
->next_out
= BUF(ret
);
1859 bzs
->avail_out
= bufsize
;
/* Snapshot the stream output counter so that only the bytes produced by
 * this call are counted when positioning and trimming the result. */
1861 totalout
= BZS_TOTAL_OUT(bzs
);
/* The library call runs between Py_BEGIN/END_ALLOW_THREADS, i.e. with the
 * GIL released. */
1864 Py_BEGIN_ALLOW_THREADS
1865 bzerror
= BZ2_bzDecompress(bzs
);
1866 Py_END_ALLOW_THREADS
1867 if (bzerror
== BZ_STREAM_END
) {
/* Bytes remaining after the end of the stream are kept rather than dropped. */
1868 if (bzs
->avail_in
!= 0) {
1869 Py_DECREF(self
->unused_data
);
/* NOTE(review): the left-hand side of this call is on a line missing from
 * the listing; presumably the result is stored in self->unused_data. */
1871 PyString_FromStringAndSize(bzs
->next_in
,
1877 if (bzerror
!= BZ_OK
) {
1878 Util_CatchBZ2Error(bzerror
);
1881 if (bzs
->avail_in
== 0)
1882 break; /* no more input data */
/* Output space exhausted: grow the string and re-point the stream into it. */
1883 if (bzs
->avail_out
== 0) {
1884 bufsize
= Util_NewBufferSize(bufsize
);
1885 if (_PyString_Resize(&ret
, bufsize
) < 0) {
/* NOTE(review): the stream owned by self is torn down here on resize
 * failure, and BZ2Decomp_dealloc calls BZ2_bzDecompressEnd on it again
 * later -- verify this is intended. */
1886 BZ2_bzDecompressEnd(bzs
);
/* NOTE(review): this plain reset of next_out is overwritten by the very
 * next statement and looks redundant. */
1889 bzs
->next_out
= BUF(ret
);
/* Resume writing right after the bytes already produced by this call. */
1890 bzs
->next_out
= BUF(ret
) + (BZS_TOTAL_OUT(bzs
)
1892 bzs
->avail_out
= bufsize
- (bzs
->next_out
- BUF(ret
));
/* Trim unused tail space; skipped when the buffer is exactly full. */
1896 if (bzs
->avail_out
!= 0)
1897 _PyString_Resize(&ret
, (Py_ssize_t
)(BZS_TOTAL_OUT(bzs
) - totalout
));
/* Method table of the decompressor type: a single decompress entry taking
 * positional arguments (METH_VARARGS) with its docstring, followed by a
 * NULL sentinel entry. */
1908 static PyMethodDef BZ2Decomp_methods
[] = {
1909 {"decompress", (PyCFunction
)BZ2Decomp_decompress
, METH_VARARGS
, BZ2Decomp_decompress__doc__
},
1910 {NULL
, NULL
} /* sentinel */
1914 /* ===================================================================== */
1915 /* Slot definitions for BZ2Decomp_Type. */
/*
 * BZ2Decomp_init -- tp_init slot of the decompressor type (see
 * BZ2Decomp_Type).
 *
 * Accepts no positional arguments (the parse format holds only the
 * function name). It allocates the per-object lock, sets unused_data to an
 * empty string, zeroes the embedded bz_stream and initialises it with
 * BZ2_bzDecompressInit; a result other than BZ_OK is reported through
 * Util_CatchBZ2Error.
 *
 * NOTE(review): kwargs is not used on any visible line. The listing omits
 * lines here (return type, declaration of bzerror, return statements, the
 * failure test after the lock allocation, the error label). The trailing
 * PyThread_free_lock and Py_CLEAR calls are presumably the error-path
 * cleanup -- confirm against the complete file.
 */
1918 BZ2Decomp_init(BZ2DecompObject
*self
, PyObject
*args
, PyObject
*kwargs
)
1922 if (!PyArg_ParseTuple(args
, ":BZ2Decompressor"))
1926 self
->lock
= PyThread_allocate_lock();
1928 PyErr_SetString(PyExc_MemoryError
, "unable to allocate lock");
/* unused_data starts out as an empty string object. */
1933 self
->unused_data
= PyString_FromString("");
1934 if (!self
->unused_data
)
/* Zero the stream before handing it to the library initialiser. */
1937 memset(&self
->bzs
, 0, sizeof(bz_stream
));
1938 bzerror
= BZ2_bzDecompressInit(&self
->bzs
, 0, 0);
1939 if (bzerror
!= BZ_OK
) {
1940 Util_CatchBZ2Error(bzerror
);
1951 PyThread_free_lock(self
->lock
);
1955 Py_CLEAR(self
->unused_data
);
/*
 * BZ2Decomp_dealloc -- tp_dealloc slot of the decompressor type.
 *
 * Frees the per-object lock, drops the reference to unused_data (tolerating
 * NULL via Py_XDECREF), releases the library decompression state with
 * BZ2_bzDecompressEnd, then returns the object memory through the tp_free
 * slot of its type.
 *
 * NOTE(review): lines are missing around the lock release in this listing,
 * so any guard around it is not visible.
 */
1960 BZ2Decomp_dealloc(BZ2DecompObject
*self
)
1964 PyThread_free_lock(self
->lock
);
1966 Py_XDECREF(self
->unused_data
);
1967 BZ2_bzDecompressEnd(&self
->bzs
);
1968 Py_TYPE(self
)->tp_free((PyObject
*)self
);
1972 /* ===================================================================== */
1973 /* BZ2Decomp_Type definition. */
/* Docstring text of the BZ2Decompressor type; BZ2Decomp_Type installs it
 * in its tp_doc slot. */
1975 PyDoc_STRVAR(BZ2Decomp__doc__
,
1976 "BZ2Decompressor() -> decompressor object\n\
1978 Create a new decompressor object. This object may be used to decompress\n\
1979 data sequentially. If you want to decompress data in one shot, use the\n\
1980 decompress() function instead.\n\
/*
 * Static type object for bz2.BZ2Decompressor.
 *
 * Slots visible here: instance size of BZ2DecompObject, BZ2Decomp_dealloc
 * as destructor, the generic attribute getter and setter, the default flags
 * plus Py_TPFLAGS_BASETYPE (the type may be subclassed), the type
 * docstring, the BZ2Decomp_methods and BZ2Decomp_members tables,
 * BZ2Decomp_init as initialiser, and the generic alloc / new / free
 * functions.
 *
 * NOTE(review): slots whose lines are missing from this listing are not
 * described; the initializer is positional, so check the complete file
 * before editing slot order.
 */
1983 static PyTypeObject BZ2Decomp_Type
= {
1984 PyVarObject_HEAD_INIT(NULL
, 0)
1985 "bz2.BZ2Decompressor", /*tp_name*/
1986 sizeof(BZ2DecompObject
), /*tp_basicsize*/
1988 (destructor
)BZ2Decomp_dealloc
, /*tp_dealloc*/
1995 0, /*tp_as_sequence*/
1996 0, /*tp_as_mapping*/
2000 PyObject_GenericGetAttr
,/*tp_getattro*/
2001 PyObject_GenericSetAttr
,/*tp_setattro*/
2003 Py_TPFLAGS_DEFAULT
|Py_TPFLAGS_BASETYPE
, /*tp_flags*/
2004 BZ2Decomp__doc__
, /*tp_doc*/
2007 0, /*tp_richcompare*/
2008 0, /*tp_weaklistoffset*/
2011 BZ2Decomp_methods
, /*tp_methods*/
2012 BZ2Decomp_members
, /*tp_members*/
2018 0, /*tp_dictoffset*/
2019 (initproc
)BZ2Decomp_init
, /*tp_init*/
2020 PyType_GenericAlloc
, /*tp_alloc*/
2021 PyType_GenericNew
, /*tp_new*/
2022 _PyObject_Del
, /*tp_free*/
2027 /* ===================================================================== */
2028 /* Module functions. */
/* Docstring text for the module-level compress() function; the bz2_methods
 * table attaches it to the compress entry. */
2030 PyDoc_STRVAR(bz2_compress__doc__
,
2031 "compress(data [, compresslevel=9]) -> string\n\
2033 Compress data in one shot. If you want to compress data sequentially,\n\
2034 use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
2035 given, must be a number between 1 and 9.\n\
/*
 * bz2_compress -- C implementation of the module-level compress() function
 * (METH_VARARGS | METH_KEYWORDS in bz2_methods).
 *
 * Arguments: data (string/buffer) and an optional integer compresslevel
 * (default 9), both usable as keywords. Sets ValueError unless
 * compresslevel lies in 1..9.
 *
 * Visible behaviour: sizes the output string from the input length, sets up
 * a local bz_stream, initialises it with BZ2_bzCompressInit, then calls
 * BZ2_bzCompress(bzs, BZ_FINISH) with the GIL released. BZ_STREAM_END means
 * completion; anything other than BZ_FINISH_OK ends the stream and is
 * reported through Util_CatchBZ2Error. The string is grown via
 * Util_NewBufferSize when the output space runs out, trimmed to the total
 * output at the end, and the stream is released with BZ2_bzCompressEnd.
 *
 * NOTE(review): this listing has gaps (declarations of data, datasize,
 * bufsize, bzerror and _bzs, the loop header and the error returns are not
 * visible) -- confirm against the complete file.
 */
2039 bz2_compress(PyObject
*self
, PyObject
*args
, PyObject
*kwargs
)
2041 int compresslevel
=9;
2045 PyObject
*ret
= NULL
;
2047 bz_stream
*bzs
= &_bzs
;
2049 static char *kwlist
[] = {"data", "compresslevel", 0};
2051 if (!PyArg_ParseTupleAndKeywords(args
, kwargs
, "s#|i",
2052 kwlist
, &data
, &datasize
,
/* Reject levels outside the 1..9 range. */
2056 if (compresslevel
< 1 || compresslevel
> 9) {
2057 PyErr_SetString(PyExc_ValueError
,
2058 "compresslevel must be between 1 and 9");
2062 /* Conforming to bz2 manual, this is large enough to fit compressed
2063 * data in one shot. We will check it later anyway. */
/* NOTE(review): the types of datasize and bufsize are not visible here; if
 * they are plain int, this sum can overflow for very large inputs. */
2064 bufsize
= datasize
+ (datasize
/100+1) + 600;
2066 ret
= PyString_FromStringAndSize(NULL
, bufsize
);
/* Zero the stream, then describe the input and output buffers before the
 * library initialiser is called. */
2070 memset(bzs
, 0, sizeof(bz_stream
));
2072 bzs
->next_in
= data
;
2073 bzs
->avail_in
= datasize
;
2074 bzs
->next_out
= BUF(ret
);
2075 bzs
->avail_out
= bufsize
;
2077 bzerror
= BZ2_bzCompressInit(bzs
, compresslevel
, 0, 0);
2078 if (bzerror
!= BZ_OK
) {
2079 Util_CatchBZ2Error(bzerror
);
/* The library call runs between Py_BEGIN/END_ALLOW_THREADS, i.e. with the
 * GIL released. */
2085 Py_BEGIN_ALLOW_THREADS
2086 bzerror
= BZ2_bzCompress(bzs
, BZ_FINISH
);
2087 Py_END_ALLOW_THREADS
2088 if (bzerror
== BZ_STREAM_END
) {
2090 } else if (bzerror
!= BZ_FINISH_OK
) {
2091 BZ2_bzCompressEnd(bzs
);
2092 Util_CatchBZ2Error(bzerror
);
/* The size estimate was too small after all: grow the string and re-point
 * the stream just past the bytes written so far. */
2096 if (bzs
->avail_out
== 0) {
2097 bufsize
= Util_NewBufferSize(bufsize
);
2098 if (_PyString_Resize(&ret
, bufsize
) < 0) {
2099 BZ2_bzCompressEnd(bzs
);
2103 bzs
->next_out
= BUF(ret
) + BZS_TOTAL_OUT(bzs
);
2104 bzs
->avail_out
= bufsize
- (bzs
->next_out
- BUF(ret
));
/* Trim unused tail space, then release the library state. */
2108 if (bzs
->avail_out
!= 0)
2109 _PyString_Resize(&ret
, (Py_ssize_t
)BZS_TOTAL_OUT(bzs
));
2110 BZ2_bzCompressEnd(bzs
);
/* Docstring text for the module-level decompress() function; the
 * bz2_methods table attaches it to the decompress entry. */
2115 PyDoc_STRVAR(bz2_decompress__doc__
,
2116 "decompress(data) -> decompressed data\n\
2118 Decompress data in one shot. If you want to decompress data sequentially,\n\
2119 use an instance of BZ2Decompressor instead.\n\
/*
 * bz2_decompress -- C implementation of the module-level decompress()
 * function (METH_VARARGS in bz2_methods).
 *
 * Visible behaviour: parses one string/buffer argument into data and
 * datasize, allocates an output string of SMALLCHUNK bytes, sets up a local
 * bz_stream, initialises it with BZ2_bzDecompressInit and calls
 * BZ2_bzDecompress with the GIL released. BZ_STREAM_END means completion;
 * any result other than BZ_OK ends the stream and is reported through
 * Util_CatchBZ2Error. Running out of input before the end of the stream
 * sets ValueError. The string is grown via Util_NewBufferSize when the
 * output space runs out, trimmed to the total output at the end, and the
 * stream is released with BZ2_bzDecompressEnd.
 *
 * NOTE(review): this listing has gaps (declarations of data, datasize, ret,
 * bzerror and _bzs, the loop header, the condition guarding the early
 * empty-string return, and the error returns are not visible) -- confirm
 * against the complete file.
 */
2123 bz2_decompress(PyObject
*self
, PyObject
*args
)
2127 int bufsize
= SMALLCHUNK
;
2130 bz_stream
*bzs
= &_bzs
;
2133 if (!PyArg_ParseTuple(args
, "s#:decompress", &data
, &datasize
))
/* NOTE(review): the condition guarding this early return is on a line
 * missing from the listing; presumably it covers empty input. */
2137 return PyString_FromString("");
2139 ret
= PyString_FromStringAndSize(NULL
, bufsize
);
/* Zero the stream, then describe the input and output buffers before the
 * library initialiser is called. */
2143 memset(bzs
, 0, sizeof(bz_stream
));
2145 bzs
->next_in
= data
;
2146 bzs
->avail_in
= datasize
;
2147 bzs
->next_out
= BUF(ret
);
2148 bzs
->avail_out
= bufsize
;
2150 bzerror
= BZ2_bzDecompressInit(bzs
, 0, 0);
2151 if (bzerror
!= BZ_OK
) {
2152 Util_CatchBZ2Error(bzerror
);
/* The library call runs between Py_BEGIN/END_ALLOW_THREADS, i.e. with the
 * GIL released. */
2158 Py_BEGIN_ALLOW_THREADS
2159 bzerror
= BZ2_bzDecompress(bzs
);
2160 Py_END_ALLOW_THREADS
2161 if (bzerror
== BZ_STREAM_END
) {
2163 } else if (bzerror
!= BZ_OK
) {
2164 BZ2_bzDecompressEnd(bzs
);
2165 Util_CatchBZ2Error(bzerror
);
/* All input consumed without reaching the end of the stream: the data is
 * truncated, which is reported as ValueError. */
2169 if (bzs
->avail_in
== 0) {
2170 BZ2_bzDecompressEnd(bzs
);
2171 PyErr_SetString(PyExc_ValueError
,
2172 "couldn't find end of stream");
/* Output space exhausted: grow the string and re-point the stream just past
 * the bytes written so far. */
2176 if (bzs
->avail_out
== 0) {
2177 bufsize
= Util_NewBufferSize(bufsize
);
2178 if (_PyString_Resize(&ret
, bufsize
) < 0) {
2179 BZ2_bzDecompressEnd(bzs
);
2183 bzs
->next_out
= BUF(ret
) + BZS_TOTAL_OUT(bzs
);
2184 bzs
->avail_out
= bufsize
- (bzs
->next_out
- BUF(ret
));
/* Trim unused tail space, then release the library state. */
2188 if (bzs
->avail_out
!= 0)
2189 _PyString_Resize(&ret
, (Py_ssize_t
)BZS_TOTAL_OUT(bzs
));
2190 BZ2_bzDecompressEnd(bzs
);
/* Module-level function table: compress accepts positional and keyword
 * arguments (METH_VARARGS | METH_KEYWORDS), decompress accepts positional
 * arguments only (METH_VARARGS); each entry carries its docstring. The
 * table ends with a NULL sentinel entry. */
2195 static PyMethodDef bz2_methods
[] = {
2196 {"compress", (PyCFunction
) bz2_compress
, METH_VARARGS
|METH_KEYWORDS
,
2197 bz2_compress__doc__
},
2198 {"decompress", (PyCFunction
) bz2_decompress
, METH_VARARGS
,
2199 bz2_decompress__doc__
},
2200 {NULL
, NULL
} /* sentinel */
2203 /* ===================================================================== */
2204 /* Initialization function. */
/* Docstring text of the bz2 module itself; it is passed to Py_InitModule3
 * when the module is created. */
2206 PyDoc_STRVAR(bz2__doc__
,
2207 "The python bz2 module provides a comprehensive interface for\n\
2208 the bz2 compression library. It implements a complete file\n\
2209 interface, one shot (de)compression functions, and types for\n\
2210 sequential (de)compression.\n\
2218 Py_TYPE(&BZ2File_Type
) = &PyType_Type
;
2219 Py_TYPE(&BZ2Comp_Type
) = &PyType_Type
;
2220 Py_TYPE(&BZ2Decomp_Type
) = &PyType_Type
;
2222 m
= Py_InitModule3("bz2", bz2_methods
, bz2__doc__
);
2226 PyModule_AddObject(m
, "__author__", PyString_FromString(__author__
));
2228 Py_INCREF(&BZ2File_Type
);
2229 PyModule_AddObject(m
, "BZ2File", (PyObject
*)&BZ2File_Type
);
2231 Py_INCREF(&BZ2Comp_Type
);
2232 PyModule_AddObject(m
, "BZ2Compressor", (PyObject
*)&BZ2Comp_Type
);
2234 Py_INCREF(&BZ2Decomp_Type
);
2235 PyModule_AddObject(m
, "BZ2Decompressor", (PyObject
*)&BZ2Decomp_Type
);