3 python-bz2 - python bz2 library interface
5 Copyright (c) 2002 Gustavo Niemeyer <niemeyer@conectiva.com>
6 Copyright (c) 2002 Python Software Foundation; All Rights Reserved
13 #include "structmember.h"
19 static char __author__
[] =
20 "The bz2 python module was written by:\n\
22 Gustavo Niemeyer <niemeyer@conectiva.com>\n\
25 /* Our very own off_t-like type, 64-bit if possible */
26 /* copied from Objects/fileobject.c */
27 #if !defined(HAVE_LARGEFILE_SUPPORT)
28 typedef off_t Py_off_t
;
29 #elif SIZEOF_OFF_T >= 8
30 typedef off_t Py_off_t
;
31 #elif SIZEOF_FPOS_T >= 8
32 typedef fpos_t Py_off_t
;
34 #error "Large file support, but neither off_t nor fpos_t is large enough."
37 #define BUF(v) PyString_AS_STRING((PyStringObject *)v)
41 #define MODE_READ_EOF 2
44 #define BZ2FileObject_Check(v) (Py_TYPE(v) == &BZ2File_Type)
47 #ifdef BZ_CONFIG_ERROR
50 #define BZS_TOTAL_OUT(bzs) \
51 (((long)bzs->total_out_hi32 << 32) + bzs->total_out_lo32)
52 #elif SIZEOF_LONG_LONG >= 8
53 #define BZS_TOTAL_OUT(bzs) \
54 (((PY_LONG_LONG)bzs->total_out_hi32 << 32) + bzs->total_out_lo32)
56 #define BZS_TOTAL_OUT(bzs) \
60 #else /* ! BZ_CONFIG_ERROR */
62 #define BZ2_bzRead bzRead
63 #define BZ2_bzReadOpen bzReadOpen
64 #define BZ2_bzReadClose bzReadClose
65 #define BZ2_bzWrite bzWrite
66 #define BZ2_bzWriteOpen bzWriteOpen
67 #define BZ2_bzWriteClose bzWriteClose
68 #define BZ2_bzCompress bzCompress
69 #define BZ2_bzCompressInit bzCompressInit
70 #define BZ2_bzCompressEnd bzCompressEnd
71 #define BZ2_bzDecompress bzDecompress
72 #define BZ2_bzDecompressInit bzDecompressInit
73 #define BZ2_bzDecompressEnd bzDecompressEnd
75 #define BZS_TOTAL_OUT(bzs) bzs->total_out
77 #endif /* ! BZ_CONFIG_ERROR */
81 #define ACQUIRE_LOCK(obj) do { \
82 if (!PyThread_acquire_lock(obj->lock, 0)) { \
83 Py_BEGIN_ALLOW_THREADS \
84 PyThread_acquire_lock(obj->lock, 1); \
85 Py_END_ALLOW_THREADS \
87 #define RELEASE_LOCK(obj) PyThread_release_lock(obj->lock)
89 #define ACQUIRE_LOCK(obj)
90 #define RELEASE_LOCK(obj)
93 /* Bits in f_newlinetypes */
94 #define NEWLINE_UNKNOWN 0 /* No newline seen, yet */
95 #define NEWLINE_CR 1 /* \r newline seen */
96 #define NEWLINE_LF 2 /* \n newline seen */
97 #define NEWLINE_CRLF 4 /* \r\n newline seen */
99 /* ===================================================================== */
100 /* Structure definitions. */
106 char* f_buf
; /* Allocated readahead buffer */
107 char* f_bufend
; /* Points after last occupied position */
108 char* f_bufptr
; /* Current buffer position */
110 int f_softspace
; /* Flag used by 'print' command */
112 int f_univ_newline
; /* Handle any newline convention */
113 int f_newlinetypes
; /* Types of newlines seen */
114 int f_skipnextlf
; /* Skip next \n */
121 PyThread_type_lock lock
;
130 PyThread_type_lock lock
;
138 PyObject
*unused_data
;
140 PyThread_type_lock lock
;
144 /* ===================================================================== */
145 /* Utility functions. */
148 Util_CatchBZ2Error(int bzerror
)
156 #ifdef BZ_CONFIG_ERROR
157 case BZ_CONFIG_ERROR
:
158 PyErr_SetString(PyExc_SystemError
,
159 "the bz2 library was not compiled "
166 PyErr_SetString(PyExc_ValueError
,
167 "the bz2 library has received wrong "
178 case BZ_DATA_ERROR_MAGIC
:
179 PyErr_SetString(PyExc_IOError
, "invalid data stream");
184 PyErr_SetString(PyExc_IOError
, "unknown IO error");
188 case BZ_UNEXPECTED_EOF
:
189 PyErr_SetString(PyExc_EOFError
,
190 "compressed file ended before the "
191 "logical end-of-stream was detected");
195 case BZ_SEQUENCE_ERROR
:
196 PyErr_SetString(PyExc_RuntimeError
,
197 "wrong sequence of bz2 library "
206 #define SMALLCHUNK 8192
208 #define SMALLCHUNK BUFSIZ
212 #define BIGCHUNK (512 * 32)
214 #define BIGCHUNK (512 * 1024)
217 /* This is a hacked version of Python's fileobject.c:new_buffersize(). */
219 Util_NewBufferSize(size_t currentsize
)
221 if (currentsize
> SMALLCHUNK
) {
222 /* Keep doubling until we reach BIGCHUNK;
223 then keep adding BIGCHUNK. */
224 if (currentsize
<= BIGCHUNK
)
225 return currentsize
+ currentsize
;
227 return currentsize
+ BIGCHUNK
;
229 return currentsize
+ SMALLCHUNK
;
232 /* This is a hacked version of Python's fileobject.c:get_line(). */
234 Util_GetLine(BZ2FileObject
*f
, int n
)
238 size_t total_v_size
; /* total # of slots in buffer */
239 size_t used_v_size
; /* # used slots in buffer */
240 size_t increment
; /* amount to increment the buffer */
244 int newlinetypes
= f
->f_newlinetypes
;
245 int skipnextlf
= f
->f_skipnextlf
;
246 int univ_newline
= f
->f_univ_newline
;
248 total_v_size
= n
> 0 ? n
: 100;
249 v
= PyString_FromStringAndSize((char *)NULL
, total_v_size
);
254 end
= buf
+ total_v_size
;
257 Py_BEGIN_ALLOW_THREADS
259 bytes_read
= BZ2_bzRead(&bzerror
, f
->fp
, &c
, 1);
261 if (bytes_read
== 0) break;
266 /* Seeing a \n here with skipnextlf true means we just saw a \r, so this is a \r\n pair. */
269 newlinetypes
|= NEWLINE_CRLF
;
270 if (bzerror
!= BZ_OK
) break;
271 bytes_read
= BZ2_bzRead(&bzerror
, f
->fp
, &c
, 1);
273 if (bytes_read
== 0) break;
275 newlinetypes
|= NEWLINE_CR
;
281 } else if (c
== '\n')
282 newlinetypes
|= NEWLINE_LF
;
285 if (bzerror
!= BZ_OK
|| c
== '\n') break;
287 if (univ_newline
&& bzerror
== BZ_STREAM_END
&& skipnextlf
)
288 newlinetypes
|= NEWLINE_CR
;
290 f
->f_newlinetypes
= newlinetypes
;
291 f
->f_skipnextlf
= skipnextlf
;
292 if (bzerror
== BZ_STREAM_END
) {
294 f
->mode
= MODE_READ_EOF
;
296 } else if (bzerror
!= BZ_OK
) {
297 Util_CatchBZ2Error(bzerror
);
303 /* Must be because buf == end */
306 used_v_size
= total_v_size
;
307 increment
= total_v_size
>> 2; /* mild exponential growth */
308 total_v_size
+= increment
;
309 if (total_v_size
> INT_MAX
) {
310 PyErr_SetString(PyExc_OverflowError
,
311 "line is longer than a Python string can hold");
315 if (_PyString_Resize(&v
, total_v_size
) < 0)
317 buf
= BUF(v
) + used_v_size
;
318 end
= BUF(v
) + total_v_size
;
321 used_v_size
= buf
- BUF(v
);
322 if (used_v_size
!= total_v_size
)
323 _PyString_Resize(&v
, used_v_size
);
327 /* This is a hacked version of Python's
328 * fileobject.c:Py_UniversalNewlineFread(). */
330 Util_UnivNewlineRead(int *bzerror
, BZFILE
*stream
,
331 char* buf
, size_t n
, BZ2FileObject
*f
)
334 int newlinetypes
, skipnextlf
;
337 assert(stream
!= NULL
);
339 if (!f
->f_univ_newline
)
340 return BZ2_bzRead(bzerror
, stream
, buf
, n
);
342 newlinetypes
= f
->f_newlinetypes
;
343 skipnextlf
= f
->f_skipnextlf
;
345 /* Invariant: n is the number of bytes remaining to be filled in the buffer. */
353 nread
= BZ2_bzRead(bzerror
, stream
, dst
, n
);
355 n
-= nread
; /* assuming 1 byte out for each in; will adjust */
356 shortread
= n
!= 0; /* true iff EOF or error */
360 /* Save as LF and set flag to skip next LF. */
364 else if (skipnextlf
&& c
== '\n') {
365 /* Skip LF, and remember we saw CR LF. */
367 newlinetypes
|= NEWLINE_CRLF
;
371 /* Normal char to be stored in buffer. Also
372 * update the newlinetypes flag if either this
373 * is an LF or the previous char was a CR. */
376 newlinetypes
|= NEWLINE_LF
;
378 newlinetypes
|= NEWLINE_CR
;
384 /* If this is EOF, update type flags. */
385 if (skipnextlf
&& *bzerror
== BZ_STREAM_END
)
386 newlinetypes
|= NEWLINE_CR
;
390 f
->f_newlinetypes
= newlinetypes
;
391 f
->f_skipnextlf
= skipnextlf
;
395 /* This is a hacked version of Python's fileobject.c:drop_readahead(). */
397 Util_DropReadAhead(BZ2FileObject
*f
)
399 if (f
->f_buf
!= NULL
) {
400 PyMem_Free(f
->f_buf
);
405 /* This is a hacked version of Python's fileobject.c:readahead(). */
407 Util_ReadAhead(BZ2FileObject
*f
, int bufsize
)
412 if (f
->f_buf
!= NULL
) {
413 if((f
->f_bufend
- f
->f_bufptr
) >= 1)
416 Util_DropReadAhead(f
);
418 if (f
->mode
== MODE_READ_EOF
) {
419 f
->f_bufptr
= f
->f_buf
;
420 f
->f_bufend
= f
->f_buf
;
423 if ((f
->f_buf
= PyMem_Malloc(bufsize
)) == NULL
) {
427 Py_BEGIN_ALLOW_THREADS
428 chunksize
= Util_UnivNewlineRead(&bzerror
, f
->fp
, f
->f_buf
,
432 if (bzerror
== BZ_STREAM_END
) {
434 f
->mode
= MODE_READ_EOF
;
435 } else if (bzerror
!= BZ_OK
) {
436 Util_CatchBZ2Error(bzerror
);
437 Util_DropReadAhead(f
);
440 f
->f_bufptr
= f
->f_buf
;
441 f
->f_bufend
= f
->f_buf
+ chunksize
;
445 /* This is a hacked version of Python's
446 * fileobject.c:readahead_get_line_skip(). */
447 static PyStringObject
*
448 Util_ReadAheadGetLineSkip(BZ2FileObject
*f
, int skip
, int bufsize
)
455 if (f
->f_buf
== NULL
)
456 if (Util_ReadAhead(f
, bufsize
) < 0)
459 len
= f
->f_bufend
- f
->f_bufptr
;
461 return (PyStringObject
*)
462 PyString_FromStringAndSize(NULL
, skip
);
463 bufptr
= memchr(f
->f_bufptr
, '\n', len
);
464 if (bufptr
!= NULL
) {
465 bufptr
++; /* Count the '\n' */
466 len
= bufptr
- f
->f_bufptr
;
467 s
= (PyStringObject
*)
468 PyString_FromStringAndSize(NULL
, skip
+len
);
471 memcpy(PyString_AS_STRING(s
)+skip
, f
->f_bufptr
, len
);
472 f
->f_bufptr
= bufptr
;
473 if (bufptr
== f
->f_bufend
)
474 Util_DropReadAhead(f
);
476 bufptr
= f
->f_bufptr
;
478 f
->f_buf
= NULL
; /* Force new readahead buffer */
479 s
= Util_ReadAheadGetLineSkip(f
, skip
+len
,
480 bufsize
+ (bufsize
>>2));
485 memcpy(PyString_AS_STRING(s
)+skip
, bufptr
, len
);
491 /* ===================================================================== */
492 /* Methods of BZ2File. */
494 PyDoc_STRVAR(BZ2File_read__doc__
,
495 "read([size]) -> string\n\
497 Read at most size uncompressed bytes, returned as a string. If the size\n\
498 argument is negative or omitted, read until EOF is reached.\n\
501 /* This is a hacked version of Python's fileobject.c:file_read(). */
503 BZ2File_read(BZ2FileObject
*self
, PyObject
*args
)
505 long bytesrequested
= -1;
506 size_t bytesread
, buffersize
, chunksize
;
508 PyObject
*ret
= NULL
;
510 if (!PyArg_ParseTuple(args
, "|l:read", &bytesrequested
))
514 switch (self
->mode
) {
518 ret
= PyString_FromString("");
521 PyErr_SetString(PyExc_ValueError
,
522 "I/O operation on closed file");
525 PyErr_SetString(PyExc_IOError
,
526 "file is not ready for reading");
530 if (bytesrequested
< 0)
531 buffersize
= Util_NewBufferSize((size_t)0);
533 buffersize
= bytesrequested
;
534 if (buffersize
> INT_MAX
) {
535 PyErr_SetString(PyExc_OverflowError
,
536 "requested number of bytes is "
537 "more than a Python string can hold");
540 ret
= PyString_FromStringAndSize((char *)NULL
, buffersize
);
546 Py_BEGIN_ALLOW_THREADS
547 chunksize
= Util_UnivNewlineRead(&bzerror
, self
->fp
,
549 buffersize
-bytesread
,
551 self
->pos
+= chunksize
;
553 bytesread
+= chunksize
;
554 if (bzerror
== BZ_STREAM_END
) {
555 self
->size
= self
->pos
;
556 self
->mode
= MODE_READ_EOF
;
558 } else if (bzerror
!= BZ_OK
) {
559 Util_CatchBZ2Error(bzerror
);
564 if (bytesrequested
< 0) {
565 buffersize
= Util_NewBufferSize(buffersize
);
566 if (_PyString_Resize(&ret
, buffersize
) < 0)
572 if (bytesread
!= buffersize
)
573 _PyString_Resize(&ret
, bytesread
);
580 PyDoc_STRVAR(BZ2File_readline__doc__
,
581 "readline([size]) -> string\n\
583 Return the next line from the file, as a string, retaining newline.\n\
584 A non-negative size argument will limit the maximum number of bytes to\n\
585 return (an incomplete line may be returned then). Return an empty\n\
590 BZ2File_readline(BZ2FileObject
*self
, PyObject
*args
)
592 PyObject
*ret
= NULL
;
595 if (!PyArg_ParseTuple(args
, "|i:readline", &sizehint
))
599 switch (self
->mode
) {
603 ret
= PyString_FromString("");
606 PyErr_SetString(PyExc_ValueError
,
607 "I/O operation on closed file");
610 PyErr_SetString(PyExc_IOError
,
611 "file is not ready for reading");
616 ret
= PyString_FromString("");
618 ret
= Util_GetLine(self
, (sizehint
< 0) ? 0 : sizehint
);
625 PyDoc_STRVAR(BZ2File_readlines__doc__
,
626 "readlines([size]) -> list\n\
628 Call readline() repeatedly and return a list of lines read.\n\
629 The optional size argument, if given, is an approximate bound on the\n\
630 total number of bytes in the lines returned.\n\
633 /* This is a hacked version of Python's fileobject.c:file_readlines(). */
635 BZ2File_readlines(BZ2FileObject
*self
, PyObject
*args
)
638 PyObject
*list
= NULL
;
640 char small_buffer
[SMALLCHUNK
];
641 char *buffer
= small_buffer
;
642 size_t buffersize
= SMALLCHUNK
;
643 PyObject
*big_buffer
= NULL
;
646 size_t totalread
= 0;
652 if (!PyArg_ParseTuple(args
, "|l:readlines", &sizehint
))
656 switch (self
->mode
) {
660 list
= PyList_New(0);
663 PyErr_SetString(PyExc_ValueError
,
664 "I/O operation on closed file");
667 PyErr_SetString(PyExc_IOError
,
668 "file is not ready for reading");
672 if ((list
= PyList_New(0)) == NULL
)
676 Py_BEGIN_ALLOW_THREADS
677 nread
= Util_UnivNewlineRead(&bzerror
, self
->fp
,
679 buffersize
-nfilled
, self
);
682 if (bzerror
== BZ_STREAM_END
) {
683 self
->size
= self
->pos
;
684 self
->mode
= MODE_READ_EOF
;
690 } else if (bzerror
!= BZ_OK
) {
691 Util_CatchBZ2Error(bzerror
);
698 p
= memchr(buffer
+nfilled
, '\n', nread
);
699 if (!shortread
&& p
== NULL
) {
700 /* Need a larger buffer to fit this line */
703 if (buffersize
> INT_MAX
) {
704 PyErr_SetString(PyExc_OverflowError
,
705 "line is longer than a Python string can hold");
708 if (big_buffer
== NULL
) {
709 /* Create the big buffer */
710 big_buffer
= PyString_FromStringAndSize(
712 if (big_buffer
== NULL
)
714 buffer
= PyString_AS_STRING(big_buffer
);
715 memcpy(buffer
, small_buffer
, nfilled
);
718 /* Grow the big buffer */
719 _PyString_Resize(&big_buffer
, buffersize
);
720 buffer
= PyString_AS_STRING(big_buffer
);
724 end
= buffer
+nfilled
+nread
;
727 /* Process complete lines */
729 line
= PyString_FromStringAndSize(q
, p
-q
);
732 err
= PyList_Append(list
, line
);
737 p
= memchr(q
, '\n', end
-q
);
739 /* Move the remaining incomplete line to the start */
741 memmove(buffer
, q
, nfilled
);
743 if (totalread
>= (size_t)sizehint
)
751 /* Partial last line */
752 line
= PyString_FromStringAndSize(buffer
, nfilled
);
756 /* Need to complete the last line */
757 PyObject
*rest
= Util_GetLine(self
, 0);
762 PyString_Concat(&line
, rest
);
767 err
= PyList_Append(list
, line
);
776 Py_DECREF(big_buffer
);
781 PyDoc_STRVAR(BZ2File_xreadlines__doc__
,
782 "xreadlines() -> self\n\
784 For backward compatibility. BZ2File objects now include the performance\n\
785 optimizations previously implemented in the xreadlines module.\n\
788 PyDoc_STRVAR(BZ2File_write__doc__
,
789 "write(data) -> None\n\
791 Write the 'data' string to file. Note that due to buffering, close() may\n\
792 be needed before the file on disk reflects the data written.\n\
795 /* This is a hacked version of Python's fileobject.c:file_write(). */
797 BZ2File_write(BZ2FileObject
*self
, PyObject
*args
)
799 PyObject
*ret
= NULL
;
805 if (!PyArg_ParseTuple(args
, "s*:write", &pbuf
))
811 switch (self
->mode
) {
816 PyErr_SetString(PyExc_ValueError
,
817 "I/O operation on closed file");
821 PyErr_SetString(PyExc_IOError
,
822 "file is not ready for writing");
826 self
->f_softspace
= 0;
828 Py_BEGIN_ALLOW_THREADS
829 BZ2_bzWrite (&bzerror
, self
->fp
, buf
, len
);
833 if (bzerror
!= BZ_OK
) {
834 Util_CatchBZ2Error(bzerror
);
842 PyBuffer_Release(&pbuf
);
847 PyDoc_STRVAR(BZ2File_writelines__doc__
,
848 "writelines(sequence_of_strings) -> None\n\
850 Write the sequence of strings to the file. Note that newlines are not\n\
851 added. The sequence can be any iterable object producing strings. This is\n\
852 equivalent to calling write() for each string.\n\
855 /* This is a hacked version of Python's fileobject.c:file_writelines(). */
857 BZ2File_writelines(BZ2FileObject
*self
, PyObject
*seq
)
859 #define CHUNKSIZE 1000
860 PyObject
*list
= NULL
;
861 PyObject
*iter
= NULL
;
862 PyObject
*ret
= NULL
;
864 int i
, j
, index
, len
, islist
;
868 switch (self
->mode
) {
873 PyErr_SetString(PyExc_ValueError
,
874 "I/O operation on closed file");
878 PyErr_SetString(PyExc_IOError
,
879 "file is not ready for writing");
883 islist
= PyList_Check(seq
);
885 iter
= PyObject_GetIter(seq
);
887 PyErr_SetString(PyExc_TypeError
,
888 "writelines() requires an iterable argument");
891 list
= PyList_New(CHUNKSIZE
);
896 /* Strategy: slurp CHUNKSIZE lines into a private list,
897 checking that they are all strings, then write that list
898 without holding the interpreter lock, then come back for more. */
899 for (index
= 0; ; index
+= CHUNKSIZE
) {
902 list
= PyList_GetSlice(seq
, index
, index
+CHUNKSIZE
);
905 j
= PyList_GET_SIZE(list
);
908 for (j
= 0; j
< CHUNKSIZE
; j
++) {
909 line
= PyIter_Next(iter
);
911 if (PyErr_Occurred())
915 PyList_SetItem(list
, j
, line
);
921 /* Check that all entries are indeed strings. If not,
922 apply the same rules as for file.write() and
923 convert the results to strings. This is slow, but
924 seems to be the only way since all conversion APIs
925 could potentially execute Python code. */
926 for (i
= 0; i
< j
; i
++) {
927 PyObject
*v
= PyList_GET_ITEM(list
, i
);
928 if (!PyString_Check(v
)) {
931 if (PyObject_AsCharBuffer(v
, &buffer
, &len
)) {
932 PyErr_SetString(PyExc_TypeError
,
939 line
= PyString_FromStringAndSize(buffer
,
944 PyList_SET_ITEM(list
, i
, line
);
948 self
->f_softspace
= 0;
950 /* Since we are releasing the global lock, the
951 following code may *not* execute Python code. */
952 Py_BEGIN_ALLOW_THREADS
953 for (i
= 0; i
< j
; i
++) {
954 line
= PyList_GET_ITEM(list
, i
);
955 len
= PyString_GET_SIZE(line
);
956 BZ2_bzWrite (&bzerror
, self
->fp
,
957 PyString_AS_STRING(line
), len
);
958 if (bzerror
!= BZ_OK
) {
960 Util_CatchBZ2Error(bzerror
);
981 PyDoc_STRVAR(BZ2File_seek__doc__
,
982 "seek(offset [, whence]) -> None\n\
984 Move to new file position. Argument offset is a byte count. Optional\n\
985 argument whence defaults to 0 (offset from start of file, offset\n\
986 should be >= 0); other values are 1 (move relative to current position,\n\
987 positive or negative), and 2 (move relative to end of file, usually\n\
988 negative, although many platforms allow seeking beyond the end of a file).\n\
990 Note that seeking of bz2 files is emulated, and depending on the parameters\n\
991 the operation may be extremely slow.\n\
995 BZ2File_seek(BZ2FileObject
*self
, PyObject
*args
)
1000 char small_buffer
[SMALLCHUNK
];
1001 char *buffer
= small_buffer
;
1002 size_t buffersize
= SMALLCHUNK
;
1003 Py_off_t bytesread
= 0;
1007 PyObject
*ret
= NULL
;
1009 if (!PyArg_ParseTuple(args
, "O|i:seek", &offobj
, &where
))
1011 #if !defined(HAVE_LARGEFILE_SUPPORT)
1012 offset
= PyInt_AsLong(offobj
);
1014 offset
= PyLong_Check(offobj
) ?
1015 PyLong_AsLongLong(offobj
) : PyInt_AsLong(offobj
);
1017 if (PyErr_Occurred())
1021 Util_DropReadAhead(self
);
1022 switch (self
->mode
) {
1028 PyErr_SetString(PyExc_ValueError
,
1029 "I/O operation on closed file");
1033 PyErr_SetString(PyExc_IOError
,
1034 "seek works only while reading");
1039 if (self
->size
== -1) {
1040 assert(self
->mode
!= MODE_READ_EOF
);
1042 Py_BEGIN_ALLOW_THREADS
1043 chunksize
= Util_UnivNewlineRead(
1047 self
->pos
+= chunksize
;
1048 Py_END_ALLOW_THREADS
1050 bytesread
+= chunksize
;
1051 if (bzerror
== BZ_STREAM_END
) {
1053 } else if (bzerror
!= BZ_OK
) {
1054 Util_CatchBZ2Error(bzerror
);
1058 self
->mode
= MODE_READ_EOF
;
1059 self
->size
= self
->pos
;
1062 offset
= self
->size
+ offset
;
1063 } else if (where
== 1) {
1064 offset
= self
->pos
+ offset
;
1067 /* Before getting here, offset must be the absolute position the file
1068 * pointer should be set to. */
1070 if (offset
>= self
->pos
) {
1071 /* we can move forward */
1072 offset
-= self
->pos
;
1074 /* we cannot move back, so rewind the stream */
1075 BZ2_bzReadClose(&bzerror
, self
->fp
);
1077 PyFile_DecUseCount((PyFileObject
*)self
->file
);
1080 if (bzerror
!= BZ_OK
) {
1081 Util_CatchBZ2Error(bzerror
);
1084 ret
= PyObject_CallMethod(self
->file
, "seek", "(i)", 0);
1090 self
->fp
= BZ2_bzReadOpen(&bzerror
, PyFile_AsFile(self
->file
),
1093 PyFile_IncUseCount((PyFileObject
*)self
->file
);
1094 if (bzerror
!= BZ_OK
) {
1095 Util_CatchBZ2Error(bzerror
);
1098 self
->mode
= MODE_READ
;
1101 if (offset
<= 0 || self
->mode
== MODE_READ_EOF
)
1104 /* Before getting here, offset must be set to the number of bytes
1105 * to walk forward. */
1107 if (offset
-bytesread
> buffersize
)
1108 readsize
= buffersize
;
1110 /* offset might be wider than readsize, but the result
1111 * of the subtraction is bound by buffersize (see the
1112 * condition above). buffersize is 8192. */
1113 readsize
= (size_t)(offset
-bytesread
);
1114 Py_BEGIN_ALLOW_THREADS
1115 chunksize
= Util_UnivNewlineRead(&bzerror
, self
->fp
,
1116 buffer
, readsize
, self
);
1117 self
->pos
+= chunksize
;
1118 Py_END_ALLOW_THREADS
1119 bytesread
+= chunksize
;
1120 if (bzerror
== BZ_STREAM_END
) {
1121 self
->size
= self
->pos
;
1122 self
->mode
= MODE_READ_EOF
;
1124 } else if (bzerror
!= BZ_OK
) {
1125 Util_CatchBZ2Error(bzerror
);
1128 if (bytesread
== offset
)
1141 PyDoc_STRVAR(BZ2File_tell__doc__
,
1144 Return the current file position, an integer (may be a long integer).\n\
1148 BZ2File_tell(BZ2FileObject
*self
, PyObject
*args
)
1150 PyObject
*ret
= NULL
;
1152 if (self
->mode
== MODE_CLOSED
) {
1153 PyErr_SetString(PyExc_ValueError
,
1154 "I/O operation on closed file");
1158 #if !defined(HAVE_LARGEFILE_SUPPORT)
1159 ret
= PyInt_FromLong(self
->pos
);
1161 ret
= PyLong_FromLongLong(self
->pos
);
1168 PyDoc_STRVAR(BZ2File_close__doc__
,
1169 "close() -> None or (perhaps) an integer\n\
1171 Close the file. Sets data attribute .closed to true. A closed file\n\
1172 cannot be used for further I/O operations. close() may be called more\n\
1173 than once without error.\n\
1177 BZ2File_close(BZ2FileObject
*self
)
1179 PyObject
*ret
= NULL
;
1180 int bzerror
= BZ_OK
;
1183 switch (self
->mode
) {
1186 BZ2_bzReadClose(&bzerror
, self
->fp
);
1189 BZ2_bzWriteClose(&bzerror
, self
->fp
,
1194 PyFile_DecUseCount((PyFileObject
*)self
->file
);
1197 self
->mode
= MODE_CLOSED
;
1198 ret
= PyObject_CallMethod(self
->file
, "close", NULL
);
1199 if (bzerror
!= BZ_OK
) {
1200 Util_CatchBZ2Error(bzerror
);
1209 PyDoc_STRVAR(BZ2File_enter_doc
,
1210 "__enter__() -> self.");
1213 BZ2File_enter(BZ2FileObject
*self
)
1215 if (self
->mode
== MODE_CLOSED
) {
1216 PyErr_SetString(PyExc_ValueError
,
1217 "I/O operation on closed file");
1221 return (PyObject
*) self
;
1224 PyDoc_STRVAR(BZ2File_exit_doc
,
1225 "__exit__(*excinfo) -> None. Closes the file.");
1228 BZ2File_exit(BZ2FileObject
*self
, PyObject
*args
)
1230 PyObject
*ret
= PyObject_CallMethod((PyObject
*) self
, "close", NULL
);
1232 /* If error occurred, pass through */
1239 static PyObject
*BZ2File_getiter(BZ2FileObject
*self
);
1241 static PyMethodDef BZ2File_methods
[] = {
1242 {"read", (PyCFunction
)BZ2File_read
, METH_VARARGS
, BZ2File_read__doc__
},
1243 {"readline", (PyCFunction
)BZ2File_readline
, METH_VARARGS
, BZ2File_readline__doc__
},
1244 {"readlines", (PyCFunction
)BZ2File_readlines
, METH_VARARGS
, BZ2File_readlines__doc__
},
1245 {"xreadlines", (PyCFunction
)BZ2File_getiter
, METH_VARARGS
, BZ2File_xreadlines__doc__
},
1246 {"write", (PyCFunction
)BZ2File_write
, METH_VARARGS
, BZ2File_write__doc__
},
1247 {"writelines", (PyCFunction
)BZ2File_writelines
, METH_O
, BZ2File_writelines__doc__
},
1248 {"seek", (PyCFunction
)BZ2File_seek
, METH_VARARGS
, BZ2File_seek__doc__
},
1249 {"tell", (PyCFunction
)BZ2File_tell
, METH_NOARGS
, BZ2File_tell__doc__
},
1250 {"close", (PyCFunction
)BZ2File_close
, METH_NOARGS
, BZ2File_close__doc__
},
1251 {"__enter__", (PyCFunction
)BZ2File_enter
, METH_NOARGS
, BZ2File_enter_doc
},
1252 {"__exit__", (PyCFunction
)BZ2File_exit
, METH_VARARGS
, BZ2File_exit_doc
},
1253 {NULL
, NULL
} /* sentinel */
1257 /* ===================================================================== */
1258 /* Getters and setters of BZ2File. */
1260 /* This is a hacked version of Python's fileobject.c:get_newlines(). */
1262 BZ2File_get_newlines(BZ2FileObject
*self
, void *closure
)
1264 switch (self
->f_newlinetypes
) {
1265 case NEWLINE_UNKNOWN
:
1269 return PyString_FromString("\r");
1271 return PyString_FromString("\n");
1272 case NEWLINE_CR
|NEWLINE_LF
:
1273 return Py_BuildValue("(ss)", "\r", "\n");
1275 return PyString_FromString("\r\n");
1276 case NEWLINE_CR
|NEWLINE_CRLF
:
1277 return Py_BuildValue("(ss)", "\r", "\r\n");
1278 case NEWLINE_LF
|NEWLINE_CRLF
:
1279 return Py_BuildValue("(ss)", "\n", "\r\n");
1280 case NEWLINE_CR
|NEWLINE_LF
|NEWLINE_CRLF
:
1281 return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
1283 PyErr_Format(PyExc_SystemError
,
1284 "Unknown newlines value 0x%x\n",
1285 self
->f_newlinetypes
);
1291 BZ2File_get_closed(BZ2FileObject
*self
, void *closure
)
1293 return PyInt_FromLong(self
->mode
== MODE_CLOSED
);
1297 BZ2File_get_mode(BZ2FileObject
*self
, void *closure
)
1299 return PyObject_GetAttrString(self
->file
, "mode");
1303 BZ2File_get_name(BZ2FileObject
*self
, void *closure
)
1305 return PyObject_GetAttrString(self
->file
, "name");
1308 static PyGetSetDef BZ2File_getset
[] = {
1309 {"closed", (getter
)BZ2File_get_closed
, NULL
,
1310 "True if the file is closed"},
1311 {"newlines", (getter
)BZ2File_get_newlines
, NULL
,
1312 "end-of-line convention used in this file"},
1313 {"mode", (getter
)BZ2File_get_mode
, NULL
,
1314 "file mode ('r', 'w', or 'U')"},
1315 {"name", (getter
)BZ2File_get_name
, NULL
,
1317 {NULL
} /* Sentinel */
1321 /* ===================================================================== */
1322 /* Members of BZ2File_Type. */
1325 #define OFF(x) offsetof(BZ2FileObject, x)
1327 static PyMemberDef BZ2File_members
[] = {
1328 {"softspace", T_INT
, OFF(f_softspace
), 0,
1329 "flag indicating that a space needs to be printed; used by print"},
1330 {NULL
} /* Sentinel */
1333 /* ===================================================================== */
1334 /* Slot definitions for BZ2File_Type. */
1337 BZ2File_init(BZ2FileObject
*self
, PyObject
*args
, PyObject
*kwargs
)
1339 static char *kwlist
[] = {"filename", "mode", "buffering",
1340 "compresslevel", 0};
1344 int compresslevel
= 9;
1350 if (!PyArg_ParseTupleAndKeywords(args
, kwargs
, "O|sii:BZ2File",
1351 kwlist
, &name
, &mode
, &buffering
,
1355 if (compresslevel
< 1 || compresslevel
> 9) {
1356 PyErr_SetString(PyExc_ValueError
,
1357 "compresslevel must be between 1 and 9");
1376 self
->f_univ_newline
= 0;
1378 self
->f_univ_newline
= 1;
1387 PyErr_Format(PyExc_ValueError
,
1388 "invalid mode char %c", *mode
);
1396 if (mode_char
== 0) {
1400 mode
= (mode_char
== 'r') ? "rb" : "wb";
1402 self
->file
= PyObject_CallFunction((PyObject
*)&PyFile_Type
, "(Osi)",
1403 name
, mode
, buffering
);
1404 if (self
->file
== NULL
)
1407 /* From now on, we have stuff to dealloc, so jump to error label
1408 * instead of returning */
1411 self
->lock
= PyThread_allocate_lock();
1413 PyErr_SetString(PyExc_MemoryError
, "unable to allocate lock");
1418 if (mode_char
== 'r')
1419 self
->fp
= BZ2_bzReadOpen(&bzerror
,
1420 PyFile_AsFile(self
->file
),
1423 self
->fp
= BZ2_bzWriteOpen(&bzerror
,
1424 PyFile_AsFile(self
->file
),
1425 compresslevel
, 0, 0);
1427 if (bzerror
!= BZ_OK
) {
1428 Util_CatchBZ2Error(bzerror
);
1431 PyFile_IncUseCount((PyFileObject
*)self
->file
);
1433 self
->mode
= (mode_char
== 'r') ? MODE_READ
: MODE_WRITE
;
1438 Py_CLEAR(self
->file
);
1441 PyThread_free_lock(self
->lock
);
1449 BZ2File_dealloc(BZ2FileObject
*self
)
1454 PyThread_free_lock(self
->lock
);
1456 switch (self
->mode
) {
1459 BZ2_bzReadClose(&bzerror
, self
->fp
);
1462 BZ2_bzWriteClose(&bzerror
, self
->fp
,
1467 PyFile_DecUseCount((PyFileObject
*)self
->file
);
1470 Util_DropReadAhead(self
);
1471 Py_XDECREF(self
->file
);
1472 Py_TYPE(self
)->tp_free((PyObject
*)self
);
1475 /* This is a hacked version of Python's fileobject.c:file_getiter(). */
1477 BZ2File_getiter(BZ2FileObject
*self
)
1479 if (self
->mode
== MODE_CLOSED
) {
1480 PyErr_SetString(PyExc_ValueError
,
1481 "I/O operation on closed file");
1484 Py_INCREF((PyObject
*)self
);
1485 return (PyObject
*)self
;
1488 /* This is a hacked version of Python's fileobject.c:file_iternext(). */
1489 #define READAHEAD_BUFSIZE 8192
1491 BZ2File_iternext(BZ2FileObject
*self
)
1493 PyStringObject
* ret
;
1495 if (self
->mode
== MODE_CLOSED
) {
1497 PyErr_SetString(PyExc_ValueError
,
1498 "I/O operation on closed file");
1501 ret
= Util_ReadAheadGetLineSkip(self
, 0, READAHEAD_BUFSIZE
);
1503 if (ret
== NULL
|| PyString_GET_SIZE(ret
) == 0) {
1507 return (PyObject
*)ret
;
1510 /* ===================================================================== */
1511 /* BZ2File_Type definition. */
1513 PyDoc_VAR(BZ2File__doc__
) =
1515 "BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
1517 Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
1518 writing. When opened for writing, the file will be created if it doesn't\n\
1519 exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
1520 unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
1521 is given, must be a number between 1 and 9.\n\
1525 Add a 'U' to mode to open the file for input with universal newline\n\
1526 support. Any line ending in the input file will be seen as a '\\n' in\n\
1527 Python. Also, a file so opened gains the attribute 'newlines'; the value\n\
1528 for this attribute is one of None (no newline read yet), '\\r', '\\n',\n\
1529 '\\r\\n' or a tuple containing all the newline types seen. Universal\n\
1530 newlines are available only when reading.\n\
1534 static PyTypeObject BZ2File_Type
= {
1535 PyVarObject_HEAD_INIT(NULL
, 0)
1536 "bz2.BZ2File", /*tp_name*/
1537 sizeof(BZ2FileObject
), /*tp_basicsize*/
1539 (destructor
)BZ2File_dealloc
, /*tp_dealloc*/
1546 0, /*tp_as_sequence*/
1547 0, /*tp_as_mapping*/
1551 PyObject_GenericGetAttr
,/*tp_getattro*/
1552 PyObject_GenericSetAttr
,/*tp_setattro*/
1554 Py_TPFLAGS_DEFAULT
|Py_TPFLAGS_BASETYPE
, /*tp_flags*/
1555 BZ2File__doc__
, /*tp_doc*/
1558 0, /*tp_richcompare*/
1559 0, /*tp_weaklistoffset*/
1560 (getiterfunc
)BZ2File_getiter
, /*tp_iter*/
1561 (iternextfunc
)BZ2File_iternext
, /*tp_iternext*/
1562 BZ2File_methods
, /*tp_methods*/
1563 BZ2File_members
, /*tp_members*/
1564 BZ2File_getset
, /*tp_getset*/
1569 0, /*tp_dictoffset*/
1570 (initproc
)BZ2File_init
, /*tp_init*/
1571 PyType_GenericAlloc
, /*tp_alloc*/
1572 PyType_GenericNew
, /*tp_new*/
1573 _PyObject_Del
, /*tp_free*/
1578 /* ===================================================================== */
1579 /* Methods of BZ2Comp. */
1581 PyDoc_STRVAR(BZ2Comp_compress__doc__
,
1582 "compress(data) -> string\n\
1584 Provide more data to the compressor object. It will return chunks of\n\
1585 compressed data whenever possible. When you've finished providing data\n\
1586 to compress, call the flush() method to finish the compression process,\n\
1587 and return what is left in the internal buffers.\n\
1591 BZ2Comp_compress(BZ2CompObject
*self
, PyObject
*args
)
1596 int bufsize
= SMALLCHUNK
;
1597 PY_LONG_LONG totalout
;
1598 PyObject
*ret
= NULL
;
1599 bz_stream
*bzs
= &self
->bzs
;
1602 if (!PyArg_ParseTuple(args
, "s*:compress", &pdata
))
1605 datasize
= pdata
.len
;
1607 if (datasize
== 0) {
1608 PyBuffer_Release(&pdata
);
1609 return PyString_FromString("");
1613 if (!self
->running
) {
1614 PyErr_SetString(PyExc_ValueError
,
1615 "this object was already flushed");
1619 ret
= PyString_FromStringAndSize(NULL
, bufsize
);
1623 bzs
->next_in
= data
;
1624 bzs
->avail_in
= datasize
;
1625 bzs
->next_out
= BUF(ret
);
1626 bzs
->avail_out
= bufsize
;
1628 totalout
= BZS_TOTAL_OUT(bzs
);
1631 Py_BEGIN_ALLOW_THREADS
1632 bzerror
= BZ2_bzCompress(bzs
, BZ_RUN
);
1633 Py_END_ALLOW_THREADS
1634 if (bzerror
!= BZ_RUN_OK
) {
1635 Util_CatchBZ2Error(bzerror
);
1638 if (bzs
->avail_in
== 0)
1639 break; /* no more input data */
1640 if (bzs
->avail_out
== 0) {
1641 bufsize
= Util_NewBufferSize(bufsize
);
1642 if (_PyString_Resize(&ret
, bufsize
) < 0) {
1643 BZ2_bzCompressEnd(bzs
);
1646 bzs
->next_out
= BUF(ret
) + (BZS_TOTAL_OUT(bzs
)
1648 bzs
->avail_out
= bufsize
- (bzs
->next_out
- BUF(ret
));
1652 _PyString_Resize(&ret
, (Py_ssize_t
)(BZS_TOTAL_OUT(bzs
) - totalout
));
1655 PyBuffer_Release(&pdata
);
1660 PyBuffer_Release(&pdata
);
1665 PyDoc_STRVAR(BZ2Comp_flush__doc__
,
1666 "flush() -> string\n\
1668 Finish the compression process and return what is left in internal buffers.\n\
1669 You must not use the compressor object after calling this method.\n\
1673 BZ2Comp_flush(BZ2CompObject
*self
)
1675 int bufsize
= SMALLCHUNK
;
1676 PyObject
*ret
= NULL
;
1677 bz_stream
*bzs
= &self
->bzs
;
1678 PY_LONG_LONG totalout
;
1682 if (!self
->running
) {
1683 PyErr_SetString(PyExc_ValueError
, "object was already "
1689 ret
= PyString_FromStringAndSize(NULL
, bufsize
);
1693 bzs
->next_out
= BUF(ret
);
1694 bzs
->avail_out
= bufsize
;
1696 totalout
= BZS_TOTAL_OUT(bzs
);
1699 Py_BEGIN_ALLOW_THREADS
1700 bzerror
= BZ2_bzCompress(bzs
, BZ_FINISH
);
1701 Py_END_ALLOW_THREADS
1702 if (bzerror
== BZ_STREAM_END
) {
1704 } else if (bzerror
!= BZ_FINISH_OK
) {
1705 Util_CatchBZ2Error(bzerror
);
1708 if (bzs
->avail_out
== 0) {
1709 bufsize
= Util_NewBufferSize(bufsize
);
1710 if (_PyString_Resize(&ret
, bufsize
) < 0)
1712 bzs
->next_out
= BUF(ret
);
1713 bzs
->next_out
= BUF(ret
) + (BZS_TOTAL_OUT(bzs
)
1715 bzs
->avail_out
= bufsize
- (bzs
->next_out
- BUF(ret
));
1719 if (bzs
->avail_out
!= 0)
1720 _PyString_Resize(&ret
, (Py_ssize_t
)(BZS_TOTAL_OUT(bzs
) - totalout
));
1731 static PyMethodDef BZ2Comp_methods
[] = {
1732 {"compress", (PyCFunction
)BZ2Comp_compress
, METH_VARARGS
,
1733 BZ2Comp_compress__doc__
},
1734 {"flush", (PyCFunction
)BZ2Comp_flush
, METH_NOARGS
,
1735 BZ2Comp_flush__doc__
},
1736 {NULL
, NULL
} /* sentinel */
1740 /* ===================================================================== */
1741 /* Slot definitions for BZ2Comp_Type. */
1744 BZ2Comp_init(BZ2CompObject
*self
, PyObject
*args
, PyObject
*kwargs
)
1746 int compresslevel
= 9;
1748 static char *kwlist
[] = {"compresslevel", 0};
1750 if (!PyArg_ParseTupleAndKeywords(args
, kwargs
, "|i:BZ2Compressor",
1751 kwlist
, &compresslevel
))
1754 if (compresslevel
< 1 || compresslevel
> 9) {
1755 PyErr_SetString(PyExc_ValueError
,
1756 "compresslevel must be between 1 and 9");
1761 self
->lock
= PyThread_allocate_lock();
1763 PyErr_SetString(PyExc_MemoryError
, "unable to allocate lock");
1768 memset(&self
->bzs
, 0, sizeof(bz_stream
));
1769 bzerror
= BZ2_bzCompressInit(&self
->bzs
, compresslevel
, 0, 0);
1770 if (bzerror
!= BZ_OK
) {
1771 Util_CatchBZ2Error(bzerror
);
1781 PyThread_free_lock(self
->lock
);
1789 BZ2Comp_dealloc(BZ2CompObject
*self
)
1793 PyThread_free_lock(self
->lock
);
1795 BZ2_bzCompressEnd(&self
->bzs
);
1796 Py_TYPE(self
)->tp_free((PyObject
*)self
);
1800 /* ===================================================================== */
1801 /* BZ2Comp_Type definition. */
1803 PyDoc_STRVAR(BZ2Comp__doc__
,
1804 "BZ2Compressor([compresslevel=9]) -> compressor object\n\
1806 Create a new compressor object. This object may be used to compress\n\
1807 data sequentially. If you want to compress data in one shot, use the\n\
1808 compress() function instead. The compresslevel parameter, if given,\n\
1809 must be a number between 1 and 9.\n\
1812 static PyTypeObject BZ2Comp_Type
= {
1813 PyVarObject_HEAD_INIT(NULL
, 0)
1814 "bz2.BZ2Compressor", /*tp_name*/
1815 sizeof(BZ2CompObject
), /*tp_basicsize*/
1817 (destructor
)BZ2Comp_dealloc
, /*tp_dealloc*/
1824 0, /*tp_as_sequence*/
1825 0, /*tp_as_mapping*/
1829 PyObject_GenericGetAttr
,/*tp_getattro*/
1830 PyObject_GenericSetAttr
,/*tp_setattro*/
1832 Py_TPFLAGS_DEFAULT
|Py_TPFLAGS_BASETYPE
, /*tp_flags*/
1833 BZ2Comp__doc__
, /*tp_doc*/
1836 0, /*tp_richcompare*/
1837 0, /*tp_weaklistoffset*/
1840 BZ2Comp_methods
, /*tp_methods*/
1847 0, /*tp_dictoffset*/
1848 (initproc
)BZ2Comp_init
, /*tp_init*/
1849 PyType_GenericAlloc
, /*tp_alloc*/
1850 PyType_GenericNew
, /*tp_new*/
1851 _PyObject_Del
, /*tp_free*/
1856 /* ===================================================================== */
1857 /* Members of BZ2Decomp. */
1860 #define OFF(x) offsetof(BZ2DecompObject, x)
1862 static PyMemberDef BZ2Decomp_members
[] = {
1863 {"unused_data", T_OBJECT
, OFF(unused_data
), RO
},
1864 {NULL
} /* Sentinel */
1868 /* ===================================================================== */
1869 /* Methods of BZ2Decomp. */
1871 PyDoc_STRVAR(BZ2Decomp_decompress__doc__
,
1872 "decompress(data) -> string\n\
1874 Provide more data to the decompressor object. It will return chunks\n\
1875 of decompressed data whenever possible. If you try to decompress data\n\
1876 after the end of stream is found, EOFError will be raised. If any data\n\
1877 was found after the end of stream, it'll be ignored and saved in\n\
1878 unused_data attribute.\n\
1882 BZ2Decomp_decompress(BZ2DecompObject
*self
, PyObject
*args
)
1887 int bufsize
= SMALLCHUNK
;
1888 PY_LONG_LONG totalout
;
1889 PyObject
*ret
= NULL
;
1890 bz_stream
*bzs
= &self
->bzs
;
1893 if (!PyArg_ParseTuple(args
, "s*:decompress", &pdata
))
1896 datasize
= pdata
.len
;
1899 if (!self
->running
) {
1900 PyErr_SetString(PyExc_EOFError
, "end of stream was "
1905 ret
= PyString_FromStringAndSize(NULL
, bufsize
);
1909 bzs
->next_in
= data
;
1910 bzs
->avail_in
= datasize
;
1911 bzs
->next_out
= BUF(ret
);
1912 bzs
->avail_out
= bufsize
;
1914 totalout
= BZS_TOTAL_OUT(bzs
);
1917 Py_BEGIN_ALLOW_THREADS
1918 bzerror
= BZ2_bzDecompress(bzs
);
1919 Py_END_ALLOW_THREADS
1920 if (bzerror
== BZ_STREAM_END
) {
1921 if (bzs
->avail_in
!= 0) {
1922 Py_DECREF(self
->unused_data
);
1924 PyString_FromStringAndSize(bzs
->next_in
,
1930 if (bzerror
!= BZ_OK
) {
1931 Util_CatchBZ2Error(bzerror
);
1934 if (bzs
->avail_in
== 0)
1935 break; /* no more input data */
1936 if (bzs
->avail_out
== 0) {
1937 bufsize
= Util_NewBufferSize(bufsize
);
1938 if (_PyString_Resize(&ret
, bufsize
) < 0) {
1939 BZ2_bzDecompressEnd(bzs
);
1942 bzs
->next_out
= BUF(ret
);
1943 bzs
->next_out
= BUF(ret
) + (BZS_TOTAL_OUT(bzs
)
1945 bzs
->avail_out
= bufsize
- (bzs
->next_out
- BUF(ret
));
1949 if (bzs
->avail_out
!= 0)
1950 _PyString_Resize(&ret
, (Py_ssize_t
)(BZS_TOTAL_OUT(bzs
) - totalout
));
1953 PyBuffer_Release(&pdata
);
1958 PyBuffer_Release(&pdata
);
1963 static PyMethodDef BZ2Decomp_methods
[] = {
1964 {"decompress", (PyCFunction
)BZ2Decomp_decompress
, METH_VARARGS
, BZ2Decomp_decompress__doc__
},
1965 {NULL
, NULL
} /* sentinel */
1969 /* ===================================================================== */
1970 /* Slot definitions for BZ2Decomp_Type. */
1973 BZ2Decomp_init(BZ2DecompObject
*self
, PyObject
*args
, PyObject
*kwargs
)
1977 if (!PyArg_ParseTuple(args
, ":BZ2Decompressor"))
1981 self
->lock
= PyThread_allocate_lock();
1983 PyErr_SetString(PyExc_MemoryError
, "unable to allocate lock");
1988 self
->unused_data
= PyString_FromString("");
1989 if (!self
->unused_data
)
1992 memset(&self
->bzs
, 0, sizeof(bz_stream
));
1993 bzerror
= BZ2_bzDecompressInit(&self
->bzs
, 0, 0);
1994 if (bzerror
!= BZ_OK
) {
1995 Util_CatchBZ2Error(bzerror
);
2006 PyThread_free_lock(self
->lock
);
2010 Py_CLEAR(self
->unused_data
);
2015 BZ2Decomp_dealloc(BZ2DecompObject
*self
)
2019 PyThread_free_lock(self
->lock
);
2021 Py_XDECREF(self
->unused_data
);
2022 BZ2_bzDecompressEnd(&self
->bzs
);
2023 Py_TYPE(self
)->tp_free((PyObject
*)self
);
2027 /* ===================================================================== */
2028 /* BZ2Decomp_Type definition. */
2030 PyDoc_STRVAR(BZ2Decomp__doc__
,
2031 "BZ2Decompressor() -> decompressor object\n\
2033 Create a new decompressor object. This object may be used to decompress\n\
2034 data sequentially. If you want to decompress data in one shot, use the\n\
2035 decompress() function instead.\n\
2038 static PyTypeObject BZ2Decomp_Type
= {
2039 PyVarObject_HEAD_INIT(NULL
, 0)
2040 "bz2.BZ2Decompressor", /*tp_name*/
2041 sizeof(BZ2DecompObject
), /*tp_basicsize*/
2043 (destructor
)BZ2Decomp_dealloc
, /*tp_dealloc*/
2050 0, /*tp_as_sequence*/
2051 0, /*tp_as_mapping*/
2055 PyObject_GenericGetAttr
,/*tp_getattro*/
2056 PyObject_GenericSetAttr
,/*tp_setattro*/
2058 Py_TPFLAGS_DEFAULT
|Py_TPFLAGS_BASETYPE
, /*tp_flags*/
2059 BZ2Decomp__doc__
, /*tp_doc*/
2062 0, /*tp_richcompare*/
2063 0, /*tp_weaklistoffset*/
2066 BZ2Decomp_methods
, /*tp_methods*/
2067 BZ2Decomp_members
, /*tp_members*/
2073 0, /*tp_dictoffset*/
2074 (initproc
)BZ2Decomp_init
, /*tp_init*/
2075 PyType_GenericAlloc
, /*tp_alloc*/
2076 PyType_GenericNew
, /*tp_new*/
2077 _PyObject_Del
, /*tp_free*/
2082 /* ===================================================================== */
2083 /* Module functions. */
2085 PyDoc_STRVAR(bz2_compress__doc__
,
2086 "compress(data [, compresslevel=9]) -> string\n\
2088 Compress data in one shot. If you want to compress data sequentially,\n\
2089 use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
2090 given, must be a number between 1 and 9.\n\
2094 bz2_compress(PyObject
*self
, PyObject
*args
, PyObject
*kwargs
)
2096 int compresslevel
=9;
2101 PyObject
*ret
= NULL
;
2103 bz_stream
*bzs
= &_bzs
;
2105 static char *kwlist
[] = {"data", "compresslevel", 0};
2107 if (!PyArg_ParseTupleAndKeywords(args
, kwargs
, "s*|i",
2112 datasize
= pdata
.len
;
2114 if (compresslevel
< 1 || compresslevel
> 9) {
2115 PyErr_SetString(PyExc_ValueError
,
2116 "compresslevel must be between 1 and 9");
2117 PyBuffer_Release(&pdata
);
2121 /* Conforming to bz2 manual, this is large enough to fit compressed
2122 * data in one shot. We will check it later anyway. */
2123 bufsize
= datasize
+ (datasize
/100+1) + 600;
2125 ret
= PyString_FromStringAndSize(NULL
, bufsize
);
2127 PyBuffer_Release(&pdata
);
2131 memset(bzs
, 0, sizeof(bz_stream
));
2133 bzs
->next_in
= data
;
2134 bzs
->avail_in
= datasize
;
2135 bzs
->next_out
= BUF(ret
);
2136 bzs
->avail_out
= bufsize
;
2138 bzerror
= BZ2_bzCompressInit(bzs
, compresslevel
, 0, 0);
2139 if (bzerror
!= BZ_OK
) {
2140 Util_CatchBZ2Error(bzerror
);
2141 PyBuffer_Release(&pdata
);
2147 Py_BEGIN_ALLOW_THREADS
2148 bzerror
= BZ2_bzCompress(bzs
, BZ_FINISH
);
2149 Py_END_ALLOW_THREADS
2150 if (bzerror
== BZ_STREAM_END
) {
2152 } else if (bzerror
!= BZ_FINISH_OK
) {
2153 BZ2_bzCompressEnd(bzs
);
2154 Util_CatchBZ2Error(bzerror
);
2155 PyBuffer_Release(&pdata
);
2159 if (bzs
->avail_out
== 0) {
2160 bufsize
= Util_NewBufferSize(bufsize
);
2161 if (_PyString_Resize(&ret
, bufsize
) < 0) {
2162 BZ2_bzCompressEnd(bzs
);
2163 PyBuffer_Release(&pdata
);
2167 bzs
->next_out
= BUF(ret
) + BZS_TOTAL_OUT(bzs
);
2168 bzs
->avail_out
= bufsize
- (bzs
->next_out
- BUF(ret
));
2172 if (bzs
->avail_out
!= 0)
2173 _PyString_Resize(&ret
, (Py_ssize_t
)BZS_TOTAL_OUT(bzs
));
2174 BZ2_bzCompressEnd(bzs
);
2176 PyBuffer_Release(&pdata
);
2180 PyDoc_STRVAR(bz2_decompress__doc__
,
2181 "decompress(data) -> decompressed data\n\
2183 Decompress data in one shot. If you want to decompress data sequentially,\n\
2184 use an instance of BZ2Decompressor instead.\n\
2188 bz2_decompress(PyObject
*self
, PyObject
*args
)
2193 int bufsize
= SMALLCHUNK
;
2196 bz_stream
*bzs
= &_bzs
;
2199 if (!PyArg_ParseTuple(args
, "s*:decompress", &pdata
))
2202 datasize
= pdata
.len
;
2204 if (datasize
== 0) {
2205 PyBuffer_Release(&pdata
);
2206 return PyString_FromString("");
2209 ret
= PyString_FromStringAndSize(NULL
, bufsize
);
2211 PyBuffer_Release(&pdata
);
2215 memset(bzs
, 0, sizeof(bz_stream
));
2217 bzs
->next_in
= data
;
2218 bzs
->avail_in
= datasize
;
2219 bzs
->next_out
= BUF(ret
);
2220 bzs
->avail_out
= bufsize
;
2222 bzerror
= BZ2_bzDecompressInit(bzs
, 0, 0);
2223 if (bzerror
!= BZ_OK
) {
2224 Util_CatchBZ2Error(bzerror
);
2226 PyBuffer_Release(&pdata
);
2231 Py_BEGIN_ALLOW_THREADS
2232 bzerror
= BZ2_bzDecompress(bzs
);
2233 Py_END_ALLOW_THREADS
2234 if (bzerror
== BZ_STREAM_END
) {
2236 } else if (bzerror
!= BZ_OK
) {
2237 BZ2_bzDecompressEnd(bzs
);
2238 Util_CatchBZ2Error(bzerror
);
2239 PyBuffer_Release(&pdata
);
2243 if (bzs
->avail_in
== 0) {
2244 BZ2_bzDecompressEnd(bzs
);
2245 PyErr_SetString(PyExc_ValueError
,
2246 "couldn't find end of stream");
2247 PyBuffer_Release(&pdata
);
2251 if (bzs
->avail_out
== 0) {
2252 bufsize
= Util_NewBufferSize(bufsize
);
2253 if (_PyString_Resize(&ret
, bufsize
) < 0) {
2254 BZ2_bzDecompressEnd(bzs
);
2255 PyBuffer_Release(&pdata
);
2259 bzs
->next_out
= BUF(ret
) + BZS_TOTAL_OUT(bzs
);
2260 bzs
->avail_out
= bufsize
- (bzs
->next_out
- BUF(ret
));
2264 if (bzs
->avail_out
!= 0)
2265 _PyString_Resize(&ret
, (Py_ssize_t
)BZS_TOTAL_OUT(bzs
));
2266 BZ2_bzDecompressEnd(bzs
);
2267 PyBuffer_Release(&pdata
);
2272 static PyMethodDef bz2_methods
[] = {
2273 {"compress", (PyCFunction
) bz2_compress
, METH_VARARGS
|METH_KEYWORDS
,
2274 bz2_compress__doc__
},
2275 {"decompress", (PyCFunction
) bz2_decompress
, METH_VARARGS
,
2276 bz2_decompress__doc__
},
2277 {NULL
, NULL
} /* sentinel */
2280 /* ===================================================================== */
2281 /* Initialization function. */
2283 PyDoc_STRVAR(bz2__doc__
,
2284 "The python bz2 module provides a comprehensive interface for\n\
2285 the bz2 compression library. It implements a complete file\n\
2286 interface, one shot (de)compression functions, and types for\n\
2287 sequential (de)compression.\n\
2295 Py_TYPE(&BZ2File_Type
) = &PyType_Type
;
2296 Py_TYPE(&BZ2Comp_Type
) = &PyType_Type
;
2297 Py_TYPE(&BZ2Decomp_Type
) = &PyType_Type
;
2299 m
= Py_InitModule3("bz2", bz2_methods
, bz2__doc__
);
2303 PyModule_AddObject(m
, "__author__", PyString_FromString(__author__
));
2305 Py_INCREF(&BZ2File_Type
);
2306 PyModule_AddObject(m
, "BZ2File", (PyObject
*)&BZ2File_Type
);
2308 Py_INCREF(&BZ2Comp_Type
);
2309 PyModule_AddObject(m
, "BZ2Compressor", (PyObject
*)&BZ2Comp_Type
);
2311 Py_INCREF(&BZ2Decomp_Type
);
2312 PyModule_AddObject(m
, "BZ2Decompressor", (PyObject
*)&BZ2Decomp_Type
);