3 python-bz2 - python bz2 library interface
5 Copyright (c) 2002 Gustavo Niemeyer <niemeyer@conectiva.com>
6 Copyright (c) 2002 Python Software Foundation; All Rights Reserved
13 #include "structmember.h"
19 static char __author__
[] =
20 "The bz2 python module was written by:\n\
22 Gustavo Niemeyer <niemeyer@conectiva.com>\n\
25 /* Our very own off_t-like type, 64-bit if possible */
26 /* copied from Objects/fileobject.c */
27 #if !defined(HAVE_LARGEFILE_SUPPORT)
28 typedef off_t Py_off_t
;
29 #elif SIZEOF_OFF_T >= 8
30 typedef off_t Py_off_t
;
31 #elif SIZEOF_FPOS_T >= 8
32 typedef fpos_t Py_off_t
;
34 #error "Large file support, but neither off_t nor fpos_t is large enough."
/* Return the character buffer of the string object v.
 * The argument is parenthesized before the cast so that an expression
 * argument (for example pointer arithmetic) is cast as a whole instead of
 * having only its first operand cast. */
#define BUF(v) PyString_AS_STRING((PyStringObject *)(v))
41 #define MODE_READ_EOF 2
44 #define BZ2FileObject_Check(v) (Py_TYPE(v) == &BZ2File_Type)
47 #ifdef BZ_CONFIG_ERROR
/* Total number of output bytes produced by the stream bzs, assembled from
 * its two 32-bit halves (total_out_hi32, total_out_lo32) using long
 * arithmetic.  The argument is parenthesized so that any pointer
 * expression can be passed. */
#define BZS_TOTAL_OUT(bzs) \
    (((long)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
52 #elif SIZEOF_LONG_LONG >= 8
/* Total number of output bytes produced by the stream bzs, assembled from
 * its two 32-bit halves (total_out_hi32, total_out_lo32) using
 * PY_LONG_LONG arithmetic.  The argument is parenthesized so that any
 * pointer expression can be passed. */
#define BZS_TOTAL_OUT(bzs) \
    (((PY_LONG_LONG)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
56 #define BZS_TOTAL_OUT(bzs) \
60 #else /* ! BZ_CONFIG_ERROR */
62 #define BZ2_bzRead bzRead
63 #define BZ2_bzReadOpen bzReadOpen
64 #define BZ2_bzReadClose bzReadClose
65 #define BZ2_bzWrite bzWrite
66 #define BZ2_bzWriteOpen bzWriteOpen
67 #define BZ2_bzWriteClose bzWriteClose
68 #define BZ2_bzCompress bzCompress
69 #define BZ2_bzCompressInit bzCompressInit
70 #define BZ2_bzCompressEnd bzCompressEnd
71 #define BZ2_bzDecompress bzDecompress
72 #define BZ2_bzDecompressInit bzDecompressInit
73 #define BZ2_bzDecompressEnd bzDecompressEnd
/* Total number of output bytes produced by the stream bzs, read from its
 * single total_out field (variant used when BZ_CONFIG_ERROR is not
 * defined).  Both the argument and the whole expansion are parenthesized
 * so the macro composes safely inside larger expressions. */
#define BZS_TOTAL_OUT(bzs) ((bzs)->total_out)
77 #endif /* ! BZ_CONFIG_ERROR */
/* Acquire / release the per-object lock stored in obj->lock.
 * ACQUIRE_LOCK passes 1 as the second argument of PyThread_acquire_lock
 * (presumably the blocking wait flag -- confirm against pythread.h).
 * The argument is parenthesized so any pointer expression can be passed. */
#define ACQUIRE_LOCK(obj) PyThread_acquire_lock((obj)->lock, 1)
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
84 #define ACQUIRE_LOCK(obj)
85 #define RELEASE_LOCK(obj)
88 /* Bits in f_newlinetypes */
89 #define NEWLINE_UNKNOWN 0 /* No newline seen, yet */
90 #define NEWLINE_CR 1 /* \r newline seen */
91 #define NEWLINE_LF 2 /* \n newline seen */
92 #define NEWLINE_CRLF 4 /* \r\n newline seen */
94 /* ===================================================================== */
95 /* Structure definitions. */
101 char* f_buf
; /* Allocated readahead buffer */
102 char* f_bufend
; /* Points after last occupied position */
103 char* f_bufptr
; /* Current buffer position */
105 int f_softspace
; /* Flag used by 'print' command */
107 int f_univ_newline
; /* Handle any newline convention */
108 int f_newlinetypes
; /* Types of newlines seen */
109 int f_skipnextlf
; /* Skip next \n */
116 PyThread_type_lock lock
;
125 PyThread_type_lock lock
;
133 PyObject
*unused_data
;
135 PyThread_type_lock lock
;
139 /* ===================================================================== */
140 /* Utility functions. */
/* Turn a bz2 library status code into a pending Python exception.
 *
 * bzerror: status code reported by a BZ2_* call.
 *
 * Mappings visible in this listing:
 *   BZ_CONFIG_ERROR     -> SystemError (only when the macro is defined)
 *   BZ_DATA_ERROR_MAGIC -> IOError, invalid data stream
 *   BZ_UNEXPECTED_EOF   -> EOFError
 *   BZ_SEQUENCE_ERROR   -> RuntimeError
 * A ValueError and a second IOError (unknown IO error) are also raised,
 * but their case labels are not visible here.
 * NOTE(review): the return type and the return statements are not visible
 * in this listing; confirm them against the full file. */
143 Util_CatchBZ2Error(int bzerror
)
151 #ifdef BZ_CONFIG_ERROR
152 case BZ_CONFIG_ERROR
:
153 PyErr_SetString(PyExc_SystemError
,
154 "the bz2 library was not compiled "
161 PyErr_SetString(PyExc_ValueError
,
162 "the bz2 library has received wrong "
173 case BZ_DATA_ERROR_MAGIC
:
174 PyErr_SetString(PyExc_IOError
, "invalid data stream");
179 PyErr_SetString(PyExc_IOError
, "unknown IO error");
183 case BZ_UNEXPECTED_EOF
:
184 PyErr_SetString(PyExc_EOFError
,
185 "compressed file ended before the "
186 "logical end-of-stream was detected");
190 case BZ_SEQUENCE_ERROR
:
191 PyErr_SetString(PyExc_RuntimeError
,
192 "wrong sequence of bz2 library "
201 #define SMALLCHUNK 8192
203 #define SMALLCHUNK BUFSIZ
207 #define BIGCHUNK (512 * 32)
209 #define BIGCHUNK (512 * 1024)
212 /* This is a hacked version of Python's fileobject.c:new_buffersize(). */
/* Compute the next buffer size to try after currentsize.
 *
 * currentsize: size of the buffer that turned out to be too small.
 *
 * Growth policy, as written below:
 *   - up to SMALLCHUNK:            grow by SMALLCHUNK
 *   - above SMALLCHUNK, up to
 *     BIGCHUNK:                    double the size
 *   - above BIGCHUNK:              grow by BIGCHUNK
 * No overflow check is performed on the additions. */
214 Util_NewBufferSize(size_t currentsize
)
216 if (currentsize
> SMALLCHUNK
) {
217 /* Keep doubling until we reach BIGCHUNK;
218 then keep adding BIGCHUNK. */
219 if (currentsize
<= BIGCHUNK
)
220 return currentsize
+ currentsize
;
222 return currentsize
+ BIGCHUNK
;
224 return currentsize
+ SMALLCHUNK
;
227 /* This is a hacked version of Python's fileobject.c:get_line(). */
229 Util_GetLine(BZ2FileObject
*f
, int n
)
233 size_t total_v_size
; /* total # of slots in buffer */
234 size_t used_v_size
; /* # used slots in buffer */
235 size_t increment
; /* amount to increment the buffer */
239 int newlinetypes
= f
->f_newlinetypes
;
240 int skipnextlf
= f
->f_skipnextlf
;
241 int univ_newline
= f
->f_univ_newline
;
243 total_v_size
= n
> 0 ? n
: 100;
244 v
= PyString_FromStringAndSize((char *)NULL
, total_v_size
);
249 end
= buf
+ total_v_size
;
252 Py_BEGIN_ALLOW_THREADS
254 bytes_read
= BZ2_bzRead(&bzerror
, f
->fp
, &c
, 1);
256 if (bytes_read
== 0) break;
261 /* Seeing a \n here with skipnextlf true means we
264 newlinetypes
|= NEWLINE_CRLF
;
265 if (bzerror
!= BZ_OK
) break;
266 bytes_read
= BZ2_bzRead(&bzerror
, f
->fp
, &c
, 1);
268 if (bytes_read
== 0) break;
270 newlinetypes
|= NEWLINE_CR
;
276 } else if (c
== '\n')
277 newlinetypes
|= NEWLINE_LF
;
280 if (bzerror
!= BZ_OK
|| c
== '\n') break;
282 if (univ_newline
&& bzerror
== BZ_STREAM_END
&& skipnextlf
)
283 newlinetypes
|= NEWLINE_CR
;
285 f
->f_newlinetypes
= newlinetypes
;
286 f
->f_skipnextlf
= skipnextlf
;
287 if (bzerror
== BZ_STREAM_END
) {
289 f
->mode
= MODE_READ_EOF
;
291 } else if (bzerror
!= BZ_OK
) {
292 Util_CatchBZ2Error(bzerror
);
298 /* Must be because buf == end */
301 used_v_size
= total_v_size
;
302 increment
= total_v_size
>> 2; /* mild exponential growth */
303 total_v_size
+= increment
;
304 if (total_v_size
> INT_MAX
) {
305 PyErr_SetString(PyExc_OverflowError
,
306 "line is longer than a Python string can hold");
310 if (_PyString_Resize(&v
, total_v_size
) < 0)
312 buf
= BUF(v
) + used_v_size
;
313 end
= BUF(v
) + total_v_size
;
316 used_v_size
= buf
- BUF(v
);
317 if (used_v_size
!= total_v_size
)
318 _PyString_Resize(&v
, used_v_size
);
322 /* This is a hacked version of Python's
323 * fileobject.c:Py_UniversalNewlineFread(). */
325 Util_UnivNewlineRead(int *bzerror
, BZFILE
*stream
,
326 char* buf
, size_t n
, BZ2FileObject
*f
)
329 int newlinetypes
, skipnextlf
;
332 assert(stream
!= NULL
);
334 if (!f
->f_univ_newline
)
335 return BZ2_bzRead(bzerror
, stream
, buf
, n
);
337 newlinetypes
= f
->f_newlinetypes
;
338 skipnextlf
= f
->f_skipnextlf
;
340 /* Invariant: n is the number of bytes remaining to be filled
348 nread
= BZ2_bzRead(bzerror
, stream
, dst
, n
);
350 n
-= nread
; /* assuming 1 byte out for each in; will adjust */
351 shortread
= n
!= 0; /* true iff EOF or error */
355 /* Save as LF and set flag to skip next LF. */
359 else if (skipnextlf
&& c
== '\n') {
360 /* Skip LF, and remember we saw CR LF. */
362 newlinetypes
|= NEWLINE_CRLF
;
366 /* Normal char to be stored in buffer. Also
367 * update the newlinetypes flag if either this
368 * is an LF or the previous char was a CR.
371 newlinetypes
|= NEWLINE_LF
;
373 newlinetypes
|= NEWLINE_CR
;
379 /* If this is EOF, update type flags. */
380 if (skipnextlf
&& *bzerror
== BZ_STREAM_END
)
381 newlinetypes
|= NEWLINE_CR
;
385 f
->f_newlinetypes
= newlinetypes
;
386 f
->f_skipnextlf
= skipnextlf
;
390 /* This is a hacked version of Python's fileobject.c:drop_readahead(). */
/* Release the readahead buffer of f, if one is allocated.
 *
 * f: BZ2File object whose f_buf is passed to PyMem_Free when it is not
 *    NULL.
 * NOTE(review): callers test f->f_buf against NULL afterwards, so the
 * pointer presumably gets reset here; that statement is not visible in
 * this listing -- confirm against the full file. */
392 Util_DropReadAhead(BZ2FileObject
*f
)
394 if (f
->f_buf
!= NULL
) {
395 PyMem_Free(f
->f_buf
);
400 /* This is a hacked version of Python's fileobject.c:readahead(). */
402 Util_ReadAhead(BZ2FileObject
*f
, int bufsize
)
407 if (f
->f_buf
!= NULL
) {
408 if((f
->f_bufend
- f
->f_bufptr
) >= 1)
411 Util_DropReadAhead(f
);
413 if (f
->mode
== MODE_READ_EOF
) {
414 f
->f_bufptr
= f
->f_buf
;
415 f
->f_bufend
= f
->f_buf
;
418 if ((f
->f_buf
= PyMem_Malloc(bufsize
)) == NULL
) {
422 Py_BEGIN_ALLOW_THREADS
423 chunksize
= Util_UnivNewlineRead(&bzerror
, f
->fp
, f
->f_buf
,
427 if (bzerror
== BZ_STREAM_END
) {
429 f
->mode
= MODE_READ_EOF
;
430 } else if (bzerror
!= BZ_OK
) {
431 Util_CatchBZ2Error(bzerror
);
432 Util_DropReadAhead(f
);
435 f
->f_bufptr
= f
->f_buf
;
436 f
->f_bufend
= f
->f_buf
+ chunksize
;
440 /* This is a hacked version of Python's
441 * fileobject.c:readahead_get_line_skip(). */
442 static PyStringObject
*
443 Util_ReadAheadGetLineSkip(BZ2FileObject
*f
, int skip
, int bufsize
)
450 if (f
->f_buf
== NULL
)
451 if (Util_ReadAhead(f
, bufsize
) < 0)
454 len
= f
->f_bufend
- f
->f_bufptr
;
456 return (PyStringObject
*)
457 PyString_FromStringAndSize(NULL
, skip
);
458 bufptr
= memchr(f
->f_bufptr
, '\n', len
);
459 if (bufptr
!= NULL
) {
460 bufptr
++; /* Count the '\n' */
461 len
= bufptr
- f
->f_bufptr
;
462 s
= (PyStringObject
*)
463 PyString_FromStringAndSize(NULL
, skip
+len
);
466 memcpy(PyString_AS_STRING(s
)+skip
, f
->f_bufptr
, len
);
467 f
->f_bufptr
= bufptr
;
468 if (bufptr
== f
->f_bufend
)
469 Util_DropReadAhead(f
);
471 bufptr
= f
->f_bufptr
;
473 f
->f_buf
= NULL
; /* Force new readahead buffer */
474 s
= Util_ReadAheadGetLineSkip(f
, skip
+len
,
475 bufsize
+ (bufsize
>>2));
480 memcpy(PyString_AS_STRING(s
)+skip
, bufptr
, len
);
486 /* ===================================================================== */
487 /* Methods of BZ2File. */
489 PyDoc_STRVAR(BZ2File_read__doc__
,
490 "read([size]) -> string\n\
492 Read at most size uncompressed bytes, returned as a string. If the size\n\
493 argument is negative or omitted, read until EOF is reached.\n\
496 /* This is a hacked version of Python's fileobject.c:file_read(). */
498 BZ2File_read(BZ2FileObject
*self
, PyObject
*args
)
500 long bytesrequested
= -1;
501 size_t bytesread
, buffersize
, chunksize
;
503 PyObject
*ret
= NULL
;
505 if (!PyArg_ParseTuple(args
, "|l:read", &bytesrequested
))
509 switch (self
->mode
) {
513 ret
= PyString_FromString("");
516 PyErr_SetString(PyExc_ValueError
,
517 "I/O operation on closed file");
520 PyErr_SetString(PyExc_IOError
,
521 "file is not ready for reading");
525 if (bytesrequested
< 0)
526 buffersize
= Util_NewBufferSize((size_t)0);
528 buffersize
= bytesrequested
;
529 if (buffersize
> INT_MAX
) {
530 PyErr_SetString(PyExc_OverflowError
,
531 "requested number of bytes is "
532 "more than a Python string can hold");
535 ret
= PyString_FromStringAndSize((char *)NULL
, buffersize
);
541 Py_BEGIN_ALLOW_THREADS
542 chunksize
= Util_UnivNewlineRead(&bzerror
, self
->fp
,
544 buffersize
-bytesread
,
546 self
->pos
+= chunksize
;
548 bytesread
+= chunksize
;
549 if (bzerror
== BZ_STREAM_END
) {
550 self
->size
= self
->pos
;
551 self
->mode
= MODE_READ_EOF
;
553 } else if (bzerror
!= BZ_OK
) {
554 Util_CatchBZ2Error(bzerror
);
559 if (bytesrequested
< 0) {
560 buffersize
= Util_NewBufferSize(buffersize
);
561 if (_PyString_Resize(&ret
, buffersize
) < 0)
567 if (bytesread
!= buffersize
)
568 _PyString_Resize(&ret
, bytesread
);
575 PyDoc_STRVAR(BZ2File_readline__doc__
,
576 "readline([size]) -> string\n\
578 Return the next line from the file, as a string, retaining newline.\n\
579 A non-negative size argument will limit the maximum number of bytes to\n\
580 return (an incomplete line may be returned then). Return an empty\n\
585 BZ2File_readline(BZ2FileObject
*self
, PyObject
*args
)
587 PyObject
*ret
= NULL
;
590 if (!PyArg_ParseTuple(args
, "|i:readline", &sizehint
))
594 switch (self
->mode
) {
598 ret
= PyString_FromString("");
601 PyErr_SetString(PyExc_ValueError
,
602 "I/O operation on closed file");
605 PyErr_SetString(PyExc_IOError
,
606 "file is not ready for reading");
611 ret
= PyString_FromString("");
613 ret
= Util_GetLine(self
, (sizehint
< 0) ? 0 : sizehint
);
620 PyDoc_STRVAR(BZ2File_readlines__doc__
,
621 "readlines([size]) -> list\n\
623 Call readline() repeatedly and return a list of lines read.\n\
624 The optional size argument, if given, is an approximate bound on the\n\
625 total number of bytes in the lines returned.\n\
628 /* This is a hacked version of Python's fileobject.c:file_readlines(). */
/* Implementation of BZ2File.readlines([size]).
 *
 * self: the BZ2File object being read.
 * args: argument tuple; an optional long is parsed into sizehint.
 *
 * Decompressed data is read through Util_UnivNewlineRead into a stack
 * buffer of SMALLCHUNK bytes.  Complete lines are located with memchr,
 * turned into strings and appended to the result list; the unfinished
 * tail is moved to the start of the buffer.  When a full buffer holds no
 * newline, the data is moved to a string object (big_buffer) that is
 * grown as needed.  On BZ_STREAM_END the size is recorded and the mode
 * becomes MODE_READ_EOF.  A leftover partial line is completed with
 * Util_GetLine and appended last.
 *
 * Errors raised in the visible code: ValueError for a closed file,
 * IOError when the file is not ready for reading, OverflowError when a
 * line would exceed INT_MAX bytes, plus whatever Util_CatchBZ2Error sets.
 * NOTE(review): loop headers, cleanup labels and the return statement are
 * not visible in this listing; confirm them against the full file. */
630 BZ2File_readlines(BZ2FileObject
*self
, PyObject
*args
)
633 PyObject
*list
= NULL
;
635 char small_buffer
[SMALLCHUNK
];
636 char *buffer
= small_buffer
;
637 size_t buffersize
= SMALLCHUNK
;
638 PyObject
*big_buffer
= NULL
;
641 size_t totalread
= 0;
647 if (!PyArg_ParseTuple(args
, "|l:readlines", &sizehint
))
651 switch (self
->mode
) {
655 list
= PyList_New(0);
658 PyErr_SetString(PyExc_ValueError
,
659 "I/O operation on closed file");
662 PyErr_SetString(PyExc_IOError
,
663 "file is not ready for reading");
667 if ((list
= PyList_New(0)) == NULL
)
671 Py_BEGIN_ALLOW_THREADS
672 nread
= Util_UnivNewlineRead(&bzerror
, self
->fp
,
674 buffersize
-nfilled
, self
);
677 if (bzerror
== BZ_STREAM_END
) {
678 self
->size
= self
->pos
;
679 self
->mode
= MODE_READ_EOF
;
685 } else if (bzerror
!= BZ_OK
) {
686 Util_CatchBZ2Error(bzerror
);
/* Look for a newline only in the bytes that were just read. */
693 p
= memchr(buffer
+nfilled
, '\n', nread
);
694 if (!shortread
&& p
== NULL
) {
695 /* Need a larger buffer to fit this line */
698 if (buffersize
> INT_MAX
) {
699 PyErr_SetString(PyExc_OverflowError
,
700 "line is longer than a Python string can hold");
703 if (big_buffer
== NULL
) {
704 /* Create the big buffer */
705 big_buffer
= PyString_FromStringAndSize(
707 if (big_buffer
== NULL
)
709 buffer
= PyString_AS_STRING(big_buffer
);
710 memcpy(buffer
, small_buffer
, nfilled
);
713 /* Grow the big buffer */
/* NOTE(review): the result of _PyString_Resize is ignored here.  Per the
 * Python 2 C API a failed resize sets big_buffer to NULL, so the
 * PyString_AS_STRING call on the next statement would dereference NULL.
 * Check for a negative return and bail out before using big_buffer. */
714 _PyString_Resize(&big_buffer
, buffersize
);
715 buffer
= PyString_AS_STRING(big_buffer
);
719 end
= buffer
+nfilled
+nread
;
722 /* Process complete lines */
724 line
= PyString_FromStringAndSize(q
, p
-q
);
727 err
= PyList_Append(list
, line
);
732 p
= memchr(q
, '\n', end
-q
);
734 /* Move the remaining incomplete line to the start */
736 memmove(buffer
, q
, nfilled
);
/* sizehint is compared as an unsigned value against the running total. */
738 if (totalread
>= (size_t)sizehint
)
746 /* Partial last line */
747 line
= PyString_FromStringAndSize(buffer
, nfilled
);
751 /* Need to complete the last line */
752 PyObject
*rest
= Util_GetLine(self
, 0);
757 PyString_Concat(&line
, rest
);
762 err
= PyList_Append(list
, line
);
771 Py_DECREF(big_buffer
);
776 PyDoc_STRVAR(BZ2File_xreadlines__doc__
,
777 "xreadlines() -> self\n\
779 For backward compatibility. BZ2File objects now include the performance\n\
780 optimizations previously implemented in the xreadlines module.\n\
783 PyDoc_STRVAR(BZ2File_write__doc__
,
784 "write(data) -> None\n\
786 Write the 'data' string to file. Note that due to buffering, close() may\n\
787 be needed before the file on disk reflects the data written.\n\
790 /* This is a hacked version of Python's fileobject.c:file_write(). */
792 BZ2File_write(BZ2FileObject
*self
, PyObject
*args
)
794 PyObject
*ret
= NULL
;
800 if (!PyArg_ParseTuple(args
, "s*:write", &pbuf
))
806 switch (self
->mode
) {
811 PyErr_SetString(PyExc_ValueError
,
812 "I/O operation on closed file");
816 PyErr_SetString(PyExc_IOError
,
817 "file is not ready for writing");
821 self
->f_softspace
= 0;
823 Py_BEGIN_ALLOW_THREADS
824 BZ2_bzWrite (&bzerror
, self
->fp
, buf
, len
);
828 if (bzerror
!= BZ_OK
) {
829 Util_CatchBZ2Error(bzerror
);
837 PyBuffer_Release(&pbuf
);
842 PyDoc_STRVAR(BZ2File_writelines__doc__
,
843 "writelines(sequence_of_strings) -> None\n\
845 Write the sequence of strings to the file. Note that newlines are not\n\
846 added. The sequence can be any iterable object producing strings. This is\n\
847 equivalent to calling write() for each string.\n\
850 /* This is a hacked version of Python's fileobject.c:file_writelines(). */
852 BZ2File_writelines(BZ2FileObject
*self
, PyObject
*seq
)
854 #define CHUNKSIZE 1000
855 PyObject
*list
= NULL
;
856 PyObject
*iter
= NULL
;
857 PyObject
*ret
= NULL
;
859 int i
, j
, index
, len
, islist
;
863 switch (self
->mode
) {
868 PyErr_SetString(PyExc_ValueError
,
869 "I/O operation on closed file");
873 PyErr_SetString(PyExc_IOError
,
874 "file is not ready for writing");
878 islist
= PyList_Check(seq
);
880 iter
= PyObject_GetIter(seq
);
882 PyErr_SetString(PyExc_TypeError
,
883 "writelines() requires an iterable argument");
886 list
= PyList_New(CHUNKSIZE
);
891 /* Strategy: slurp CHUNKSIZE lines into a private list,
892 checking that they are all strings, then write that list
893 without holding the interpreter lock, then come back for more. */
894 for (index
= 0; ; index
+= CHUNKSIZE
) {
897 list
= PyList_GetSlice(seq
, index
, index
+CHUNKSIZE
);
900 j
= PyList_GET_SIZE(list
);
903 for (j
= 0; j
< CHUNKSIZE
; j
++) {
904 line
= PyIter_Next(iter
);
906 if (PyErr_Occurred())
910 PyList_SetItem(list
, j
, line
);
916 /* Check that all entries are indeed strings. If not,
917 apply the same rules as for file.write() and
918 convert the rets to strings. This is slow, but
919 seems to be the only way since all conversion APIs
920 could potentially execute Python code. */
921 for (i
= 0; i
< j
; i
++) {
922 PyObject
*v
= PyList_GET_ITEM(list
, i
);
923 if (!PyString_Check(v
)) {
926 if (PyObject_AsCharBuffer(v
, &buffer
, &len
)) {
927 PyErr_SetString(PyExc_TypeError
,
934 line
= PyString_FromStringAndSize(buffer
,
939 PyList_SET_ITEM(list
, i
, line
);
943 self
->f_softspace
= 0;
945 /* Since we are releasing the global lock, the
946 following code may *not* execute Python code. */
947 Py_BEGIN_ALLOW_THREADS
948 for (i
= 0; i
< j
; i
++) {
949 line
= PyList_GET_ITEM(list
, i
);
950 len
= PyString_GET_SIZE(line
);
951 BZ2_bzWrite (&bzerror
, self
->fp
,
952 PyString_AS_STRING(line
), len
);
953 if (bzerror
!= BZ_OK
) {
955 Util_CatchBZ2Error(bzerror
);
976 PyDoc_STRVAR(BZ2File_seek__doc__
,
977 "seek(offset [, whence]) -> None\n\
979 Move to new file position. Argument offset is a byte count. Optional\n\
980 argument whence defaults to 0 (offset from start of file, offset\n\
981 should be >= 0); other values are 1 (move relative to current position,\n\
982 positive or negative), and 2 (move relative to end of file, usually\n\
983 negative, although many platforms allow seeking beyond the end of a file).\n\
985 Note that seeking of bz2 files is emulated, and depending on the parameters\n\
986 the operation may be extremely slow.\n\
990 BZ2File_seek(BZ2FileObject
*self
, PyObject
*args
)
995 char small_buffer
[SMALLCHUNK
];
996 char *buffer
= small_buffer
;
997 size_t buffersize
= SMALLCHUNK
;
998 Py_off_t bytesread
= 0;
1002 PyObject
*ret
= NULL
;
1004 if (!PyArg_ParseTuple(args
, "O|i:seek", &offobj
, &where
))
1006 #if !defined(HAVE_LARGEFILE_SUPPORT)
1007 offset
= PyInt_AsLong(offobj
);
1009 offset
= PyLong_Check(offobj
) ?
1010 PyLong_AsLongLong(offobj
) : PyInt_AsLong(offobj
);
1012 if (PyErr_Occurred())
1016 Util_DropReadAhead(self
);
1017 switch (self
->mode
) {
1023 PyErr_SetString(PyExc_ValueError
,
1024 "I/O operation on closed file");
1028 PyErr_SetString(PyExc_IOError
,
1029 "seek works only while reading");
1034 if (self
->size
== -1) {
1035 assert(self
->mode
!= MODE_READ_EOF
);
1037 Py_BEGIN_ALLOW_THREADS
1038 chunksize
= Util_UnivNewlineRead(
1042 self
->pos
+= chunksize
;
1043 Py_END_ALLOW_THREADS
1045 bytesread
+= chunksize
;
1046 if (bzerror
== BZ_STREAM_END
) {
1048 } else if (bzerror
!= BZ_OK
) {
1049 Util_CatchBZ2Error(bzerror
);
1053 self
->mode
= MODE_READ_EOF
;
1054 self
->size
= self
->pos
;
1057 offset
= self
->size
+ offset
;
1058 } else if (where
== 1) {
1059 offset
= self
->pos
+ offset
;
1062 /* Before getting here, offset must be the absolute position the file
1063 * pointer should be set to. */
1065 if (offset
>= self
->pos
) {
1066 /* we can move forward */
1067 offset
-= self
->pos
;
1069 /* we cannot move back, so rewind the stream */
1070 BZ2_bzReadClose(&bzerror
, self
->fp
);
1072 PyFile_DecUseCount((PyFileObject
*)self
->file
);
1075 if (bzerror
!= BZ_OK
) {
1076 Util_CatchBZ2Error(bzerror
);
1079 ret
= PyObject_CallMethod(self
->file
, "seek", "(i)", 0);
1085 self
->fp
= BZ2_bzReadOpen(&bzerror
, PyFile_AsFile(self
->file
),
1088 PyFile_IncUseCount((PyFileObject
*)self
->file
);
1089 if (bzerror
!= BZ_OK
) {
1090 Util_CatchBZ2Error(bzerror
);
1093 self
->mode
= MODE_READ
;
1096 if (offset
<= 0 || self
->mode
== MODE_READ_EOF
)
1099 /* Before getting here, offset must be set to the number of bytes
1100 * to walk forward. */
1102 if (offset
-bytesread
> buffersize
)
1103 readsize
= buffersize
;
1105 /* offset might be wider than readsize, but the result
1106 * of the subtraction is bound by buffersize (see the
1107 * condition above). buffersize is 8192. */
1108 readsize
= (size_t)(offset
-bytesread
);
1109 Py_BEGIN_ALLOW_THREADS
1110 chunksize
= Util_UnivNewlineRead(&bzerror
, self
->fp
,
1111 buffer
, readsize
, self
);
1112 self
->pos
+= chunksize
;
1113 Py_END_ALLOW_THREADS
1114 bytesread
+= chunksize
;
1115 if (bzerror
== BZ_STREAM_END
) {
1116 self
->size
= self
->pos
;
1117 self
->mode
= MODE_READ_EOF
;
1119 } else if (bzerror
!= BZ_OK
) {
1120 Util_CatchBZ2Error(bzerror
);
1123 if (bytesread
== offset
)
1136 PyDoc_STRVAR(BZ2File_tell__doc__
,
1139 Return the current file position, an integer (may be a long integer).\n\
1143 BZ2File_tell(BZ2FileObject
*self
, PyObject
*args
)
1145 PyObject
*ret
= NULL
;
1147 if (self
->mode
== MODE_CLOSED
) {
1148 PyErr_SetString(PyExc_ValueError
,
1149 "I/O operation on closed file");
1153 #if !defined(HAVE_LARGEFILE_SUPPORT)
1154 ret
= PyInt_FromLong(self
->pos
);
1156 ret
= PyLong_FromLongLong(self
->pos
);
1163 PyDoc_STRVAR(BZ2File_close__doc__
,
1164 "close() -> None or (perhaps) an integer\n\
1166 Close the file. Sets data attribute .closed to true. A closed file\n\
1167 cannot be used for further I/O operations. close() may be called more\n\
1168 than once without error.\n\
1172 BZ2File_close(BZ2FileObject
*self
)
1174 PyObject
*ret
= NULL
;
1175 int bzerror
= BZ_OK
;
1178 switch (self
->mode
) {
1181 BZ2_bzReadClose(&bzerror
, self
->fp
);
1184 BZ2_bzWriteClose(&bzerror
, self
->fp
,
1189 PyFile_DecUseCount((PyFileObject
*)self
->file
);
1192 self
->mode
= MODE_CLOSED
;
1193 ret
= PyObject_CallMethod(self
->file
, "close", NULL
);
1194 if (bzerror
!= BZ_OK
) {
1195 Util_CatchBZ2Error(bzerror
);
1204 PyDoc_STRVAR(BZ2File_enter_doc
,
1205 "__enter__() -> self.");
1208 BZ2File_enter(BZ2FileObject
*self
)
1210 if (self
->mode
== MODE_CLOSED
) {
1211 PyErr_SetString(PyExc_ValueError
,
1212 "I/O operation on closed file");
1216 return (PyObject
*) self
;
1219 PyDoc_STRVAR(BZ2File_exit_doc
,
1220 "__exit__(*excinfo) -> None. Closes the file.");
1223 BZ2File_exit(BZ2FileObject
*self
, PyObject
*args
)
1225 PyObject
*ret
= PyObject_CallMethod((PyObject
*) self
, "close", NULL
);
1227 /* If error occurred, pass through */
1234 static PyObject
*BZ2File_getiter(BZ2FileObject
*self
);
1236 static PyMethodDef BZ2File_methods
[] = {
1237 {"read", (PyCFunction
)BZ2File_read
, METH_VARARGS
, BZ2File_read__doc__
},
1238 {"readline", (PyCFunction
)BZ2File_readline
, METH_VARARGS
, BZ2File_readline__doc__
},
1239 {"readlines", (PyCFunction
)BZ2File_readlines
, METH_VARARGS
, BZ2File_readlines__doc__
},
1240 {"xreadlines", (PyCFunction
)BZ2File_getiter
, METH_VARARGS
, BZ2File_xreadlines__doc__
},
1241 {"write", (PyCFunction
)BZ2File_write
, METH_VARARGS
, BZ2File_write__doc__
},
1242 {"writelines", (PyCFunction
)BZ2File_writelines
, METH_O
, BZ2File_writelines__doc__
},
1243 {"seek", (PyCFunction
)BZ2File_seek
, METH_VARARGS
, BZ2File_seek__doc__
},
1244 {"tell", (PyCFunction
)BZ2File_tell
, METH_NOARGS
, BZ2File_tell__doc__
},
1245 {"close", (PyCFunction
)BZ2File_close
, METH_NOARGS
, BZ2File_close__doc__
},
1246 {"__enter__", (PyCFunction
)BZ2File_enter
, METH_NOARGS
, BZ2File_enter_doc
},
1247 {"__exit__", (PyCFunction
)BZ2File_exit
, METH_VARARGS
, BZ2File_exit_doc
},
1248 {NULL
, NULL
} /* sentinel */
1252 /* ===================================================================== */
1253 /* Getters and setters of BZ2File. */
1255 /* This is a hacked version of Python's fileobject.c:get_newlines(). */
/* Getter for the newlines attribute of a BZ2File.
 *
 * self:    BZ2File object whose f_newlinetypes bit set is inspected.
 * closure: not referenced in the visible lines.
 *
 * The bit set (NEWLINE_CR, NEWLINE_LF, NEWLINE_CRLF) is converted into a
 * Python value: combinations of several kinds yield a tuple built with
 * Py_BuildValue, and plain strings are returned for the remaining cases
 * (presumably the single-kind ones; their case labels are not visible
 * here).  A value matching no case raises SystemError via PyErr_Format.
 * NOTE(review): the statement returned for NEWLINE_UNKNOWN is not visible
 * in this listing; confirm against the full file. */
1257 BZ2File_get_newlines(BZ2FileObject
*self
, void *closure
)
1259 switch (self
->f_newlinetypes
) {
1260 case NEWLINE_UNKNOWN
:
1264 return PyString_FromString("\r");
1266 return PyString_FromString("\n");
1267 case NEWLINE_CR
|NEWLINE_LF
:
1268 return Py_BuildValue("(ss)", "\r", "\n");
1270 return PyString_FromString("\r\n");
1271 case NEWLINE_CR
|NEWLINE_CRLF
:
1272 return Py_BuildValue("(ss)", "\r", "\r\n");
1273 case NEWLINE_LF
|NEWLINE_CRLF
:
1274 return Py_BuildValue("(ss)", "\n", "\r\n");
1275 case NEWLINE_CR
|NEWLINE_LF
|NEWLINE_CRLF
:
1276 return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
1278 PyErr_Format(PyExc_SystemError
,
1279 "Unknown newlines value 0x%x\n",
1280 self
->f_newlinetypes
);
1286 BZ2File_get_closed(BZ2FileObject
*self
, void *closure
)
1288 return PyInt_FromLong(self
->mode
== MODE_CLOSED
);
1292 BZ2File_get_mode(BZ2FileObject
*self
, void *closure
)
1294 return PyObject_GetAttrString(self
->file
, "mode");
1298 BZ2File_get_name(BZ2FileObject
*self
, void *closure
)
1300 return PyObject_GetAttrString(self
->file
, "name");
1303 static PyGetSetDef BZ2File_getset
[] = {
1304 {"closed", (getter
)BZ2File_get_closed
, NULL
,
1305 "True if the file is closed"},
1306 {"newlines", (getter
)BZ2File_get_newlines
, NULL
,
1307 "end-of-line convention used in this file"},
1308 {"mode", (getter
)BZ2File_get_mode
, NULL
,
1309 "file mode ('r', 'w', or 'U')"},
1310 {"name", (getter
)BZ2File_get_name
, NULL
,
1312 {NULL
} /* Sentinel */
1316 /* ===================================================================== */
1317 /* Members of BZ2File_Type. */
1320 #define OFF(x) offsetof(BZ2FileObject, x)
1322 static PyMemberDef BZ2File_members
[] = {
1323 {"softspace", T_INT
, OFF(f_softspace
), 0,
1324 "flag indicating that a space needs to be printed; used by print"},
1325 {NULL
} /* Sentinel */
1328 /* ===================================================================== */
1329 /* Slot definitions for BZ2File_Type. */
1332 BZ2File_init(BZ2FileObject
*self
, PyObject
*args
, PyObject
*kwargs
)
1334 static char *kwlist
[] = {"filename", "mode", "buffering",
1335 "compresslevel", 0};
1339 int compresslevel
= 9;
1345 if (!PyArg_ParseTupleAndKeywords(args
, kwargs
, "O|sii:BZ2File",
1346 kwlist
, &name
, &mode
, &buffering
,
1350 if (compresslevel
< 1 || compresslevel
> 9) {
1351 PyErr_SetString(PyExc_ValueError
,
1352 "compresslevel must be between 1 and 9");
1371 self
->f_univ_newline
= 0;
1373 self
->f_univ_newline
= 1;
1382 PyErr_Format(PyExc_ValueError
,
1383 "invalid mode char %c", *mode
);
1391 if (mode_char
== 0) {
1395 mode
= (mode_char
== 'r') ? "rb" : "wb";
1397 self
->file
= PyObject_CallFunction((PyObject
*)&PyFile_Type
, "(Osi)",
1398 name
, mode
, buffering
);
1399 if (self
->file
== NULL
)
1402 /* From now on, we have stuff to dealloc, so jump to error label
1403 * instead of returning */
1406 self
->lock
= PyThread_allocate_lock();
1408 PyErr_SetString(PyExc_MemoryError
, "unable to allocate lock");
1413 if (mode_char
== 'r')
1414 self
->fp
= BZ2_bzReadOpen(&bzerror
,
1415 PyFile_AsFile(self
->file
),
1418 self
->fp
= BZ2_bzWriteOpen(&bzerror
,
1419 PyFile_AsFile(self
->file
),
1420 compresslevel
, 0, 0);
1422 if (bzerror
!= BZ_OK
) {
1423 Util_CatchBZ2Error(bzerror
);
1426 PyFile_IncUseCount((PyFileObject
*)self
->file
);
1428 self
->mode
= (mode_char
== 'r') ? MODE_READ
: MODE_WRITE
;
1433 Py_CLEAR(self
->file
);
1436 PyThread_free_lock(self
->lock
);
1444 BZ2File_dealloc(BZ2FileObject
*self
)
1449 PyThread_free_lock(self
->lock
);
1451 switch (self
->mode
) {
1454 BZ2_bzReadClose(&bzerror
, self
->fp
);
1457 BZ2_bzWriteClose(&bzerror
, self
->fp
,
1462 PyFile_DecUseCount((PyFileObject
*)self
->file
);
1465 Util_DropReadAhead(self
);
1466 Py_XDECREF(self
->file
);
1467 Py_TYPE(self
)->tp_free((PyObject
*)self
);
1470 /* This is a hacked version of Python's fileobject.c:file_getiter(). */
1472 BZ2File_getiter(BZ2FileObject
*self
)
1474 if (self
->mode
== MODE_CLOSED
) {
1475 PyErr_SetString(PyExc_ValueError
,
1476 "I/O operation on closed file");
1479 Py_INCREF((PyObject
*)self
);
1480 return (PyObject
*)self
;
1483 /* This is a hacked version of Python's fileobject.c:file_iternext(). */
1484 #define READAHEAD_BUFSIZE 8192
1486 BZ2File_iternext(BZ2FileObject
*self
)
1488 PyStringObject
* ret
;
1490 if (self
->mode
== MODE_CLOSED
) {
1492 PyErr_SetString(PyExc_ValueError
,
1493 "I/O operation on closed file");
1496 ret
= Util_ReadAheadGetLineSkip(self
, 0, READAHEAD_BUFSIZE
);
1498 if (ret
== NULL
|| PyString_GET_SIZE(ret
) == 0) {
1502 return (PyObject
*)ret
;
1505 /* ===================================================================== */
1506 /* BZ2File_Type definition. */
1508 PyDoc_VAR(BZ2File__doc__
) =
1510 "BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
1512 Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
1513 writing. When opened for writing, the file will be created if it doesn't\n\
1514 exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
1515 unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
1516 is given, must be a number between 1 and 9.\n\
1520 Add a 'U' to mode to open the file for input with universal newline\n\
1521 support. Any line ending in the input file will be seen as a '\\n' in\n\
1522 Python. Also, a file so opened gains the attribute 'newlines'; the value\n\
1523 for this attribute is one of None (no newline read yet), '\\r', '\\n',\n\
1524 '\\r\\n' or a tuple containing all the newline types seen. Universal\n\
1525 newlines are available only when reading.\n\
1529 static PyTypeObject BZ2File_Type
= {
1530 PyVarObject_HEAD_INIT(NULL
, 0)
1531 "bz2.BZ2File", /*tp_name*/
1532 sizeof(BZ2FileObject
), /*tp_basicsize*/
1534 (destructor
)BZ2File_dealloc
, /*tp_dealloc*/
1541 0, /*tp_as_sequence*/
1542 0, /*tp_as_mapping*/
1546 PyObject_GenericGetAttr
,/*tp_getattro*/
1547 PyObject_GenericSetAttr
,/*tp_setattro*/
1549 Py_TPFLAGS_DEFAULT
|Py_TPFLAGS_BASETYPE
, /*tp_flags*/
1550 BZ2File__doc__
, /*tp_doc*/
1553 0, /*tp_richcompare*/
1554 0, /*tp_weaklistoffset*/
1555 (getiterfunc
)BZ2File_getiter
, /*tp_iter*/
1556 (iternextfunc
)BZ2File_iternext
, /*tp_iternext*/
1557 BZ2File_methods
, /*tp_methods*/
1558 BZ2File_members
, /*tp_members*/
1559 BZ2File_getset
, /*tp_getset*/
1564 0, /*tp_dictoffset*/
1565 (initproc
)BZ2File_init
, /*tp_init*/
1566 PyType_GenericAlloc
, /*tp_alloc*/
1567 PyType_GenericNew
, /*tp_new*/
1568 _PyObject_Del
, /*tp_free*/
1573 /* ===================================================================== */
1574 /* Methods of BZ2Comp. */
1576 PyDoc_STRVAR(BZ2Comp_compress__doc__
,
1577 "compress(data) -> string\n\
1579 Provide more data to the compressor object. It will return chunks of\n\
1580 compressed data whenever possible. When you've finished providing data\n\
1581 to compress, call the flush() method to finish the compression process,\n\
1582 and return what is left in the internal buffers.\n\
1586 BZ2Comp_compress(BZ2CompObject
*self
, PyObject
*args
)
1591 int bufsize
= SMALLCHUNK
;
1592 PY_LONG_LONG totalout
;
1593 PyObject
*ret
= NULL
;
1594 bz_stream
*bzs
= &self
->bzs
;
1597 if (!PyArg_ParseTuple(args
, "s*:compress", &pdata
))
1600 datasize
= pdata
.len
;
1602 if (datasize
== 0) {
1603 PyBuffer_Release(&pdata
);
1604 return PyString_FromString("");
1608 if (!self
->running
) {
1609 PyErr_SetString(PyExc_ValueError
,
1610 "this object was already flushed");
1614 ret
= PyString_FromStringAndSize(NULL
, bufsize
);
1618 bzs
->next_in
= data
;
1619 bzs
->avail_in
= datasize
;
1620 bzs
->next_out
= BUF(ret
);
1621 bzs
->avail_out
= bufsize
;
1623 totalout
= BZS_TOTAL_OUT(bzs
);
1626 Py_BEGIN_ALLOW_THREADS
1627 bzerror
= BZ2_bzCompress(bzs
, BZ_RUN
);
1628 Py_END_ALLOW_THREADS
1629 if (bzerror
!= BZ_RUN_OK
) {
1630 Util_CatchBZ2Error(bzerror
);
1633 if (bzs
->avail_in
== 0)
1634 break; /* no more input data */
1635 if (bzs
->avail_out
== 0) {
1636 bufsize
= Util_NewBufferSize(bufsize
);
1637 if (_PyString_Resize(&ret
, bufsize
) < 0) {
1638 BZ2_bzCompressEnd(bzs
);
1641 bzs
->next_out
= BUF(ret
) + (BZS_TOTAL_OUT(bzs
)
1643 bzs
->avail_out
= bufsize
- (bzs
->next_out
- BUF(ret
));
1647 _PyString_Resize(&ret
, (Py_ssize_t
)(BZS_TOTAL_OUT(bzs
) - totalout
));
1650 PyBuffer_Release(&pdata
);
1655 PyBuffer_Release(&pdata
);
1660 PyDoc_STRVAR(BZ2Comp_flush__doc__
,
1661 "flush() -> string\n\
1663 Finish the compression process and return what is left in internal buffers.\n\
1664 You must not use the compressor object after calling this method.\n\
1668 BZ2Comp_flush(BZ2CompObject
*self
)
1670 int bufsize
= SMALLCHUNK
;
1671 PyObject
*ret
= NULL
;
1672 bz_stream
*bzs
= &self
->bzs
;
1673 PY_LONG_LONG totalout
;
1677 if (!self
->running
) {
1678 PyErr_SetString(PyExc_ValueError
, "object was already "
1684 ret
= PyString_FromStringAndSize(NULL
, bufsize
);
1688 bzs
->next_out
= BUF(ret
);
1689 bzs
->avail_out
= bufsize
;
1691 totalout
= BZS_TOTAL_OUT(bzs
);
1694 Py_BEGIN_ALLOW_THREADS
1695 bzerror
= BZ2_bzCompress(bzs
, BZ_FINISH
);
1696 Py_END_ALLOW_THREADS
1697 if (bzerror
== BZ_STREAM_END
) {
1699 } else if (bzerror
!= BZ_FINISH_OK
) {
1700 Util_CatchBZ2Error(bzerror
);
1703 if (bzs
->avail_out
== 0) {
1704 bufsize
= Util_NewBufferSize(bufsize
);
1705 if (_PyString_Resize(&ret
, bufsize
) < 0)
1707 bzs
->next_out
= BUF(ret
);
1708 bzs
->next_out
= BUF(ret
) + (BZS_TOTAL_OUT(bzs
)
1710 bzs
->avail_out
= bufsize
- (bzs
->next_out
- BUF(ret
));
1714 if (bzs
->avail_out
!= 0)
1715 _PyString_Resize(&ret
, (Py_ssize_t
)(BZS_TOTAL_OUT(bzs
) - totalout
));
1726 static PyMethodDef BZ2Comp_methods
[] = {
1727 {"compress", (PyCFunction
)BZ2Comp_compress
, METH_VARARGS
,
1728 BZ2Comp_compress__doc__
},
1729 {"flush", (PyCFunction
)BZ2Comp_flush
, METH_NOARGS
,
1730 BZ2Comp_flush__doc__
},
1731 {NULL
, NULL
} /* sentinel */
1735 /* ===================================================================== */
1736 /* Slot definitions for BZ2Comp_Type. */
1739 BZ2Comp_init(BZ2CompObject
*self
, PyObject
*args
, PyObject
*kwargs
)
1741 int compresslevel
= 9;
1743 static char *kwlist
[] = {"compresslevel", 0};
1745 if (!PyArg_ParseTupleAndKeywords(args
, kwargs
, "|i:BZ2Compressor",
1746 kwlist
, &compresslevel
))
1749 if (compresslevel
< 1 || compresslevel
> 9) {
1750 PyErr_SetString(PyExc_ValueError
,
1751 "compresslevel must be between 1 and 9");
1756 self
->lock
= PyThread_allocate_lock();
1758 PyErr_SetString(PyExc_MemoryError
, "unable to allocate lock");
1763 memset(&self
->bzs
, 0, sizeof(bz_stream
));
1764 bzerror
= BZ2_bzCompressInit(&self
->bzs
, compresslevel
, 0, 0);
1765 if (bzerror
!= BZ_OK
) {
1766 Util_CatchBZ2Error(bzerror
);
1776 PyThread_free_lock(self
->lock
);
1784 BZ2Comp_dealloc(BZ2CompObject
*self
)
1788 PyThread_free_lock(self
->lock
);
1790 BZ2_bzCompressEnd(&self
->bzs
);
1791 Py_TYPE(self
)->tp_free((PyObject
*)self
);
1795 /* ===================================================================== */
1796 /* BZ2Comp_Type definition. */
1798 PyDoc_STRVAR(BZ2Comp__doc__
,
1799 "BZ2Compressor([compresslevel=9]) -> compressor object\n\
1801 Create a new compressor object. This object may be used to compress\n\
1802 data sequentially. If you want to compress data in one shot, use the\n\
1803 compress() function instead. The compresslevel parameter, if given,\n\
1804 must be a number between 1 and 9.\n\
1807 static PyTypeObject BZ2Comp_Type
= {
1808 PyVarObject_HEAD_INIT(NULL
, 0)
1809 "bz2.BZ2Compressor", /*tp_name*/
1810 sizeof(BZ2CompObject
), /*tp_basicsize*/
1812 (destructor
)BZ2Comp_dealloc
, /*tp_dealloc*/
1819 0, /*tp_as_sequence*/
1820 0, /*tp_as_mapping*/
1824 PyObject_GenericGetAttr
,/*tp_getattro*/
1825 PyObject_GenericSetAttr
,/*tp_setattro*/
1827 Py_TPFLAGS_DEFAULT
|Py_TPFLAGS_BASETYPE
, /*tp_flags*/
1828 BZ2Comp__doc__
, /*tp_doc*/
1831 0, /*tp_richcompare*/
1832 0, /*tp_weaklistoffset*/
1835 BZ2Comp_methods
, /*tp_methods*/
1842 0, /*tp_dictoffset*/
1843 (initproc
)BZ2Comp_init
, /*tp_init*/
1844 PyType_GenericAlloc
, /*tp_alloc*/
1845 PyType_GenericNew
, /*tp_new*/
1846 _PyObject_Del
, /*tp_free*/
1851 /* ===================================================================== */
1852 /* Members of BZ2Decomp. */
1855 #define OFF(x) offsetof(BZ2DecompObject, x)
1857 static PyMemberDef BZ2Decomp_members
[] = {
1858 {"unused_data", T_OBJECT
, OFF(unused_data
), RO
},
1859 {NULL
} /* Sentinel */
1863 /* ===================================================================== */
1864 /* Methods of BZ2Decomp. */
1866 PyDoc_STRVAR(BZ2Decomp_decompress__doc__
,
1867 "decompress(data) -> string\n\
1869 Provide more data to the decompressor object. It will return chunks\n\
1870 of decompressed data whenever possible. If you try to decompress data\n\
1871 after the end of stream is found, EOFError will be raised. If any data\n\
1872 was found after the end of stream, it'll be ignored and saved in\n\
1873 unused_data attribute.\n\
1877 BZ2Decomp_decompress(BZ2DecompObject
*self
, PyObject
*args
)
1882 int bufsize
= SMALLCHUNK
;
1883 PY_LONG_LONG totalout
;
1884 PyObject
*ret
= NULL
;
1885 bz_stream
*bzs
= &self
->bzs
;
1888 if (!PyArg_ParseTuple(args
, "s*:decompress", &pdata
))
1891 datasize
= pdata
.len
;
1894 if (!self
->running
) {
1895 PyErr_SetString(PyExc_EOFError
, "end of stream was "
1900 ret
= PyString_FromStringAndSize(NULL
, bufsize
);
1904 bzs
->next_in
= data
;
1905 bzs
->avail_in
= datasize
;
1906 bzs
->next_out
= BUF(ret
);
1907 bzs
->avail_out
= bufsize
;
1909 totalout
= BZS_TOTAL_OUT(bzs
);
1912 Py_BEGIN_ALLOW_THREADS
1913 bzerror
= BZ2_bzDecompress(bzs
);
1914 Py_END_ALLOW_THREADS
1915 if (bzerror
== BZ_STREAM_END
) {
1916 if (bzs
->avail_in
!= 0) {
1917 Py_DECREF(self
->unused_data
);
1919 PyString_FromStringAndSize(bzs
->next_in
,
1925 if (bzerror
!= BZ_OK
) {
1926 Util_CatchBZ2Error(bzerror
);
1929 if (bzs
->avail_in
== 0)
1930 break; /* no more input data */
1931 if (bzs
->avail_out
== 0) {
1932 bufsize
= Util_NewBufferSize(bufsize
);
1933 if (_PyString_Resize(&ret
, bufsize
) < 0) {
1934 BZ2_bzDecompressEnd(bzs
);
1937 bzs
->next_out
= BUF(ret
);
1938 bzs
->next_out
= BUF(ret
) + (BZS_TOTAL_OUT(bzs
)
1940 bzs
->avail_out
= bufsize
- (bzs
->next_out
- BUF(ret
));
1944 if (bzs
->avail_out
!= 0)
1945 _PyString_Resize(&ret
, (Py_ssize_t
)(BZS_TOTAL_OUT(bzs
) - totalout
));
1948 PyBuffer_Release(&pdata
);
1953 PyBuffer_Release(&pdata
);
1958 static PyMethodDef BZ2Decomp_methods
[] = {
1959 {"decompress", (PyCFunction
)BZ2Decomp_decompress
, METH_VARARGS
, BZ2Decomp_decompress__doc__
},
1960 {NULL
, NULL
} /* sentinel */
1964 /* ===================================================================== */
1965 /* Slot definitions for BZ2Decomp_Type. */
1968 BZ2Decomp_init(BZ2DecompObject
*self
, PyObject
*args
, PyObject
*kwargs
)
1972 if (!PyArg_ParseTuple(args
, ":BZ2Decompressor"))
1976 self
->lock
= PyThread_allocate_lock();
1978 PyErr_SetString(PyExc_MemoryError
, "unable to allocate lock");
1983 self
->unused_data
= PyString_FromString("");
1984 if (!self
->unused_data
)
1987 memset(&self
->bzs
, 0, sizeof(bz_stream
));
1988 bzerror
= BZ2_bzDecompressInit(&self
->bzs
, 0, 0);
1989 if (bzerror
!= BZ_OK
) {
1990 Util_CatchBZ2Error(bzerror
);
2001 PyThread_free_lock(self
->lock
);
2005 Py_CLEAR(self
->unused_data
);
2010 BZ2Decomp_dealloc(BZ2DecompObject
*self
)
2014 PyThread_free_lock(self
->lock
);
2016 Py_XDECREF(self
->unused_data
);
2017 BZ2_bzDecompressEnd(&self
->bzs
);
2018 Py_TYPE(self
)->tp_free((PyObject
*)self
);
2022 /* ===================================================================== */
2023 /* BZ2Decomp_Type definition. */
2025 PyDoc_STRVAR(BZ2Decomp__doc__
,
2026 "BZ2Decompressor() -> decompressor object\n\
2028 Create a new decompressor object. This object may be used to decompress\n\
2029 data sequentially. If you want to decompress data in one shot, use the\n\
2030 decompress() function instead.\n\
2033 static PyTypeObject BZ2Decomp_Type
= {
2034 PyVarObject_HEAD_INIT(NULL
, 0)
2035 "bz2.BZ2Decompressor", /*tp_name*/
2036 sizeof(BZ2DecompObject
), /*tp_basicsize*/
2038 (destructor
)BZ2Decomp_dealloc
, /*tp_dealloc*/
2045 0, /*tp_as_sequence*/
2046 0, /*tp_as_mapping*/
2050 PyObject_GenericGetAttr
,/*tp_getattro*/
2051 PyObject_GenericSetAttr
,/*tp_setattro*/
2053 Py_TPFLAGS_DEFAULT
|Py_TPFLAGS_BASETYPE
, /*tp_flags*/
2054 BZ2Decomp__doc__
, /*tp_doc*/
2057 0, /*tp_richcompare*/
2058 0, /*tp_weaklistoffset*/
2061 BZ2Decomp_methods
, /*tp_methods*/
2062 BZ2Decomp_members
, /*tp_members*/
2068 0, /*tp_dictoffset*/
2069 (initproc
)BZ2Decomp_init
, /*tp_init*/
2070 PyType_GenericAlloc
, /*tp_alloc*/
2071 PyType_GenericNew
, /*tp_new*/
2072 _PyObject_Del
, /*tp_free*/
2077 /* ===================================================================== */
2078 /* Module functions. */
2080 PyDoc_STRVAR(bz2_compress__doc__
,
2081 "compress(data [, compresslevel=9]) -> string\n\
2083 Compress data in one shot. If you want to compress data sequentially,\n\
2084 use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
2085 given, must be a number between 1 and 9.\n\
2089 bz2_compress(PyObject
*self
, PyObject
*args
, PyObject
*kwargs
)
2091 int compresslevel
=9;
2096 PyObject
*ret
= NULL
;
2098 bz_stream
*bzs
= &_bzs
;
2100 static char *kwlist
[] = {"data", "compresslevel", 0};
2102 if (!PyArg_ParseTupleAndKeywords(args
, kwargs
, "s*|i",
2107 datasize
= pdata
.len
;
2109 if (compresslevel
< 1 || compresslevel
> 9) {
2110 PyErr_SetString(PyExc_ValueError
,
2111 "compresslevel must be between 1 and 9");
2112 PyBuffer_Release(&pdata
);
2116 /* Conforming to bz2 manual, this is large enough to fit compressed
2117 * data in one shot. We will check it later anyway. */
2118 bufsize
= datasize
+ (datasize
/100+1) + 600;
2120 ret
= PyString_FromStringAndSize(NULL
, bufsize
);
2122 PyBuffer_Release(&pdata
);
2126 memset(bzs
, 0, sizeof(bz_stream
));
2128 bzs
->next_in
= data
;
2129 bzs
->avail_in
= datasize
;
2130 bzs
->next_out
= BUF(ret
);
2131 bzs
->avail_out
= bufsize
;
2133 bzerror
= BZ2_bzCompressInit(bzs
, compresslevel
, 0, 0);
2134 if (bzerror
!= BZ_OK
) {
2135 Util_CatchBZ2Error(bzerror
);
2136 PyBuffer_Release(&pdata
);
2142 Py_BEGIN_ALLOW_THREADS
2143 bzerror
= BZ2_bzCompress(bzs
, BZ_FINISH
);
2144 Py_END_ALLOW_THREADS
2145 if (bzerror
== BZ_STREAM_END
) {
2147 } else if (bzerror
!= BZ_FINISH_OK
) {
2148 BZ2_bzCompressEnd(bzs
);
2149 Util_CatchBZ2Error(bzerror
);
2150 PyBuffer_Release(&pdata
);
2154 if (bzs
->avail_out
== 0) {
2155 bufsize
= Util_NewBufferSize(bufsize
);
2156 if (_PyString_Resize(&ret
, bufsize
) < 0) {
2157 BZ2_bzCompressEnd(bzs
);
2158 PyBuffer_Release(&pdata
);
2162 bzs
->next_out
= BUF(ret
) + BZS_TOTAL_OUT(bzs
);
2163 bzs
->avail_out
= bufsize
- (bzs
->next_out
- BUF(ret
));
2167 if (bzs
->avail_out
!= 0)
2168 _PyString_Resize(&ret
, (Py_ssize_t
)BZS_TOTAL_OUT(bzs
));
2169 BZ2_bzCompressEnd(bzs
);
2171 PyBuffer_Release(&pdata
);
2175 PyDoc_STRVAR(bz2_decompress__doc__
,
2176 "decompress(data) -> decompressed data\n\
2178 Decompress data in one shot. If you want to decompress data sequentially,\n\
2179 use an instance of BZ2Decompressor instead.\n\
2183 bz2_decompress(PyObject
*self
, PyObject
*args
)
2188 int bufsize
= SMALLCHUNK
;
2191 bz_stream
*bzs
= &_bzs
;
2194 if (!PyArg_ParseTuple(args
, "s*:decompress", &pdata
))
2197 datasize
= pdata
.len
;
2199 if (datasize
== 0) {
2200 PyBuffer_Release(&pdata
);
2201 return PyString_FromString("");
2204 ret
= PyString_FromStringAndSize(NULL
, bufsize
);
2206 PyBuffer_Release(&pdata
);
2210 memset(bzs
, 0, sizeof(bz_stream
));
2212 bzs
->next_in
= data
;
2213 bzs
->avail_in
= datasize
;
2214 bzs
->next_out
= BUF(ret
);
2215 bzs
->avail_out
= bufsize
;
2217 bzerror
= BZ2_bzDecompressInit(bzs
, 0, 0);
2218 if (bzerror
!= BZ_OK
) {
2219 Util_CatchBZ2Error(bzerror
);
2221 PyBuffer_Release(&pdata
);
2226 Py_BEGIN_ALLOW_THREADS
2227 bzerror
= BZ2_bzDecompress(bzs
);
2228 Py_END_ALLOW_THREADS
2229 if (bzerror
== BZ_STREAM_END
) {
2231 } else if (bzerror
!= BZ_OK
) {
2232 BZ2_bzDecompressEnd(bzs
);
2233 Util_CatchBZ2Error(bzerror
);
2234 PyBuffer_Release(&pdata
);
2238 if (bzs
->avail_in
== 0) {
2239 BZ2_bzDecompressEnd(bzs
);
2240 PyErr_SetString(PyExc_ValueError
,
2241 "couldn't find end of stream");
2242 PyBuffer_Release(&pdata
);
2246 if (bzs
->avail_out
== 0) {
2247 bufsize
= Util_NewBufferSize(bufsize
);
2248 if (_PyString_Resize(&ret
, bufsize
) < 0) {
2249 BZ2_bzDecompressEnd(bzs
);
2250 PyBuffer_Release(&pdata
);
2254 bzs
->next_out
= BUF(ret
) + BZS_TOTAL_OUT(bzs
);
2255 bzs
->avail_out
= bufsize
- (bzs
->next_out
- BUF(ret
));
2259 if (bzs
->avail_out
!= 0)
2260 _PyString_Resize(&ret
, (Py_ssize_t
)BZS_TOTAL_OUT(bzs
));
2261 BZ2_bzDecompressEnd(bzs
);
2262 PyBuffer_Release(&pdata
);
2267 static PyMethodDef bz2_methods
[] = {
2268 {"compress", (PyCFunction
) bz2_compress
, METH_VARARGS
|METH_KEYWORDS
,
2269 bz2_compress__doc__
},
2270 {"decompress", (PyCFunction
) bz2_decompress
, METH_VARARGS
,
2271 bz2_decompress__doc__
},
2272 {NULL
, NULL
} /* sentinel */
2275 /* ===================================================================== */
2276 /* Initialization function. */
2278 PyDoc_STRVAR(bz2__doc__
,
2279 "The python bz2 module provides a comprehensive interface for\n\
2280 the bz2 compression library. It implements a complete file\n\
2281 interface, one shot (de)compression functions, and types for\n\
2282 sequential (de)compression.\n\
2290 Py_TYPE(&BZ2File_Type
) = &PyType_Type
;
2291 Py_TYPE(&BZ2Comp_Type
) = &PyType_Type
;
2292 Py_TYPE(&BZ2Decomp_Type
) = &PyType_Type
;
2294 m
= Py_InitModule3("bz2", bz2_methods
, bz2__doc__
);
2298 PyModule_AddObject(m
, "__author__", PyString_FromString(__author__
));
2300 Py_INCREF(&BZ2File_Type
);
2301 PyModule_AddObject(m
, "BZ2File", (PyObject
*)&BZ2File_Type
);
2303 Py_INCREF(&BZ2Comp_Type
);
2304 PyModule_AddObject(m
, "BZ2Compressor", (PyObject
*)&BZ2Comp_Type
);
2306 Py_INCREF(&BZ2Decomp_Type
);
2307 PyModule_AddObject(m
, "BZ2Decompressor", (PyObject
*)&BZ2Decomp_Type
);