Merged revisions 74356-74357 via svnmerge from
[python/dscho.git] / Modules / bz2module.c
blob5f1d01b7751a5bbccf6a4f94bf06645c3e426b5e
1 /*
3 python-bz2 - python bz2 library interface
5 Copyright (c) 2002 Gustavo Niemeyer <niemeyer@conectiva.com>
6 Copyright (c) 2002 Python Software Foundation; All Rights Reserved
8 */
10 #include "Python.h"
11 #include <stdio.h>
12 #include <bzlib.h>
13 #include "structmember.h"
15 #ifdef WITH_THREAD
16 #include "pythread.h"
17 #endif
/* Author credit text for the module. */
static char __author__[] =
"The bz2 python module was written by:\n\
\n\
Gustavo Niemeyer <niemeyer@conectiva.com>\n\
";
/* Our very own off_t-like type, 64-bit if possible */
/* copied from Objects/fileobject.c */
/* off_t is used both when large-file support is not configured and when
   off_t itself is already at least 8 bytes wide; otherwise fall back to
   fpos_t if that one is wide enough. */
#if !defined(HAVE_LARGEFILE_SUPPORT) || SIZEOF_OFF_T >= 8
typedef off_t Py_off_t;
#elif SIZEOF_FPOS_T >= 8
typedef fpos_t Py_off_t;
#else
#error "Large file support, but neither off_t nor fpos_t is large enough."
#endif
/* Shorthand for the raw character buffer of a bytes object. */
#define BUF(v) PyBytes_AS_STRING(v)

/* Values stored in BZ2FileObject.mode. */
#define MODE_CLOSED   0
#define MODE_READ     1
#define MODE_READ_EOF 2
#define MODE_WRITE    3

/* Exact type check (subclasses do not match). */
#define BZ2FileObject_Check(v)  (Py_TYPE(v) == &BZ2File_Type)
/* BZ_CONFIG_ERROR is used here as the marker for a bzlib that exports the
   BZ2_-prefixed API and splits total_out into hi32/lo32 halves.  Without
   it, the unprefixed names and a single total_out field are used.

   BZS_TOTAL_OUT(bzs) yields the total output byte count of a bz_stream
   pointer.  The argument is parenthesized so that expressions such as
   &stream can be passed safely. */
#ifdef BZ_CONFIG_ERROR

#if SIZEOF_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((long)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#elif SIZEOF_LONG_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((PY_LONG_LONG)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#else
#define BZS_TOTAL_OUT(bzs) \
    ((bzs)->total_out_lo32)
#endif

#else /* ! BZ_CONFIG_ERROR */

#define BZ2_bzRead bzRead
#define BZ2_bzReadOpen bzReadOpen
#define BZ2_bzReadClose bzReadClose
#define BZ2_bzWrite bzWrite
#define BZ2_bzWriteOpen bzWriteOpen
#define BZ2_bzWriteClose bzWriteClose
#define BZ2_bzCompress bzCompress
#define BZ2_bzCompressInit bzCompressInit
#define BZ2_bzCompressEnd bzCompressEnd
#define BZ2_bzDecompress bzDecompress
#define BZ2_bzDecompressInit bzDecompressInit
#define BZ2_bzDecompressEnd bzDecompressEnd

#define BZS_TOTAL_OUT(bzs) ((bzs)->total_out)

#endif /* ! BZ_CONFIG_ERROR */
/* Per-object lock helpers.
 *
 * ACQUIRE_LOCK first tries a non-blocking acquire.  Only if that fails does
 * it release the GIL and block, so that the thread currently holding the
 * object lock (which may itself be waiting to re-acquire the GIL after a
 * Py_BEGIN_ALLOW_THREADS section) can make progress.  Blocking on the
 * object lock while still holding the GIL could deadlock.
 *
 * Without thread support both macros expand to nothing. */
#ifdef WITH_THREAD
#define ACQUIRE_LOCK(obj) do { \
        if (!PyThread_acquire_lock((obj)->lock, 0)) { \
            Py_BEGIN_ALLOW_THREADS \
            PyThread_acquire_lock((obj)->lock, 1); \
            Py_END_ALLOW_THREADS \
        } \
    } while (0)
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
#else
#define ACQUIRE_LOCK(obj)
#define RELEASE_LOCK(obj)
#endif
/* Bits in f_newlinetypes */
#define NEWLINE_UNKNOWN 0       /* No newline seen, yet */
#define NEWLINE_CR      1       /* \r newline seen */
#define NEWLINE_LF      2       /* \n newline seen */
#define NEWLINE_CRLF    4       /* \r\n newline seen */
94 /* ===================================================================== */
95 /* Structure definitions. */
97 typedef struct {
98 PyObject_HEAD
99 FILE *rawfp;
101 char* f_buf; /* Allocated readahead buffer */
102 char* f_bufend; /* Points after last occupied position */
103 char* f_bufptr; /* Current buffer position */
105 BZFILE *fp;
106 int mode;
107 Py_off_t pos;
108 Py_off_t size;
109 #ifdef WITH_THREAD
110 PyThread_type_lock lock;
111 #endif
112 } BZ2FileObject;
114 typedef struct {
115 PyObject_HEAD
116 bz_stream bzs;
117 int running;
118 #ifdef WITH_THREAD
119 PyThread_type_lock lock;
120 #endif
121 } BZ2CompObject;
123 typedef struct {
124 PyObject_HEAD
125 bz_stream bzs;
126 int running;
127 PyObject *unused_data;
128 #ifdef WITH_THREAD
129 PyThread_type_lock lock;
130 #endif
131 } BZ2DecompObject;
133 /* ===================================================================== */
134 /* Utility functions. */
136 static int
137 Util_CatchBZ2Error(int bzerror)
139 int ret = 0;
140 switch(bzerror) {
141 case BZ_OK:
142 case BZ_STREAM_END:
143 break;
145 #ifdef BZ_CONFIG_ERROR
146 case BZ_CONFIG_ERROR:
147 PyErr_SetString(PyExc_SystemError,
148 "the bz2 library was not compiled "
149 "correctly");
150 ret = 1;
151 break;
152 #endif
154 case BZ_PARAM_ERROR:
155 PyErr_SetString(PyExc_ValueError,
156 "the bz2 library has received wrong "
157 "parameters");
158 ret = 1;
159 break;
161 case BZ_MEM_ERROR:
162 PyErr_NoMemory();
163 ret = 1;
164 break;
166 case BZ_DATA_ERROR:
167 case BZ_DATA_ERROR_MAGIC:
168 PyErr_SetString(PyExc_IOError, "invalid data stream");
169 ret = 1;
170 break;
172 case BZ_IO_ERROR:
173 PyErr_SetString(PyExc_IOError, "unknown IO error");
174 ret = 1;
175 break;
177 case BZ_UNEXPECTED_EOF:
178 PyErr_SetString(PyExc_EOFError,
179 "compressed file ended before the "
180 "logical end-of-stream was detected");
181 ret = 1;
182 break;
184 case BZ_SEQUENCE_ERROR:
185 PyErr_SetString(PyExc_RuntimeError,
186 "wrong sequence of bz2 library "
187 "commands used");
188 ret = 1;
189 break;
191 return ret;
/* SMALLCHUNK is the linear growth step for small buffers: BUFSIZ, but
   never less than 8192. */
#if BUFSIZ < 8192
#define SMALLCHUNK 8192
#else
#define SMALLCHUNK BUFSIZ
#endif

/* BIGCHUNK is the threshold up to which buffers are doubled, and the
   linear growth step beyond it. */
#if SIZEOF_INT < 4
#define BIGCHUNK  (512 * 32)
#else
#define BIGCHUNK  (512 * 1024)
#endif

/* This is a hacked version of Python's fileobject.c:new_buffersize(). */
/* Return the next buffer size to use after `currentsize`:
 *   - up to SMALLCHUNK:            grow by SMALLCHUNK,
 *   - above that, up to BIGCHUNK:  double,
 *   - above BIGCHUNK:              grow by BIGCHUNK. */
static size_t
Util_NewBufferSize(size_t currentsize)
{
    if (currentsize > SMALLCHUNK) {
        /* Keep doubling until we reach BIGCHUNK;
           then keep adding BIGCHUNK. */
        if (currentsize <= BIGCHUNK)
            return currentsize + currentsize;
        else
            return currentsize + BIGCHUNK;
    }
    return currentsize + SMALLCHUNK;
}
221 /* This is a hacked version of Python's fileobject.c:get_line(). */
222 static PyObject *
223 Util_GetLine(BZ2FileObject *f, int n)
225 char c;
226 char *buf, *end;
227 size_t total_v_size; /* total # of slots in buffer */
228 size_t used_v_size; /* # used slots in buffer */
229 size_t increment; /* amount to increment the buffer */
230 PyObject *v;
231 int bzerror;
232 int bytes_read;
234 total_v_size = n > 0 ? n : 100;
235 v = PyBytes_FromStringAndSize((char *)NULL, total_v_size);
236 if (v == NULL)
237 return NULL;
239 buf = BUF(v);
240 end = buf + total_v_size;
242 for (;;) {
243 Py_BEGIN_ALLOW_THREADS
244 do {
245 bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1);
246 f->pos++;
247 if (bytes_read == 0)
248 break;
249 *buf++ = c;
250 } while (bzerror == BZ_OK && c != '\n' && buf != end);
251 Py_END_ALLOW_THREADS
252 if (bzerror == BZ_STREAM_END) {
253 f->size = f->pos;
254 f->mode = MODE_READ_EOF;
255 break;
256 } else if (bzerror != BZ_OK) {
257 Util_CatchBZ2Error(bzerror);
258 Py_DECREF(v);
259 return NULL;
261 if (c == '\n')
262 break;
263 /* Must be because buf == end */
264 if (n > 0)
265 break;
266 used_v_size = total_v_size;
267 increment = total_v_size >> 2; /* mild exponential growth */
268 total_v_size += increment;
269 if (total_v_size > INT_MAX) {
270 PyErr_SetString(PyExc_OverflowError,
271 "line is longer than a Python string can hold");
272 Py_DECREF(v);
273 return NULL;
275 if (_PyBytes_Resize(&v, total_v_size) < 0) {
276 return NULL;
278 buf = BUF(v) + used_v_size;
279 end = BUF(v) + total_v_size;
282 used_v_size = buf - BUF(v);
283 if (used_v_size != total_v_size) {
284 if (_PyBytes_Resize(&v, used_v_size) < 0) {
285 v = NULL;
288 return v;
291 /* This is a hacked version of Python's fileobject.c:drop_readahead(). */
292 static void
293 Util_DropReadAhead(BZ2FileObject *f)
295 if (f->f_buf != NULL) {
296 PyMem_Free(f->f_buf);
297 f->f_buf = NULL;
301 /* This is a hacked version of Python's fileobject.c:readahead(). */
302 static int
303 Util_ReadAhead(BZ2FileObject *f, int bufsize)
305 int chunksize;
306 int bzerror;
308 if (f->f_buf != NULL) {
309 if((f->f_bufend - f->f_bufptr) >= 1)
310 return 0;
311 else
312 Util_DropReadAhead(f);
314 if (f->mode == MODE_READ_EOF) {
315 f->f_bufptr = f->f_buf;
316 f->f_bufend = f->f_buf;
317 return 0;
319 if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
320 PyErr_NoMemory();
321 return -1;
323 Py_BEGIN_ALLOW_THREADS
324 chunksize = BZ2_bzRead(&bzerror, f->fp, f->f_buf, bufsize);
325 Py_END_ALLOW_THREADS
326 f->pos += chunksize;
327 if (bzerror == BZ_STREAM_END) {
328 f->size = f->pos;
329 f->mode = MODE_READ_EOF;
330 } else if (bzerror != BZ_OK) {
331 Util_CatchBZ2Error(bzerror);
332 Util_DropReadAhead(f);
333 return -1;
335 f->f_bufptr = f->f_buf;
336 f->f_bufend = f->f_buf + chunksize;
337 return 0;
340 /* This is a hacked version of Python's
341 * fileobject.c:readahead_get_line_skip(). */
342 static PyBytesObject *
343 Util_ReadAheadGetLineSkip(BZ2FileObject *f, int skip, int bufsize)
345 PyBytesObject* s;
346 char *bufptr;
347 char *buf;
348 int len;
350 if (f->f_buf == NULL)
351 if (Util_ReadAhead(f, bufsize) < 0)
352 return NULL;
354 len = f->f_bufend - f->f_bufptr;
355 if (len == 0)
356 return (PyBytesObject *)
357 PyBytes_FromStringAndSize(NULL, skip);
358 bufptr = memchr(f->f_bufptr, '\n', len);
359 if (bufptr != NULL) {
360 bufptr++; /* Count the '\n' */
361 len = bufptr - f->f_bufptr;
362 s = (PyBytesObject *)
363 PyBytes_FromStringAndSize(NULL, skip+len);
364 if (s == NULL)
365 return NULL;
366 memcpy(PyBytes_AS_STRING(s)+skip, f->f_bufptr, len);
367 f->f_bufptr = bufptr;
368 if (bufptr == f->f_bufend)
369 Util_DropReadAhead(f);
370 } else {
371 bufptr = f->f_bufptr;
372 buf = f->f_buf;
373 f->f_buf = NULL; /* Force new readahead buffer */
374 s = Util_ReadAheadGetLineSkip(f, skip+len,
375 bufsize + (bufsize>>2));
376 if (s == NULL) {
377 PyMem_Free(buf);
378 return NULL;
380 memcpy(PyBytes_AS_STRING(s)+skip, bufptr, len);
381 PyMem_Free(buf);
383 return s;
386 /* ===================================================================== */
387 /* Methods of BZ2File. */
389 PyDoc_STRVAR(BZ2File_read__doc__,
390 "read([size]) -> string\n\
392 Read at most size uncompressed bytes, returned as a string. If the size\n\
393 argument is negative or omitted, read until EOF is reached.\n\
396 /* This is a hacked version of Python's fileobject.c:file_read(). */
397 static PyObject *
398 BZ2File_read(BZ2FileObject *self, PyObject *args)
400 long bytesrequested = -1;
401 size_t bytesread, buffersize, chunksize;
402 int bzerror;
403 PyObject *ret = NULL;
405 if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
406 return NULL;
408 ACQUIRE_LOCK(self);
409 switch (self->mode) {
410 case MODE_READ:
411 break;
412 case MODE_READ_EOF:
413 ret = PyBytes_FromStringAndSize("", 0);
414 goto cleanup;
415 case MODE_CLOSED:
416 PyErr_SetString(PyExc_ValueError,
417 "I/O operation on closed file");
418 goto cleanup;
419 default:
420 PyErr_SetString(PyExc_IOError,
421 "file is not ready for reading");
422 goto cleanup;
425 if (bytesrequested < 0)
426 buffersize = Util_NewBufferSize((size_t)0);
427 else
428 buffersize = bytesrequested;
429 if (buffersize > INT_MAX) {
430 PyErr_SetString(PyExc_OverflowError,
431 "requested number of bytes is "
432 "more than a Python string can hold");
433 goto cleanup;
435 ret = PyBytes_FromStringAndSize((char *)NULL, buffersize);
436 if (ret == NULL || buffersize == 0)
437 goto cleanup;
438 bytesread = 0;
440 for (;;) {
441 Py_BEGIN_ALLOW_THREADS
442 chunksize = BZ2_bzRead(&bzerror, self->fp,
443 BUF(ret)+bytesread,
444 buffersize-bytesread);
445 self->pos += chunksize;
446 Py_END_ALLOW_THREADS
447 bytesread += chunksize;
448 if (bzerror == BZ_STREAM_END) {
449 self->size = self->pos;
450 self->mode = MODE_READ_EOF;
451 break;
452 } else if (bzerror != BZ_OK) {
453 Util_CatchBZ2Error(bzerror);
454 Py_DECREF(ret);
455 ret = NULL;
456 goto cleanup;
458 if (bytesrequested < 0) {
459 buffersize = Util_NewBufferSize(buffersize);
460 if (_PyBytes_Resize(&ret, buffersize) < 0) {
461 ret = NULL;
462 goto cleanup;
464 } else {
465 break;
468 if (bytesread != buffersize) {
469 if (_PyBytes_Resize(&ret, bytesread) < 0) {
470 ret = NULL;
474 cleanup:
475 RELEASE_LOCK(self);
476 return ret;
479 PyDoc_STRVAR(BZ2File_readline__doc__,
480 "readline([size]) -> string\n\
482 Return the next line from the file, as a string, retaining newline.\n\
483 A non-negative size argument will limit the maximum number of bytes to\n\
484 return (an incomplete line may be returned then). Return an empty\n\
485 string at EOF.\n\
488 static PyObject *
489 BZ2File_readline(BZ2FileObject *self, PyObject *args)
491 PyObject *ret = NULL;
492 int sizehint = -1;
494 if (!PyArg_ParseTuple(args, "|i:readline", &sizehint))
495 return NULL;
497 ACQUIRE_LOCK(self);
498 switch (self->mode) {
499 case MODE_READ:
500 break;
501 case MODE_READ_EOF:
502 ret = PyBytes_FromStringAndSize("", 0);
503 goto cleanup;
504 case MODE_CLOSED:
505 PyErr_SetString(PyExc_ValueError,
506 "I/O operation on closed file");
507 goto cleanup;
508 default:
509 PyErr_SetString(PyExc_IOError,
510 "file is not ready for reading");
511 goto cleanup;
514 if (sizehint == 0)
515 ret = PyBytes_FromStringAndSize("", 0);
516 else
517 ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint);
519 cleanup:
520 RELEASE_LOCK(self);
521 return ret;
524 PyDoc_STRVAR(BZ2File_readlines__doc__,
525 "readlines([size]) -> list\n\
527 Call readline() repeatedly and return a list of lines read.\n\
528 The optional size argument, if given, is an approximate bound on the\n\
529 total number of bytes in the lines returned.\n\
532 /* This is a hacked version of Python's fileobject.c:file_readlines(). */
533 static PyObject *
534 BZ2File_readlines(BZ2FileObject *self, PyObject *args)
536 long sizehint = 0;
537 PyObject *list = NULL;
538 PyObject *line;
539 char small_buffer[SMALLCHUNK];
540 char *buffer = small_buffer;
541 size_t buffersize = SMALLCHUNK;
542 PyObject *big_buffer = NULL;
543 size_t nfilled = 0;
544 size_t nread;
545 size_t totalread = 0;
546 char *p, *q, *end;
547 int err;
548 int shortread = 0;
549 int bzerror;
551 if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
552 return NULL;
554 ACQUIRE_LOCK(self);
555 switch (self->mode) {
556 case MODE_READ:
557 break;
558 case MODE_READ_EOF:
559 list = PyList_New(0);
560 goto cleanup;
561 case MODE_CLOSED:
562 PyErr_SetString(PyExc_ValueError,
563 "I/O operation on closed file");
564 goto cleanup;
565 default:
566 PyErr_SetString(PyExc_IOError,
567 "file is not ready for reading");
568 goto cleanup;
571 if ((list = PyList_New(0)) == NULL)
572 goto cleanup;
574 for (;;) {
575 Py_BEGIN_ALLOW_THREADS
576 nread = BZ2_bzRead(&bzerror, self->fp,
577 buffer+nfilled, buffersize-nfilled);
578 self->pos += nread;
579 Py_END_ALLOW_THREADS
580 if (bzerror == BZ_STREAM_END) {
581 self->size = self->pos;
582 self->mode = MODE_READ_EOF;
583 if (nread == 0) {
584 sizehint = 0;
585 break;
587 shortread = 1;
588 } else if (bzerror != BZ_OK) {
589 Util_CatchBZ2Error(bzerror);
590 error:
591 Py_DECREF(list);
592 list = NULL;
593 goto cleanup;
595 totalread += nread;
596 p = memchr(buffer+nfilled, '\n', nread);
597 if (!shortread && p == NULL) {
598 /* Need a larger buffer to fit this line */
599 nfilled += nread;
600 buffersize *= 2;
601 if (buffersize > INT_MAX) {
602 PyErr_SetString(PyExc_OverflowError,
603 "line is longer than a Python string can hold");
604 goto error;
606 if (big_buffer == NULL) {
607 /* Create the big buffer */
608 big_buffer = PyBytes_FromStringAndSize(
609 NULL, buffersize);
610 if (big_buffer == NULL)
611 goto error;
612 buffer = PyBytes_AS_STRING(big_buffer);
613 memcpy(buffer, small_buffer, nfilled);
615 else {
616 /* Grow the big buffer */
617 if (_PyBytes_Resize(&big_buffer, buffersize) < 0){
618 big_buffer = NULL;
619 goto error;
621 buffer = PyBytes_AS_STRING(big_buffer);
623 continue;
625 end = buffer+nfilled+nread;
626 q = buffer;
627 while (p != NULL) {
628 /* Process complete lines */
629 p++;
630 line = PyBytes_FromStringAndSize(q, p-q);
631 if (line == NULL)
632 goto error;
633 err = PyList_Append(list, line);
634 Py_DECREF(line);
635 if (err != 0)
636 goto error;
637 q = p;
638 p = memchr(q, '\n', end-q);
640 /* Move the remaining incomplete line to the start */
641 nfilled = end-q;
642 memmove(buffer, q, nfilled);
643 if (sizehint > 0)
644 if (totalread >= (size_t)sizehint)
645 break;
646 if (shortread) {
647 sizehint = 0;
648 break;
651 if (nfilled != 0) {
652 /* Partial last line */
653 line = PyBytes_FromStringAndSize(buffer, nfilled);
654 if (line == NULL)
655 goto error;
656 if (sizehint > 0) {
657 /* Need to complete the last line */
658 PyObject *rest = Util_GetLine(self, 0);
659 if (rest == NULL) {
660 Py_DECREF(line);
661 goto error;
663 PyBytes_Concat(&line, rest);
664 Py_DECREF(rest);
665 if (line == NULL)
666 goto error;
668 err = PyList_Append(list, line);
669 Py_DECREF(line);
670 if (err != 0)
671 goto error;
674 cleanup:
675 RELEASE_LOCK(self);
676 if (big_buffer) {
677 Py_DECREF(big_buffer);
679 return list;
682 PyDoc_STRVAR(BZ2File_write__doc__,
683 "write(data) -> None\n\
685 Write the 'data' string to file. Note that due to buffering, close() may\n\
686 be needed before the file on disk reflects the data written.\n\
689 /* This is a hacked version of Python's fileobject.c:file_write(). */
690 static PyObject *
691 BZ2File_write(BZ2FileObject *self, PyObject *args)
693 PyObject *ret = NULL;
694 Py_buffer pbuf;
695 char *buf;
696 int len;
697 int bzerror;
699 if (!PyArg_ParseTuple(args, "y*:write", &pbuf))
700 return NULL;
701 buf = pbuf.buf;
702 len = pbuf.len;
704 ACQUIRE_LOCK(self);
705 switch (self->mode) {
706 case MODE_WRITE:
707 break;
709 case MODE_CLOSED:
710 PyErr_SetString(PyExc_ValueError,
711 "I/O operation on closed file");
712 goto cleanup;
714 default:
715 PyErr_SetString(PyExc_IOError,
716 "file is not ready for writing");
717 goto cleanup;
720 Py_BEGIN_ALLOW_THREADS
721 BZ2_bzWrite (&bzerror, self->fp, buf, len);
722 self->pos += len;
723 Py_END_ALLOW_THREADS
725 if (bzerror != BZ_OK) {
726 Util_CatchBZ2Error(bzerror);
727 goto cleanup;
730 Py_INCREF(Py_None);
731 ret = Py_None;
733 cleanup:
734 PyBuffer_Release(&pbuf);
735 RELEASE_LOCK(self);
736 return ret;
739 PyDoc_STRVAR(BZ2File_writelines__doc__,
740 "writelines(sequence_of_strings) -> None\n\
742 Write the sequence of strings to the file. Note that newlines are not\n\
743 added. The sequence can be any iterable object producing strings. This is\n\
744 equivalent to calling write() for each string.\n\
747 /* This is a hacked version of Python's fileobject.c:file_writelines(). */
748 static PyObject *
749 BZ2File_writelines(BZ2FileObject *self, PyObject *seq)
751 #define CHUNKSIZE 1000
752 PyObject *list = NULL;
753 PyObject *iter = NULL;
754 PyObject *ret = NULL;
755 PyObject *line;
756 int i, j, index, len, islist;
757 int bzerror;
759 ACQUIRE_LOCK(self);
760 switch (self->mode) {
761 case MODE_WRITE:
762 break;
764 case MODE_CLOSED:
765 PyErr_SetString(PyExc_ValueError,
766 "I/O operation on closed file");
767 goto error;
769 default:
770 PyErr_SetString(PyExc_IOError,
771 "file is not ready for writing");
772 goto error;
775 islist = PyList_Check(seq);
776 if (!islist) {
777 iter = PyObject_GetIter(seq);
778 if (iter == NULL) {
779 PyErr_SetString(PyExc_TypeError,
780 "writelines() requires an iterable argument");
781 goto error;
783 list = PyList_New(CHUNKSIZE);
784 if (list == NULL)
785 goto error;
788 /* Strategy: slurp CHUNKSIZE lines into a private list,
789 checking that they are all strings, then write that list
790 without holding the interpreter lock, then come back for more. */
791 for (index = 0; ; index += CHUNKSIZE) {
792 if (islist) {
793 Py_XDECREF(list);
794 list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
795 if (list == NULL)
796 goto error;
797 j = PyList_GET_SIZE(list);
799 else {
800 for (j = 0; j < CHUNKSIZE; j++) {
801 line = PyIter_Next(iter);
802 if (line == NULL) {
803 if (PyErr_Occurred())
804 goto error;
805 break;
807 PyList_SetItem(list, j, line);
810 if (j == 0)
811 break;
813 /* Check that all entries are indeed byte strings. If not,
814 apply the same rules as for file.write() and
815 convert the rets to strings. This is slow, but
816 seems to be the only way since all conversion APIs
817 could potentially execute Python code. */
818 for (i = 0; i < j; i++) {
819 PyObject *v = PyList_GET_ITEM(list, i);
820 if (!PyBytes_Check(v)) {
821 const char *buffer;
822 Py_ssize_t len;
823 if (PyObject_AsCharBuffer(v, &buffer, &len)) {
824 PyErr_SetString(PyExc_TypeError,
825 "writelines() "
826 "argument must be "
827 "a sequence of "
828 "bytes objects");
829 goto error;
831 line = PyBytes_FromStringAndSize(buffer,
832 len);
833 if (line == NULL)
834 goto error;
835 Py_DECREF(v);
836 PyList_SET_ITEM(list, i, line);
840 /* Since we are releasing the global lock, the
841 following code may *not* execute Python code. */
842 Py_BEGIN_ALLOW_THREADS
843 for (i = 0; i < j; i++) {
844 line = PyList_GET_ITEM(list, i);
845 len = PyBytes_GET_SIZE(line);
846 BZ2_bzWrite (&bzerror, self->fp,
847 PyBytes_AS_STRING(line), len);
848 if (bzerror != BZ_OK) {
849 Py_BLOCK_THREADS
850 Util_CatchBZ2Error(bzerror);
851 goto error;
854 Py_END_ALLOW_THREADS
856 if (j < CHUNKSIZE)
857 break;
860 Py_INCREF(Py_None);
861 ret = Py_None;
863 error:
864 RELEASE_LOCK(self);
865 Py_XDECREF(list);
866 Py_XDECREF(iter);
867 return ret;
868 #undef CHUNKSIZE
871 PyDoc_STRVAR(BZ2File_seek__doc__,
872 "seek(offset [, whence]) -> None\n\
874 Move to new file position. Argument offset is a byte count. Optional\n\
875 argument whence defaults to 0 (offset from start of file, offset\n\
876 should be >= 0); other values are 1 (move relative to current position,\n\
877 positive or negative), and 2 (move relative to end of file, usually\n\
878 negative, although many platforms allow seeking beyond the end of a file).\n\
880 Note that seeking of bz2 files is emulated, and depending on the parameters\n\
881 the operation may be extremely slow.\n\
884 static PyObject *
885 BZ2File_seek(BZ2FileObject *self, PyObject *args)
887 int where = 0;
888 PyObject *offobj;
889 Py_off_t offset;
890 char small_buffer[SMALLCHUNK];
891 char *buffer = small_buffer;
892 size_t buffersize = SMALLCHUNK;
893 Py_off_t bytesread = 0;
894 size_t readsize;
895 int chunksize;
896 int bzerror;
897 PyObject *ret = NULL;
899 if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &where))
900 return NULL;
901 #if !defined(HAVE_LARGEFILE_SUPPORT)
902 offset = PyLong_AsLong(offobj);
903 #else
904 offset = PyLong_Check(offobj) ?
905 PyLong_AsLongLong(offobj) : PyLong_AsLong(offobj);
906 #endif
907 if (PyErr_Occurred())
908 return NULL;
910 ACQUIRE_LOCK(self);
911 Util_DropReadAhead(self);
912 switch (self->mode) {
913 case MODE_READ:
914 case MODE_READ_EOF:
915 break;
917 case MODE_CLOSED:
918 PyErr_SetString(PyExc_ValueError,
919 "I/O operation on closed file");
920 goto cleanup;
922 default:
923 PyErr_SetString(PyExc_IOError,
924 "seek works only while reading");
925 goto cleanup;
928 if (where == 2) {
929 if (self->size == -1) {
930 assert(self->mode != MODE_READ_EOF);
931 for (;;) {
932 Py_BEGIN_ALLOW_THREADS
933 chunksize = BZ2_bzRead(&bzerror, self->fp,
934 buffer, buffersize);
935 self->pos += chunksize;
936 Py_END_ALLOW_THREADS
938 bytesread += chunksize;
939 if (bzerror == BZ_STREAM_END) {
940 break;
941 } else if (bzerror != BZ_OK) {
942 Util_CatchBZ2Error(bzerror);
943 goto cleanup;
946 self->mode = MODE_READ_EOF;
947 self->size = self->pos;
948 bytesread = 0;
950 offset = self->size + offset;
951 } else if (where == 1) {
952 offset = self->pos + offset;
955 /* Before getting here, offset must be the absolute position the file
956 * pointer should be set to. */
958 if (offset >= self->pos) {
959 /* we can move forward */
960 offset -= self->pos;
961 } else {
962 /* we cannot move back, so rewind the stream */
963 BZ2_bzReadClose(&bzerror, self->fp);
964 if (bzerror != BZ_OK) {
965 Util_CatchBZ2Error(bzerror);
966 goto cleanup;
968 rewind(self->rawfp);
969 self->pos = 0;
970 self->fp = BZ2_bzReadOpen(&bzerror, self->rawfp,
971 0, 0, NULL, 0);
972 if (bzerror != BZ_OK) {
973 Util_CatchBZ2Error(bzerror);
974 goto cleanup;
976 self->mode = MODE_READ;
979 if (offset <= 0 || self->mode == MODE_READ_EOF)
980 goto exit;
982 /* Before getting here, offset must be set to the number of bytes
983 * to walk forward. */
984 for (;;) {
985 if (offset-bytesread > buffersize)
986 readsize = buffersize;
987 else
988 /* offset might be wider that readsize, but the result
989 * of the subtraction is bound by buffersize (see the
990 * condition above). buffersize is 8192. */
991 readsize = (size_t)(offset-bytesread);
992 Py_BEGIN_ALLOW_THREADS
993 chunksize = BZ2_bzRead(&bzerror, self->fp, buffer, readsize);
994 self->pos += chunksize;
995 Py_END_ALLOW_THREADS
996 bytesread += chunksize;
997 if (bzerror == BZ_STREAM_END) {
998 self->size = self->pos;
999 self->mode = MODE_READ_EOF;
1000 break;
1001 } else if (bzerror != BZ_OK) {
1002 Util_CatchBZ2Error(bzerror);
1003 goto cleanup;
1005 if (bytesread == offset)
1006 break;
1009 exit:
1010 Py_INCREF(Py_None);
1011 ret = Py_None;
1013 cleanup:
1014 RELEASE_LOCK(self);
1015 return ret;
1018 PyDoc_STRVAR(BZ2File_tell__doc__,
1019 "tell() -> int\n\
1021 Return the current file position, an integer (may be a long integer).\n\
1024 static PyObject *
1025 BZ2File_tell(BZ2FileObject *self, PyObject *args)
1027 PyObject *ret = NULL;
1029 if (self->mode == MODE_CLOSED) {
1030 PyErr_SetString(PyExc_ValueError,
1031 "I/O operation on closed file");
1032 goto cleanup;
1035 #if !defined(HAVE_LARGEFILE_SUPPORT)
1036 ret = PyLong_FromLong(self->pos);
1037 #else
1038 ret = PyLong_FromLongLong(self->pos);
1039 #endif
1041 cleanup:
1042 return ret;
1045 PyDoc_STRVAR(BZ2File_close__doc__,
1046 "close() -> None or (perhaps) an integer\n\
1048 Close the file. Sets data attribute .closed to true. A closed file\n\
1049 cannot be used for further I/O operations. close() may be called more\n\
1050 than once without error.\n\
1053 static PyObject *
1054 BZ2File_close(BZ2FileObject *self)
1056 PyObject *ret = NULL;
1057 int bzerror = BZ_OK;
1059 if (self->mode == MODE_CLOSED) {
1060 Py_RETURN_NONE;
1063 ACQUIRE_LOCK(self);
1064 switch (self->mode) {
1065 case MODE_READ:
1066 case MODE_READ_EOF:
1067 BZ2_bzReadClose(&bzerror, self->fp);
1068 break;
1069 case MODE_WRITE:
1070 BZ2_bzWriteClose(&bzerror, self->fp,
1071 0, NULL, NULL);
1072 break;
1074 self->mode = MODE_CLOSED;
1075 fclose(self->rawfp);
1076 self->rawfp = NULL;
1077 if (bzerror == BZ_OK) {
1078 Py_INCREF(Py_None);
1079 ret = Py_None;
1081 else {
1082 Util_CatchBZ2Error(bzerror);
1085 RELEASE_LOCK(self);
1086 return ret;
1089 PyDoc_STRVAR(BZ2File_enter_doc,
1090 "__enter__() -> self.");
1092 static PyObject *
1093 BZ2File_enter(BZ2FileObject *self)
1095 if (self->mode == MODE_CLOSED) {
1096 PyErr_SetString(PyExc_ValueError,
1097 "I/O operation on closed file");
1098 return NULL;
1100 Py_INCREF(self);
1101 return (PyObject *) self;
1104 PyDoc_STRVAR(BZ2File_exit_doc,
1105 "__exit__(*excinfo) -> None. Closes the file.");
1107 static PyObject *
1108 BZ2File_exit(BZ2FileObject *self, PyObject *args)
1110 PyObject *ret = PyObject_CallMethod((PyObject *) self, "close", NULL);
1111 if (!ret)
1112 /* If error occurred, pass through */
1113 return NULL;
1114 Py_DECREF(ret);
1115 Py_RETURN_NONE;
1119 static PyObject *BZ2File_getiter(BZ2FileObject *self);
1121 static PyMethodDef BZ2File_methods[] = {
1122 {"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__},
1123 {"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__doc__},
1124 {"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readlines__doc__},
1125 {"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__},
1126 {"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__doc__},
1127 {"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__},
1128 {"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__},
1129 {"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__},
1130 {"__enter__", (PyCFunction)BZ2File_enter, METH_NOARGS, BZ2File_enter_doc},
1131 {"__exit__", (PyCFunction)BZ2File_exit, METH_VARARGS, BZ2File_exit_doc},
1132 {NULL, NULL} /* sentinel */
1136 /* ===================================================================== */
1137 /* Getters and setters of BZ2File. */
1139 static PyObject *
1140 BZ2File_get_closed(BZ2FileObject *self, void *closure)
1142 return PyLong_FromLong(self->mode == MODE_CLOSED);
1145 static PyGetSetDef BZ2File_getset[] = {
1146 {"closed", (getter)BZ2File_get_closed, NULL,
1147 "True if the file is closed"},
1148 {NULL} /* Sentinel */
1152 /* ===================================================================== */
1153 /* Slot definitions for BZ2File_Type. */
1155 static int
1156 BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
1158 static char *kwlist[] = {"filename", "mode", "buffering",
1159 "compresslevel", 0};
1160 char *name;
1161 char *mode = "r";
1162 int buffering = -1;
1163 int compresslevel = 9;
1164 int bzerror;
1165 int mode_char = 0;
1167 self->size = -1;
1169 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s|sii:BZ2File",
1170 kwlist, &name, &mode, &buffering,
1171 &compresslevel))
1172 return -1;
1174 if (compresslevel < 1 || compresslevel > 9) {
1175 PyErr_SetString(PyExc_ValueError,
1176 "compresslevel must be between 1 and 9");
1177 return -1;
1180 for (;;) {
1181 int error = 0;
1182 switch (*mode) {
1183 case 'r':
1184 case 'w':
1185 if (mode_char)
1186 error = 1;
1187 mode_char = *mode;
1188 break;
1190 case 'b':
1191 break;
1193 default:
1194 error = 1;
1195 break;
1197 if (error) {
1198 PyErr_Format(PyExc_ValueError,
1199 "invalid mode char %c", *mode);
1200 return -1;
1202 mode++;
1203 if (*mode == '\0')
1204 break;
1207 if (mode_char == 0) {
1208 mode_char = 'r';
1211 mode = (mode_char == 'r') ? "rb" : "wb";
1213 self->rawfp = fopen(name, mode);
1214 if (self->rawfp == NULL) {
1215 PyErr_SetFromErrno(PyExc_IOError);
1216 return -1;
1218 /* XXX Ignore buffering */
1220 /* From now on, we have stuff to dealloc, so jump to error label
1221 * instead of returning */
1223 #ifdef WITH_THREAD
1224 self->lock = PyThread_allocate_lock();
1225 if (!self->lock) {
1226 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1227 goto error;
1229 #endif
1231 if (mode_char == 'r')
1232 self->fp = BZ2_bzReadOpen(&bzerror, self->rawfp,
1233 0, 0, NULL, 0);
1234 else
1235 self->fp = BZ2_bzWriteOpen(&bzerror, self->rawfp,
1236 compresslevel, 0, 0);
1238 if (bzerror != BZ_OK) {
1239 Util_CatchBZ2Error(bzerror);
1240 goto error;
1243 self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE;
1245 return 0;
1247 error:
1248 fclose(self->rawfp);
1249 self->rawfp = NULL;
1250 #ifdef WITH_THREAD
1251 if (self->lock) {
1252 PyThread_free_lock(self->lock);
1253 self->lock = NULL;
1255 #endif
1256 return -1;
1259 static void
1260 BZ2File_dealloc(BZ2FileObject *self)
1262 int bzerror;
1263 #ifdef WITH_THREAD
1264 if (self->lock)
1265 PyThread_free_lock(self->lock);
1266 #endif
1267 switch (self->mode) {
1268 case MODE_READ:
1269 case MODE_READ_EOF:
1270 BZ2_bzReadClose(&bzerror, self->fp);
1271 break;
1272 case MODE_WRITE:
1273 BZ2_bzWriteClose(&bzerror, self->fp,
1274 0, NULL, NULL);
1275 break;
1277 Util_DropReadAhead(self);
1278 if (self->rawfp != NULL)
1279 fclose(self->rawfp);
1280 Py_TYPE(self)->tp_free((PyObject *)self);
1283 /* This is a hacked version of Python's fileobject.c:file_getiter(). */
1284 static PyObject *
1285 BZ2File_getiter(BZ2FileObject *self)
1287 if (self->mode == MODE_CLOSED) {
1288 PyErr_SetString(PyExc_ValueError,
1289 "I/O operation on closed file");
1290 return NULL;
1292 Py_INCREF((PyObject*)self);
1293 return (PyObject *)self;
1296 /* This is a hacked version of Python's fileobject.c:file_iternext(). */
1297 #define READAHEAD_BUFSIZE 8192
1298 static PyObject *
1299 BZ2File_iternext(BZ2FileObject *self)
1301 PyBytesObject* ret;
1302 ACQUIRE_LOCK(self);
1303 if (self->mode == MODE_CLOSED) {
1304 RELEASE_LOCK(self);
1305 PyErr_SetString(PyExc_ValueError,
1306 "I/O operation on closed file");
1307 return NULL;
1309 ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE);
1310 RELEASE_LOCK(self);
1311 if (ret == NULL || PyBytes_GET_SIZE(ret) == 0) {
1312 Py_XDECREF(ret);
1313 return NULL;
1315 return (PyObject *)ret;
1318 /* ===================================================================== */
1319 /* BZ2File_Type definition. */
1321 PyDoc_VAR(BZ2File__doc__) =
1322 PyDoc_STR(
1323 "BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
1325 Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
1326 writing. When opened for writing, the file will be created if it doesn't\n\
1327 exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
1328 unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
1329 is given, must be a number between 1 and 9.\n\
1330 Data read is always returned in bytes; data written ought to be bytes.\n\
1333 static PyTypeObject BZ2File_Type = {
1334 PyVarObject_HEAD_INIT(NULL, 0)
1335 "bz2.BZ2File", /*tp_name*/
1336 sizeof(BZ2FileObject), /*tp_basicsize*/
1337 0, /*tp_itemsize*/
1338 (destructor)BZ2File_dealloc, /*tp_dealloc*/
1339 0, /*tp_print*/
1340 0, /*tp_getattr*/
1341 0, /*tp_setattr*/
1342 0, /*tp_reserved*/
1343 0, /*tp_repr*/
1344 0, /*tp_as_number*/
1345 0, /*tp_as_sequence*/
1346 0, /*tp_as_mapping*/
1347 0, /*tp_hash*/
1348 0, /*tp_call*/
1349 0, /*tp_str*/
1350 PyObject_GenericGetAttr,/*tp_getattro*/
1351 PyObject_GenericSetAttr,/*tp_setattro*/
1352 0, /*tp_as_buffer*/
1353 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1354 BZ2File__doc__, /*tp_doc*/
1355 0, /*tp_traverse*/
1356 0, /*tp_clear*/
1357 0, /*tp_richcompare*/
1358 0, /*tp_weaklistoffset*/
1359 (getiterfunc)BZ2File_getiter, /*tp_iter*/
1360 (iternextfunc)BZ2File_iternext, /*tp_iternext*/
1361 BZ2File_methods, /*tp_methods*/
1362 0, /*tp_members*/
1363 BZ2File_getset, /*tp_getset*/
1364 0, /*tp_base*/
1365 0, /*tp_dict*/
1366 0, /*tp_descr_get*/
1367 0, /*tp_descr_set*/
1368 0, /*tp_dictoffset*/
1369 (initproc)BZ2File_init, /*tp_init*/
1370 PyType_GenericAlloc, /*tp_alloc*/
1371 PyType_GenericNew, /*tp_new*/
1372 PyObject_Free, /*tp_free*/
1373 0, /*tp_is_gc*/
1377 /* ===================================================================== */
1378 /* Methods of BZ2Comp. */
1380 PyDoc_STRVAR(BZ2Comp_compress__doc__,
1381 "compress(data) -> string\n\
1383 Provide more data to the compressor object. It will return chunks of\n\
1384 compressed data whenever possible. When you've finished providing data\n\
1385 to compress, call the flush() method to finish the compression process,\n\
1386 and return what is left in the internal buffers.\n\
1389 static PyObject *
1390 BZ2Comp_compress(BZ2CompObject *self, PyObject *args)
1392 Py_buffer pdata;
1393 char *data;
1394 int datasize;
1395 int bufsize = SMALLCHUNK;
1396 PY_LONG_LONG totalout;
1397 PyObject *ret = NULL;
1398 bz_stream *bzs = &self->bzs;
1399 int bzerror;
1401 if (!PyArg_ParseTuple(args, "y*:compress", &pdata))
1402 return NULL;
1403 data = pdata.buf;
1404 datasize = pdata.len;
1406 if (datasize == 0) {
1407 PyBuffer_Release(&pdata);
1408 return PyBytes_FromStringAndSize("", 0);
1411 ACQUIRE_LOCK(self);
1412 if (!self->running) {
1413 PyErr_SetString(PyExc_ValueError,
1414 "this object was already flushed");
1415 goto error;
1418 ret = PyBytes_FromStringAndSize(NULL, bufsize);
1419 if (!ret)
1420 goto error;
1422 bzs->next_in = data;
1423 bzs->avail_in = datasize;
1424 bzs->next_out = BUF(ret);
1425 bzs->avail_out = bufsize;
1427 totalout = BZS_TOTAL_OUT(bzs);
1429 for (;;) {
1430 Py_BEGIN_ALLOW_THREADS
1431 bzerror = BZ2_bzCompress(bzs, BZ_RUN);
1432 Py_END_ALLOW_THREADS
1433 if (bzerror != BZ_RUN_OK) {
1434 Util_CatchBZ2Error(bzerror);
1435 goto error;
1437 if (bzs->avail_in == 0)
1438 break; /* no more input data */
1439 if (bzs->avail_out == 0) {
1440 bufsize = Util_NewBufferSize(bufsize);
1441 if (_PyBytes_Resize(&ret, bufsize) < 0) {
1442 BZ2_bzCompressEnd(bzs);
1443 goto error;
1445 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1446 - totalout);
1447 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1451 if (_PyBytes_Resize(&ret,
1452 (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout)) < 0)
1453 goto error;
1455 RELEASE_LOCK(self);
1456 PyBuffer_Release(&pdata);
1457 return ret;
1459 error:
1460 RELEASE_LOCK(self);
1461 PyBuffer_Release(&pdata);
1462 Py_XDECREF(ret);
1463 return NULL;
1466 PyDoc_STRVAR(BZ2Comp_flush__doc__,
1467 "flush() -> string\n\
1469 Finish the compression process and return what is left in internal buffers.\n\
1470 You must not use the compressor object after calling this method.\n\
1473 static PyObject *
1474 BZ2Comp_flush(BZ2CompObject *self)
1476 int bufsize = SMALLCHUNK;
1477 PyObject *ret = NULL;
1478 bz_stream *bzs = &self->bzs;
1479 PY_LONG_LONG totalout;
1480 int bzerror;
1482 ACQUIRE_LOCK(self);
1483 if (!self->running) {
1484 PyErr_SetString(PyExc_ValueError, "object was already "
1485 "flushed");
1486 goto error;
1488 self->running = 0;
1490 ret = PyBytes_FromStringAndSize(NULL, bufsize);
1491 if (!ret)
1492 goto error;
1494 bzs->next_out = BUF(ret);
1495 bzs->avail_out = bufsize;
1497 totalout = BZS_TOTAL_OUT(bzs);
1499 for (;;) {
1500 Py_BEGIN_ALLOW_THREADS
1501 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1502 Py_END_ALLOW_THREADS
1503 if (bzerror == BZ_STREAM_END) {
1504 break;
1505 } else if (bzerror != BZ_FINISH_OK) {
1506 Util_CatchBZ2Error(bzerror);
1507 goto error;
1509 if (bzs->avail_out == 0) {
1510 bufsize = Util_NewBufferSize(bufsize);
1511 if (_PyBytes_Resize(&ret, bufsize) < 0)
1512 goto error;
1513 bzs->next_out = BUF(ret);
1514 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1515 - totalout);
1516 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1520 if (bzs->avail_out != 0) {
1521 if (_PyBytes_Resize(&ret,
1522 (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout)) < 0)
1523 goto error;
1526 RELEASE_LOCK(self);
1527 return ret;
1529 error:
1530 RELEASE_LOCK(self);
1531 Py_XDECREF(ret);
1532 return NULL;
1535 static PyMethodDef BZ2Comp_methods[] = {
1536 {"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS,
1537 BZ2Comp_compress__doc__},
1538 {"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS,
1539 BZ2Comp_flush__doc__},
1540 {NULL, NULL} /* sentinel */
1544 /* ===================================================================== */
1545 /* Slot definitions for BZ2Comp_Type. */
1547 static int
1548 BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs)
1550 int compresslevel = 9;
1551 int bzerror;
1552 static char *kwlist[] = {"compresslevel", 0};
1554 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor",
1555 kwlist, &compresslevel))
1556 return -1;
1558 if (compresslevel < 1 || compresslevel > 9) {
1559 PyErr_SetString(PyExc_ValueError,
1560 "compresslevel must be between 1 and 9");
1561 goto error;
1564 #ifdef WITH_THREAD
1565 self->lock = PyThread_allocate_lock();
1566 if (!self->lock) {
1567 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1568 goto error;
1570 #endif
1572 memset(&self->bzs, 0, sizeof(bz_stream));
1573 bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
1574 if (bzerror != BZ_OK) {
1575 Util_CatchBZ2Error(bzerror);
1576 goto error;
1579 self->running = 1;
1581 return 0;
1582 error:
1583 #ifdef WITH_THREAD
1584 if (self->lock) {
1585 PyThread_free_lock(self->lock);
1586 self->lock = NULL;
1588 #endif
1589 return -1;
1592 static void
1593 BZ2Comp_dealloc(BZ2CompObject *self)
1595 #ifdef WITH_THREAD
1596 if (self->lock)
1597 PyThread_free_lock(self->lock);
1598 #endif
1599 BZ2_bzCompressEnd(&self->bzs);
1600 Py_TYPE(self)->tp_free((PyObject *)self);
1604 /* ===================================================================== */
1605 /* BZ2Comp_Type definition. */
1607 PyDoc_STRVAR(BZ2Comp__doc__,
1608 "BZ2Compressor([compresslevel=9]) -> compressor object\n\
1610 Create a new compressor object. This object may be used to compress\n\
1611 data sequentially. If you want to compress data in one shot, use the\n\
1612 compress() function instead. The compresslevel parameter, if given,\n\
1613 must be a number between 1 and 9.\n\
1616 static PyTypeObject BZ2Comp_Type = {
1617 PyVarObject_HEAD_INIT(NULL, 0)
1618 "bz2.BZ2Compressor", /*tp_name*/
1619 sizeof(BZ2CompObject), /*tp_basicsize*/
1620 0, /*tp_itemsize*/
1621 (destructor)BZ2Comp_dealloc, /*tp_dealloc*/
1622 0, /*tp_print*/
1623 0, /*tp_getattr*/
1624 0, /*tp_setattr*/
1625 0, /*tp_reserved*/
1626 0, /*tp_repr*/
1627 0, /*tp_as_number*/
1628 0, /*tp_as_sequence*/
1629 0, /*tp_as_mapping*/
1630 0, /*tp_hash*/
1631 0, /*tp_call*/
1632 0, /*tp_str*/
1633 PyObject_GenericGetAttr,/*tp_getattro*/
1634 PyObject_GenericSetAttr,/*tp_setattro*/
1635 0, /*tp_as_buffer*/
1636 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1637 BZ2Comp__doc__, /*tp_doc*/
1638 0, /*tp_traverse*/
1639 0, /*tp_clear*/
1640 0, /*tp_richcompare*/
1641 0, /*tp_weaklistoffset*/
1642 0, /*tp_iter*/
1643 0, /*tp_iternext*/
1644 BZ2Comp_methods, /*tp_methods*/
1645 0, /*tp_members*/
1646 0, /*tp_getset*/
1647 0, /*tp_base*/
1648 0, /*tp_dict*/
1649 0, /*tp_descr_get*/
1650 0, /*tp_descr_set*/
1651 0, /*tp_dictoffset*/
1652 (initproc)BZ2Comp_init, /*tp_init*/
1653 PyType_GenericAlloc, /*tp_alloc*/
1654 PyType_GenericNew, /*tp_new*/
1655 PyObject_Free, /*tp_free*/
1656 0, /*tp_is_gc*/
1660 /* ===================================================================== */
1661 /* Members of BZ2Decomp. */
1663 #undef OFF
1664 #define OFF(x) offsetof(BZ2DecompObject, x)
1666 static PyMemberDef BZ2Decomp_members[] = {
1667 {"unused_data", T_OBJECT, OFF(unused_data), READONLY},
1668 {NULL} /* Sentinel */
1672 /* ===================================================================== */
1673 /* Methods of BZ2Decomp. */
1675 PyDoc_STRVAR(BZ2Decomp_decompress__doc__,
1676 "decompress(data) -> string\n\
1678 Provide more data to the decompressor object. It will return chunks\n\
1679 of decompressed data whenever possible. If you try to decompress data\n\
1680 after the end of stream is found, EOFError will be raised. If any data\n\
1681 was found after the end of stream, it'll be ignored and saved in\n\
1682 unused_data attribute.\n\
1685 static PyObject *
1686 BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args)
1688 Py_buffer pdata;
1689 char *data;
1690 int datasize;
1691 int bufsize = SMALLCHUNK;
1692 PY_LONG_LONG totalout;
1693 PyObject *ret = NULL;
1694 bz_stream *bzs = &self->bzs;
1695 int bzerror;
1697 if (!PyArg_ParseTuple(args, "y*:decompress", &pdata))
1698 return NULL;
1699 data = pdata.buf;
1700 datasize = pdata.len;
1702 ACQUIRE_LOCK(self);
1703 if (!self->running) {
1704 PyErr_SetString(PyExc_EOFError, "end of stream was "
1705 "already found");
1706 goto error;
1709 ret = PyBytes_FromStringAndSize(NULL, bufsize);
1710 if (!ret)
1711 goto error;
1713 bzs->next_in = data;
1714 bzs->avail_in = datasize;
1715 bzs->next_out = BUF(ret);
1716 bzs->avail_out = bufsize;
1718 totalout = BZS_TOTAL_OUT(bzs);
1720 for (;;) {
1721 Py_BEGIN_ALLOW_THREADS
1722 bzerror = BZ2_bzDecompress(bzs);
1723 Py_END_ALLOW_THREADS
1724 if (bzerror == BZ_STREAM_END) {
1725 if (bzs->avail_in != 0) {
1726 Py_DECREF(self->unused_data);
1727 self->unused_data =
1728 PyBytes_FromStringAndSize(bzs->next_in,
1729 bzs->avail_in);
1731 self->running = 0;
1732 break;
1734 if (bzerror != BZ_OK) {
1735 Util_CatchBZ2Error(bzerror);
1736 goto error;
1738 if (bzs->avail_in == 0)
1739 break; /* no more input data */
1740 if (bzs->avail_out == 0) {
1741 bufsize = Util_NewBufferSize(bufsize);
1742 if (_PyBytes_Resize(&ret, bufsize) < 0) {
1743 BZ2_bzDecompressEnd(bzs);
1744 goto error;
1746 bzs->next_out = BUF(ret);
1747 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1748 - totalout);
1749 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1753 if (bzs->avail_out != 0) {
1754 if (_PyBytes_Resize(&ret,
1755 (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout)) < 0)
1756 goto error;
1759 RELEASE_LOCK(self);
1760 PyBuffer_Release(&pdata);
1761 return ret;
1763 error:
1764 RELEASE_LOCK(self);
1765 PyBuffer_Release(&pdata);
1766 Py_XDECREF(ret);
1767 return NULL;
1770 static PyMethodDef BZ2Decomp_methods[] = {
1771 {"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_decompress__doc__},
1772 {NULL, NULL} /* sentinel */
1776 /* ===================================================================== */
1777 /* Slot definitions for BZ2Decomp_Type. */
1779 static int
1780 BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs)
1782 int bzerror;
1784 if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
1785 return -1;
1787 #ifdef WITH_THREAD
1788 self->lock = PyThread_allocate_lock();
1789 if (!self->lock) {
1790 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1791 goto error;
1793 #endif
1795 self->unused_data = PyBytes_FromStringAndSize("", 0);
1796 if (!self->unused_data)
1797 goto error;
1799 memset(&self->bzs, 0, sizeof(bz_stream));
1800 bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
1801 if (bzerror != BZ_OK) {
1802 Util_CatchBZ2Error(bzerror);
1803 goto error;
1806 self->running = 1;
1808 return 0;
1810 error:
1811 #ifdef WITH_THREAD
1812 if (self->lock) {
1813 PyThread_free_lock(self->lock);
1814 self->lock = NULL;
1816 #endif
1817 Py_CLEAR(self->unused_data);
1818 return -1;
1821 static void
1822 BZ2Decomp_dealloc(BZ2DecompObject *self)
1824 #ifdef WITH_THREAD
1825 if (self->lock)
1826 PyThread_free_lock(self->lock);
1827 #endif
1828 Py_XDECREF(self->unused_data);
1829 BZ2_bzDecompressEnd(&self->bzs);
1830 Py_TYPE(self)->tp_free((PyObject *)self);
1834 /* ===================================================================== */
1835 /* BZ2Decomp_Type definition. */
1837 PyDoc_STRVAR(BZ2Decomp__doc__,
1838 "BZ2Decompressor() -> decompressor object\n\
1840 Create a new decompressor object. This object may be used to decompress\n\
1841 data sequentially. If you want to decompress data in one shot, use the\n\
1842 decompress() function instead.\n\
1845 static PyTypeObject BZ2Decomp_Type = {
1846 PyVarObject_HEAD_INIT(NULL, 0)
1847 "bz2.BZ2Decompressor", /*tp_name*/
1848 sizeof(BZ2DecompObject), /*tp_basicsize*/
1849 0, /*tp_itemsize*/
1850 (destructor)BZ2Decomp_dealloc, /*tp_dealloc*/
1851 0, /*tp_print*/
1852 0, /*tp_getattr*/
1853 0, /*tp_setattr*/
1854 0, /*tp_reserved*/
1855 0, /*tp_repr*/
1856 0, /*tp_as_number*/
1857 0, /*tp_as_sequence*/
1858 0, /*tp_as_mapping*/
1859 0, /*tp_hash*/
1860 0, /*tp_call*/
1861 0, /*tp_str*/
1862 PyObject_GenericGetAttr,/*tp_getattro*/
1863 PyObject_GenericSetAttr,/*tp_setattro*/
1864 0, /*tp_as_buffer*/
1865 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1866 BZ2Decomp__doc__, /*tp_doc*/
1867 0, /*tp_traverse*/
1868 0, /*tp_clear*/
1869 0, /*tp_richcompare*/
1870 0, /*tp_weaklistoffset*/
1871 0, /*tp_iter*/
1872 0, /*tp_iternext*/
1873 BZ2Decomp_methods, /*tp_methods*/
1874 BZ2Decomp_members, /*tp_members*/
1875 0, /*tp_getset*/
1876 0, /*tp_base*/
1877 0, /*tp_dict*/
1878 0, /*tp_descr_get*/
1879 0, /*tp_descr_set*/
1880 0, /*tp_dictoffset*/
1881 (initproc)BZ2Decomp_init, /*tp_init*/
1882 PyType_GenericAlloc, /*tp_alloc*/
1883 PyType_GenericNew, /*tp_new*/
1884 PyObject_Free, /*tp_free*/
1885 0, /*tp_is_gc*/
1889 /* ===================================================================== */
1890 /* Module functions. */
1892 PyDoc_STRVAR(bz2_compress__doc__,
1893 "compress(data [, compresslevel=9]) -> string\n\
1895 Compress data in one shot. If you want to compress data sequentially,\n\
1896 use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
1897 given, must be a number between 1 and 9.\n\
1900 static PyObject *
1901 bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs)
1903 int compresslevel=9;
1904 Py_buffer pdata;
1905 char *data;
1906 int datasize;
1907 int bufsize;
1908 PyObject *ret = NULL;
1909 bz_stream _bzs;
1910 bz_stream *bzs = &_bzs;
1911 int bzerror;
1912 static char *kwlist[] = {"data", "compresslevel", 0};
1914 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|i",
1915 kwlist, &pdata,
1916 &compresslevel))
1917 return NULL;
1918 data = pdata.buf;
1919 datasize = pdata.len;
1921 if (compresslevel < 1 || compresslevel > 9) {
1922 PyErr_SetString(PyExc_ValueError,
1923 "compresslevel must be between 1 and 9");
1924 PyBuffer_Release(&pdata);
1925 return NULL;
1928 /* Conforming to bz2 manual, this is large enough to fit compressed
1929 * data in one shot. We will check it later anyway. */
1930 bufsize = datasize + (datasize/100+1) + 600;
1932 ret = PyBytes_FromStringAndSize(NULL, bufsize);
1933 if (!ret) {
1934 PyBuffer_Release(&pdata);
1935 return NULL;
1938 memset(bzs, 0, sizeof(bz_stream));
1940 bzs->next_in = data;
1941 bzs->avail_in = datasize;
1942 bzs->next_out = BUF(ret);
1943 bzs->avail_out = bufsize;
1945 bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0);
1946 if (bzerror != BZ_OK) {
1947 Util_CatchBZ2Error(bzerror);
1948 PyBuffer_Release(&pdata);
1949 Py_DECREF(ret);
1950 return NULL;
1953 for (;;) {
1954 Py_BEGIN_ALLOW_THREADS
1955 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1956 Py_END_ALLOW_THREADS
1957 if (bzerror == BZ_STREAM_END) {
1958 break;
1959 } else if (bzerror != BZ_FINISH_OK) {
1960 BZ2_bzCompressEnd(bzs);
1961 Util_CatchBZ2Error(bzerror);
1962 PyBuffer_Release(&pdata);
1963 Py_DECREF(ret);
1964 return NULL;
1966 if (bzs->avail_out == 0) {
1967 bufsize = Util_NewBufferSize(bufsize);
1968 if (_PyBytes_Resize(&ret, bufsize) < 0) {
1969 BZ2_bzCompressEnd(bzs);
1970 PyBuffer_Release(&pdata);
1971 return NULL;
1973 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
1974 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1978 if (bzs->avail_out != 0) {
1979 if (_PyBytes_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs)) < 0) {
1980 ret = NULL;
1983 BZ2_bzCompressEnd(bzs);
1985 PyBuffer_Release(&pdata);
1986 return ret;
1989 PyDoc_STRVAR(bz2_decompress__doc__,
1990 "decompress(data) -> decompressed data\n\
1992 Decompress data in one shot. If you want to decompress data sequentially,\n\
1993 use an instance of BZ2Decompressor instead.\n\
1996 static PyObject *
1997 bz2_decompress(PyObject *self, PyObject *args)
1999 Py_buffer pdata;
2000 char *data;
2001 int datasize;
2002 int bufsize = SMALLCHUNK;
2003 PyObject *ret;
2004 bz_stream _bzs;
2005 bz_stream *bzs = &_bzs;
2006 int bzerror;
2008 if (!PyArg_ParseTuple(args, "y*:decompress", &pdata))
2009 return NULL;
2010 data = pdata.buf;
2011 datasize = pdata.len;
2013 if (datasize == 0) {
2014 PyBuffer_Release(&pdata);
2015 return PyBytes_FromStringAndSize("", 0);
2018 ret = PyBytes_FromStringAndSize(NULL, bufsize);
2019 if (!ret) {
2020 PyBuffer_Release(&pdata);
2021 return NULL;
2024 memset(bzs, 0, sizeof(bz_stream));
2026 bzs->next_in = data;
2027 bzs->avail_in = datasize;
2028 bzs->next_out = BUF(ret);
2029 bzs->avail_out = bufsize;
2031 bzerror = BZ2_bzDecompressInit(bzs, 0, 0);
2032 if (bzerror != BZ_OK) {
2033 Util_CatchBZ2Error(bzerror);
2034 Py_DECREF(ret);
2035 PyBuffer_Release(&pdata);
2036 return NULL;
2039 for (;;) {
2040 Py_BEGIN_ALLOW_THREADS
2041 bzerror = BZ2_bzDecompress(bzs);
2042 Py_END_ALLOW_THREADS
2043 if (bzerror == BZ_STREAM_END) {
2044 break;
2045 } else if (bzerror != BZ_OK) {
2046 BZ2_bzDecompressEnd(bzs);
2047 Util_CatchBZ2Error(bzerror);
2048 PyBuffer_Release(&pdata);
2049 Py_DECREF(ret);
2050 return NULL;
2052 if (bzs->avail_in == 0) {
2053 BZ2_bzDecompressEnd(bzs);
2054 PyErr_SetString(PyExc_ValueError,
2055 "couldn't find end of stream");
2056 PyBuffer_Release(&pdata);
2057 Py_DECREF(ret);
2058 return NULL;
2060 if (bzs->avail_out == 0) {
2061 bufsize = Util_NewBufferSize(bufsize);
2062 if (_PyBytes_Resize(&ret, bufsize) < 0) {
2063 BZ2_bzDecompressEnd(bzs);
2064 PyBuffer_Release(&pdata);
2065 return NULL;
2067 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2068 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2072 if (bzs->avail_out != 0) {
2073 if (_PyBytes_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs)) < 0) {
2074 ret = NULL;
2077 BZ2_bzDecompressEnd(bzs);
2078 PyBuffer_Release(&pdata);
2080 return ret;
2083 static PyMethodDef bz2_methods[] = {
2084 {"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS,
2085 bz2_compress__doc__},
2086 {"decompress", (PyCFunction) bz2_decompress, METH_VARARGS,
2087 bz2_decompress__doc__},
2088 {NULL, NULL} /* sentinel */
2091 /* ===================================================================== */
2092 /* Initialization function. */
2094 PyDoc_STRVAR(bz2__doc__,
2095 "The python bz2 module provides a comprehensive interface for\n\
2096 the bz2 compression library. It implements a complete file\n\
2097 interface, one shot (de)compression functions, and types for\n\
2098 sequential (de)compression.\n\
2102 static struct PyModuleDef bz2module = {
2103 PyModuleDef_HEAD_INIT,
2104 "bz2",
2105 bz2__doc__,
2107 bz2_methods,
2108 NULL,
2109 NULL,
2110 NULL,
2111 NULL
2114 PyMODINIT_FUNC
2115 PyInit_bz2(void)
2117 PyObject *m;
2119 Py_TYPE(&BZ2File_Type) = &PyType_Type;
2120 Py_TYPE(&BZ2Comp_Type) = &PyType_Type;
2121 Py_TYPE(&BZ2Decomp_Type) = &PyType_Type;
2123 m = PyModule_Create(&bz2module);
2124 if (m == NULL)
2125 return NULL;
2127 PyModule_AddObject(m, "__author__", PyUnicode_FromString(__author__));
2129 Py_INCREF(&BZ2File_Type);
2130 PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type);
2132 Py_INCREF(&BZ2Comp_Type);
2133 PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type);
2135 Py_INCREF(&BZ2Decomp_Type);
2136 PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type);
2137 return m;