/* Documentation clarified to mention optional parameters.
 * [python.git] / Modules / bz2module.c
 * blob 27a38279a66e2fd89b31ba251bd723c1b53b4784
 */
1 /*
3 python-bz2 - python bz2 library interface
5 Copyright (c) 2002 Gustavo Niemeyer <niemeyer@conectiva.com>
6 Copyright (c) 2002 Python Software Foundation; All Rights Reserved
8 */
10 #include "Python.h"
11 #include <stdio.h>
12 #include <bzlib.h>
13 #include "structmember.h"
15 #ifdef WITH_THREAD
16 #include "pythread.h"
17 #endif
19 static char __author__[] =
20 "The bz2 python module was written by:\n\
21 \n\
22 Gustavo Niemeyer <niemeyer@conectiva.com>\n\
/* Our very own off_t-like type, 64-bit if possible */
/* copied from Objects/fileobject.c */
/* Without large file support plain off_t is used; with it, the first of
 * off_t / fpos_t that is at least 8 bytes wide is chosen, and the build
 * fails if neither is. */
#if !defined(HAVE_LARGEFILE_SUPPORT)
typedef off_t Py_off_t;
#elif SIZEOF_OFF_T >= 8
typedef off_t Py_off_t;
#elif SIZEOF_FPOS_T >= 8
typedef fpos_t Py_off_t;
#else
#error "Large file support, but neither off_t nor fpos_t is large enough."
#endif
/* Raw character buffer of a Python string object. */
#define BUF(v) PyString_AS_STRING((PyStringObject *)(v))

/* Values of BZ2FileObject.mode. */
#define MODE_CLOSED   0
#define MODE_READ     1
#define MODE_READ_EOF 2
#define MODE_WRITE    3

#define BZ2FileObject_Check(v)  ((v)->ob_type == &BZ2File_Type)


/* When <bzlib.h> defines BZ_CONFIG_ERROR the BZ2_-prefixed entry points
 * are used directly and the output counter is split into two 32-bit
 * halves; otherwise the BZ2_ names are mapped onto the unprefixed ones
 * and a single total_out field is used. */
#ifdef BZ_CONFIG_ERROR

#if SIZEOF_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((long)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#elif SIZEOF_LONG_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((PY_LONG_LONG)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#else
#define BZS_TOTAL_OUT(bzs) \
    ((bzs)->total_out_lo32)
#endif

#else /* ! BZ_CONFIG_ERROR */

#define BZ2_bzRead bzRead
#define BZ2_bzReadOpen bzReadOpen
#define BZ2_bzReadClose bzReadClose
#define BZ2_bzWrite bzWrite
#define BZ2_bzWriteOpen bzWriteOpen
#define BZ2_bzWriteClose bzWriteClose
#define BZ2_bzCompress bzCompress
#define BZ2_bzCompressInit bzCompressInit
#define BZ2_bzCompressEnd bzCompressEnd
#define BZ2_bzDecompress bzDecompress
#define BZ2_bzDecompressInit bzDecompressInit
#define BZ2_bzDecompressEnd bzDecompressEnd

#define BZS_TOTAL_OUT(bzs) ((bzs)->total_out)

#endif /* ! BZ_CONFIG_ERROR */


/* Per-object locking; expands to nothing in builds without threads. */
#ifdef WITH_THREAD
#define ACQUIRE_LOCK(obj) PyThread_acquire_lock((obj)->lock, 1)
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
#else
#define ACQUIRE_LOCK(obj)
#define RELEASE_LOCK(obj)
#endif

/* Bits in f_newlinetypes */
#define NEWLINE_UNKNOWN 0       /* No newline seen, yet */
#define NEWLINE_CR 1            /* \r newline seen */
#define NEWLINE_LF 2            /* \n newline seen */
#define NEWLINE_CRLF 4          /* \r\n newline seen */
94 /* ===================================================================== */
95 /* Structure definitions. */
97 typedef struct {
98 PyObject_HEAD
99 PyObject *file;
101 char* f_buf; /* Allocated readahead buffer */
102 char* f_bufend; /* Points after last occupied position */
103 char* f_bufptr; /* Current buffer position */
105 int f_softspace; /* Flag used by 'print' command */
107 int f_univ_newline; /* Handle any newline convention */
108 int f_newlinetypes; /* Types of newlines seen */
109 int f_skipnextlf; /* Skip next \n */
111 BZFILE *fp;
112 int mode;
113 Py_off_t pos;
114 Py_off_t size;
115 #ifdef WITH_THREAD
116 PyThread_type_lock lock;
117 #endif
118 } BZ2FileObject;
120 typedef struct {
121 PyObject_HEAD
122 bz_stream bzs;
123 int running;
124 #ifdef WITH_THREAD
125 PyThread_type_lock lock;
126 #endif
127 } BZ2CompObject;
129 typedef struct {
130 PyObject_HEAD
131 bz_stream bzs;
132 int running;
133 PyObject *unused_data;
134 #ifdef WITH_THREAD
135 PyThread_type_lock lock;
136 #endif
137 } BZ2DecompObject;
139 /* ===================================================================== */
140 /* Utility functions. */
142 static int
143 Util_CatchBZ2Error(int bzerror)
145 int ret = 0;
146 switch(bzerror) {
147 case BZ_OK:
148 case BZ_STREAM_END:
149 break;
151 #ifdef BZ_CONFIG_ERROR
152 case BZ_CONFIG_ERROR:
153 PyErr_SetString(PyExc_SystemError,
154 "the bz2 library was not compiled "
155 "correctly");
156 ret = 1;
157 break;
158 #endif
160 case BZ_PARAM_ERROR:
161 PyErr_SetString(PyExc_ValueError,
162 "the bz2 library has received wrong "
163 "parameters");
164 ret = 1;
165 break;
167 case BZ_MEM_ERROR:
168 PyErr_NoMemory();
169 ret = 1;
170 break;
172 case BZ_DATA_ERROR:
173 case BZ_DATA_ERROR_MAGIC:
174 PyErr_SetString(PyExc_IOError, "invalid data stream");
175 ret = 1;
176 break;
178 case BZ_IO_ERROR:
179 PyErr_SetString(PyExc_IOError, "unknown IO error");
180 ret = 1;
181 break;
183 case BZ_UNEXPECTED_EOF:
184 PyErr_SetString(PyExc_EOFError,
185 "compressed file ended before the "
186 "logical end-of-stream was detected");
187 ret = 1;
188 break;
190 case BZ_SEQUENCE_ERROR:
191 PyErr_SetString(PyExc_RuntimeError,
192 "wrong sequence of bz2 library "
193 "commands used");
194 ret = 1;
195 break;
197 return ret;
/* Buffer growth step for small buffers: BUFSIZ, but never below 8192. */
#if BUFSIZ < 8192
#define SMALLCHUNK 8192
#else
#define SMALLCHUNK BUFSIZ
#endif

/* Buffer growth step for large buffers; smaller when int is narrow. */
#if SIZEOF_INT < 4
#define BIGCHUNK  (512 * 32)
#else
#define BIGCHUNK  (512 * 1024)
#endif

/* This is a hacked version of Python's fileobject.c:new_buffersize(). */
/* Return the next buffer size to try after 'currentsize':
 *   - up to SMALLCHUNK: grow by SMALLCHUNK;
 *   - above SMALLCHUNK and up to BIGCHUNK: double;
 *   - above BIGCHUNK: grow by BIGCHUNK.
 */
static size_t
Util_NewBufferSize(size_t currentsize)
{
    if (currentsize > SMALLCHUNK) {
        /* Keep doubling until we reach BIGCHUNK;
           then keep adding BIGCHUNK. */
        if (currentsize <= BIGCHUNK)
            return currentsize + currentsize;
        else
            return currentsize + BIGCHUNK;
    }
    return currentsize + SMALLCHUNK;
}
227 /* This is a hacked version of Python's fileobject.c:get_line(). */
228 static PyObject *
229 Util_GetLine(BZ2FileObject *f, int n)
231 char c;
232 char *buf, *end;
233 size_t total_v_size; /* total # of slots in buffer */
234 size_t used_v_size; /* # used slots in buffer */
235 size_t increment; /* amount to increment the buffer */
236 PyObject *v;
237 int bzerror;
238 int newlinetypes = f->f_newlinetypes;
239 int skipnextlf = f->f_skipnextlf;
240 int univ_newline = f->f_univ_newline;
242 total_v_size = n > 0 ? n : 100;
243 v = PyString_FromStringAndSize((char *)NULL, total_v_size);
244 if (v == NULL)
245 return NULL;
247 buf = BUF(v);
248 end = buf + total_v_size;
250 for (;;) {
251 Py_BEGIN_ALLOW_THREADS
252 if (univ_newline) {
253 while (1) {
254 BZ2_bzRead(&bzerror, f->fp, &c, 1);
255 f->pos++;
256 if (bzerror != BZ_OK || buf == end)
257 break;
258 if (skipnextlf) {
259 skipnextlf = 0;
260 if (c == '\n') {
261 /* Seeing a \n here with
262 * skipnextlf true means we
263 * saw a \r before.
265 newlinetypes |= NEWLINE_CRLF;
266 BZ2_bzRead(&bzerror, f->fp,
267 &c, 1);
268 if (bzerror != BZ_OK)
269 break;
270 } else {
271 newlinetypes |= NEWLINE_CR;
274 if (c == '\r') {
275 skipnextlf = 1;
276 c = '\n';
277 } else if ( c == '\n')
278 newlinetypes |= NEWLINE_LF;
279 *buf++ = c;
280 if (c == '\n') break;
282 if (bzerror == BZ_STREAM_END && skipnextlf)
283 newlinetypes |= NEWLINE_CR;
284 } else /* If not universal newlines use the normal loop */
285 do {
286 BZ2_bzRead(&bzerror, f->fp, &c, 1);
287 f->pos++;
288 *buf++ = c;
289 } while (bzerror == BZ_OK && c != '\n' && buf != end);
290 Py_END_ALLOW_THREADS
291 f->f_newlinetypes = newlinetypes;
292 f->f_skipnextlf = skipnextlf;
293 if (bzerror == BZ_STREAM_END) {
294 f->size = f->pos;
295 f->mode = MODE_READ_EOF;
296 break;
297 } else if (bzerror != BZ_OK) {
298 Util_CatchBZ2Error(bzerror);
299 Py_DECREF(v);
300 return NULL;
302 if (c == '\n')
303 break;
304 /* Must be because buf == end */
305 if (n > 0)
306 break;
307 used_v_size = total_v_size;
308 increment = total_v_size >> 2; /* mild exponential growth */
309 total_v_size += increment;
310 if (total_v_size > INT_MAX) {
311 PyErr_SetString(PyExc_OverflowError,
312 "line is longer than a Python string can hold");
313 Py_DECREF(v);
314 return NULL;
316 if (_PyString_Resize(&v, total_v_size) < 0)
317 return NULL;
318 buf = BUF(v) + used_v_size;
319 end = BUF(v) + total_v_size;
322 used_v_size = buf - BUF(v);
323 if (used_v_size != total_v_size)
324 _PyString_Resize(&v, used_v_size);
325 return v;
328 /* This is a hacked version of Python's
329 * fileobject.c:Py_UniversalNewlineFread(). */
330 size_t
331 Util_UnivNewlineRead(int *bzerror, BZFILE *stream,
332 char* buf, size_t n, BZ2FileObject *f)
334 char *dst = buf;
335 int newlinetypes, skipnextlf;
337 assert(buf != NULL);
338 assert(stream != NULL);
340 if (!f->f_univ_newline)
341 return BZ2_bzRead(bzerror, stream, buf, n);
343 newlinetypes = f->f_newlinetypes;
344 skipnextlf = f->f_skipnextlf;
346 /* Invariant: n is the number of bytes remaining to be filled
347 * in the buffer.
349 while (n) {
350 size_t nread;
351 int shortread;
352 char *src = dst;
354 nread = BZ2_bzRead(bzerror, stream, dst, n);
355 assert(nread <= n);
356 n -= nread; /* assuming 1 byte out for each in; will adjust */
357 shortread = n != 0; /* true iff EOF or error */
358 while (nread--) {
359 char c = *src++;
360 if (c == '\r') {
361 /* Save as LF and set flag to skip next LF. */
362 *dst++ = '\n';
363 skipnextlf = 1;
365 else if (skipnextlf && c == '\n') {
366 /* Skip LF, and remember we saw CR LF. */
367 skipnextlf = 0;
368 newlinetypes |= NEWLINE_CRLF;
369 ++n;
371 else {
372 /* Normal char to be stored in buffer. Also
373 * update the newlinetypes flag if either this
374 * is an LF or the previous char was a CR.
376 if (c == '\n')
377 newlinetypes |= NEWLINE_LF;
378 else if (skipnextlf)
379 newlinetypes |= NEWLINE_CR;
380 *dst++ = c;
381 skipnextlf = 0;
384 if (shortread) {
385 /* If this is EOF, update type flags. */
386 if (skipnextlf && *bzerror == BZ_STREAM_END)
387 newlinetypes |= NEWLINE_CR;
388 break;
391 f->f_newlinetypes = newlinetypes;
392 f->f_skipnextlf = skipnextlf;
393 return dst - buf;
396 /* This is a hacked version of Python's fileobject.c:drop_readahead(). */
397 static void
398 Util_DropReadAhead(BZ2FileObject *f)
400 if (f->f_buf != NULL) {
401 PyMem_Free(f->f_buf);
402 f->f_buf = NULL;
406 /* This is a hacked version of Python's fileobject.c:readahead(). */
407 static int
408 Util_ReadAhead(BZ2FileObject *f, int bufsize)
410 int chunksize;
411 int bzerror;
413 if (f->f_buf != NULL) {
414 if((f->f_bufend - f->f_bufptr) >= 1)
415 return 0;
416 else
417 Util_DropReadAhead(f);
419 if (f->mode == MODE_READ_EOF) {
420 f->f_bufptr = f->f_buf;
421 f->f_bufend = f->f_buf;
422 return 0;
424 if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
425 return -1;
427 Py_BEGIN_ALLOW_THREADS
428 chunksize = Util_UnivNewlineRead(&bzerror, f->fp, f->f_buf,
429 bufsize, f);
430 Py_END_ALLOW_THREADS
431 f->pos += chunksize;
432 if (bzerror == BZ_STREAM_END) {
433 f->size = f->pos;
434 f->mode = MODE_READ_EOF;
435 } else if (bzerror != BZ_OK) {
436 Util_CatchBZ2Error(bzerror);
437 Util_DropReadAhead(f);
438 return -1;
440 f->f_bufptr = f->f_buf;
441 f->f_bufend = f->f_buf + chunksize;
442 return 0;
445 /* This is a hacked version of Python's
446 * fileobject.c:readahead_get_line_skip(). */
447 static PyStringObject *
448 Util_ReadAheadGetLineSkip(BZ2FileObject *f, int skip, int bufsize)
450 PyStringObject* s;
451 char *bufptr;
452 char *buf;
453 int len;
455 if (f->f_buf == NULL)
456 if (Util_ReadAhead(f, bufsize) < 0)
457 return NULL;
459 len = f->f_bufend - f->f_bufptr;
460 if (len == 0)
461 return (PyStringObject *)
462 PyString_FromStringAndSize(NULL, skip);
463 bufptr = memchr(f->f_bufptr, '\n', len);
464 if (bufptr != NULL) {
465 bufptr++; /* Count the '\n' */
466 len = bufptr - f->f_bufptr;
467 s = (PyStringObject *)
468 PyString_FromStringAndSize(NULL, skip+len);
469 if (s == NULL)
470 return NULL;
471 memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
472 f->f_bufptr = bufptr;
473 if (bufptr == f->f_bufend)
474 Util_DropReadAhead(f);
475 } else {
476 bufptr = f->f_bufptr;
477 buf = f->f_buf;
478 f->f_buf = NULL; /* Force new readahead buffer */
479 s = Util_ReadAheadGetLineSkip(f, skip+len,
480 bufsize + (bufsize>>2));
481 if (s == NULL) {
482 PyMem_Free(buf);
483 return NULL;
485 memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
486 PyMem_Free(buf);
488 return s;
491 /* ===================================================================== */
492 /* Methods of BZ2File. */
494 PyDoc_STRVAR(BZ2File_read__doc__,
495 "read([size]) -> string\n\
497 Read at most size uncompressed bytes, returned as a string. If the size\n\
498 argument is negative or omitted, read until EOF is reached.\n\
501 /* This is a hacked version of Python's fileobject.c:file_read(). */
502 static PyObject *
503 BZ2File_read(BZ2FileObject *self, PyObject *args)
505 long bytesrequested = -1;
506 size_t bytesread, buffersize, chunksize;
507 int bzerror;
508 PyObject *ret = NULL;
510 if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
511 return NULL;
513 ACQUIRE_LOCK(self);
514 switch (self->mode) {
515 case MODE_READ:
516 break;
517 case MODE_READ_EOF:
518 ret = PyString_FromString("");
519 goto cleanup;
520 case MODE_CLOSED:
521 PyErr_SetString(PyExc_ValueError,
522 "I/O operation on closed file");
523 goto cleanup;
524 default:
525 PyErr_SetString(PyExc_IOError,
526 "file is not ready for reading");
527 goto cleanup;
530 if (bytesrequested < 0)
531 buffersize = Util_NewBufferSize((size_t)0);
532 else
533 buffersize = bytesrequested;
534 if (buffersize > INT_MAX) {
535 PyErr_SetString(PyExc_OverflowError,
536 "requested number of bytes is "
537 "more than a Python string can hold");
538 goto cleanup;
540 ret = PyString_FromStringAndSize((char *)NULL, buffersize);
541 if (ret == NULL)
542 goto cleanup;
543 bytesread = 0;
545 for (;;) {
546 Py_BEGIN_ALLOW_THREADS
547 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
548 BUF(ret)+bytesread,
549 buffersize-bytesread,
550 self);
551 self->pos += chunksize;
552 Py_END_ALLOW_THREADS
553 bytesread += chunksize;
554 if (bzerror == BZ_STREAM_END) {
555 self->size = self->pos;
556 self->mode = MODE_READ_EOF;
557 break;
558 } else if (bzerror != BZ_OK) {
559 Util_CatchBZ2Error(bzerror);
560 Py_DECREF(ret);
561 ret = NULL;
562 goto cleanup;
564 if (bytesrequested < 0) {
565 buffersize = Util_NewBufferSize(buffersize);
566 if (_PyString_Resize(&ret, buffersize) < 0)
567 goto cleanup;
568 } else {
569 break;
572 if (bytesread != buffersize)
573 _PyString_Resize(&ret, bytesread);
575 cleanup:
576 RELEASE_LOCK(self);
577 return ret;
580 PyDoc_STRVAR(BZ2File_readline__doc__,
581 "readline([size]) -> string\n\
583 Return the next line from the file, as a string, retaining newline.\n\
584 A non-negative size argument will limit the maximum number of bytes to\n\
585 return (an incomplete line may be returned then). Return an empty\n\
586 string at EOF.\n\
589 static PyObject *
590 BZ2File_readline(BZ2FileObject *self, PyObject *args)
592 PyObject *ret = NULL;
593 int sizehint = -1;
595 if (!PyArg_ParseTuple(args, "|i:readline", &sizehint))
596 return NULL;
598 ACQUIRE_LOCK(self);
599 switch (self->mode) {
600 case MODE_READ:
601 break;
602 case MODE_READ_EOF:
603 ret = PyString_FromString("");
604 goto cleanup;
605 case MODE_CLOSED:
606 PyErr_SetString(PyExc_ValueError,
607 "I/O operation on closed file");
608 goto cleanup;
609 default:
610 PyErr_SetString(PyExc_IOError,
611 "file is not ready for reading");
612 goto cleanup;
615 if (sizehint == 0)
616 ret = PyString_FromString("");
617 else
618 ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint);
620 cleanup:
621 RELEASE_LOCK(self);
622 return ret;
625 PyDoc_STRVAR(BZ2File_readlines__doc__,
626 "readlines([size]) -> list\n\
628 Call readline() repeatedly and return a list of lines read.\n\
629 The optional size argument, if given, is an approximate bound on the\n\
630 total number of bytes in the lines returned.\n\
633 /* This is a hacked version of Python's fileobject.c:file_readlines(). */
634 static PyObject *
635 BZ2File_readlines(BZ2FileObject *self, PyObject *args)
637 long sizehint = 0;
638 PyObject *list = NULL;
639 PyObject *line;
640 char small_buffer[SMALLCHUNK];
641 char *buffer = small_buffer;
642 size_t buffersize = SMALLCHUNK;
643 PyObject *big_buffer = NULL;
644 size_t nfilled = 0;
645 size_t nread;
646 size_t totalread = 0;
647 char *p, *q, *end;
648 int err;
649 int shortread = 0;
650 int bzerror;
652 if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
653 return NULL;
655 ACQUIRE_LOCK(self);
656 switch (self->mode) {
657 case MODE_READ:
658 break;
659 case MODE_READ_EOF:
660 list = PyList_New(0);
661 goto cleanup;
662 case MODE_CLOSED:
663 PyErr_SetString(PyExc_ValueError,
664 "I/O operation on closed file");
665 goto cleanup;
666 default:
667 PyErr_SetString(PyExc_IOError,
668 "file is not ready for reading");
669 goto cleanup;
672 if ((list = PyList_New(0)) == NULL)
673 goto cleanup;
675 for (;;) {
676 Py_BEGIN_ALLOW_THREADS
677 nread = Util_UnivNewlineRead(&bzerror, self->fp,
678 buffer+nfilled,
679 buffersize-nfilled, self);
680 self->pos += nread;
681 Py_END_ALLOW_THREADS
682 if (bzerror == BZ_STREAM_END) {
683 self->size = self->pos;
684 self->mode = MODE_READ_EOF;
685 if (nread == 0) {
686 sizehint = 0;
687 break;
689 shortread = 1;
690 } else if (bzerror != BZ_OK) {
691 Util_CatchBZ2Error(bzerror);
692 error:
693 Py_DECREF(list);
694 list = NULL;
695 goto cleanup;
697 totalread += nread;
698 p = memchr(buffer+nfilled, '\n', nread);
699 if (!shortread && p == NULL) {
700 /* Need a larger buffer to fit this line */
701 nfilled += nread;
702 buffersize *= 2;
703 if (buffersize > INT_MAX) {
704 PyErr_SetString(PyExc_OverflowError,
705 "line is longer than a Python string can hold");
706 goto error;
708 if (big_buffer == NULL) {
709 /* Create the big buffer */
710 big_buffer = PyString_FromStringAndSize(
711 NULL, buffersize);
712 if (big_buffer == NULL)
713 goto error;
714 buffer = PyString_AS_STRING(big_buffer);
715 memcpy(buffer, small_buffer, nfilled);
717 else {
718 /* Grow the big buffer */
719 _PyString_Resize(&big_buffer, buffersize);
720 buffer = PyString_AS_STRING(big_buffer);
722 continue;
724 end = buffer+nfilled+nread;
725 q = buffer;
726 while (p != NULL) {
727 /* Process complete lines */
728 p++;
729 line = PyString_FromStringAndSize(q, p-q);
730 if (line == NULL)
731 goto error;
732 err = PyList_Append(list, line);
733 Py_DECREF(line);
734 if (err != 0)
735 goto error;
736 q = p;
737 p = memchr(q, '\n', end-q);
739 /* Move the remaining incomplete line to the start */
740 nfilled = end-q;
741 memmove(buffer, q, nfilled);
742 if (sizehint > 0)
743 if (totalread >= (size_t)sizehint)
744 break;
745 if (shortread) {
746 sizehint = 0;
747 break;
750 if (nfilled != 0) {
751 /* Partial last line */
752 line = PyString_FromStringAndSize(buffer, nfilled);
753 if (line == NULL)
754 goto error;
755 if (sizehint > 0) {
756 /* Need to complete the last line */
757 PyObject *rest = Util_GetLine(self, 0);
758 if (rest == NULL) {
759 Py_DECREF(line);
760 goto error;
762 PyString_Concat(&line, rest);
763 Py_DECREF(rest);
764 if (line == NULL)
765 goto error;
767 err = PyList_Append(list, line);
768 Py_DECREF(line);
769 if (err != 0)
770 goto error;
773 cleanup:
774 RELEASE_LOCK(self);
775 if (big_buffer) {
776 Py_DECREF(big_buffer);
778 return list;
781 PyDoc_STRVAR(BZ2File_xreadlines__doc__,
782 "xreadlines() -> self\n\
784 For backward compatibility. BZ2File objects now include the performance\n\
785 optimizations previously implemented in the xreadlines module.\n\
788 PyDoc_STRVAR(BZ2File_write__doc__,
789 "write(data) -> None\n\
791 Write the 'data' string to file. Note that due to buffering, close() may\n\
792 be needed before the file on disk reflects the data written.\n\
795 /* This is a hacked version of Python's fileobject.c:file_write(). */
796 static PyObject *
797 BZ2File_write(BZ2FileObject *self, PyObject *args)
799 PyObject *ret = NULL;
800 char *buf;
801 int len;
802 int bzerror;
804 if (!PyArg_ParseTuple(args, "s#:write", &buf, &len))
805 return NULL;
807 ACQUIRE_LOCK(self);
808 switch (self->mode) {
809 case MODE_WRITE:
810 break;
812 case MODE_CLOSED:
813 PyErr_SetString(PyExc_ValueError,
814 "I/O operation on closed file");
815 goto cleanup;
817 default:
818 PyErr_SetString(PyExc_IOError,
819 "file is not ready for writing");
820 goto cleanup;
823 self->f_softspace = 0;
825 Py_BEGIN_ALLOW_THREADS
826 BZ2_bzWrite (&bzerror, self->fp, buf, len);
827 self->pos += len;
828 Py_END_ALLOW_THREADS
830 if (bzerror != BZ_OK) {
831 Util_CatchBZ2Error(bzerror);
832 goto cleanup;
835 Py_INCREF(Py_None);
836 ret = Py_None;
838 cleanup:
839 RELEASE_LOCK(self);
840 return ret;
843 PyDoc_STRVAR(BZ2File_writelines__doc__,
844 "writelines(sequence_of_strings) -> None\n\
846 Write the sequence of strings to the file. Note that newlines are not\n\
847 added. The sequence can be any iterable object producing strings. This is\n\
848 equivalent to calling write() for each string.\n\
851 /* This is a hacked version of Python's fileobject.c:file_writelines(). */
852 static PyObject *
853 BZ2File_writelines(BZ2FileObject *self, PyObject *seq)
855 #define CHUNKSIZE 1000
856 PyObject *list = NULL;
857 PyObject *iter = NULL;
858 PyObject *ret = NULL;
859 PyObject *line;
860 int i, j, index, len, islist;
861 int bzerror;
863 ACQUIRE_LOCK(self);
864 switch (self->mode) {
865 case MODE_WRITE:
866 break;
868 case MODE_CLOSED:
869 PyErr_SetString(PyExc_ValueError,
870 "I/O operation on closed file");
871 goto error;
873 default:
874 PyErr_SetString(PyExc_IOError,
875 "file is not ready for writing");
876 goto error;
879 islist = PyList_Check(seq);
880 if (!islist) {
881 iter = PyObject_GetIter(seq);
882 if (iter == NULL) {
883 PyErr_SetString(PyExc_TypeError,
884 "writelines() requires an iterable argument");
885 goto error;
887 list = PyList_New(CHUNKSIZE);
888 if (list == NULL)
889 goto error;
892 /* Strategy: slurp CHUNKSIZE lines into a private list,
893 checking that they are all strings, then write that list
894 without holding the interpreter lock, then come back for more. */
895 for (index = 0; ; index += CHUNKSIZE) {
896 if (islist) {
897 Py_XDECREF(list);
898 list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
899 if (list == NULL)
900 goto error;
901 j = PyList_GET_SIZE(list);
903 else {
904 for (j = 0; j < CHUNKSIZE; j++) {
905 line = PyIter_Next(iter);
906 if (line == NULL) {
907 if (PyErr_Occurred())
908 goto error;
909 break;
911 PyList_SetItem(list, j, line);
914 if (j == 0)
915 break;
917 /* Check that all entries are indeed strings. If not,
918 apply the same rules as for file.write() and
919 convert the rets to strings. This is slow, but
920 seems to be the only way since all conversion APIs
921 could potentially execute Python code. */
922 for (i = 0; i < j; i++) {
923 PyObject *v = PyList_GET_ITEM(list, i);
924 if (!PyString_Check(v)) {
925 const char *buffer;
926 Py_ssize_t len;
927 if (PyObject_AsCharBuffer(v, &buffer, &len)) {
928 PyErr_SetString(PyExc_TypeError,
929 "writelines() "
930 "argument must be "
931 "a sequence of "
932 "strings");
933 goto error;
935 line = PyString_FromStringAndSize(buffer,
936 len);
937 if (line == NULL)
938 goto error;
939 Py_DECREF(v);
940 PyList_SET_ITEM(list, i, line);
944 self->f_softspace = 0;
946 /* Since we are releasing the global lock, the
947 following code may *not* execute Python code. */
948 Py_BEGIN_ALLOW_THREADS
949 for (i = 0; i < j; i++) {
950 line = PyList_GET_ITEM(list, i);
951 len = PyString_GET_SIZE(line);
952 BZ2_bzWrite (&bzerror, self->fp,
953 PyString_AS_STRING(line), len);
954 if (bzerror != BZ_OK) {
955 Py_BLOCK_THREADS
956 Util_CatchBZ2Error(bzerror);
957 goto error;
960 Py_END_ALLOW_THREADS
962 if (j < CHUNKSIZE)
963 break;
966 Py_INCREF(Py_None);
967 ret = Py_None;
969 error:
970 RELEASE_LOCK(self);
971 Py_XDECREF(list);
972 Py_XDECREF(iter);
973 return ret;
974 #undef CHUNKSIZE
977 PyDoc_STRVAR(BZ2File_seek__doc__,
978 "seek(offset [, whence]) -> None\n\
980 Move to new file position. Argument offset is a byte count. Optional\n\
981 argument whence defaults to 0 (offset from start of file, offset\n\
982 should be >= 0); other values are 1 (move relative to current position,\n\
983 positive or negative), and 2 (move relative to end of file, usually\n\
984 negative, although many platforms allow seeking beyond the end of a file).\n\
986 Note that seeking of bz2 files is emulated, and depending on the parameters\n\
987 the operation may be extremely slow.\n\
990 static PyObject *
991 BZ2File_seek(BZ2FileObject *self, PyObject *args)
993 int where = 0;
994 PyObject *offobj;
995 Py_off_t offset;
996 char small_buffer[SMALLCHUNK];
997 char *buffer = small_buffer;
998 size_t buffersize = SMALLCHUNK;
999 int bytesread = 0;
1000 size_t readsize;
1001 int chunksize;
1002 int bzerror;
1003 PyObject *ret = NULL;
1005 if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &where))
1006 return NULL;
1007 #if !defined(HAVE_LARGEFILE_SUPPORT)
1008 offset = PyInt_AsLong(offobj);
1009 #else
1010 offset = PyLong_Check(offobj) ?
1011 PyLong_AsLongLong(offobj) : PyInt_AsLong(offobj);
1012 #endif
1013 if (PyErr_Occurred())
1014 return NULL;
1016 ACQUIRE_LOCK(self);
1017 Util_DropReadAhead(self);
1018 switch (self->mode) {
1019 case MODE_READ:
1020 case MODE_READ_EOF:
1021 break;
1023 case MODE_CLOSED:
1024 PyErr_SetString(PyExc_ValueError,
1025 "I/O operation on closed file");
1026 goto cleanup;
1028 default:
1029 PyErr_SetString(PyExc_IOError,
1030 "seek works only while reading");
1031 goto cleanup;
1034 if (where == 2) {
1035 if (self->size == -1) {
1036 assert(self->mode != MODE_READ_EOF);
1037 for (;;) {
1038 Py_BEGIN_ALLOW_THREADS
1039 chunksize = Util_UnivNewlineRead(
1040 &bzerror, self->fp,
1041 buffer, buffersize,
1042 self);
1043 self->pos += chunksize;
1044 Py_END_ALLOW_THREADS
1046 bytesread += chunksize;
1047 if (bzerror == BZ_STREAM_END) {
1048 break;
1049 } else if (bzerror != BZ_OK) {
1050 Util_CatchBZ2Error(bzerror);
1051 goto cleanup;
1054 self->mode = MODE_READ_EOF;
1055 self->size = self->pos;
1056 bytesread = 0;
1058 offset = self->size + offset;
1059 } else if (where == 1) {
1060 offset = self->pos + offset;
1063 /* Before getting here, offset must be the absolute position the file
1064 * pointer should be set to. */
1066 if (offset >= self->pos) {
1067 /* we can move forward */
1068 offset -= self->pos;
1069 } else {
1070 /* we cannot move back, so rewind the stream */
1071 BZ2_bzReadClose(&bzerror, self->fp);
1072 if (bzerror != BZ_OK) {
1073 Util_CatchBZ2Error(bzerror);
1074 goto cleanup;
1076 ret = PyObject_CallMethod(self->file, "seek", "(i)", 0);
1077 if (!ret)
1078 goto cleanup;
1079 Py_DECREF(ret);
1080 ret = NULL;
1081 self->pos = 0;
1082 self->fp = BZ2_bzReadOpen(&bzerror, PyFile_AsFile(self->file),
1083 0, 0, NULL, 0);
1084 if (bzerror != BZ_OK) {
1085 Util_CatchBZ2Error(bzerror);
1086 goto cleanup;
1088 self->mode = MODE_READ;
1091 if (offset <= 0 || self->mode == MODE_READ_EOF)
1092 goto exit;
1094 /* Before getting here, offset must be set to the number of bytes
1095 * to walk forward. */
1096 for (;;) {
1097 if (offset-bytesread > buffersize)
1098 readsize = buffersize;
1099 else
1100 /* offset might be wider that readsize, but the result
1101 * of the subtraction is bound by buffersize (see the
1102 * condition above). buffersize is 8192. */
1103 readsize = (size_t)(offset-bytesread);
1104 Py_BEGIN_ALLOW_THREADS
1105 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
1106 buffer, readsize, self);
1107 self->pos += chunksize;
1108 Py_END_ALLOW_THREADS
1109 bytesread += chunksize;
1110 if (bzerror == BZ_STREAM_END) {
1111 self->size = self->pos;
1112 self->mode = MODE_READ_EOF;
1113 break;
1114 } else if (bzerror != BZ_OK) {
1115 Util_CatchBZ2Error(bzerror);
1116 goto cleanup;
1118 if (bytesread == offset)
1119 break;
1122 exit:
1123 Py_INCREF(Py_None);
1124 ret = Py_None;
1126 cleanup:
1127 RELEASE_LOCK(self);
1128 return ret;
1131 PyDoc_STRVAR(BZ2File_tell__doc__,
1132 "tell() -> int\n\
1134 Return the current file position, an integer (may be a long integer).\n\
1137 static PyObject *
1138 BZ2File_tell(BZ2FileObject *self, PyObject *args)
1140 PyObject *ret = NULL;
1142 if (self->mode == MODE_CLOSED) {
1143 PyErr_SetString(PyExc_ValueError,
1144 "I/O operation on closed file");
1145 goto cleanup;
1148 #if !defined(HAVE_LARGEFILE_SUPPORT)
1149 ret = PyInt_FromLong(self->pos);
1150 #else
1151 ret = PyLong_FromLongLong(self->pos);
1152 #endif
1154 cleanup:
1155 return ret;
1158 PyDoc_STRVAR(BZ2File_close__doc__,
1159 "close() -> None or (perhaps) an integer\n\
1161 Close the file. Sets data attribute .closed to true. A closed file\n\
1162 cannot be used for further I/O operations. close() may be called more\n\
1163 than once without error.\n\
1166 static PyObject *
1167 BZ2File_close(BZ2FileObject *self)
1169 PyObject *ret = NULL;
1170 int bzerror = BZ_OK;
1172 ACQUIRE_LOCK(self);
1173 switch (self->mode) {
1174 case MODE_READ:
1175 case MODE_READ_EOF:
1176 BZ2_bzReadClose(&bzerror, self->fp);
1177 break;
1178 case MODE_WRITE:
1179 BZ2_bzWriteClose(&bzerror, self->fp,
1180 0, NULL, NULL);
1181 break;
1183 self->mode = MODE_CLOSED;
1184 ret = PyObject_CallMethod(self->file, "close", NULL);
1185 if (bzerror != BZ_OK) {
1186 Util_CatchBZ2Error(bzerror);
1187 Py_XDECREF(ret);
1188 ret = NULL;
1191 RELEASE_LOCK(self);
1192 return ret;
1195 static PyObject *BZ2File_getiter(BZ2FileObject *self);
1197 static PyMethodDef BZ2File_methods[] = {
1198 {"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__},
1199 {"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__doc__},
1200 {"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readlines__doc__},
1201 {"xreadlines", (PyCFunction)BZ2File_getiter, METH_VARARGS, BZ2File_xreadlines__doc__},
1202 {"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__},
1203 {"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__doc__},
1204 {"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__},
1205 {"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__},
1206 {"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__},
1207 {NULL, NULL} /* sentinel */
1211 /* ===================================================================== */
1212 /* Getters and setters of BZ2File. */
1214 /* This is a hacked version of Python's fileobject.c:get_newlines(). */
1215 static PyObject *
1216 BZ2File_get_newlines(BZ2FileObject *self, void *closure)
1218 switch (self->f_newlinetypes) {
1219 case NEWLINE_UNKNOWN:
1220 Py_INCREF(Py_None);
1221 return Py_None;
1222 case NEWLINE_CR:
1223 return PyString_FromString("\r");
1224 case NEWLINE_LF:
1225 return PyString_FromString("\n");
1226 case NEWLINE_CR|NEWLINE_LF:
1227 return Py_BuildValue("(ss)", "\r", "\n");
1228 case NEWLINE_CRLF:
1229 return PyString_FromString("\r\n");
1230 case NEWLINE_CR|NEWLINE_CRLF:
1231 return Py_BuildValue("(ss)", "\r", "\r\n");
1232 case NEWLINE_LF|NEWLINE_CRLF:
1233 return Py_BuildValue("(ss)", "\n", "\r\n");
1234 case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF:
1235 return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
1236 default:
1237 PyErr_Format(PyExc_SystemError,
1238 "Unknown newlines value 0x%x\n",
1239 self->f_newlinetypes);
1240 return NULL;
1244 static PyObject *
1245 BZ2File_get_closed(BZ2FileObject *self, void *closure)
1247 return PyInt_FromLong(self->mode == MODE_CLOSED);
1250 static PyObject *
1251 BZ2File_get_mode(BZ2FileObject *self, void *closure)
1253 return PyObject_GetAttrString(self->file, "mode");
1256 static PyObject *
1257 BZ2File_get_name(BZ2FileObject *self, void *closure)
1259 return PyObject_GetAttrString(self->file, "name");
1262 static PyGetSetDef BZ2File_getset[] = {
1263 {"closed", (getter)BZ2File_get_closed, NULL,
1264 "True if the file is closed"},
1265 {"newlines", (getter)BZ2File_get_newlines, NULL,
1266 "end-of-line convention used in this file"},
1267 {"mode", (getter)BZ2File_get_mode, NULL,
1268 "file mode ('r', 'w', or 'U')"},
1269 {"name", (getter)BZ2File_get_name, NULL,
1270 "file name"},
1271 {NULL} /* Sentinel */
1275 /* ===================================================================== */
1276 /* Members of BZ2File_Type. */
1278 #undef OFF
1279 #define OFF(x) offsetof(BZ2FileObject, x)
1281 static PyMemberDef BZ2File_members[] = {
1282 {"softspace", T_INT, OFF(f_softspace), 0,
1283 "flag indicating that a space needs to be printed; used by print"},
1284 {NULL} /* Sentinel */
1287 /* ===================================================================== */
1288 /* Slot definitions for BZ2File_Type. */
1290 static int
1291 BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
1293 static char *kwlist[] = {"filename", "mode", "buffering",
1294 "compresslevel", 0};
1295 PyObject *name;
1296 char *mode = "r";
1297 int buffering = -1;
1298 int compresslevel = 9;
1299 int bzerror;
1300 int mode_char = 0;
1302 self->size = -1;
1304 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|sii:BZ2File",
1305 kwlist, &name, &mode, &buffering,
1306 &compresslevel))
1307 return -1;
1309 if (compresslevel < 1 || compresslevel > 9) {
1310 PyErr_SetString(PyExc_ValueError,
1311 "compresslevel must be between 1 and 9");
1312 return -1;
1315 for (;;) {
1316 int error = 0;
1317 switch (*mode) {
1318 case 'r':
1319 case 'w':
1320 if (mode_char)
1321 error = 1;
1322 mode_char = *mode;
1323 break;
1325 case 'b':
1326 break;
1328 case 'U':
1329 #ifdef __VMS
1330 self->f_univ_newline = 0;
1331 #else
1332 self->f_univ_newline = 1;
1333 #endif
1334 break;
1336 default:
1337 error = 1;
1338 break;
1340 if (error) {
1341 PyErr_Format(PyExc_ValueError,
1342 "invalid mode char %c", *mode);
1343 return -1;
1345 mode++;
1346 if (*mode == '\0')
1347 break;
1350 if (mode_char == 0) {
1351 mode_char = 'r';
1354 mode = (mode_char == 'r') ? "rb" : "wb";
1356 self->file = PyObject_CallFunction((PyObject*)&PyFile_Type, "(Osi)",
1357 name, mode, buffering);
1358 if (self->file == NULL)
1359 return -1;
1361 /* From now on, we have stuff to dealloc, so jump to error label
1362 * instead of returning */
1364 #ifdef WITH_THREAD
1365 self->lock = PyThread_allocate_lock();
1366 if (!self->lock) {
1367 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1368 goto error;
1370 #endif
1372 if (mode_char == 'r')
1373 self->fp = BZ2_bzReadOpen(&bzerror,
1374 PyFile_AsFile(self->file),
1375 0, 0, NULL, 0);
1376 else
1377 self->fp = BZ2_bzWriteOpen(&bzerror,
1378 PyFile_AsFile(self->file),
1379 compresslevel, 0, 0);
1381 if (bzerror != BZ_OK) {
1382 Util_CatchBZ2Error(bzerror);
1383 goto error;
1386 self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE;
1388 return 0;
1390 error:
1391 Py_CLEAR(self->file);
1392 #ifdef WITH_THREAD
1393 if (self->lock) {
1394 PyThread_free_lock(self->lock);
1395 self->lock = NULL;
1397 #endif
1398 return -1;
1401 static void
1402 BZ2File_dealloc(BZ2FileObject *self)
1404 int bzerror;
1405 #ifdef WITH_THREAD
1406 if (self->lock)
1407 PyThread_free_lock(self->lock);
1408 #endif
1409 switch (self->mode) {
1410 case MODE_READ:
1411 case MODE_READ_EOF:
1412 BZ2_bzReadClose(&bzerror, self->fp);
1413 break;
1414 case MODE_WRITE:
1415 BZ2_bzWriteClose(&bzerror, self->fp,
1416 0, NULL, NULL);
1417 break;
1419 Util_DropReadAhead(self);
1420 Py_XDECREF(self->file);
1421 self->ob_type->tp_free((PyObject *)self);
1424 /* This is a hacked version of Python's fileobject.c:file_getiter(). */
1425 static PyObject *
1426 BZ2File_getiter(BZ2FileObject *self)
1428 if (self->mode == MODE_CLOSED) {
1429 PyErr_SetString(PyExc_ValueError,
1430 "I/O operation on closed file");
1431 return NULL;
1433 Py_INCREF((PyObject*)self);
1434 return (PyObject *)self;
1437 /* This is a hacked version of Python's fileobject.c:file_iternext(). */
1438 #define READAHEAD_BUFSIZE 8192
1439 static PyObject *
1440 BZ2File_iternext(BZ2FileObject *self)
1442 PyStringObject* ret;
1443 ACQUIRE_LOCK(self);
1444 if (self->mode == MODE_CLOSED) {
1445 PyErr_SetString(PyExc_ValueError,
1446 "I/O operation on closed file");
1447 return NULL;
1449 ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE);
1450 RELEASE_LOCK(self);
1451 if (ret == NULL || PyString_GET_SIZE(ret) == 0) {
1452 Py_XDECREF(ret);
1453 return NULL;
1455 return (PyObject *)ret;
1458 /* ===================================================================== */
1459 /* BZ2File_Type definition. */
1461 PyDoc_VAR(BZ2File__doc__) =
1462 PyDoc_STR(
1463 "BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
1465 Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
1466 writing. When opened for writing, the file will be created if it doesn't\n\
1467 exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
1468 unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
1469 is given, must be a number between 1 and 9.\n\
1471 PyDoc_STR(
1472 "\n\
1473 Add a 'U' to mode to open the file for input with universal newline\n\
1474 support. Any line ending in the input file will be seen as a '\\n' in\n\
1475 Python. Also, a file so opened gains the attribute 'newlines'; the value\n\
1476 for this attribute is one of None (no newline read yet), '\\r', '\\n',\n\
1477 '\\r\\n' or a tuple containing all the newline types seen. Universal\n\
1478 newlines are available only when reading.\n\
1482 static PyTypeObject BZ2File_Type = {
1483 PyObject_HEAD_INIT(NULL)
1484 0, /*ob_size*/
1485 "bz2.BZ2File", /*tp_name*/
1486 sizeof(BZ2FileObject), /*tp_basicsize*/
1487 0, /*tp_itemsize*/
1488 (destructor)BZ2File_dealloc, /*tp_dealloc*/
1489 0, /*tp_print*/
1490 0, /*tp_getattr*/
1491 0, /*tp_setattr*/
1492 0, /*tp_compare*/
1493 0, /*tp_repr*/
1494 0, /*tp_as_number*/
1495 0, /*tp_as_sequence*/
1496 0, /*tp_as_mapping*/
1497 0, /*tp_hash*/
1498 0, /*tp_call*/
1499 0, /*tp_str*/
1500 PyObject_GenericGetAttr,/*tp_getattro*/
1501 PyObject_GenericSetAttr,/*tp_setattro*/
1502 0, /*tp_as_buffer*/
1503 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1504 BZ2File__doc__, /*tp_doc*/
1505 0, /*tp_traverse*/
1506 0, /*tp_clear*/
1507 0, /*tp_richcompare*/
1508 0, /*tp_weaklistoffset*/
1509 (getiterfunc)BZ2File_getiter, /*tp_iter*/
1510 (iternextfunc)BZ2File_iternext, /*tp_iternext*/
1511 BZ2File_methods, /*tp_methods*/
1512 BZ2File_members, /*tp_members*/
1513 BZ2File_getset, /*tp_getset*/
1514 0, /*tp_base*/
1515 0, /*tp_dict*/
1516 0, /*tp_descr_get*/
1517 0, /*tp_descr_set*/
1518 0, /*tp_dictoffset*/
1519 (initproc)BZ2File_init, /*tp_init*/
1520 PyType_GenericAlloc, /*tp_alloc*/
1521 PyType_GenericNew, /*tp_new*/
1522 _PyObject_Del, /*tp_free*/
1523 0, /*tp_is_gc*/
1527 /* ===================================================================== */
1528 /* Methods of BZ2Comp. */
1530 PyDoc_STRVAR(BZ2Comp_compress__doc__,
1531 "compress(data) -> string\n\
1533 Provide more data to the compressor object. It will return chunks of\n\
1534 compressed data whenever possible. When you've finished providing data\n\
1535 to compress, call the flush() method to finish the compression process,\n\
1536 and return what is left in the internal buffers.\n\
1539 static PyObject *
1540 BZ2Comp_compress(BZ2CompObject *self, PyObject *args)
1542 char *data;
1543 int datasize;
1544 int bufsize = SMALLCHUNK;
1545 PY_LONG_LONG totalout;
1546 PyObject *ret = NULL;
1547 bz_stream *bzs = &self->bzs;
1548 int bzerror;
1550 if (!PyArg_ParseTuple(args, "s#:compress", &data, &datasize))
1551 return NULL;
1553 if (datasize == 0)
1554 return PyString_FromString("");
1556 ACQUIRE_LOCK(self);
1557 if (!self->running) {
1558 PyErr_SetString(PyExc_ValueError,
1559 "this object was already flushed");
1560 goto error;
1563 ret = PyString_FromStringAndSize(NULL, bufsize);
1564 if (!ret)
1565 goto error;
1567 bzs->next_in = data;
1568 bzs->avail_in = datasize;
1569 bzs->next_out = BUF(ret);
1570 bzs->avail_out = bufsize;
1572 totalout = BZS_TOTAL_OUT(bzs);
1574 for (;;) {
1575 Py_BEGIN_ALLOW_THREADS
1576 bzerror = BZ2_bzCompress(bzs, BZ_RUN);
1577 Py_END_ALLOW_THREADS
1578 if (bzerror != BZ_RUN_OK) {
1579 Util_CatchBZ2Error(bzerror);
1580 goto error;
1582 if (bzs->avail_out == 0) {
1583 bufsize = Util_NewBufferSize(bufsize);
1584 if (_PyString_Resize(&ret, bufsize) < 0) {
1585 BZ2_bzCompressEnd(bzs);
1586 goto error;
1588 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1589 - totalout);
1590 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1591 } else if (bzs->avail_in == 0) {
1592 break;
1596 _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
1598 RELEASE_LOCK(self);
1599 return ret;
1601 error:
1602 RELEASE_LOCK(self);
1603 Py_XDECREF(ret);
1604 return NULL;
1607 PyDoc_STRVAR(BZ2Comp_flush__doc__,
1608 "flush() -> string\n\
1610 Finish the compression process and return what is left in internal buffers.\n\
1611 You must not use the compressor object after calling this method.\n\
1614 static PyObject *
1615 BZ2Comp_flush(BZ2CompObject *self)
1617 int bufsize = SMALLCHUNK;
1618 PyObject *ret = NULL;
1619 bz_stream *bzs = &self->bzs;
1620 PY_LONG_LONG totalout;
1621 int bzerror;
1623 ACQUIRE_LOCK(self);
1624 if (!self->running) {
1625 PyErr_SetString(PyExc_ValueError, "object was already "
1626 "flushed");
1627 goto error;
1629 self->running = 0;
1631 ret = PyString_FromStringAndSize(NULL, bufsize);
1632 if (!ret)
1633 goto error;
1635 bzs->next_out = BUF(ret);
1636 bzs->avail_out = bufsize;
1638 totalout = BZS_TOTAL_OUT(bzs);
1640 for (;;) {
1641 Py_BEGIN_ALLOW_THREADS
1642 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1643 Py_END_ALLOW_THREADS
1644 if (bzerror == BZ_STREAM_END) {
1645 break;
1646 } else if (bzerror != BZ_FINISH_OK) {
1647 Util_CatchBZ2Error(bzerror);
1648 goto error;
1650 if (bzs->avail_out == 0) {
1651 bufsize = Util_NewBufferSize(bufsize);
1652 if (_PyString_Resize(&ret, bufsize) < 0)
1653 goto error;
1654 bzs->next_out = BUF(ret);
1655 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1656 - totalout);
1657 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1661 if (bzs->avail_out != 0)
1662 _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
1664 RELEASE_LOCK(self);
1665 return ret;
1667 error:
1668 RELEASE_LOCK(self);
1669 Py_XDECREF(ret);
1670 return NULL;
1673 static PyMethodDef BZ2Comp_methods[] = {
1674 {"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS,
1675 BZ2Comp_compress__doc__},
1676 {"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS,
1677 BZ2Comp_flush__doc__},
1678 {NULL, NULL} /* sentinel */
1682 /* ===================================================================== */
1683 /* Slot definitions for BZ2Comp_Type. */
1685 static int
1686 BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs)
1688 int compresslevel = 9;
1689 int bzerror;
1690 static char *kwlist[] = {"compresslevel", 0};
1692 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor",
1693 kwlist, &compresslevel))
1694 return -1;
1696 if (compresslevel < 1 || compresslevel > 9) {
1697 PyErr_SetString(PyExc_ValueError,
1698 "compresslevel must be between 1 and 9");
1699 goto error;
1702 #ifdef WITH_THREAD
1703 self->lock = PyThread_allocate_lock();
1704 if (!self->lock) {
1705 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1706 goto error;
1708 #endif
1710 memset(&self->bzs, 0, sizeof(bz_stream));
1711 bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
1712 if (bzerror != BZ_OK) {
1713 Util_CatchBZ2Error(bzerror);
1714 goto error;
1717 self->running = 1;
1719 return 0;
1720 error:
1721 #ifdef WITH_THREAD
1722 if (self->lock) {
1723 PyThread_free_lock(self->lock);
1724 self->lock = NULL;
1726 #endif
1727 return -1;
1730 static void
1731 BZ2Comp_dealloc(BZ2CompObject *self)
1733 #ifdef WITH_THREAD
1734 if (self->lock)
1735 PyThread_free_lock(self->lock);
1736 #endif
1737 BZ2_bzCompressEnd(&self->bzs);
1738 self->ob_type->tp_free((PyObject *)self);
1742 /* ===================================================================== */
1743 /* BZ2Comp_Type definition. */
1745 PyDoc_STRVAR(BZ2Comp__doc__,
1746 "BZ2Compressor([compresslevel=9]) -> compressor object\n\
1748 Create a new compressor object. This object may be used to compress\n\
1749 data sequentially. If you want to compress data in one shot, use the\n\
1750 compress() function instead. The compresslevel parameter, if given,\n\
1751 must be a number between 1 and 9.\n\
1754 static PyTypeObject BZ2Comp_Type = {
1755 PyObject_HEAD_INIT(NULL)
1756 0, /*ob_size*/
1757 "bz2.BZ2Compressor", /*tp_name*/
1758 sizeof(BZ2CompObject), /*tp_basicsize*/
1759 0, /*tp_itemsize*/
1760 (destructor)BZ2Comp_dealloc, /*tp_dealloc*/
1761 0, /*tp_print*/
1762 0, /*tp_getattr*/
1763 0, /*tp_setattr*/
1764 0, /*tp_compare*/
1765 0, /*tp_repr*/
1766 0, /*tp_as_number*/
1767 0, /*tp_as_sequence*/
1768 0, /*tp_as_mapping*/
1769 0, /*tp_hash*/
1770 0, /*tp_call*/
1771 0, /*tp_str*/
1772 PyObject_GenericGetAttr,/*tp_getattro*/
1773 PyObject_GenericSetAttr,/*tp_setattro*/
1774 0, /*tp_as_buffer*/
1775 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1776 BZ2Comp__doc__, /*tp_doc*/
1777 0, /*tp_traverse*/
1778 0, /*tp_clear*/
1779 0, /*tp_richcompare*/
1780 0, /*tp_weaklistoffset*/
1781 0, /*tp_iter*/
1782 0, /*tp_iternext*/
1783 BZ2Comp_methods, /*tp_methods*/
1784 0, /*tp_members*/
1785 0, /*tp_getset*/
1786 0, /*tp_base*/
1787 0, /*tp_dict*/
1788 0, /*tp_descr_get*/
1789 0, /*tp_descr_set*/
1790 0, /*tp_dictoffset*/
1791 (initproc)BZ2Comp_init, /*tp_init*/
1792 PyType_GenericAlloc, /*tp_alloc*/
1793 PyType_GenericNew, /*tp_new*/
1794 _PyObject_Del, /*tp_free*/
1795 0, /*tp_is_gc*/
1799 /* ===================================================================== */
1800 /* Members of BZ2Decomp. */
1802 #undef OFF
1803 #define OFF(x) offsetof(BZ2DecompObject, x)
1805 static PyMemberDef BZ2Decomp_members[] = {
1806 {"unused_data", T_OBJECT, OFF(unused_data), RO},
1807 {NULL} /* Sentinel */
1811 /* ===================================================================== */
1812 /* Methods of BZ2Decomp. */
1814 PyDoc_STRVAR(BZ2Decomp_decompress__doc__,
1815 "decompress(data) -> string\n\
1817 Provide more data to the decompressor object. It will return chunks\n\
1818 of decompressed data whenever possible. If you try to decompress data\n\
1819 after the end of stream is found, EOFError will be raised. If any data\n\
1820 was found after the end of stream, it'll be ignored and saved in\n\
1821 unused_data attribute.\n\
1824 static PyObject *
1825 BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args)
1827 char *data;
1828 int datasize;
1829 int bufsize = SMALLCHUNK;
1830 PY_LONG_LONG totalout;
1831 PyObject *ret = NULL;
1832 bz_stream *bzs = &self->bzs;
1833 int bzerror;
1835 if (!PyArg_ParseTuple(args, "s#:decompress", &data, &datasize))
1836 return NULL;
1838 ACQUIRE_LOCK(self);
1839 if (!self->running) {
1840 PyErr_SetString(PyExc_EOFError, "end of stream was "
1841 "already found");
1842 goto error;
1845 ret = PyString_FromStringAndSize(NULL, bufsize);
1846 if (!ret)
1847 goto error;
1849 bzs->next_in = data;
1850 bzs->avail_in = datasize;
1851 bzs->next_out = BUF(ret);
1852 bzs->avail_out = bufsize;
1854 totalout = BZS_TOTAL_OUT(bzs);
1856 for (;;) {
1857 Py_BEGIN_ALLOW_THREADS
1858 bzerror = BZ2_bzDecompress(bzs);
1859 Py_END_ALLOW_THREADS
1860 if (bzerror == BZ_STREAM_END) {
1861 if (bzs->avail_in != 0) {
1862 Py_DECREF(self->unused_data);
1863 self->unused_data =
1864 PyString_FromStringAndSize(bzs->next_in,
1865 bzs->avail_in);
1867 self->running = 0;
1868 break;
1870 if (bzerror != BZ_OK) {
1871 Util_CatchBZ2Error(bzerror);
1872 goto error;
1874 if (bzs->avail_out == 0) {
1875 bufsize = Util_NewBufferSize(bufsize);
1876 if (_PyString_Resize(&ret, bufsize) < 0) {
1877 BZ2_bzDecompressEnd(bzs);
1878 goto error;
1880 bzs->next_out = BUF(ret);
1881 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1882 - totalout);
1883 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1884 } else if (bzs->avail_in == 0) {
1885 break;
1889 if (bzs->avail_out != 0)
1890 _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
1892 RELEASE_LOCK(self);
1893 return ret;
1895 error:
1896 RELEASE_LOCK(self);
1897 Py_XDECREF(ret);
1898 return NULL;
1901 static PyMethodDef BZ2Decomp_methods[] = {
1902 {"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_decompress__doc__},
1903 {NULL, NULL} /* sentinel */
1907 /* ===================================================================== */
1908 /* Slot definitions for BZ2Decomp_Type. */
1910 static int
1911 BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs)
1913 int bzerror;
1915 if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
1916 return -1;
1918 #ifdef WITH_THREAD
1919 self->lock = PyThread_allocate_lock();
1920 if (!self->lock) {
1921 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1922 goto error;
1924 #endif
1926 self->unused_data = PyString_FromString("");
1927 if (!self->unused_data)
1928 goto error;
1930 memset(&self->bzs, 0, sizeof(bz_stream));
1931 bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
1932 if (bzerror != BZ_OK) {
1933 Util_CatchBZ2Error(bzerror);
1934 goto error;
1937 self->running = 1;
1939 return 0;
1941 error:
1942 #ifdef WITH_THREAD
1943 if (self->lock) {
1944 PyThread_free_lock(self->lock);
1945 self->lock = NULL;
1947 #endif
1948 Py_CLEAR(self->unused_data);
1949 return -1;
1952 static void
1953 BZ2Decomp_dealloc(BZ2DecompObject *self)
1955 #ifdef WITH_THREAD
1956 if (self->lock)
1957 PyThread_free_lock(self->lock);
1958 #endif
1959 Py_XDECREF(self->unused_data);
1960 BZ2_bzDecompressEnd(&self->bzs);
1961 self->ob_type->tp_free((PyObject *)self);
1965 /* ===================================================================== */
1966 /* BZ2Decomp_Type definition. */
1968 PyDoc_STRVAR(BZ2Decomp__doc__,
1969 "BZ2Decompressor() -> decompressor object\n\
1971 Create a new decompressor object. This object may be used to decompress\n\
1972 data sequentially. If you want to decompress data in one shot, use the\n\
1973 decompress() function instead.\n\
1976 static PyTypeObject BZ2Decomp_Type = {
1977 PyObject_HEAD_INIT(NULL)
1978 0, /*ob_size*/
1979 "bz2.BZ2Decompressor", /*tp_name*/
1980 sizeof(BZ2DecompObject), /*tp_basicsize*/
1981 0, /*tp_itemsize*/
1982 (destructor)BZ2Decomp_dealloc, /*tp_dealloc*/
1983 0, /*tp_print*/
1984 0, /*tp_getattr*/
1985 0, /*tp_setattr*/
1986 0, /*tp_compare*/
1987 0, /*tp_repr*/
1988 0, /*tp_as_number*/
1989 0, /*tp_as_sequence*/
1990 0, /*tp_as_mapping*/
1991 0, /*tp_hash*/
1992 0, /*tp_call*/
1993 0, /*tp_str*/
1994 PyObject_GenericGetAttr,/*tp_getattro*/
1995 PyObject_GenericSetAttr,/*tp_setattro*/
1996 0, /*tp_as_buffer*/
1997 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1998 BZ2Decomp__doc__, /*tp_doc*/
1999 0, /*tp_traverse*/
2000 0, /*tp_clear*/
2001 0, /*tp_richcompare*/
2002 0, /*tp_weaklistoffset*/
2003 0, /*tp_iter*/
2004 0, /*tp_iternext*/
2005 BZ2Decomp_methods, /*tp_methods*/
2006 BZ2Decomp_members, /*tp_members*/
2007 0, /*tp_getset*/
2008 0, /*tp_base*/
2009 0, /*tp_dict*/
2010 0, /*tp_descr_get*/
2011 0, /*tp_descr_set*/
2012 0, /*tp_dictoffset*/
2013 (initproc)BZ2Decomp_init, /*tp_init*/
2014 PyType_GenericAlloc, /*tp_alloc*/
2015 PyType_GenericNew, /*tp_new*/
2016 _PyObject_Del, /*tp_free*/
2017 0, /*tp_is_gc*/
2021 /* ===================================================================== */
2022 /* Module functions. */
2024 PyDoc_STRVAR(bz2_compress__doc__,
2025 "compress(data [, compresslevel=9]) -> string\n\
2027 Compress data in one shot. If you want to compress data sequentially,\n\
2028 use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
2029 given, must be a number between 1 and 9.\n\
2032 static PyObject *
2033 bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs)
2035 int compresslevel=9;
2036 char *data;
2037 int datasize;
2038 int bufsize;
2039 PyObject *ret = NULL;
2040 bz_stream _bzs;
2041 bz_stream *bzs = &_bzs;
2042 int bzerror;
2043 static char *kwlist[] = {"data", "compresslevel", 0};
2045 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|i",
2046 kwlist, &data, &datasize,
2047 &compresslevel))
2048 return NULL;
2050 if (compresslevel < 1 || compresslevel > 9) {
2051 PyErr_SetString(PyExc_ValueError,
2052 "compresslevel must be between 1 and 9");
2053 return NULL;
2056 /* Conforming to bz2 manual, this is large enough to fit compressed
2057 * data in one shot. We will check it later anyway. */
2058 bufsize = datasize + (datasize/100+1) + 600;
2060 ret = PyString_FromStringAndSize(NULL, bufsize);
2061 if (!ret)
2062 return NULL;
2064 memset(bzs, 0, sizeof(bz_stream));
2066 bzs->next_in = data;
2067 bzs->avail_in = datasize;
2068 bzs->next_out = BUF(ret);
2069 bzs->avail_out = bufsize;
2071 bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0);
2072 if (bzerror != BZ_OK) {
2073 Util_CatchBZ2Error(bzerror);
2074 Py_DECREF(ret);
2075 return NULL;
2078 for (;;) {
2079 Py_BEGIN_ALLOW_THREADS
2080 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
2081 Py_END_ALLOW_THREADS
2082 if (bzerror == BZ_STREAM_END) {
2083 break;
2084 } else if (bzerror != BZ_FINISH_OK) {
2085 BZ2_bzCompressEnd(bzs);
2086 Util_CatchBZ2Error(bzerror);
2087 Py_DECREF(ret);
2088 return NULL;
2090 if (bzs->avail_out == 0) {
2091 bufsize = Util_NewBufferSize(bufsize);
2092 if (_PyString_Resize(&ret, bufsize) < 0) {
2093 BZ2_bzCompressEnd(bzs);
2094 Py_DECREF(ret);
2095 return NULL;
2097 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2098 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2102 if (bzs->avail_out != 0)
2103 _PyString_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs));
2104 BZ2_bzCompressEnd(bzs);
2106 return ret;
2109 PyDoc_STRVAR(bz2_decompress__doc__,
2110 "decompress(data) -> decompressed data\n\
2112 Decompress data in one shot. If you want to decompress data sequentially,\n\
2113 use an instance of BZ2Decompressor instead.\n\
2116 static PyObject *
2117 bz2_decompress(PyObject *self, PyObject *args)
2119 char *data;
2120 int datasize;
2121 int bufsize = SMALLCHUNK;
2122 PyObject *ret;
2123 bz_stream _bzs;
2124 bz_stream *bzs = &_bzs;
2125 int bzerror;
2127 if (!PyArg_ParseTuple(args, "s#:decompress", &data, &datasize))
2128 return NULL;
2130 if (datasize == 0)
2131 return PyString_FromString("");
2133 ret = PyString_FromStringAndSize(NULL, bufsize);
2134 if (!ret)
2135 return NULL;
2137 memset(bzs, 0, sizeof(bz_stream));
2139 bzs->next_in = data;
2140 bzs->avail_in = datasize;
2141 bzs->next_out = BUF(ret);
2142 bzs->avail_out = bufsize;
2144 bzerror = BZ2_bzDecompressInit(bzs, 0, 0);
2145 if (bzerror != BZ_OK) {
2146 Util_CatchBZ2Error(bzerror);
2147 Py_DECREF(ret);
2148 return NULL;
2151 for (;;) {
2152 Py_BEGIN_ALLOW_THREADS
2153 bzerror = BZ2_bzDecompress(bzs);
2154 Py_END_ALLOW_THREADS
2155 if (bzerror == BZ_STREAM_END) {
2156 break;
2157 } else if (bzerror != BZ_OK) {
2158 BZ2_bzDecompressEnd(bzs);
2159 Util_CatchBZ2Error(bzerror);
2160 Py_DECREF(ret);
2161 return NULL;
2163 if (bzs->avail_out == 0) {
2164 bufsize = Util_NewBufferSize(bufsize);
2165 if (_PyString_Resize(&ret, bufsize) < 0) {
2166 BZ2_bzDecompressEnd(bzs);
2167 Py_DECREF(ret);
2168 return NULL;
2170 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2171 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2172 } else if (bzs->avail_in == 0) {
2173 BZ2_bzDecompressEnd(bzs);
2174 PyErr_SetString(PyExc_ValueError,
2175 "couldn't find end of stream");
2176 Py_DECREF(ret);
2177 return NULL;
2181 if (bzs->avail_out != 0)
2182 _PyString_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs));
2183 BZ2_bzDecompressEnd(bzs);
2185 return ret;
2188 static PyMethodDef bz2_methods[] = {
2189 {"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS,
2190 bz2_compress__doc__},
2191 {"decompress", (PyCFunction) bz2_decompress, METH_VARARGS,
2192 bz2_decompress__doc__},
2193 {NULL, NULL} /* sentinel */
2196 /* ===================================================================== */
2197 /* Initialization function. */
2199 PyDoc_STRVAR(bz2__doc__,
2200 "The python bz2 module provides a comprehensive interface for\n\
2201 the bz2 compression library. It implements a complete file\n\
2202 interface, one shot (de)compression functions, and types for\n\
2203 sequential (de)compression.\n\
2206 PyMODINIT_FUNC
2207 initbz2(void)
2209 PyObject *m;
2211 BZ2File_Type.ob_type = &PyType_Type;
2212 BZ2Comp_Type.ob_type = &PyType_Type;
2213 BZ2Decomp_Type.ob_type = &PyType_Type;
2215 m = Py_InitModule3("bz2", bz2_methods, bz2__doc__);
2216 if (m == NULL)
2217 return;
2219 PyModule_AddObject(m, "__author__", PyString_FromString(__author__));
2221 Py_INCREF(&BZ2File_Type);
2222 PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type);
2224 Py_INCREF(&BZ2Comp_Type);
2225 PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type);
2227 Py_INCREF(&BZ2Decomp_Type);
2228 PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type);