/* gitweb page header: "Minor typo fixes"
 * [pytest.git] / Modules / bz2module.c
 * blob 5e5a801907d8362f97bd2bee7d12dc05fc54246a
 */
/*

python-bz2 - python bz2 library interface

Copyright (c) 2002  Gustavo Niemeyer <niemeyer@conectiva.com>
Copyright (c) 2002  Python Software Foundation; All Rights Reserved

*/
10 #include "Python.h"
11 #include <stdio.h>
12 #include <bzlib.h>
13 #include "structmember.h"
15 #ifdef WITH_THREAD
16 #include "pythread.h"
17 #endif
19 static char __author__[] =
20 "The bz2 python module was written by:\n\
21 \n\
22 Gustavo Niemeyer <niemeyer@conectiva.com>\n\
/* Our very own off_t-like type, 64-bit if possible.
 * Copied from Objects/fileobject.c.
 *
 * Without large file support, or when off_t is already at least 8 bytes,
 * Py_off_t is simply off_t.  Otherwise fpos_t is used if it is wide
 * enough, and the build is rejected if neither type is.
 */
#if !defined(HAVE_LARGEFILE_SUPPORT)
typedef off_t Py_off_t;
#elif SIZEOF_OFF_T >= 8
typedef off_t Py_off_t;
#elif SIZEOF_FPOS_T >= 8
typedef fpos_t Py_off_t;
#else
#error "Large file support, but neither off_t nor fpos_t is large enough."
#endif
/* Pointer to the character data of string object v.  The argument is
 * parenthesized so that any expression can be passed safely. */
#define BUF(v) PyString_AS_STRING((PyStringObject *)(v))

/* Values stored in BZ2FileObject.mode. */
#define MODE_CLOSED   0
#define MODE_READ     1
#define MODE_READ_EOF 2    /* reading, and the end of the stream was seen */
#define MODE_WRITE    3

/* Exact type check against BZ2File_Type. */
#define BZ2FileObject_Check(v)  ((v)->ob_type == &BZ2File_Type)
/* BZ_CONFIG_ERROR is used here as the marker for a bzlib.h that exports
 * the BZ2_-prefixed API and splits total_out into hi32/lo32 halves.
 * When it is missing, the unprefixed names are mapped onto the BZ2_ ones
 * and the single total_out field is used.
 *
 * BZS_TOTAL_OUT(bzs) takes a pointer to a bz_stream; the argument and the
 * whole expansion are parenthesized so expressions such as &obj->bzs or
 * uses inside larger arithmetic expand correctly.
 */
#ifdef BZ_CONFIG_ERROR

#if SIZEOF_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((long)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#elif SIZEOF_LONG_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((PY_LONG_LONG)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#else
/* No 64-bit integer type available: only the low 32 bits are reported. */
#define BZS_TOTAL_OUT(bzs) \
    ((bzs)->total_out_lo32)
#endif

#else /* ! BZ_CONFIG_ERROR */

#define BZ2_bzRead bzRead
#define BZ2_bzReadOpen bzReadOpen
#define BZ2_bzReadClose bzReadClose
#define BZ2_bzWrite bzWrite
#define BZ2_bzWriteOpen bzWriteOpen
#define BZ2_bzWriteClose bzWriteClose
#define BZ2_bzCompress bzCompress
#define BZ2_bzCompressInit bzCompressInit
#define BZ2_bzCompressEnd bzCompressEnd
#define BZ2_bzDecompress bzDecompress
#define BZ2_bzDecompressInit bzDecompressInit
#define BZ2_bzDecompressEnd bzDecompressEnd

#define BZS_TOTAL_OUT(bzs) ((bzs)->total_out)

#endif /* ! BZ_CONFIG_ERROR */
/* Per-object locking.  With thread support, ACQUIRE_LOCK blocks (second
 * argument 1) until obj->lock is obtained; without it both macros expand
 * to nothing.  The argument is parenthesized for macro hygiene. */
#ifdef WITH_THREAD
#define ACQUIRE_LOCK(obj) PyThread_acquire_lock((obj)->lock, 1)
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
#else
#define ACQUIRE_LOCK(obj)
#define RELEASE_LOCK(obj)
#endif
/* Bits in f_newlinetypes; values are OR-ed together as each newline
 * convention is encountered. */
#define NEWLINE_UNKNOWN 0       /* No newline seen, yet */
#define NEWLINE_CR 1            /* \r newline seen */
#define NEWLINE_LF 2            /* \n newline seen */
#define NEWLINE_CRLF 4          /* \r\n newline seen */
94 /* ===================================================================== */
95 /* Structure definitions. */
97 typedef struct {
98 PyObject_HEAD
99 PyObject *file;
101 char* f_buf; /* Allocated readahead buffer */
102 char* f_bufend; /* Points after last occupied position */
103 char* f_bufptr; /* Current buffer position */
105 int f_softspace; /* Flag used by 'print' command */
107 int f_univ_newline; /* Handle any newline convention */
108 int f_newlinetypes; /* Types of newlines seen */
109 int f_skipnextlf; /* Skip next \n */
111 BZFILE *fp;
112 int mode;
113 Py_off_t pos;
114 Py_off_t size;
115 #ifdef WITH_THREAD
116 PyThread_type_lock lock;
117 #endif
118 } BZ2FileObject;
120 typedef struct {
121 PyObject_HEAD
122 bz_stream bzs;
123 int running;
124 #ifdef WITH_THREAD
125 PyThread_type_lock lock;
126 #endif
127 } BZ2CompObject;
129 typedef struct {
130 PyObject_HEAD
131 bz_stream bzs;
132 int running;
133 PyObject *unused_data;
134 #ifdef WITH_THREAD
135 PyThread_type_lock lock;
136 #endif
137 } BZ2DecompObject;
139 /* ===================================================================== */
140 /* Utility functions. */
142 static int
143 Util_CatchBZ2Error(int bzerror)
145 int ret = 0;
146 switch(bzerror) {
147 case BZ_OK:
148 case BZ_STREAM_END:
149 break;
151 #ifdef BZ_CONFIG_ERROR
152 case BZ_CONFIG_ERROR:
153 PyErr_SetString(PyExc_SystemError,
154 "the bz2 library was not compiled "
155 "correctly");
156 ret = 1;
157 break;
158 #endif
160 case BZ_PARAM_ERROR:
161 PyErr_SetString(PyExc_ValueError,
162 "the bz2 library has received wrong "
163 "parameters");
164 ret = 1;
165 break;
167 case BZ_MEM_ERROR:
168 PyErr_NoMemory();
169 ret = 1;
170 break;
172 case BZ_DATA_ERROR:
173 case BZ_DATA_ERROR_MAGIC:
174 PyErr_SetString(PyExc_IOError, "invalid data stream");
175 ret = 1;
176 break;
178 case BZ_IO_ERROR:
179 PyErr_SetString(PyExc_IOError, "unknown IO error");
180 ret = 1;
181 break;
183 case BZ_UNEXPECTED_EOF:
184 PyErr_SetString(PyExc_EOFError,
185 "compressed file ended before the "
186 "logical end-of-stream was detected");
187 ret = 1;
188 break;
190 case BZ_SEQUENCE_ERROR:
191 PyErr_SetString(PyExc_RuntimeError,
192 "wrong sequence of bz2 library "
193 "commands used");
194 ret = 1;
195 break;
197 return ret;
/* SMALLCHUNK is the basic buffer increment: BUFSIZ, but never less than
 * 8192 bytes. */
#if BUFSIZ < 8192
#define SMALLCHUNK 8192
#else
#define SMALLCHUNK BUFSIZ
#endif

/* BIGCHUNK is the size at which buffer growth switches from doubling to
 * linear; it is kept small when int is narrower than 4 bytes. */
#if SIZEOF_INT < 4
#define BIGCHUNK  (512 * 32)
#else
#define BIGCHUNK  (512 * 1024)
#endif

/* This is a hacked version of Python's fileobject.c:new_buffersize().
 *
 * Given the current buffer size, return the size to grow it to:
 *   - up to and including SMALLCHUNK: add SMALLCHUNK;
 *   - above SMALLCHUNK and up to BIGCHUNK: double;
 *   - above BIGCHUNK: add BIGCHUNK.
 */
static size_t
Util_NewBufferSize(size_t currentsize)
{
    if (currentsize > SMALLCHUNK) {
        /* Keep doubling until we reach BIGCHUNK;
           then keep adding BIGCHUNK. */
        if (currentsize <= BIGCHUNK)
            return currentsize + currentsize;
        else
            return currentsize + BIGCHUNK;
    }
    return currentsize + SMALLCHUNK;
}
227 /* This is a hacked version of Python's fileobject.c:get_line(). */
228 static PyObject *
229 Util_GetLine(BZ2FileObject *f, int n)
231 char c;
232 char *buf, *end;
233 size_t total_v_size; /* total # of slots in buffer */
234 size_t used_v_size; /* # used slots in buffer */
235 size_t increment; /* amount to increment the buffer */
236 PyObject *v;
237 int bzerror;
238 int newlinetypes = f->f_newlinetypes;
239 int skipnextlf = f->f_skipnextlf;
240 int univ_newline = f->f_univ_newline;
242 total_v_size = n > 0 ? n : 100;
243 v = PyString_FromStringAndSize((char *)NULL, total_v_size);
244 if (v == NULL)
245 return NULL;
247 buf = BUF(v);
248 end = buf + total_v_size;
250 for (;;) {
251 Py_BEGIN_ALLOW_THREADS
252 if (univ_newline) {
253 while (1) {
254 BZ2_bzRead(&bzerror, f->fp, &c, 1);
255 f->pos++;
256 if (bzerror != BZ_OK || buf == end)
257 break;
258 if (skipnextlf) {
259 skipnextlf = 0;
260 if (c == '\n') {
261 /* Seeing a \n here with
262 * skipnextlf true means we
263 * saw a \r before.
265 newlinetypes |= NEWLINE_CRLF;
266 BZ2_bzRead(&bzerror, f->fp,
267 &c, 1);
268 if (bzerror != BZ_OK)
269 break;
270 } else {
271 newlinetypes |= NEWLINE_CR;
274 if (c == '\r') {
275 skipnextlf = 1;
276 c = '\n';
277 } else if ( c == '\n')
278 newlinetypes |= NEWLINE_LF;
279 *buf++ = c;
280 if (c == '\n') break;
282 if (bzerror == BZ_STREAM_END && skipnextlf)
283 newlinetypes |= NEWLINE_CR;
284 } else /* If not universal newlines use the normal loop */
285 do {
286 BZ2_bzRead(&bzerror, f->fp, &c, 1);
287 f->pos++;
288 *buf++ = c;
289 } while (bzerror == BZ_OK && c != '\n' && buf != end);
290 Py_END_ALLOW_THREADS
291 f->f_newlinetypes = newlinetypes;
292 f->f_skipnextlf = skipnextlf;
293 if (bzerror == BZ_STREAM_END) {
294 f->size = f->pos;
295 f->mode = MODE_READ_EOF;
296 break;
297 } else if (bzerror != BZ_OK) {
298 Util_CatchBZ2Error(bzerror);
299 Py_DECREF(v);
300 return NULL;
302 if (c == '\n')
303 break;
304 /* Must be because buf == end */
305 if (n > 0)
306 break;
307 used_v_size = total_v_size;
308 increment = total_v_size >> 2; /* mild exponential growth */
309 total_v_size += increment;
310 if (total_v_size > INT_MAX) {
311 PyErr_SetString(PyExc_OverflowError,
312 "line is longer than a Python string can hold");
313 Py_DECREF(v);
314 return NULL;
316 if (_PyString_Resize(&v, total_v_size) < 0)
317 return NULL;
318 buf = BUF(v) + used_v_size;
319 end = BUF(v) + total_v_size;
322 used_v_size = buf - BUF(v);
323 if (used_v_size != total_v_size)
324 _PyString_Resize(&v, used_v_size);
325 return v;
328 /* This is a hacked version of Python's
329 * fileobject.c:Py_UniversalNewlineFread(). */
330 size_t
331 Util_UnivNewlineRead(int *bzerror, BZFILE *stream,
332 char* buf, size_t n, BZ2FileObject *f)
334 char *dst = buf;
335 int newlinetypes, skipnextlf;
337 assert(buf != NULL);
338 assert(stream != NULL);
340 if (!f->f_univ_newline)
341 return BZ2_bzRead(bzerror, stream, buf, n);
343 newlinetypes = f->f_newlinetypes;
344 skipnextlf = f->f_skipnextlf;
346 /* Invariant: n is the number of bytes remaining to be filled
347 * in the buffer.
349 while (n) {
350 size_t nread;
351 int shortread;
352 char *src = dst;
354 nread = BZ2_bzRead(bzerror, stream, dst, n);
355 assert(nread <= n);
356 n -= nread; /* assuming 1 byte out for each in; will adjust */
357 shortread = n != 0; /* true iff EOF or error */
358 while (nread--) {
359 char c = *src++;
360 if (c == '\r') {
361 /* Save as LF and set flag to skip next LF. */
362 *dst++ = '\n';
363 skipnextlf = 1;
365 else if (skipnextlf && c == '\n') {
366 /* Skip LF, and remember we saw CR LF. */
367 skipnextlf = 0;
368 newlinetypes |= NEWLINE_CRLF;
369 ++n;
371 else {
372 /* Normal char to be stored in buffer. Also
373 * update the newlinetypes flag if either this
374 * is an LF or the previous char was a CR.
376 if (c == '\n')
377 newlinetypes |= NEWLINE_LF;
378 else if (skipnextlf)
379 newlinetypes |= NEWLINE_CR;
380 *dst++ = c;
381 skipnextlf = 0;
384 if (shortread) {
385 /* If this is EOF, update type flags. */
386 if (skipnextlf && *bzerror == BZ_STREAM_END)
387 newlinetypes |= NEWLINE_CR;
388 break;
391 f->f_newlinetypes = newlinetypes;
392 f->f_skipnextlf = skipnextlf;
393 return dst - buf;
396 /* This is a hacked version of Python's fileobject.c:drop_readahead(). */
397 static void
398 Util_DropReadAhead(BZ2FileObject *f)
400 if (f->f_buf != NULL) {
401 PyMem_Free(f->f_buf);
402 f->f_buf = NULL;
406 /* This is a hacked version of Python's fileobject.c:readahead(). */
407 static int
408 Util_ReadAhead(BZ2FileObject *f, int bufsize)
410 int chunksize;
411 int bzerror;
413 if (f->f_buf != NULL) {
414 if((f->f_bufend - f->f_bufptr) >= 1)
415 return 0;
416 else
417 Util_DropReadAhead(f);
419 if (f->mode == MODE_READ_EOF) {
420 f->f_bufptr = f->f_buf;
421 f->f_bufend = f->f_buf;
422 return 0;
424 if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
425 return -1;
427 Py_BEGIN_ALLOW_THREADS
428 chunksize = Util_UnivNewlineRead(&bzerror, f->fp, f->f_buf,
429 bufsize, f);
430 Py_END_ALLOW_THREADS
431 f->pos += chunksize;
432 if (bzerror == BZ_STREAM_END) {
433 f->size = f->pos;
434 f->mode = MODE_READ_EOF;
435 } else if (bzerror != BZ_OK) {
436 Util_CatchBZ2Error(bzerror);
437 Util_DropReadAhead(f);
438 return -1;
440 f->f_bufptr = f->f_buf;
441 f->f_bufend = f->f_buf + chunksize;
442 return 0;
445 /* This is a hacked version of Python's
446 * fileobject.c:readahead_get_line_skip(). */
447 static PyStringObject *
448 Util_ReadAheadGetLineSkip(BZ2FileObject *f, int skip, int bufsize)
450 PyStringObject* s;
451 char *bufptr;
452 char *buf;
453 int len;
455 if (f->f_buf == NULL)
456 if (Util_ReadAhead(f, bufsize) < 0)
457 return NULL;
459 len = f->f_bufend - f->f_bufptr;
460 if (len == 0)
461 return (PyStringObject *)
462 PyString_FromStringAndSize(NULL, skip);
463 bufptr = memchr(f->f_bufptr, '\n', len);
464 if (bufptr != NULL) {
465 bufptr++; /* Count the '\n' */
466 len = bufptr - f->f_bufptr;
467 s = (PyStringObject *)
468 PyString_FromStringAndSize(NULL, skip+len);
469 if (s == NULL)
470 return NULL;
471 memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
472 f->f_bufptr = bufptr;
473 if (bufptr == f->f_bufend)
474 Util_DropReadAhead(f);
475 } else {
476 bufptr = f->f_bufptr;
477 buf = f->f_buf;
478 f->f_buf = NULL; /* Force new readahead buffer */
479 s = Util_ReadAheadGetLineSkip(f, skip+len,
480 bufsize + (bufsize>>2));
481 if (s == NULL) {
482 PyMem_Free(buf);
483 return NULL;
485 memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
486 PyMem_Free(buf);
488 return s;
491 /* ===================================================================== */
492 /* Methods of BZ2File. */
494 PyDoc_STRVAR(BZ2File_read__doc__,
495 "read([size]) -> string\n\
497 Read at most size uncompressed bytes, returned as a string. If the size\n\
498 argument is negative or omitted, read until EOF is reached.\n\
501 /* This is a hacked version of Python's fileobject.c:file_read(). */
502 static PyObject *
503 BZ2File_read(BZ2FileObject *self, PyObject *args)
505 long bytesrequested = -1;
506 size_t bytesread, buffersize, chunksize;
507 int bzerror;
508 PyObject *ret = NULL;
510 if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
511 return NULL;
513 ACQUIRE_LOCK(self);
514 switch (self->mode) {
515 case MODE_READ:
516 break;
517 case MODE_READ_EOF:
518 ret = PyString_FromString("");
519 goto cleanup;
520 case MODE_CLOSED:
521 PyErr_SetString(PyExc_ValueError,
522 "I/O operation on closed file");
523 goto cleanup;
524 default:
525 PyErr_SetString(PyExc_IOError,
526 "file is not ready for reading");
527 goto cleanup;
530 if (bytesrequested < 0)
531 buffersize = Util_NewBufferSize((size_t)0);
532 else
533 buffersize = bytesrequested;
534 if (buffersize > INT_MAX) {
535 PyErr_SetString(PyExc_OverflowError,
536 "requested number of bytes is "
537 "more than a Python string can hold");
538 goto cleanup;
540 ret = PyString_FromStringAndSize((char *)NULL, buffersize);
541 if (ret == NULL)
542 goto cleanup;
543 bytesread = 0;
545 for (;;) {
546 Py_BEGIN_ALLOW_THREADS
547 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
548 BUF(ret)+bytesread,
549 buffersize-bytesread,
550 self);
551 self->pos += chunksize;
552 Py_END_ALLOW_THREADS
553 bytesread += chunksize;
554 if (bzerror == BZ_STREAM_END) {
555 self->size = self->pos;
556 self->mode = MODE_READ_EOF;
557 break;
558 } else if (bzerror != BZ_OK) {
559 Util_CatchBZ2Error(bzerror);
560 Py_DECREF(ret);
561 ret = NULL;
562 goto cleanup;
564 if (bytesrequested < 0) {
565 buffersize = Util_NewBufferSize(buffersize);
566 if (_PyString_Resize(&ret, buffersize) < 0)
567 goto cleanup;
568 } else {
569 break;
572 if (bytesread != buffersize)
573 _PyString_Resize(&ret, bytesread);
575 cleanup:
576 RELEASE_LOCK(self);
577 return ret;
580 PyDoc_STRVAR(BZ2File_readline__doc__,
581 "readline([size]) -> string\n\
583 Return the next line from the file, as a string, retaining newline.\n\
584 A non-negative size argument will limit the maximum number of bytes to\n\
585 return (an incomplete line may be returned then). Return an empty\n\
586 string at EOF.\n\
589 static PyObject *
590 BZ2File_readline(BZ2FileObject *self, PyObject *args)
592 PyObject *ret = NULL;
593 int sizehint = -1;
595 if (!PyArg_ParseTuple(args, "|i:readline", &sizehint))
596 return NULL;
598 ACQUIRE_LOCK(self);
599 switch (self->mode) {
600 case MODE_READ:
601 break;
602 case MODE_READ_EOF:
603 ret = PyString_FromString("");
604 goto cleanup;
605 case MODE_CLOSED:
606 PyErr_SetString(PyExc_ValueError,
607 "I/O operation on closed file");
608 goto cleanup;
609 default:
610 PyErr_SetString(PyExc_IOError,
611 "file is not ready for reading");
612 goto cleanup;
615 if (sizehint == 0)
616 ret = PyString_FromString("");
617 else
618 ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint);
620 cleanup:
621 RELEASE_LOCK(self);
622 return ret;
625 PyDoc_STRVAR(BZ2File_readlines__doc__,
626 "readlines([size]) -> list\n\
628 Call readline() repeatedly and return a list of lines read.\n\
629 The optional size argument, if given, is an approximate bound on the\n\
630 total number of bytes in the lines returned.\n\
633 /* This is a hacked version of Python's fileobject.c:file_readlines(). */
634 static PyObject *
635 BZ2File_readlines(BZ2FileObject *self, PyObject *args)
637 long sizehint = 0;
638 PyObject *list = NULL;
639 PyObject *line;
640 char small_buffer[SMALLCHUNK];
641 char *buffer = small_buffer;
642 size_t buffersize = SMALLCHUNK;
643 PyObject *big_buffer = NULL;
644 size_t nfilled = 0;
645 size_t nread;
646 size_t totalread = 0;
647 char *p, *q, *end;
648 int err;
649 int shortread = 0;
650 int bzerror;
652 if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
653 return NULL;
655 ACQUIRE_LOCK(self);
656 switch (self->mode) {
657 case MODE_READ:
658 break;
659 case MODE_READ_EOF:
660 list = PyList_New(0);
661 goto cleanup;
662 case MODE_CLOSED:
663 PyErr_SetString(PyExc_ValueError,
664 "I/O operation on closed file");
665 goto cleanup;
666 default:
667 PyErr_SetString(PyExc_IOError,
668 "file is not ready for reading");
669 goto cleanup;
672 if ((list = PyList_New(0)) == NULL)
673 goto cleanup;
675 for (;;) {
676 Py_BEGIN_ALLOW_THREADS
677 nread = Util_UnivNewlineRead(&bzerror, self->fp,
678 buffer+nfilled,
679 buffersize-nfilled, self);
680 self->pos += nread;
681 Py_END_ALLOW_THREADS
682 if (bzerror == BZ_STREAM_END) {
683 self->size = self->pos;
684 self->mode = MODE_READ_EOF;
685 if (nread == 0) {
686 sizehint = 0;
687 break;
689 shortread = 1;
690 } else if (bzerror != BZ_OK) {
691 Util_CatchBZ2Error(bzerror);
692 error:
693 Py_DECREF(list);
694 list = NULL;
695 goto cleanup;
697 totalread += nread;
698 p = memchr(buffer+nfilled, '\n', nread);
699 if (!shortread && p == NULL) {
700 /* Need a larger buffer to fit this line */
701 nfilled += nread;
702 buffersize *= 2;
703 if (buffersize > INT_MAX) {
704 PyErr_SetString(PyExc_OverflowError,
705 "line is longer than a Python string can hold");
706 goto error;
708 if (big_buffer == NULL) {
709 /* Create the big buffer */
710 big_buffer = PyString_FromStringAndSize(
711 NULL, buffersize);
712 if (big_buffer == NULL)
713 goto error;
714 buffer = PyString_AS_STRING(big_buffer);
715 memcpy(buffer, small_buffer, nfilled);
717 else {
718 /* Grow the big buffer */
719 _PyString_Resize(&big_buffer, buffersize);
720 buffer = PyString_AS_STRING(big_buffer);
722 continue;
724 end = buffer+nfilled+nread;
725 q = buffer;
726 while (p != NULL) {
727 /* Process complete lines */
728 p++;
729 line = PyString_FromStringAndSize(q, p-q);
730 if (line == NULL)
731 goto error;
732 err = PyList_Append(list, line);
733 Py_DECREF(line);
734 if (err != 0)
735 goto error;
736 q = p;
737 p = memchr(q, '\n', end-q);
739 /* Move the remaining incomplete line to the start */
740 nfilled = end-q;
741 memmove(buffer, q, nfilled);
742 if (sizehint > 0)
743 if (totalread >= (size_t)sizehint)
744 break;
745 if (shortread) {
746 sizehint = 0;
747 break;
750 if (nfilled != 0) {
751 /* Partial last line */
752 line = PyString_FromStringAndSize(buffer, nfilled);
753 if (line == NULL)
754 goto error;
755 if (sizehint > 0) {
756 /* Need to complete the last line */
757 PyObject *rest = Util_GetLine(self, 0);
758 if (rest == NULL) {
759 Py_DECREF(line);
760 goto error;
762 PyString_Concat(&line, rest);
763 Py_DECREF(rest);
764 if (line == NULL)
765 goto error;
767 err = PyList_Append(list, line);
768 Py_DECREF(line);
769 if (err != 0)
770 goto error;
773 cleanup:
774 RELEASE_LOCK(self);
775 if (big_buffer) {
776 Py_DECREF(big_buffer);
778 return list;
781 PyDoc_STRVAR(BZ2File_xreadlines__doc__,
782 "xreadlines() -> self\n\
784 For backward compatibility. BZ2File objects now include the performance\n\
785 optimizations previously implemented in the xreadlines module.\n\
788 PyDoc_STRVAR(BZ2File_write__doc__,
789 "write(data) -> None\n\
791 Write the 'data' string to file. Note that due to buffering, close() may\n\
792 be needed before the file on disk reflects the data written.\n\
795 /* This is a hacked version of Python's fileobject.c:file_write(). */
796 static PyObject *
797 BZ2File_write(BZ2FileObject *self, PyObject *args)
799 PyObject *ret = NULL;
800 char *buf;
801 int len;
802 int bzerror;
804 if (!PyArg_ParseTuple(args, "s#:write", &buf, &len))
805 return NULL;
807 ACQUIRE_LOCK(self);
808 switch (self->mode) {
809 case MODE_WRITE:
810 break;
812 case MODE_CLOSED:
813 PyErr_SetString(PyExc_ValueError,
814 "I/O operation on closed file");
815 goto cleanup;;
817 default:
818 PyErr_SetString(PyExc_IOError,
819 "file is not ready for writing");
820 goto cleanup;;
823 self->f_softspace = 0;
825 Py_BEGIN_ALLOW_THREADS
826 BZ2_bzWrite (&bzerror, self->fp, buf, len);
827 self->pos += len;
828 Py_END_ALLOW_THREADS
830 if (bzerror != BZ_OK) {
831 Util_CatchBZ2Error(bzerror);
832 goto cleanup;
835 Py_INCREF(Py_None);
836 ret = Py_None;
838 cleanup:
839 RELEASE_LOCK(self);
840 return ret;
843 PyDoc_STRVAR(BZ2File_writelines__doc__,
844 "writelines(sequence_of_strings) -> None\n\
846 Write the sequence of strings to the file. Note that newlines are not\n\
847 added. The sequence can be any iterable object producing strings. This is\n\
848 equivalent to calling write() for each string.\n\
851 /* This is a hacked version of Python's fileobject.c:file_writelines(). */
852 static PyObject *
853 BZ2File_writelines(BZ2FileObject *self, PyObject *seq)
855 #define CHUNKSIZE 1000
856 PyObject *list = NULL;
857 PyObject *iter = NULL;
858 PyObject *ret = NULL;
859 PyObject *line;
860 int i, j, index, len, islist;
861 int bzerror;
863 ACQUIRE_LOCK(self);
864 islist = PyList_Check(seq);
865 if (!islist) {
866 iter = PyObject_GetIter(seq);
867 if (iter == NULL) {
868 PyErr_SetString(PyExc_TypeError,
869 "writelines() requires an iterable argument");
870 goto error;
872 list = PyList_New(CHUNKSIZE);
873 if (list == NULL)
874 goto error;
877 /* Strategy: slurp CHUNKSIZE lines into a private list,
878 checking that they are all strings, then write that list
879 without holding the interpreter lock, then come back for more. */
880 for (index = 0; ; index += CHUNKSIZE) {
881 if (islist) {
882 Py_XDECREF(list);
883 list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
884 if (list == NULL)
885 goto error;
886 j = PyList_GET_SIZE(list);
888 else {
889 for (j = 0; j < CHUNKSIZE; j++) {
890 line = PyIter_Next(iter);
891 if (line == NULL) {
892 if (PyErr_Occurred())
893 goto error;
894 break;
896 PyList_SetItem(list, j, line);
899 if (j == 0)
900 break;
902 /* Check that all entries are indeed strings. If not,
903 apply the same rules as for file.write() and
904 convert the rets to strings. This is slow, but
905 seems to be the only way since all conversion APIs
906 could potentially execute Python code. */
907 for (i = 0; i < j; i++) {
908 PyObject *v = PyList_GET_ITEM(list, i);
909 if (!PyString_Check(v)) {
910 const char *buffer;
911 Py_ssize_t len;
912 if (PyObject_AsCharBuffer(v, &buffer, &len)) {
913 PyErr_SetString(PyExc_TypeError,
914 "writelines() "
915 "argument must be "
916 "a sequence of "
917 "strings");
918 goto error;
920 line = PyString_FromStringAndSize(buffer,
921 len);
922 if (line == NULL)
923 goto error;
924 Py_DECREF(v);
925 PyList_SET_ITEM(list, i, line);
929 self->f_softspace = 0;
931 /* Since we are releasing the global lock, the
932 following code may *not* execute Python code. */
933 Py_BEGIN_ALLOW_THREADS
934 for (i = 0; i < j; i++) {
935 line = PyList_GET_ITEM(list, i);
936 len = PyString_GET_SIZE(line);
937 BZ2_bzWrite (&bzerror, self->fp,
938 PyString_AS_STRING(line), len);
939 if (bzerror != BZ_OK) {
940 Py_BLOCK_THREADS
941 Util_CatchBZ2Error(bzerror);
942 goto error;
945 Py_END_ALLOW_THREADS
947 if (j < CHUNKSIZE)
948 break;
951 Py_INCREF(Py_None);
952 ret = Py_None;
954 error:
955 RELEASE_LOCK(self);
956 Py_XDECREF(list);
957 Py_XDECREF(iter);
958 return ret;
959 #undef CHUNKSIZE
962 PyDoc_STRVAR(BZ2File_seek__doc__,
963 "seek(offset [, whence]) -> None\n\
965 Move to new file position. Argument offset is a byte count. Optional\n\
966 argument whence defaults to 0 (offset from start of file, offset\n\
967 should be >= 0); other values are 1 (move relative to current position,\n\
968 positive or negative), and 2 (move relative to end of file, usually\n\
969 negative, although many platforms allow seeking beyond the end of a file).\n\
971 Note that seeking of bz2 files is emulated, and depending on the parameters\n\
972 the operation may be extremely slow.\n\
975 static PyObject *
976 BZ2File_seek(BZ2FileObject *self, PyObject *args)
978 int where = 0;
979 PyObject *offobj;
980 Py_off_t offset;
981 char small_buffer[SMALLCHUNK];
982 char *buffer = small_buffer;
983 size_t buffersize = SMALLCHUNK;
984 int bytesread = 0;
985 size_t readsize;
986 int chunksize;
987 int bzerror;
988 PyObject *ret = NULL;
990 if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &where))
991 return NULL;
992 #if !defined(HAVE_LARGEFILE_SUPPORT)
993 offset = PyInt_AsLong(offobj);
994 #else
995 offset = PyLong_Check(offobj) ?
996 PyLong_AsLongLong(offobj) : PyInt_AsLong(offobj);
997 #endif
998 if (PyErr_Occurred())
999 return NULL;
1001 ACQUIRE_LOCK(self);
1002 Util_DropReadAhead(self);
1003 switch (self->mode) {
1004 case MODE_READ:
1005 case MODE_READ_EOF:
1006 break;
1008 case MODE_CLOSED:
1009 PyErr_SetString(PyExc_ValueError,
1010 "I/O operation on closed file");
1011 goto cleanup;;
1013 default:
1014 PyErr_SetString(PyExc_IOError,
1015 "seek works only while reading");
1016 goto cleanup;;
1019 if (where == 2) {
1020 if (self->size == -1) {
1021 assert(self->mode != MODE_READ_EOF);
1022 for (;;) {
1023 Py_BEGIN_ALLOW_THREADS
1024 chunksize = Util_UnivNewlineRead(
1025 &bzerror, self->fp,
1026 buffer, buffersize,
1027 self);
1028 self->pos += chunksize;
1029 Py_END_ALLOW_THREADS
1031 bytesread += chunksize;
1032 if (bzerror == BZ_STREAM_END) {
1033 break;
1034 } else if (bzerror != BZ_OK) {
1035 Util_CatchBZ2Error(bzerror);
1036 goto cleanup;
1039 self->mode = MODE_READ_EOF;
1040 self->size = self->pos;
1041 bytesread = 0;
1043 offset = self->size + offset;
1044 } else if (where == 1) {
1045 offset = self->pos + offset;
1048 /* Before getting here, offset must be the absolute position the file
1049 * pointer should be set to. */
1051 if (offset >= self->pos) {
1052 /* we can move forward */
1053 offset -= self->pos;
1054 } else {
1055 /* we cannot move back, so rewind the stream */
1056 BZ2_bzReadClose(&bzerror, self->fp);
1057 if (bzerror != BZ_OK) {
1058 Util_CatchBZ2Error(bzerror);
1059 goto cleanup;
1061 ret = PyObject_CallMethod(self->file, "seek", "(i)", 0);
1062 if (!ret)
1063 goto cleanup;
1064 Py_DECREF(ret);
1065 ret = NULL;
1066 self->pos = 0;
1067 self->fp = BZ2_bzReadOpen(&bzerror, PyFile_AsFile(self->file),
1068 0, 0, NULL, 0);
1069 if (bzerror != BZ_OK) {
1070 Util_CatchBZ2Error(bzerror);
1071 goto cleanup;
1073 self->mode = MODE_READ;
1076 if (offset <= 0 || self->mode == MODE_READ_EOF)
1077 goto exit;
1079 /* Before getting here, offset must be set to the number of bytes
1080 * to walk forward. */
1081 for (;;) {
1082 if (offset-bytesread > buffersize)
1083 readsize = buffersize;
1084 else
1085 /* offset might be wider that readsize, but the result
1086 * of the subtraction is bound by buffersize (see the
1087 * condition above). buffersize is 8192. */
1088 readsize = (size_t)(offset-bytesread);
1089 Py_BEGIN_ALLOW_THREADS
1090 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
1091 buffer, readsize, self);
1092 self->pos += chunksize;
1093 Py_END_ALLOW_THREADS
1094 bytesread += chunksize;
1095 if (bzerror == BZ_STREAM_END) {
1096 self->size = self->pos;
1097 self->mode = MODE_READ_EOF;
1098 break;
1099 } else if (bzerror != BZ_OK) {
1100 Util_CatchBZ2Error(bzerror);
1101 goto cleanup;
1103 if (bytesread == offset)
1104 break;
1107 exit:
1108 Py_INCREF(Py_None);
1109 ret = Py_None;
1111 cleanup:
1112 RELEASE_LOCK(self);
1113 return ret;
1116 PyDoc_STRVAR(BZ2File_tell__doc__,
1117 "tell() -> int\n\
1119 Return the current file position, an integer (may be a long integer).\n\
1122 static PyObject *
1123 BZ2File_tell(BZ2FileObject *self, PyObject *args)
1125 PyObject *ret = NULL;
1127 if (self->mode == MODE_CLOSED) {
1128 PyErr_SetString(PyExc_ValueError,
1129 "I/O operation on closed file");
1130 goto cleanup;
1133 #if !defined(HAVE_LARGEFILE_SUPPORT)
1134 ret = PyInt_FromLong(self->pos);
1135 #else
1136 ret = PyLong_FromLongLong(self->pos);
1137 #endif
1139 cleanup:
1140 return ret;
1143 PyDoc_STRVAR(BZ2File_close__doc__,
1144 "close() -> None or (perhaps) an integer\n\
1146 Close the file. Sets data attribute .closed to true. A closed file\n\
1147 cannot be used for further I/O operations. close() may be called more\n\
1148 than once without error.\n\
1151 static PyObject *
1152 BZ2File_close(BZ2FileObject *self)
1154 PyObject *ret = NULL;
1155 int bzerror = BZ_OK;
1157 ACQUIRE_LOCK(self);
1158 switch (self->mode) {
1159 case MODE_READ:
1160 case MODE_READ_EOF:
1161 BZ2_bzReadClose(&bzerror, self->fp);
1162 break;
1163 case MODE_WRITE:
1164 BZ2_bzWriteClose(&bzerror, self->fp,
1165 0, NULL, NULL);
1166 break;
1168 self->mode = MODE_CLOSED;
1169 ret = PyObject_CallMethod(self->file, "close", NULL);
1170 if (bzerror != BZ_OK) {
1171 Util_CatchBZ2Error(bzerror);
1172 Py_XDECREF(ret);
1173 ret = NULL;
1176 RELEASE_LOCK(self);
1177 return ret;
1180 static PyObject *BZ2File_getiter(BZ2FileObject *self);
1182 static PyMethodDef BZ2File_methods[] = {
1183 {"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__},
1184 {"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__doc__},
1185 {"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readlines__doc__},
1186 {"xreadlines", (PyCFunction)BZ2File_getiter, METH_VARARGS, BZ2File_xreadlines__doc__},
1187 {"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__},
1188 {"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__doc__},
1189 {"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__},
1190 {"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__},
1191 {"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__},
1192 {NULL, NULL} /* sentinel */
1196 /* ===================================================================== */
1197 /* Getters and setters of BZ2File. */
1199 /* This is a hacked version of Python's fileobject.c:get_newlines(). */
1200 static PyObject *
1201 BZ2File_get_newlines(BZ2FileObject *self, void *closure)
1203 switch (self->f_newlinetypes) {
1204 case NEWLINE_UNKNOWN:
1205 Py_INCREF(Py_None);
1206 return Py_None;
1207 case NEWLINE_CR:
1208 return PyString_FromString("\r");
1209 case NEWLINE_LF:
1210 return PyString_FromString("\n");
1211 case NEWLINE_CR|NEWLINE_LF:
1212 return Py_BuildValue("(ss)", "\r", "\n");
1213 case NEWLINE_CRLF:
1214 return PyString_FromString("\r\n");
1215 case NEWLINE_CR|NEWLINE_CRLF:
1216 return Py_BuildValue("(ss)", "\r", "\r\n");
1217 case NEWLINE_LF|NEWLINE_CRLF:
1218 return Py_BuildValue("(ss)", "\n", "\r\n");
1219 case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF:
1220 return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
1221 default:
1222 PyErr_Format(PyExc_SystemError,
1223 "Unknown newlines value 0x%x\n",
1224 self->f_newlinetypes);
1225 return NULL;
1229 static PyObject *
1230 BZ2File_get_closed(BZ2FileObject *self, void *closure)
1232 return PyInt_FromLong(self->mode == MODE_CLOSED);
1235 static PyObject *
1236 BZ2File_get_mode(BZ2FileObject *self, void *closure)
1238 return PyObject_GetAttrString(self->file, "mode");
1241 static PyObject *
1242 BZ2File_get_name(BZ2FileObject *self, void *closure)
1244 return PyObject_GetAttrString(self->file, "name");
1247 static PyGetSetDef BZ2File_getset[] = {
1248 {"closed", (getter)BZ2File_get_closed, NULL,
1249 "True if the file is closed"},
1250 {"newlines", (getter)BZ2File_get_newlines, NULL,
1251 "end-of-line convention used in this file"},
1252 {"mode", (getter)BZ2File_get_mode, NULL,
1253 "file mode ('r', 'w', or 'U')"},
1254 {"name", (getter)BZ2File_get_name, NULL,
1255 "file name"},
1256 {NULL} /* Sentinel */
1260 /* ===================================================================== */
1261 /* Members of BZ2File_Type. */
1263 #undef OFF
1264 #define OFF(x) offsetof(BZ2FileObject, x)
1266 static PyMemberDef BZ2File_members[] = {
1267 {"softspace", T_INT, OFF(f_softspace), 0,
1268 "flag indicating that a space needs to be printed; used by print"},
1269 {NULL} /* Sentinel */
1272 /* ===================================================================== */
1273 /* Slot definitions for BZ2File_Type. */
1275 static int
1276 BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
1278 static char *kwlist[] = {"filename", "mode", "buffering",
1279 "compresslevel", 0};
1280 PyObject *name;
1281 char *mode = "r";
1282 int buffering = -1;
1283 int compresslevel = 9;
1284 int bzerror;
1285 int mode_char = 0;
1287 self->size = -1;
1289 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|sii:BZ2File",
1290 kwlist, &name, &mode, &buffering,
1291 &compresslevel))
1292 return -1;
1294 if (compresslevel < 1 || compresslevel > 9) {
1295 PyErr_SetString(PyExc_ValueError,
1296 "compresslevel must be between 1 and 9");
1297 return -1;
1300 for (;;) {
1301 int error = 0;
1302 switch (*mode) {
1303 case 'r':
1304 case 'w':
1305 if (mode_char)
1306 error = 1;
1307 mode_char = *mode;
1308 break;
1310 case 'b':
1311 break;
1313 case 'U':
1314 #ifdef __VMS
1315 self->f_univ_newline = 0;
1316 #else
1317 self->f_univ_newline = 1;
1318 #endif
1319 break;
1321 default:
1322 error = 1;
1323 break;
1325 if (error) {
1326 PyErr_Format(PyExc_ValueError,
1327 "invalid mode char %c", *mode);
1328 return -1;
1330 mode++;
1331 if (*mode == '\0')
1332 break;
1335 if (mode_char == 0) {
1336 mode_char = 'r';
1339 mode = (mode_char == 'r') ? "rb" : "wb";
1341 self->file = PyObject_CallFunction((PyObject*)&PyFile_Type, "(Osi)",
1342 name, mode, buffering);
1343 if (self->file == NULL)
1344 return -1;
1346 /* From now on, we have stuff to dealloc, so jump to error label
1347 * instead of returning */
1349 #ifdef WITH_THREAD
1350 self->lock = PyThread_allocate_lock();
1351 if (!self->lock) {
1352 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1353 goto error;
1355 #endif
1357 if (mode_char == 'r')
1358 self->fp = BZ2_bzReadOpen(&bzerror,
1359 PyFile_AsFile(self->file),
1360 0, 0, NULL, 0);
1361 else
1362 self->fp = BZ2_bzWriteOpen(&bzerror,
1363 PyFile_AsFile(self->file),
1364 compresslevel, 0, 0);
1366 if (bzerror != BZ_OK) {
1367 Util_CatchBZ2Error(bzerror);
1368 goto error;
1371 self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE;
1373 return 0;
1375 error:
1376 Py_CLEAR(self->file);
1377 #ifdef WITH_THREAD
1378 if (self->lock) {
1379 PyThread_free_lock(self->lock);
1380 self->lock = NULL;
1382 #endif
1383 return -1;
1386 static void
1387 BZ2File_dealloc(BZ2FileObject *self)
1389 int bzerror;
1390 #ifdef WITH_THREAD
1391 if (self->lock)
1392 PyThread_free_lock(self->lock);
1393 #endif
1394 switch (self->mode) {
1395 case MODE_READ:
1396 case MODE_READ_EOF:
1397 BZ2_bzReadClose(&bzerror, self->fp);
1398 break;
1399 case MODE_WRITE:
1400 BZ2_bzWriteClose(&bzerror, self->fp,
1401 0, NULL, NULL);
1402 break;
1404 Util_DropReadAhead(self);
1405 Py_XDECREF(self->file);
1406 self->ob_type->tp_free((PyObject *)self);
1409 /* This is a hacked version of Python's fileobject.c:file_getiter(). */
1410 static PyObject *
1411 BZ2File_getiter(BZ2FileObject *self)
1413 if (self->mode == MODE_CLOSED) {
1414 PyErr_SetString(PyExc_ValueError,
1415 "I/O operation on closed file");
1416 return NULL;
1418 Py_INCREF((PyObject*)self);
1419 return (PyObject *)self;
1422 /* This is a hacked version of Python's fileobject.c:file_iternext(). */
1423 #define READAHEAD_BUFSIZE 8192
1424 static PyObject *
1425 BZ2File_iternext(BZ2FileObject *self)
1427 PyStringObject* ret;
1428 ACQUIRE_LOCK(self);
1429 if (self->mode == MODE_CLOSED) {
1430 PyErr_SetString(PyExc_ValueError,
1431 "I/O operation on closed file");
1432 return NULL;
1434 ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE);
1435 RELEASE_LOCK(self);
1436 if (ret == NULL || PyString_GET_SIZE(ret) == 0) {
1437 Py_XDECREF(ret);
1438 return NULL;
1440 return (PyObject *)ret;
1443 /* ===================================================================== */
1444 /* BZ2File_Type definition. */
1446 PyDoc_VAR(BZ2File__doc__) =
1447 PyDoc_STR(
1448 "BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
1450 Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
1451 writing. When opened for writing, the file will be created if it doesn't\n\
1452 exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
1453 unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
1454 is given, must be a number between 1 and 9.\n\
1456 PyDoc_STR(
1457 "\n\
1458 Add a 'U' to mode to open the file for input with universal newline\n\
1459 support. Any line ending in the input file will be seen as a '\\n' in\n\
1460 Python. Also, a file so opened gains the attribute 'newlines'; the value\n\
1461 for this attribute is one of None (no newline read yet), '\\r', '\\n',\n\
1462 '\\r\\n' or a tuple containing all the newline types seen. Universal\n\
1463 newlines are available only when reading.\n\
1467 static PyTypeObject BZ2File_Type = {
1468 PyObject_HEAD_INIT(NULL)
1469 0, /*ob_size*/
1470 "bz2.BZ2File", /*tp_name*/
1471 sizeof(BZ2FileObject), /*tp_basicsize*/
1472 0, /*tp_itemsize*/
1473 (destructor)BZ2File_dealloc, /*tp_dealloc*/
1474 0, /*tp_print*/
1475 0, /*tp_getattr*/
1476 0, /*tp_setattr*/
1477 0, /*tp_compare*/
1478 0, /*tp_repr*/
1479 0, /*tp_as_number*/
1480 0, /*tp_as_sequence*/
1481 0, /*tp_as_mapping*/
1482 0, /*tp_hash*/
1483 0, /*tp_call*/
1484 0, /*tp_str*/
1485 PyObject_GenericGetAttr,/*tp_getattro*/
1486 PyObject_GenericSetAttr,/*tp_setattro*/
1487 0, /*tp_as_buffer*/
1488 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1489 BZ2File__doc__, /*tp_doc*/
1490 0, /*tp_traverse*/
1491 0, /*tp_clear*/
1492 0, /*tp_richcompare*/
1493 0, /*tp_weaklistoffset*/
1494 (getiterfunc)BZ2File_getiter, /*tp_iter*/
1495 (iternextfunc)BZ2File_iternext, /*tp_iternext*/
1496 BZ2File_methods, /*tp_methods*/
1497 BZ2File_members, /*tp_members*/
1498 BZ2File_getset, /*tp_getset*/
1499 0, /*tp_base*/
1500 0, /*tp_dict*/
1501 0, /*tp_descr_get*/
1502 0, /*tp_descr_set*/
1503 0, /*tp_dictoffset*/
1504 (initproc)BZ2File_init, /*tp_init*/
1505 PyType_GenericAlloc, /*tp_alloc*/
1506 PyType_GenericNew, /*tp_new*/
1507 _PyObject_Del, /*tp_free*/
1508 0, /*tp_is_gc*/
1512 /* ===================================================================== */
1513 /* Methods of BZ2Comp. */
1515 PyDoc_STRVAR(BZ2Comp_compress__doc__,
1516 "compress(data) -> string\n\
1518 Provide more data to the compressor object. It will return chunks of\n\
1519 compressed data whenever possible. When you've finished providing data\n\
1520 to compress, call the flush() method to finish the compression process,\n\
1521 and return what is left in the internal buffers.\n\
1524 static PyObject *
1525 BZ2Comp_compress(BZ2CompObject *self, PyObject *args)
1527 char *data;
1528 int datasize;
1529 int bufsize = SMALLCHUNK;
1530 PY_LONG_LONG totalout;
1531 PyObject *ret = NULL;
1532 bz_stream *bzs = &self->bzs;
1533 int bzerror;
1535 if (!PyArg_ParseTuple(args, "s#:compress", &data, &datasize))
1536 return NULL;
1538 if (datasize == 0)
1539 return PyString_FromString("");
1541 ACQUIRE_LOCK(self);
1542 if (!self->running) {
1543 PyErr_SetString(PyExc_ValueError,
1544 "this object was already flushed");
1545 goto error;
1548 ret = PyString_FromStringAndSize(NULL, bufsize);
1549 if (!ret)
1550 goto error;
1552 bzs->next_in = data;
1553 bzs->avail_in = datasize;
1554 bzs->next_out = BUF(ret);
1555 bzs->avail_out = bufsize;
1557 totalout = BZS_TOTAL_OUT(bzs);
1559 for (;;) {
1560 Py_BEGIN_ALLOW_THREADS
1561 bzerror = BZ2_bzCompress(bzs, BZ_RUN);
1562 Py_END_ALLOW_THREADS
1563 if (bzerror != BZ_RUN_OK) {
1564 Util_CatchBZ2Error(bzerror);
1565 goto error;
1567 if (bzs->avail_out == 0) {
1568 bufsize = Util_NewBufferSize(bufsize);
1569 if (_PyString_Resize(&ret, bufsize) < 0) {
1570 BZ2_bzCompressEnd(bzs);
1571 goto error;
1573 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1574 - totalout);
1575 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1576 } else if (bzs->avail_in == 0) {
1577 break;
1581 _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
1583 RELEASE_LOCK(self);
1584 return ret;
1586 error:
1587 RELEASE_LOCK(self);
1588 Py_XDECREF(ret);
1589 return NULL;
1592 PyDoc_STRVAR(BZ2Comp_flush__doc__,
1593 "flush() -> string\n\
1595 Finish the compression process and return what is left in internal buffers.\n\
1596 You must not use the compressor object after calling this method.\n\
1599 static PyObject *
1600 BZ2Comp_flush(BZ2CompObject *self)
1602 int bufsize = SMALLCHUNK;
1603 PyObject *ret = NULL;
1604 bz_stream *bzs = &self->bzs;
1605 PY_LONG_LONG totalout;
1606 int bzerror;
1608 ACQUIRE_LOCK(self);
1609 if (!self->running) {
1610 PyErr_SetString(PyExc_ValueError, "object was already "
1611 "flushed");
1612 goto error;
1614 self->running = 0;
1616 ret = PyString_FromStringAndSize(NULL, bufsize);
1617 if (!ret)
1618 goto error;
1620 bzs->next_out = BUF(ret);
1621 bzs->avail_out = bufsize;
1623 totalout = BZS_TOTAL_OUT(bzs);
1625 for (;;) {
1626 Py_BEGIN_ALLOW_THREADS
1627 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1628 Py_END_ALLOW_THREADS
1629 if (bzerror == BZ_STREAM_END) {
1630 break;
1631 } else if (bzerror != BZ_FINISH_OK) {
1632 Util_CatchBZ2Error(bzerror);
1633 goto error;
1635 if (bzs->avail_out == 0) {
1636 bufsize = Util_NewBufferSize(bufsize);
1637 if (_PyString_Resize(&ret, bufsize) < 0)
1638 goto error;
1639 bzs->next_out = BUF(ret);
1640 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1641 - totalout);
1642 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1646 if (bzs->avail_out != 0)
1647 _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
1649 RELEASE_LOCK(self);
1650 return ret;
1652 error:
1653 RELEASE_LOCK(self);
1654 Py_XDECREF(ret);
1655 return NULL;
1658 static PyMethodDef BZ2Comp_methods[] = {
1659 {"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS,
1660 BZ2Comp_compress__doc__},
1661 {"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS,
1662 BZ2Comp_flush__doc__},
1663 {NULL, NULL} /* sentinel */
1667 /* ===================================================================== */
1668 /* Slot definitions for BZ2Comp_Type. */
1670 static int
1671 BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs)
1673 int compresslevel = 9;
1674 int bzerror;
1675 static char *kwlist[] = {"compresslevel", 0};
1677 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor",
1678 kwlist, &compresslevel))
1679 return -1;
1681 if (compresslevel < 1 || compresslevel > 9) {
1682 PyErr_SetString(PyExc_ValueError,
1683 "compresslevel must be between 1 and 9");
1684 goto error;
1687 #ifdef WITH_THREAD
1688 self->lock = PyThread_allocate_lock();
1689 if (!self->lock) {
1690 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1691 goto error;
1693 #endif
1695 memset(&self->bzs, 0, sizeof(bz_stream));
1696 bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
1697 if (bzerror != BZ_OK) {
1698 Util_CatchBZ2Error(bzerror);
1699 goto error;
1702 self->running = 1;
1704 return 0;
1705 error:
1706 #ifdef WITH_THREAD
1707 if (self->lock) {
1708 PyThread_free_lock(self->lock);
1709 self->lock = NULL;
1711 #endif
1712 return -1;
1715 static void
1716 BZ2Comp_dealloc(BZ2CompObject *self)
1718 #ifdef WITH_THREAD
1719 if (self->lock)
1720 PyThread_free_lock(self->lock);
1721 #endif
1722 BZ2_bzCompressEnd(&self->bzs);
1723 self->ob_type->tp_free((PyObject *)self);
1727 /* ===================================================================== */
1728 /* BZ2Comp_Type definition. */
1730 PyDoc_STRVAR(BZ2Comp__doc__,
1731 "BZ2Compressor([compresslevel=9]) -> compressor object\n\
1733 Create a new compressor object. This object may be used to compress\n\
1734 data sequentially. If you want to compress data in one shot, use the\n\
1735 compress() function instead. The compresslevel parameter, if given,\n\
1736 must be a number between 1 and 9.\n\
1739 static PyTypeObject BZ2Comp_Type = {
1740 PyObject_HEAD_INIT(NULL)
1741 0, /*ob_size*/
1742 "bz2.BZ2Compressor", /*tp_name*/
1743 sizeof(BZ2CompObject), /*tp_basicsize*/
1744 0, /*tp_itemsize*/
1745 (destructor)BZ2Comp_dealloc, /*tp_dealloc*/
1746 0, /*tp_print*/
1747 0, /*tp_getattr*/
1748 0, /*tp_setattr*/
1749 0, /*tp_compare*/
1750 0, /*tp_repr*/
1751 0, /*tp_as_number*/
1752 0, /*tp_as_sequence*/
1753 0, /*tp_as_mapping*/
1754 0, /*tp_hash*/
1755 0, /*tp_call*/
1756 0, /*tp_str*/
1757 PyObject_GenericGetAttr,/*tp_getattro*/
1758 PyObject_GenericSetAttr,/*tp_setattro*/
1759 0, /*tp_as_buffer*/
1760 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1761 BZ2Comp__doc__, /*tp_doc*/
1762 0, /*tp_traverse*/
1763 0, /*tp_clear*/
1764 0, /*tp_richcompare*/
1765 0, /*tp_weaklistoffset*/
1766 0, /*tp_iter*/
1767 0, /*tp_iternext*/
1768 BZ2Comp_methods, /*tp_methods*/
1769 0, /*tp_members*/
1770 0, /*tp_getset*/
1771 0, /*tp_base*/
1772 0, /*tp_dict*/
1773 0, /*tp_descr_get*/
1774 0, /*tp_descr_set*/
1775 0, /*tp_dictoffset*/
1776 (initproc)BZ2Comp_init, /*tp_init*/
1777 PyType_GenericAlloc, /*tp_alloc*/
1778 PyType_GenericNew, /*tp_new*/
1779 _PyObject_Del, /*tp_free*/
1780 0, /*tp_is_gc*/
1784 /* ===================================================================== */
1785 /* Members of BZ2Decomp. */
1787 #undef OFF
1788 #define OFF(x) offsetof(BZ2DecompObject, x)
1790 static PyMemberDef BZ2Decomp_members[] = {
1791 {"unused_data", T_OBJECT, OFF(unused_data), RO},
1792 {NULL} /* Sentinel */
1796 /* ===================================================================== */
1797 /* Methods of BZ2Decomp. */
1799 PyDoc_STRVAR(BZ2Decomp_decompress__doc__,
1800 "decompress(data) -> string\n\
1802 Provide more data to the decompressor object. It will return chunks\n\
1803 of decompressed data whenever possible. If you try to decompress data\n\
1804 after the end of stream is found, EOFError will be raised. If any data\n\
1805 was found after the end of stream, it'll be ignored and saved in\n\
1806 unused_data attribute.\n\
1809 static PyObject *
1810 BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args)
1812 char *data;
1813 int datasize;
1814 int bufsize = SMALLCHUNK;
1815 PY_LONG_LONG totalout;
1816 PyObject *ret = NULL;
1817 bz_stream *bzs = &self->bzs;
1818 int bzerror;
1820 if (!PyArg_ParseTuple(args, "s#:decompress", &data, &datasize))
1821 return NULL;
1823 ACQUIRE_LOCK(self);
1824 if (!self->running) {
1825 PyErr_SetString(PyExc_EOFError, "end of stream was "
1826 "already found");
1827 goto error;
1830 ret = PyString_FromStringAndSize(NULL, bufsize);
1831 if (!ret)
1832 goto error;
1834 bzs->next_in = data;
1835 bzs->avail_in = datasize;
1836 bzs->next_out = BUF(ret);
1837 bzs->avail_out = bufsize;
1839 totalout = BZS_TOTAL_OUT(bzs);
1841 for (;;) {
1842 Py_BEGIN_ALLOW_THREADS
1843 bzerror = BZ2_bzDecompress(bzs);
1844 Py_END_ALLOW_THREADS
1845 if (bzerror == BZ_STREAM_END) {
1846 if (bzs->avail_in != 0) {
1847 Py_DECREF(self->unused_data);
1848 self->unused_data =
1849 PyString_FromStringAndSize(bzs->next_in,
1850 bzs->avail_in);
1852 self->running = 0;
1853 break;
1855 if (bzerror != BZ_OK) {
1856 Util_CatchBZ2Error(bzerror);
1857 goto error;
1859 if (bzs->avail_out == 0) {
1860 bufsize = Util_NewBufferSize(bufsize);
1861 if (_PyString_Resize(&ret, bufsize) < 0) {
1862 BZ2_bzDecompressEnd(bzs);
1863 goto error;
1865 bzs->next_out = BUF(ret);
1866 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1867 - totalout);
1868 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1869 } else if (bzs->avail_in == 0) {
1870 break;
1874 if (bzs->avail_out != 0)
1875 _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
1877 RELEASE_LOCK(self);
1878 return ret;
1880 error:
1881 RELEASE_LOCK(self);
1882 Py_XDECREF(ret);
1883 return NULL;
1886 static PyMethodDef BZ2Decomp_methods[] = {
1887 {"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_decompress__doc__},
1888 {NULL, NULL} /* sentinel */
1892 /* ===================================================================== */
1893 /* Slot definitions for BZ2Decomp_Type. */
1895 static int
1896 BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs)
1898 int bzerror;
1900 if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
1901 return -1;
1903 #ifdef WITH_THREAD
1904 self->lock = PyThread_allocate_lock();
1905 if (!self->lock) {
1906 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1907 goto error;
1909 #endif
1911 self->unused_data = PyString_FromString("");
1912 if (!self->unused_data)
1913 goto error;
1915 memset(&self->bzs, 0, sizeof(bz_stream));
1916 bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
1917 if (bzerror != BZ_OK) {
1918 Util_CatchBZ2Error(bzerror);
1919 goto error;
1922 self->running = 1;
1924 return 0;
1926 error:
1927 #ifdef WITH_THREAD
1928 if (self->lock) {
1929 PyThread_free_lock(self->lock);
1930 self->lock = NULL;
1932 #endif
1933 Py_CLEAR(self->unused_data);
1934 return -1;
1937 static void
1938 BZ2Decomp_dealloc(BZ2DecompObject *self)
1940 #ifdef WITH_THREAD
1941 if (self->lock)
1942 PyThread_free_lock(self->lock);
1943 #endif
1944 Py_XDECREF(self->unused_data);
1945 BZ2_bzDecompressEnd(&self->bzs);
1946 self->ob_type->tp_free((PyObject *)self);
1950 /* ===================================================================== */
1951 /* BZ2Decomp_Type definition. */
1953 PyDoc_STRVAR(BZ2Decomp__doc__,
1954 "BZ2Decompressor() -> decompressor object\n\
1956 Create a new decompressor object. This object may be used to decompress\n\
1957 data sequentially. If you want to decompress data in one shot, use the\n\
1958 decompress() function instead.\n\
1961 static PyTypeObject BZ2Decomp_Type = {
1962 PyObject_HEAD_INIT(NULL)
1963 0, /*ob_size*/
1964 "bz2.BZ2Decompressor", /*tp_name*/
1965 sizeof(BZ2DecompObject), /*tp_basicsize*/
1966 0, /*tp_itemsize*/
1967 (destructor)BZ2Decomp_dealloc, /*tp_dealloc*/
1968 0, /*tp_print*/
1969 0, /*tp_getattr*/
1970 0, /*tp_setattr*/
1971 0, /*tp_compare*/
1972 0, /*tp_repr*/
1973 0, /*tp_as_number*/
1974 0, /*tp_as_sequence*/
1975 0, /*tp_as_mapping*/
1976 0, /*tp_hash*/
1977 0, /*tp_call*/
1978 0, /*tp_str*/
1979 PyObject_GenericGetAttr,/*tp_getattro*/
1980 PyObject_GenericSetAttr,/*tp_setattro*/
1981 0, /*tp_as_buffer*/
1982 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1983 BZ2Decomp__doc__, /*tp_doc*/
1984 0, /*tp_traverse*/
1985 0, /*tp_clear*/
1986 0, /*tp_richcompare*/
1987 0, /*tp_weaklistoffset*/
1988 0, /*tp_iter*/
1989 0, /*tp_iternext*/
1990 BZ2Decomp_methods, /*tp_methods*/
1991 BZ2Decomp_members, /*tp_members*/
1992 0, /*tp_getset*/
1993 0, /*tp_base*/
1994 0, /*tp_dict*/
1995 0, /*tp_descr_get*/
1996 0, /*tp_descr_set*/
1997 0, /*tp_dictoffset*/
1998 (initproc)BZ2Decomp_init, /*tp_init*/
1999 PyType_GenericAlloc, /*tp_alloc*/
2000 PyType_GenericNew, /*tp_new*/
2001 _PyObject_Del, /*tp_free*/
2002 0, /*tp_is_gc*/
2006 /* ===================================================================== */
2007 /* Module functions. */
2009 PyDoc_STRVAR(bz2_compress__doc__,
2010 "compress(data [, compresslevel=9]) -> string\n\
2012 Compress data in one shot. If you want to compress data sequentially,\n\
2013 use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
2014 given, must be a number between 1 and 9.\n\
2017 static PyObject *
2018 bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs)
2020 int compresslevel=9;
2021 char *data;
2022 int datasize;
2023 int bufsize;
2024 PyObject *ret = NULL;
2025 bz_stream _bzs;
2026 bz_stream *bzs = &_bzs;
2027 int bzerror;
2028 static char *kwlist[] = {"data", "compresslevel", 0};
2030 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|i",
2031 kwlist, &data, &datasize,
2032 &compresslevel))
2033 return NULL;
2035 if (compresslevel < 1 || compresslevel > 9) {
2036 PyErr_SetString(PyExc_ValueError,
2037 "compresslevel must be between 1 and 9");
2038 return NULL;
2041 /* Conforming to bz2 manual, this is large enough to fit compressed
2042 * data in one shot. We will check it later anyway. */
2043 bufsize = datasize + (datasize/100+1) + 600;
2045 ret = PyString_FromStringAndSize(NULL, bufsize);
2046 if (!ret)
2047 return NULL;
2049 memset(bzs, 0, sizeof(bz_stream));
2051 bzs->next_in = data;
2052 bzs->avail_in = datasize;
2053 bzs->next_out = BUF(ret);
2054 bzs->avail_out = bufsize;
2056 bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0);
2057 if (bzerror != BZ_OK) {
2058 Util_CatchBZ2Error(bzerror);
2059 Py_DECREF(ret);
2060 return NULL;
2063 for (;;) {
2064 Py_BEGIN_ALLOW_THREADS
2065 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
2066 Py_END_ALLOW_THREADS
2067 if (bzerror == BZ_STREAM_END) {
2068 break;
2069 } else if (bzerror != BZ_FINISH_OK) {
2070 BZ2_bzCompressEnd(bzs);
2071 Util_CatchBZ2Error(bzerror);
2072 Py_DECREF(ret);
2073 return NULL;
2075 if (bzs->avail_out == 0) {
2076 bufsize = Util_NewBufferSize(bufsize);
2077 if (_PyString_Resize(&ret, bufsize) < 0) {
2078 BZ2_bzCompressEnd(bzs);
2079 Py_DECREF(ret);
2080 return NULL;
2082 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2083 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2087 if (bzs->avail_out != 0)
2088 _PyString_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs));
2089 BZ2_bzCompressEnd(bzs);
2091 return ret;
2094 PyDoc_STRVAR(bz2_decompress__doc__,
2095 "decompress(data) -> decompressed data\n\
2097 Decompress data in one shot. If you want to decompress data sequentially,\n\
2098 use an instance of BZ2Decompressor instead.\n\
2101 static PyObject *
2102 bz2_decompress(PyObject *self, PyObject *args)
2104 char *data;
2105 int datasize;
2106 int bufsize = SMALLCHUNK;
2107 PyObject *ret;
2108 bz_stream _bzs;
2109 bz_stream *bzs = &_bzs;
2110 int bzerror;
2112 if (!PyArg_ParseTuple(args, "s#:decompress", &data, &datasize))
2113 return NULL;
2115 if (datasize == 0)
2116 return PyString_FromString("");
2118 ret = PyString_FromStringAndSize(NULL, bufsize);
2119 if (!ret)
2120 return NULL;
2122 memset(bzs, 0, sizeof(bz_stream));
2124 bzs->next_in = data;
2125 bzs->avail_in = datasize;
2126 bzs->next_out = BUF(ret);
2127 bzs->avail_out = bufsize;
2129 bzerror = BZ2_bzDecompressInit(bzs, 0, 0);
2130 if (bzerror != BZ_OK) {
2131 Util_CatchBZ2Error(bzerror);
2132 Py_DECREF(ret);
2133 return NULL;
2136 for (;;) {
2137 Py_BEGIN_ALLOW_THREADS
2138 bzerror = BZ2_bzDecompress(bzs);
2139 Py_END_ALLOW_THREADS
2140 if (bzerror == BZ_STREAM_END) {
2141 break;
2142 } else if (bzerror != BZ_OK) {
2143 BZ2_bzDecompressEnd(bzs);
2144 Util_CatchBZ2Error(bzerror);
2145 Py_DECREF(ret);
2146 return NULL;
2148 if (bzs->avail_out == 0) {
2149 bufsize = Util_NewBufferSize(bufsize);
2150 if (_PyString_Resize(&ret, bufsize) < 0) {
2151 BZ2_bzDecompressEnd(bzs);
2152 Py_DECREF(ret);
2153 return NULL;
2155 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2156 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2157 } else if (bzs->avail_in == 0) {
2158 BZ2_bzDecompressEnd(bzs);
2159 PyErr_SetString(PyExc_ValueError,
2160 "couldn't find end of stream");
2161 Py_DECREF(ret);
2162 return NULL;
2166 if (bzs->avail_out != 0)
2167 _PyString_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs));
2168 BZ2_bzDecompressEnd(bzs);
2170 return ret;
2173 static PyMethodDef bz2_methods[] = {
2174 {"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS,
2175 bz2_compress__doc__},
2176 {"decompress", (PyCFunction) bz2_decompress, METH_VARARGS,
2177 bz2_decompress__doc__},
2178 {NULL, NULL} /* sentinel */
2181 /* ===================================================================== */
2182 /* Initialization function. */
2184 PyDoc_STRVAR(bz2__doc__,
2185 "The python bz2 module provides a comprehensive interface for\n\
2186 the bz2 compression library. It implements a complete file\n\
2187 interface, one shot (de)compression functions, and types for\n\
2188 sequential (de)compression.\n\
2191 PyMODINIT_FUNC
2192 initbz2(void)
2194 PyObject *m;
2196 BZ2File_Type.ob_type = &PyType_Type;
2197 BZ2Comp_Type.ob_type = &PyType_Type;
2198 BZ2Decomp_Type.ob_type = &PyType_Type;
2200 m = Py_InitModule3("bz2", bz2_methods, bz2__doc__);
2201 if (m == NULL)
2202 return;
2204 PyModule_AddObject(m, "__author__", PyString_FromString(__author__));
2206 Py_INCREF(&BZ2File_Type);
2207 PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type);
2209 Py_INCREF(&BZ2Comp_Type);
2210 PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type);
2212 Py_INCREF(&BZ2Decomp_Type);
2213 PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type);