Modules/bz2module.c

   1 /*
   2
   3 python-bz2 - python bz2 library interface
   4
   5 Copyright (c) 2002  Gustavo Niemeyer <niemeyer@conectiva.com>
   6 Copyright (c) 2002  Python Software Foundation; All Rights Reserved
   7
   8 */
   9
  10 #include "Python.h"
  11 #include <stdio.h>
  12 #include <bzlib.h>
  13 #include "structmember.h"
  14
  15 #ifdef WITH_THREAD
  16 #include "pythread.h"
  17 #endif
  18
  19 static char __author__[] =
  20 "The bz2 python module was written by:\n\
  21 \n\
  22     Gustavo Niemeyer <niemeyer@conectiva.com>\n\
  23 ";
  24
  25 /* Our very own off_t-like type, 64-bit if possible */
  26 /* copied from Objects/fileobject.c */
  27 #if !defined(HAVE_LARGEFILE_SUPPORT)
  28 typedef off_t Py_off_t;
  29 #elif SIZEOF_OFF_T >= 8
  30 typedef off_t Py_off_t;
  31 #elif SIZEOF_FPOS_T >= 8
  32 typedef fpos_t Py_off_t;
  33 #else
  34 #error "Large file support, but neither off_t nor fpos_t is large enough."
  35 #endif
  36
  37 #define BUF(v) PyBytes_AS_STRING(v)
  38
  39 #define MODE_CLOSED   0
  40 #define MODE_READ     1
  41 #define MODE_READ_EOF 2
  42 #define MODE_WRITE    3
  43
  44 #define BZ2FileObject_Check(v)  (Py_TYPE(v) == &BZ2File_Type)
  45
  46
  47 #ifdef BZ_CONFIG_ERROR
  48
  49 #if SIZEOF_LONG >= 8
  50 #define BZS_TOTAL_OUT(bzs) \
  51         (((long)bzs->total_out_hi32 << 32) + bzs->total_out_lo32)
  52 #elif SIZEOF_LONG_LONG >= 8
  53 #define BZS_TOTAL_OUT(bzs) \
  54         (((PY_LONG_LONG)bzs->total_out_hi32 << 32) + bzs->total_out_lo32)
  55 #else
  56 #define BZS_TOTAL_OUT(bzs) \
  57         bzs->total_out_lo32
  58 #endif
  59
  60 #else /* ! BZ_CONFIG_ERROR */
  61
  62 #define BZ2_bzRead bzRead
  63 #define BZ2_bzReadOpen bzReadOpen
  64 #define BZ2_bzReadClose bzReadClose
  65 #define BZ2_bzWrite bzWrite
  66 #define BZ2_bzWriteOpen bzWriteOpen
  67 #define BZ2_bzWriteClose bzWriteClose
  68 #define BZ2_bzCompress bzCompress
  69 #define BZ2_bzCompressInit bzCompressInit
  70 #define BZ2_bzCompressEnd bzCompressEnd
  71 #define BZ2_bzDecompress bzDecompress
  72 #define BZ2_bzDecompressInit bzDecompressInit
  73 #define BZ2_bzDecompressEnd bzDecompressEnd
  74
  75 #define BZS_TOTAL_OUT(bzs) bzs->total_out
  76
  77 #endif /* ! BZ_CONFIG_ERROR */
  78
  79
  80 #ifdef WITH_THREAD
  81 #define ACQUIRE_LOCK(obj) PyThread_acquire_lock(obj->lock, 1)
  82 #define RELEASE_LOCK(obj) PyThread_release_lock(obj->lock)
  83 #else
  84 #define ACQUIRE_LOCK(obj)
  85 #define RELEASE_LOCK(obj)
  86 #endif
  87
  88 /* Bits in f_newlinetypes */
  89 #define NEWLINE_UNKNOWN 0       /* No newline seen, yet */
  90 #define NEWLINE_CR 1            /* \r newline seen */
  91 #define NEWLINE_LF 2            /* \n newline seen */
  92 #define NEWLINE_CRLF 4          /* \r\n newline seen */
  93
  94 /* ===================================================================== */
  95 /* Structure definitions. */
  96
  97 typedef struct {
  98         PyObject_HEAD
  99         FILE *rawfp;
 100
 101         char* f_buf;            /* Allocated readahead buffer */
 102         char* f_bufend;         /* Points after last occupied position */
 103         char* f_bufptr;         /* Current buffer position */
 104
 105         BZFILE *fp;
 106         int mode;
 107         Py_off_t pos;
 108         Py_off_t size;
 109 #ifdef WITH_THREAD
 110         PyThread_type_lock lock;
 111 #endif
 112 } BZ2FileObject;
 113
 114 typedef struct {
 115         PyObject_HEAD
 116         bz_stream bzs;
 117         int running;
 118 #ifdef WITH_THREAD
 119         PyThread_type_lock lock;
 120 #endif
 121 } BZ2CompObject;
 122
 123 typedef struct {
 124         PyObject_HEAD
 125         bz_stream bzs;
 126         int running;
 127         PyObject *unused_data;
 128 #ifdef WITH_THREAD
 129         PyThread_type_lock lock;
 130 #endif
 131 } BZ2DecompObject;
 132
 133 /* ===================================================================== */
 134 /* Utility functions. */
 135
 136 static int
 137 Util_CatchBZ2Error(int bzerror)
 138 {
 139         int ret = 0;
 140         switch(bzerror) {
 141                 case BZ_OK:
 142                 case BZ_STREAM_END:
 143                         break;
 144
 145 #ifdef BZ_CONFIG_ERROR
 146                 case BZ_CONFIG_ERROR:
 147                         PyErr_SetString(PyExc_SystemError,
 148                                         "the bz2 library was not compiled "
 149                                         "correctly");
 150                         ret = 1;
 151                         break;
 152 #endif
 153
 154                 case BZ_PARAM_ERROR:
 155                         PyErr_SetString(PyExc_ValueError,
 156                                         "the bz2 library has received wrong "
 157                                         "parameters");
 158                         ret = 1;
 159                         break;
 160
 161                 case BZ_MEM_ERROR:
 162                         PyErr_NoMemory();
 163                         ret = 1;
 164                         break;
 165
 166                 case BZ_DATA_ERROR:
 167                 case BZ_DATA_ERROR_MAGIC:
 168                         PyErr_SetString(PyExc_IOError, "invalid data stream");
 169                         ret = 1;
 170                         break;
 171
 172                 case BZ_IO_ERROR:
 173                         PyErr_SetString(PyExc_IOError, "unknown IO error");
 174                         ret = 1;
 175                         break;
 176
 177                 case BZ_UNEXPECTED_EOF:
 178                         PyErr_SetString(PyExc_EOFError,
 179                                         "compressed file ended before the "
 180                                         "logical end-of-stream was detected");
 181                         ret = 1;
 182                         break;
 183
 184                 case BZ_SEQUENCE_ERROR:
 185                         PyErr_SetString(PyExc_RuntimeError,
 186                                         "wrong sequence of bz2 library "
 187                                         "commands used");
 188                         ret = 1;
 189                         break;
 190         }
 191         return ret;
 192 }
 193
 194 #if BUFSIZ < 8192
 195 #define SMALLCHUNK 8192
 196 #else
 197 #define SMALLCHUNK BUFSIZ
 198 #endif
 199
 200 #if SIZEOF_INT < 4
 201 #define BIGCHUNK  (512 * 32)
 202 #else
 203 #define BIGCHUNK  (512 * 1024)
 204 #endif
 205
 206 /* This is a hacked version of Python's fileobject.c:new_buffersize(). */
 207 static size_t
 208 Util_NewBufferSize(size_t currentsize)
 209 {
 210         if (currentsize > SMALLCHUNK) {
 211                 /* Keep doubling until we reach BIGCHUNK;
 212                    then keep adding BIGCHUNK. */
 213                 if (currentsize <= BIGCHUNK)
 214                         return currentsize + currentsize;
 215                 else
 216                         return currentsize + BIGCHUNK;
 217         }
 218         return currentsize + SMALLCHUNK;
 219 }
 220
 221 /* This is a hacked version of Python's fileobject.c:get_line(). */
 222 static PyObject *
 223 Util_GetLine(BZ2FileObject *f, int n)
 224 {
 225         char c;
 226         char *buf, *end;
 227         size_t total_v_size;    /* total # of slots in buffer */
 228         size_t used_v_size;     /* # used slots in buffer */
 229         size_t increment;       /* amount to increment the buffer */
 230         PyObject *v;
 231         int bzerror;
 232         int bytes_read;
 233
 234         total_v_size = n > 0 ? n : 100;
 235         v = PyBytes_FromStringAndSize((char *)NULL, total_v_size);
 236         if (v == NULL)
 237                 return NULL;
 238
 239         buf = BUF(v);
 240         end = buf + total_v_size;
 241
 242         for (;;) {
 243                 Py_BEGIN_ALLOW_THREADS
 244                 do {
 245                         bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1);
 246                         f->pos++;
 247                         if (bytes_read == 0)
 248                                 break;
 249                         *buf++ = c;
 250                 } while (bzerror == BZ_OK && c != '\n' && buf != end);
 251                 Py_END_ALLOW_THREADS
 252                 if (bzerror == BZ_STREAM_END) {
 253                         f->size = f->pos;
 254                         f->mode = MODE_READ_EOF;
 255                         break;
 256                 } else if (bzerror != BZ_OK) {
 257                         Util_CatchBZ2Error(bzerror);
 258                         Py_DECREF(v);
 259                         return NULL;
 260                 }
 261                 if (c == '\n')
 262                         break;
 263                 /* Must be because buf == end */
 264                 if (n > 0)
 265                         break;
 266                 used_v_size = total_v_size;
 267                 increment = total_v_size >> 2; /* mild exponential growth */
 268                 total_v_size += increment;
 269                 if (total_v_size > INT_MAX) {
 270                         PyErr_SetString(PyExc_OverflowError,
 271                             "line is longer than a Python string can hold");
 272                         Py_DECREF(v);
 273                         return NULL;
 274                 }
 275                 if (_PyBytes_Resize(&v, total_v_size) < 0) {
 276                         return NULL;
 277                 }
 278                 buf = BUF(v) + used_v_size;
 279                 end = BUF(v) + total_v_size;
 280         }
 281
 282         used_v_size = buf - BUF(v);
 283         if (used_v_size != total_v_size) {
 284                 if (_PyBytes_Resize(&v, used_v_size) < 0) {
 285                         v = NULL;
 286                 }
 287         }
 288         return v;
 289 }
 290
 291 /* This is a hacked version of Python's fileobject.c:drop_readahead(). */
 292 static void
 293 Util_DropReadAhead(BZ2FileObject *f)
 294 {
 295         if (f->f_buf != NULL) {
 296                 PyMem_Free(f->f_buf);
 297                 f->f_buf = NULL;
 298         }
 299 }
 300
 301 /* This is a hacked version of Python's fileobject.c:readahead(). */
 302 static int
 303 Util_ReadAhead(BZ2FileObject *f, int bufsize)
 304 {
 305         int chunksize;
 306         int bzerror;
 307
 308         if (f->f_buf != NULL) {
 309                 if((f->f_bufend - f->f_bufptr) >= 1)
 310                         return 0;
 311                 else
 312                         Util_DropReadAhead(f);
 313         }
 314         if (f->mode == MODE_READ_EOF) {
 315                 f->f_bufptr = f->f_buf;
 316                 f->f_bufend = f->f_buf;
 317                 return 0;
 318         }
 319         if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
 320                 PyErr_NoMemory();
 321                 return -1;
 322         }
 323         Py_BEGIN_ALLOW_THREADS
 324         chunksize = BZ2_bzRead(&bzerror, f->fp, f->f_buf, bufsize);
 325         Py_END_ALLOW_THREADS
 326         f->pos += chunksize;
 327         if (bzerror == BZ_STREAM_END) {
 328                 f->size = f->pos;
 329                 f->mode = MODE_READ_EOF;
 330         } else if (bzerror != BZ_OK) {
 331                 Util_CatchBZ2Error(bzerror);
 332                 Util_DropReadAhead(f);
 333                 return -1;
 334         }
 335         f->f_bufptr = f->f_buf;
 336         f->f_bufend = f->f_buf + chunksize;
 337         return 0;
 338 }
 339
 340 /* This is a hacked version of Python's
 341  * fileobject.c:readahead_get_line_skip(). */
 342 static PyBytesObject *
 343 Util_ReadAheadGetLineSkip(BZ2FileObject *f, int skip, int bufsize)
 344 {
 345         PyBytesObject* s;
 346         char *bufptr;
 347         char *buf;
 348         int len;
 349
 350         if (f->f_buf == NULL)
 351                 if (Util_ReadAhead(f, bufsize) < 0)
 352                         return NULL;
 353
 354         len = f->f_bufend - f->f_bufptr;
 355         if (len == 0)
 356                 return (PyBytesObject *)
 357                         PyBytes_FromStringAndSize(NULL, skip);
 358         bufptr = memchr(f->f_bufptr, '\n', len);
 359         if (bufptr != NULL) {
 360                 bufptr++;                       /* Count the '\n' */
 361                 len = bufptr - f->f_bufptr;
 362                 s = (PyBytesObject *)
 363                         PyBytes_FromStringAndSize(NULL, skip+len);
 364                 if (s == NULL)
 365                         return NULL;
 366                 memcpy(PyBytes_AS_STRING(s)+skip, f->f_bufptr, len);
 367                 f->f_bufptr = bufptr;
 368                 if (bufptr == f->f_bufend)
 369                         Util_DropReadAhead(f);
 370         } else {
 371                 bufptr = f->f_bufptr;
 372                 buf = f->f_buf;
 373                 f->f_buf = NULL;        /* Force new readahead buffer */
 374                 s = Util_ReadAheadGetLineSkip(f, skip+len,
 375                                               bufsize + (bufsize>>2));
 376                 if (s == NULL) {
 377                         PyMem_Free(buf);
 378                         return NULL;
 379                 }
 380                 memcpy(PyBytes_AS_STRING(s)+skip, bufptr, len);
 381                 PyMem_Free(buf);
 382         }
 383         return s;
 384 }
 385
 386 /* ===================================================================== */
 387 /* Methods of BZ2File. */
 388
 389 PyDoc_STRVAR(BZ2File_read__doc__,
 390 "read([size]) -> string\n\
 391 \n\
 392 Read at most size uncompressed bytes, returned as a string. If the size\n\
 393 argument is negative or omitted, read until EOF is reached.\n\
 394 ");
 395
 396 /* This is a hacked version of Python's fileobject.c:file_read(). */
 397 static PyObject *
 398 BZ2File_read(BZ2FileObject *self, PyObject *args)
 399 {
 400         long bytesrequested = -1;
 401         size_t bytesread, buffersize, chunksize;
 402         int bzerror;
 403         PyObject *ret = NULL;
 404
 405         if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
 406                 return NULL;
 407
 408         ACQUIRE_LOCK(self);
 409         switch (self->mode) {
 410                 case MODE_READ:
 411                         break;
 412                 case MODE_READ_EOF:
 413                         ret = PyBytes_FromStringAndSize("", 0);
 414                         goto cleanup;
 415                 case MODE_CLOSED:
 416                         PyErr_SetString(PyExc_ValueError,
 417                                         "I/O operation on closed file");
 418                         goto cleanup;
 419                 default:
 420                         PyErr_SetString(PyExc_IOError,
 421                                         "file is not ready for reading");
 422                         goto cleanup;
 423         }
 424
 425         if (bytesrequested < 0)
 426                 buffersize = Util_NewBufferSize((size_t)0);
 427         else
 428                 buffersize = bytesrequested;
 429         if (buffersize > INT_MAX) {
 430                 PyErr_SetString(PyExc_OverflowError,
 431                                 "requested number of bytes is "
 432                                 "more than a Python string can hold");
 433                 goto cleanup;
 434         }
 435         ret = PyBytes_FromStringAndSize((char *)NULL, buffersize);
 436         if (ret == NULL || buffersize == 0)
 437                 goto cleanup;
 438         bytesread = 0;
 439
 440         for (;;) {
 441                 Py_BEGIN_ALLOW_THREADS
 442                 chunksize = BZ2_bzRead(&bzerror, self->fp,
 443                                        BUF(ret)+bytesread,
 444                                        buffersize-bytesread);
 445                 self->pos += chunksize;
 446                 Py_END_ALLOW_THREADS
 447                 bytesread += chunksize;
 448                 if (bzerror == BZ_STREAM_END) {
 449                         self->size = self->pos;
 450                         self->mode = MODE_READ_EOF;
 451                         break;
 452                 } else if (bzerror != BZ_OK) {
 453                         Util_CatchBZ2Error(bzerror);
 454                         Py_DECREF(ret);
 455                         ret = NULL;
 456                         goto cleanup;
 457                 }
 458                 if (bytesrequested < 0) {
 459                         buffersize = Util_NewBufferSize(buffersize);
 460                         if (_PyBytes_Resize(&ret, buffersize) < 0) {
 461                                 ret = NULL;
 462                                 goto cleanup;
 463                         }
 464                 } else {
 465                         break;
 466                 }
 467         }
 468         if (bytesread != buffersize) {
 469                 if (_PyBytes_Resize(&ret, bytesread) < 0) {
 470                         ret = NULL;
 471                 }
 472         }
 473
 474 cleanup:
 475         RELEASE_LOCK(self);
 476         return ret;
 477 }
 478
 479 PyDoc_STRVAR(BZ2File_readline__doc__,
 480 "readline([size]) -> string\n\
 481 \n\
 482 Return the next line from the file, as a string, retaining newline.\n\
 483 A non-negative size argument will limit the maximum number of bytes to\n\
 484 return (an incomplete line may be returned then). Return an empty\n\
 485 string at EOF.\n\
 486 ");
 487
 488 static PyObject *
 489 BZ2File_readline(BZ2FileObject *self, PyObject *args)
 490 {
 491         PyObject *ret = NULL;
 492         int sizehint = -1;
 493
 494         if (!PyArg_ParseTuple(args, "|i:readline", &sizehint))
 495                 return NULL;
 496
 497         ACQUIRE_LOCK(self);
 498         switch (self->mode) {
 499                 case MODE_READ:
 500                         break;
 501                 case MODE_READ_EOF:
 502                         ret = PyBytes_FromStringAndSize("", 0);
 503                         goto cleanup;
 504                 case MODE_CLOSED:
 505                         PyErr_SetString(PyExc_ValueError,
 506                                         "I/O operation on closed file");
 507                         goto cleanup;
 508                 default:
 509                         PyErr_SetString(PyExc_IOError,
 510                                         "file is not ready for reading");
 511                         goto cleanup;
 512         }
 513
 514         if (sizehint == 0)
 515                 ret = PyBytes_FromStringAndSize("", 0);
 516         else
 517                 ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint);
 518
 519 cleanup:
 520         RELEASE_LOCK(self);
 521         return ret;
 522 }
 523
 524 PyDoc_STRVAR(BZ2File_readlines__doc__,
 525 "readlines([size]) -> list\n\
 526 \n\
 527 Call readline() repeatedly and return a list of lines read.\n\
 528 The optional size argument, if given, is an approximate bound on the\n\
 529 total number of bytes in the lines returned.\n\
 530 ");
 531
 532 /* This is a hacked version of Python's fileobject.c:file_readlines(). */
 533 static PyObject *
 534 BZ2File_readlines(BZ2FileObject *self, PyObject *args)
 535 {
 536         long sizehint = 0;
 537         PyObject *list = NULL;
 538         PyObject *line;
 539         char small_buffer[SMALLCHUNK];
 540         char *buffer = small_buffer;
 541         size_t buffersize = SMALLCHUNK;
 542         PyObject *big_buffer = NULL;
 543         size_t nfilled = 0;
 544         size_t nread;
 545         size_t totalread = 0;
 546         char *p, *q, *end;
 547         int err;
 548         int shortread = 0;
 549         int bzerror;
 550
 551         if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
 552                 return NULL;
 553
 554         ACQUIRE_LOCK(self);
 555         switch (self->mode) {
 556                 case MODE_READ:
 557                         break;
 558                 case MODE_READ_EOF:
 559                         list = PyList_New(0);
 560                         goto cleanup;
 561                 case MODE_CLOSED:
 562                         PyErr_SetString(PyExc_ValueError,
 563                                         "I/O operation on closed file");
 564                         goto cleanup;
 565                 default:
 566                         PyErr_SetString(PyExc_IOError,
 567                                         "file is not ready for reading");
 568                         goto cleanup;
 569         }
 570
 571         if ((list = PyList_New(0)) == NULL)
 572                 goto cleanup;
 573
 574         for (;;) {
 575                 Py_BEGIN_ALLOW_THREADS
 576                 nread = BZ2_bzRead(&bzerror, self->fp,
 577                                    buffer+nfilled, buffersize-nfilled);
 578                 self->pos += nread;
 579                 Py_END_ALLOW_THREADS
 580                 if (bzerror == BZ_STREAM_END) {
 581                         self->size = self->pos;
 582                         self->mode = MODE_READ_EOF;
 583                         if (nread == 0) {
 584                                 sizehint = 0;
 585                                 break;
 586                         }
 587                         shortread = 1;
 588                 } else if (bzerror != BZ_OK) {
 589                         Util_CatchBZ2Error(bzerror);
 590                   error:
 591                         Py_DECREF(list);
 592                         list = NULL;
 593                         goto cleanup;
 594                 }
 595                 totalread += nread;
 596                 p = memchr(buffer+nfilled, '\n', nread);
 597                 if (!shortread && p == NULL) {
 598                         /* Need a larger buffer to fit this line */
 599                         nfilled += nread;
 600                         buffersize *= 2;
 601                         if (buffersize > INT_MAX) {
 602                                 PyErr_SetString(PyExc_OverflowError,
 603                                 "line is longer than a Python string can hold");
 604                                 goto error;
 605                         }
 606                         if (big_buffer == NULL) {
 607                                 /* Create the big buffer */
 608                                 big_buffer = PyBytes_FromStringAndSize(
 609                                         NULL, buffersize);
 610                                 if (big_buffer == NULL)
 611                                         goto error;
 612                                 buffer = PyBytes_AS_STRING(big_buffer);
 613                                 memcpy(buffer, small_buffer, nfilled);
 614                         }
 615                         else {
 616                                 /* Grow the big buffer */
 617                                 if (_PyBytes_Resize(&big_buffer, buffersize) < 0){
 618                                         big_buffer = NULL;
 619                                         goto error;
 620                                 }
 621                                 buffer = PyBytes_AS_STRING(big_buffer);
 622                         }
 623                         continue;
 624                 }
 625                 end = buffer+nfilled+nread;
 626                 q = buffer;
 627                 while (p != NULL) {
 628                         /* Process complete lines */
 629                         p++;
 630                         line = PyBytes_FromStringAndSize(q, p-q);
 631                         if (line == NULL)
 632                                 goto error;
 633                         err = PyList_Append(list, line);
 634                         Py_DECREF(line);
 635                         if (err != 0)
 636                                 goto error;
 637                         q = p;
 638                         p = memchr(q, '\n', end-q);
 639                 }
 640                 /* Move the remaining incomplete line to the start */
 641                 nfilled = end-q;
 642                 memmove(buffer, q, nfilled);
 643                 if (sizehint > 0)
 644                         if (totalread >= (size_t)sizehint)
 645                                 break;
 646                 if (shortread) {
 647                         sizehint = 0;
 648                         break;
 649                 }
 650         }
 651         if (nfilled != 0) {
 652                 /* Partial last line */
 653                 line = PyBytes_FromStringAndSize(buffer, nfilled);
 654                 if (line == NULL)
 655                         goto error;
 656                 if (sizehint > 0) {
 657                         /* Need to complete the last line */
 658                         PyObject *rest = Util_GetLine(self, 0);
 659                         if (rest == NULL) {
 660                                 Py_DECREF(line);
 661                                 goto error;
 662                         }
 663                         PyBytes_Concat(&line, rest);
 664                         Py_DECREF(rest);
 665                         if (line == NULL)
 666                                 goto error;
 667                 }
 668                 err = PyList_Append(list, line);
 669                 Py_DECREF(line);
 670                 if (err != 0)
 671                         goto error;
 672         }
 673
 674   cleanup:
 675         RELEASE_LOCK(self);
 676         if (big_buffer) {
 677                 Py_DECREF(big_buffer);
 678         }
 679         return list;
 680 }
 681
 682 PyDoc_STRVAR(BZ2File_write__doc__,
 683 "write(data) -> None\n\
 684 \n\
 685 Write the 'data' string to file. Note that due to buffering, close() may\n\
 686 be needed before the file on disk reflects the data written.\n\
 687 ");
 688
 689 /* This is a hacked version of Python's fileobject.c:file_write(). */
 690 static PyObject *
 691 BZ2File_write(BZ2FileObject *self, PyObject *args)
 692 {
 693         PyObject *ret = NULL;
 694         Py_buffer pbuf;
 695         char *buf;
 696         int len;
 697         int bzerror;
 698
 699         if (!PyArg_ParseTuple(args, "y*:write", &pbuf))
 700                 return NULL;
 701         buf = pbuf.buf;
 702         len = pbuf.len;
 703
 704         ACQUIRE_LOCK(self);
 705         switch (self->mode) {
 706                 case MODE_WRITE:
 707                         break;
 708
 709                 case MODE_CLOSED:
 710                         PyErr_SetString(PyExc_ValueError,
 711                                         "I/O operation on closed file");
 712                         goto cleanup;
 713
 714                 default:
 715                         PyErr_SetString(PyExc_IOError,
 716                                         "file is not ready for writing");
 717                         goto cleanup;
 718         }
 719
 720         Py_BEGIN_ALLOW_THREADS
 721         BZ2_bzWrite (&bzerror, self->fp, buf, len);
 722         self->pos += len;
 723         Py_END_ALLOW_THREADS
 724
 725         if (bzerror != BZ_OK) {
 726                 Util_CatchBZ2Error(bzerror);
 727                 goto cleanup;
 728         }
 729
 730         Py_INCREF(Py_None);
 731         ret = Py_None;
 732
 733 cleanup:
 734         PyBuffer_Release(&pbuf);
 735         RELEASE_LOCK(self);
 736         return ret;
 737 }
 738
 739 PyDoc_STRVAR(BZ2File_writelines__doc__,
 740 "writelines(sequence_of_strings) -> None\n\
 741 \n\
 742 Write the sequence of strings to the file. Note that newlines are not\n\
 743 added. The sequence can be any iterable object producing strings. This is\n\
 744 equivalent to calling write() for each string.\n\
 745 ");
 746
 747 /* This is a hacked version of Python's fileobject.c:file_writelines(). */
 748 static PyObject *
 749 BZ2File_writelines(BZ2FileObject *self, PyObject *seq)
 750 {
 751 #define CHUNKSIZE 1000
 752         PyObject *list = NULL;
 753         PyObject *iter = NULL;
 754         PyObject *ret = NULL;
 755         PyObject *line;
 756         int i, j, index, len, islist;
 757         int bzerror;
 758
 759         ACQUIRE_LOCK(self);
 760         switch (self->mode) {
 761                 case MODE_WRITE:
 762                         break;
 763
 764                 case MODE_CLOSED:
 765                         PyErr_SetString(PyExc_ValueError,
 766                                         "I/O operation on closed file");
 767                         goto error;
 768
 769                 default:
 770                         PyErr_SetString(PyExc_IOError,
 771                                         "file is not ready for writing");
 772                         goto error;
 773         }
 774
 775         islist = PyList_Check(seq);
 776         if  (!islist) {
 777                 iter = PyObject_GetIter(seq);
 778                 if (iter == NULL) {
 779                         PyErr_SetString(PyExc_TypeError,
 780                                 "writelines() requires an iterable argument");
 781                         goto error;
 782                 }
 783                 list = PyList_New(CHUNKSIZE);
 784                 if (list == NULL)
 785                         goto error;
 786         }
 787
 788         /* Strategy: slurp CHUNKSIZE lines into a private list,
 789            checking that they are all strings, then write that list
 790            without holding the interpreter lock, then come back for more. */
 791         for (index = 0; ; index += CHUNKSIZE) {
 792                 if (islist) {
 793                         Py_XDECREF(list);
 794                         list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
 795                         if (list == NULL)
 796                                 goto error;
 797                         j = PyList_GET_SIZE(list);
 798                 }
 799                 else {
 800                         for (j = 0; j < CHUNKSIZE; j++) {
 801                                 line = PyIter_Next(iter);
 802                                 if (line == NULL) {
 803                                         if (PyErr_Occurred())
 804                                                 goto error;
 805                                         break;
 806                                 }
 807                                 PyList_SetItem(list, j, line);
 808                         }
 809                 }
 810                 if (j == 0)
 811                         break;
 812
 813                 /* Check that all entries are indeed byte strings. If not,
 814                    apply the same rules as for file.write() and
 815                    convert the rets to strings. This is slow, but
 816                    seems to be the only way since all conversion APIs
 817                    could potentially execute Python code. */
 818                 for (i = 0; i < j; i++) {
 819                         PyObject *v = PyList_GET_ITEM(list, i);
 820                         if (!PyBytes_Check(v)) {
 821                                 const char *buffer;
 822                                 Py_ssize_t len;
 823                                 if (PyObject_AsCharBuffer(v, &buffer, &len)) {
 824                                         PyErr_SetString(PyExc_TypeError,
 825                                                         "writelines() "
 826                                                         "argument must be "
 827                                                         "a sequence of "
 828                                                         "bytes objects");
 829                                         goto error;
 830                                 }
 831                                 line = PyBytes_FromStringAndSize(buffer,
 832                                                                   len);
 833                                 if (line == NULL)
 834                                         goto error;
 835                                 Py_DECREF(v);
 836                                 PyList_SET_ITEM(list, i, line);
 837                         }
 838                 }
 839
 840                 /* Since we are releasing the global lock, the
 841                    following code may *not* execute Python code. */
 842                 Py_BEGIN_ALLOW_THREADS
 843                 for (i = 0; i < j; i++) {
 844                         line = PyList_GET_ITEM(list, i);
 845                         len = PyBytes_GET_SIZE(line);
 846                         BZ2_bzWrite (&bzerror, self->fp,
 847                                      PyBytes_AS_STRING(line), len);
 848                         if (bzerror != BZ_OK) {
 849                                 Py_BLOCK_THREADS
 850                                 Util_CatchBZ2Error(bzerror);
 851                                 goto error;
 852                         }
 853                 }
 854                 Py_END_ALLOW_THREADS
 855
 856                 if (j < CHUNKSIZE)
 857                         break;
 858         }
 859
 860         Py_INCREF(Py_None);
 861         ret = Py_None;
 862
 863   error:
 864         RELEASE_LOCK(self);
 865         Py_XDECREF(list);
 866         Py_XDECREF(iter);
 867         return ret;
 868 #undef CHUNKSIZE
 869 }
 870
 871 PyDoc_STRVAR(BZ2File_seek__doc__,
 872 "seek(offset [, whence]) -> None\n\
 873 \n\
 874 Move to new file position. Argument offset is a byte count. Optional\n\
 875 argument whence defaults to 0 (offset from start of file, offset\n\
 876 should be >= 0); other values are 1 (move relative to current position,\n\
 877 positive or negative), and 2 (move relative to end of file, usually\n\
 878 negative, although many platforms allow seeking beyond the end of a file).\n\
 879 \n\
 880 Note that seeking of bz2 files is emulated, and depending on the parameters\n\
 881 the operation may be extremely slow.\n\
 882 ");
 883
 884 static PyObject *
 885 BZ2File_seek(BZ2FileObject *self, PyObject *args)
 886 {
 887         int where = 0;
 888         PyObject *offobj;
 889         Py_off_t offset;
 890         char small_buffer[SMALLCHUNK];
 891         char *buffer = small_buffer;
 892         size_t buffersize = SMALLCHUNK;
 893         Py_off_t bytesread = 0;
 894         size_t readsize;
 895         int chunksize;
 896         int bzerror;
 897         PyObject *ret = NULL;
 898
 899         if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &where))
 900                 return NULL;
 901 #if !defined(HAVE_LARGEFILE_SUPPORT)
 902         offset = PyLong_AsLong(offobj);
 903 #else
 904         offset = PyLong_Check(offobj) ?
 905                 PyLong_AsLongLong(offobj) : PyLong_AsLong(offobj);
 906 #endif
 907         if (PyErr_Occurred())
 908                 return NULL;
 909
 910         ACQUIRE_LOCK(self);
 911         Util_DropReadAhead(self);
 912         switch (self->mode) {
 913                 case MODE_READ:
 914                 case MODE_READ_EOF:
 915                         break;
 916
 917                 case MODE_CLOSED:
 918                         PyErr_SetString(PyExc_ValueError,
 919                                         "I/O operation on closed file");
 920                         goto cleanup;
 921
 922                 default:
 923                         PyErr_SetString(PyExc_IOError,
 924                                         "seek works only while reading");
 925                         goto cleanup;
 926         }
 927
 928         if (where == 2) {
 929                 if (self->size == -1) {
 930                         assert(self->mode != MODE_READ_EOF);
 931                         for (;;) {
 932                                 Py_BEGIN_ALLOW_THREADS
 933                                 chunksize = BZ2_bzRead(&bzerror, self->fp,
 934                                                        buffer, buffersize);
 935                                 self->pos += chunksize;
 936                                 Py_END_ALLOW_THREADS
 937
 938                                 bytesread += chunksize;
 939                                 if (bzerror == BZ_STREAM_END) {
 940                                         break;
 941                                 } else if (bzerror != BZ_OK) {
 942                                         Util_CatchBZ2Error(bzerror);
 943                                         goto cleanup;
 944                                 }
 945                         }
 946                         self->mode = MODE_READ_EOF;
 947                         self->size = self->pos;
 948                         bytesread = 0;
 949                 }
 950                 offset = self->size + offset;
 951         } else if (where == 1) {
 952                 offset = self->pos + offset;
 953         }
 954
 955         /* Before getting here, offset must be the absolute position the file
 956          * pointer should be set to. */
 957
 958         if (offset >= self->pos) {
 959                 /* we can move forward */
 960                 offset -= self->pos;
 961         } else {
 962                 /* we cannot move back, so rewind the stream */
 963                 BZ2_bzReadClose(&bzerror, self->fp);
 964                 if (bzerror != BZ_OK) {
 965                         Util_CatchBZ2Error(bzerror);
 966                         goto cleanup;
 967                 }
 968                 rewind(self->rawfp);
 969                 self->pos = 0;
 970                 self->fp = BZ2_bzReadOpen(&bzerror, self->rawfp,
 971                                           0, 0, NULL, 0);
 972                 if (bzerror != BZ_OK) {
 973                         Util_CatchBZ2Error(bzerror);
 974                         goto cleanup;
 975                 }
 976                 self->mode = MODE_READ;
 977         }
 978
 979         if (offset <= 0 || self->mode == MODE_READ_EOF)
 980                 goto exit;
 981
 982         /* Before getting here, offset must be set to the number of bytes
 983          * to walk forward. */
 984         for (;;) {
 985                 if (offset-bytesread > buffersize)
 986                         readsize = buffersize;
 987                 else
 988                         /* offset might be wider that readsize, but the result
 989                          * of the subtraction is bound by buffersize (see the
 990                          * condition above). buffersize is 8192. */
 991                         readsize = (size_t)(offset-bytesread);
 992                 Py_BEGIN_ALLOW_THREADS
 993                 chunksize = BZ2_bzRead(&bzerror, self->fp, buffer, readsize);
 994                 self->pos += chunksize;
 995                 Py_END_ALLOW_THREADS
 996                 bytesread += chunksize;
 997                 if (bzerror == BZ_STREAM_END) {
 998                         self->size = self->pos;
 999                         self->mode = MODE_READ_EOF;
1000                         break;
1001                 } else if (bzerror != BZ_OK) {
1002                         Util_CatchBZ2Error(bzerror);
1003                         goto cleanup;
1004                 }
1005                 if (bytesread == offset)
1006                         break;
1007         }
1008
1009 exit:
1010         Py_INCREF(Py_None);
1011         ret = Py_None;
1012
1013 cleanup:
1014         RELEASE_LOCK(self);
1015         return ret;
1016 }
1017
1018 PyDoc_STRVAR(BZ2File_tell__doc__,
1019 "tell() -> int\n\
1020 \n\
1021 Return the current file position, an integer (may be a long integer).\n\
1022 ");
1023
1024 static PyObject *
1025 BZ2File_tell(BZ2FileObject *self, PyObject *args)
1026 {
1027         PyObject *ret = NULL;
1028
1029         if (self->mode == MODE_CLOSED) {
1030                 PyErr_SetString(PyExc_ValueError,
1031                                 "I/O operation on closed file");
1032                 goto cleanup;
1033         }
1034
1035 #if !defined(HAVE_LARGEFILE_SUPPORT)
1036         ret = PyLong_FromLong(self->pos);
1037 #else
1038         ret = PyLong_FromLongLong(self->pos);
1039 #endif
1040
1041 cleanup:
1042         return ret;
1043 }
1044
1045 PyDoc_STRVAR(BZ2File_close__doc__,
1046 "close() -> None or (perhaps) an integer\n\
1047 \n\
1048 Close the file. Sets data attribute .closed to true. A closed file\n\
1049 cannot be used for further I/O operations. close() may be called more\n\
1050 than once without error.\n\
1051 ");
1052
1053 static PyObject *
1054 BZ2File_close(BZ2FileObject *self)
1055 {
1056         PyObject *ret = NULL;
1057         int bzerror = BZ_OK;
1058
1059         if (self->mode == MODE_CLOSED) {
1060                 Py_RETURN_NONE;
1061         }
1062
1063         ACQUIRE_LOCK(self);
1064         switch (self->mode) {
1065                 case MODE_READ:
1066                 case MODE_READ_EOF:
1067                         BZ2_bzReadClose(&bzerror, self->fp);
1068                         break;
1069                 case MODE_WRITE:
1070                         BZ2_bzWriteClose(&bzerror, self->fp,
1071                                          0, NULL, NULL);
1072                         break;
1073         }
1074         self->mode = MODE_CLOSED;
1075         fclose(self->rawfp);
1076         self->rawfp = NULL;
1077         if (bzerror == BZ_OK) {
1078                 Py_INCREF(Py_None);
1079                 ret = Py_None;
1080         }
1081         else {
1082                 Util_CatchBZ2Error(bzerror);
1083         }
1084
1085         RELEASE_LOCK(self);
1086         return ret;
1087 }
1088
1089 PyDoc_STRVAR(BZ2File_enter_doc,
1090 "__enter__() -> self.");
1091
1092 static PyObject *
1093 BZ2File_enter(BZ2FileObject *self)
1094 {
1095         if (self->mode == MODE_CLOSED) {
1096                 PyErr_SetString(PyExc_ValueError,
1097                         "I/O operation on closed file");
1098                 return NULL;
1099         }
1100         Py_INCREF(self);
1101         return (PyObject *) self;
1102 }
1103
1104 PyDoc_STRVAR(BZ2File_exit_doc,
1105 "__exit__(*excinfo) -> None.  Closes the file.");
1106
1107 static PyObject *
1108 BZ2File_exit(BZ2FileObject *self, PyObject *args)
1109 {
1110         PyObject *ret = PyObject_CallMethod((PyObject *) self, "close", NULL);
1111         if (!ret)
1112                 /* If error occurred, pass through */
1113                 return NULL;
1114         Py_DECREF(ret);
1115         Py_RETURN_NONE;
1116 }
1117
1118
1119 static PyObject *BZ2File_getiter(BZ2FileObject *self);
1120
1121 static PyMethodDef BZ2File_methods[] = {
1122         {"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__},
1123         {"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__doc__},
1124         {"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readlines__doc__},
1125         {"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__},
1126         {"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__doc__},
1127         {"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__},
1128         {"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__},
1129         {"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__},
1130         {"__enter__", (PyCFunction)BZ2File_enter, METH_NOARGS, BZ2File_enter_doc},
1131         {"__exit__", (PyCFunction)BZ2File_exit, METH_VARARGS, BZ2File_exit_doc},
1132         {NULL,          NULL}           /* sentinel */
1133 };
1134
1135
1136 /* ===================================================================== */
1137 /* Getters and setters of BZ2File. */
1138
1139 static PyObject *
1140 BZ2File_get_closed(BZ2FileObject *self, void *closure)
1141 {
1142         return PyLong_FromLong(self->mode == MODE_CLOSED);
1143 }
1144
1145 static PyGetSetDef BZ2File_getset[] = {
1146         {"closed", (getter)BZ2File_get_closed, NULL,
1147                         "True if the file is closed"},
1148         {NULL}  /* Sentinel */
1149 };
1150
1151
1152 /* ===================================================================== */
1153 /* Slot definitions for BZ2File_Type. */
1154
1155 static int
1156 BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
1157 {
1158         static char *kwlist[] = {"filename", "mode", "buffering",
1159                                  "compresslevel", 0};
1160         char *name;
1161         char *mode = "r";
1162         int buffering = -1;
1163         int compresslevel = 9;
1164         int bzerror;
1165         int mode_char = 0;
1166
1167         self->size = -1;
1168
1169         if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s|sii:BZ2File",
1170                                          kwlist, &name, &mode, &buffering,
1171                                          &compresslevel))
1172                 return -1;
1173
1174         if (compresslevel < 1 || compresslevel > 9) {
1175                 PyErr_SetString(PyExc_ValueError,
1176                                 "compresslevel must be between 1 and 9");
1177                 return -1;
1178         }
1179
1180         for (;;) {
1181                 int error = 0;
1182                 switch (*mode) {
1183                         case 'r':
1184                         case 'w':
1185                                 if (mode_char)
1186                                         error = 1;
1187                                 mode_char = *mode;
1188                                 break;
1189
1190                         case 'b':
1191                                 break;
1192
1193                         default:
1194                                 error = 1;
1195                                 break;
1196                 }
1197                 if (error) {
1198                         PyErr_Format(PyExc_ValueError,
1199                                      "invalid mode char %c", *mode);
1200                         return -1;
1201                 }
1202                 mode++;
1203                 if (*mode == '\0')
1204                         break;
1205         }
1206
1207         if (mode_char == 0) {
1208                 mode_char = 'r';
1209         }
1210
1211         mode = (mode_char == 'r') ? "rb" : "wb";
1212
1213         self->rawfp = fopen(name, mode);
1214         if (self->rawfp == NULL) {
1215                 PyErr_SetFromErrno(PyExc_IOError);
1216                 return -1;
1217         }
1218         /* XXX Ignore buffering */
1219
1220         /* From now on, we have stuff to dealloc, so jump to error label
1221          * instead of returning */
1222
1223 #ifdef WITH_THREAD
1224         self->lock = PyThread_allocate_lock();
1225         if (!self->lock) {
1226                 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1227                 goto error;
1228         }
1229 #endif
1230
1231         if (mode_char == 'r')
1232                 self->fp = BZ2_bzReadOpen(&bzerror, self->rawfp,
1233                                           0, 0, NULL, 0);
1234         else
1235                 self->fp = BZ2_bzWriteOpen(&bzerror, self->rawfp,
1236                                            compresslevel, 0, 0);
1237
1238         if (bzerror != BZ_OK) {
1239                 Util_CatchBZ2Error(bzerror);
1240                 goto error;
1241         }
1242
1243         self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE;
1244
1245         return 0;
1246
1247 error:
1248         fclose(self->rawfp);
1249         self->rawfp = NULL;
1250 #ifdef WITH_THREAD
1251         if (self->lock) {
1252                 PyThread_free_lock(self->lock);
1253                 self->lock = NULL;
1254         }
1255 #endif
1256         return -1;
1257 }
1258
1259 static void
1260 BZ2File_dealloc(BZ2FileObject *self)
1261 {
1262         int bzerror;
1263 #ifdef WITH_THREAD
1264         if (self->lock)
1265                 PyThread_free_lock(self->lock);
1266 #endif
1267         switch (self->mode) {
1268                 case MODE_READ:
1269                 case MODE_READ_EOF:
1270                         BZ2_bzReadClose(&bzerror, self->fp);
1271                         break;
1272                 case MODE_WRITE:
1273                         BZ2_bzWriteClose(&bzerror, self->fp,
1274                                          0, NULL, NULL);
1275                         break;
1276         }
1277         Util_DropReadAhead(self);
1278         if (self->rawfp != NULL)
1279                 fclose(self->rawfp);
1280         Py_TYPE(self)->tp_free((PyObject *)self);
1281 }
1282
1283 /* This is a hacked version of Python's fileobject.c:file_getiter(). */
1284 static PyObject *
1285 BZ2File_getiter(BZ2FileObject *self)
1286 {
1287         if (self->mode == MODE_CLOSED) {
1288                 PyErr_SetString(PyExc_ValueError,
1289                                 "I/O operation on closed file");
1290                 return NULL;
1291         }
1292         Py_INCREF((PyObject*)self);
1293         return (PyObject *)self;
1294 }
1295
1296 /* This is a hacked version of Python's fileobject.c:file_iternext(). */
1297 #define READAHEAD_BUFSIZE 8192
1298 static PyObject *
1299 BZ2File_iternext(BZ2FileObject *self)
1300 {
1301         PyBytesObject* ret;
1302         ACQUIRE_LOCK(self);
1303         if (self->mode == MODE_CLOSED) {
1304                 RELEASE_LOCK(self);
1305                 PyErr_SetString(PyExc_ValueError,
1306                                 "I/O operation on closed file");
1307                 return NULL;
1308         }
1309         ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE);
1310         RELEASE_LOCK(self);
1311         if (ret == NULL || PyBytes_GET_SIZE(ret) == 0) {
1312                 Py_XDECREF(ret);
1313                 return NULL;
1314         }
1315         return (PyObject *)ret;
1316 }
1317
1318 /* ===================================================================== */
1319 /* BZ2File_Type definition. */
1320
1321 PyDoc_VAR(BZ2File__doc__) =
1322 PyDoc_STR(
1323 "BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
1324 \n\
1325 Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
1326 writing. When opened for writing, the file will be created if it doesn't\n\
1327 exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
1328 unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
1329 is given, must be a number between 1 and 9.\n\
1330 Data read is always returned in bytes; data written ought to be bytes.\n\
1331 ");
1332
1333 static PyTypeObject BZ2File_Type = {
1334         PyVarObject_HEAD_INIT(NULL, 0)
1335         "bz2.BZ2File",          /*tp_name*/
1336         sizeof(BZ2FileObject),  /*tp_basicsize*/
1337         0,                      /*tp_itemsize*/
1338         (destructor)BZ2File_dealloc, /*tp_dealloc*/
1339         0,                      /*tp_print*/
1340         0,                      /*tp_getattr*/
1341         0,                      /*tp_setattr*/
1342         0,                      /*tp_reserved*/
1343         0,                      /*tp_repr*/
1344         0,                      /*tp_as_number*/
1345         0,                      /*tp_as_sequence*/
1346         0,                      /*tp_as_mapping*/
1347         0,                      /*tp_hash*/
1348         0,                      /*tp_call*/
1349         0,                      /*tp_str*/
1350         PyObject_GenericGetAttr,/*tp_getattro*/
1351         PyObject_GenericSetAttr,/*tp_setattro*/
1352         0,                      /*tp_as_buffer*/
1353         Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1354         BZ2File__doc__,         /*tp_doc*/
1355         0,                      /*tp_traverse*/
1356         0,                      /*tp_clear*/
1357         0,                      /*tp_richcompare*/
1358         0,                      /*tp_weaklistoffset*/
1359         (getiterfunc)BZ2File_getiter, /*tp_iter*/
1360         (iternextfunc)BZ2File_iternext, /*tp_iternext*/
1361         BZ2File_methods,        /*tp_methods*/
1362         0,                      /*tp_members*/
1363         BZ2File_getset,         /*tp_getset*/
1364         0,                      /*tp_base*/
1365         0,                      /*tp_dict*/
1366         0,                      /*tp_descr_get*/
1367         0,                      /*tp_descr_set*/
1368         0,                      /*tp_dictoffset*/
1369         (initproc)BZ2File_init, /*tp_init*/
1370         PyType_GenericAlloc,    /*tp_alloc*/
1371         PyType_GenericNew,      /*tp_new*/
1372         PyObject_Free,          /*tp_free*/
1373         0,                      /*tp_is_gc*/
1374 };
1375
1376
1377 /* ===================================================================== */
1378 /* Methods of BZ2Comp. */
1379
1380 PyDoc_STRVAR(BZ2Comp_compress__doc__,
1381 "compress(data) -> string\n\
1382 \n\
1383 Provide more data to the compressor object. It will return chunks of\n\
1384 compressed data whenever possible. When you've finished providing data\n\
1385 to compress, call the flush() method to finish the compression process,\n\
1386 and return what is left in the internal buffers.\n\
1387 ");
1388
1389 static PyObject *
1390 BZ2Comp_compress(BZ2CompObject *self, PyObject *args)
1391 {
1392         Py_buffer pdata;
1393         char *data;
1394         int datasize;
1395         int bufsize = SMALLCHUNK;
1396         PY_LONG_LONG totalout;
1397         PyObject *ret = NULL;
1398         bz_stream *bzs = &self->bzs;
1399         int bzerror;
1400
1401         if (!PyArg_ParseTuple(args, "y*:compress", &pdata))
1402                 return NULL;
1403         data = pdata.buf;
1404         datasize = pdata.len;
1405
1406         if (datasize == 0) {
1407                 PyBuffer_Release(&pdata);
1408                 return PyBytes_FromStringAndSize("", 0);
1409         }
1410
1411         ACQUIRE_LOCK(self);
1412         if (!self->running) {
1413                 PyErr_SetString(PyExc_ValueError,
1414                                 "this object was already flushed");
1415                 goto error;
1416         }
1417
1418         ret = PyBytes_FromStringAndSize(NULL, bufsize);
1419         if (!ret)
1420                 goto error;
1421
1422         bzs->next_in = data;
1423         bzs->avail_in = datasize;
1424         bzs->next_out = BUF(ret);
1425         bzs->avail_out = bufsize;
1426
1427         totalout = BZS_TOTAL_OUT(bzs);
1428
1429         for (;;) {
1430                 Py_BEGIN_ALLOW_THREADS
1431                 bzerror = BZ2_bzCompress(bzs, BZ_RUN);
1432                 Py_END_ALLOW_THREADS
1433                 if (bzerror != BZ_RUN_OK) {
1434                         Util_CatchBZ2Error(bzerror);
1435                         goto error;
1436                 }
1437                 if (bzs->avail_in == 0)
1438                         break; /* no more input data */
1439                 if (bzs->avail_out == 0) {
1440                         bufsize = Util_NewBufferSize(bufsize);
1441                         if (_PyBytes_Resize(&ret, bufsize) < 0) {
1442                                 BZ2_bzCompressEnd(bzs);
1443                                 goto error;
1444                         }
1445                         bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1446                                                     - totalout);
1447                         bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1448                 }
1449         }
1450
1451         if (_PyBytes_Resize(&ret,
1452                            (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout)) < 0)
1453                 goto error;
1454
1455         RELEASE_LOCK(self);
1456         PyBuffer_Release(&pdata);
1457         return ret;
1458
1459 error:
1460         RELEASE_LOCK(self);
1461         PyBuffer_Release(&pdata);
1462         Py_XDECREF(ret);
1463         return NULL;
1464 }
1465
1466 PyDoc_STRVAR(BZ2Comp_flush__doc__,
1467 "flush() -> string\n\
1468 \n\
1469 Finish the compression process and return what is left in internal buffers.\n\
1470 You must not use the compressor object after calling this method.\n\
1471 ");
1472
1473 static PyObject *
1474 BZ2Comp_flush(BZ2CompObject *self)
1475 {
1476         int bufsize = SMALLCHUNK;
1477         PyObject *ret = NULL;
1478         bz_stream *bzs = &self->bzs;
1479         PY_LONG_LONG totalout;
1480         int bzerror;
1481
1482         ACQUIRE_LOCK(self);
1483         if (!self->running) {
1484                 PyErr_SetString(PyExc_ValueError, "object was already "
1485                                                   "flushed");
1486                 goto error;
1487         }
1488         self->running = 0;
1489
1490         ret = PyBytes_FromStringAndSize(NULL, bufsize);
1491         if (!ret)
1492                 goto error;
1493
1494         bzs->next_out = BUF(ret);
1495         bzs->avail_out = bufsize;
1496
1497         totalout = BZS_TOTAL_OUT(bzs);
1498
1499         for (;;) {
1500                 Py_BEGIN_ALLOW_THREADS
1501                 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1502                 Py_END_ALLOW_THREADS
1503                 if (bzerror == BZ_STREAM_END) {
1504                         break;
1505                 } else if (bzerror != BZ_FINISH_OK) {
1506                         Util_CatchBZ2Error(bzerror);
1507                         goto error;
1508                 }
1509                 if (bzs->avail_out == 0) {
1510                         bufsize = Util_NewBufferSize(bufsize);
1511                         if (_PyBytes_Resize(&ret, bufsize) < 0)
1512                                 goto error;
1513                         bzs->next_out = BUF(ret);
1514                         bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1515                                                     - totalout);
1516                         bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1517                 }
1518         }
1519
1520         if (bzs->avail_out != 0) {
1521                 if (_PyBytes_Resize(&ret,
1522                             (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout)) < 0)
1523                         goto error;
1524         }
1525
1526         RELEASE_LOCK(self);
1527         return ret;
1528
1529 error:
1530         RELEASE_LOCK(self);
1531         Py_XDECREF(ret);
1532         return NULL;
1533 }
1534
1535 static PyMethodDef BZ2Comp_methods[] = {
1536         {"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS,
1537          BZ2Comp_compress__doc__},
1538         {"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS,
1539          BZ2Comp_flush__doc__},
1540         {NULL,          NULL}           /* sentinel */
1541 };
1542
1543
1544 /* ===================================================================== */
1545 /* Slot definitions for BZ2Comp_Type. */
1546
1547 static int
1548 BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs)
1549 {
1550         int compresslevel = 9;
1551         int bzerror;
1552         static char *kwlist[] = {"compresslevel", 0};
1553
1554         if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor",
1555                                          kwlist, &compresslevel))
1556                 return -1;
1557
1558         if (compresslevel < 1 || compresslevel > 9) {
1559                 PyErr_SetString(PyExc_ValueError,
1560                                 "compresslevel must be between 1 and 9");
1561                 goto error;
1562         }
1563
1564 #ifdef WITH_THREAD
1565         self->lock = PyThread_allocate_lock();
1566         if (!self->lock) {
1567                 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1568                 goto error;
1569         }
1570 #endif
1571
1572         memset(&self->bzs, 0, sizeof(bz_stream));
1573         bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
1574         if (bzerror != BZ_OK) {
1575                 Util_CatchBZ2Error(bzerror);
1576                 goto error;
1577         }
1578
1579         self->running = 1;
1580
1581         return 0;
1582 error:
1583 #ifdef WITH_THREAD
1584         if (self->lock) {
1585                 PyThread_free_lock(self->lock);
1586                 self->lock = NULL;
1587         }
1588 #endif
1589         return -1;
1590 }
1591
1592 static void
1593 BZ2Comp_dealloc(BZ2CompObject *self)
1594 {
1595 #ifdef WITH_THREAD
1596         if (self->lock)
1597                 PyThread_free_lock(self->lock);
1598 #endif
1599         BZ2_bzCompressEnd(&self->bzs);
1600         Py_TYPE(self)->tp_free((PyObject *)self);
1601 }
1602
1603
1604 /* ===================================================================== */
1605 /* BZ2Comp_Type definition. */
1606
1607 PyDoc_STRVAR(BZ2Comp__doc__,
1608 "BZ2Compressor([compresslevel=9]) -> compressor object\n\
1609 \n\
1610 Create a new compressor object. This object may be used to compress\n\
1611 data sequentially. If you want to compress data in one shot, use the\n\
1612 compress() function instead. The compresslevel parameter, if given,\n\
1613 must be a number between 1 and 9.\n\
1614 ");
1615
1616 static PyTypeObject BZ2Comp_Type = {
1617         PyVarObject_HEAD_INIT(NULL, 0)
1618         "bz2.BZ2Compressor",    /*tp_name*/
1619         sizeof(BZ2CompObject),  /*tp_basicsize*/
1620         0,                      /*tp_itemsize*/
1621         (destructor)BZ2Comp_dealloc, /*tp_dealloc*/
1622         0,                      /*tp_print*/
1623         0,                      /*tp_getattr*/
1624         0,                      /*tp_setattr*/
1625         0,                      /*tp_reserved*/
1626         0,                      /*tp_repr*/
1627         0,                      /*tp_as_number*/
1628         0,                      /*tp_as_sequence*/
1629         0,                      /*tp_as_mapping*/
1630         0,                      /*tp_hash*/
1631         0,                      /*tp_call*/
1632         0,                      /*tp_str*/
1633         PyObject_GenericGetAttr,/*tp_getattro*/
1634         PyObject_GenericSetAttr,/*tp_setattro*/
1635         0,                      /*tp_as_buffer*/
1636         Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1637         BZ2Comp__doc__,         /*tp_doc*/
1638         0,                      /*tp_traverse*/
1639         0,                      /*tp_clear*/
1640         0,                      /*tp_richcompare*/
1641         0,                      /*tp_weaklistoffset*/
1642         0,                      /*tp_iter*/
1643         0,                      /*tp_iternext*/
1644         BZ2Comp_methods,        /*tp_methods*/
1645         0,                      /*tp_members*/
1646         0,                      /*tp_getset*/
1647         0,                      /*tp_base*/
1648         0,                      /*tp_dict*/
1649         0,                      /*tp_descr_get*/
1650         0,                      /*tp_descr_set*/
1651         0,                      /*tp_dictoffset*/
1652         (initproc)BZ2Comp_init, /*tp_init*/
1653         PyType_GenericAlloc,    /*tp_alloc*/
1654         PyType_GenericNew,      /*tp_new*/
1655         PyObject_Free,          /*tp_free*/
1656         0,                      /*tp_is_gc*/
1657 };
1658
1659
1660 /* ===================================================================== */
1661 /* Members of BZ2Decomp. */
1662
1663 #undef OFF
1664 #define OFF(x) offsetof(BZ2DecompObject, x)
1665
1666 static PyMemberDef BZ2Decomp_members[] = {
1667         {"unused_data", T_OBJECT, OFF(unused_data), READONLY},
1668         {NULL}  /* Sentinel */
1669 };
1670
1671
1672 /* ===================================================================== */
1673 /* Methods of BZ2Decomp. */
1674
1675 PyDoc_STRVAR(BZ2Decomp_decompress__doc__,
1676 "decompress(data) -> string\n\
1677 \n\
1678 Provide more data to the decompressor object. It will return chunks\n\
1679 of decompressed data whenever possible. If you try to decompress data\n\
1680 after the end of stream is found, EOFError will be raised. If any data\n\
1681 was found after the end of stream, it'll be ignored and saved in\n\
1682 unused_data attribute.\n\
1683 ");
1684
1685 static PyObject *
1686 BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args)
1687 {
1688         Py_buffer pdata;
1689         char *data;
1690         int datasize;
1691         int bufsize = SMALLCHUNK;
1692         PY_LONG_LONG totalout;
1693         PyObject *ret = NULL;
1694         bz_stream *bzs = &self->bzs;
1695         int bzerror;
1696
1697         if (!PyArg_ParseTuple(args, "y*:decompress", &pdata))
1698                 return NULL;
1699         data = pdata.buf;
1700         datasize = pdata.len;
1701
1702         ACQUIRE_LOCK(self);
1703         if (!self->running) {
1704                 PyErr_SetString(PyExc_EOFError, "end of stream was "
1705                                                 "already found");
1706                 goto error;
1707         }
1708
1709         ret = PyBytes_FromStringAndSize(NULL, bufsize);
1710         if (!ret)
1711                 goto error;
1712
1713         bzs->next_in = data;
1714         bzs->avail_in = datasize;
1715         bzs->next_out = BUF(ret);
1716         bzs->avail_out = bufsize;
1717
1718         totalout = BZS_TOTAL_OUT(bzs);
1719
1720         for (;;) {
1721                 Py_BEGIN_ALLOW_THREADS
1722                 bzerror = BZ2_bzDecompress(bzs);
1723                 Py_END_ALLOW_THREADS
1724                 if (bzerror == BZ_STREAM_END) {
1725                         if (bzs->avail_in != 0) {
1726                                 Py_DECREF(self->unused_data);
1727                                 self->unused_data =
1728                                     PyBytes_FromStringAndSize(bzs->next_in,
1729                                                                bzs->avail_in);
1730                         }
1731                         self->running = 0;
1732                         break;
1733                 }
1734                 if (bzerror != BZ_OK) {
1735                         Util_CatchBZ2Error(bzerror);
1736                         goto error;
1737                 }
1738                 if (bzs->avail_in == 0)
1739                         break; /* no more input data */
1740                 if (bzs->avail_out == 0) {
1741                         bufsize = Util_NewBufferSize(bufsize);
1742                         if (_PyBytes_Resize(&ret, bufsize) < 0) {
1743                                 BZ2_bzDecompressEnd(bzs);
1744                                 goto error;
1745                         }
1746                         bzs->next_out = BUF(ret);
1747                         bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1748                                                     - totalout);
1749                         bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1750                 }
1751         }
1752
1753         if (bzs->avail_out != 0) {
1754                 if (_PyBytes_Resize(&ret,
1755                             (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout)) < 0)
1756                         goto error;
1757         }
1758
1759         RELEASE_LOCK(self);
1760         PyBuffer_Release(&pdata);
1761         return ret;
1762
1763 error:
1764         RELEASE_LOCK(self);
1765         PyBuffer_Release(&pdata);
1766         Py_XDECREF(ret);
1767         return NULL;
1768 }
1769
1770 static PyMethodDef BZ2Decomp_methods[] = {
1771         {"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_decompress__doc__},
1772         {NULL,          NULL}           /* sentinel */
1773 };
1774
1775
1776 /* ===================================================================== */
1777 /* Slot definitions for BZ2Decomp_Type. */
1778
1779 static int
1780 BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs)
1781 {
1782         int bzerror;
1783
1784         if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
1785                 return -1;
1786
1787 #ifdef WITH_THREAD
1788         self->lock = PyThread_allocate_lock();
1789         if (!self->lock) {
1790                 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1791                 goto error;
1792         }
1793 #endif
1794
1795         self->unused_data = PyBytes_FromStringAndSize("", 0);
1796         if (!self->unused_data)
1797                 goto error;
1798
1799         memset(&self->bzs, 0, sizeof(bz_stream));
1800         bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
1801         if (bzerror != BZ_OK) {
1802                 Util_CatchBZ2Error(bzerror);
1803                 goto error;
1804         }
1805
1806         self->running = 1;
1807
1808         return 0;
1809
1810 error:
1811 #ifdef WITH_THREAD
1812         if (self->lock) {
1813                 PyThread_free_lock(self->lock);
1814                 self->lock = NULL;
1815         }
1816 #endif
1817         Py_CLEAR(self->unused_data);
1818         return -1;
1819 }
1820
1821 static void
1822 BZ2Decomp_dealloc(BZ2DecompObject *self)
1823 {
1824 #ifdef WITH_THREAD
1825         if (self->lock)
1826                 PyThread_free_lock(self->lock);
1827 #endif
1828         Py_XDECREF(self->unused_data);
1829         BZ2_bzDecompressEnd(&self->bzs);
1830         Py_TYPE(self)->tp_free((PyObject *)self);
1831 }
1832
1833
1834 /* ===================================================================== */
1835 /* BZ2Decomp_Type definition. */
1836
1837 PyDoc_STRVAR(BZ2Decomp__doc__,
1838 "BZ2Decompressor() -> decompressor object\n\
1839 \n\
1840 Create a new decompressor object. This object may be used to decompress\n\
1841 data sequentially. If you want to decompress data in one shot, use the\n\
1842 decompress() function instead.\n\
1843 ");
1844
1845 static PyTypeObject BZ2Decomp_Type = {
1846         PyVarObject_HEAD_INIT(NULL, 0)
1847         "bz2.BZ2Decompressor",  /*tp_name*/
1848         sizeof(BZ2DecompObject), /*tp_basicsize*/
1849         0,                      /*tp_itemsize*/
1850         (destructor)BZ2Decomp_dealloc, /*tp_dealloc*/
1851         0,                      /*tp_print*/
1852         0,                      /*tp_getattr*/
1853         0,                      /*tp_setattr*/
1854         0,                      /*tp_reserved*/
1855         0,                      /*tp_repr*/
1856         0,                      /*tp_as_number*/
1857         0,                      /*tp_as_sequence*/
1858         0,                      /*tp_as_mapping*/
1859         0,                      /*tp_hash*/
1860         0,                      /*tp_call*/
1861         0,                      /*tp_str*/
1862         PyObject_GenericGetAttr,/*tp_getattro*/
1863         PyObject_GenericSetAttr,/*tp_setattro*/
1864         0,                      /*tp_as_buffer*/
1865         Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1866         BZ2Decomp__doc__,       /*tp_doc*/
1867         0,                      /*tp_traverse*/
1868         0,                      /*tp_clear*/
1869         0,                      /*tp_richcompare*/
1870         0,                      /*tp_weaklistoffset*/
1871         0,                      /*tp_iter*/
1872         0,                      /*tp_iternext*/
1873         BZ2Decomp_methods,      /*tp_methods*/
1874         BZ2Decomp_members,      /*tp_members*/
1875         0,                      /*tp_getset*/
1876         0,                      /*tp_base*/
1877         0,                      /*tp_dict*/
1878         0,                      /*tp_descr_get*/
1879         0,                      /*tp_descr_set*/
1880         0,                      /*tp_dictoffset*/
1881         (initproc)BZ2Decomp_init, /*tp_init*/
1882         PyType_GenericAlloc,    /*tp_alloc*/
1883         PyType_GenericNew,      /*tp_new*/
1884         PyObject_Free,          /*tp_free*/
1885         0,                      /*tp_is_gc*/
1886 };
1887
1888
1889 /* ===================================================================== */
1890 /* Module functions. */
1891
1892 PyDoc_STRVAR(bz2_compress__doc__,
1893 "compress(data [, compresslevel=9]) -> string\n\
1894 \n\
1895 Compress data in one shot. If you want to compress data sequentially,\n\
1896 use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
1897 given, must be a number between 1 and 9.\n\
1898 ");
1899
1900 static PyObject *
1901 bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs)
1902 {
1903         int compresslevel=9;
1904         Py_buffer pdata;
1905         char *data;
1906         int datasize;
1907         int bufsize;
1908         PyObject *ret = NULL;
1909         bz_stream _bzs;
1910         bz_stream *bzs = &_bzs;
1911         int bzerror;
1912         static char *kwlist[] = {"data", "compresslevel", 0};
1913
1914         if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|i",
1915                                          kwlist, &pdata,
1916                                          &compresslevel))
1917                 return NULL;
1918         data = pdata.buf;
1919         datasize = pdata.len;
1920
1921         if (compresslevel < 1 || compresslevel > 9) {
1922                 PyErr_SetString(PyExc_ValueError,
1923                                 "compresslevel must be between 1 and 9");
1924                 PyBuffer_Release(&pdata);
1925                 return NULL;
1926         }
1927
1928         /* Conforming to bz2 manual, this is large enough to fit compressed
1929          * data in one shot. We will check it later anyway. */
1930         bufsize = datasize + (datasize/100+1) + 600;
1931
1932         ret = PyBytes_FromStringAndSize(NULL, bufsize);
1933         if (!ret) {
1934                 PyBuffer_Release(&pdata);
1935                 return NULL;
1936         }
1937
1938         memset(bzs, 0, sizeof(bz_stream));
1939
1940         bzs->next_in = data;
1941         bzs->avail_in = datasize;
1942         bzs->next_out = BUF(ret);
1943         bzs->avail_out = bufsize;
1944
1945         bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0);
1946         if (bzerror != BZ_OK) {
1947                 Util_CatchBZ2Error(bzerror);
1948                 PyBuffer_Release(&pdata);
1949                 Py_DECREF(ret);
1950                 return NULL;
1951         }
1952
1953         for (;;) {
1954                 Py_BEGIN_ALLOW_THREADS
1955                 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1956                 Py_END_ALLOW_THREADS
1957                 if (bzerror == BZ_STREAM_END) {
1958                         break;
1959                 } else if (bzerror != BZ_FINISH_OK) {
1960                         BZ2_bzCompressEnd(bzs);
1961                         Util_CatchBZ2Error(bzerror);
1962                         PyBuffer_Release(&pdata);
1963                         Py_DECREF(ret);
1964                         return NULL;
1965                 }
1966                 if (bzs->avail_out == 0) {
1967                         bufsize = Util_NewBufferSize(bufsize);
1968                         if (_PyBytes_Resize(&ret, bufsize) < 0) {
1969                                 BZ2_bzCompressEnd(bzs);
1970                                 PyBuffer_Release(&pdata);
1971                                 return NULL;
1972                         }
1973                         bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
1974                         bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1975                 }
1976         }
1977
1978         if (bzs->avail_out != 0) {
1979                 if (_PyBytes_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs)) < 0) {
1980                         ret = NULL;
1981                 }
1982         }
1983         BZ2_bzCompressEnd(bzs);
1984
1985         PyBuffer_Release(&pdata);
1986         return ret;
1987 }
1988
1989 PyDoc_STRVAR(bz2_decompress__doc__,
1990 "decompress(data) -> decompressed data\n\
1991 \n\
1992 Decompress data in one shot. If you want to decompress data sequentially,\n\
1993 use an instance of BZ2Decompressor instead.\n\
1994 ");
1995
1996 static PyObject *
1997 bz2_decompress(PyObject *self, PyObject *args)
1998 {
1999         Py_buffer pdata;
2000         char *data;
2001         int datasize;
2002         int bufsize = SMALLCHUNK;
2003         PyObject *ret;
2004         bz_stream _bzs;
2005         bz_stream *bzs = &_bzs;
2006         int bzerror;
2007
2008         if (!PyArg_ParseTuple(args, "y*:decompress", &pdata))
2009                 return NULL;
2010         data = pdata.buf;
2011         datasize = pdata.len;
2012
2013         if (datasize == 0) {
2014                 PyBuffer_Release(&pdata);
2015                 return PyBytes_FromStringAndSize("", 0);
2016         }
2017
2018         ret = PyBytes_FromStringAndSize(NULL, bufsize);
2019         if (!ret) {
2020                 PyBuffer_Release(&pdata);
2021                 return NULL;
2022         }
2023
2024         memset(bzs, 0, sizeof(bz_stream));
2025
2026         bzs->next_in = data;
2027         bzs->avail_in = datasize;
2028         bzs->next_out = BUF(ret);
2029         bzs->avail_out = bufsize;
2030
2031         bzerror = BZ2_bzDecompressInit(bzs, 0, 0);
2032         if (bzerror != BZ_OK) {
2033                 Util_CatchBZ2Error(bzerror);
2034                 Py_DECREF(ret);
2035                 PyBuffer_Release(&pdata);
2036                 return NULL;
2037         }
2038
2039         for (;;) {
2040                 Py_BEGIN_ALLOW_THREADS
2041                 bzerror = BZ2_bzDecompress(bzs);
2042                 Py_END_ALLOW_THREADS
2043                 if (bzerror == BZ_STREAM_END) {
2044                         break;
2045                 } else if (bzerror != BZ_OK) {
2046                         BZ2_bzDecompressEnd(bzs);
2047                         Util_CatchBZ2Error(bzerror);
2048                         PyBuffer_Release(&pdata);
2049                         Py_DECREF(ret);
2050                         return NULL;
2051                 }
2052                 if (bzs->avail_in == 0) {
2053                         BZ2_bzDecompressEnd(bzs);
2054                         PyErr_SetString(PyExc_ValueError,
2055                                         "couldn't find end of stream");
2056                         PyBuffer_Release(&pdata);
2057                         Py_DECREF(ret);
2058                         return NULL;
2059                 }
2060                 if (bzs->avail_out == 0) {
2061                         bufsize = Util_NewBufferSize(bufsize);
2062                         if (_PyBytes_Resize(&ret, bufsize) < 0) {
2063                                 BZ2_bzDecompressEnd(bzs);
2064                                 PyBuffer_Release(&pdata);
2065                                 return NULL;
2066                         }
2067                         bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2068                         bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2069                 }
2070         }
2071
2072         if (bzs->avail_out != 0) {
2073                 if (_PyBytes_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs)) < 0) {
2074                         ret = NULL;
2075                 }
2076         }
2077         BZ2_bzDecompressEnd(bzs);
2078         PyBuffer_Release(&pdata);
2079
2080         return ret;
2081 }
2082
2083 static PyMethodDef bz2_methods[] = {
2084         {"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS,
2085                 bz2_compress__doc__},
2086         {"decompress", (PyCFunction) bz2_decompress, METH_VARARGS,
2087                 bz2_decompress__doc__},
2088         {NULL,          NULL}           /* sentinel */
2089 };
2090
2091 /* ===================================================================== */
2092 /* Initialization function. */
2093
2094 PyDoc_STRVAR(bz2__doc__,
2095 "The python bz2 module provides a comprehensive interface for\n\
2096 the bz2 compression library. It implements a complete file\n\
2097 interface, one shot (de)compression functions, and types for\n\
2098 sequential (de)compression.\n\
2099 ");
2100
2101
2102 static struct PyModuleDef bz2module = {
2103         PyModuleDef_HEAD_INIT,
2104         "bz2",
2105         bz2__doc__,
2106         -1,
2107         bz2_methods,
2108         NULL,
2109         NULL,
2110         NULL,
2111         NULL
2112 };
2113
2114 PyMODINIT_FUNC
2115 PyInit_bz2(void)
2116 {
2117         PyObject *m;
2118
2119         Py_TYPE(&BZ2File_Type) = &PyType_Type;
2120         Py_TYPE(&BZ2Comp_Type) = &PyType_Type;
2121         Py_TYPE(&BZ2Decomp_Type) = &PyType_Type;
2122
2123         m = PyModule_Create(&bz2module);
2124         if (m == NULL)
2125                 return NULL;
2126
2127         PyModule_AddObject(m, "__author__", PyUnicode_FromString(__author__));
2128
2129         Py_INCREF(&BZ2File_Type);
2130         PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type);
2131
2132         Py_INCREF(&BZ2Comp_Type);
2133         PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type);
2134
2135         Py_INCREF(&BZ2Decomp_Type);
2136         PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type);
2137         return m;
2138 }