Modules/bz2module.c

   1 /*
   2
   3 python-bz2 - python bz2 library interface
   4
   5 Copyright (c) 2002  Gustavo Niemeyer <niemeyer@conectiva.com>
   6 Copyright (c) 2002  Python Software Foundation; All Rights Reserved
   7
   8 */
   9
  10 #include "Python.h"
  11 #include <stdio.h>
  12 #include <bzlib.h>
  13 #include "structmember.h"
  14
  15 #ifdef WITH_THREAD
  16 #include "pythread.h"
  17 #endif
  18
  19 static char __author__[] =
  20 "The bz2 python module was written by:\n\
  21 \n\
  22     Gustavo Niemeyer <niemeyer@conectiva.com>\n\
  23 ";
  24
  25 /* Our very own off_t-like type, 64-bit if possible */
  26 /* copied from Objects/fileobject.c */
  27 #if !defined(HAVE_LARGEFILE_SUPPORT)
  28 typedef off_t Py_off_t;
  29 #elif SIZEOF_OFF_T >= 8
  30 typedef off_t Py_off_t;
  31 #elif SIZEOF_FPOS_T >= 8
  32 typedef fpos_t Py_off_t;
  33 #else
  34 #error "Large file support, but neither off_t nor fpos_t is large enough."
  35 #endif
  36
  37 #define BUF(v) PyString_AS_STRING((PyStringObject *)v)
  38
  39 #define MODE_CLOSED   0
  40 #define MODE_READ     1
  41 #define MODE_READ_EOF 2
  42 #define MODE_WRITE    3
  43
  44 #define BZ2FileObject_Check(v)  ((v)->ob_type == &BZ2File_Type)
  45
  46
  47 #ifdef BZ_CONFIG_ERROR
  48
  49 #if SIZEOF_LONG >= 8
  50 #define BZS_TOTAL_OUT(bzs) \
  51         (((long)bzs->total_out_hi32 << 32) + bzs->total_out_lo32)
  52 #elif SIZEOF_LONG_LONG >= 8
  53 #define BZS_TOTAL_OUT(bzs) \
  54         (((PY_LONG_LONG)bzs->total_out_hi32 << 32) + bzs->total_out_lo32)
  55 #else
  56 #define BZS_TOTAL_OUT(bzs) \
  57         bzs->total_out_lo32
  58 #endif
  59
  60 #else /* ! BZ_CONFIG_ERROR */
  61
  62 #define BZ2_bzRead bzRead
  63 #define BZ2_bzReadOpen bzReadOpen
  64 #define BZ2_bzReadClose bzReadClose
  65 #define BZ2_bzWrite bzWrite
  66 #define BZ2_bzWriteOpen bzWriteOpen
  67 #define BZ2_bzWriteClose bzWriteClose
  68 #define BZ2_bzCompress bzCompress
  69 #define BZ2_bzCompressInit bzCompressInit
  70 #define BZ2_bzCompressEnd bzCompressEnd
  71 #define BZ2_bzDecompress bzDecompress
  72 #define BZ2_bzDecompressInit bzDecompressInit
  73 #define BZ2_bzDecompressEnd bzDecompressEnd
  74
  75 #define BZS_TOTAL_OUT(bzs) bzs->total_out
  76
  77 #endif /* ! BZ_CONFIG_ERROR */
  78
  79
  80 #ifdef WITH_THREAD
  81 #define ACQUIRE_LOCK(obj) PyThread_acquire_lock(obj->lock, 1)
  82 #define RELEASE_LOCK(obj) PyThread_release_lock(obj->lock)
  83 #else
  84 #define ACQUIRE_LOCK(obj)
  85 #define RELEASE_LOCK(obj)
  86 #endif
  87
  88 /* Bits in f_newlinetypes */
  89 #define NEWLINE_UNKNOWN 0       /* No newline seen, yet */
  90 #define NEWLINE_CR 1            /* \r newline seen */
  91 #define NEWLINE_LF 2            /* \n newline seen */
  92 #define NEWLINE_CRLF 4          /* \r\n newline seen */
  93
  94 /* ===================================================================== */
  95 /* Structure definitions. */
  96
  97 typedef struct {
  98         PyObject_HEAD
  99         PyObject *file;
 100
 101         char* f_buf;            /* Allocated readahead buffer */
 102         char* f_bufend;         /* Points after last occupied position */
 103         char* f_bufptr;         /* Current buffer position */
 104
 105         int f_softspace;        /* Flag used by 'print' command */
 106
 107         int f_univ_newline;     /* Handle any newline convention */
 108         int f_newlinetypes;     /* Types of newlines seen */
 109         int f_skipnextlf;       /* Skip next \n */
 110
 111         BZFILE *fp;
 112         int mode;
 113         Py_off_t pos;
 114         Py_off_t size;
 115 #ifdef WITH_THREAD
 116         PyThread_type_lock lock;
 117 #endif
 118 } BZ2FileObject;
 119
 120 typedef struct {
 121         PyObject_HEAD
 122         bz_stream bzs;
 123         int running;
 124 #ifdef WITH_THREAD
 125         PyThread_type_lock lock;
 126 #endif
 127 } BZ2CompObject;
 128
 129 typedef struct {
 130         PyObject_HEAD
 131         bz_stream bzs;
 132         int running;
 133         PyObject *unused_data;
 134 #ifdef WITH_THREAD
 135         PyThread_type_lock lock;
 136 #endif
 137 } BZ2DecompObject;
 138
 139 /* ===================================================================== */
 140 /* Utility functions. */
 141
 142 static int
 143 Util_CatchBZ2Error(int bzerror)
 144 {
 145         int ret = 0;
 146         switch(bzerror) {
 147                 case BZ_OK:
 148                 case BZ_STREAM_END:
 149                         break;
 150
 151 #ifdef BZ_CONFIG_ERROR
 152                 case BZ_CONFIG_ERROR:
 153                         PyErr_SetString(PyExc_SystemError,
 154                                         "the bz2 library was not compiled "
 155                                         "correctly");
 156                         ret = 1;
 157                         break;
 158 #endif
 159
 160                 case BZ_PARAM_ERROR:
 161                         PyErr_SetString(PyExc_ValueError,
 162                                         "the bz2 library has received wrong "
 163                                         "parameters");
 164                         ret = 1;
 165                         break;
 166
 167                 case BZ_MEM_ERROR:
 168                         PyErr_NoMemory();
 169                         ret = 1;
 170                         break;
 171
 172                 case BZ_DATA_ERROR:
 173                 case BZ_DATA_ERROR_MAGIC:
 174                         PyErr_SetString(PyExc_IOError, "invalid data stream");
 175                         ret = 1;
 176                         break;
 177
 178                 case BZ_IO_ERROR:
 179                         PyErr_SetString(PyExc_IOError, "unknown IO error");
 180                         ret = 1;
 181                         break;
 182
 183                 case BZ_UNEXPECTED_EOF:
 184                         PyErr_SetString(PyExc_EOFError,
 185                                         "compressed file ended before the "
 186                                         "logical end-of-stream was detected");
 187                         ret = 1;
 188                         break;
 189
 190                 case BZ_SEQUENCE_ERROR:
 191                         PyErr_SetString(PyExc_RuntimeError,
 192                                         "wrong sequence of bz2 library "
 193                                         "commands used");
 194                         ret = 1;
 195                         break;
 196         }
 197         return ret;
 198 }
 199
 200 #if BUFSIZ < 8192
 201 #define SMALLCHUNK 8192
 202 #else
 203 #define SMALLCHUNK BUFSIZ
 204 #endif
 205
 206 #if SIZEOF_INT < 4
 207 #define BIGCHUNK  (512 * 32)
 208 #else
 209 #define BIGCHUNK  (512 * 1024)
 210 #endif
 211
 212 /* This is a hacked version of Python's fileobject.c:new_buffersize(). */
 213 static size_t
 214 Util_NewBufferSize(size_t currentsize)
 215 {
 216         if (currentsize > SMALLCHUNK) {
 217                 /* Keep doubling until we reach BIGCHUNK;
 218                    then keep adding BIGCHUNK. */
 219                 if (currentsize <= BIGCHUNK)
 220                         return currentsize + currentsize;
 221                 else
 222                         return currentsize + BIGCHUNK;
 223         }
 224         return currentsize + SMALLCHUNK;
 225 }
 226
 227 /* This is a hacked version of Python's fileobject.c:get_line(). */
 228 static PyObject *
 229 Util_GetLine(BZ2FileObject *f, int n)
 230 {
 231         char c;
 232         char *buf, *end;
 233         size_t total_v_size;    /* total # of slots in buffer */
 234         size_t used_v_size;     /* # used slots in buffer */
 235         size_t increment;       /* amount to increment the buffer */
 236         PyObject *v;
 237         int bzerror;
 238         int newlinetypes = f->f_newlinetypes;
 239         int skipnextlf = f->f_skipnextlf;
 240         int univ_newline = f->f_univ_newline;
 241
 242         total_v_size = n > 0 ? n : 100;
 243         v = PyString_FromStringAndSize((char *)NULL, total_v_size);
 244         if (v == NULL)
 245                 return NULL;
 246
 247         buf = BUF(v);
 248         end = buf + total_v_size;
 249
 250         for (;;) {
 251                 Py_BEGIN_ALLOW_THREADS
 252                 if (univ_newline) {
 253                         while (1) {
 254                                 BZ2_bzRead(&bzerror, f->fp, &c, 1);
 255                                 f->pos++;
 256                                 if (bzerror != BZ_OK || buf == end)
 257                                         break;
 258                                 if (skipnextlf) {
 259                                         skipnextlf = 0;
 260                                         if (c == '\n') {
 261                                                 /* Seeing a \n here with
 262                                                  * skipnextlf true means we
 263                                                  * saw a \r before.
 264                                                  */
 265                                                 newlinetypes |= NEWLINE_CRLF;
 266                                                 BZ2_bzRead(&bzerror, f->fp,
 267                                                            &c, 1);
 268                                                 if (bzerror != BZ_OK)
 269                                                         break;
 270                                         } else {
 271                                                 newlinetypes |= NEWLINE_CR;
 272                                         }
 273                                 }
 274                                 if (c == '\r') {
 275                                         skipnextlf = 1;
 276                                         c = '\n';
 277                                 } else if ( c == '\n')
 278                                         newlinetypes |= NEWLINE_LF;
 279                                 *buf++ = c;
 280                                 if (c == '\n') break;
 281                         }
 282                         if (bzerror == BZ_STREAM_END && skipnextlf)
 283                                 newlinetypes |= NEWLINE_CR;
 284                 } else /* If not universal newlines use the normal loop */
 285                         do {
 286                                 BZ2_bzRead(&bzerror, f->fp, &c, 1);
 287                                 f->pos++;
 288                                 *buf++ = c;
 289                         } while (bzerror == BZ_OK && c != '\n' && buf != end);
 290                 Py_END_ALLOW_THREADS
 291                 f->f_newlinetypes = newlinetypes;
 292                 f->f_skipnextlf = skipnextlf;
 293                 if (bzerror == BZ_STREAM_END) {
 294                         f->size = f->pos;
 295                         f->mode = MODE_READ_EOF;
 296                         break;
 297                 } else if (bzerror != BZ_OK) {
 298                         Util_CatchBZ2Error(bzerror);
 299                         Py_DECREF(v);
 300                         return NULL;
 301                 }
 302                 if (c == '\n')
 303                         break;
 304                 /* Must be because buf == end */
 305                 if (n > 0)
 306                         break;
 307                 used_v_size = total_v_size;
 308                 increment = total_v_size >> 2; /* mild exponential growth */
 309                 total_v_size += increment;
 310                 if (total_v_size > INT_MAX) {
 311                         PyErr_SetString(PyExc_OverflowError,
 312                             "line is longer than a Python string can hold");
 313                         Py_DECREF(v);
 314                         return NULL;
 315                 }
 316                 if (_PyString_Resize(&v, total_v_size) < 0)
 317                         return NULL;
 318                 buf = BUF(v) + used_v_size;
 319                 end = BUF(v) + total_v_size;
 320         }
 321
 322         used_v_size = buf - BUF(v);
 323         if (used_v_size != total_v_size)
 324                 _PyString_Resize(&v, used_v_size);
 325         return v;
 326 }
 327
 328 /* This is a hacked version of Python's
 329  * fileobject.c:Py_UniversalNewlineFread(). */
 330 size_t
 331 Util_UnivNewlineRead(int *bzerror, BZFILE *stream,
 332                      char* buf, size_t n, BZ2FileObject *f)
 333 {
 334         char *dst = buf;
 335         int newlinetypes, skipnextlf;
 336
 337         assert(buf != NULL);
 338         assert(stream != NULL);
 339
 340         if (!f->f_univ_newline)
 341                 return BZ2_bzRead(bzerror, stream, buf, n);
 342
 343         newlinetypes = f->f_newlinetypes;
 344         skipnextlf = f->f_skipnextlf;
 345
 346         /* Invariant:  n is the number of bytes remaining to be filled
 347          * in the buffer.
 348          */
 349         while (n) {
 350                 size_t nread;
 351                 int shortread;
 352                 char *src = dst;
 353
 354                 nread = BZ2_bzRead(bzerror, stream, dst, n);
 355                 assert(nread <= n);
 356                 n -= nread; /* assuming 1 byte out for each in; will adjust */
 357                 shortread = n != 0;     /* true iff EOF or error */
 358                 while (nread--) {
 359                         char c = *src++;
 360                         if (c == '\r') {
 361                                 /* Save as LF and set flag to skip next LF. */
 362                                 *dst++ = '\n';
 363                                 skipnextlf = 1;
 364                         }
 365                         else if (skipnextlf && c == '\n') {
 366                                 /* Skip LF, and remember we saw CR LF. */
 367                                 skipnextlf = 0;
 368                                 newlinetypes |= NEWLINE_CRLF;
 369                                 ++n;
 370                         }
 371                         else {
 372                                 /* Normal char to be stored in buffer.  Also
 373                                  * update the newlinetypes flag if either this
 374                                  * is an LF or the previous char was a CR.
 375                                  */
 376                                 if (c == '\n')
 377                                         newlinetypes |= NEWLINE_LF;
 378                                 else if (skipnextlf)
 379                                         newlinetypes |= NEWLINE_CR;
 380                                 *dst++ = c;
 381                                 skipnextlf = 0;
 382                         }
 383                 }
 384                 if (shortread) {
 385                         /* If this is EOF, update type flags. */
 386                         if (skipnextlf && *bzerror == BZ_STREAM_END)
 387                                 newlinetypes |= NEWLINE_CR;
 388                         break;
 389                 }
 390         }
 391         f->f_newlinetypes = newlinetypes;
 392         f->f_skipnextlf = skipnextlf;
 393         return dst - buf;
 394 }
 395
 396 /* This is a hacked version of Python's fileobject.c:drop_readahead(). */
 397 static void
 398 Util_DropReadAhead(BZ2FileObject *f)
 399 {
 400         if (f->f_buf != NULL) {
 401                 PyMem_Free(f->f_buf);
 402                 f->f_buf = NULL;
 403         }
 404 }
 405
 406 /* This is a hacked version of Python's fileobject.c:readahead(). */
 407 static int
 408 Util_ReadAhead(BZ2FileObject *f, int bufsize)
 409 {
 410         int chunksize;
 411         int bzerror;
 412
 413         if (f->f_buf != NULL) {
 414                 if((f->f_bufend - f->f_bufptr) >= 1)
 415                         return 0;
 416                 else
 417                         Util_DropReadAhead(f);
 418         }
 419         if (f->mode == MODE_READ_EOF) {
 420                 f->f_bufptr = f->f_buf;
 421                 f->f_bufend = f->f_buf;
 422                 return 0;
 423         }
 424         if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
 425                 return -1;
 426         }
 427         Py_BEGIN_ALLOW_THREADS
 428         chunksize = Util_UnivNewlineRead(&bzerror, f->fp, f->f_buf,
 429                                          bufsize, f);
 430         Py_END_ALLOW_THREADS
 431         f->pos += chunksize;
 432         if (bzerror == BZ_STREAM_END) {
 433                 f->size = f->pos;
 434                 f->mode = MODE_READ_EOF;
 435         } else if (bzerror != BZ_OK) {
 436                 Util_CatchBZ2Error(bzerror);
 437                 Util_DropReadAhead(f);
 438                 return -1;
 439         }
 440         f->f_bufptr = f->f_buf;
 441         f->f_bufend = f->f_buf + chunksize;
 442         return 0;
 443 }
 444
 445 /* This is a hacked version of Python's
 446  * fileobject.c:readahead_get_line_skip(). */
 447 static PyStringObject *
 448 Util_ReadAheadGetLineSkip(BZ2FileObject *f, int skip, int bufsize)
 449 {
 450         PyStringObject* s;
 451         char *bufptr;
 452         char *buf;
 453         int len;
 454
 455         if (f->f_buf == NULL)
 456                 if (Util_ReadAhead(f, bufsize) < 0)
 457                         return NULL;
 458
 459         len = f->f_bufend - f->f_bufptr;
 460         if (len == 0)
 461                 return (PyStringObject *)
 462                         PyString_FromStringAndSize(NULL, skip);
 463         bufptr = memchr(f->f_bufptr, '\n', len);
 464         if (bufptr != NULL) {
 465                 bufptr++;                       /* Count the '\n' */
 466                 len = bufptr - f->f_bufptr;
 467                 s = (PyStringObject *)
 468                         PyString_FromStringAndSize(NULL, skip+len);
 469                 if (s == NULL)
 470                         return NULL;
 471                 memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
 472                 f->f_bufptr = bufptr;
 473                 if (bufptr == f->f_bufend)
 474                         Util_DropReadAhead(f);
 475         } else {
 476                 bufptr = f->f_bufptr;
 477                 buf = f->f_buf;
 478                 f->f_buf = NULL;        /* Force new readahead buffer */
 479                 s = Util_ReadAheadGetLineSkip(f, skip+len,
 480                                               bufsize + (bufsize>>2));
 481                 if (s == NULL) {
 482                         PyMem_Free(buf);
 483                         return NULL;
 484                 }
 485                 memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
 486                 PyMem_Free(buf);
 487         }
 488         return s;
 489 }
 490
 491 /* ===================================================================== */
 492 /* Methods of BZ2File. */
 493
 494 PyDoc_STRVAR(BZ2File_read__doc__,
 495 "read([size]) -> string\n\
 496 \n\
 497 Read at most size uncompressed bytes, returned as a string. If the size\n\
 498 argument is negative or omitted, read until EOF is reached.\n\
 499 ");
 500
 501 /* This is a hacked version of Python's fileobject.c:file_read(). */
 502 static PyObject *
 503 BZ2File_read(BZ2FileObject *self, PyObject *args)
 504 {
 505         long bytesrequested = -1;
 506         size_t bytesread, buffersize, chunksize;
 507         int bzerror;
 508         PyObject *ret = NULL;
 509
 510         if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
 511                 return NULL;
 512
 513         ACQUIRE_LOCK(self);
 514         switch (self->mode) {
 515                 case MODE_READ:
 516                         break;
 517                 case MODE_READ_EOF:
 518                         ret = PyString_FromString("");
 519                         goto cleanup;
 520                 case MODE_CLOSED:
 521                         PyErr_SetString(PyExc_ValueError,
 522                                         "I/O operation on closed file");
 523                         goto cleanup;
 524                 default:
 525                         PyErr_SetString(PyExc_IOError,
 526                                         "file is not ready for reading");
 527                         goto cleanup;
 528         }
 529
 530         if (bytesrequested < 0)
 531                 buffersize = Util_NewBufferSize((size_t)0);
 532         else
 533                 buffersize = bytesrequested;
 534         if (buffersize > INT_MAX) {
 535                 PyErr_SetString(PyExc_OverflowError,
 536                                 "requested number of bytes is "
 537                                 "more than a Python string can hold");
 538                 goto cleanup;
 539         }
 540         ret = PyString_FromStringAndSize((char *)NULL, buffersize);
 541         if (ret == NULL)
 542                 goto cleanup;
 543         bytesread = 0;
 544
 545         for (;;) {
 546                 Py_BEGIN_ALLOW_THREADS
 547                 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
 548                                                  BUF(ret)+bytesread,
 549                                                  buffersize-bytesread,
 550                                                  self);
 551                 self->pos += chunksize;
 552                 Py_END_ALLOW_THREADS
 553                 bytesread += chunksize;
 554                 if (bzerror == BZ_STREAM_END) {
 555                         self->size = self->pos;
 556                         self->mode = MODE_READ_EOF;
 557                         break;
 558                 } else if (bzerror != BZ_OK) {
 559                         Util_CatchBZ2Error(bzerror);
 560                         Py_DECREF(ret);
 561                         ret = NULL;
 562                         goto cleanup;
 563                 }
 564                 if (bytesrequested < 0) {
 565                         buffersize = Util_NewBufferSize(buffersize);
 566                         if (_PyString_Resize(&ret, buffersize) < 0)
 567                                 goto cleanup;
 568                 } else {
 569                         break;
 570                 }
 571         }
 572         if (bytesread != buffersize)
 573                 _PyString_Resize(&ret, bytesread);
 574
 575 cleanup:
 576         RELEASE_LOCK(self);
 577         return ret;
 578 }
 579
 580 PyDoc_STRVAR(BZ2File_readline__doc__,
 581 "readline([size]) -> string\n\
 582 \n\
 583 Return the next line from the file, as a string, retaining newline.\n\
 584 A non-negative size argument will limit the maximum number of bytes to\n\
 585 return (an incomplete line may be returned then). Return an empty\n\
 586 string at EOF.\n\
 587 ");
 588
 589 static PyObject *
 590 BZ2File_readline(BZ2FileObject *self, PyObject *args)
 591 {
 592         PyObject *ret = NULL;
 593         int sizehint = -1;
 594
 595         if (!PyArg_ParseTuple(args, "|i:readline", &sizehint))
 596                 return NULL;
 597
 598         ACQUIRE_LOCK(self);
 599         switch (self->mode) {
 600                 case MODE_READ:
 601                         break;
 602                 case MODE_READ_EOF:
 603                         ret = PyString_FromString("");
 604                         goto cleanup;
 605                 case MODE_CLOSED:
 606                         PyErr_SetString(PyExc_ValueError,
 607                                         "I/O operation on closed file");
 608                         goto cleanup;
 609                 default:
 610                         PyErr_SetString(PyExc_IOError,
 611                                         "file is not ready for reading");
 612                         goto cleanup;
 613         }
 614
 615         if (sizehint == 0)
 616                 ret = PyString_FromString("");
 617         else
 618                 ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint);
 619
 620 cleanup:
 621         RELEASE_LOCK(self);
 622         return ret;
 623 }
 624
 625 PyDoc_STRVAR(BZ2File_readlines__doc__,
 626 "readlines([size]) -> list\n\
 627 \n\
 628 Call readline() repeatedly and return a list of lines read.\n\
 629 The optional size argument, if given, is an approximate bound on the\n\
 630 total number of bytes in the lines returned.\n\
 631 ");
 632
 633 /* This is a hacked version of Python's fileobject.c:file_readlines(). */
 634 static PyObject *
 635 BZ2File_readlines(BZ2FileObject *self, PyObject *args)
 636 {
 637         long sizehint = 0;
 638         PyObject *list = NULL;
 639         PyObject *line;
 640         char small_buffer[SMALLCHUNK];
 641         char *buffer = small_buffer;
 642         size_t buffersize = SMALLCHUNK;
 643         PyObject *big_buffer = NULL;
 644         size_t nfilled = 0;
 645         size_t nread;
 646         size_t totalread = 0;
 647         char *p, *q, *end;
 648         int err;
 649         int shortread = 0;
 650         int bzerror;
 651
 652         if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
 653                 return NULL;
 654
 655         ACQUIRE_LOCK(self);
 656         switch (self->mode) {
 657                 case MODE_READ:
 658                         break;
 659                 case MODE_READ_EOF:
 660                         list = PyList_New(0);
 661                         goto cleanup;
 662                 case MODE_CLOSED:
 663                         PyErr_SetString(PyExc_ValueError,
 664                                         "I/O operation on closed file");
 665                         goto cleanup;
 666                 default:
 667                         PyErr_SetString(PyExc_IOError,
 668                                         "file is not ready for reading");
 669                         goto cleanup;
 670         }
 671
 672         if ((list = PyList_New(0)) == NULL)
 673                 goto cleanup;
 674
 675         for (;;) {
 676                 Py_BEGIN_ALLOW_THREADS
 677                 nread = Util_UnivNewlineRead(&bzerror, self->fp,
 678                                              buffer+nfilled,
 679                                              buffersize-nfilled, self);
 680                 self->pos += nread;
 681                 Py_END_ALLOW_THREADS
 682                 if (bzerror == BZ_STREAM_END) {
 683                         self->size = self->pos;
 684                         self->mode = MODE_READ_EOF;
 685                         if (nread == 0) {
 686                                 sizehint = 0;
 687                                 break;
 688                         }
 689                         shortread = 1;
 690                 } else if (bzerror != BZ_OK) {
 691                         Util_CatchBZ2Error(bzerror);
 692                   error:
 693                         Py_DECREF(list);
 694                         list = NULL;
 695                         goto cleanup;
 696                 }
 697                 totalread += nread;
 698                 p = memchr(buffer+nfilled, '\n', nread);
 699                 if (!shortread && p == NULL) {
 700                         /* Need a larger buffer to fit this line */
 701                         nfilled += nread;
 702                         buffersize *= 2;
 703                         if (buffersize > INT_MAX) {
 704                                 PyErr_SetString(PyExc_OverflowError,
 705                                 "line is longer than a Python string can hold");
 706                                 goto error;
 707                         }
 708                         if (big_buffer == NULL) {
 709                                 /* Create the big buffer */
 710                                 big_buffer = PyString_FromStringAndSize(
 711                                         NULL, buffersize);
 712                                 if (big_buffer == NULL)
 713                                         goto error;
 714                                 buffer = PyString_AS_STRING(big_buffer);
 715                                 memcpy(buffer, small_buffer, nfilled);
 716                         }
 717                         else {
 718                                 /* Grow the big buffer */
 719                                 _PyString_Resize(&big_buffer, buffersize);
 720                                 buffer = PyString_AS_STRING(big_buffer);
 721                         }
 722                         continue;
 723                 }
 724                 end = buffer+nfilled+nread;
 725                 q = buffer;
 726                 while (p != NULL) {
 727                         /* Process complete lines */
 728                         p++;
 729                         line = PyString_FromStringAndSize(q, p-q);
 730                         if (line == NULL)
 731                                 goto error;
 732                         err = PyList_Append(list, line);
 733                         Py_DECREF(line);
 734                         if (err != 0)
 735                                 goto error;
 736                         q = p;
 737                         p = memchr(q, '\n', end-q);
 738                 }
 739                 /* Move the remaining incomplete line to the start */
 740                 nfilled = end-q;
 741                 memmove(buffer, q, nfilled);
 742                 if (sizehint > 0)
 743                         if (totalread >= (size_t)sizehint)
 744                                 break;
 745                 if (shortread) {
 746                         sizehint = 0;
 747                         break;
 748                 }
 749         }
 750         if (nfilled != 0) {
 751                 /* Partial last line */
 752                 line = PyString_FromStringAndSize(buffer, nfilled);
 753                 if (line == NULL)
 754                         goto error;
 755                 if (sizehint > 0) {
 756                         /* Need to complete the last line */
 757                         PyObject *rest = Util_GetLine(self, 0);
 758                         if (rest == NULL) {
 759                                 Py_DECREF(line);
 760                                 goto error;
 761                         }
 762                         PyString_Concat(&line, rest);
 763                         Py_DECREF(rest);
 764                         if (line == NULL)
 765                                 goto error;
 766                 }
 767                 err = PyList_Append(list, line);
 768                 Py_DECREF(line);
 769                 if (err != 0)
 770                         goto error;
 771         }
 772
 773   cleanup:
 774         RELEASE_LOCK(self);
 775         if (big_buffer) {
 776                 Py_DECREF(big_buffer);
 777         }
 778         return list;
 779 }
 780
 781 PyDoc_STRVAR(BZ2File_xreadlines__doc__,
 782 "xreadlines() -> self\n\
 783 \n\
 784 For backward compatibility. BZ2File objects now include the performance\n\
 785 optimizations previously implemented in the xreadlines module.\n\
 786 ");
 787
 788 PyDoc_STRVAR(BZ2File_write__doc__,
 789 "write(data) -> None\n\
 790 \n\
 791 Write the 'data' string to file. Note that due to buffering, close() may\n\
 792 be needed before the file on disk reflects the data written.\n\
 793 ");
 794
 795 /* This is a hacked version of Python's fileobject.c:file_write(). */
 796 static PyObject *
 797 BZ2File_write(BZ2FileObject *self, PyObject *args)
 798 {
 799         PyObject *ret = NULL;
 800         char *buf;
 801         int len;
 802         int bzerror;
 803
 804         if (!PyArg_ParseTuple(args, "s#:write", &buf, &len))
 805                 return NULL;
 806
 807         ACQUIRE_LOCK(self);
 808         switch (self->mode) {
 809                 case MODE_WRITE:
 810                         break;
 811
 812                 case MODE_CLOSED:
 813                         PyErr_SetString(PyExc_ValueError,
 814                                         "I/O operation on closed file");
 815                         goto cleanup;;
 816
 817                 default:
 818                         PyErr_SetString(PyExc_IOError,
 819                                         "file is not ready for writing");
 820                         goto cleanup;;
 821         }
 822
 823         self->f_softspace = 0;
 824
 825         Py_BEGIN_ALLOW_THREADS
 826         BZ2_bzWrite (&bzerror, self->fp, buf, len);
 827         self->pos += len;
 828         Py_END_ALLOW_THREADS
 829
 830         if (bzerror != BZ_OK) {
 831                 Util_CatchBZ2Error(bzerror);
 832                 goto cleanup;
 833         }
 834
 835         Py_INCREF(Py_None);
 836         ret = Py_None;
 837
 838 cleanup:
 839         RELEASE_LOCK(self);
 840         return ret;
 841 }
 842
 843 PyDoc_STRVAR(BZ2File_writelines__doc__,
 844 "writelines(sequence_of_strings) -> None\n\
 845 \n\
 846 Write the sequence of strings to the file. Note that newlines are not\n\
 847 added. The sequence can be any iterable object producing strings. This is\n\
 848 equivalent to calling write() for each string.\n\
 849 ");
 850
 851 /* This is a hacked version of Python's fileobject.c:file_writelines(). */
 852 static PyObject *
 853 BZ2File_writelines(BZ2FileObject *self, PyObject *seq)
 854 {
 855 #define CHUNKSIZE 1000
 856         PyObject *list = NULL;
 857         PyObject *iter = NULL;
 858         PyObject *ret = NULL;
 859         PyObject *line;
 860         int i, j, index, len, islist;
 861         int bzerror;
 862
 863         ACQUIRE_LOCK(self);
 864         islist = PyList_Check(seq);
 865         if  (!islist) {
 866                 iter = PyObject_GetIter(seq);
 867                 if (iter == NULL) {
 868                         PyErr_SetString(PyExc_TypeError,
 869                                 "writelines() requires an iterable argument");
 870                         goto error;
 871                 }
 872                 list = PyList_New(CHUNKSIZE);
 873                 if (list == NULL)
 874                         goto error;
 875         }
 876
 877         /* Strategy: slurp CHUNKSIZE lines into a private list,
 878            checking that they are all strings, then write that list
 879            without holding the interpreter lock, then come back for more. */
 880         for (index = 0; ; index += CHUNKSIZE) {
 881                 if (islist) {
 882                         Py_XDECREF(list);
 883                         list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
 884                         if (list == NULL)
 885                                 goto error;
 886                         j = PyList_GET_SIZE(list);
 887                 }
 888                 else {
 889                         for (j = 0; j < CHUNKSIZE; j++) {
 890                                 line = PyIter_Next(iter);
 891                                 if (line == NULL) {
 892                                         if (PyErr_Occurred())
 893                                                 goto error;
 894                                         break;
 895                                 }
 896                                 PyList_SetItem(list, j, line);
 897                         }
 898                 }
 899                 if (j == 0)
 900                         break;
 901
 902                 /* Check that all entries are indeed strings. If not,
 903                    apply the same rules as for file.write() and
 904                    convert the rets to strings. This is slow, but
 905                    seems to be the only way since all conversion APIs
 906                    could potentially execute Python code. */
 907                 for (i = 0; i < j; i++) {
 908                         PyObject *v = PyList_GET_ITEM(list, i);
 909                         if (!PyString_Check(v)) {
 910                                 const char *buffer;
 911                                 Py_ssize_t len;
 912                                 if (PyObject_AsCharBuffer(v, &buffer, &len)) {
 913                                         PyErr_SetString(PyExc_TypeError,
 914                                                         "writelines() "
 915                                                         "argument must be "
 916                                                         "a sequence of "
 917                                                         "strings");
 918                                         goto error;
 919                                 }
 920                                 line = PyString_FromStringAndSize(buffer,
 921                                                                   len);
 922                                 if (line == NULL)
 923                                         goto error;
 924                                 Py_DECREF(v);
 925                                 PyList_SET_ITEM(list, i, line);
 926                         }
 927                 }
 928
 929                 self->f_softspace = 0;
 930
 931                 /* Since we are releasing the global lock, the
 932                    following code may *not* execute Python code. */
 933                 Py_BEGIN_ALLOW_THREADS
 934                 for (i = 0; i < j; i++) {
 935                         line = PyList_GET_ITEM(list, i);
 936                         len = PyString_GET_SIZE(line);
 937                         BZ2_bzWrite (&bzerror, self->fp,
 938                                      PyString_AS_STRING(line), len);
 939                         if (bzerror != BZ_OK) {
 940                                 Py_BLOCK_THREADS
 941                                 Util_CatchBZ2Error(bzerror);
 942                                 goto error;
 943                         }
 944                 }
 945                 Py_END_ALLOW_THREADS
 946
 947                 if (j < CHUNKSIZE)
 948                         break;
 949         }
 950
 951         Py_INCREF(Py_None);
 952         ret = Py_None;
 953
 954   error:
 955         RELEASE_LOCK(self);
 956         Py_XDECREF(list);
 957         Py_XDECREF(iter);
 958         return ret;
 959 #undef CHUNKSIZE
 960 }
 961
 962 PyDoc_STRVAR(BZ2File_seek__doc__,
 963 "seek(offset [, whence]) -> None\n\
 964 \n\
 965 Move to new file position. Argument offset is a byte count. Optional\n\
 966 argument whence defaults to 0 (offset from start of file, offset\n\
 967 should be >= 0); other values are 1 (move relative to current position,\n\
 968 positive or negative), and 2 (move relative to end of file, usually\n\
 969 negative, although many platforms allow seeking beyond the end of a file).\n\
 970 \n\
 971 Note that seeking of bz2 files is emulated, and depending on the parameters\n\
 972 the operation may be extremely slow.\n\
 973 ");
 974
 975 static PyObject *
 976 BZ2File_seek(BZ2FileObject *self, PyObject *args)
 977 {
 978         int where = 0;
 979         PyObject *offobj;
 980         Py_off_t offset;
 981         char small_buffer[SMALLCHUNK];
 982         char *buffer = small_buffer;
 983         size_t buffersize = SMALLCHUNK;
 984         int bytesread = 0;
 985         size_t readsize;
 986         int chunksize;
 987         int bzerror;
 988         PyObject *ret = NULL;
 989
 990         if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &where))
 991                 return NULL;
 992 #if !defined(HAVE_LARGEFILE_SUPPORT)
 993         offset = PyInt_AsLong(offobj);
 994 #else
 995         offset = PyLong_Check(offobj) ?
 996                 PyLong_AsLongLong(offobj) : PyInt_AsLong(offobj);
 997 #endif
 998         if (PyErr_Occurred())
 999                 return NULL;
1000
1001         ACQUIRE_LOCK(self);
1002         Util_DropReadAhead(self);
1003         switch (self->mode) {
1004                 case MODE_READ:
1005                 case MODE_READ_EOF:
1006                         break;
1007
1008                 case MODE_CLOSED:
1009                         PyErr_SetString(PyExc_ValueError,
1010                                         "I/O operation on closed file");
1011                         goto cleanup;;
1012
1013                 default:
1014                         PyErr_SetString(PyExc_IOError,
1015                                         "seek works only while reading");
1016                         goto cleanup;;
1017         }
1018
1019         if (where == 2) {
1020                 if (self->size == -1) {
1021                         assert(self->mode != MODE_READ_EOF);
1022                         for (;;) {
1023                                 Py_BEGIN_ALLOW_THREADS
1024                                 chunksize = Util_UnivNewlineRead(
1025                                                 &bzerror, self->fp,
1026                                                 buffer, buffersize,
1027                                                 self);
1028                                 self->pos += chunksize;
1029                                 Py_END_ALLOW_THREADS
1030
1031                                 bytesread += chunksize;
1032                                 if (bzerror == BZ_STREAM_END) {
1033                                         break;
1034                                 } else if (bzerror != BZ_OK) {
1035                                         Util_CatchBZ2Error(bzerror);
1036                                         goto cleanup;
1037                                 }
1038                         }
1039                         self->mode = MODE_READ_EOF;
1040                         self->size = self->pos;
1041                         bytesread = 0;
1042                 }
1043                 offset = self->size + offset;
1044         } else if (where == 1) {
1045                 offset = self->pos + offset;
1046         }
1047
1048         /* Before getting here, offset must be the absolute position the file
1049          * pointer should be set to. */
1050
1051         if (offset >= self->pos) {
1052                 /* we can move forward */
1053                 offset -= self->pos;
1054         } else {
1055                 /* we cannot move back, so rewind the stream */
1056                 BZ2_bzReadClose(&bzerror, self->fp);
1057                 if (bzerror != BZ_OK) {
1058                         Util_CatchBZ2Error(bzerror);
1059                         goto cleanup;
1060                 }
1061                 ret = PyObject_CallMethod(self->file, "seek", "(i)", 0);
1062                 if (!ret)
1063                         goto cleanup;
1064                 Py_DECREF(ret);
1065                 ret = NULL;
1066                 self->pos = 0;
1067                 self->fp = BZ2_bzReadOpen(&bzerror, PyFile_AsFile(self->file),
1068                                           0, 0, NULL, 0);
1069                 if (bzerror != BZ_OK) {
1070                         Util_CatchBZ2Error(bzerror);
1071                         goto cleanup;
1072                 }
1073                 self->mode = MODE_READ;
1074         }
1075
1076         if (offset <= 0 || self->mode == MODE_READ_EOF)
1077                 goto exit;
1078
1079         /* Before getting here, offset must be set to the number of bytes
1080          * to walk forward. */
1081         for (;;) {
1082                 if (offset-bytesread > buffersize)
1083                         readsize = buffersize;
1084                 else
1085                         /* offset might be wider that readsize, but the result
1086                          * of the subtraction is bound by buffersize (see the
1087                          * condition above). buffersize is 8192. */
1088                         readsize = (size_t)(offset-bytesread);
1089                 Py_BEGIN_ALLOW_THREADS
1090                 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
1091                                                  buffer, readsize, self);
1092                 self->pos += chunksize;
1093                 Py_END_ALLOW_THREADS
1094                 bytesread += chunksize;
1095                 if (bzerror == BZ_STREAM_END) {
1096                         self->size = self->pos;
1097                         self->mode = MODE_READ_EOF;
1098                         break;
1099                 } else if (bzerror != BZ_OK) {
1100                         Util_CatchBZ2Error(bzerror);
1101                         goto cleanup;
1102                 }
1103                 if (bytesread == offset)
1104                         break;
1105         }
1106
1107 exit:
1108         Py_INCREF(Py_None);
1109         ret = Py_None;
1110
1111 cleanup:
1112         RELEASE_LOCK(self);
1113         return ret;
1114 }
1115
1116 PyDoc_STRVAR(BZ2File_tell__doc__,
1117 "tell() -> int\n\
1118 \n\
1119 Return the current file position, an integer (may be a long integer).\n\
1120 ");
1121
1122 static PyObject *
1123 BZ2File_tell(BZ2FileObject *self, PyObject *args)
1124 {
1125         PyObject *ret = NULL;
1126
1127         if (self->mode == MODE_CLOSED) {
1128                 PyErr_SetString(PyExc_ValueError,
1129                                 "I/O operation on closed file");
1130                 goto cleanup;
1131         }
1132
1133 #if !defined(HAVE_LARGEFILE_SUPPORT)
1134         ret = PyInt_FromLong(self->pos);
1135 #else
1136         ret = PyLong_FromLongLong(self->pos);
1137 #endif
1138
1139 cleanup:
1140         return ret;
1141 }
1142
1143 PyDoc_STRVAR(BZ2File_close__doc__,
1144 "close() -> None or (perhaps) an integer\n\
1145 \n\
1146 Close the file. Sets data attribute .closed to true. A closed file\n\
1147 cannot be used for further I/O operations. close() may be called more\n\
1148 than once without error.\n\
1149 ");
1150
1151 static PyObject *
1152 BZ2File_close(BZ2FileObject *self)
1153 {
1154         PyObject *ret = NULL;
1155         int bzerror = BZ_OK;
1156
1157         ACQUIRE_LOCK(self);
1158         switch (self->mode) {
1159                 case MODE_READ:
1160                 case MODE_READ_EOF:
1161                         BZ2_bzReadClose(&bzerror, self->fp);
1162                         break;
1163                 case MODE_WRITE:
1164                         BZ2_bzWriteClose(&bzerror, self->fp,
1165                                          0, NULL, NULL);
1166                         break;
1167         }
1168         self->mode = MODE_CLOSED;
1169         ret = PyObject_CallMethod(self->file, "close", NULL);
1170         if (bzerror != BZ_OK) {
1171                 Util_CatchBZ2Error(bzerror);
1172                 Py_XDECREF(ret);
1173                 ret = NULL;
1174         }
1175
1176         RELEASE_LOCK(self);
1177         return ret;
1178 }
1179
1180 static PyObject *BZ2File_getiter(BZ2FileObject *self);
1181
1182 static PyMethodDef BZ2File_methods[] = {
1183         {"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__},
1184         {"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__doc__},
1185         {"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readlines__doc__},
1186         {"xreadlines", (PyCFunction)BZ2File_getiter, METH_VARARGS, BZ2File_xreadlines__doc__},
1187         {"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__},
1188         {"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__doc__},
1189         {"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__},
1190         {"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__},
1191         {"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__},
1192         {NULL,          NULL}           /* sentinel */
1193 };
1194
1195
1196 /* ===================================================================== */
1197 /* Getters and setters of BZ2File. */
1198
1199 /* This is a hacked version of Python's fileobject.c:get_newlines(). */
1200 static PyObject *
1201 BZ2File_get_newlines(BZ2FileObject *self, void *closure)
1202 {
1203         switch (self->f_newlinetypes) {
1204         case NEWLINE_UNKNOWN:
1205                 Py_INCREF(Py_None);
1206                 return Py_None;
1207         case NEWLINE_CR:
1208                 return PyString_FromString("\r");
1209         case NEWLINE_LF:
1210                 return PyString_FromString("\n");
1211         case NEWLINE_CR|NEWLINE_LF:
1212                 return Py_BuildValue("(ss)", "\r", "\n");
1213         case NEWLINE_CRLF:
1214                 return PyString_FromString("\r\n");
1215         case NEWLINE_CR|NEWLINE_CRLF:
1216                 return Py_BuildValue("(ss)", "\r", "\r\n");
1217         case NEWLINE_LF|NEWLINE_CRLF:
1218                 return Py_BuildValue("(ss)", "\n", "\r\n");
1219         case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF:
1220                 return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
1221         default:
1222                 PyErr_Format(PyExc_SystemError,
1223                              "Unknown newlines value 0x%x\n",
1224                              self->f_newlinetypes);
1225                 return NULL;
1226         }
1227 }
1228
1229 static PyObject *
1230 BZ2File_get_closed(BZ2FileObject *self, void *closure)
1231 {
1232         return PyInt_FromLong(self->mode == MODE_CLOSED);
1233 }
1234
1235 static PyObject *
1236 BZ2File_get_mode(BZ2FileObject *self, void *closure)
1237 {
1238         return PyObject_GetAttrString(self->file, "mode");
1239 }
1240
1241 static PyObject *
1242 BZ2File_get_name(BZ2FileObject *self, void *closure)
1243 {
1244         return PyObject_GetAttrString(self->file, "name");
1245 }
1246
1247 static PyGetSetDef BZ2File_getset[] = {
1248         {"closed", (getter)BZ2File_get_closed, NULL,
1249                         "True if the file is closed"},
1250         {"newlines", (getter)BZ2File_get_newlines, NULL,
1251                         "end-of-line convention used in this file"},
1252         {"mode", (getter)BZ2File_get_mode, NULL,
1253                         "file mode ('r', 'w', or 'U')"},
1254         {"name", (getter)BZ2File_get_name, NULL,
1255                         "file name"},
1256         {NULL}  /* Sentinel */
1257 };
1258
1259
1260 /* ===================================================================== */
1261 /* Members of BZ2File_Type. */
1262
1263 #undef OFF
1264 #define OFF(x) offsetof(BZ2FileObject, x)
1265
1266 static PyMemberDef BZ2File_members[] = {
1267         {"softspace",   T_INT,          OFF(f_softspace), 0,
1268          "flag indicating that a space needs to be printed; used by print"},
1269         {NULL}  /* Sentinel */
1270 };
1271
1272 /* ===================================================================== */
1273 /* Slot definitions for BZ2File_Type. */
1274
1275 static int
1276 BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
1277 {
1278         static char *kwlist[] = {"filename", "mode", "buffering",
1279                                        "compresslevel", 0};
1280         PyObject *name;
1281         char *mode = "r";
1282         int buffering = -1;
1283         int compresslevel = 9;
1284         int bzerror;
1285         int mode_char = 0;
1286
1287         self->size = -1;
1288
1289         if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|sii:BZ2File",
1290                                          kwlist, &name, &mode, &buffering,
1291                                          &compresslevel))
1292                 return -1;
1293
1294         if (compresslevel < 1 || compresslevel > 9) {
1295                 PyErr_SetString(PyExc_ValueError,
1296                                 "compresslevel must be between 1 and 9");
1297                 return -1;
1298         }
1299
1300         for (;;) {
1301                 int error = 0;
1302                 switch (*mode) {
1303                         case 'r':
1304                         case 'w':
1305                                 if (mode_char)
1306                                         error = 1;
1307                                 mode_char = *mode;
1308                                 break;
1309
1310                         case 'b':
1311                                 break;
1312
1313                         case 'U':
1314 #ifdef __VMS
1315                                 self->f_univ_newline = 0;
1316 #else
1317                                 self->f_univ_newline = 1;
1318 #endif
1319                                 break;
1320
1321                         default:
1322                                 error = 1;
1323                                 break;
1324                 }
1325                 if (error) {
1326                         PyErr_Format(PyExc_ValueError,
1327                                      "invalid mode char %c", *mode);
1328                         return -1;
1329                 }
1330                 mode++;
1331                 if (*mode == '\0')
1332                         break;
1333         }
1334
1335         if (mode_char == 0) {
1336                 mode_char = 'r';
1337         }
1338
1339         mode = (mode_char == 'r') ? "rb" : "wb";
1340
1341         self->file = PyObject_CallFunction((PyObject*)&PyFile_Type, "(Osi)",
1342                                            name, mode, buffering);
1343         if (self->file == NULL)
1344                 return -1;
1345
1346         /* From now on, we have stuff to dealloc, so jump to error label
1347          * instead of returning */
1348
1349 #ifdef WITH_THREAD
1350         self->lock = PyThread_allocate_lock();
1351         if (!self->lock) {
1352                 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1353                 goto error;
1354         }
1355 #endif
1356
1357         if (mode_char == 'r')
1358                 self->fp = BZ2_bzReadOpen(&bzerror,
1359                                           PyFile_AsFile(self->file),
1360                                           0, 0, NULL, 0);
1361         else
1362                 self->fp = BZ2_bzWriteOpen(&bzerror,
1363                                            PyFile_AsFile(self->file),
1364                                            compresslevel, 0, 0);
1365
1366         if (bzerror != BZ_OK) {
1367                 Util_CatchBZ2Error(bzerror);
1368                 goto error;
1369         }
1370
1371         self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE;
1372
1373         return 0;
1374
1375 error:
1376         Py_CLEAR(self->file);
1377 #ifdef WITH_THREAD
1378         if (self->lock) {
1379                 PyThread_free_lock(self->lock);
1380                 self->lock = NULL;
1381         }
1382 #endif
1383         return -1;
1384 }
1385
1386 static void
1387 BZ2File_dealloc(BZ2FileObject *self)
1388 {
1389         int bzerror;
1390 #ifdef WITH_THREAD
1391         if (self->lock)
1392                 PyThread_free_lock(self->lock);
1393 #endif
1394         switch (self->mode) {
1395                 case MODE_READ:
1396                 case MODE_READ_EOF:
1397                         BZ2_bzReadClose(&bzerror, self->fp);
1398                         break;
1399                 case MODE_WRITE:
1400                         BZ2_bzWriteClose(&bzerror, self->fp,
1401                                          0, NULL, NULL);
1402                         break;
1403         }
1404         Util_DropReadAhead(self);
1405         Py_XDECREF(self->file);
1406         self->ob_type->tp_free((PyObject *)self);
1407 }
1408
1409 /* This is a hacked version of Python's fileobject.c:file_getiter(). */
1410 static PyObject *
1411 BZ2File_getiter(BZ2FileObject *self)
1412 {
1413         if (self->mode == MODE_CLOSED) {
1414                 PyErr_SetString(PyExc_ValueError,
1415                                 "I/O operation on closed file");
1416                 return NULL;
1417         }
1418         Py_INCREF((PyObject*)self);
1419         return (PyObject *)self;
1420 }
1421
1422 /* This is a hacked version of Python's fileobject.c:file_iternext(). */
1423 #define READAHEAD_BUFSIZE 8192
1424 static PyObject *
1425 BZ2File_iternext(BZ2FileObject *self)
1426 {
1427         PyStringObject* ret;
1428         ACQUIRE_LOCK(self);
1429         if (self->mode == MODE_CLOSED) {
1430                 PyErr_SetString(PyExc_ValueError,
1431                                 "I/O operation on closed file");
1432                 return NULL;
1433         }
1434         ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE);
1435         RELEASE_LOCK(self);
1436         if (ret == NULL || PyString_GET_SIZE(ret) == 0) {
1437                 Py_XDECREF(ret);
1438                 return NULL;
1439         }
1440         return (PyObject *)ret;
1441 }
1442
1443 /* ===================================================================== */
1444 /* BZ2File_Type definition. */
1445
1446 PyDoc_VAR(BZ2File__doc__) =
1447 PyDoc_STR(
1448 "BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
1449 \n\
1450 Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
1451 writing. When opened for writing, the file will be created if it doesn't\n\
1452 exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
1453 unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
1454 is given, must be a number between 1 and 9.\n\
1455 ")
1456 PyDoc_STR(
1457 "\n\
1458 Add a 'U' to mode to open the file for input with universal newline\n\
1459 support. Any line ending in the input file will be seen as a '\\n' in\n\
1460 Python. Also, a file so opened gains the attribute 'newlines'; the value\n\
1461 for this attribute is one of None (no newline read yet), '\\r', '\\n',\n\
1462 '\\r\\n' or a tuple containing all the newline types seen. Universal\n\
1463 newlines are available only when reading.\n\
1464 ")
1465 ;
1466
1467 static PyTypeObject BZ2File_Type = {
1468         PyObject_HEAD_INIT(NULL)
1469         0,                      /*ob_size*/
1470         "bz2.BZ2File",          /*tp_name*/
1471         sizeof(BZ2FileObject),  /*tp_basicsize*/
1472         0,                      /*tp_itemsize*/
1473         (destructor)BZ2File_dealloc, /*tp_dealloc*/
1474         0,                      /*tp_print*/
1475         0,                      /*tp_getattr*/
1476         0,                      /*tp_setattr*/
1477         0,                      /*tp_compare*/
1478         0,                      /*tp_repr*/
1479         0,                      /*tp_as_number*/
1480         0,                      /*tp_as_sequence*/
1481         0,                      /*tp_as_mapping*/
1482         0,                      /*tp_hash*/
1483         0,                      /*tp_call*/
1484         0,                      /*tp_str*/
1485         PyObject_GenericGetAttr,/*tp_getattro*/
1486         PyObject_GenericSetAttr,/*tp_setattro*/
1487         0,                      /*tp_as_buffer*/
1488         Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1489         BZ2File__doc__,         /*tp_doc*/
1490         0,                      /*tp_traverse*/
1491         0,                      /*tp_clear*/
1492         0,                      /*tp_richcompare*/
1493         0,                      /*tp_weaklistoffset*/
1494         (getiterfunc)BZ2File_getiter, /*tp_iter*/
1495         (iternextfunc)BZ2File_iternext, /*tp_iternext*/
1496         BZ2File_methods,        /*tp_methods*/
1497         BZ2File_members,        /*tp_members*/
1498         BZ2File_getset,         /*tp_getset*/
1499         0,                      /*tp_base*/
1500         0,                      /*tp_dict*/
1501         0,                      /*tp_descr_get*/
1502         0,                      /*tp_descr_set*/
1503         0,                      /*tp_dictoffset*/
1504         (initproc)BZ2File_init, /*tp_init*/
1505         PyType_GenericAlloc,    /*tp_alloc*/
1506         PyType_GenericNew,      /*tp_new*/
1507         _PyObject_Del,          /*tp_free*/
1508         0,                      /*tp_is_gc*/
1509 };
1510
1511
1512 /* ===================================================================== */
1513 /* Methods of BZ2Comp. */
1514
1515 PyDoc_STRVAR(BZ2Comp_compress__doc__,
1516 "compress(data) -> string\n\
1517 \n\
1518 Provide more data to the compressor object. It will return chunks of\n\
1519 compressed data whenever possible. When you've finished providing data\n\
1520 to compress, call the flush() method to finish the compression process,\n\
1521 and return what is left in the internal buffers.\n\
1522 ");
1523
1524 static PyObject *
1525 BZ2Comp_compress(BZ2CompObject *self, PyObject *args)
1526 {
1527         char *data;
1528         int datasize;
1529         int bufsize = SMALLCHUNK;
1530         PY_LONG_LONG totalout;
1531         PyObject *ret = NULL;
1532         bz_stream *bzs = &self->bzs;
1533         int bzerror;
1534
1535         if (!PyArg_ParseTuple(args, "s#:compress", &data, &datasize))
1536                 return NULL;
1537
1538         if (datasize == 0)
1539                 return PyString_FromString("");
1540
1541         ACQUIRE_LOCK(self);
1542         if (!self->running) {
1543                 PyErr_SetString(PyExc_ValueError,
1544                                 "this object was already flushed");
1545                 goto error;
1546         }
1547
1548         ret = PyString_FromStringAndSize(NULL, bufsize);
1549         if (!ret)
1550                 goto error;
1551
1552         bzs->next_in = data;
1553         bzs->avail_in = datasize;
1554         bzs->next_out = BUF(ret);
1555         bzs->avail_out = bufsize;
1556
1557         totalout = BZS_TOTAL_OUT(bzs);
1558
1559         for (;;) {
1560                 Py_BEGIN_ALLOW_THREADS
1561                 bzerror = BZ2_bzCompress(bzs, BZ_RUN);
1562                 Py_END_ALLOW_THREADS
1563                 if (bzerror != BZ_RUN_OK) {
1564                         Util_CatchBZ2Error(bzerror);
1565                         goto error;
1566                 }
1567                 if (bzs->avail_out == 0) {
1568                         bufsize = Util_NewBufferSize(bufsize);
1569                         if (_PyString_Resize(&ret, bufsize) < 0) {
1570                                 BZ2_bzCompressEnd(bzs);
1571                                 goto error;
1572                         }
1573                         bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1574                                                     - totalout);
1575                         bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1576                 } else if (bzs->avail_in == 0) {
1577                         break;
1578                 }
1579         }
1580
1581         _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
1582
1583         RELEASE_LOCK(self);
1584         return ret;
1585
1586 error:
1587         RELEASE_LOCK(self);
1588         Py_XDECREF(ret);
1589         return NULL;
1590 }
1591
1592 PyDoc_STRVAR(BZ2Comp_flush__doc__,
1593 "flush() -> string\n\
1594 \n\
1595 Finish the compression process and return what is left in internal buffers.\n\
1596 You must not use the compressor object after calling this method.\n\
1597 ");
1598
1599 static PyObject *
1600 BZ2Comp_flush(BZ2CompObject *self)
1601 {
1602         int bufsize = SMALLCHUNK;
1603         PyObject *ret = NULL;
1604         bz_stream *bzs = &self->bzs;
1605         PY_LONG_LONG totalout;
1606         int bzerror;
1607
1608         ACQUIRE_LOCK(self);
1609         if (!self->running) {
1610                 PyErr_SetString(PyExc_ValueError, "object was already "
1611                                                   "flushed");
1612                 goto error;
1613         }
1614         self->running = 0;
1615
1616         ret = PyString_FromStringAndSize(NULL, bufsize);
1617         if (!ret)
1618                 goto error;
1619
1620         bzs->next_out = BUF(ret);
1621         bzs->avail_out = bufsize;
1622
1623         totalout = BZS_TOTAL_OUT(bzs);
1624
1625         for (;;) {
1626                 Py_BEGIN_ALLOW_THREADS
1627                 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1628                 Py_END_ALLOW_THREADS
1629                 if (bzerror == BZ_STREAM_END) {
1630                         break;
1631                 } else if (bzerror != BZ_FINISH_OK) {
1632                         Util_CatchBZ2Error(bzerror);
1633                         goto error;
1634                 }
1635                 if (bzs->avail_out == 0) {
1636                         bufsize = Util_NewBufferSize(bufsize);
1637                         if (_PyString_Resize(&ret, bufsize) < 0)
1638                                 goto error;
1639                         bzs->next_out = BUF(ret);
1640                         bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1641                                                     - totalout);
1642                         bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1643                 }
1644         }
1645
1646         if (bzs->avail_out != 0)
1647                 _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
1648
1649         RELEASE_LOCK(self);
1650         return ret;
1651
1652 error:
1653         RELEASE_LOCK(self);
1654         Py_XDECREF(ret);
1655         return NULL;
1656 }
1657
1658 static PyMethodDef BZ2Comp_methods[] = {
1659         {"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS,
1660          BZ2Comp_compress__doc__},
1661         {"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS,
1662          BZ2Comp_flush__doc__},
1663         {NULL,          NULL}           /* sentinel */
1664 };
1665
1666
1667 /* ===================================================================== */
1668 /* Slot definitions for BZ2Comp_Type. */
1669
1670 static int
1671 BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs)
1672 {
1673         int compresslevel = 9;
1674         int bzerror;
1675         static char *kwlist[] = {"compresslevel", 0};
1676
1677         if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor",
1678                                          kwlist, &compresslevel))
1679                 return -1;
1680
1681         if (compresslevel < 1 || compresslevel > 9) {
1682                 PyErr_SetString(PyExc_ValueError,
1683                                 "compresslevel must be between 1 and 9");
1684                 goto error;
1685         }
1686
1687 #ifdef WITH_THREAD
1688         self->lock = PyThread_allocate_lock();
1689         if (!self->lock) {
1690                 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1691                 goto error;
1692         }
1693 #endif
1694
1695         memset(&self->bzs, 0, sizeof(bz_stream));
1696         bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
1697         if (bzerror != BZ_OK) {
1698                 Util_CatchBZ2Error(bzerror);
1699                 goto error;
1700         }
1701
1702         self->running = 1;
1703
1704         return 0;
1705 error:
1706 #ifdef WITH_THREAD
1707         if (self->lock) {
1708                 PyThread_free_lock(self->lock);
1709                 self->lock = NULL;
1710         }
1711 #endif
1712         return -1;
1713 }
1714
1715 static void
1716 BZ2Comp_dealloc(BZ2CompObject *self)
1717 {
1718 #ifdef WITH_THREAD
1719         if (self->lock)
1720                 PyThread_free_lock(self->lock);
1721 #endif
1722         BZ2_bzCompressEnd(&self->bzs);
1723         self->ob_type->tp_free((PyObject *)self);
1724 }
1725
1726
1727 /* ===================================================================== */
1728 /* BZ2Comp_Type definition. */
1729
1730 PyDoc_STRVAR(BZ2Comp__doc__,
1731 "BZ2Compressor([compresslevel=9]) -> compressor object\n\
1732 \n\
1733 Create a new compressor object. This object may be used to compress\n\
1734 data sequentially. If you want to compress data in one shot, use the\n\
1735 compress() function instead. The compresslevel parameter, if given,\n\
1736 must be a number between 1 and 9.\n\
1737 ");
1738
1739 static PyTypeObject BZ2Comp_Type = {
1740         PyObject_HEAD_INIT(NULL)
1741         0,                      /*ob_size*/
1742         "bz2.BZ2Compressor",    /*tp_name*/
1743         sizeof(BZ2CompObject),  /*tp_basicsize*/
1744         0,                      /*tp_itemsize*/
1745         (destructor)BZ2Comp_dealloc, /*tp_dealloc*/
1746         0,                      /*tp_print*/
1747         0,                      /*tp_getattr*/
1748         0,                      /*tp_setattr*/
1749         0,                      /*tp_compare*/
1750         0,                      /*tp_repr*/
1751         0,                      /*tp_as_number*/
1752         0,                      /*tp_as_sequence*/
1753         0,                      /*tp_as_mapping*/
1754         0,                      /*tp_hash*/
1755         0,                      /*tp_call*/
1756         0,                      /*tp_str*/
1757         PyObject_GenericGetAttr,/*tp_getattro*/
1758         PyObject_GenericSetAttr,/*tp_setattro*/
1759         0,                      /*tp_as_buffer*/
1760         Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1761         BZ2Comp__doc__,         /*tp_doc*/
1762         0,                      /*tp_traverse*/
1763         0,                      /*tp_clear*/
1764         0,                      /*tp_richcompare*/
1765         0,                      /*tp_weaklistoffset*/
1766         0,                      /*tp_iter*/
1767         0,                      /*tp_iternext*/
1768         BZ2Comp_methods,        /*tp_methods*/
1769         0,                      /*tp_members*/
1770         0,                      /*tp_getset*/
1771         0,                      /*tp_base*/
1772         0,                      /*tp_dict*/
1773         0,                      /*tp_descr_get*/
1774         0,                      /*tp_descr_set*/
1775         0,                      /*tp_dictoffset*/
1776         (initproc)BZ2Comp_init, /*tp_init*/
1777         PyType_GenericAlloc,    /*tp_alloc*/
1778         PyType_GenericNew,      /*tp_new*/
1779         _PyObject_Del,          /*tp_free*/
1780         0,                      /*tp_is_gc*/
1781 };
1782
1783
1784 /* ===================================================================== */
1785 /* Members of BZ2Decomp. */
1786
1787 #undef OFF
1788 #define OFF(x) offsetof(BZ2DecompObject, x)
1789
1790 static PyMemberDef BZ2Decomp_members[] = {
1791         {"unused_data", T_OBJECT, OFF(unused_data), RO},
1792         {NULL}  /* Sentinel */
1793 };
1794
1795
1796 /* ===================================================================== */
1797 /* Methods of BZ2Decomp. */
1798
1799 PyDoc_STRVAR(BZ2Decomp_decompress__doc__,
1800 "decompress(data) -> string\n\
1801 \n\
1802 Provide more data to the decompressor object. It will return chunks\n\
1803 of decompressed data whenever possible. If you try to decompress data\n\
1804 after the end of stream is found, EOFError will be raised. If any data\n\
1805 was found after the end of stream, it'll be ignored and saved in\n\
1806 unused_data attribute.\n\
1807 ");
1808
1809 static PyObject *
1810 BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args)
1811 {
1812         char *data;
1813         int datasize;
1814         int bufsize = SMALLCHUNK;
1815         PY_LONG_LONG totalout;
1816         PyObject *ret = NULL;
1817         bz_stream *bzs = &self->bzs;
1818         int bzerror;
1819
1820         if (!PyArg_ParseTuple(args, "s#:decompress", &data, &datasize))
1821                 return NULL;
1822
1823         ACQUIRE_LOCK(self);
1824         if (!self->running) {
1825                 PyErr_SetString(PyExc_EOFError, "end of stream was "
1826                                                 "already found");
1827                 goto error;
1828         }
1829
1830         ret = PyString_FromStringAndSize(NULL, bufsize);
1831         if (!ret)
1832                 goto error;
1833
1834         bzs->next_in = data;
1835         bzs->avail_in = datasize;
1836         bzs->next_out = BUF(ret);
1837         bzs->avail_out = bufsize;
1838
1839         totalout = BZS_TOTAL_OUT(bzs);
1840
1841         for (;;) {
1842                 Py_BEGIN_ALLOW_THREADS
1843                 bzerror = BZ2_bzDecompress(bzs);
1844                 Py_END_ALLOW_THREADS
1845                 if (bzerror == BZ_STREAM_END) {
1846                         if (bzs->avail_in != 0) {
1847                                 Py_DECREF(self->unused_data);
1848                                 self->unused_data =
1849                                     PyString_FromStringAndSize(bzs->next_in,
1850                                                                bzs->avail_in);
1851                         }
1852                         self->running = 0;
1853                         break;
1854                 }
1855                 if (bzerror != BZ_OK) {
1856                         Util_CatchBZ2Error(bzerror);
1857                         goto error;
1858                 }
1859                 if (bzs->avail_out == 0) {
1860                         bufsize = Util_NewBufferSize(bufsize);
1861                         if (_PyString_Resize(&ret, bufsize) < 0) {
1862                                 BZ2_bzDecompressEnd(bzs);
1863                                 goto error;
1864                         }
1865                         bzs->next_out = BUF(ret);
1866                         bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1867                                                     - totalout);
1868                         bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1869                 } else if (bzs->avail_in == 0) {
1870                         break;
1871                 }
1872         }
1873
1874         if (bzs->avail_out != 0)
1875                 _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
1876
1877         RELEASE_LOCK(self);
1878         return ret;
1879
1880 error:
1881         RELEASE_LOCK(self);
1882         Py_XDECREF(ret);
1883         return NULL;
1884 }
1885
1886 static PyMethodDef BZ2Decomp_methods[] = {
1887         {"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_decompress__doc__},
1888         {NULL,          NULL}           /* sentinel */
1889 };
1890
1891
1892 /* ===================================================================== */
1893 /* Slot definitions for BZ2Decomp_Type. */
1894
1895 static int
1896 BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs)
1897 {
1898         int bzerror;
1899
1900         if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
1901                 return -1;
1902
1903 #ifdef WITH_THREAD
1904         self->lock = PyThread_allocate_lock();
1905         if (!self->lock) {
1906                 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1907                 goto error;
1908         }
1909 #endif
1910
1911         self->unused_data = PyString_FromString("");
1912         if (!self->unused_data)
1913                 goto error;
1914
1915         memset(&self->bzs, 0, sizeof(bz_stream));
1916         bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
1917         if (bzerror != BZ_OK) {
1918                 Util_CatchBZ2Error(bzerror);
1919                 goto error;
1920         }
1921
1922         self->running = 1;
1923
1924         return 0;
1925
1926 error:
1927 #ifdef WITH_THREAD
1928         if (self->lock) {
1929                 PyThread_free_lock(self->lock);
1930                 self->lock = NULL;
1931         }
1932 #endif
1933         Py_CLEAR(self->unused_data);
1934         return -1;
1935 }
1936
1937 static void
1938 BZ2Decomp_dealloc(BZ2DecompObject *self)
1939 {
1940 #ifdef WITH_THREAD
1941         if (self->lock)
1942                 PyThread_free_lock(self->lock);
1943 #endif
1944         Py_XDECREF(self->unused_data);
1945         BZ2_bzDecompressEnd(&self->bzs);
1946         self->ob_type->tp_free((PyObject *)self);
1947 }
1948
1949
1950 /* ===================================================================== */
1951 /* BZ2Decomp_Type definition. */
1952
1953 PyDoc_STRVAR(BZ2Decomp__doc__,
1954 "BZ2Decompressor() -> decompressor object\n\
1955 \n\
1956 Create a new decompressor object. This object may be used to decompress\n\
1957 data sequentially. If you want to decompress data in one shot, use the\n\
1958 decompress() function instead.\n\
1959 ");
1960
1961 static PyTypeObject BZ2Decomp_Type = {
1962         PyObject_HEAD_INIT(NULL)
1963         0,                      /*ob_size*/
1964         "bz2.BZ2Decompressor",  /*tp_name*/
1965         sizeof(BZ2DecompObject), /*tp_basicsize*/
1966         0,                      /*tp_itemsize*/
1967         (destructor)BZ2Decomp_dealloc, /*tp_dealloc*/
1968         0,                      /*tp_print*/
1969         0,                      /*tp_getattr*/
1970         0,                      /*tp_setattr*/
1971         0,                      /*tp_compare*/
1972         0,                      /*tp_repr*/
1973         0,                      /*tp_as_number*/
1974         0,                      /*tp_as_sequence*/
1975         0,                      /*tp_as_mapping*/
1976         0,                      /*tp_hash*/
1977         0,                      /*tp_call*/
1978         0,                      /*tp_str*/
1979         PyObject_GenericGetAttr,/*tp_getattro*/
1980         PyObject_GenericSetAttr,/*tp_setattro*/
1981         0,                      /*tp_as_buffer*/
1982         Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1983         BZ2Decomp__doc__,       /*tp_doc*/
1984         0,                      /*tp_traverse*/
1985         0,                      /*tp_clear*/
1986         0,                      /*tp_richcompare*/
1987         0,                      /*tp_weaklistoffset*/
1988         0,                      /*tp_iter*/
1989         0,                      /*tp_iternext*/
1990         BZ2Decomp_methods,      /*tp_methods*/
1991         BZ2Decomp_members,      /*tp_members*/
1992         0,                      /*tp_getset*/
1993         0,                      /*tp_base*/
1994         0,                      /*tp_dict*/
1995         0,                      /*tp_descr_get*/
1996         0,                      /*tp_descr_set*/
1997         0,                      /*tp_dictoffset*/
1998         (initproc)BZ2Decomp_init, /*tp_init*/
1999         PyType_GenericAlloc,    /*tp_alloc*/
2000         PyType_GenericNew,      /*tp_new*/
2001         _PyObject_Del,          /*tp_free*/
2002         0,                      /*tp_is_gc*/
2003 };
2004
2005
2006 /* ===================================================================== */
2007 /* Module functions. */
2008
2009 PyDoc_STRVAR(bz2_compress__doc__,
2010 "compress(data [, compresslevel=9]) -> string\n\
2011 \n\
2012 Compress data in one shot. If you want to compress data sequentially,\n\
2013 use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
2014 given, must be a number between 1 and 9.\n\
2015 ");
2016
2017 static PyObject *
2018 bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs)
2019 {
2020         int compresslevel=9;
2021         char *data;
2022         int datasize;
2023         int bufsize;
2024         PyObject *ret = NULL;
2025         bz_stream _bzs;
2026         bz_stream *bzs = &_bzs;
2027         int bzerror;
2028         static char *kwlist[] = {"data", "compresslevel", 0};
2029
2030         if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|i",
2031                                          kwlist, &data, &datasize,
2032                                          &compresslevel))
2033                 return NULL;
2034
2035         if (compresslevel < 1 || compresslevel > 9) {
2036                 PyErr_SetString(PyExc_ValueError,
2037                                 "compresslevel must be between 1 and 9");
2038                 return NULL;
2039         }
2040
2041         /* Conforming to bz2 manual, this is large enough to fit compressed
2042          * data in one shot. We will check it later anyway. */
2043         bufsize = datasize + (datasize/100+1) + 600;
2044
2045         ret = PyString_FromStringAndSize(NULL, bufsize);
2046         if (!ret)
2047                 return NULL;
2048
2049         memset(bzs, 0, sizeof(bz_stream));
2050
2051         bzs->next_in = data;
2052         bzs->avail_in = datasize;
2053         bzs->next_out = BUF(ret);
2054         bzs->avail_out = bufsize;
2055
2056         bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0);
2057         if (bzerror != BZ_OK) {
2058                 Util_CatchBZ2Error(bzerror);
2059                 Py_DECREF(ret);
2060                 return NULL;
2061         }
2062
2063         for (;;) {
2064                 Py_BEGIN_ALLOW_THREADS
2065                 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
2066                 Py_END_ALLOW_THREADS
2067                 if (bzerror == BZ_STREAM_END) {
2068                         break;
2069                 } else if (bzerror != BZ_FINISH_OK) {
2070                         BZ2_bzCompressEnd(bzs);
2071                         Util_CatchBZ2Error(bzerror);
2072                         Py_DECREF(ret);
2073                         return NULL;
2074                 }
2075                 if (bzs->avail_out == 0) {
2076                         bufsize = Util_NewBufferSize(bufsize);
2077                         if (_PyString_Resize(&ret, bufsize) < 0) {
2078                                 BZ2_bzCompressEnd(bzs);
2079                                 Py_DECREF(ret);
2080                                 return NULL;
2081                         }
2082                         bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2083                         bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2084                 }
2085         }
2086
2087         if (bzs->avail_out != 0)
2088                 _PyString_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs));
2089         BZ2_bzCompressEnd(bzs);
2090
2091         return ret;
2092 }
2093
2094 PyDoc_STRVAR(bz2_decompress__doc__,
2095 "decompress(data) -> decompressed data\n\
2096 \n\
2097 Decompress data in one shot. If you want to decompress data sequentially,\n\
2098 use an instance of BZ2Decompressor instead.\n\
2099 ");
2100
2101 static PyObject *
2102 bz2_decompress(PyObject *self, PyObject *args)
2103 {
2104         char *data;
2105         int datasize;
2106         int bufsize = SMALLCHUNK;
2107         PyObject *ret;
2108         bz_stream _bzs;
2109         bz_stream *bzs = &_bzs;
2110         int bzerror;
2111
2112         if (!PyArg_ParseTuple(args, "s#:decompress", &data, &datasize))
2113                 return NULL;
2114
2115         if (datasize == 0)
2116                 return PyString_FromString("");
2117
2118         ret = PyString_FromStringAndSize(NULL, bufsize);
2119         if (!ret)
2120                 return NULL;
2121
2122         memset(bzs, 0, sizeof(bz_stream));
2123
2124         bzs->next_in = data;
2125         bzs->avail_in = datasize;
2126         bzs->next_out = BUF(ret);
2127         bzs->avail_out = bufsize;
2128
2129         bzerror = BZ2_bzDecompressInit(bzs, 0, 0);
2130         if (bzerror != BZ_OK) {
2131                 Util_CatchBZ2Error(bzerror);
2132                 Py_DECREF(ret);
2133                 return NULL;
2134         }
2135
2136         for (;;) {
2137                 Py_BEGIN_ALLOW_THREADS
2138                 bzerror = BZ2_bzDecompress(bzs);
2139                 Py_END_ALLOW_THREADS
2140                 if (bzerror == BZ_STREAM_END) {
2141                         break;
2142                 } else if (bzerror != BZ_OK) {
2143                         BZ2_bzDecompressEnd(bzs);
2144                         Util_CatchBZ2Error(bzerror);
2145                         Py_DECREF(ret);
2146                         return NULL;
2147                 }
2148                 if (bzs->avail_out == 0) {
2149                         bufsize = Util_NewBufferSize(bufsize);
2150                         if (_PyString_Resize(&ret, bufsize) < 0) {
2151                                 BZ2_bzDecompressEnd(bzs);
2152                                 Py_DECREF(ret);
2153                                 return NULL;
2154                         }
2155                         bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2156                         bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2157                 } else if (bzs->avail_in == 0) {
2158                         BZ2_bzDecompressEnd(bzs);
2159                         PyErr_SetString(PyExc_ValueError,
2160                                         "couldn't find end of stream");
2161                         Py_DECREF(ret);
2162                         return NULL;
2163                 }
2164         }
2165
2166         if (bzs->avail_out != 0)
2167                 _PyString_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs));
2168         BZ2_bzDecompressEnd(bzs);
2169
2170         return ret;
2171 }
2172
2173 static PyMethodDef bz2_methods[] = {
2174         {"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS,
2175                 bz2_compress__doc__},
2176         {"decompress", (PyCFunction) bz2_decompress, METH_VARARGS,
2177                 bz2_decompress__doc__},
2178         {NULL,          NULL}           /* sentinel */
2179 };
2180
2181 /* ===================================================================== */
2182 /* Initialization function. */
2183
2184 PyDoc_STRVAR(bz2__doc__,
2185 "The python bz2 module provides a comprehensive interface for\n\
2186 the bz2 compression library. It implements a complete file\n\
2187 interface, one shot (de)compression functions, and types for\n\
2188 sequential (de)compression.\n\
2189 ");
2190
2191 PyMODINIT_FUNC
2192 initbz2(void)
2193 {
2194         PyObject *m;
2195
2196         BZ2File_Type.ob_type = &PyType_Type;
2197         BZ2Comp_Type.ob_type = &PyType_Type;
2198         BZ2Decomp_Type.ob_type = &PyType_Type;
2199
2200         m = Py_InitModule3("bz2", bz2_methods, bz2__doc__);
2201         if (m == NULL)
2202                 return;
2203
2204         PyModule_AddObject(m, "__author__", PyString_FromString(__author__));
2205
2206         Py_INCREF(&BZ2File_Type);
2207         PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type);
2208
2209         Py_INCREF(&BZ2Comp_Type);
2210         PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type);
2211
2212         Py_INCREF(&BZ2Decomp_Type);
2213         PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type);
2214 }