Modules/bz2module.c

   1 /*
   2
   3 python-bz2 - python bz2 library interface
   4
   5 Copyright (c) 2002  Gustavo Niemeyer <niemeyer@conectiva.com>
   6 Copyright (c) 2002  Python Software Foundation; All Rights Reserved
   7
   8 */
   9
  10 #include "Python.h"
  11 #include <stdio.h>
  12 #include <bzlib.h>
  13 #include "structmember.h"
  14
  15 #ifdef WITH_THREAD
  16 #include "pythread.h"
  17 #endif
  18
  19 static char __author__[] =
  20 "The bz2 python module was written by:\n\
  21 \n\
  22     Gustavo Niemeyer <niemeyer@conectiva.com>\n\
  23 ";
  24
  25 /* Our very own off_t-like type, 64-bit if possible */
  26 /* copied from Objects/fileobject.c */
  27 #if !defined(HAVE_LARGEFILE_SUPPORT)
  28 typedef off_t Py_off_t;
  29 #elif SIZEOF_OFF_T >= 8
  30 typedef off_t Py_off_t;
  31 #elif SIZEOF_FPOS_T >= 8
  32 typedef fpos_t Py_off_t;
  33 #else
  34 #error "Large file support, but neither off_t nor fpos_t is large enough."
  35 #endif
  36
  37 #define BUF(v) PyString_AS_STRING((PyStringObject *)v)
  38
  39 #define MODE_CLOSED   0
  40 #define MODE_READ     1
  41 #define MODE_READ_EOF 2
  42 #define MODE_WRITE    3
  43
  44 #define BZ2FileObject_Check(v)  (Py_TYPE(v) == &BZ2File_Type)
  45
  46
  47 #ifdef BZ_CONFIG_ERROR
  48
  49 #if SIZEOF_LONG >= 8
  50 #define BZS_TOTAL_OUT(bzs) \
  51         (((long)bzs->total_out_hi32 << 32) + bzs->total_out_lo32)
  52 #elif SIZEOF_LONG_LONG >= 8
  53 #define BZS_TOTAL_OUT(bzs) \
  54         (((PY_LONG_LONG)bzs->total_out_hi32 << 32) + bzs->total_out_lo32)
  55 #else
  56 #define BZS_TOTAL_OUT(bzs) \
  57         bzs->total_out_lo32
  58 #endif
  59
  60 #else /* ! BZ_CONFIG_ERROR */
  61
  62 #define BZ2_bzRead bzRead
  63 #define BZ2_bzReadOpen bzReadOpen
  64 #define BZ2_bzReadClose bzReadClose
  65 #define BZ2_bzWrite bzWrite
  66 #define BZ2_bzWriteOpen bzWriteOpen
  67 #define BZ2_bzWriteClose bzWriteClose
  68 #define BZ2_bzCompress bzCompress
  69 #define BZ2_bzCompressInit bzCompressInit
  70 #define BZ2_bzCompressEnd bzCompressEnd
  71 #define BZ2_bzDecompress bzDecompress
  72 #define BZ2_bzDecompressInit bzDecompressInit
  73 #define BZ2_bzDecompressEnd bzDecompressEnd
  74
  75 #define BZS_TOTAL_OUT(bzs) bzs->total_out
  76
  77 #endif /* ! BZ_CONFIG_ERROR */
  78
  79
  80 #ifdef WITH_THREAD
  81 #define ACQUIRE_LOCK(obj) PyThread_acquire_lock(obj->lock, 1)
  82 #define RELEASE_LOCK(obj) PyThread_release_lock(obj->lock)
  83 #else
  84 #define ACQUIRE_LOCK(obj)
  85 #define RELEASE_LOCK(obj)
  86 #endif
  87
  88 /* Bits in f_newlinetypes */
  89 #define NEWLINE_UNKNOWN 0       /* No newline seen, yet */
  90 #define NEWLINE_CR 1            /* \r newline seen */
  91 #define NEWLINE_LF 2            /* \n newline seen */
  92 #define NEWLINE_CRLF 4          /* \r\n newline seen */
  93
  94 /* ===================================================================== */
  95 /* Structure definitions. */
  96
  97 typedef struct {
  98         PyObject_HEAD
  99         PyObject *file;
 100
 101         char* f_buf;            /* Allocated readahead buffer */
 102         char* f_bufend;         /* Points after last occupied position */
 103         char* f_bufptr;         /* Current buffer position */
 104
 105         int f_softspace;        /* Flag used by 'print' command */
 106
 107         int f_univ_newline;     /* Handle any newline convention */
 108         int f_newlinetypes;     /* Types of newlines seen */
 109         int f_skipnextlf;       /* Skip next \n */
 110
 111         BZFILE *fp;
 112         int mode;
 113         Py_off_t pos;
 114         Py_off_t size;
 115 #ifdef WITH_THREAD
 116         PyThread_type_lock lock;
 117 #endif
 118 } BZ2FileObject;
 119
 120 typedef struct {
 121         PyObject_HEAD
 122         bz_stream bzs;
 123         int running;
 124 #ifdef WITH_THREAD
 125         PyThread_type_lock lock;
 126 #endif
 127 } BZ2CompObject;
 128
 129 typedef struct {
 130         PyObject_HEAD
 131         bz_stream bzs;
 132         int running;
 133         PyObject *unused_data;
 134 #ifdef WITH_THREAD
 135         PyThread_type_lock lock;
 136 #endif
 137 } BZ2DecompObject;
 138
 139 /* ===================================================================== */
 140 /* Utility functions. */
 141
 142 static int
 143 Util_CatchBZ2Error(int bzerror)
 144 {
 145         int ret = 0;
 146         switch(bzerror) {
 147                 case BZ_OK:
 148                 case BZ_STREAM_END:
 149                         break;
 150
 151 #ifdef BZ_CONFIG_ERROR
 152                 case BZ_CONFIG_ERROR:
 153                         PyErr_SetString(PyExc_SystemError,
 154                                         "the bz2 library was not compiled "
 155                                         "correctly");
 156                         ret = 1;
 157                         break;
 158 #endif
 159
 160                 case BZ_PARAM_ERROR:
 161                         PyErr_SetString(PyExc_ValueError,
 162                                         "the bz2 library has received wrong "
 163                                         "parameters");
 164                         ret = 1;
 165                         break;
 166
 167                 case BZ_MEM_ERROR:
 168                         PyErr_NoMemory();
 169                         ret = 1;
 170                         break;
 171
 172                 case BZ_DATA_ERROR:
 173                 case BZ_DATA_ERROR_MAGIC:
 174                         PyErr_SetString(PyExc_IOError, "invalid data stream");
 175                         ret = 1;
 176                         break;
 177
 178                 case BZ_IO_ERROR:
 179                         PyErr_SetString(PyExc_IOError, "unknown IO error");
 180                         ret = 1;
 181                         break;
 182
 183                 case BZ_UNEXPECTED_EOF:
 184                         PyErr_SetString(PyExc_EOFError,
 185                                         "compressed file ended before the "
 186                                         "logical end-of-stream was detected");
 187                         ret = 1;
 188                         break;
 189
 190                 case BZ_SEQUENCE_ERROR:
 191                         PyErr_SetString(PyExc_RuntimeError,
 192                                         "wrong sequence of bz2 library "
 193                                         "commands used");
 194                         ret = 1;
 195                         break;
 196         }
 197         return ret;
 198 }
 199
 200 #if BUFSIZ < 8192
 201 #define SMALLCHUNK 8192
 202 #else
 203 #define SMALLCHUNK BUFSIZ
 204 #endif
 205
 206 #if SIZEOF_INT < 4
 207 #define BIGCHUNK  (512 * 32)
 208 #else
 209 #define BIGCHUNK  (512 * 1024)
 210 #endif
 211
 212 /* This is a hacked version of Python's fileobject.c:new_buffersize(). */
 213 static size_t
 214 Util_NewBufferSize(size_t currentsize)
 215 {
 216         if (currentsize > SMALLCHUNK) {
 217                 /* Keep doubling until we reach BIGCHUNK;
 218                    then keep adding BIGCHUNK. */
 219                 if (currentsize <= BIGCHUNK)
 220                         return currentsize + currentsize;
 221                 else
 222                         return currentsize + BIGCHUNK;
 223         }
 224         return currentsize + SMALLCHUNK;
 225 }
 226
 227 /* This is a hacked version of Python's fileobject.c:get_line(). */
 228 static PyObject *
 229 Util_GetLine(BZ2FileObject *f, int n)
 230 {
 231         char c;
 232         char *buf, *end;
 233         size_t total_v_size;    /* total # of slots in buffer */
 234         size_t used_v_size;     /* # used slots in buffer */
 235         size_t increment;       /* amount to increment the buffer */
 236         PyObject *v;
 237         int bzerror;
 238         int bytes_read;
 239         int newlinetypes = f->f_newlinetypes;
 240         int skipnextlf = f->f_skipnextlf;
 241         int univ_newline = f->f_univ_newline;
 242
 243         total_v_size = n > 0 ? n : 100;
 244         v = PyString_FromStringAndSize((char *)NULL, total_v_size);
 245         if (v == NULL)
 246                 return NULL;
 247
 248         buf = BUF(v);
 249         end = buf + total_v_size;
 250
 251         for (;;) {
 252                 Py_BEGIN_ALLOW_THREADS
 253                 while (buf != end) {
 254                         bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1);
 255                         f->pos++;
 256                         if (bytes_read == 0) break;
 257                         if (univ_newline) {
 258                                 if (skipnextlf) {
 259                                         skipnextlf = 0;
 260                                         if (c == '\n') {
 261                                                 /* Seeing a \n here with skipnextlf true means we
 262                                                  * saw a \r before.
 263                                                  */
 264                                                 newlinetypes |= NEWLINE_CRLF;
 265                                                 if (bzerror != BZ_OK) break;
 266                                                 bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1);
 267                                                 f->pos++;
 268                                                 if (bytes_read == 0) break;
 269                                         } else {
 270                                                 newlinetypes |= NEWLINE_CR;
 271                                         }
 272                                 }
 273                                 if (c == '\r') {
 274                                         skipnextlf = 1;
 275                                         c = '\n';
 276                                 } else if (c == '\n')
 277                                         newlinetypes |= NEWLINE_LF;
 278                         }
 279                         *buf++ = c;
 280                         if (bzerror != BZ_OK || c == '\n') break;
 281                 }
 282                 if (univ_newline && bzerror == BZ_STREAM_END && skipnextlf)
 283                         newlinetypes |= NEWLINE_CR;
 284                 Py_END_ALLOW_THREADS
 285                 f->f_newlinetypes = newlinetypes;
 286                 f->f_skipnextlf = skipnextlf;
 287                 if (bzerror == BZ_STREAM_END) {
 288                         f->size = f->pos;
 289                         f->mode = MODE_READ_EOF;
 290                         break;
 291                 } else if (bzerror != BZ_OK) {
 292                         Util_CatchBZ2Error(bzerror);
 293                         Py_DECREF(v);
 294                         return NULL;
 295                 }
 296                 if (c == '\n')
 297                         break;
 298                 /* Must be because buf == end */
 299                 if (n > 0)
 300                         break;
 301                 used_v_size = total_v_size;
 302                 increment = total_v_size >> 2; /* mild exponential growth */
 303                 total_v_size += increment;
 304                 if (total_v_size > INT_MAX) {
 305                         PyErr_SetString(PyExc_OverflowError,
 306                             "line is longer than a Python string can hold");
 307                         Py_DECREF(v);
 308                         return NULL;
 309                 }
 310                 if (_PyString_Resize(&v, total_v_size) < 0)
 311                         return NULL;
 312                 buf = BUF(v) + used_v_size;
 313                 end = BUF(v) + total_v_size;
 314         }
 315
 316         used_v_size = buf - BUF(v);
 317         if (used_v_size != total_v_size)
 318                 _PyString_Resize(&v, used_v_size);
 319         return v;
 320 }
 321
 322 /* This is a hacked version of Python's
 323  * fileobject.c:Py_UniversalNewlineFread(). */
 324 size_t
 325 Util_UnivNewlineRead(int *bzerror, BZFILE *stream,
 326                      char* buf, size_t n, BZ2FileObject *f)
 327 {
 328         char *dst = buf;
 329         int newlinetypes, skipnextlf;
 330
 331         assert(buf != NULL);
 332         assert(stream != NULL);
 333
 334         if (!f->f_univ_newline)
 335                 return BZ2_bzRead(bzerror, stream, buf, n);
 336
 337         newlinetypes = f->f_newlinetypes;
 338         skipnextlf = f->f_skipnextlf;
 339
 340         /* Invariant:  n is the number of bytes remaining to be filled
 341          * in the buffer.
 342          */
 343         while (n) {
 344                 size_t nread;
 345                 int shortread;
 346                 char *src = dst;
 347
 348                 nread = BZ2_bzRead(bzerror, stream, dst, n);
 349                 assert(nread <= n);
 350                 n -= nread; /* assuming 1 byte out for each in; will adjust */
 351                 shortread = n != 0;     /* true iff EOF or error */
 352                 while (nread--) {
 353                         char c = *src++;
 354                         if (c == '\r') {
 355                                 /* Save as LF and set flag to skip next LF. */
 356                                 *dst++ = '\n';
 357                                 skipnextlf = 1;
 358                         }
 359                         else if (skipnextlf && c == '\n') {
 360                                 /* Skip LF, and remember we saw CR LF. */
 361                                 skipnextlf = 0;
 362                                 newlinetypes |= NEWLINE_CRLF;
 363                                 ++n;
 364                         }
 365                         else {
 366                                 /* Normal char to be stored in buffer.  Also
 367                                  * update the newlinetypes flag if either this
 368                                  * is an LF or the previous char was a CR.
 369                                  */
 370                                 if (c == '\n')
 371                                         newlinetypes |= NEWLINE_LF;
 372                                 else if (skipnextlf)
 373                                         newlinetypes |= NEWLINE_CR;
 374                                 *dst++ = c;
 375                                 skipnextlf = 0;
 376                         }
 377                 }
 378                 if (shortread) {
 379                         /* If this is EOF, update type flags. */
 380                         if (skipnextlf && *bzerror == BZ_STREAM_END)
 381                                 newlinetypes |= NEWLINE_CR;
 382                         break;
 383                 }
 384         }
 385         f->f_newlinetypes = newlinetypes;
 386         f->f_skipnextlf = skipnextlf;
 387         return dst - buf;
 388 }
 389
 390 /* This is a hacked version of Python's fileobject.c:drop_readahead(). */
 391 static void
 392 Util_DropReadAhead(BZ2FileObject *f)
 393 {
 394         if (f->f_buf != NULL) {
 395                 PyMem_Free(f->f_buf);
 396                 f->f_buf = NULL;
 397         }
 398 }
 399
 400 /* This is a hacked version of Python's fileobject.c:readahead(). */
 401 static int
 402 Util_ReadAhead(BZ2FileObject *f, int bufsize)
 403 {
 404         int chunksize;
 405         int bzerror;
 406
 407         if (f->f_buf != NULL) {
 408                 if((f->f_bufend - f->f_bufptr) >= 1)
 409                         return 0;
 410                 else
 411                         Util_DropReadAhead(f);
 412         }
 413         if (f->mode == MODE_READ_EOF) {
 414                 f->f_bufptr = f->f_buf;
 415                 f->f_bufend = f->f_buf;
 416                 return 0;
 417         }
 418         if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
 419                 return -1;
 420         }
 421         Py_BEGIN_ALLOW_THREADS
 422         chunksize = Util_UnivNewlineRead(&bzerror, f->fp, f->f_buf,
 423                                          bufsize, f);
 424         Py_END_ALLOW_THREADS
 425         f->pos += chunksize;
 426         if (bzerror == BZ_STREAM_END) {
 427                 f->size = f->pos;
 428                 f->mode = MODE_READ_EOF;
 429         } else if (bzerror != BZ_OK) {
 430                 Util_CatchBZ2Error(bzerror);
 431                 Util_DropReadAhead(f);
 432                 return -1;
 433         }
 434         f->f_bufptr = f->f_buf;
 435         f->f_bufend = f->f_buf + chunksize;
 436         return 0;
 437 }
 438
 439 /* This is a hacked version of Python's
 440  * fileobject.c:readahead_get_line_skip(). */
 441 static PyStringObject *
 442 Util_ReadAheadGetLineSkip(BZ2FileObject *f, int skip, int bufsize)
 443 {
 444         PyStringObject* s;
 445         char *bufptr;
 446         char *buf;
 447         int len;
 448
 449         if (f->f_buf == NULL)
 450                 if (Util_ReadAhead(f, bufsize) < 0)
 451                         return NULL;
 452
 453         len = f->f_bufend - f->f_bufptr;
 454         if (len == 0)
 455                 return (PyStringObject *)
 456                         PyString_FromStringAndSize(NULL, skip);
 457         bufptr = memchr(f->f_bufptr, '\n', len);
 458         if (bufptr != NULL) {
 459                 bufptr++;                       /* Count the '\n' */
 460                 len = bufptr - f->f_bufptr;
 461                 s = (PyStringObject *)
 462                         PyString_FromStringAndSize(NULL, skip+len);
 463                 if (s == NULL)
 464                         return NULL;
 465                 memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
 466                 f->f_bufptr = bufptr;
 467                 if (bufptr == f->f_bufend)
 468                         Util_DropReadAhead(f);
 469         } else {
 470                 bufptr = f->f_bufptr;
 471                 buf = f->f_buf;
 472                 f->f_buf = NULL;        /* Force new readahead buffer */
 473                 s = Util_ReadAheadGetLineSkip(f, skip+len,
 474                                               bufsize + (bufsize>>2));
 475                 if (s == NULL) {
 476                         PyMem_Free(buf);
 477                         return NULL;
 478                 }
 479                 memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
 480                 PyMem_Free(buf);
 481         }
 482         return s;
 483 }
 484
 485 /* ===================================================================== */
 486 /* Methods of BZ2File. */
 487
 488 PyDoc_STRVAR(BZ2File_read__doc__,
 489 "read([size]) -> string\n\
 490 \n\
 491 Read at most size uncompressed bytes, returned as a string. If the size\n\
 492 argument is negative or omitted, read until EOF is reached.\n\
 493 ");
 494
 495 /* This is a hacked version of Python's fileobject.c:file_read(). */
 496 static PyObject *
 497 BZ2File_read(BZ2FileObject *self, PyObject *args)
 498 {
 499         long bytesrequested = -1;
 500         size_t bytesread, buffersize, chunksize;
 501         int bzerror;
 502         PyObject *ret = NULL;
 503
 504         if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
 505                 return NULL;
 506
 507         ACQUIRE_LOCK(self);
 508         switch (self->mode) {
 509                 case MODE_READ:
 510                         break;
 511                 case MODE_READ_EOF:
 512                         ret = PyString_FromString("");
 513                         goto cleanup;
 514                 case MODE_CLOSED:
 515                         PyErr_SetString(PyExc_ValueError,
 516                                         "I/O operation on closed file");
 517                         goto cleanup;
 518                 default:
 519                         PyErr_SetString(PyExc_IOError,
 520                                         "file is not ready for reading");
 521                         goto cleanup;
 522         }
 523
 524         if (bytesrequested < 0)
 525                 buffersize = Util_NewBufferSize((size_t)0);
 526         else
 527                 buffersize = bytesrequested;
 528         if (buffersize > INT_MAX) {
 529                 PyErr_SetString(PyExc_OverflowError,
 530                                 "requested number of bytes is "
 531                                 "more than a Python string can hold");
 532                 goto cleanup;
 533         }
 534         ret = PyString_FromStringAndSize((char *)NULL, buffersize);
 535         if (ret == NULL)
 536                 goto cleanup;
 537         bytesread = 0;
 538
 539         for (;;) {
 540                 Py_BEGIN_ALLOW_THREADS
 541                 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
 542                                                  BUF(ret)+bytesread,
 543                                                  buffersize-bytesread,
 544                                                  self);
 545                 self->pos += chunksize;
 546                 Py_END_ALLOW_THREADS
 547                 bytesread += chunksize;
 548                 if (bzerror == BZ_STREAM_END) {
 549                         self->size = self->pos;
 550                         self->mode = MODE_READ_EOF;
 551                         break;
 552                 } else if (bzerror != BZ_OK) {
 553                         Util_CatchBZ2Error(bzerror);
 554                         Py_DECREF(ret);
 555                         ret = NULL;
 556                         goto cleanup;
 557                 }
 558                 if (bytesrequested < 0) {
 559                         buffersize = Util_NewBufferSize(buffersize);
 560                         if (_PyString_Resize(&ret, buffersize) < 0)
 561                                 goto cleanup;
 562                 } else {
 563                         break;
 564                 }
 565         }
 566         if (bytesread != buffersize)
 567                 _PyString_Resize(&ret, bytesread);
 568
 569 cleanup:
 570         RELEASE_LOCK(self);
 571         return ret;
 572 }
 573
 574 PyDoc_STRVAR(BZ2File_readline__doc__,
 575 "readline([size]) -> string\n\
 576 \n\
 577 Return the next line from the file, as a string, retaining newline.\n\
 578 A non-negative size argument will limit the maximum number of bytes to\n\
 579 return (an incomplete line may be returned then). Return an empty\n\
 580 string at EOF.\n\
 581 ");
 582
 583 static PyObject *
 584 BZ2File_readline(BZ2FileObject *self, PyObject *args)
 585 {
 586         PyObject *ret = NULL;
 587         int sizehint = -1;
 588
 589         if (!PyArg_ParseTuple(args, "|i:readline", &sizehint))
 590                 return NULL;
 591
 592         ACQUIRE_LOCK(self);
 593         switch (self->mode) {
 594                 case MODE_READ:
 595                         break;
 596                 case MODE_READ_EOF:
 597                         ret = PyString_FromString("");
 598                         goto cleanup;
 599                 case MODE_CLOSED:
 600                         PyErr_SetString(PyExc_ValueError,
 601                                         "I/O operation on closed file");
 602                         goto cleanup;
 603                 default:
 604                         PyErr_SetString(PyExc_IOError,
 605                                         "file is not ready for reading");
 606                         goto cleanup;
 607         }
 608
 609         if (sizehint == 0)
 610                 ret = PyString_FromString("");
 611         else
 612                 ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint);
 613
 614 cleanup:
 615         RELEASE_LOCK(self);
 616         return ret;
 617 }
 618
 619 PyDoc_STRVAR(BZ2File_readlines__doc__,
 620 "readlines([size]) -> list\n\
 621 \n\
 622 Call readline() repeatedly and return a list of lines read.\n\
 623 The optional size argument, if given, is an approximate bound on the\n\
 624 total number of bytes in the lines returned.\n\
 625 ");
 626
 627 /* This is a hacked version of Python's fileobject.c:file_readlines(). */
 628 static PyObject *
 629 BZ2File_readlines(BZ2FileObject *self, PyObject *args)
 630 {
 631         long sizehint = 0;
 632         PyObject *list = NULL;
 633         PyObject *line;
 634         char small_buffer[SMALLCHUNK];
 635         char *buffer = small_buffer;
 636         size_t buffersize = SMALLCHUNK;
 637         PyObject *big_buffer = NULL;
 638         size_t nfilled = 0;
 639         size_t nread;
 640         size_t totalread = 0;
 641         char *p, *q, *end;
 642         int err;
 643         int shortread = 0;
 644         int bzerror;
 645
 646         if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
 647                 return NULL;
 648
 649         ACQUIRE_LOCK(self);
 650         switch (self->mode) {
 651                 case MODE_READ:
 652                         break;
 653                 case MODE_READ_EOF:
 654                         list = PyList_New(0);
 655                         goto cleanup;
 656                 case MODE_CLOSED:
 657                         PyErr_SetString(PyExc_ValueError,
 658                                         "I/O operation on closed file");
 659                         goto cleanup;
 660                 default:
 661                         PyErr_SetString(PyExc_IOError,
 662                                         "file is not ready for reading");
 663                         goto cleanup;
 664         }
 665
 666         if ((list = PyList_New(0)) == NULL)
 667                 goto cleanup;
 668
 669         for (;;) {
 670                 Py_BEGIN_ALLOW_THREADS
 671                 nread = Util_UnivNewlineRead(&bzerror, self->fp,
 672                                              buffer+nfilled,
 673                                              buffersize-nfilled, self);
 674                 self->pos += nread;
 675                 Py_END_ALLOW_THREADS
 676                 if (bzerror == BZ_STREAM_END) {
 677                         self->size = self->pos;
 678                         self->mode = MODE_READ_EOF;
 679                         if (nread == 0) {
 680                                 sizehint = 0;
 681                                 break;
 682                         }
 683                         shortread = 1;
 684                 } else if (bzerror != BZ_OK) {
 685                         Util_CatchBZ2Error(bzerror);
 686                   error:
 687                         Py_DECREF(list);
 688                         list = NULL;
 689                         goto cleanup;
 690                 }
 691                 totalread += nread;
 692                 p = memchr(buffer+nfilled, '\n', nread);
 693                 if (!shortread && p == NULL) {
 694                         /* Need a larger buffer to fit this line */
 695                         nfilled += nread;
 696                         buffersize *= 2;
 697                         if (buffersize > INT_MAX) {
 698                                 PyErr_SetString(PyExc_OverflowError,
 699                                 "line is longer than a Python string can hold");
 700                                 goto error;
 701                         }
 702                         if (big_buffer == NULL) {
 703                                 /* Create the big buffer */
 704                                 big_buffer = PyString_FromStringAndSize(
 705                                         NULL, buffersize);
 706                                 if (big_buffer == NULL)
 707                                         goto error;
 708                                 buffer = PyString_AS_STRING(big_buffer);
 709                                 memcpy(buffer, small_buffer, nfilled);
 710                         }
 711                         else {
 712                                 /* Grow the big buffer */
 713                                 _PyString_Resize(&big_buffer, buffersize);
 714                                 buffer = PyString_AS_STRING(big_buffer);
 715                         }
 716                         continue;
 717                 }
 718                 end = buffer+nfilled+nread;
 719                 q = buffer;
 720                 while (p != NULL) {
 721                         /* Process complete lines */
 722                         p++;
 723                         line = PyString_FromStringAndSize(q, p-q);
 724                         if (line == NULL)
 725                                 goto error;
 726                         err = PyList_Append(list, line);
 727                         Py_DECREF(line);
 728                         if (err != 0)
 729                                 goto error;
 730                         q = p;
 731                         p = memchr(q, '\n', end-q);
 732                 }
 733                 /* Move the remaining incomplete line to the start */
 734                 nfilled = end-q;
 735                 memmove(buffer, q, nfilled);
 736                 if (sizehint > 0)
 737                         if (totalread >= (size_t)sizehint)
 738                                 break;
 739                 if (shortread) {
 740                         sizehint = 0;
 741                         break;
 742                 }
 743         }
 744         if (nfilled != 0) {
 745                 /* Partial last line */
 746                 line = PyString_FromStringAndSize(buffer, nfilled);
 747                 if (line == NULL)
 748                         goto error;
 749                 if (sizehint > 0) {
 750                         /* Need to complete the last line */
 751                         PyObject *rest = Util_GetLine(self, 0);
 752                         if (rest == NULL) {
 753                                 Py_DECREF(line);
 754                                 goto error;
 755                         }
 756                         PyString_Concat(&line, rest);
 757                         Py_DECREF(rest);
 758                         if (line == NULL)
 759                                 goto error;
 760                 }
 761                 err = PyList_Append(list, line);
 762                 Py_DECREF(line);
 763                 if (err != 0)
 764                         goto error;
 765         }
 766
 767   cleanup:
 768         RELEASE_LOCK(self);
 769         if (big_buffer) {
 770                 Py_DECREF(big_buffer);
 771         }
 772         return list;
 773 }
 774
 775 PyDoc_STRVAR(BZ2File_xreadlines__doc__,
 776 "xreadlines() -> self\n\
 777 \n\
 778 For backward compatibility. BZ2File objects now include the performance\n\
 779 optimizations previously implemented in the xreadlines module.\n\
 780 ");
 781
 782 PyDoc_STRVAR(BZ2File_write__doc__,
 783 "write(data) -> None\n\
 784 \n\
 785 Write the 'data' string to file. Note that due to buffering, close() may\n\
 786 be needed before the file on disk reflects the data written.\n\
 787 ");
 788
 789 /* This is a hacked version of Python's fileobject.c:file_write(). */
 790 static PyObject *
 791 BZ2File_write(BZ2FileObject *self, PyObject *args)
 792 {
 793         PyObject *ret = NULL;
 794         char *buf;
 795         int len;
 796         int bzerror;
 797
 798         if (!PyArg_ParseTuple(args, "s#:write", &buf, &len))
 799                 return NULL;
 800
 801         ACQUIRE_LOCK(self);
 802         switch (self->mode) {
 803                 case MODE_WRITE:
 804                         break;
 805
 806                 case MODE_CLOSED:
 807                         PyErr_SetString(PyExc_ValueError,
 808                                         "I/O operation on closed file");
 809                         goto cleanup;
 810
 811                 default:
 812                         PyErr_SetString(PyExc_IOError,
 813                                         "file is not ready for writing");
 814                         goto cleanup;
 815         }
 816
 817         self->f_softspace = 0;
 818
 819         Py_BEGIN_ALLOW_THREADS
 820         BZ2_bzWrite (&bzerror, self->fp, buf, len);
 821         self->pos += len;
 822         Py_END_ALLOW_THREADS
 823
 824         if (bzerror != BZ_OK) {
 825                 Util_CatchBZ2Error(bzerror);
 826                 goto cleanup;
 827         }
 828
 829         Py_INCREF(Py_None);
 830         ret = Py_None;
 831
 832 cleanup:
 833         RELEASE_LOCK(self);
 834         return ret;
 835 }
 836
 837 PyDoc_STRVAR(BZ2File_writelines__doc__,
 838 "writelines(sequence_of_strings) -> None\n\
 839 \n\
 840 Write the sequence of strings to the file. Note that newlines are not\n\
 841 added. The sequence can be any iterable object producing strings. This is\n\
 842 equivalent to calling write() for each string.\n\
 843 ");
 844
 845 /* This is a hacked version of Python's fileobject.c:file_writelines(). */
 846 static PyObject *
 847 BZ2File_writelines(BZ2FileObject *self, PyObject *seq)
 848 {
 849 #define CHUNKSIZE 1000
 850         PyObject *list = NULL;
 851         PyObject *iter = NULL;
 852         PyObject *ret = NULL;
 853         PyObject *line;
 854         int i, j, index, len, islist;
 855         int bzerror;
 856
 857         ACQUIRE_LOCK(self);
 858         switch (self->mode) {
 859                 case MODE_WRITE:
 860                         break;
 861
 862                 case MODE_CLOSED:
 863                         PyErr_SetString(PyExc_ValueError,
 864                                         "I/O operation on closed file");
 865                         goto error;
 866
 867                 default:
 868                         PyErr_SetString(PyExc_IOError,
 869                                         "file is not ready for writing");
 870                         goto error;
 871         }
 872
 873         islist = PyList_Check(seq);
 874         if  (!islist) {
 875                 iter = PyObject_GetIter(seq);
 876                 if (iter == NULL) {
 877                         PyErr_SetString(PyExc_TypeError,
 878                                 "writelines() requires an iterable argument");
 879                         goto error;
 880                 }
 881                 list = PyList_New(CHUNKSIZE);
 882                 if (list == NULL)
 883                         goto error;
 884         }
 885
 886         /* Strategy: slurp CHUNKSIZE lines into a private list,
 887            checking that they are all strings, then write that list
 888            without holding the interpreter lock, then come back for more. */
 889         for (index = 0; ; index += CHUNKSIZE) {
 890                 if (islist) {
 891                         Py_XDECREF(list);
 892                         list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
 893                         if (list == NULL)
 894                                 goto error;
 895                         j = PyList_GET_SIZE(list);
 896                 }
 897                 else {
 898                         for (j = 0; j < CHUNKSIZE; j++) {
 899                                 line = PyIter_Next(iter);
 900                                 if (line == NULL) {
 901                                         if (PyErr_Occurred())
 902                                                 goto error;
 903                                         break;
 904                                 }
 905                                 PyList_SetItem(list, j, line);
 906                         }
 907                 }
 908                 if (j == 0)
 909                         break;
 910
 911                 /* Check that all entries are indeed strings. If not,
 912                    apply the same rules as for file.write() and
 913                    convert the rets to strings. This is slow, but
 914                    seems to be the only way since all conversion APIs
 915                    could potentially execute Python code. */
 916                 for (i = 0; i < j; i++) {
 917                         PyObject *v = PyList_GET_ITEM(list, i);
 918                         if (!PyString_Check(v)) {
 919                                 const char *buffer;
 920                                 Py_ssize_t len;
 921                                 if (PyObject_AsCharBuffer(v, &buffer, &len)) {
 922                                         PyErr_SetString(PyExc_TypeError,
 923                                                         "writelines() "
 924                                                         "argument must be "
 925                                                         "a sequence of "
 926                                                         "strings");
 927                                         goto error;
 928                                 }
 929                                 line = PyString_FromStringAndSize(buffer,
 930                                                                   len);
 931                                 if (line == NULL)
 932                                         goto error;
 933                                 Py_DECREF(v);
 934                                 PyList_SET_ITEM(list, i, line);
 935                         }
 936                 }
 937
 938                 self->f_softspace = 0;
 939
 940                 /* Since we are releasing the global lock, the
 941                    following code may *not* execute Python code. */
 942                 Py_BEGIN_ALLOW_THREADS
 943                 for (i = 0; i < j; i++) {
 944                         line = PyList_GET_ITEM(list, i);
 945                         len = PyString_GET_SIZE(line);
 946                         BZ2_bzWrite (&bzerror, self->fp,
 947                                      PyString_AS_STRING(line), len);
 948                         if (bzerror != BZ_OK) {
 949                                 Py_BLOCK_THREADS
 950                                 Util_CatchBZ2Error(bzerror);
 951                                 goto error;
 952                         }
 953                 }
 954                 Py_END_ALLOW_THREADS
 955
 956                 if (j < CHUNKSIZE)
 957                         break;
 958         }
 959
 960         Py_INCREF(Py_None);
 961         ret = Py_None;
 962
 963   error:
 964         RELEASE_LOCK(self);
 965         Py_XDECREF(list);
 966         Py_XDECREF(iter);
 967         return ret;
 968 #undef CHUNKSIZE
 969 }
 970
 971 PyDoc_STRVAR(BZ2File_seek__doc__,
 972 "seek(offset [, whence]) -> None\n\
 973 \n\
 974 Move to new file position. Argument offset is a byte count. Optional\n\
 975 argument whence defaults to 0 (offset from start of file, offset\n\
 976 should be >= 0); other values are 1 (move relative to current position,\n\
 977 positive or negative), and 2 (move relative to end of file, usually\n\
 978 negative, although many platforms allow seeking beyond the end of a file).\n\
 979 \n\
 980 Note that seeking of bz2 files is emulated, and depending on the parameters\n\
 981 the operation may be extremely slow.\n\
 982 ");
 983
 984 static PyObject *
 985 BZ2File_seek(BZ2FileObject *self, PyObject *args)
 986 {
 987         int where = 0;
 988         PyObject *offobj;
 989         Py_off_t offset;
 990         char small_buffer[SMALLCHUNK];
 991         char *buffer = small_buffer;
 992         size_t buffersize = SMALLCHUNK;
 993         Py_off_t bytesread = 0;
 994         size_t readsize;
 995         int chunksize;
 996         int bzerror;
 997         PyObject *ret = NULL;
 998
 999         if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &where))
1000                 return NULL;
1001 #if !defined(HAVE_LARGEFILE_SUPPORT)
1002         offset = PyInt_AsLong(offobj);
1003 #else
1004         offset = PyLong_Check(offobj) ?
1005                 PyLong_AsLongLong(offobj) : PyInt_AsLong(offobj);
1006 #endif
1007         if (PyErr_Occurred())
1008                 return NULL;
1009
1010         ACQUIRE_LOCK(self);
1011         Util_DropReadAhead(self);
1012         switch (self->mode) {
1013                 case MODE_READ:
1014                 case MODE_READ_EOF:
1015                         break;
1016
1017                 case MODE_CLOSED:
1018                         PyErr_SetString(PyExc_ValueError,
1019                                         "I/O operation on closed file");
1020                         goto cleanup;
1021
1022                 default:
1023                         PyErr_SetString(PyExc_IOError,
1024                                         "seek works only while reading");
1025                         goto cleanup;
1026         }
1027
1028         if (where == 2) {
1029                 if (self->size == -1) {
1030                         assert(self->mode != MODE_READ_EOF);
1031                         for (;;) {
1032                                 Py_BEGIN_ALLOW_THREADS
1033                                 chunksize = Util_UnivNewlineRead(
1034                                                 &bzerror, self->fp,
1035                                                 buffer, buffersize,
1036                                                 self);
1037                                 self->pos += chunksize;
1038                                 Py_END_ALLOW_THREADS
1039
1040                                 bytesread += chunksize;
1041                                 if (bzerror == BZ_STREAM_END) {
1042                                         break;
1043                                 } else if (bzerror != BZ_OK) {
1044                                         Util_CatchBZ2Error(bzerror);
1045                                         goto cleanup;
1046                                 }
1047                         }
1048                         self->mode = MODE_READ_EOF;
1049                         self->size = self->pos;
1050                         bytesread = 0;
1051                 }
1052                 offset = self->size + offset;
1053         } else if (where == 1) {
1054                 offset = self->pos + offset;
1055         }
1056
1057         /* Before getting here, offset must be the absolute position the file
1058          * pointer should be set to. */
1059
1060         if (offset >= self->pos) {
1061                 /* we can move forward */
1062                 offset -= self->pos;
1063         } else {
1064                 /* we cannot move back, so rewind the stream */
1065                 BZ2_bzReadClose(&bzerror, self->fp);
1066                 if (self->fp) {
1067                         PyFile_DecUseCount((PyFileObject *)self->file);
1068                         self->fp = NULL;
1069                 }
1070                 if (bzerror != BZ_OK) {
1071                         Util_CatchBZ2Error(bzerror);
1072                         goto cleanup;
1073                 }
1074                 ret = PyObject_CallMethod(self->file, "seek", "(i)", 0);
1075                 if (!ret)
1076                         goto cleanup;
1077                 Py_DECREF(ret);
1078                 ret = NULL;
1079                 self->pos = 0;
1080                 self->fp = BZ2_bzReadOpen(&bzerror, PyFile_AsFile(self->file),
1081                                           0, 0, NULL, 0);
1082                 if (self->fp)
1083                         PyFile_IncUseCount((PyFileObject *)self->file);
1084                 if (bzerror != BZ_OK) {
1085                         Util_CatchBZ2Error(bzerror);
1086                         goto cleanup;
1087                 }
1088                 self->mode = MODE_READ;
1089         }
1090
1091         if (offset <= 0 || self->mode == MODE_READ_EOF)
1092                 goto exit;
1093
1094         /* Before getting here, offset must be set to the number of bytes
1095          * to walk forward. */
1096         for (;;) {
1097                 if (offset-bytesread > buffersize)
1098                         readsize = buffersize;
1099                 else
1100                         /* offset might be wider that readsize, but the result
1101                          * of the subtraction is bound by buffersize (see the
1102                          * condition above). buffersize is 8192. */
1103                         readsize = (size_t)(offset-bytesread);
1104                 Py_BEGIN_ALLOW_THREADS
1105                 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
1106                                                  buffer, readsize, self);
1107                 self->pos += chunksize;
1108                 Py_END_ALLOW_THREADS
1109                 bytesread += chunksize;
1110                 if (bzerror == BZ_STREAM_END) {
1111                         self->size = self->pos;
1112                         self->mode = MODE_READ_EOF;
1113                         break;
1114                 } else if (bzerror != BZ_OK) {
1115                         Util_CatchBZ2Error(bzerror);
1116                         goto cleanup;
1117                 }
1118                 if (bytesread == offset)
1119                         break;
1120         }
1121
1122 exit:
1123         Py_INCREF(Py_None);
1124         ret = Py_None;
1125
1126 cleanup:
1127         RELEASE_LOCK(self);
1128         return ret;
1129 }
1130
1131 PyDoc_STRVAR(BZ2File_tell__doc__,
1132 "tell() -> int\n\
1133 \n\
1134 Return the current file position, an integer (may be a long integer).\n\
1135 ");
1136
1137 static PyObject *
1138 BZ2File_tell(BZ2FileObject *self, PyObject *args)
1139 {
1140         PyObject *ret = NULL;
1141
1142         if (self->mode == MODE_CLOSED) {
1143                 PyErr_SetString(PyExc_ValueError,
1144                                 "I/O operation on closed file");
1145                 goto cleanup;
1146         }
1147
1148 #if !defined(HAVE_LARGEFILE_SUPPORT)
1149         ret = PyInt_FromLong(self->pos);
1150 #else
1151         ret = PyLong_FromLongLong(self->pos);
1152 #endif
1153
1154 cleanup:
1155         return ret;
1156 }
1157
1158 PyDoc_STRVAR(BZ2File_close__doc__,
1159 "close() -> None or (perhaps) an integer\n\
1160 \n\
1161 Close the file. Sets data attribute .closed to true. A closed file\n\
1162 cannot be used for further I/O operations. close() may be called more\n\
1163 than once without error.\n\
1164 ");
1165
1166 static PyObject *
1167 BZ2File_close(BZ2FileObject *self)
1168 {
1169         PyObject *ret = NULL;
1170         int bzerror = BZ_OK;
1171
1172         ACQUIRE_LOCK(self);
1173         switch (self->mode) {
1174                 case MODE_READ:
1175                 case MODE_READ_EOF:
1176                         BZ2_bzReadClose(&bzerror, self->fp);
1177                         break;
1178                 case MODE_WRITE:
1179                         BZ2_bzWriteClose(&bzerror, self->fp,
1180                                          0, NULL, NULL);
1181                         break;
1182         }
1183         if (self->fp) {
1184                 PyFile_DecUseCount((PyFileObject *)self->file);
1185                 self->fp = NULL;
1186         }
1187         self->mode = MODE_CLOSED;
1188         ret = PyObject_CallMethod(self->file, "close", NULL);
1189         if (bzerror != BZ_OK) {
1190                 Util_CatchBZ2Error(bzerror);
1191                 Py_XDECREF(ret);
1192                 ret = NULL;
1193         }
1194
1195         RELEASE_LOCK(self);
1196         return ret;
1197 }
1198
1199 static PyObject *BZ2File_getiter(BZ2FileObject *self);
1200
1201 static PyMethodDef BZ2File_methods[] = {
1202         {"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__},
1203         {"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__doc__},
1204         {"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readlines__doc__},
1205         {"xreadlines", (PyCFunction)BZ2File_getiter, METH_VARARGS, BZ2File_xreadlines__doc__},
1206         {"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__},
1207         {"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__doc__},
1208         {"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__},
1209         {"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__},
1210         {"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__},
1211         {NULL,          NULL}           /* sentinel */
1212 };
1213
1214
1215 /* ===================================================================== */
1216 /* Getters and setters of BZ2File. */
1217
1218 /* This is a hacked version of Python's fileobject.c:get_newlines(). */
1219 static PyObject *
1220 BZ2File_get_newlines(BZ2FileObject *self, void *closure)
1221 {
1222         switch (self->f_newlinetypes) {
1223         case NEWLINE_UNKNOWN:
1224                 Py_INCREF(Py_None);
1225                 return Py_None;
1226         case NEWLINE_CR:
1227                 return PyString_FromString("\r");
1228         case NEWLINE_LF:
1229                 return PyString_FromString("\n");
1230         case NEWLINE_CR|NEWLINE_LF:
1231                 return Py_BuildValue("(ss)", "\r", "\n");
1232         case NEWLINE_CRLF:
1233                 return PyString_FromString("\r\n");
1234         case NEWLINE_CR|NEWLINE_CRLF:
1235                 return Py_BuildValue("(ss)", "\r", "\r\n");
1236         case NEWLINE_LF|NEWLINE_CRLF:
1237                 return Py_BuildValue("(ss)", "\n", "\r\n");
1238         case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF:
1239                 return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
1240         default:
1241                 PyErr_Format(PyExc_SystemError,
1242                              "Unknown newlines value 0x%x\n",
1243                              self->f_newlinetypes);
1244                 return NULL;
1245         }
1246 }
1247
1248 static PyObject *
1249 BZ2File_get_closed(BZ2FileObject *self, void *closure)
1250 {
1251         return PyInt_FromLong(self->mode == MODE_CLOSED);
1252 }
1253
1254 static PyObject *
1255 BZ2File_get_mode(BZ2FileObject *self, void *closure)
1256 {
1257         return PyObject_GetAttrString(self->file, "mode");
1258 }
1259
1260 static PyObject *
1261 BZ2File_get_name(BZ2FileObject *self, void *closure)
1262 {
1263         return PyObject_GetAttrString(self->file, "name");
1264 }
1265
1266 static PyGetSetDef BZ2File_getset[] = {
1267         {"closed", (getter)BZ2File_get_closed, NULL,
1268                         "True if the file is closed"},
1269         {"newlines", (getter)BZ2File_get_newlines, NULL,
1270                         "end-of-line convention used in this file"},
1271         {"mode", (getter)BZ2File_get_mode, NULL,
1272                         "file mode ('r', 'w', or 'U')"},
1273         {"name", (getter)BZ2File_get_name, NULL,
1274                         "file name"},
1275         {NULL}  /* Sentinel */
1276 };
1277
1278
1279 /* ===================================================================== */
1280 /* Members of BZ2File_Type. */
1281
1282 #undef OFF
1283 #define OFF(x) offsetof(BZ2FileObject, x)
1284
1285 static PyMemberDef BZ2File_members[] = {
1286         {"softspace",   T_INT,          OFF(f_softspace), 0,
1287          "flag indicating that a space needs to be printed; used by print"},
1288         {NULL}  /* Sentinel */
1289 };
1290
1291 /* ===================================================================== */
1292 /* Slot definitions for BZ2File_Type. */
1293
1294 static int
1295 BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
1296 {
1297         static char *kwlist[] = {"filename", "mode", "buffering",
1298                                        "compresslevel", 0};
1299         PyObject *name;
1300         char *mode = "r";
1301         int buffering = -1;
1302         int compresslevel = 9;
1303         int bzerror;
1304         int mode_char = 0;
1305
1306         self->size = -1;
1307
1308         if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|sii:BZ2File",
1309                                          kwlist, &name, &mode, &buffering,
1310                                          &compresslevel))
1311                 return -1;
1312
1313         if (compresslevel < 1 || compresslevel > 9) {
1314                 PyErr_SetString(PyExc_ValueError,
1315                                 "compresslevel must be between 1 and 9");
1316                 return -1;
1317         }
1318
1319         for (;;) {
1320                 int error = 0;
1321                 switch (*mode) {
1322                         case 'r':
1323                         case 'w':
1324                                 if (mode_char)
1325                                         error = 1;
1326                                 mode_char = *mode;
1327                                 break;
1328
1329                         case 'b':
1330                                 break;
1331
1332                         case 'U':
1333 #ifdef __VMS
1334                                 self->f_univ_newline = 0;
1335 #else
1336                                 self->f_univ_newline = 1;
1337 #endif
1338                                 break;
1339
1340                         default:
1341                                 error = 1;
1342                                 break;
1343                 }
1344                 if (error) {
1345                         PyErr_Format(PyExc_ValueError,
1346                                      "invalid mode char %c", *mode);
1347                         return -1;
1348                 }
1349                 mode++;
1350                 if (*mode == '\0')
1351                         break;
1352         }
1353
1354         if (mode_char == 0) {
1355                 mode_char = 'r';
1356         }
1357
1358         mode = (mode_char == 'r') ? "rb" : "wb";
1359
1360         self->file = PyObject_CallFunction((PyObject*)&PyFile_Type, "(Osi)",
1361                                            name, mode, buffering);
1362         if (self->file == NULL)
1363                 return -1;
1364
1365         /* From now on, we have stuff to dealloc, so jump to error label
1366          * instead of returning */
1367
1368 #ifdef WITH_THREAD
1369         self->lock = PyThread_allocate_lock();
1370         if (!self->lock) {
1371                 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1372                 goto error;
1373         }
1374 #endif
1375
1376         if (mode_char == 'r')
1377                 self->fp = BZ2_bzReadOpen(&bzerror,
1378                                           PyFile_AsFile(self->file),
1379                                           0, 0, NULL, 0);
1380         else
1381                 self->fp = BZ2_bzWriteOpen(&bzerror,
1382                                            PyFile_AsFile(self->file),
1383                                            compresslevel, 0, 0);
1384
1385         if (bzerror != BZ_OK) {
1386                 Util_CatchBZ2Error(bzerror);
1387                 goto error;
1388         }
1389         PyFile_IncUseCount((PyFileObject *)self->file);
1390
1391         self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE;
1392
1393         return 0;
1394
1395 error:
1396         Py_CLEAR(self->file);
1397 #ifdef WITH_THREAD
1398         if (self->lock) {
1399                 PyThread_free_lock(self->lock);
1400                 self->lock = NULL;
1401         }
1402 #endif
1403         return -1;
1404 }
1405
1406 static void
1407 BZ2File_dealloc(BZ2FileObject *self)
1408 {
1409         int bzerror;
1410 #ifdef WITH_THREAD
1411         if (self->lock)
1412                 PyThread_free_lock(self->lock);
1413 #endif
1414         switch (self->mode) {
1415                 case MODE_READ:
1416                 case MODE_READ_EOF:
1417                         BZ2_bzReadClose(&bzerror, self->fp);
1418                         break;
1419                 case MODE_WRITE:
1420                         BZ2_bzWriteClose(&bzerror, self->fp,
1421                                          0, NULL, NULL);
1422                         break;
1423         }
1424         if (self->fp) {
1425                 PyFile_DecUseCount((PyFileObject *)self->file);
1426                 self->fp = NULL;
1427         }
1428         Util_DropReadAhead(self);
1429         Py_XDECREF(self->file);
1430         Py_TYPE(self)->tp_free((PyObject *)self);
1431 }
1432
1433 /* This is a hacked version of Python's fileobject.c:file_getiter(). */
1434 static PyObject *
1435 BZ2File_getiter(BZ2FileObject *self)
1436 {
1437         if (self->mode == MODE_CLOSED) {
1438                 PyErr_SetString(PyExc_ValueError,
1439                                 "I/O operation on closed file");
1440                 return NULL;
1441         }
1442         Py_INCREF((PyObject*)self);
1443         return (PyObject *)self;
1444 }
1445
1446 /* This is a hacked version of Python's fileobject.c:file_iternext(). */
1447 #define READAHEAD_BUFSIZE 8192
1448 static PyObject *
1449 BZ2File_iternext(BZ2FileObject *self)
1450 {
1451         PyStringObject* ret;
1452         ACQUIRE_LOCK(self);
1453         if (self->mode == MODE_CLOSED) {
1454                 RELEASE_LOCK(self);
1455                 PyErr_SetString(PyExc_ValueError,
1456                                 "I/O operation on closed file");
1457                 return NULL;
1458         }
1459         ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE);
1460         RELEASE_LOCK(self);
1461         if (ret == NULL || PyString_GET_SIZE(ret) == 0) {
1462                 Py_XDECREF(ret);
1463                 return NULL;
1464         }
1465         return (PyObject *)ret;
1466 }
1467
1468 /* ===================================================================== */
1469 /* BZ2File_Type definition. */
1470
1471 PyDoc_VAR(BZ2File__doc__) =
1472 PyDoc_STR(
1473 "BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
1474 \n\
1475 Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
1476 writing. When opened for writing, the file will be created if it doesn't\n\
1477 exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
1478 unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
1479 is given, must be a number between 1 and 9.\n\
1480 ")
1481 PyDoc_STR(
1482 "\n\
1483 Add a 'U' to mode to open the file for input with universal newline\n\
1484 support. Any line ending in the input file will be seen as a '\\n' in\n\
1485 Python. Also, a file so opened gains the attribute 'newlines'; the value\n\
1486 for this attribute is one of None (no newline read yet), '\\r', '\\n',\n\
1487 '\\r\\n' or a tuple containing all the newline types seen. Universal\n\
1488 newlines are available only when reading.\n\
1489 ")
1490 ;
1491
1492 static PyTypeObject BZ2File_Type = {
1493         PyVarObject_HEAD_INIT(NULL, 0)
1494         "bz2.BZ2File",          /*tp_name*/
1495         sizeof(BZ2FileObject),  /*tp_basicsize*/
1496         0,                      /*tp_itemsize*/
1497         (destructor)BZ2File_dealloc, /*tp_dealloc*/
1498         0,                      /*tp_print*/
1499         0,                      /*tp_getattr*/
1500         0,                      /*tp_setattr*/
1501         0,                      /*tp_compare*/
1502         0,                      /*tp_repr*/
1503         0,                      /*tp_as_number*/
1504         0,                      /*tp_as_sequence*/
1505         0,                      /*tp_as_mapping*/
1506         0,                      /*tp_hash*/
1507         0,                      /*tp_call*/
1508         0,                      /*tp_str*/
1509         PyObject_GenericGetAttr,/*tp_getattro*/
1510         PyObject_GenericSetAttr,/*tp_setattro*/
1511         0,                      /*tp_as_buffer*/
1512         Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1513         BZ2File__doc__,         /*tp_doc*/
1514         0,                      /*tp_traverse*/
1515         0,                      /*tp_clear*/
1516         0,                      /*tp_richcompare*/
1517         0,                      /*tp_weaklistoffset*/
1518         (getiterfunc)BZ2File_getiter, /*tp_iter*/
1519         (iternextfunc)BZ2File_iternext, /*tp_iternext*/
1520         BZ2File_methods,        /*tp_methods*/
1521         BZ2File_members,        /*tp_members*/
1522         BZ2File_getset,         /*tp_getset*/
1523         0,                      /*tp_base*/
1524         0,                      /*tp_dict*/
1525         0,                      /*tp_descr_get*/
1526         0,                      /*tp_descr_set*/
1527         0,                      /*tp_dictoffset*/
1528         (initproc)BZ2File_init, /*tp_init*/
1529         PyType_GenericAlloc,    /*tp_alloc*/
1530         PyType_GenericNew,      /*tp_new*/
1531         _PyObject_Del,          /*tp_free*/
1532         0,                      /*tp_is_gc*/
1533 };
1534
1535
1536 /* ===================================================================== */
1537 /* Methods of BZ2Comp. */
1538
1539 PyDoc_STRVAR(BZ2Comp_compress__doc__,
1540 "compress(data) -> string\n\
1541 \n\
1542 Provide more data to the compressor object. It will return chunks of\n\
1543 compressed data whenever possible. When you've finished providing data\n\
1544 to compress, call the flush() method to finish the compression process,\n\
1545 and return what is left in the internal buffers.\n\
1546 ");
1547
1548 static PyObject *
1549 BZ2Comp_compress(BZ2CompObject *self, PyObject *args)
1550 {
1551         char *data;
1552         int datasize;
1553         int bufsize = SMALLCHUNK;
1554         PY_LONG_LONG totalout;
1555         PyObject *ret = NULL;
1556         bz_stream *bzs = &self->bzs;
1557         int bzerror;
1558
1559         if (!PyArg_ParseTuple(args, "s#:compress", &data, &datasize))
1560                 return NULL;
1561
1562         if (datasize == 0)
1563                 return PyString_FromString("");
1564
1565         ACQUIRE_LOCK(self);
1566         if (!self->running) {
1567                 PyErr_SetString(PyExc_ValueError,
1568                                 "this object was already flushed");
1569                 goto error;
1570         }
1571
1572         ret = PyString_FromStringAndSize(NULL, bufsize);
1573         if (!ret)
1574                 goto error;
1575
1576         bzs->next_in = data;
1577         bzs->avail_in = datasize;
1578         bzs->next_out = BUF(ret);
1579         bzs->avail_out = bufsize;
1580
1581         totalout = BZS_TOTAL_OUT(bzs);
1582
1583         for (;;) {
1584                 Py_BEGIN_ALLOW_THREADS
1585                 bzerror = BZ2_bzCompress(bzs, BZ_RUN);
1586                 Py_END_ALLOW_THREADS
1587                 if (bzerror != BZ_RUN_OK) {
1588                         Util_CatchBZ2Error(bzerror);
1589                         goto error;
1590                 }
1591                 if (bzs->avail_in == 0)
1592                         break; /* no more input data */
1593                 if (bzs->avail_out == 0) {
1594                         bufsize = Util_NewBufferSize(bufsize);
1595                         if (_PyString_Resize(&ret, bufsize) < 0) {
1596                                 BZ2_bzCompressEnd(bzs);
1597                                 goto error;
1598                         }
1599                         bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1600                                                     - totalout);
1601                         bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1602                 }
1603         }
1604
1605         _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
1606
1607         RELEASE_LOCK(self);
1608         return ret;
1609
1610 error:
1611         RELEASE_LOCK(self);
1612         Py_XDECREF(ret);
1613         return NULL;
1614 }
1615
1616 PyDoc_STRVAR(BZ2Comp_flush__doc__,
1617 "flush() -> string\n\
1618 \n\
1619 Finish the compression process and return what is left in internal buffers.\n\
1620 You must not use the compressor object after calling this method.\n\
1621 ");
1622
1623 static PyObject *
1624 BZ2Comp_flush(BZ2CompObject *self)
1625 {
1626         int bufsize = SMALLCHUNK;
1627         PyObject *ret = NULL;
1628         bz_stream *bzs = &self->bzs;
1629         PY_LONG_LONG totalout;
1630         int bzerror;
1631
1632         ACQUIRE_LOCK(self);
1633         if (!self->running) {
1634                 PyErr_SetString(PyExc_ValueError, "object was already "
1635                                                   "flushed");
1636                 goto error;
1637         }
1638         self->running = 0;
1639
1640         ret = PyString_FromStringAndSize(NULL, bufsize);
1641         if (!ret)
1642                 goto error;
1643
1644         bzs->next_out = BUF(ret);
1645         bzs->avail_out = bufsize;
1646
1647         totalout = BZS_TOTAL_OUT(bzs);
1648
1649         for (;;) {
1650                 Py_BEGIN_ALLOW_THREADS
1651                 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1652                 Py_END_ALLOW_THREADS
1653                 if (bzerror == BZ_STREAM_END) {
1654                         break;
1655                 } else if (bzerror != BZ_FINISH_OK) {
1656                         Util_CatchBZ2Error(bzerror);
1657                         goto error;
1658                 }
1659                 if (bzs->avail_out == 0) {
1660                         bufsize = Util_NewBufferSize(bufsize);
1661                         if (_PyString_Resize(&ret, bufsize) < 0)
1662                                 goto error;
1663                         bzs->next_out = BUF(ret);
1664                         bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1665                                                     - totalout);
1666                         bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1667                 }
1668         }
1669
1670         if (bzs->avail_out != 0)
1671                 _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
1672
1673         RELEASE_LOCK(self);
1674         return ret;
1675
1676 error:
1677         RELEASE_LOCK(self);
1678         Py_XDECREF(ret);
1679         return NULL;
1680 }
1681
1682 static PyMethodDef BZ2Comp_methods[] = {
1683         {"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS,
1684          BZ2Comp_compress__doc__},
1685         {"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS,
1686          BZ2Comp_flush__doc__},
1687         {NULL,          NULL}           /* sentinel */
1688 };
1689
1690
1691 /* ===================================================================== */
1692 /* Slot definitions for BZ2Comp_Type. */
1693
1694 static int
1695 BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs)
1696 {
1697         int compresslevel = 9;
1698         int bzerror;
1699         static char *kwlist[] = {"compresslevel", 0};
1700
1701         if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor",
1702                                          kwlist, &compresslevel))
1703                 return -1;
1704
1705         if (compresslevel < 1 || compresslevel > 9) {
1706                 PyErr_SetString(PyExc_ValueError,
1707                                 "compresslevel must be between 1 and 9");
1708                 goto error;
1709         }
1710
1711 #ifdef WITH_THREAD
1712         self->lock = PyThread_allocate_lock();
1713         if (!self->lock) {
1714                 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1715                 goto error;
1716         }
1717 #endif
1718
1719         memset(&self->bzs, 0, sizeof(bz_stream));
1720         bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
1721         if (bzerror != BZ_OK) {
1722                 Util_CatchBZ2Error(bzerror);
1723                 goto error;
1724         }
1725
1726         self->running = 1;
1727
1728         return 0;
1729 error:
1730 #ifdef WITH_THREAD
1731         if (self->lock) {
1732                 PyThread_free_lock(self->lock);
1733                 self->lock = NULL;
1734         }
1735 #endif
1736         return -1;
1737 }
1738
1739 static void
1740 BZ2Comp_dealloc(BZ2CompObject *self)
1741 {
1742 #ifdef WITH_THREAD
1743         if (self->lock)
1744                 PyThread_free_lock(self->lock);
1745 #endif
1746         BZ2_bzCompressEnd(&self->bzs);
1747         Py_TYPE(self)->tp_free((PyObject *)self);
1748 }
1749
1750
1751 /* ===================================================================== */
1752 /* BZ2Comp_Type definition. */
1753
1754 PyDoc_STRVAR(BZ2Comp__doc__,
1755 "BZ2Compressor([compresslevel=9]) -> compressor object\n\
1756 \n\
1757 Create a new compressor object. This object may be used to compress\n\
1758 data sequentially. If you want to compress data in one shot, use the\n\
1759 compress() function instead. The compresslevel parameter, if given,\n\
1760 must be a number between 1 and 9.\n\
1761 ");
1762
1763 static PyTypeObject BZ2Comp_Type = {
1764         PyVarObject_HEAD_INIT(NULL, 0)
1765         "bz2.BZ2Compressor",    /*tp_name*/
1766         sizeof(BZ2CompObject),  /*tp_basicsize*/
1767         0,                      /*tp_itemsize*/
1768         (destructor)BZ2Comp_dealloc, /*tp_dealloc*/
1769         0,                      /*tp_print*/
1770         0,                      /*tp_getattr*/
1771         0,                      /*tp_setattr*/
1772         0,                      /*tp_compare*/
1773         0,                      /*tp_repr*/
1774         0,                      /*tp_as_number*/
1775         0,                      /*tp_as_sequence*/
1776         0,                      /*tp_as_mapping*/
1777         0,                      /*tp_hash*/
1778         0,                      /*tp_call*/
1779         0,                      /*tp_str*/
1780         PyObject_GenericGetAttr,/*tp_getattro*/
1781         PyObject_GenericSetAttr,/*tp_setattro*/
1782         0,                      /*tp_as_buffer*/
1783         Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1784         BZ2Comp__doc__,         /*tp_doc*/
1785         0,                      /*tp_traverse*/
1786         0,                      /*tp_clear*/
1787         0,                      /*tp_richcompare*/
1788         0,                      /*tp_weaklistoffset*/
1789         0,                      /*tp_iter*/
1790         0,                      /*tp_iternext*/
1791         BZ2Comp_methods,        /*tp_methods*/
1792         0,                      /*tp_members*/
1793         0,                      /*tp_getset*/
1794         0,                      /*tp_base*/
1795         0,                      /*tp_dict*/
1796         0,                      /*tp_descr_get*/
1797         0,                      /*tp_descr_set*/
1798         0,                      /*tp_dictoffset*/
1799         (initproc)BZ2Comp_init, /*tp_init*/
1800         PyType_GenericAlloc,    /*tp_alloc*/
1801         PyType_GenericNew,      /*tp_new*/
1802         _PyObject_Del,          /*tp_free*/
1803         0,                      /*tp_is_gc*/
1804 };
1805
1806
1807 /* ===================================================================== */
1808 /* Members of BZ2Decomp. */
1809
1810 #undef OFF
1811 #define OFF(x) offsetof(BZ2DecompObject, x)
1812
1813 static PyMemberDef BZ2Decomp_members[] = {
1814         {"unused_data", T_OBJECT, OFF(unused_data), RO},
1815         {NULL}  /* Sentinel */
1816 };
1817
1818
1819 /* ===================================================================== */
1820 /* Methods of BZ2Decomp. */
1821
1822 PyDoc_STRVAR(BZ2Decomp_decompress__doc__,
1823 "decompress(data) -> string\n\
1824 \n\
1825 Provide more data to the decompressor object. It will return chunks\n\
1826 of decompressed data whenever possible. If you try to decompress data\n\
1827 after the end of stream is found, EOFError will be raised. If any data\n\
1828 was found after the end of stream, it'll be ignored and saved in\n\
1829 unused_data attribute.\n\
1830 ");
1831
1832 static PyObject *
1833 BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args)
1834 {
1835         char *data;
1836         int datasize;
1837         int bufsize = SMALLCHUNK;
1838         PY_LONG_LONG totalout;
1839         PyObject *ret = NULL;
1840         bz_stream *bzs = &self->bzs;
1841         int bzerror;
1842
1843         if (!PyArg_ParseTuple(args, "s#:decompress", &data, &datasize))
1844                 return NULL;
1845
1846         ACQUIRE_LOCK(self);
1847         if (!self->running) {
1848                 PyErr_SetString(PyExc_EOFError, "end of stream was "
1849                                                 "already found");
1850                 goto error;
1851         }
1852
1853         ret = PyString_FromStringAndSize(NULL, bufsize);
1854         if (!ret)
1855                 goto error;
1856
1857         bzs->next_in = data;
1858         bzs->avail_in = datasize;
1859         bzs->next_out = BUF(ret);
1860         bzs->avail_out = bufsize;
1861
1862         totalout = BZS_TOTAL_OUT(bzs);
1863
1864         for (;;) {
1865                 Py_BEGIN_ALLOW_THREADS
1866                 bzerror = BZ2_bzDecompress(bzs);
1867                 Py_END_ALLOW_THREADS
1868                 if (bzerror == BZ_STREAM_END) {
1869                         if (bzs->avail_in != 0) {
1870                                 Py_DECREF(self->unused_data);
1871                                 self->unused_data =
1872                                     PyString_FromStringAndSize(bzs->next_in,
1873                                                                bzs->avail_in);
1874                         }
1875                         self->running = 0;
1876                         break;
1877                 }
1878                 if (bzerror != BZ_OK) {
1879                         Util_CatchBZ2Error(bzerror);
1880                         goto error;
1881                 }
1882                 if (bzs->avail_in == 0)
1883                         break; /* no more input data */
1884                 if (bzs->avail_out == 0) {
1885                         bufsize = Util_NewBufferSize(bufsize);
1886                         if (_PyString_Resize(&ret, bufsize) < 0) {
1887                                 BZ2_bzDecompressEnd(bzs);
1888                                 goto error;
1889                         }
1890                         bzs->next_out = BUF(ret);
1891                         bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1892                                                     - totalout);
1893                         bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1894                 }
1895         }
1896
1897         if (bzs->avail_out != 0)
1898                 _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
1899
1900         RELEASE_LOCK(self);
1901         return ret;
1902
1903 error:
1904         RELEASE_LOCK(self);
1905         Py_XDECREF(ret);
1906         return NULL;
1907 }
1908
1909 static PyMethodDef BZ2Decomp_methods[] = {
1910         {"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_decompress__doc__},
1911         {NULL,          NULL}           /* sentinel */
1912 };
1913
1914
1915 /* ===================================================================== */
1916 /* Slot definitions for BZ2Decomp_Type. */
1917
1918 static int
1919 BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs)
1920 {
1921         int bzerror;
1922
1923         if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
1924                 return -1;
1925
1926 #ifdef WITH_THREAD
1927         self->lock = PyThread_allocate_lock();
1928         if (!self->lock) {
1929                 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1930                 goto error;
1931         }
1932 #endif
1933
1934         self->unused_data = PyString_FromString("");
1935         if (!self->unused_data)
1936                 goto error;
1937
1938         memset(&self->bzs, 0, sizeof(bz_stream));
1939         bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
1940         if (bzerror != BZ_OK) {
1941                 Util_CatchBZ2Error(bzerror);
1942                 goto error;
1943         }
1944
1945         self->running = 1;
1946
1947         return 0;
1948
1949 error:
1950 #ifdef WITH_THREAD
1951         if (self->lock) {
1952                 PyThread_free_lock(self->lock);
1953                 self->lock = NULL;
1954         }
1955 #endif
1956         Py_CLEAR(self->unused_data);
1957         return -1;
1958 }
1959
1960 static void
1961 BZ2Decomp_dealloc(BZ2DecompObject *self)
1962 {
1963 #ifdef WITH_THREAD
1964         if (self->lock)
1965                 PyThread_free_lock(self->lock);
1966 #endif
1967         Py_XDECREF(self->unused_data);
1968         BZ2_bzDecompressEnd(&self->bzs);
1969         Py_TYPE(self)->tp_free((PyObject *)self);
1970 }
1971
1972
1973 /* ===================================================================== */
1974 /* BZ2Decomp_Type definition. */
1975
1976 PyDoc_STRVAR(BZ2Decomp__doc__,
1977 "BZ2Decompressor() -> decompressor object\n\
1978 \n\
1979 Create a new decompressor object. This object may be used to decompress\n\
1980 data sequentially. If you want to decompress data in one shot, use the\n\
1981 decompress() function instead.\n\
1982 ");
1983
1984 static PyTypeObject BZ2Decomp_Type = {
1985         PyVarObject_HEAD_INIT(NULL, 0)
1986         "bz2.BZ2Decompressor",  /*tp_name*/
1987         sizeof(BZ2DecompObject), /*tp_basicsize*/
1988         0,                      /*tp_itemsize*/
1989         (destructor)BZ2Decomp_dealloc, /*tp_dealloc*/
1990         0,                      /*tp_print*/
1991         0,                      /*tp_getattr*/
1992         0,                      /*tp_setattr*/
1993         0,                      /*tp_compare*/
1994         0,                      /*tp_repr*/
1995         0,                      /*tp_as_number*/
1996         0,                      /*tp_as_sequence*/
1997         0,                      /*tp_as_mapping*/
1998         0,                      /*tp_hash*/
1999         0,                      /*tp_call*/
2000         0,                      /*tp_str*/
2001         PyObject_GenericGetAttr,/*tp_getattro*/
2002         PyObject_GenericSetAttr,/*tp_setattro*/
2003         0,                      /*tp_as_buffer*/
2004         Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
2005         BZ2Decomp__doc__,       /*tp_doc*/
2006         0,                      /*tp_traverse*/
2007         0,                      /*tp_clear*/
2008         0,                      /*tp_richcompare*/
2009         0,                      /*tp_weaklistoffset*/
2010         0,                      /*tp_iter*/
2011         0,                      /*tp_iternext*/
2012         BZ2Decomp_methods,      /*tp_methods*/
2013         BZ2Decomp_members,      /*tp_members*/
2014         0,                      /*tp_getset*/
2015         0,                      /*tp_base*/
2016         0,                      /*tp_dict*/
2017         0,                      /*tp_descr_get*/
2018         0,                      /*tp_descr_set*/
2019         0,                      /*tp_dictoffset*/
2020         (initproc)BZ2Decomp_init, /*tp_init*/
2021         PyType_GenericAlloc,    /*tp_alloc*/
2022         PyType_GenericNew,      /*tp_new*/
2023         _PyObject_Del,          /*tp_free*/
2024         0,                      /*tp_is_gc*/
2025 };
2026
2027
2028 /* ===================================================================== */
2029 /* Module functions. */
2030
2031 PyDoc_STRVAR(bz2_compress__doc__,
2032 "compress(data [, compresslevel=9]) -> string\n\
2033 \n\
2034 Compress data in one shot. If you want to compress data sequentially,\n\
2035 use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
2036 given, must be a number between 1 and 9.\n\
2037 ");
2038
2039 static PyObject *
2040 bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs)
2041 {
2042         int compresslevel=9;
2043         char *data;
2044         int datasize;
2045         int bufsize;
2046         PyObject *ret = NULL;
2047         bz_stream _bzs;
2048         bz_stream *bzs = &_bzs;
2049         int bzerror;
2050         static char *kwlist[] = {"data", "compresslevel", 0};
2051
2052         if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|i",
2053                                          kwlist, &data, &datasize,
2054                                          &compresslevel))
2055                 return NULL;
2056
2057         if (compresslevel < 1 || compresslevel > 9) {
2058                 PyErr_SetString(PyExc_ValueError,
2059                                 "compresslevel must be between 1 and 9");
2060                 return NULL;
2061         }
2062
2063         /* Conforming to bz2 manual, this is large enough to fit compressed
2064          * data in one shot. We will check it later anyway. */
2065         bufsize = datasize + (datasize/100+1) + 600;
2066
2067         ret = PyString_FromStringAndSize(NULL, bufsize);
2068         if (!ret)
2069                 return NULL;
2070
2071         memset(bzs, 0, sizeof(bz_stream));
2072
2073         bzs->next_in = data;
2074         bzs->avail_in = datasize;
2075         bzs->next_out = BUF(ret);
2076         bzs->avail_out = bufsize;
2077
2078         bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0);
2079         if (bzerror != BZ_OK) {
2080                 Util_CatchBZ2Error(bzerror);
2081                 Py_DECREF(ret);
2082                 return NULL;
2083         }
2084
2085         for (;;) {
2086                 Py_BEGIN_ALLOW_THREADS
2087                 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
2088                 Py_END_ALLOW_THREADS
2089                 if (bzerror == BZ_STREAM_END) {
2090                         break;
2091                 } else if (bzerror != BZ_FINISH_OK) {
2092                         BZ2_bzCompressEnd(bzs);
2093                         Util_CatchBZ2Error(bzerror);
2094                         Py_DECREF(ret);
2095                         return NULL;
2096                 }
2097                 if (bzs->avail_out == 0) {
2098                         bufsize = Util_NewBufferSize(bufsize);
2099                         if (_PyString_Resize(&ret, bufsize) < 0) {
2100                                 BZ2_bzCompressEnd(bzs);
2101                                 Py_DECREF(ret);
2102                                 return NULL;
2103                         }
2104                         bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2105                         bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2106                 }
2107         }
2108
2109         if (bzs->avail_out != 0)
2110                 _PyString_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs));
2111         BZ2_bzCompressEnd(bzs);
2112
2113         return ret;
2114 }
2115
2116 PyDoc_STRVAR(bz2_decompress__doc__,
2117 "decompress(data) -> decompressed data\n\
2118 \n\
2119 Decompress data in one shot. If you want to decompress data sequentially,\n\
2120 use an instance of BZ2Decompressor instead.\n\
2121 ");
2122
2123 static PyObject *
2124 bz2_decompress(PyObject *self, PyObject *args)
2125 {
2126         char *data;
2127         int datasize;
2128         int bufsize = SMALLCHUNK;
2129         PyObject *ret;
2130         bz_stream _bzs;
2131         bz_stream *bzs = &_bzs;
2132         int bzerror;
2133
2134         if (!PyArg_ParseTuple(args, "s#:decompress", &data, &datasize))
2135                 return NULL;
2136
2137         if (datasize == 0)
2138                 return PyString_FromString("");
2139
2140         ret = PyString_FromStringAndSize(NULL, bufsize);
2141         if (!ret)
2142                 return NULL;
2143
2144         memset(bzs, 0, sizeof(bz_stream));
2145
2146         bzs->next_in = data;
2147         bzs->avail_in = datasize;
2148         bzs->next_out = BUF(ret);
2149         bzs->avail_out = bufsize;
2150
2151         bzerror = BZ2_bzDecompressInit(bzs, 0, 0);
2152         if (bzerror != BZ_OK) {
2153                 Util_CatchBZ2Error(bzerror);
2154                 Py_DECREF(ret);
2155                 return NULL;
2156         }
2157
2158         for (;;) {
2159                 Py_BEGIN_ALLOW_THREADS
2160                 bzerror = BZ2_bzDecompress(bzs);
2161                 Py_END_ALLOW_THREADS
2162                 if (bzerror == BZ_STREAM_END) {
2163                         break;
2164                 } else if (bzerror != BZ_OK) {
2165                         BZ2_bzDecompressEnd(bzs);
2166                         Util_CatchBZ2Error(bzerror);
2167                         Py_DECREF(ret);
2168                         return NULL;
2169                 }
2170                 if (bzs->avail_in == 0) {
2171                         BZ2_bzDecompressEnd(bzs);
2172                         PyErr_SetString(PyExc_ValueError,
2173                                         "couldn't find end of stream");
2174                         Py_DECREF(ret);
2175                         return NULL;
2176                 }
2177                 if (bzs->avail_out == 0) {
2178                         bufsize = Util_NewBufferSize(bufsize);
2179                         if (_PyString_Resize(&ret, bufsize) < 0) {
2180                                 BZ2_bzDecompressEnd(bzs);
2181                                 Py_DECREF(ret);
2182                                 return NULL;
2183                         }
2184                         bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2185                         bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2186                 }
2187         }
2188
2189         if (bzs->avail_out != 0)
2190                 _PyString_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs));
2191         BZ2_bzDecompressEnd(bzs);
2192
2193         return ret;
2194 }
2195
2196 static PyMethodDef bz2_methods[] = {
2197         {"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS,
2198                 bz2_compress__doc__},
2199         {"decompress", (PyCFunction) bz2_decompress, METH_VARARGS,
2200                 bz2_decompress__doc__},
2201         {NULL,          NULL}           /* sentinel */
2202 };
2203
2204 /* ===================================================================== */
2205 /* Initialization function. */
2206
2207 PyDoc_STRVAR(bz2__doc__,
2208 "The python bz2 module provides a comprehensive interface for\n\
2209 the bz2 compression library. It implements a complete file\n\
2210 interface, one shot (de)compression functions, and types for\n\
2211 sequential (de)compression.\n\
2212 ");
2213
2214 PyMODINIT_FUNC
2215 initbz2(void)
2216 {
2217         PyObject *m;
2218
2219         Py_TYPE(&BZ2File_Type) = &PyType_Type;
2220         Py_TYPE(&BZ2Comp_Type) = &PyType_Type;
2221         Py_TYPE(&BZ2Decomp_Type) = &PyType_Type;
2222
2223         m = Py_InitModule3("bz2", bz2_methods, bz2__doc__);
2224         if (m == NULL)
2225                 return;
2226
2227         PyModule_AddObject(m, "__author__", PyString_FromString(__author__));
2228
2229         Py_INCREF(&BZ2File_Type);
2230         PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type);
2231
2232         Py_INCREF(&BZ2Comp_Type);
2233         PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type);
2234
2235         Py_INCREF(&BZ2Decomp_Type);
2236         PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type);
2237 }