Modules/bz2module.c

   1 /*
   2
   3 python-bz2 - python bz2 library interface
   4
   5 Copyright (c) 2002  Gustavo Niemeyer <niemeyer@conectiva.com>
   6 Copyright (c) 2002  Python Software Foundation; All Rights Reserved
   7
   8 */
   9
  10 #include "Python.h"
  11 #include <stdio.h>
  12 #include <bzlib.h>
  13 #include "structmember.h"
  14
  15 #ifdef WITH_THREAD
  16 #include "pythread.h"
  17 #endif
  18
  19 static char __author__[] =
  20 "The bz2 python module was written by:\n\
  21 \n\
  22     Gustavo Niemeyer <niemeyer@conectiva.com>\n\
  23 ";
  24
  25 /* Our very own off_t-like type, 64-bit if possible */
  26 /* copied from Objects/fileobject.c */
  27 #if !defined(HAVE_LARGEFILE_SUPPORT)
  28 typedef off_t Py_off_t;
  29 #elif SIZEOF_OFF_T >= 8
  30 typedef off_t Py_off_t;
  31 #elif SIZEOF_FPOS_T >= 8
  32 typedef fpos_t Py_off_t;
  33 #else
  34 #error "Large file support, but neither off_t nor fpos_t is large enough."
  35 #endif
  36
  37 #define BUF(v) PyString_AS_STRING((PyStringObject *)v)
  38
  39 #define MODE_CLOSED   0
  40 #define MODE_READ     1
  41 #define MODE_READ_EOF 2
  42 #define MODE_WRITE    3
  43
  44 #define BZ2FileObject_Check(v)  (Py_TYPE(v) == &BZ2File_Type)
  45
  46
  47 #ifdef BZ_CONFIG_ERROR
  48
  49 #if SIZEOF_LONG >= 8
  50 #define BZS_TOTAL_OUT(bzs) \
  51         (((long)bzs->total_out_hi32 << 32) + bzs->total_out_lo32)
  52 #elif SIZEOF_LONG_LONG >= 8
  53 #define BZS_TOTAL_OUT(bzs) \
  54         (((PY_LONG_LONG)bzs->total_out_hi32 << 32) + bzs->total_out_lo32)
  55 #else
  56 #define BZS_TOTAL_OUT(bzs) \
  57         bzs->total_out_lo32
  58 #endif
  59
  60 #else /* ! BZ_CONFIG_ERROR */
  61
  62 #define BZ2_bzRead bzRead
  63 #define BZ2_bzReadOpen bzReadOpen
  64 #define BZ2_bzReadClose bzReadClose
  65 #define BZ2_bzWrite bzWrite
  66 #define BZ2_bzWriteOpen bzWriteOpen
  67 #define BZ2_bzWriteClose bzWriteClose
  68 #define BZ2_bzCompress bzCompress
  69 #define BZ2_bzCompressInit bzCompressInit
  70 #define BZ2_bzCompressEnd bzCompressEnd
  71 #define BZ2_bzDecompress bzDecompress
  72 #define BZ2_bzDecompressInit bzDecompressInit
  73 #define BZ2_bzDecompressEnd bzDecompressEnd
  74
  75 #define BZS_TOTAL_OUT(bzs) bzs->total_out
  76
  77 #endif /* ! BZ_CONFIG_ERROR */
  78
  79
  80 #ifdef WITH_THREAD
  81 #define ACQUIRE_LOCK(obj) PyThread_acquire_lock(obj->lock, 1)
  82 #define RELEASE_LOCK(obj) PyThread_release_lock(obj->lock)
  83 #else
  84 #define ACQUIRE_LOCK(obj)
  85 #define RELEASE_LOCK(obj)
  86 #endif
  87
  88 /* Bits in f_newlinetypes */
  89 #define NEWLINE_UNKNOWN 0       /* No newline seen, yet */
  90 #define NEWLINE_CR 1            /* \r newline seen */
  91 #define NEWLINE_LF 2            /* \n newline seen */
  92 #define NEWLINE_CRLF 4          /* \r\n newline seen */
  93
  94 /* ===================================================================== */
  95 /* Structure definitions. */
  96
  97 typedef struct {
  98         PyObject_HEAD
  99         PyObject *file;
 100
 101         char* f_buf;            /* Allocated readahead buffer */
 102         char* f_bufend;         /* Points after last occupied position */
 103         char* f_bufptr;         /* Current buffer position */
 104
 105         int f_softspace;        /* Flag used by 'print' command */
 106
 107         int f_univ_newline;     /* Handle any newline convention */
 108         int f_newlinetypes;     /* Types of newlines seen */
 109         int f_skipnextlf;       /* Skip next \n */
 110
 111         BZFILE *fp;
 112         int mode;
 113         Py_off_t pos;
 114         Py_off_t size;
 115 #ifdef WITH_THREAD
 116         PyThread_type_lock lock;
 117 #endif
 118 } BZ2FileObject;
 119
 120 typedef struct {
 121         PyObject_HEAD
 122         bz_stream bzs;
 123         int running;
 124 #ifdef WITH_THREAD
 125         PyThread_type_lock lock;
 126 #endif
 127 } BZ2CompObject;
 128
 129 typedef struct {
 130         PyObject_HEAD
 131         bz_stream bzs;
 132         int running;
 133         PyObject *unused_data;
 134 #ifdef WITH_THREAD
 135         PyThread_type_lock lock;
 136 #endif
 137 } BZ2DecompObject;
 138
 139 /* ===================================================================== */
 140 /* Utility functions. */
 141
 142 static int
 143 Util_CatchBZ2Error(int bzerror)
 144 {
 145         int ret = 0;
 146         switch(bzerror) {
 147                 case BZ_OK:
 148                 case BZ_STREAM_END:
 149                         break;
 150
 151 #ifdef BZ_CONFIG_ERROR
 152                 case BZ_CONFIG_ERROR:
 153                         PyErr_SetString(PyExc_SystemError,
 154                                         "the bz2 library was not compiled "
 155                                         "correctly");
 156                         ret = 1;
 157                         break;
 158 #endif
 159
 160                 case BZ_PARAM_ERROR:
 161                         PyErr_SetString(PyExc_ValueError,
 162                                         "the bz2 library has received wrong "
 163                                         "parameters");
 164                         ret = 1;
 165                         break;
 166
 167                 case BZ_MEM_ERROR:
 168                         PyErr_NoMemory();
 169                         ret = 1;
 170                         break;
 171
 172                 case BZ_DATA_ERROR:
 173                 case BZ_DATA_ERROR_MAGIC:
 174                         PyErr_SetString(PyExc_IOError, "invalid data stream");
 175                         ret = 1;
 176                         break;
 177
 178                 case BZ_IO_ERROR:
 179                         PyErr_SetString(PyExc_IOError, "unknown IO error");
 180                         ret = 1;
 181                         break;
 182
 183                 case BZ_UNEXPECTED_EOF:
 184                         PyErr_SetString(PyExc_EOFError,
 185                                         "compressed file ended before the "
 186                                         "logical end-of-stream was detected");
 187                         ret = 1;
 188                         break;
 189
 190                 case BZ_SEQUENCE_ERROR:
 191                         PyErr_SetString(PyExc_RuntimeError,
 192                                         "wrong sequence of bz2 library "
 193                                         "commands used");
 194                         ret = 1;
 195                         break;
 196         }
 197         return ret;
 198 }
 199
 200 #if BUFSIZ < 8192
 201 #define SMALLCHUNK 8192
 202 #else
 203 #define SMALLCHUNK BUFSIZ
 204 #endif
 205
 206 #if SIZEOF_INT < 4
 207 #define BIGCHUNK  (512 * 32)
 208 #else
 209 #define BIGCHUNK  (512 * 1024)
 210 #endif
 211
 212 /* This is a hacked version of Python's fileobject.c:new_buffersize(). */
 213 static size_t
 214 Util_NewBufferSize(size_t currentsize)
 215 {
 216         if (currentsize > SMALLCHUNK) {
 217                 /* Keep doubling until we reach BIGCHUNK;
 218                    then keep adding BIGCHUNK. */
 219                 if (currentsize <= BIGCHUNK)
 220                         return currentsize + currentsize;
 221                 else
 222                         return currentsize + BIGCHUNK;
 223         }
 224         return currentsize + SMALLCHUNK;
 225 }
 226
 227 /* This is a hacked version of Python's fileobject.c:get_line(). */
 228 static PyObject *
 229 Util_GetLine(BZ2FileObject *f, int n)
 230 {
 231         char c;
 232         char *buf, *end;
 233         size_t total_v_size;    /* total # of slots in buffer */
 234         size_t used_v_size;     /* # used slots in buffer */
 235         size_t increment;       /* amount to increment the buffer */
 236         PyObject *v;
 237         int bzerror;
 238         int bytes_read;
 239         int newlinetypes = f->f_newlinetypes;
 240         int skipnextlf = f->f_skipnextlf;
 241         int univ_newline = f->f_univ_newline;
 242
 243         total_v_size = n > 0 ? n : 100;
 244         v = PyString_FromStringAndSize((char *)NULL, total_v_size);
 245         if (v == NULL)
 246                 return NULL;
 247
 248         buf = BUF(v);
 249         end = buf + total_v_size;
 250
 251         for (;;) {
 252                 Py_BEGIN_ALLOW_THREADS
 253                 while (buf != end) {
 254                         bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1);
 255                         f->pos++;
 256                         if (bytes_read == 0) break;
 257                         if (univ_newline) {
 258                                 if (skipnextlf) {
 259                                         skipnextlf = 0;
 260                                         if (c == '\n') {
 261                                                 /* Seeing a \n here with skipnextlf true means we
 262                                                  * saw a \r before.
 263                                                  */
 264                                                 newlinetypes |= NEWLINE_CRLF;
 265                                                 if (bzerror != BZ_OK) break;
 266                                                 bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1);
 267                                                 f->pos++;
 268                                                 if (bytes_read == 0) break;
 269                                         } else {
 270                                                 newlinetypes |= NEWLINE_CR;
 271                                         }
 272                                 }
 273                                 if (c == '\r') {
 274                                         skipnextlf = 1;
 275                                         c = '\n';
 276                                 } else if (c == '\n')
 277                                         newlinetypes |= NEWLINE_LF;
 278                         }
 279                         *buf++ = c;
 280                         if (bzerror != BZ_OK || c == '\n') break;
 281                 }
 282                 if (univ_newline && bzerror == BZ_STREAM_END && skipnextlf)
 283                         newlinetypes |= NEWLINE_CR;
 284                 Py_END_ALLOW_THREADS
 285                 f->f_newlinetypes = newlinetypes;
 286                 f->f_skipnextlf = skipnextlf;
 287                 if (bzerror == BZ_STREAM_END) {
 288                         f->size = f->pos;
 289                         f->mode = MODE_READ_EOF;
 290                         break;
 291                 } else if (bzerror != BZ_OK) {
 292                         Util_CatchBZ2Error(bzerror);
 293                         Py_DECREF(v);
 294                         return NULL;
 295                 }
 296                 if (c == '\n')
 297                         break;
 298                 /* Must be because buf == end */
 299                 if (n > 0)
 300                         break;
 301                 used_v_size = total_v_size;
 302                 increment = total_v_size >> 2; /* mild exponential growth */
 303                 total_v_size += increment;
 304                 if (total_v_size > INT_MAX) {
 305                         PyErr_SetString(PyExc_OverflowError,
 306                             "line is longer than a Python string can hold");
 307                         Py_DECREF(v);
 308                         return NULL;
 309                 }
 310                 if (_PyString_Resize(&v, total_v_size) < 0)
 311                         return NULL;
 312                 buf = BUF(v) + used_v_size;
 313                 end = BUF(v) + total_v_size;
 314         }
 315
 316         used_v_size = buf - BUF(v);
 317         if (used_v_size != total_v_size)
 318                 _PyString_Resize(&v, used_v_size);
 319         return v;
 320 }
 321
 322 /* This is a hacked version of Python's
 323  * fileobject.c:Py_UniversalNewlineFread(). */
 324 size_t
 325 Util_UnivNewlineRead(int *bzerror, BZFILE *stream,
 326                      char* buf, size_t n, BZ2FileObject *f)
 327 {
 328         char *dst = buf;
 329         int newlinetypes, skipnextlf;
 330
 331         assert(buf != NULL);
 332         assert(stream != NULL);
 333
 334         if (!f->f_univ_newline)
 335                 return BZ2_bzRead(bzerror, stream, buf, n);
 336
 337         newlinetypes = f->f_newlinetypes;
 338         skipnextlf = f->f_skipnextlf;
 339
 340         /* Invariant:  n is the number of bytes remaining to be filled
 341          * in the buffer.
 342          */
 343         while (n) {
 344                 size_t nread;
 345                 int shortread;
 346                 char *src = dst;
 347
 348                 nread = BZ2_bzRead(bzerror, stream, dst, n);
 349                 assert(nread <= n);
 350                 n -= nread; /* assuming 1 byte out for each in; will adjust */
 351                 shortread = n != 0;     /* true iff EOF or error */
 352                 while (nread--) {
 353                         char c = *src++;
 354                         if (c == '\r') {
 355                                 /* Save as LF and set flag to skip next LF. */
 356                                 *dst++ = '\n';
 357                                 skipnextlf = 1;
 358                         }
 359                         else if (skipnextlf && c == '\n') {
 360                                 /* Skip LF, and remember we saw CR LF. */
 361                                 skipnextlf = 0;
 362                                 newlinetypes |= NEWLINE_CRLF;
 363                                 ++n;
 364                         }
 365                         else {
 366                                 /* Normal char to be stored in buffer.  Also
 367                                  * update the newlinetypes flag if either this
 368                                  * is an LF or the previous char was a CR.
 369                                  */
 370                                 if (c == '\n')
 371                                         newlinetypes |= NEWLINE_LF;
 372                                 else if (skipnextlf)
 373                                         newlinetypes |= NEWLINE_CR;
 374                                 *dst++ = c;
 375                                 skipnextlf = 0;
 376                         }
 377                 }
 378                 if (shortread) {
 379                         /* If this is EOF, update type flags. */
 380                         if (skipnextlf && *bzerror == BZ_STREAM_END)
 381                                 newlinetypes |= NEWLINE_CR;
 382                         break;
 383                 }
 384         }
 385         f->f_newlinetypes = newlinetypes;
 386         f->f_skipnextlf = skipnextlf;
 387         return dst - buf;
 388 }
 389
 390 /* This is a hacked version of Python's fileobject.c:drop_readahead(). */
 391 static void
 392 Util_DropReadAhead(BZ2FileObject *f)
 393 {
 394         if (f->f_buf != NULL) {
 395                 PyMem_Free(f->f_buf);
 396                 f->f_buf = NULL;
 397         }
 398 }
 399
 400 /* This is a hacked version of Python's fileobject.c:readahead(). */
 401 static int
 402 Util_ReadAhead(BZ2FileObject *f, int bufsize)
 403 {
 404         int chunksize;
 405         int bzerror;
 406
 407         if (f->f_buf != NULL) {
 408                 if((f->f_bufend - f->f_bufptr) >= 1)
 409                         return 0;
 410                 else
 411                         Util_DropReadAhead(f);
 412         }
 413         if (f->mode == MODE_READ_EOF) {
 414                 f->f_bufptr = f->f_buf;
 415                 f->f_bufend = f->f_buf;
 416                 return 0;
 417         }
 418         if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
 419                 return -1;
 420         }
 421         Py_BEGIN_ALLOW_THREADS
 422         chunksize = Util_UnivNewlineRead(&bzerror, f->fp, f->f_buf,
 423                                          bufsize, f);
 424         Py_END_ALLOW_THREADS
 425         f->pos += chunksize;
 426         if (bzerror == BZ_STREAM_END) {
 427                 f->size = f->pos;
 428                 f->mode = MODE_READ_EOF;
 429         } else if (bzerror != BZ_OK) {
 430                 Util_CatchBZ2Error(bzerror);
 431                 Util_DropReadAhead(f);
 432                 return -1;
 433         }
 434         f->f_bufptr = f->f_buf;
 435         f->f_bufend = f->f_buf + chunksize;
 436         return 0;
 437 }
 438
 439 /* This is a hacked version of Python's
 440  * fileobject.c:readahead_get_line_skip(). */
 441 static PyStringObject *
 442 Util_ReadAheadGetLineSkip(BZ2FileObject *f, int skip, int bufsize)
 443 {
 444         PyStringObject* s;
 445         char *bufptr;
 446         char *buf;
 447         int len;
 448
 449         if (f->f_buf == NULL)
 450                 if (Util_ReadAhead(f, bufsize) < 0)
 451                         return NULL;
 452
 453         len = f->f_bufend - f->f_bufptr;
 454         if (len == 0)
 455                 return (PyStringObject *)
 456                         PyString_FromStringAndSize(NULL, skip);
 457         bufptr = memchr(f->f_bufptr, '\n', len);
 458         if (bufptr != NULL) {
 459                 bufptr++;                       /* Count the '\n' */
 460                 len = bufptr - f->f_bufptr;
 461                 s = (PyStringObject *)
 462                         PyString_FromStringAndSize(NULL, skip+len);
 463                 if (s == NULL)
 464                         return NULL;
 465                 memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
 466                 f->f_bufptr = bufptr;
 467                 if (bufptr == f->f_bufend)
 468                         Util_DropReadAhead(f);
 469         } else {
 470                 bufptr = f->f_bufptr;
 471                 buf = f->f_buf;
 472                 f->f_buf = NULL;        /* Force new readahead buffer */
 473                 s = Util_ReadAheadGetLineSkip(f, skip+len,
 474                                               bufsize + (bufsize>>2));
 475                 if (s == NULL) {
 476                         PyMem_Free(buf);
 477                         return NULL;
 478                 }
 479                 memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
 480                 PyMem_Free(buf);
 481         }
 482         return s;
 483 }
 484
 485 /* ===================================================================== */
 486 /* Methods of BZ2File. */
 487
 488 PyDoc_STRVAR(BZ2File_read__doc__,
 489 "read([size]) -> string\n\
 490 \n\
 491 Read at most size uncompressed bytes, returned as a string. If the size\n\
 492 argument is negative or omitted, read until EOF is reached.\n\
 493 ");
 494
 495 /* This is a hacked version of Python's fileobject.c:file_read(). */
 496 static PyObject *
 497 BZ2File_read(BZ2FileObject *self, PyObject *args)
 498 {
 499         long bytesrequested = -1;
 500         size_t bytesread, buffersize, chunksize;
 501         int bzerror;
 502         PyObject *ret = NULL;
 503
 504         if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
 505                 return NULL;
 506
 507         ACQUIRE_LOCK(self);
 508         switch (self->mode) {
 509                 case MODE_READ:
 510                         break;
 511                 case MODE_READ_EOF:
 512                         ret = PyString_FromString("");
 513                         goto cleanup;
 514                 case MODE_CLOSED:
 515                         PyErr_SetString(PyExc_ValueError,
 516                                         "I/O operation on closed file");
 517                         goto cleanup;
 518                 default:
 519                         PyErr_SetString(PyExc_IOError,
 520                                         "file is not ready for reading");
 521                         goto cleanup;
 522         }
 523
 524         if (bytesrequested < 0)
 525                 buffersize = Util_NewBufferSize((size_t)0);
 526         else
 527                 buffersize = bytesrequested;
 528         if (buffersize > INT_MAX) {
 529                 PyErr_SetString(PyExc_OverflowError,
 530                                 "requested number of bytes is "
 531                                 "more than a Python string can hold");
 532                 goto cleanup;
 533         }
 534         ret = PyString_FromStringAndSize((char *)NULL, buffersize);
 535         if (ret == NULL)
 536                 goto cleanup;
 537         bytesread = 0;
 538
 539         for (;;) {
 540                 Py_BEGIN_ALLOW_THREADS
 541                 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
 542                                                  BUF(ret)+bytesread,
 543                                                  buffersize-bytesread,
 544                                                  self);
 545                 self->pos += chunksize;
 546                 Py_END_ALLOW_THREADS
 547                 bytesread += chunksize;
 548                 if (bzerror == BZ_STREAM_END) {
 549                         self->size = self->pos;
 550                         self->mode = MODE_READ_EOF;
 551                         break;
 552                 } else if (bzerror != BZ_OK) {
 553                         Util_CatchBZ2Error(bzerror);
 554                         Py_DECREF(ret);
 555                         ret = NULL;
 556                         goto cleanup;
 557                 }
 558                 if (bytesrequested < 0) {
 559                         buffersize = Util_NewBufferSize(buffersize);
 560                         if (_PyString_Resize(&ret, buffersize) < 0)
 561                                 goto cleanup;
 562                 } else {
 563                         break;
 564                 }
 565         }
 566         if (bytesread != buffersize)
 567                 _PyString_Resize(&ret, bytesread);
 568
 569 cleanup:
 570         RELEASE_LOCK(self);
 571         return ret;
 572 }
 573
 574 PyDoc_STRVAR(BZ2File_readline__doc__,
 575 "readline([size]) -> string\n\
 576 \n\
 577 Return the next line from the file, as a string, retaining newline.\n\
 578 A non-negative size argument will limit the maximum number of bytes to\n\
 579 return (an incomplete line may be returned then). Return an empty\n\
 580 string at EOF.\n\
 581 ");
 582
 583 static PyObject *
 584 BZ2File_readline(BZ2FileObject *self, PyObject *args)
 585 {
 586         PyObject *ret = NULL;
 587         int sizehint = -1;
 588
 589         if (!PyArg_ParseTuple(args, "|i:readline", &sizehint))
 590                 return NULL;
 591
 592         ACQUIRE_LOCK(self);
 593         switch (self->mode) {
 594                 case MODE_READ:
 595                         break;
 596                 case MODE_READ_EOF:
 597                         ret = PyString_FromString("");
 598                         goto cleanup;
 599                 case MODE_CLOSED:
 600                         PyErr_SetString(PyExc_ValueError,
 601                                         "I/O operation on closed file");
 602                         goto cleanup;
 603                 default:
 604                         PyErr_SetString(PyExc_IOError,
 605                                         "file is not ready for reading");
 606                         goto cleanup;
 607         }
 608
 609         if (sizehint == 0)
 610                 ret = PyString_FromString("");
 611         else
 612                 ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint);
 613
 614 cleanup:
 615         RELEASE_LOCK(self);
 616         return ret;
 617 }
 618
 619 PyDoc_STRVAR(BZ2File_readlines__doc__,
 620 "readlines([size]) -> list\n\
 621 \n\
 622 Call readline() repeatedly and return a list of lines read.\n\
 623 The optional size argument, if given, is an approximate bound on the\n\
 624 total number of bytes in the lines returned.\n\
 625 ");
 626
 627 /* This is a hacked version of Python's fileobject.c:file_readlines(). */
 628 static PyObject *
 629 BZ2File_readlines(BZ2FileObject *self, PyObject *args)
 630 {
 631         long sizehint = 0;
 632         PyObject *list = NULL;
 633         PyObject *line;
 634         char small_buffer[SMALLCHUNK];
 635         char *buffer = small_buffer;
 636         size_t buffersize = SMALLCHUNK;
 637         PyObject *big_buffer = NULL;
 638         size_t nfilled = 0;
 639         size_t nread;
 640         size_t totalread = 0;
 641         char *p, *q, *end;
 642         int err;
 643         int shortread = 0;
 644         int bzerror;
 645
 646         if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
 647                 return NULL;
 648
 649         ACQUIRE_LOCK(self);
 650         switch (self->mode) {
 651                 case MODE_READ:
 652                         break;
 653                 case MODE_READ_EOF:
 654                         list = PyList_New(0);
 655                         goto cleanup;
 656                 case MODE_CLOSED:
 657                         PyErr_SetString(PyExc_ValueError,
 658                                         "I/O operation on closed file");
 659                         goto cleanup;
 660                 default:
 661                         PyErr_SetString(PyExc_IOError,
 662                                         "file is not ready for reading");
 663                         goto cleanup;
 664         }
 665
 666         if ((list = PyList_New(0)) == NULL)
 667                 goto cleanup;
 668
 669         for (;;) {
 670                 Py_BEGIN_ALLOW_THREADS
 671                 nread = Util_UnivNewlineRead(&bzerror, self->fp,
 672                                              buffer+nfilled,
 673                                              buffersize-nfilled, self);
 674                 self->pos += nread;
 675                 Py_END_ALLOW_THREADS
 676                 if (bzerror == BZ_STREAM_END) {
 677                         self->size = self->pos;
 678                         self->mode = MODE_READ_EOF;
 679                         if (nread == 0) {
 680                                 sizehint = 0;
 681                                 break;
 682                         }
 683                         shortread = 1;
 684                 } else if (bzerror != BZ_OK) {
 685                         Util_CatchBZ2Error(bzerror);
 686                   error:
 687                         Py_DECREF(list);
 688                         list = NULL;
 689                         goto cleanup;
 690                 }
 691                 totalread += nread;
 692                 p = memchr(buffer+nfilled, '\n', nread);
 693                 if (!shortread && p == NULL) {
 694                         /* Need a larger buffer to fit this line */
 695                         nfilled += nread;
 696                         buffersize *= 2;
 697                         if (buffersize > INT_MAX) {
 698                                 PyErr_SetString(PyExc_OverflowError,
 699                                 "line is longer than a Python string can hold");
 700                                 goto error;
 701                         }
 702                         if (big_buffer == NULL) {
 703                                 /* Create the big buffer */
 704                                 big_buffer = PyString_FromStringAndSize(
 705                                         NULL, buffersize);
 706                                 if (big_buffer == NULL)
 707                                         goto error;
 708                                 buffer = PyString_AS_STRING(big_buffer);
 709                                 memcpy(buffer, small_buffer, nfilled);
 710                         }
 711                         else {
 712                                 /* Grow the big buffer */
 713                                 _PyString_Resize(&big_buffer, buffersize);
 714                                 buffer = PyString_AS_STRING(big_buffer);
 715                         }
 716                         continue;
 717                 }
 718                 end = buffer+nfilled+nread;
 719                 q = buffer;
 720                 while (p != NULL) {
 721                         /* Process complete lines */
 722                         p++;
 723                         line = PyString_FromStringAndSize(q, p-q);
 724                         if (line == NULL)
 725                                 goto error;
 726                         err = PyList_Append(list, line);
 727                         Py_DECREF(line);
 728                         if (err != 0)
 729                                 goto error;
 730                         q = p;
 731                         p = memchr(q, '\n', end-q);
 732                 }
 733                 /* Move the remaining incomplete line to the start */
 734                 nfilled = end-q;
 735                 memmove(buffer, q, nfilled);
 736                 if (sizehint > 0)
 737                         if (totalread >= (size_t)sizehint)
 738                                 break;
 739                 if (shortread) {
 740                         sizehint = 0;
 741                         break;
 742                 }
 743         }
 744         if (nfilled != 0) {
 745                 /* Partial last line */
 746                 line = PyString_FromStringAndSize(buffer, nfilled);
 747                 if (line == NULL)
 748                         goto error;
 749                 if (sizehint > 0) {
 750                         /* Need to complete the last line */
 751                         PyObject *rest = Util_GetLine(self, 0);
 752                         if (rest == NULL) {
 753                                 Py_DECREF(line);
 754                                 goto error;
 755                         }
 756                         PyString_Concat(&line, rest);
 757                         Py_DECREF(rest);
 758                         if (line == NULL)
 759                                 goto error;
 760                 }
 761                 err = PyList_Append(list, line);
 762                 Py_DECREF(line);
 763                 if (err != 0)
 764                         goto error;
 765         }
 766
 767   cleanup:
 768         RELEASE_LOCK(self);
 769         if (big_buffer) {
 770                 Py_DECREF(big_buffer);
 771         }
 772         return list;
 773 }
 774
 775 PyDoc_STRVAR(BZ2File_xreadlines__doc__,
 776 "xreadlines() -> self\n\
 777 \n\
 778 For backward compatibility. BZ2File objects now include the performance\n\
 779 optimizations previously implemented in the xreadlines module.\n\
 780 ");
 781
 782 PyDoc_STRVAR(BZ2File_write__doc__,
 783 "write(data) -> None\n\
 784 \n\
 785 Write the 'data' string to file. Note that due to buffering, close() may\n\
 786 be needed before the file on disk reflects the data written.\n\
 787 ");
 788
 789 /* This is a hacked version of Python's fileobject.c:file_write(). */
 790 static PyObject *
 791 BZ2File_write(BZ2FileObject *self, PyObject *args)
 792 {
 793         PyObject *ret = NULL;
 794         char *buf;
 795         int len;
 796         int bzerror;
 797
 798         if (!PyArg_ParseTuple(args, "s#:write", &buf, &len))
 799                 return NULL;
 800
 801         ACQUIRE_LOCK(self);
 802         switch (self->mode) {
 803                 case MODE_WRITE:
 804                         break;
 805
 806                 case MODE_CLOSED:
 807                         PyErr_SetString(PyExc_ValueError,
 808                                         "I/O operation on closed file");
 809                         goto cleanup;
 810
 811                 default:
 812                         PyErr_SetString(PyExc_IOError,
 813                                         "file is not ready for writing");
 814                         goto cleanup;
 815         }
 816
 817         self->f_softspace = 0;
 818
 819         Py_BEGIN_ALLOW_THREADS
 820         BZ2_bzWrite (&bzerror, self->fp, buf, len);
 821         self->pos += len;
 822         Py_END_ALLOW_THREADS
 823
 824         if (bzerror != BZ_OK) {
 825                 Util_CatchBZ2Error(bzerror);
 826                 goto cleanup;
 827         }
 828
 829         Py_INCREF(Py_None);
 830         ret = Py_None;
 831
 832 cleanup:
 833         RELEASE_LOCK(self);
 834         return ret;
 835 }
 836
 837 PyDoc_STRVAR(BZ2File_writelines__doc__,
 838 "writelines(sequence_of_strings) -> None\n\
 839 \n\
 840 Write the sequence of strings to the file. Note that newlines are not\n\
 841 added. The sequence can be any iterable object producing strings. This is\n\
 842 equivalent to calling write() for each string.\n\
 843 ");
 844
 845 /* This is a hacked version of Python's fileobject.c:file_writelines(). */
 846 static PyObject *
 847 BZ2File_writelines(BZ2FileObject *self, PyObject *seq)
 848 {
 849 #define CHUNKSIZE 1000
 850         PyObject *list = NULL;
 851         PyObject *iter = NULL;
 852         PyObject *ret = NULL;
 853         PyObject *line;
 854         int i, j, index, len, islist;
 855         int bzerror;
 856
 857         ACQUIRE_LOCK(self);
 858         switch (self->mode) {
 859                 case MODE_WRITE:
 860                         break;
 861
 862                 case MODE_CLOSED:
 863                         PyErr_SetString(PyExc_ValueError,
 864                                         "I/O operation on closed file");
 865                         goto error;
 866
 867                 default:
 868                         PyErr_SetString(PyExc_IOError,
 869                                         "file is not ready for writing");
 870                         goto error;
 871         }
 872
 873         islist = PyList_Check(seq);
 874         if  (!islist) {
 875                 iter = PyObject_GetIter(seq);
 876                 if (iter == NULL) {
 877                         PyErr_SetString(PyExc_TypeError,
 878                                 "writelines() requires an iterable argument");
 879                         goto error;
 880                 }
 881                 list = PyList_New(CHUNKSIZE);
 882                 if (list == NULL)
 883                         goto error;
 884         }
 885
 886         /* Strategy: slurp CHUNKSIZE lines into a private list,
 887            checking that they are all strings, then write that list
 888            without holding the interpreter lock, then come back for more. */
 889         for (index = 0; ; index += CHUNKSIZE) {
 890                 if (islist) {
 891                         Py_XDECREF(list);
 892                         list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
 893                         if (list == NULL)
 894                                 goto error;
 895                         j = PyList_GET_SIZE(list);
 896                 }
 897                 else {
 898                         for (j = 0; j < CHUNKSIZE; j++) {
 899                                 line = PyIter_Next(iter);
 900                                 if (line == NULL) {
 901                                         if (PyErr_Occurred())
 902                                                 goto error;
 903                                         break;
 904                                 }
 905                                 PyList_SetItem(list, j, line);
 906                         }
 907                 }
 908                 if (j == 0)
 909                         break;
 910
 911                 /* Check that all entries are indeed strings. If not,
 912                    apply the same rules as for file.write() and
 913                    convert the rets to strings. This is slow, but
 914                    seems to be the only way since all conversion APIs
 915                    could potentially execute Python code. */
 916                 for (i = 0; i < j; i++) {
 917                         PyObject *v = PyList_GET_ITEM(list, i);
 918                         if (!PyString_Check(v)) {
 919                                 const char *buffer;
 920                                 Py_ssize_t len;
 921                                 if (PyObject_AsCharBuffer(v, &buffer, &len)) {
 922                                         PyErr_SetString(PyExc_TypeError,
 923                                                         "writelines() "
 924                                                         "argument must be "
 925                                                         "a sequence of "
 926                                                         "strings");
 927                                         goto error;
 928                                 }
 929                                 line = PyString_FromStringAndSize(buffer,
 930                                                                   len);
 931                                 if (line == NULL)
 932                                         goto error;
 933                                 Py_DECREF(v);
 934                                 PyList_SET_ITEM(list, i, line);
 935                         }
 936                 }
 937
 938                 self->f_softspace = 0;
 939
 940                 /* Since we are releasing the global lock, the
 941                    following code may *not* execute Python code. */
 942                 Py_BEGIN_ALLOW_THREADS
 943                 for (i = 0; i < j; i++) {
 944                         line = PyList_GET_ITEM(list, i);
 945                         len = PyString_GET_SIZE(line);
 946                         BZ2_bzWrite (&bzerror, self->fp,
 947                                      PyString_AS_STRING(line), len);
 948                         if (bzerror != BZ_OK) {
 949                                 Py_BLOCK_THREADS
 950                                 Util_CatchBZ2Error(bzerror);
 951                                 goto error;
 952                         }
 953                 }
 954                 Py_END_ALLOW_THREADS
 955
 956                 if (j < CHUNKSIZE)
 957                         break;
 958         }
 959
 960         Py_INCREF(Py_None);
 961         ret = Py_None;
 962
 963   error:
 964         RELEASE_LOCK(self);
 965         Py_XDECREF(list);
 966         Py_XDECREF(iter);
 967         return ret;
 968 #undef CHUNKSIZE
 969 }
 970
PyDoc_STRVAR(BZ2File_seek__doc__,
"seek(offset [, whence]) -> None\n\
\n\
Move to new file position. Argument offset is a byte count. Optional\n\
argument whence defaults to 0 (offset from start of file, offset\n\
should be >= 0); other values are 1 (move relative to current position,\n\
positive or negative), and 2 (move relative to end of file, usually\n\
negative, although many platforms allow seeking beyond the end of a file).\n\
\n\
Note that seeking of bz2 files is emulated, and depending on the parameters\n\
the operation may be extremely slow.\n\
");

/* Emulated seek on the decompressed stream.
 *
 * args: (offset [, whence]); whence is parsed into 'where' and defaults
 * to 0. Only values 1 and 2 receive special handling; any other value
 * leaves 'offset' untouched, i.e. it is used as an absolute position.
 *
 * The stream can only be read forward, so:
 *   - a target at or after the current position is reached by reading and
 *     discarding data in pieces of at most 'buffersize' bytes;
 *   - a target before the current position closes the bz2 reader, seeks
 *     the underlying file object to 0, reopens the reader and then reads
 *     forward from the start;
 *   - whence == 2 with an unknown size (self->size == -1) first reads to
 *     the end of the stream to learn the size.
 *
 * Returns a new reference to None on success. Returns NULL when argument
 * conversion fails, when the object is closed (ValueError), when it is not
 * in a read mode (IOError), when the underlying file's seek() fails, or
 * when libbz2 reports an unexpected status (passed to Util_CatchBZ2Error).
 */
static PyObject *
BZ2File_seek(BZ2FileObject *self, PyObject *args)
{
        int where = 0;
        PyObject *offobj;
        Py_off_t offset;
        char small_buffer[SMALLCHUNK];
        char *buffer = small_buffer;
        size_t buffersize = SMALLCHUNK;
        Py_off_t bytesread = 0;
        size_t readsize;
        int chunksize;
        int bzerror;
        PyObject *ret = NULL;

        if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &where))
                return NULL;
#if !defined(HAVE_LARGEFILE_SUPPORT)
        offset = PyInt_AsLong(offobj);
#else
        offset = PyLong_Check(offobj) ?
                PyLong_AsLongLong(offobj) : PyInt_AsLong(offobj);
#endif
        /* The conversions above signal failure through the error
         * indicator rather than a dedicated return value. */
        if (PyErr_Occurred())
                return NULL;

        ACQUIRE_LOCK(self);
        /* Discard the iterator read-ahead before the mode check, so it is
         * dropped even when the seek is subsequently rejected. */
        Util_DropReadAhead(self);
        switch (self->mode) {
                case MODE_READ:
                case MODE_READ_EOF:
                        break;

                case MODE_CLOSED:
                        PyErr_SetString(PyExc_ValueError,
                                        "I/O operation on closed file");
                        goto cleanup;

                default:
                        PyErr_SetString(PyExc_IOError,
                                        "seek works only while reading");
                        goto cleanup;
        }

        if (where == 2) {
                if (self->size == -1) {
                        /* Size not known yet: drain the stream to find
                         * where it ends. */
                        assert(self->mode != MODE_READ_EOF);
                        for (;;) {
                                Py_BEGIN_ALLOW_THREADS
                                chunksize = Util_UnivNewlineRead(
                                                &bzerror, self->fp,
                                                buffer, buffersize,
                                                self);
                                self->pos += chunksize;
                                Py_END_ALLOW_THREADS

                                bytesread += chunksize;
                                if (bzerror == BZ_STREAM_END) {
                                        break;
                                } else if (bzerror != BZ_OK) {
                                        Util_CatchBZ2Error(bzerror);
                                        goto cleanup;
                                }
                        }
                        self->mode = MODE_READ_EOF;
                        self->size = self->pos;
                        /* Reset so the forward-walk loop below starts
                         * counting from zero. */
                        bytesread = 0;
                }
                offset = self->size + offset;
        } else if (where == 1) {
                offset = self->pos + offset;
        }

        /* Before getting here, offset must be the absolute position the file
         * pointer should be set to. */

        if (offset >= self->pos) {
                /* we can move forward */
                offset -= self->pos;
        } else {
                /* we cannot move back, so rewind the stream */
                BZ2_bzReadClose(&bzerror, self->fp);
                if (self->fp) {
                        /* Balance the use count taken when the reader
                         * was opened. */
                        PyFile_DecUseCount((PyFileObject *)self->file);
                        self->fp = NULL;
                }
                if (bzerror != BZ_OK) {
                        Util_CatchBZ2Error(bzerror);
                        goto cleanup;
                }
                ret = PyObject_CallMethod(self->file, "seek", "(i)", 0);
                if (!ret)
                        goto cleanup;
                /* The result of file.seek() is not needed; ret is reused
                 * for this function's own return value. */
                Py_DECREF(ret);
                ret = NULL;
                self->pos = 0;
                self->fp = BZ2_bzReadOpen(&bzerror, PyFile_AsFile(self->file),
                                          0, 0, NULL, 0);
                if (self->fp)
                        PyFile_IncUseCount((PyFileObject *)self->file);
                if (bzerror != BZ_OK) {
                        Util_CatchBZ2Error(bzerror);
                        goto cleanup;
                }
                self->mode = MODE_READ;
        }

        /* Nothing to skip, or already at end of stream: done. */
        if (offset <= 0 || self->mode == MODE_READ_EOF)
                goto exit;

        /* Before getting here, offset must be set to the number of bytes
         * to walk forward. */
        for (;;) {
                if (offset-bytesread > buffersize)
                        readsize = buffersize;
                else
                        /* offset might be wider than readsize, but the
                         * result of the subtraction is bounded by
                         * buffersize (see the condition above), and
                         * buffersize is SMALLCHUNK. */
                        readsize = (size_t)(offset-bytesread);
                Py_BEGIN_ALLOW_THREADS
                chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
                                                 buffer, readsize, self);
                self->pos += chunksize;
                Py_END_ALLOW_THREADS
                bytesread += chunksize;
                if (bzerror == BZ_STREAM_END) {
                        /* Hit the end before reaching the target: record
                         * the now-known size and stop there. */
                        self->size = self->pos;
                        self->mode = MODE_READ_EOF;
                        break;
                } else if (bzerror != BZ_OK) {
                        Util_CatchBZ2Error(bzerror);
                        goto cleanup;
                }
                if (bytesread == offset)
                        break;
        }

exit:
        Py_INCREF(Py_None);
        ret = Py_None;

cleanup:
        RELEASE_LOCK(self);
        return ret;
}
1130
1131 PyDoc_STRVAR(BZ2File_tell__doc__,
1132 "tell() -> int\n\
1133 \n\
1134 Return the current file position, an integer (may be a long integer).\n\
1135 ");
1136
1137 static PyObject *
1138 BZ2File_tell(BZ2FileObject *self, PyObject *args)
1139 {
1140         PyObject *ret = NULL;
1141
1142         if (self->mode == MODE_CLOSED) {
1143                 PyErr_SetString(PyExc_ValueError,
1144                                 "I/O operation on closed file");
1145                 goto cleanup;
1146         }
1147
1148 #if !defined(HAVE_LARGEFILE_SUPPORT)
1149         ret = PyInt_FromLong(self->pos);
1150 #else
1151         ret = PyLong_FromLongLong(self->pos);
1152 #endif
1153
1154 cleanup:
1155         return ret;
1156 }
1157
1158 PyDoc_STRVAR(BZ2File_close__doc__,
1159 "close() -> None or (perhaps) an integer\n\
1160 \n\
1161 Close the file. Sets data attribute .closed to true. A closed file\n\
1162 cannot be used for further I/O operations. close() may be called more\n\
1163 than once without error.\n\
1164 ");
1165
1166 static PyObject *
1167 BZ2File_close(BZ2FileObject *self)
1168 {
1169         PyObject *ret = NULL;
1170         int bzerror = BZ_OK;
1171
1172         ACQUIRE_LOCK(self);
1173         switch (self->mode) {
1174                 case MODE_READ:
1175                 case MODE_READ_EOF:
1176                         BZ2_bzReadClose(&bzerror, self->fp);
1177                         break;
1178                 case MODE_WRITE:
1179                         BZ2_bzWriteClose(&bzerror, self->fp,
1180                                          0, NULL, NULL);
1181                         break;
1182         }
1183         if (self->fp) {
1184                 PyFile_DecUseCount((PyFileObject *)self->file);
1185                 self->fp = NULL;
1186         }
1187         self->mode = MODE_CLOSED;
1188         ret = PyObject_CallMethod(self->file, "close", NULL);
1189         if (bzerror != BZ_OK) {
1190                 Util_CatchBZ2Error(bzerror);
1191                 Py_XDECREF(ret);
1192                 ret = NULL;
1193         }
1194
1195         RELEASE_LOCK(self);
1196         return ret;
1197 }
1198
/* Forward declaration: BZ2File_getiter is defined further down but is
 * referenced by the "xreadlines" entry of the table below. */
static PyObject *BZ2File_getiter(BZ2FileObject *self);

/* Method table for BZ2File objects. Each entry gives the Python-level
 * name, the C implementation, its calling convention flag and docstring.
 *
 * NOTE(review): "xreadlines" is registered as METH_VARARGS although
 * BZ2File_getiter only declares a 'self' parameter, so any positional
 * arguments passed from Python are never inspected - confirm whether
 * METH_NOARGS was intended. */
static PyMethodDef BZ2File_methods[] = {
        {"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__},
        {"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__doc__},
        {"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readlines__doc__},
        {"xreadlines", (PyCFunction)BZ2File_getiter, METH_VARARGS, BZ2File_xreadlines__doc__},
        {"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__},
        {"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__doc__},
        {"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__},
        {"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__},
        {"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__},
        {NULL,          NULL}           /* sentinel */
};
1213
1214
1215 /* ===================================================================== */
1216 /* Getters and setters of BZ2File. */
1217
1218 /* This is a hacked version of Python's fileobject.c:get_newlines(). */
1219 static PyObject *
1220 BZ2File_get_newlines(BZ2FileObject *self, void *closure)
1221 {
1222         switch (self->f_newlinetypes) {
1223         case NEWLINE_UNKNOWN:
1224                 Py_INCREF(Py_None);
1225                 return Py_None;
1226         case NEWLINE_CR:
1227                 return PyString_FromString("\r");
1228         case NEWLINE_LF:
1229                 return PyString_FromString("\n");
1230         case NEWLINE_CR|NEWLINE_LF:
1231                 return Py_BuildValue("(ss)", "\r", "\n");
1232         case NEWLINE_CRLF:
1233                 return PyString_FromString("\r\n");
1234         case NEWLINE_CR|NEWLINE_CRLF:
1235                 return Py_BuildValue("(ss)", "\r", "\r\n");
1236         case NEWLINE_LF|NEWLINE_CRLF:
1237                 return Py_BuildValue("(ss)", "\n", "\r\n");
1238         case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF:
1239                 return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
1240         default:
1241                 PyErr_Format(PyExc_SystemError,
1242                              "Unknown newlines value 0x%x\n",
1243                              self->f_newlinetypes);
1244                 return NULL;
1245         }
1246 }
1247
1248 static PyObject *
1249 BZ2File_get_closed(BZ2FileObject *self, void *closure)
1250 {
1251         return PyInt_FromLong(self->mode == MODE_CLOSED);
1252 }
1253
1254 static PyObject *
1255 BZ2File_get_mode(BZ2FileObject *self, void *closure)
1256 {
1257         return PyObject_GetAttrString(self->file, "mode");
1258 }
1259
1260 static PyObject *
1261 BZ2File_get_name(BZ2FileObject *self, void *closure)
1262 {
1263         return PyObject_GetAttrString(self->file, "name");
1264 }
1265
/* Computed attributes of BZ2File objects. Every entry has a getter and a
 * NULL setter, so no setter is registered for any of them.
 *
 * NOTE(review): the "mode" getter returns the wrapped file object's own
 * "mode" attribute, and BZ2File_init always opens that file with "rb" or
 * "wb"; the description string below ('r', 'w', or 'U') may therefore not
 * match what is actually returned - confirm. */
static PyGetSetDef BZ2File_getset[] = {
        {"closed", (getter)BZ2File_get_closed, NULL,
                        "True if the file is closed"},
        {"newlines", (getter)BZ2File_get_newlines, NULL,
                        "end-of-line convention used in this file"},
        {"mode", (getter)BZ2File_get_mode, NULL,
                        "file mode ('r', 'w', or 'U')"},
        {"name", (getter)BZ2File_get_name, NULL,
                        "file name"},
        {NULL}  /* Sentinel */
};
1277
1278
1279 /* ===================================================================== */
1280 /* Members of BZ2File_Type. */
1281
/* OFF(x) yields the byte offset of field x inside BZ2FileObject; any
 * earlier definition of OFF is discarded first. */
#undef OFF
#define OFF(x) offsetof(BZ2FileObject, x)

/* Struct fields exposed directly as Python attributes. Only f_softspace
 * is exported, as an int with flags 0. */
static PyMemberDef BZ2File_members[] = {
        {"softspace",   T_INT,          OFF(f_softspace), 0,
         "flag indicating that a space needs to be printed; used by print"},
        {NULL}  /* Sentinel */
};
1290
1291 /* ===================================================================== */
1292 /* Slot definitions for BZ2File_Type. */
1293
1294 static int
1295 BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
1296 {
1297         static char *kwlist[] = {"filename", "mode", "buffering",
1298                                        "compresslevel", 0};
1299         PyObject *name;
1300         char *mode = "r";
1301         int buffering = -1;
1302         int compresslevel = 9;
1303         int bzerror;
1304         int mode_char = 0;
1305
1306         self->size = -1;
1307
1308         if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|sii:BZ2File",
1309                                          kwlist, &name, &mode, &buffering,
1310                                          &compresslevel))
1311                 return -1;
1312
1313         if (compresslevel < 1 || compresslevel > 9) {
1314                 PyErr_SetString(PyExc_ValueError,
1315                                 "compresslevel must be between 1 and 9");
1316                 return -1;
1317         }
1318
1319         for (;;) {
1320                 int error = 0;
1321                 switch (*mode) {
1322                         case 'r':
1323                         case 'w':
1324                                 if (mode_char)
1325                                         error = 1;
1326                                 mode_char = *mode;
1327                                 break;
1328
1329                         case 'b':
1330                                 break;
1331
1332                         case 'U':
1333 #ifdef __VMS
1334                                 self->f_univ_newline = 0;
1335 #else
1336                                 self->f_univ_newline = 1;
1337 #endif
1338                                 break;
1339
1340                         default:
1341                                 error = 1;
1342                                 break;
1343                 }
1344                 if (error) {
1345                         PyErr_Format(PyExc_ValueError,
1346                                      "invalid mode char %c", *mode);
1347                         return -1;
1348                 }
1349                 mode++;
1350                 if (*mode == '\0')
1351                         break;
1352         }
1353
1354         if (mode_char == 0) {
1355                 mode_char = 'r';
1356         }
1357
1358         mode = (mode_char == 'r') ? "rb" : "wb";
1359
1360         self->file = PyObject_CallFunction((PyObject*)&PyFile_Type, "(Osi)",
1361                                            name, mode, buffering);
1362         if (self->file == NULL)
1363                 return -1;
1364
1365         /* From now on, we have stuff to dealloc, so jump to error label
1366          * instead of returning */
1367
1368 #ifdef WITH_THREAD
1369         self->lock = PyThread_allocate_lock();
1370         if (!self->lock) {
1371                 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1372                 goto error;
1373         }
1374 #endif
1375
1376         if (mode_char == 'r')
1377                 self->fp = BZ2_bzReadOpen(&bzerror,
1378                                           PyFile_AsFile(self->file),
1379                                           0, 0, NULL, 0);
1380         else
1381                 self->fp = BZ2_bzWriteOpen(&bzerror,
1382                                            PyFile_AsFile(self->file),
1383                                            compresslevel, 0, 0);
1384
1385         if (bzerror != BZ_OK) {
1386                 Util_CatchBZ2Error(bzerror);
1387                 goto error;
1388         }
1389         PyFile_IncUseCount((PyFileObject *)self->file);
1390
1391         self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE;
1392
1393         return 0;
1394
1395 error:
1396         Py_CLEAR(self->file);
1397 #ifdef WITH_THREAD
1398         if (self->lock) {
1399                 PyThread_free_lock(self->lock);
1400                 self->lock = NULL;
1401         }
1402 #endif
1403         return -1;
1404 }
1405
1406 static void
1407 BZ2File_dealloc(BZ2FileObject *self)
1408 {
1409         int bzerror;
1410 #ifdef WITH_THREAD
1411         if (self->lock)
1412                 PyThread_free_lock(self->lock);
1413 #endif
1414         switch (self->mode) {
1415                 case MODE_READ:
1416                 case MODE_READ_EOF:
1417                         BZ2_bzReadClose(&bzerror, self->fp);
1418                         break;
1419                 case MODE_WRITE:
1420                         BZ2_bzWriteClose(&bzerror, self->fp,
1421                                          0, NULL, NULL);
1422                         break;
1423         }
1424         if (self->fp) {
1425                 PyFile_DecUseCount((PyFileObject *)self->file);
1426                 self->fp = NULL;
1427         }
1428         Util_DropReadAhead(self);
1429         Py_XDECREF(self->file);
1430         Py_TYPE(self)->tp_free((PyObject *)self);
1431 }
1432
1433 /* This is a hacked version of Python's fileobject.c:file_getiter(). */
1434 static PyObject *
1435 BZ2File_getiter(BZ2FileObject *self)
1436 {
1437         if (self->mode == MODE_CLOSED) {
1438                 PyErr_SetString(PyExc_ValueError,
1439                                 "I/O operation on closed file");
1440                 return NULL;
1441         }
1442         Py_INCREF((PyObject*)self);
1443         return (PyObject *)self;
1444 }
1445
1446 /* This is a hacked version of Python's fileobject.c:file_iternext(). */
1447 #define READAHEAD_BUFSIZE 8192
1448 static PyObject *
1449 BZ2File_iternext(BZ2FileObject *self)
1450 {
1451         PyStringObject* ret;
1452         ACQUIRE_LOCK(self);
1453         if (self->mode == MODE_CLOSED) {
1454                 PyErr_SetString(PyExc_ValueError,
1455                                 "I/O operation on closed file");
1456                 return NULL;
1457         }
1458         ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE);
1459         RELEASE_LOCK(self);
1460         if (ret == NULL || PyString_GET_SIZE(ret) == 0) {
1461                 Py_XDECREF(ret);
1462                 return NULL;
1463         }
1464         return (PyObject *)ret;
1465 }
1466
1467 /* ===================================================================== */
1468 /* BZ2File_Type definition. */
1469
/* Class docstring for bz2.BZ2File, assembled from two PyDoc_STR pieces.
 *
 * NOTE(review): the signature line advertises buffering=0, while
 * BZ2File_init initialises its 'buffering' local to -1 when the argument
 * is omitted - confirm which default should be documented. */
PyDoc_VAR(BZ2File__doc__) =
PyDoc_STR(
"BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
\n\
Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
writing. When opened for writing, the file will be created if it doesn't\n\
exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
is given, must be a number between 1 and 9.\n\
")
PyDoc_STR(
"\n\
Add a 'U' to mode to open the file for input with universal newline\n\
support. Any line ending in the input file will be seen as a '\\n' in\n\
Python. Also, a file so opened gains the attribute 'newlines'; the value\n\
for this attribute is one of None (no newline read yet), '\\r', '\\n',\n\
'\\r\\n' or a tuple containing all the newline types seen. Universal\n\
newlines are available only when reading.\n\
")
;
1490
/* Type object for bz2.BZ2File.
 *
 * Slots that are filled in: deallocation, generic attribute get/set, the
 * iterator protocol (tp_iter/tp_iternext), the method/member/getset
 * tables, and init/alloc/new/free. Py_TPFLAGS_BASETYPE is set, so the
 * type may be subclassed. All remaining slots are left at 0. */
static PyTypeObject BZ2File_Type = {
        PyVarObject_HEAD_INIT(NULL, 0)
        "bz2.BZ2File",          /*tp_name*/
        sizeof(BZ2FileObject),  /*tp_basicsize*/
        0,                      /*tp_itemsize*/
        (destructor)BZ2File_dealloc, /*tp_dealloc*/
        0,                      /*tp_print*/
        0,                      /*tp_getattr*/
        0,                      /*tp_setattr*/
        0,                      /*tp_compare*/
        0,                      /*tp_repr*/
        0,                      /*tp_as_number*/
        0,                      /*tp_as_sequence*/
        0,                      /*tp_as_mapping*/
        0,                      /*tp_hash*/
        0,                      /*tp_call*/
        0,                      /*tp_str*/
        PyObject_GenericGetAttr,/*tp_getattro*/
        PyObject_GenericSetAttr,/*tp_setattro*/
        0,                      /*tp_as_buffer*/
        Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
        BZ2File__doc__,         /*tp_doc*/
        0,                      /*tp_traverse*/
        0,                      /*tp_clear*/
        0,                      /*tp_richcompare*/
        0,                      /*tp_weaklistoffset*/
        (getiterfunc)BZ2File_getiter, /*tp_iter*/
        (iternextfunc)BZ2File_iternext, /*tp_iternext*/
        BZ2File_methods,        /*tp_methods*/
        BZ2File_members,        /*tp_members*/
        BZ2File_getset,         /*tp_getset*/
        0,                      /*tp_base*/
        0,                      /*tp_dict*/
        0,                      /*tp_descr_get*/
        0,                      /*tp_descr_set*/
        0,                      /*tp_dictoffset*/
        (initproc)BZ2File_init, /*tp_init*/
        PyType_GenericAlloc,    /*tp_alloc*/
        PyType_GenericNew,      /*tp_new*/
        _PyObject_Del,          /*tp_free*/
        0,                      /*tp_is_gc*/
};
1533
1534
1535 /* ===================================================================== */
1536 /* Methods of BZ2Comp. */
1537
PyDoc_STRVAR(BZ2Comp_compress__doc__,
"compress(data) -> string\n\
\n\
Provide more data to the compressor object. It will return chunks of\n\
compressed data whenever possible. When you've finished providing data\n\
to compress, call the flush() method to finish the compression process,\n\
and return what is left in the internal buffers.\n\
");

/* Feed 'data' to the compressor stream and return whatever compressed
 * output libbz2 produced for it as a new string (possibly empty).
 *
 * Empty input returns "" immediately, before the lock is taken and before
 * the 'running' flag is consulted. Otherwise raises ValueError if the
 * object is no longer running, and returns NULL if libbz2 reports a status
 * other than BZ_RUN_OK or an output buffer allocation/resize fails.
 *
 * The output string starts at SMALLCHUNK bytes and is enlarged through
 * Util_NewBufferSize each time libbz2 fills it; at the end it is trimmed
 * to the number of bytes actually produced by this call.
 */
static PyObject *
BZ2Comp_compress(BZ2CompObject *self, PyObject *args)
{
        char *data;
        int datasize;
        int bufsize = SMALLCHUNK;
        PY_LONG_LONG totalout;
        PyObject *ret = NULL;
        bz_stream *bzs = &self->bzs;
        int bzerror;

        if (!PyArg_ParseTuple(args, "s#:compress", &data, &datasize))
                return NULL;

        if (datasize == 0)
                return PyString_FromString("");

        ACQUIRE_LOCK(self);
        if (!self->running) {
                PyErr_SetString(PyExc_ValueError,
                                "this object was already flushed");
                goto error;
        }

        ret = PyString_FromStringAndSize(NULL, bufsize);
        if (!ret)
                goto error;

        bzs->next_in = data;
        bzs->avail_in = datasize;
        bzs->next_out = BUF(ret);
        bzs->avail_out = bufsize;

        /* Remember the stream's running output total so that the bytes
         * produced by this call alone can be computed later. */
        totalout = BZS_TOTAL_OUT(bzs);

        for (;;) {
                Py_BEGIN_ALLOW_THREADS
                bzerror = BZ2_bzCompress(bzs, BZ_RUN);
                Py_END_ALLOW_THREADS
                if (bzerror != BZ_RUN_OK) {
                        Util_CatchBZ2Error(bzerror);
                        goto error;
                }
                if (bzs->avail_in == 0)
                        break; /* no more input data */
                if (bzs->avail_out == 0) {
                        /* Output buffer full: enlarge it and point the
                         * stream at the first unused byte of the resized
                         * string. */
                        bufsize = Util_NewBufferSize(bufsize);
                        if (_PyString_Resize(&ret, bufsize) < 0) {
                                /* NOTE(review): the stream is ended here
                                 * but self->running is left untouched -
                                 * confirm this is intended. */
                                BZ2_bzCompressEnd(bzs);
                                goto error;
                        }
                        bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
                                                    - totalout);
                        bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
                }
        }

        /* Trim to the bytes produced by this call. The return value of the
         * resize is not checked here. */
        _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));

        RELEASE_LOCK(self);
        return ret;

error:
        RELEASE_LOCK(self);
        Py_XDECREF(ret);
        return NULL;
}
1614
/* Docstring text accompanying BZ2Comp_flush(). */
PyDoc_STRVAR(BZ2Comp_flush__doc__,
"flush() -> string\n\
\n\
Finish the compression process and return what is left in internal buffers.\n\
You must not use the compressor object after calling this method.\n\
");
1621
1622 static PyObject *
1623 BZ2Comp_flush(BZ2CompObject *self)
1624 {
1625         int bufsize = SMALLCHUNK;
1626         PyObject *ret = NULL;
1627         bz_stream *bzs = &self->bzs;
1628         PY_LONG_LONG totalout;
1629         int bzerror;
1630
1631         ACQUIRE_LOCK(self);
1632         if (!self->running) {
1633                 PyErr_SetString(PyExc_ValueError, "object was already "
1634                                                   "flushed");
1635                 goto error;
1636         }
1637         self->running = 0;
1638
1639         ret = PyString_FromStringAndSize(NULL, bufsize);
1640         if (!ret)
1641                 goto error;
1642
1643         bzs->next_out = BUF(ret);
1644         bzs->avail_out = bufsize;
1645
1646         totalout = BZS_TOTAL_OUT(bzs);
1647
1648         for (;;) {
1649                 Py_BEGIN_ALLOW_THREADS
1650                 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1651                 Py_END_ALLOW_THREADS
1652                 if (bzerror == BZ_STREAM_END) {
1653                         break;
1654                 } else if (bzerror != BZ_FINISH_OK) {
1655                         Util_CatchBZ2Error(bzerror);
1656                         goto error;
1657                 }
1658                 if (bzs->avail_out == 0) {
1659                         bufsize = Util_NewBufferSize(bufsize);
1660                         if (_PyString_Resize(&ret, bufsize) < 0)
1661                                 goto error;
1662                         bzs->next_out = BUF(ret);
1663                         bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1664                                                     - totalout);
1665                         bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1666                 }
1667         }
1668
1669         if (bzs->avail_out != 0)
1670                 _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
1671
1672         RELEASE_LOCK(self);
1673         return ret;
1674
1675 error:
1676         RELEASE_LOCK(self);
1677         Py_XDECREF(ret);
1678         return NULL;
1679 }
1680
1681 static PyMethodDef BZ2Comp_methods[] = {
1682         {"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS,
1683          BZ2Comp_compress__doc__},
1684         {"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS,
1685          BZ2Comp_flush__doc__},
1686         {NULL,          NULL}           /* sentinel */
1687 };
1688
1689
1690 /* ===================================================================== */
1691 /* Slot definitions for BZ2Comp_Type. */
1692
1693 static int
1694 BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs)
1695 {
1696         int compresslevel = 9;
1697         int bzerror;
1698         static char *kwlist[] = {"compresslevel", 0};
1699
1700         if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor",
1701                                          kwlist, &compresslevel))
1702                 return -1;
1703
1704         if (compresslevel < 1 || compresslevel > 9) {
1705                 PyErr_SetString(PyExc_ValueError,
1706                                 "compresslevel must be between 1 and 9");
1707                 goto error;
1708         }
1709
1710 #ifdef WITH_THREAD
1711         self->lock = PyThread_allocate_lock();
1712         if (!self->lock) {
1713                 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1714                 goto error;
1715         }
1716 #endif
1717
1718         memset(&self->bzs, 0, sizeof(bz_stream));
1719         bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
1720         if (bzerror != BZ_OK) {
1721                 Util_CatchBZ2Error(bzerror);
1722                 goto error;
1723         }
1724
1725         self->running = 1;
1726
1727         return 0;
1728 error:
1729 #ifdef WITH_THREAD
1730         if (self->lock) {
1731                 PyThread_free_lock(self->lock);
1732                 self->lock = NULL;
1733         }
1734 #endif
1735         return -1;
1736 }
1737
1738 static void
1739 BZ2Comp_dealloc(BZ2CompObject *self)
1740 {
1741 #ifdef WITH_THREAD
1742         if (self->lock)
1743                 PyThread_free_lock(self->lock);
1744 #endif
1745         BZ2_bzCompressEnd(&self->bzs);
1746         Py_TYPE(self)->tp_free((PyObject *)self);
1747 }
1748
1749
1750 /* ===================================================================== */
1751 /* BZ2Comp_Type definition. */
1752
1753 PyDoc_STRVAR(BZ2Comp__doc__,
1754 "BZ2Compressor([compresslevel=9]) -> compressor object\n\
1755 \n\
1756 Create a new compressor object. This object may be used to compress\n\
1757 data sequentially. If you want to compress data in one shot, use the\n\
1758 compress() function instead. The compresslevel parameter, if given,\n\
1759 must be a number between 1 and 9.\n\
1760 ");
1761
1762 static PyTypeObject BZ2Comp_Type = {
1763         PyVarObject_HEAD_INIT(NULL, 0)
1764         "bz2.BZ2Compressor",    /*tp_name*/
1765         sizeof(BZ2CompObject),  /*tp_basicsize*/
1766         0,                      /*tp_itemsize*/
1767         (destructor)BZ2Comp_dealloc, /*tp_dealloc*/
1768         0,                      /*tp_print*/
1769         0,                      /*tp_getattr*/
1770         0,                      /*tp_setattr*/
1771         0,                      /*tp_compare*/
1772         0,                      /*tp_repr*/
1773         0,                      /*tp_as_number*/
1774         0,                      /*tp_as_sequence*/
1775         0,                      /*tp_as_mapping*/
1776         0,                      /*tp_hash*/
1777         0,                      /*tp_call*/
1778         0,                      /*tp_str*/
1779         PyObject_GenericGetAttr,/*tp_getattro*/
1780         PyObject_GenericSetAttr,/*tp_setattro*/
1781         0,                      /*tp_as_buffer*/
1782         Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1783         BZ2Comp__doc__,         /*tp_doc*/
1784         0,                      /*tp_traverse*/
1785         0,                      /*tp_clear*/
1786         0,                      /*tp_richcompare*/
1787         0,                      /*tp_weaklistoffset*/
1788         0,                      /*tp_iter*/
1789         0,                      /*tp_iternext*/
1790         BZ2Comp_methods,        /*tp_methods*/
1791         0,                      /*tp_members*/
1792         0,                      /*tp_getset*/
1793         0,                      /*tp_base*/
1794         0,                      /*tp_dict*/
1795         0,                      /*tp_descr_get*/
1796         0,                      /*tp_descr_set*/
1797         0,                      /*tp_dictoffset*/
1798         (initproc)BZ2Comp_init, /*tp_init*/
1799         PyType_GenericAlloc,    /*tp_alloc*/
1800         PyType_GenericNew,      /*tp_new*/
1801         _PyObject_Del,          /*tp_free*/
1802         0,                      /*tp_is_gc*/
1803 };
1804
1805
1806 /* ===================================================================== */
1807 /* Members of BZ2Decomp. */
1808
1809 #undef OFF
1810 #define OFF(x) offsetof(BZ2DecompObject, x)
1811
1812 static PyMemberDef BZ2Decomp_members[] = {
1813         {"unused_data", T_OBJECT, OFF(unused_data), RO},
1814         {NULL}  /* Sentinel */
1815 };
1816
1817
1818 /* ===================================================================== */
1819 /* Methods of BZ2Decomp. */
1820
1821 PyDoc_STRVAR(BZ2Decomp_decompress__doc__,
1822 "decompress(data) -> string\n\
1823 \n\
1824 Provide more data to the decompressor object. It will return chunks\n\
1825 of decompressed data whenever possible. If you try to decompress data\n\
1826 after the end of stream is found, EOFError will be raised. If any data\n\
1827 was found after the end of stream, it'll be ignored and saved in\n\
1828 unused_data attribute.\n\
1829 ");
1830
1831 static PyObject *
1832 BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args)
1833 {
1834         char *data;
1835         int datasize;
1836         int bufsize = SMALLCHUNK;
1837         PY_LONG_LONG totalout;
1838         PyObject *ret = NULL;
1839         bz_stream *bzs = &self->bzs;
1840         int bzerror;
1841
1842         if (!PyArg_ParseTuple(args, "s#:decompress", &data, &datasize))
1843                 return NULL;
1844
1845         ACQUIRE_LOCK(self);
1846         if (!self->running) {
1847                 PyErr_SetString(PyExc_EOFError, "end of stream was "
1848                                                 "already found");
1849                 goto error;
1850         }
1851
1852         ret = PyString_FromStringAndSize(NULL, bufsize);
1853         if (!ret)
1854                 goto error;
1855
1856         bzs->next_in = data;
1857         bzs->avail_in = datasize;
1858         bzs->next_out = BUF(ret);
1859         bzs->avail_out = bufsize;
1860
1861         totalout = BZS_TOTAL_OUT(bzs);
1862
1863         for (;;) {
1864                 Py_BEGIN_ALLOW_THREADS
1865                 bzerror = BZ2_bzDecompress(bzs);
1866                 Py_END_ALLOW_THREADS
1867                 if (bzerror == BZ_STREAM_END) {
1868                         if (bzs->avail_in != 0) {
1869                                 Py_DECREF(self->unused_data);
1870                                 self->unused_data =
1871                                     PyString_FromStringAndSize(bzs->next_in,
1872                                                                bzs->avail_in);
1873                         }
1874                         self->running = 0;
1875                         break;
1876                 }
1877                 if (bzerror != BZ_OK) {
1878                         Util_CatchBZ2Error(bzerror);
1879                         goto error;
1880                 }
1881                 if (bzs->avail_in == 0)
1882                         break; /* no more input data */
1883                 if (bzs->avail_out == 0) {
1884                         bufsize = Util_NewBufferSize(bufsize);
1885                         if (_PyString_Resize(&ret, bufsize) < 0) {
1886                                 BZ2_bzDecompressEnd(bzs);
1887                                 goto error;
1888                         }
1889                         bzs->next_out = BUF(ret);
1890                         bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1891                                                     - totalout);
1892                         bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1893                 }
1894         }
1895
1896         if (bzs->avail_out != 0)
1897                 _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
1898
1899         RELEASE_LOCK(self);
1900         return ret;
1901
1902 error:
1903         RELEASE_LOCK(self);
1904         Py_XDECREF(ret);
1905         return NULL;
1906 }
1907
1908 static PyMethodDef BZ2Decomp_methods[] = {
1909         {"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_decompress__doc__},
1910         {NULL,          NULL}           /* sentinel */
1911 };
1912
1913
1914 /* ===================================================================== */
1915 /* Slot definitions for BZ2Decomp_Type. */
1916
1917 static int
1918 BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs)
1919 {
1920         int bzerror;
1921
1922         if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
1923                 return -1;
1924
1925 #ifdef WITH_THREAD
1926         self->lock = PyThread_allocate_lock();
1927         if (!self->lock) {
1928                 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1929                 goto error;
1930         }
1931 #endif
1932
1933         self->unused_data = PyString_FromString("");
1934         if (!self->unused_data)
1935                 goto error;
1936
1937         memset(&self->bzs, 0, sizeof(bz_stream));
1938         bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
1939         if (bzerror != BZ_OK) {
1940                 Util_CatchBZ2Error(bzerror);
1941                 goto error;
1942         }
1943
1944         self->running = 1;
1945
1946         return 0;
1947
1948 error:
1949 #ifdef WITH_THREAD
1950         if (self->lock) {
1951                 PyThread_free_lock(self->lock);
1952                 self->lock = NULL;
1953         }
1954 #endif
1955         Py_CLEAR(self->unused_data);
1956         return -1;
1957 }
1958
1959 static void
1960 BZ2Decomp_dealloc(BZ2DecompObject *self)
1961 {
1962 #ifdef WITH_THREAD
1963         if (self->lock)
1964                 PyThread_free_lock(self->lock);
1965 #endif
1966         Py_XDECREF(self->unused_data);
1967         BZ2_bzDecompressEnd(&self->bzs);
1968         Py_TYPE(self)->tp_free((PyObject *)self);
1969 }
1970
1971
1972 /* ===================================================================== */
1973 /* BZ2Decomp_Type definition. */
1974
1975 PyDoc_STRVAR(BZ2Decomp__doc__,
1976 "BZ2Decompressor() -> decompressor object\n\
1977 \n\
1978 Create a new decompressor object. This object may be used to decompress\n\
1979 data sequentially. If you want to decompress data in one shot, use the\n\
1980 decompress() function instead.\n\
1981 ");
1982
1983 static PyTypeObject BZ2Decomp_Type = {
1984         PyVarObject_HEAD_INIT(NULL, 0)
1985         "bz2.BZ2Decompressor",  /*tp_name*/
1986         sizeof(BZ2DecompObject), /*tp_basicsize*/
1987         0,                      /*tp_itemsize*/
1988         (destructor)BZ2Decomp_dealloc, /*tp_dealloc*/
1989         0,                      /*tp_print*/
1990         0,                      /*tp_getattr*/
1991         0,                      /*tp_setattr*/
1992         0,                      /*tp_compare*/
1993         0,                      /*tp_repr*/
1994         0,                      /*tp_as_number*/
1995         0,                      /*tp_as_sequence*/
1996         0,                      /*tp_as_mapping*/
1997         0,                      /*tp_hash*/
1998         0,                      /*tp_call*/
1999         0,                      /*tp_str*/
2000         PyObject_GenericGetAttr,/*tp_getattro*/
2001         PyObject_GenericSetAttr,/*tp_setattro*/
2002         0,                      /*tp_as_buffer*/
2003         Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
2004         BZ2Decomp__doc__,       /*tp_doc*/
2005         0,                      /*tp_traverse*/
2006         0,                      /*tp_clear*/
2007         0,                      /*tp_richcompare*/
2008         0,                      /*tp_weaklistoffset*/
2009         0,                      /*tp_iter*/
2010         0,                      /*tp_iternext*/
2011         BZ2Decomp_methods,      /*tp_methods*/
2012         BZ2Decomp_members,      /*tp_members*/
2013         0,                      /*tp_getset*/
2014         0,                      /*tp_base*/
2015         0,                      /*tp_dict*/
2016         0,                      /*tp_descr_get*/
2017         0,                      /*tp_descr_set*/
2018         0,                      /*tp_dictoffset*/
2019         (initproc)BZ2Decomp_init, /*tp_init*/
2020         PyType_GenericAlloc,    /*tp_alloc*/
2021         PyType_GenericNew,      /*tp_new*/
2022         _PyObject_Del,          /*tp_free*/
2023         0,                      /*tp_is_gc*/
2024 };
2025
2026
2027 /* ===================================================================== */
2028 /* Module functions. */
2029
2030 PyDoc_STRVAR(bz2_compress__doc__,
2031 "compress(data [, compresslevel=9]) -> string\n\
2032 \n\
2033 Compress data in one shot. If you want to compress data sequentially,\n\
2034 use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
2035 given, must be a number between 1 and 9.\n\
2036 ");
2037
2038 static PyObject *
2039 bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs)
2040 {
2041         int compresslevel=9;
2042         char *data;
2043         int datasize;
2044         int bufsize;
2045         PyObject *ret = NULL;
2046         bz_stream _bzs;
2047         bz_stream *bzs = &_bzs;
2048         int bzerror;
2049         static char *kwlist[] = {"data", "compresslevel", 0};
2050
2051         if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|i",
2052                                          kwlist, &data, &datasize,
2053                                          &compresslevel))
2054                 return NULL;
2055
2056         if (compresslevel < 1 || compresslevel > 9) {
2057                 PyErr_SetString(PyExc_ValueError,
2058                                 "compresslevel must be between 1 and 9");
2059                 return NULL;
2060         }
2061
2062         /* Conforming to bz2 manual, this is large enough to fit compressed
2063          * data in one shot. We will check it later anyway. */
2064         bufsize = datasize + (datasize/100+1) + 600;
2065
2066         ret = PyString_FromStringAndSize(NULL, bufsize);
2067         if (!ret)
2068                 return NULL;
2069
2070         memset(bzs, 0, sizeof(bz_stream));
2071
2072         bzs->next_in = data;
2073         bzs->avail_in = datasize;
2074         bzs->next_out = BUF(ret);
2075         bzs->avail_out = bufsize;
2076
2077         bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0);
2078         if (bzerror != BZ_OK) {
2079                 Util_CatchBZ2Error(bzerror);
2080                 Py_DECREF(ret);
2081                 return NULL;
2082         }
2083
2084         for (;;) {
2085                 Py_BEGIN_ALLOW_THREADS
2086                 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
2087                 Py_END_ALLOW_THREADS
2088                 if (bzerror == BZ_STREAM_END) {
2089                         break;
2090                 } else if (bzerror != BZ_FINISH_OK) {
2091                         BZ2_bzCompressEnd(bzs);
2092                         Util_CatchBZ2Error(bzerror);
2093                         Py_DECREF(ret);
2094                         return NULL;
2095                 }
2096                 if (bzs->avail_out == 0) {
2097                         bufsize = Util_NewBufferSize(bufsize);
2098                         if (_PyString_Resize(&ret, bufsize) < 0) {
2099                                 BZ2_bzCompressEnd(bzs);
2100                                 Py_DECREF(ret);
2101                                 return NULL;
2102                         }
2103                         bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2104                         bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2105                 }
2106         }
2107
2108         if (bzs->avail_out != 0)
2109                 _PyString_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs));
2110         BZ2_bzCompressEnd(bzs);
2111
2112         return ret;
2113 }
2114
2115 PyDoc_STRVAR(bz2_decompress__doc__,
2116 "decompress(data) -> decompressed data\n\
2117 \n\
2118 Decompress data in one shot. If you want to decompress data sequentially,\n\
2119 use an instance of BZ2Decompressor instead.\n\
2120 ");
2121
2122 static PyObject *
2123 bz2_decompress(PyObject *self, PyObject *args)
2124 {
2125         char *data;
2126         int datasize;
2127         int bufsize = SMALLCHUNK;
2128         PyObject *ret;
2129         bz_stream _bzs;
2130         bz_stream *bzs = &_bzs;
2131         int bzerror;
2132
2133         if (!PyArg_ParseTuple(args, "s#:decompress", &data, &datasize))
2134                 return NULL;
2135
2136         if (datasize == 0)
2137                 return PyString_FromString("");
2138
2139         ret = PyString_FromStringAndSize(NULL, bufsize);
2140         if (!ret)
2141                 return NULL;
2142
2143         memset(bzs, 0, sizeof(bz_stream));
2144
2145         bzs->next_in = data;
2146         bzs->avail_in = datasize;
2147         bzs->next_out = BUF(ret);
2148         bzs->avail_out = bufsize;
2149
2150         bzerror = BZ2_bzDecompressInit(bzs, 0, 0);
2151         if (bzerror != BZ_OK) {
2152                 Util_CatchBZ2Error(bzerror);
2153                 Py_DECREF(ret);
2154                 return NULL;
2155         }
2156
2157         for (;;) {
2158                 Py_BEGIN_ALLOW_THREADS
2159                 bzerror = BZ2_bzDecompress(bzs);
2160                 Py_END_ALLOW_THREADS
2161                 if (bzerror == BZ_STREAM_END) {
2162                         break;
2163                 } else if (bzerror != BZ_OK) {
2164                         BZ2_bzDecompressEnd(bzs);
2165                         Util_CatchBZ2Error(bzerror);
2166                         Py_DECREF(ret);
2167                         return NULL;
2168                 }
2169                 if (bzs->avail_in == 0) {
2170                         BZ2_bzDecompressEnd(bzs);
2171                         PyErr_SetString(PyExc_ValueError,
2172                                         "couldn't find end of stream");
2173                         Py_DECREF(ret);
2174                         return NULL;
2175                 }
2176                 if (bzs->avail_out == 0) {
2177                         bufsize = Util_NewBufferSize(bufsize);
2178                         if (_PyString_Resize(&ret, bufsize) < 0) {
2179                                 BZ2_bzDecompressEnd(bzs);
2180                                 Py_DECREF(ret);
2181                                 return NULL;
2182                         }
2183                         bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2184                         bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2185                 }
2186         }
2187
2188         if (bzs->avail_out != 0)
2189                 _PyString_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs));
2190         BZ2_bzDecompressEnd(bzs);
2191
2192         return ret;
2193 }
2194
2195 static PyMethodDef bz2_methods[] = {
2196         {"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS,
2197                 bz2_compress__doc__},
2198         {"decompress", (PyCFunction) bz2_decompress, METH_VARARGS,
2199                 bz2_decompress__doc__},
2200         {NULL,          NULL}           /* sentinel */
2201 };
2202
2203 /* ===================================================================== */
2204 /* Initialization function. */
2205
2206 PyDoc_STRVAR(bz2__doc__,
2207 "The python bz2 module provides a comprehensive interface for\n\
2208 the bz2 compression library. It implements a complete file\n\
2209 interface, one shot (de)compression functions, and types for\n\
2210 sequential (de)compression.\n\
2211 ");
2212
2213 PyMODINIT_FUNC
2214 initbz2(void)
2215 {
2216         PyObject *m;
2217
2218         Py_TYPE(&BZ2File_Type) = &PyType_Type;
2219         Py_TYPE(&BZ2Comp_Type) = &PyType_Type;
2220         Py_TYPE(&BZ2Decomp_Type) = &PyType_Type;
2221
2222         m = Py_InitModule3("bz2", bz2_methods, bz2__doc__);
2223         if (m == NULL)
2224                 return;
2225
2226         PyModule_AddObject(m, "__author__", PyString_FromString(__author__));
2227
2228         Py_INCREF(&BZ2File_Type);
2229         PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type);
2230
2231         Py_INCREF(&BZ2Comp_Type);
2232         PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type);
2233
2234         Py_INCREF(&BZ2Decomp_Type);
2235         PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type);
2236 }