Modules/bz2module.c

   1 /*
   2
   3 python-bz2 - python bz2 library interface
   4
   5 Copyright (c) 2002  Gustavo Niemeyer <niemeyer@conectiva.com>
   6 Copyright (c) 2002  Python Software Foundation; All Rights Reserved
   7
   8 */
   9
  10 #include "Python.h"
  11 #include <stdio.h>
  12 #include <bzlib.h>
  13 #include "structmember.h"
  14
  15 #ifdef WITH_THREAD
  16 #include "pythread.h"
  17 #endif
  18
/* Author credit text.  Presumably exported as the module's __author__
 * attribute by the module init code, which is not visible in this part
 * of the file -- confirm. */
static char __author__[] =
"The bz2 python module was written by:\n\
\n\
    Gustavo Niemeyer <niemeyer@conectiva.com>\n\
";
  24
/* Our very own off_t-like type, 64-bit if possible.
 * Copied from Objects/fileobject.c.
 *
 * Selection order: without HAVE_LARGEFILE_SUPPORT plain off_t is used;
 * with it, off_t is used when it is at least 8 bytes wide, otherwise
 * fpos_t when that is at least 8 bytes wide, otherwise the build fails.
 */
#if !defined(HAVE_LARGEFILE_SUPPORT)
typedef off_t Py_off_t;
#elif SIZEOF_OFF_T >= 8
typedef off_t Py_off_t;
#elif SIZEOF_FPOS_T >= 8
typedef fpos_t Py_off_t;
#else
#error "Large file support, but neither off_t nor fpos_t is large enough."
#endif
  36
  37 #define BUF(v) PyString_AS_STRING((PyStringObject *)v)
  38
  39 #define MODE_CLOSED   0
  40 #define MODE_READ     1
  41 #define MODE_READ_EOF 2
  42 #define MODE_WRITE    3
  43
  44 #define BZ2FileObject_Check(v)  (Py_TYPE(v) == &BZ2File_Type)
  45
  46
  47 #ifdef BZ_CONFIG_ERROR
  48
  49 #if SIZEOF_LONG >= 8
  50 #define BZS_TOTAL_OUT(bzs) \
  51         (((long)bzs->total_out_hi32 << 32) + bzs->total_out_lo32)
  52 #elif SIZEOF_LONG_LONG >= 8
  53 #define BZS_TOTAL_OUT(bzs) \
  54         (((PY_LONG_LONG)bzs->total_out_hi32 << 32) + bzs->total_out_lo32)
  55 #else
  56 #define BZS_TOTAL_OUT(bzs) \
  57         bzs->total_out_lo32
  58 #endif
  59
  60 #else /* ! BZ_CONFIG_ERROR */
  61
  62 #define BZ2_bzRead bzRead
  63 #define BZ2_bzReadOpen bzReadOpen
  64 #define BZ2_bzReadClose bzReadClose
  65 #define BZ2_bzWrite bzWrite
  66 #define BZ2_bzWriteOpen bzWriteOpen
  67 #define BZ2_bzWriteClose bzWriteClose
  68 #define BZ2_bzCompress bzCompress
  69 #define BZ2_bzCompressInit bzCompressInit
  70 #define BZ2_bzCompressEnd bzCompressEnd
  71 #define BZ2_bzDecompress bzDecompress
  72 #define BZ2_bzDecompressInit bzDecompressInit
  73 #define BZ2_bzDecompressEnd bzDecompressEnd
  74
  75 #define BZS_TOTAL_OUT(bzs) bzs->total_out
  76
  77 #endif /* ! BZ_CONFIG_ERROR */
  78
  79
  80 #ifdef WITH_THREAD
  81 #define ACQUIRE_LOCK(obj) PyThread_acquire_lock(obj->lock, 1)
  82 #define RELEASE_LOCK(obj) PyThread_release_lock(obj->lock)
  83 #else
  84 #define ACQUIRE_LOCK(obj)
  85 #define RELEASE_LOCK(obj)
  86 #endif
  87
  88 /* Bits in f_newlinetypes */
  89 #define NEWLINE_UNKNOWN 0       /* No newline seen, yet */
  90 #define NEWLINE_CR 1            /* \r newline seen */
  91 #define NEWLINE_LF 2            /* \n newline seen */
  92 #define NEWLINE_CRLF 4          /* \r\n newline seen */
  93
  94 /* ===================================================================== */
  95 /* Structure definitions. */
  96
/* Instance state of a BZ2File object. */
typedef struct {
        PyObject_HEAD
        /* Presumably the underlying Python file object; it is not used in
         * this part of the file -- confirm against the constructor. */
        PyObject *file;

        char* f_buf;            /* Allocated readahead buffer */
        char* f_bufend;         /* Points after last occupied position */
        char* f_bufptr;         /* Current buffer position */

        int f_softspace;        /* Flag used by 'print' command */

        int f_univ_newline;     /* Handle any newline convention */
        int f_newlinetypes;     /* Types of newlines seen */
        int f_skipnextlf;       /* Skip next \n */

        BZFILE *fp;             /* Handle passed to BZ2_bzRead/BZ2_bzWrite */
        int mode;               /* One of the MODE_* constants */
        Py_off_t pos;           /* Advanced by the read and write paths */
        Py_off_t size;          /* Set to pos once BZ_STREAM_END is seen */
#ifdef WITH_THREAD
        /* Taken by ACQUIRE_LOCK/RELEASE_LOCK around each method body */
        PyThread_type_lock lock;
#endif
} BZ2FileObject;
 119
/* Instance state built around a raw bz_stream.  Judging by the name this
 * backs the compressor object; its methods are not visible in this part
 * of the file -- confirm. */
typedef struct {
        PyObject_HEAD
        bz_stream bzs;          /* libbz2 stream state */
        int running;            /* NOTE(review): looks like a "stream still
                                   active" flag -- confirm against users */
#ifdef WITH_THREAD
        PyThread_type_lock lock;        /* Used via ACQUIRE_LOCK/RELEASE_LOCK */
#endif
} BZ2CompObject;
 128
/* Instance state built around a raw bz_stream.  Judging by the name this
 * backs the decompressor object; its methods are not visible in this
 * part of the file -- confirm. */
typedef struct {
        PyObject_HEAD
        bz_stream bzs;          /* libbz2 stream state */
        int running;            /* NOTE(review): looks like a "stream still
                                   active" flag -- confirm against users */
        PyObject *unused_data;  /* presumably input found after the end of
                                   the compressed stream -- confirm */
#ifdef WITH_THREAD
        PyThread_type_lock lock;        /* Used via ACQUIRE_LOCK/RELEASE_LOCK */
#endif
} BZ2DecompObject;
 138
 139 /* ===================================================================== */
 140 /* Utility functions. */
 141
 142 static int
 143 Util_CatchBZ2Error(int bzerror)
 144 {
 145         int ret = 0;
 146         switch(bzerror) {
 147                 case BZ_OK:
 148                 case BZ_STREAM_END:
 149                         break;
 150
 151 #ifdef BZ_CONFIG_ERROR
 152                 case BZ_CONFIG_ERROR:
 153                         PyErr_SetString(PyExc_SystemError,
 154                                         "the bz2 library was not compiled "
 155                                         "correctly");
 156                         ret = 1;
 157                         break;
 158 #endif
 159
 160                 case BZ_PARAM_ERROR:
 161                         PyErr_SetString(PyExc_ValueError,
 162                                         "the bz2 library has received wrong "
 163                                         "parameters");
 164                         ret = 1;
 165                         break;
 166
 167                 case BZ_MEM_ERROR:
 168                         PyErr_NoMemory();
 169                         ret = 1;
 170                         break;
 171
 172                 case BZ_DATA_ERROR:
 173                 case BZ_DATA_ERROR_MAGIC:
 174                         PyErr_SetString(PyExc_IOError, "invalid data stream");
 175                         ret = 1;
 176                         break;
 177
 178                 case BZ_IO_ERROR:
 179                         PyErr_SetString(PyExc_IOError, "unknown IO error");
 180                         ret = 1;
 181                         break;
 182
 183                 case BZ_UNEXPECTED_EOF:
 184                         PyErr_SetString(PyExc_EOFError,
 185                                         "compressed file ended before the "
 186                                         "logical end-of-stream was detected");
 187                         ret = 1;
 188                         break;
 189
 190                 case BZ_SEQUENCE_ERROR:
 191                         PyErr_SetString(PyExc_RuntimeError,
 192                                         "wrong sequence of bz2 library "
 193                                         "commands used");
 194                         ret = 1;
 195                         break;
 196         }
 197         return ret;
 198 }
 199
 200 #if BUFSIZ < 8192
 201 #define SMALLCHUNK 8192
 202 #else
 203 #define SMALLCHUNK BUFSIZ
 204 #endif
 205
 206 #if SIZEOF_INT < 4
 207 #define BIGCHUNK  (512 * 32)
 208 #else
 209 #define BIGCHUNK  (512 * 1024)
 210 #endif
 211
 212 /* This is a hacked version of Python's fileobject.c:new_buffersize(). */
 213 static size_t
 214 Util_NewBufferSize(size_t currentsize)
 215 {
 216         if (currentsize > SMALLCHUNK) {
 217                 /* Keep doubling until we reach BIGCHUNK;
 218                    then keep adding BIGCHUNK. */
 219                 if (currentsize <= BIGCHUNK)
 220                         return currentsize + currentsize;
 221                 else
 222                         return currentsize + BIGCHUNK;
 223         }
 224         return currentsize + SMALLCHUNK;
 225 }
 226
 227 /* This is a hacked version of Python's fileobject.c:get_line(). */
 228 static PyObject *
 229 Util_GetLine(BZ2FileObject *f, int n)
 230 {
 231         char c;
 232         char *buf, *end;
 233         size_t total_v_size;    /* total # of slots in buffer */
 234         size_t used_v_size;     /* # used slots in buffer */
 235         size_t increment;       /* amount to increment the buffer */
 236         PyObject *v;
 237         int bzerror;
 238         int bytes_read;
 239         int newlinetypes = f->f_newlinetypes;
 240         int skipnextlf = f->f_skipnextlf;
 241         int univ_newline = f->f_univ_newline;
 242
 243         total_v_size = n > 0 ? n : 100;
 244         v = PyString_FromStringAndSize((char *)NULL, total_v_size);
 245         if (v == NULL)
 246                 return NULL;
 247
 248         buf = BUF(v);
 249         end = buf + total_v_size;
 250
 251         for (;;) {
 252                 Py_BEGIN_ALLOW_THREADS
 253                 while (buf != end) {
 254                         bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1);
 255                         f->pos++;
 256                         if (bytes_read == 0) break;
 257                         if (univ_newline) {
 258                                 if (skipnextlf) {
 259                                         skipnextlf = 0;
 260                                         if (c == '\n') {
 261                                                 /* Seeing a \n here with skipnextlf true means we
 262                                                  * saw a \r before.
 263                                                  */
 264                                                 newlinetypes |= NEWLINE_CRLF;
 265                                                 if (bzerror != BZ_OK) break;
 266                                                 bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1);
 267                                                 f->pos++;
 268                                                 if (bytes_read == 0) break;
 269                                         } else {
 270                                                 newlinetypes |= NEWLINE_CR;
 271                                         }
 272                                 }
 273                                 if (c == '\r') {
 274                                         skipnextlf = 1;
 275                                         c = '\n';
 276                                 } else if (c == '\n')
 277                                         newlinetypes |= NEWLINE_LF;
 278                         }
 279                         *buf++ = c;
 280                         if (bzerror != BZ_OK || c == '\n') break;
 281                 }
 282                 if (univ_newline && bzerror == BZ_STREAM_END && skipnextlf)
 283                         newlinetypes |= NEWLINE_CR;
 284                 Py_END_ALLOW_THREADS
 285                 f->f_newlinetypes = newlinetypes;
 286                 f->f_skipnextlf = skipnextlf;
 287                 if (bzerror == BZ_STREAM_END) {
 288                         f->size = f->pos;
 289                         f->mode = MODE_READ_EOF;
 290                         break;
 291                 } else if (bzerror != BZ_OK) {
 292                         Util_CatchBZ2Error(bzerror);
 293                         Py_DECREF(v);
 294                         return NULL;
 295                 }
 296                 if (c == '\n')
 297                         break;
 298                 /* Must be because buf == end */
 299                 if (n > 0)
 300                         break;
 301                 used_v_size = total_v_size;
 302                 increment = total_v_size >> 2; /* mild exponential growth */
 303                 total_v_size += increment;
 304                 if (total_v_size > INT_MAX) {
 305                         PyErr_SetString(PyExc_OverflowError,
 306                             "line is longer than a Python string can hold");
 307                         Py_DECREF(v);
 308                         return NULL;
 309                 }
 310                 if (_PyString_Resize(&v, total_v_size) < 0)
 311                         return NULL;
 312                 buf = BUF(v) + used_v_size;
 313                 end = BUF(v) + total_v_size;
 314         }
 315
 316         used_v_size = buf - BUF(v);
 317         if (used_v_size != total_v_size)
 318                 _PyString_Resize(&v, used_v_size);
 319         return v;
 320 }
 321
 322 /* This is a hacked version of Python's
 323  * fileobject.c:Py_UniversalNewlineFread(). */
 324 size_t
 325 Util_UnivNewlineRead(int *bzerror, BZFILE *stream,
 326                      char* buf, size_t n, BZ2FileObject *f)
 327 {
 328         char *dst = buf;
 329         int newlinetypes, skipnextlf;
 330
 331         assert(buf != NULL);
 332         assert(stream != NULL);
 333
 334         if (!f->f_univ_newline)
 335                 return BZ2_bzRead(bzerror, stream, buf, n);
 336
 337         newlinetypes = f->f_newlinetypes;
 338         skipnextlf = f->f_skipnextlf;
 339
 340         /* Invariant:  n is the number of bytes remaining to be filled
 341          * in the buffer.
 342          */
 343         while (n) {
 344                 size_t nread;
 345                 int shortread;
 346                 char *src = dst;
 347
 348                 nread = BZ2_bzRead(bzerror, stream, dst, n);
 349                 assert(nread <= n);
 350                 n -= nread; /* assuming 1 byte out for each in; will adjust */
 351                 shortread = n != 0;     /* true iff EOF or error */
 352                 while (nread--) {
 353                         char c = *src++;
 354                         if (c == '\r') {
 355                                 /* Save as LF and set flag to skip next LF. */
 356                                 *dst++ = '\n';
 357                                 skipnextlf = 1;
 358                         }
 359                         else if (skipnextlf && c == '\n') {
 360                                 /* Skip LF, and remember we saw CR LF. */
 361                                 skipnextlf = 0;
 362                                 newlinetypes |= NEWLINE_CRLF;
 363                                 ++n;
 364                         }
 365                         else {
 366                                 /* Normal char to be stored in buffer.  Also
 367                                  * update the newlinetypes flag if either this
 368                                  * is an LF or the previous char was a CR.
 369                                  */
 370                                 if (c == '\n')
 371                                         newlinetypes |= NEWLINE_LF;
 372                                 else if (skipnextlf)
 373                                         newlinetypes |= NEWLINE_CR;
 374                                 *dst++ = c;
 375                                 skipnextlf = 0;
 376                         }
 377                 }
 378                 if (shortread) {
 379                         /* If this is EOF, update type flags. */
 380                         if (skipnextlf && *bzerror == BZ_STREAM_END)
 381                                 newlinetypes |= NEWLINE_CR;
 382                         break;
 383                 }
 384         }
 385         f->f_newlinetypes = newlinetypes;
 386         f->f_skipnextlf = skipnextlf;
 387         return dst - buf;
 388 }
 389
 390 /* This is a hacked version of Python's fileobject.c:drop_readahead(). */
 391 static void
 392 Util_DropReadAhead(BZ2FileObject *f)
 393 {
 394         if (f->f_buf != NULL) {
 395                 PyMem_Free(f->f_buf);
 396                 f->f_buf = NULL;
 397         }
 398 }
 399
 400 /* This is a hacked version of Python's fileobject.c:readahead(). */
 401 static int
 402 Util_ReadAhead(BZ2FileObject *f, int bufsize)
 403 {
 404         int chunksize;
 405         int bzerror;
 406
 407         if (f->f_buf != NULL) {
 408                 if((f->f_bufend - f->f_bufptr) >= 1)
 409                         return 0;
 410                 else
 411                         Util_DropReadAhead(f);
 412         }
 413         if (f->mode == MODE_READ_EOF) {
 414                 f->f_bufptr = f->f_buf;
 415                 f->f_bufend = f->f_buf;
 416                 return 0;
 417         }
 418         if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
 419                 PyErr_NoMemory();
 420                 return -1;
 421         }
 422         Py_BEGIN_ALLOW_THREADS
 423         chunksize = Util_UnivNewlineRead(&bzerror, f->fp, f->f_buf,
 424                                          bufsize, f);
 425         Py_END_ALLOW_THREADS
 426         f->pos += chunksize;
 427         if (bzerror == BZ_STREAM_END) {
 428                 f->size = f->pos;
 429                 f->mode = MODE_READ_EOF;
 430         } else if (bzerror != BZ_OK) {
 431                 Util_CatchBZ2Error(bzerror);
 432                 Util_DropReadAhead(f);
 433                 return -1;
 434         }
 435         f->f_bufptr = f->f_buf;
 436         f->f_bufend = f->f_buf + chunksize;
 437         return 0;
 438 }
 439
 440 /* This is a hacked version of Python's
 441  * fileobject.c:readahead_get_line_skip(). */
 442 static PyStringObject *
 443 Util_ReadAheadGetLineSkip(BZ2FileObject *f, int skip, int bufsize)
 444 {
 445         PyStringObject* s;
 446         char *bufptr;
 447         char *buf;
 448         int len;
 449
 450         if (f->f_buf == NULL)
 451                 if (Util_ReadAhead(f, bufsize) < 0)
 452                         return NULL;
 453
 454         len = f->f_bufend - f->f_bufptr;
 455         if (len == 0)
 456                 return (PyStringObject *)
 457                         PyString_FromStringAndSize(NULL, skip);
 458         bufptr = memchr(f->f_bufptr, '\n', len);
 459         if (bufptr != NULL) {
 460                 bufptr++;                       /* Count the '\n' */
 461                 len = bufptr - f->f_bufptr;
 462                 s = (PyStringObject *)
 463                         PyString_FromStringAndSize(NULL, skip+len);
 464                 if (s == NULL)
 465                         return NULL;
 466                 memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
 467                 f->f_bufptr = bufptr;
 468                 if (bufptr == f->f_bufend)
 469                         Util_DropReadAhead(f);
 470         } else {
 471                 bufptr = f->f_bufptr;
 472                 buf = f->f_buf;
 473                 f->f_buf = NULL;        /* Force new readahead buffer */
 474                 s = Util_ReadAheadGetLineSkip(f, skip+len,
 475                                               bufsize + (bufsize>>2));
 476                 if (s == NULL) {
 477                         PyMem_Free(buf);
 478                         return NULL;
 479                 }
 480                 memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
 481                 PyMem_Free(buf);
 482         }
 483         return s;
 484 }
 485
 486 /* ===================================================================== */
 487 /* Methods of BZ2File. */
 488
 489 PyDoc_STRVAR(BZ2File_read__doc__,
 490 "read([size]) -> string\n\
 491 \n\
 492 Read at most size uncompressed bytes, returned as a string. If the size\n\
 493 argument is negative or omitted, read until EOF is reached.\n\
 494 ");
 495
 496 /* This is a hacked version of Python's fileobject.c:file_read(). */
 497 static PyObject *
 498 BZ2File_read(BZ2FileObject *self, PyObject *args)
 499 {
 500         long bytesrequested = -1;
 501         size_t bytesread, buffersize, chunksize;
 502         int bzerror;
 503         PyObject *ret = NULL;
 504
 505         if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
 506                 return NULL;
 507
 508         ACQUIRE_LOCK(self);
 509         switch (self->mode) {
 510                 case MODE_READ:
 511                         break;
 512                 case MODE_READ_EOF:
 513                         ret = PyString_FromString("");
 514                         goto cleanup;
 515                 case MODE_CLOSED:
 516                         PyErr_SetString(PyExc_ValueError,
 517                                         "I/O operation on closed file");
 518                         goto cleanup;
 519                 default:
 520                         PyErr_SetString(PyExc_IOError,
 521                                         "file is not ready for reading");
 522                         goto cleanup;
 523         }
 524
 525         if (bytesrequested < 0)
 526                 buffersize = Util_NewBufferSize((size_t)0);
 527         else
 528                 buffersize = bytesrequested;
 529         if (buffersize > INT_MAX) {
 530                 PyErr_SetString(PyExc_OverflowError,
 531                                 "requested number of bytes is "
 532                                 "more than a Python string can hold");
 533                 goto cleanup;
 534         }
 535         ret = PyString_FromStringAndSize((char *)NULL, buffersize);
 536         if (ret == NULL)
 537                 goto cleanup;
 538         bytesread = 0;
 539
 540         for (;;) {
 541                 Py_BEGIN_ALLOW_THREADS
 542                 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
 543                                                  BUF(ret)+bytesread,
 544                                                  buffersize-bytesread,
 545                                                  self);
 546                 self->pos += chunksize;
 547                 Py_END_ALLOW_THREADS
 548                 bytesread += chunksize;
 549                 if (bzerror == BZ_STREAM_END) {
 550                         self->size = self->pos;
 551                         self->mode = MODE_READ_EOF;
 552                         break;
 553                 } else if (bzerror != BZ_OK) {
 554                         Util_CatchBZ2Error(bzerror);
 555                         Py_DECREF(ret);
 556                         ret = NULL;
 557                         goto cleanup;
 558                 }
 559                 if (bytesrequested < 0) {
 560                         buffersize = Util_NewBufferSize(buffersize);
 561                         if (_PyString_Resize(&ret, buffersize) < 0)
 562                                 goto cleanup;
 563                 } else {
 564                         break;
 565                 }
 566         }
 567         if (bytesread != buffersize)
 568                 _PyString_Resize(&ret, bytesread);
 569
 570 cleanup:
 571         RELEASE_LOCK(self);
 572         return ret;
 573 }
 574
 575 PyDoc_STRVAR(BZ2File_readline__doc__,
 576 "readline([size]) -> string\n\
 577 \n\
 578 Return the next line from the file, as a string, retaining newline.\n\
 579 A non-negative size argument will limit the maximum number of bytes to\n\
 580 return (an incomplete line may be returned then). Return an empty\n\
 581 string at EOF.\n\
 582 ");
 583
 584 static PyObject *
 585 BZ2File_readline(BZ2FileObject *self, PyObject *args)
 586 {
 587         PyObject *ret = NULL;
 588         int sizehint = -1;
 589
 590         if (!PyArg_ParseTuple(args, "|i:readline", &sizehint))
 591                 return NULL;
 592
 593         ACQUIRE_LOCK(self);
 594         switch (self->mode) {
 595                 case MODE_READ:
 596                         break;
 597                 case MODE_READ_EOF:
 598                         ret = PyString_FromString("");
 599                         goto cleanup;
 600                 case MODE_CLOSED:
 601                         PyErr_SetString(PyExc_ValueError,
 602                                         "I/O operation on closed file");
 603                         goto cleanup;
 604                 default:
 605                         PyErr_SetString(PyExc_IOError,
 606                                         "file is not ready for reading");
 607                         goto cleanup;
 608         }
 609
 610         if (sizehint == 0)
 611                 ret = PyString_FromString("");
 612         else
 613                 ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint);
 614
 615 cleanup:
 616         RELEASE_LOCK(self);
 617         return ret;
 618 }
 619
 620 PyDoc_STRVAR(BZ2File_readlines__doc__,
 621 "readlines([size]) -> list\n\
 622 \n\
 623 Call readline() repeatedly and return a list of lines read.\n\
 624 The optional size argument, if given, is an approximate bound on the\n\
 625 total number of bytes in the lines returned.\n\
 626 ");
 627
 628 /* This is a hacked version of Python's fileobject.c:file_readlines(). */
 629 static PyObject *
 630 BZ2File_readlines(BZ2FileObject *self, PyObject *args)
 631 {
 632         long sizehint = 0;
 633         PyObject *list = NULL;
 634         PyObject *line;
 635         char small_buffer[SMALLCHUNK];
 636         char *buffer = small_buffer;
 637         size_t buffersize = SMALLCHUNK;
 638         PyObject *big_buffer = NULL;
 639         size_t nfilled = 0;
 640         size_t nread;
 641         size_t totalread = 0;
 642         char *p, *q, *end;
 643         int err;
 644         int shortread = 0;
 645         int bzerror;
 646
 647         if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
 648                 return NULL;
 649
 650         ACQUIRE_LOCK(self);
 651         switch (self->mode) {
 652                 case MODE_READ:
 653                         break;
 654                 case MODE_READ_EOF:
 655                         list = PyList_New(0);
 656                         goto cleanup;
 657                 case MODE_CLOSED:
 658                         PyErr_SetString(PyExc_ValueError,
 659                                         "I/O operation on closed file");
 660                         goto cleanup;
 661                 default:
 662                         PyErr_SetString(PyExc_IOError,
 663                                         "file is not ready for reading");
 664                         goto cleanup;
 665         }
 666
 667         if ((list = PyList_New(0)) == NULL)
 668                 goto cleanup;
 669
 670         for (;;) {
 671                 Py_BEGIN_ALLOW_THREADS
 672                 nread = Util_UnivNewlineRead(&bzerror, self->fp,
 673                                              buffer+nfilled,
 674                                              buffersize-nfilled, self);
 675                 self->pos += nread;
 676                 Py_END_ALLOW_THREADS
 677                 if (bzerror == BZ_STREAM_END) {
 678                         self->size = self->pos;
 679                         self->mode = MODE_READ_EOF;
 680                         if (nread == 0) {
 681                                 sizehint = 0;
 682                                 break;
 683                         }
 684                         shortread = 1;
 685                 } else if (bzerror != BZ_OK) {
 686                         Util_CatchBZ2Error(bzerror);
 687                   error:
 688                         Py_DECREF(list);
 689                         list = NULL;
 690                         goto cleanup;
 691                 }
 692                 totalread += nread;
 693                 p = memchr(buffer+nfilled, '\n', nread);
 694                 if (!shortread && p == NULL) {
 695                         /* Need a larger buffer to fit this line */
 696                         nfilled += nread;
 697                         buffersize *= 2;
 698                         if (buffersize > INT_MAX) {
 699                                 PyErr_SetString(PyExc_OverflowError,
 700                                 "line is longer than a Python string can hold");
 701                                 goto error;
 702                         }
 703                         if (big_buffer == NULL) {
 704                                 /* Create the big buffer */
 705                                 big_buffer = PyString_FromStringAndSize(
 706                                         NULL, buffersize);
 707                                 if (big_buffer == NULL)
 708                                         goto error;
 709                                 buffer = PyString_AS_STRING(big_buffer);
 710                                 memcpy(buffer, small_buffer, nfilled);
 711                         }
 712                         else {
 713                                 /* Grow the big buffer */
 714                                 _PyString_Resize(&big_buffer, buffersize);
 715                                 buffer = PyString_AS_STRING(big_buffer);
 716                         }
 717                         continue;
 718                 }
 719                 end = buffer+nfilled+nread;
 720                 q = buffer;
 721                 while (p != NULL) {
 722                         /* Process complete lines */
 723                         p++;
 724                         line = PyString_FromStringAndSize(q, p-q);
 725                         if (line == NULL)
 726                                 goto error;
 727                         err = PyList_Append(list, line);
 728                         Py_DECREF(line);
 729                         if (err != 0)
 730                                 goto error;
 731                         q = p;
 732                         p = memchr(q, '\n', end-q);
 733                 }
 734                 /* Move the remaining incomplete line to the start */
 735                 nfilled = end-q;
 736                 memmove(buffer, q, nfilled);
 737                 if (sizehint > 0)
 738                         if (totalread >= (size_t)sizehint)
 739                                 break;
 740                 if (shortread) {
 741                         sizehint = 0;
 742                         break;
 743                 }
 744         }
 745         if (nfilled != 0) {
 746                 /* Partial last line */
 747                 line = PyString_FromStringAndSize(buffer, nfilled);
 748                 if (line == NULL)
 749                         goto error;
 750                 if (sizehint > 0) {
 751                         /* Need to complete the last line */
 752                         PyObject *rest = Util_GetLine(self, 0);
 753                         if (rest == NULL) {
 754                                 Py_DECREF(line);
 755                                 goto error;
 756                         }
 757                         PyString_Concat(&line, rest);
 758                         Py_DECREF(rest);
 759                         if (line == NULL)
 760                                 goto error;
 761                 }
 762                 err = PyList_Append(list, line);
 763                 Py_DECREF(line);
 764                 if (err != 0)
 765                         goto error;
 766         }
 767
 768   cleanup:
 769         RELEASE_LOCK(self);
 770         if (big_buffer) {
 771                 Py_DECREF(big_buffer);
 772         }
 773         return list;
 774 }
 775
/* Docstring for BZ2File.xreadlines().  The method implementation is not
 * in this part of the file. */
PyDoc_STRVAR(BZ2File_xreadlines__doc__,
"xreadlines() -> self\n\
\n\
For backward compatibility. BZ2File objects now include the performance\n\
optimizations previously implemented in the xreadlines module.\n\
");
 782
 783 PyDoc_STRVAR(BZ2File_write__doc__,
 784 "write(data) -> None\n\
 785 \n\
 786 Write the 'data' string to file. Note that due to buffering, close() may\n\
 787 be needed before the file on disk reflects the data written.\n\
 788 ");
 789
 790 /* This is a hacked version of Python's fileobject.c:file_write(). */
 791 static PyObject *
 792 BZ2File_write(BZ2FileObject *self, PyObject *args)
 793 {
 794         PyObject *ret = NULL;
 795         Py_buffer pbuf;
 796         char *buf;
 797         int len;
 798         int bzerror;
 799
 800         if (!PyArg_ParseTuple(args, "s*:write", &pbuf))
 801                 return NULL;
 802         buf = pbuf.buf;
 803         len = pbuf.len;
 804
 805         ACQUIRE_LOCK(self);
 806         switch (self->mode) {
 807                 case MODE_WRITE:
 808                         break;
 809
 810                 case MODE_CLOSED:
 811                         PyErr_SetString(PyExc_ValueError,
 812                                         "I/O operation on closed file");
 813                         goto cleanup;
 814
 815                 default:
 816                         PyErr_SetString(PyExc_IOError,
 817                                         "file is not ready for writing");
 818                         goto cleanup;
 819         }
 820
 821         self->f_softspace = 0;
 822
 823         Py_BEGIN_ALLOW_THREADS
 824         BZ2_bzWrite (&bzerror, self->fp, buf, len);
 825         self->pos += len;
 826         Py_END_ALLOW_THREADS
 827
 828         if (bzerror != BZ_OK) {
 829                 Util_CatchBZ2Error(bzerror);
 830                 goto cleanup;
 831         }
 832
 833         Py_INCREF(Py_None);
 834         ret = Py_None;
 835
 836 cleanup:
 837         PyBuffer_Release(&pbuf);
 838         RELEASE_LOCK(self);
 839         return ret;
 840 }
 841
 842 PyDoc_STRVAR(BZ2File_writelines__doc__,
 843 "writelines(sequence_of_strings) -> None\n\
 844 \n\
 845 Write the sequence of strings to the file. Note that newlines are not\n\
 846 added. The sequence can be any iterable object producing strings. This is\n\
 847 equivalent to calling write() for each string.\n\
 848 ");
 849
 850 /* This is a hacked version of Python's fileobject.c:file_writelines(). */
 851 static PyObject *
 852 BZ2File_writelines(BZ2FileObject *self, PyObject *seq)
 853 {
 854 #define CHUNKSIZE 1000
 855         PyObject *list = NULL;
 856         PyObject *iter = NULL;
 857         PyObject *ret = NULL;
 858         PyObject *line;
 859         int i, j, index, len, islist;
 860         int bzerror;
 861
 862         ACQUIRE_LOCK(self);
 863         switch (self->mode) {
 864                 case MODE_WRITE:
 865                         break;
 866
 867                 case MODE_CLOSED:
 868                         PyErr_SetString(PyExc_ValueError,
 869                                         "I/O operation on closed file");
 870                         goto error;
 871
 872                 default:
 873                         PyErr_SetString(PyExc_IOError,
 874                                         "file is not ready for writing");
 875                         goto error;
 876         }
 877
 878         islist = PyList_Check(seq);
 879         if  (!islist) {
 880                 iter = PyObject_GetIter(seq);
 881                 if (iter == NULL) {
 882                         PyErr_SetString(PyExc_TypeError,
 883                                 "writelines() requires an iterable argument");
 884                         goto error;
 885                 }
 886                 list = PyList_New(CHUNKSIZE);
 887                 if (list == NULL)
 888                         goto error;
 889         }
 890
 891         /* Strategy: slurp CHUNKSIZE lines into a private list,
 892            checking that they are all strings, then write that list
 893            without holding the interpreter lock, then come back for more. */
 894         for (index = 0; ; index += CHUNKSIZE) {
 895                 if (islist) {
 896                         Py_XDECREF(list);
 897                         list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
 898                         if (list == NULL)
 899                                 goto error;
 900                         j = PyList_GET_SIZE(list);
 901                 }
 902                 else {
 903                         for (j = 0; j < CHUNKSIZE; j++) {
 904                                 line = PyIter_Next(iter);
 905                                 if (line == NULL) {
 906                                         if (PyErr_Occurred())
 907                                                 goto error;
 908                                         break;
 909                                 }
 910                                 PyList_SetItem(list, j, line);
 911                         }
 912                 }
 913                 if (j == 0)
 914                         break;
 915
 916                 /* Check that all entries are indeed strings. If not,
 917                    apply the same rules as for file.write() and
 918                    convert the rets to strings. This is slow, but
 919                    seems to be the only way since all conversion APIs
 920                    could potentially execute Python code. */
 921                 for (i = 0; i < j; i++) {
 922                         PyObject *v = PyList_GET_ITEM(list, i);
 923                         if (!PyString_Check(v)) {
 924                                 const char *buffer;
 925                                 Py_ssize_t len;
 926                                 if (PyObject_AsCharBuffer(v, &buffer, &len)) {
 927                                         PyErr_SetString(PyExc_TypeError,
 928                                                         "writelines() "
 929                                                         "argument must be "
 930                                                         "a sequence of "
 931                                                         "strings");
 932                                         goto error;
 933                                 }
 934                                 line = PyString_FromStringAndSize(buffer,
 935                                                                   len);
 936                                 if (line == NULL)
 937                                         goto error;
 938                                 Py_DECREF(v);
 939                                 PyList_SET_ITEM(list, i, line);
 940                         }
 941                 }
 942
 943                 self->f_softspace = 0;
 944
 945                 /* Since we are releasing the global lock, the
 946                    following code may *not* execute Python code. */
 947                 Py_BEGIN_ALLOW_THREADS
 948                 for (i = 0; i < j; i++) {
 949                         line = PyList_GET_ITEM(list, i);
 950                         len = PyString_GET_SIZE(line);
 951                         BZ2_bzWrite (&bzerror, self->fp,
 952                                      PyString_AS_STRING(line), len);
 953                         if (bzerror != BZ_OK) {
 954                                 Py_BLOCK_THREADS
 955                                 Util_CatchBZ2Error(bzerror);
 956                                 goto error;
 957                         }
 958                 }
 959                 Py_END_ALLOW_THREADS
 960
 961                 if (j < CHUNKSIZE)
 962                         break;
 963         }
 964
 965         Py_INCREF(Py_None);
 966         ret = Py_None;
 967
 968   error:
 969         RELEASE_LOCK(self);
 970         Py_XDECREF(list);
 971         Py_XDECREF(iter);
 972         return ret;
 973 #undef CHUNKSIZE
 974 }
 975
 976 PyDoc_STRVAR(BZ2File_seek__doc__,
 977 "seek(offset [, whence]) -> None\n\
 978 \n\
 979 Move to new file position. Argument offset is a byte count. Optional\n\
 980 argument whence defaults to 0 (offset from start of file, offset\n\
 981 should be >= 0); other values are 1 (move relative to current position,\n\
 982 positive or negative), and 2 (move relative to end of file, usually\n\
 983 negative, although many platforms allow seeking beyond the end of a file).\n\
 984 \n\
 985 Note that seeking of bz2 files is emulated, and depending on the parameters\n\
 986 the operation may be extremely slow.\n\
 987 ");
 988
 989 static PyObject *
 990 BZ2File_seek(BZ2FileObject *self, PyObject *args)
 991 {
 992         int where = 0;
 993         PyObject *offobj;
 994         Py_off_t offset;
 995         char small_buffer[SMALLCHUNK];
 996         char *buffer = small_buffer;
 997         size_t buffersize = SMALLCHUNK;
 998         Py_off_t bytesread = 0;
 999         size_t readsize;
1000         int chunksize;
1001         int bzerror;
1002         PyObject *ret = NULL;
1003
1004         if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &where))
1005                 return NULL;
1006 #if !defined(HAVE_LARGEFILE_SUPPORT)
1007         offset = PyInt_AsLong(offobj);
1008 #else
1009         offset = PyLong_Check(offobj) ?
1010                 PyLong_AsLongLong(offobj) : PyInt_AsLong(offobj);
1011 #endif
1012         if (PyErr_Occurred())
1013                 return NULL;
1014
1015         ACQUIRE_LOCK(self);
1016         Util_DropReadAhead(self);
1017         switch (self->mode) {
1018                 case MODE_READ:
1019                 case MODE_READ_EOF:
1020                         break;
1021
1022                 case MODE_CLOSED:
1023                         PyErr_SetString(PyExc_ValueError,
1024                                         "I/O operation on closed file");
1025                         goto cleanup;
1026
1027                 default:
1028                         PyErr_SetString(PyExc_IOError,
1029                                         "seek works only while reading");
1030                         goto cleanup;
1031         }
1032
1033         if (where == 2) {
1034                 if (self->size == -1) {
1035                         assert(self->mode != MODE_READ_EOF);
1036                         for (;;) {
1037                                 Py_BEGIN_ALLOW_THREADS
1038                                 chunksize = Util_UnivNewlineRead(
1039                                                 &bzerror, self->fp,
1040                                                 buffer, buffersize,
1041                                                 self);
1042                                 self->pos += chunksize;
1043                                 Py_END_ALLOW_THREADS
1044
1045                                 bytesread += chunksize;
1046                                 if (bzerror == BZ_STREAM_END) {
1047                                         break;
1048                                 } else if (bzerror != BZ_OK) {
1049                                         Util_CatchBZ2Error(bzerror);
1050                                         goto cleanup;
1051                                 }
1052                         }
1053                         self->mode = MODE_READ_EOF;
1054                         self->size = self->pos;
1055                         bytesread = 0;
1056                 }
1057                 offset = self->size + offset;
1058         } else if (where == 1) {
1059                 offset = self->pos + offset;
1060         }
1061
1062         /* Before getting here, offset must be the absolute position the file
1063          * pointer should be set to. */
1064
1065         if (offset >= self->pos) {
1066                 /* we can move forward */
1067                 offset -= self->pos;
1068         } else {
1069                 /* we cannot move back, so rewind the stream */
1070                 BZ2_bzReadClose(&bzerror, self->fp);
1071                 if (self->fp) {
1072                         PyFile_DecUseCount((PyFileObject *)self->file);
1073                         self->fp = NULL;
1074                 }
1075                 if (bzerror != BZ_OK) {
1076                         Util_CatchBZ2Error(bzerror);
1077                         goto cleanup;
1078                 }
1079                 ret = PyObject_CallMethod(self->file, "seek", "(i)", 0);
1080                 if (!ret)
1081                         goto cleanup;
1082                 Py_DECREF(ret);
1083                 ret = NULL;
1084                 self->pos = 0;
1085                 self->fp = BZ2_bzReadOpen(&bzerror, PyFile_AsFile(self->file),
1086                                           0, 0, NULL, 0);
1087                 if (self->fp)
1088                         PyFile_IncUseCount((PyFileObject *)self->file);
1089                 if (bzerror != BZ_OK) {
1090                         Util_CatchBZ2Error(bzerror);
1091                         goto cleanup;
1092                 }
1093                 self->mode = MODE_READ;
1094         }
1095
1096         if (offset <= 0 || self->mode == MODE_READ_EOF)
1097                 goto exit;
1098
1099         /* Before getting here, offset must be set to the number of bytes
1100          * to walk forward. */
1101         for (;;) {
1102                 if (offset-bytesread > buffersize)
1103                         readsize = buffersize;
1104                 else
1105                         /* offset might be wider that readsize, but the result
1106                          * of the subtraction is bound by buffersize (see the
1107                          * condition above). buffersize is 8192. */
1108                         readsize = (size_t)(offset-bytesread);
1109                 Py_BEGIN_ALLOW_THREADS
1110                 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
1111                                                  buffer, readsize, self);
1112                 self->pos += chunksize;
1113                 Py_END_ALLOW_THREADS
1114                 bytesread += chunksize;
1115                 if (bzerror == BZ_STREAM_END) {
1116                         self->size = self->pos;
1117                         self->mode = MODE_READ_EOF;
1118                         break;
1119                 } else if (bzerror != BZ_OK) {
1120                         Util_CatchBZ2Error(bzerror);
1121                         goto cleanup;
1122                 }
1123                 if (bytesread == offset)
1124                         break;
1125         }
1126
1127 exit:
1128         Py_INCREF(Py_None);
1129         ret = Py_None;
1130
1131 cleanup:
1132         RELEASE_LOCK(self);
1133         return ret;
1134 }
1135
1136 PyDoc_STRVAR(BZ2File_tell__doc__,
1137 "tell() -> int\n\
1138 \n\
1139 Return the current file position, an integer (may be a long integer).\n\
1140 ");
1141
1142 static PyObject *
1143 BZ2File_tell(BZ2FileObject *self, PyObject *args)
1144 {
1145         PyObject *ret = NULL;
1146
1147         if (self->mode == MODE_CLOSED) {
1148                 PyErr_SetString(PyExc_ValueError,
1149                                 "I/O operation on closed file");
1150                 goto cleanup;
1151         }
1152
1153 #if !defined(HAVE_LARGEFILE_SUPPORT)
1154         ret = PyInt_FromLong(self->pos);
1155 #else
1156         ret = PyLong_FromLongLong(self->pos);
1157 #endif
1158
1159 cleanup:
1160         return ret;
1161 }
1162
1163 PyDoc_STRVAR(BZ2File_close__doc__,
1164 "close() -> None or (perhaps) an integer\n\
1165 \n\
1166 Close the file. Sets data attribute .closed to true. A closed file\n\
1167 cannot be used for further I/O operations. close() may be called more\n\
1168 than once without error.\n\
1169 ");
1170
1171 static PyObject *
1172 BZ2File_close(BZ2FileObject *self)
1173 {
1174         PyObject *ret = NULL;
1175         int bzerror = BZ_OK;
1176
1177         ACQUIRE_LOCK(self);
1178         switch (self->mode) {
1179                 case MODE_READ:
1180                 case MODE_READ_EOF:
1181                         BZ2_bzReadClose(&bzerror, self->fp);
1182                         break;
1183                 case MODE_WRITE:
1184                         BZ2_bzWriteClose(&bzerror, self->fp,
1185                                          0, NULL, NULL);
1186                         break;
1187         }
1188         if (self->fp) {
1189                 PyFile_DecUseCount((PyFileObject *)self->file);
1190                 self->fp = NULL;
1191         }
1192         self->mode = MODE_CLOSED;
1193         ret = PyObject_CallMethod(self->file, "close", NULL);
1194         if (bzerror != BZ_OK) {
1195                 Util_CatchBZ2Error(bzerror);
1196                 Py_XDECREF(ret);
1197                 ret = NULL;
1198         }
1199
1200         RELEASE_LOCK(self);
1201         return ret;
1202 }
1203
1204 PyDoc_STRVAR(BZ2File_enter_doc,
1205 "__enter__() -> self.");
1206
1207 static PyObject *
1208 BZ2File_enter(BZ2FileObject *self)
1209 {
1210         if (self->mode == MODE_CLOSED) {
1211                 PyErr_SetString(PyExc_ValueError,
1212                         "I/O operation on closed file");
1213                 return NULL;
1214         }
1215         Py_INCREF(self);
1216         return (PyObject *) self;
1217 }
1218
1219 PyDoc_STRVAR(BZ2File_exit_doc,
1220 "__exit__(*excinfo) -> None.  Closes the file.");
1221
1222 static PyObject *
1223 BZ2File_exit(BZ2FileObject *self, PyObject *args)
1224 {
1225         PyObject *ret = PyObject_CallMethod((PyObject *) self, "close", NULL);
1226         if (!ret)
1227                 /* If error occurred, pass through */
1228                 return NULL;
1229         Py_DECREF(ret);
1230         Py_RETURN_NONE;
1231 }
1232
1233
1234 static PyObject *BZ2File_getiter(BZ2FileObject *self);
1235
1236 static PyMethodDef BZ2File_methods[] = {
1237         {"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__},
1238         {"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__doc__},
1239         {"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readlines__doc__},
1240         {"xreadlines", (PyCFunction)BZ2File_getiter, METH_VARARGS, BZ2File_xreadlines__doc__},
1241         {"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__},
1242         {"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__doc__},
1243         {"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__},
1244         {"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__},
1245         {"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__},
1246         {"__enter__", (PyCFunction)BZ2File_enter, METH_NOARGS, BZ2File_enter_doc},
1247         {"__exit__", (PyCFunction)BZ2File_exit, METH_VARARGS, BZ2File_exit_doc},
1248         {NULL,          NULL}           /* sentinel */
1249 };
1250
1251
1252 /* ===================================================================== */
1253 /* Getters and setters of BZ2File. */
1254
1255 /* This is a hacked version of Python's fileobject.c:get_newlines(). */
1256 static PyObject *
1257 BZ2File_get_newlines(BZ2FileObject *self, void *closure)
1258 {
1259         switch (self->f_newlinetypes) {
1260         case NEWLINE_UNKNOWN:
1261                 Py_INCREF(Py_None);
1262                 return Py_None;
1263         case NEWLINE_CR:
1264                 return PyString_FromString("\r");
1265         case NEWLINE_LF:
1266                 return PyString_FromString("\n");
1267         case NEWLINE_CR|NEWLINE_LF:
1268                 return Py_BuildValue("(ss)", "\r", "\n");
1269         case NEWLINE_CRLF:
1270                 return PyString_FromString("\r\n");
1271         case NEWLINE_CR|NEWLINE_CRLF:
1272                 return Py_BuildValue("(ss)", "\r", "\r\n");
1273         case NEWLINE_LF|NEWLINE_CRLF:
1274                 return Py_BuildValue("(ss)", "\n", "\r\n");
1275         case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF:
1276                 return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
1277         default:
1278                 PyErr_Format(PyExc_SystemError,
1279                              "Unknown newlines value 0x%x\n",
1280                              self->f_newlinetypes);
1281                 return NULL;
1282         }
1283 }
1284
1285 static PyObject *
1286 BZ2File_get_closed(BZ2FileObject *self, void *closure)
1287 {
1288         return PyInt_FromLong(self->mode == MODE_CLOSED);
1289 }
1290
1291 static PyObject *
1292 BZ2File_get_mode(BZ2FileObject *self, void *closure)
1293 {
1294         return PyObject_GetAttrString(self->file, "mode");
1295 }
1296
1297 static PyObject *
1298 BZ2File_get_name(BZ2FileObject *self, void *closure)
1299 {
1300         return PyObject_GetAttrString(self->file, "name");
1301 }
1302
1303 static PyGetSetDef BZ2File_getset[] = {
1304         {"closed", (getter)BZ2File_get_closed, NULL,
1305                         "True if the file is closed"},
1306         {"newlines", (getter)BZ2File_get_newlines, NULL,
1307                         "end-of-line convention used in this file"},
1308         {"mode", (getter)BZ2File_get_mode, NULL,
1309                         "file mode ('r', 'w', or 'U')"},
1310         {"name", (getter)BZ2File_get_name, NULL,
1311                         "file name"},
1312         {NULL}  /* Sentinel */
1313 };
1314
1315
1316 /* ===================================================================== */
1317 /* Members of BZ2File_Type. */
1318
1319 #undef OFF
1320 #define OFF(x) offsetof(BZ2FileObject, x)
1321
1322 static PyMemberDef BZ2File_members[] = {
1323         {"softspace",   T_INT,          OFF(f_softspace), 0,
1324          "flag indicating that a space needs to be printed; used by print"},
1325         {NULL}  /* Sentinel */
1326 };
1327
1328 /* ===================================================================== */
1329 /* Slot definitions for BZ2File_Type. */
1330
1331 static int
1332 BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
1333 {
1334         static char *kwlist[] = {"filename", "mode", "buffering",
1335                                        "compresslevel", 0};
1336         PyObject *name;
1337         char *mode = "r";
1338         int buffering = -1;
1339         int compresslevel = 9;
1340         int bzerror;
1341         int mode_char = 0;
1342
1343         self->size = -1;
1344
1345         if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|sii:BZ2File",
1346                                          kwlist, &name, &mode, &buffering,
1347                                          &compresslevel))
1348                 return -1;
1349
1350         if (compresslevel < 1 || compresslevel > 9) {
1351                 PyErr_SetString(PyExc_ValueError,
1352                                 "compresslevel must be between 1 and 9");
1353                 return -1;
1354         }
1355
1356         for (;;) {
1357                 int error = 0;
1358                 switch (*mode) {
1359                         case 'r':
1360                         case 'w':
1361                                 if (mode_char)
1362                                         error = 1;
1363                                 mode_char = *mode;
1364                                 break;
1365
1366                         case 'b':
1367                                 break;
1368
1369                         case 'U':
1370 #ifdef __VMS
1371                                 self->f_univ_newline = 0;
1372 #else
1373                                 self->f_univ_newline = 1;
1374 #endif
1375                                 break;
1376
1377                         default:
1378                                 error = 1;
1379                                 break;
1380                 }
1381                 if (error) {
1382                         PyErr_Format(PyExc_ValueError,
1383                                      "invalid mode char %c", *mode);
1384                         return -1;
1385                 }
1386                 mode++;
1387                 if (*mode == '\0')
1388                         break;
1389         }
1390
1391         if (mode_char == 0) {
1392                 mode_char = 'r';
1393         }
1394
1395         mode = (mode_char == 'r') ? "rb" : "wb";
1396
1397         self->file = PyObject_CallFunction((PyObject*)&PyFile_Type, "(Osi)",
1398                                            name, mode, buffering);
1399         if (self->file == NULL)
1400                 return -1;
1401
1402         /* From now on, we have stuff to dealloc, so jump to error label
1403          * instead of returning */
1404
1405 #ifdef WITH_THREAD
1406         self->lock = PyThread_allocate_lock();
1407         if (!self->lock) {
1408                 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1409                 goto error;
1410         }
1411 #endif
1412
1413         if (mode_char == 'r')
1414                 self->fp = BZ2_bzReadOpen(&bzerror,
1415                                           PyFile_AsFile(self->file),
1416                                           0, 0, NULL, 0);
1417         else
1418                 self->fp = BZ2_bzWriteOpen(&bzerror,
1419                                            PyFile_AsFile(self->file),
1420                                            compresslevel, 0, 0);
1421
1422         if (bzerror != BZ_OK) {
1423                 Util_CatchBZ2Error(bzerror);
1424                 goto error;
1425         }
1426         PyFile_IncUseCount((PyFileObject *)self->file);
1427
1428         self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE;
1429
1430         return 0;
1431
1432 error:
1433         Py_CLEAR(self->file);
1434 #ifdef WITH_THREAD
1435         if (self->lock) {
1436                 PyThread_free_lock(self->lock);
1437                 self->lock = NULL;
1438         }
1439 #endif
1440         return -1;
1441 }
1442
1443 static void
1444 BZ2File_dealloc(BZ2FileObject *self)
1445 {
1446         int bzerror;
1447 #ifdef WITH_THREAD
1448         if (self->lock)
1449                 PyThread_free_lock(self->lock);
1450 #endif
1451         switch (self->mode) {
1452                 case MODE_READ:
1453                 case MODE_READ_EOF:
1454                         BZ2_bzReadClose(&bzerror, self->fp);
1455                         break;
1456                 case MODE_WRITE:
1457                         BZ2_bzWriteClose(&bzerror, self->fp,
1458                                          0, NULL, NULL);
1459                         break;
1460         }
1461         if (self->fp) {
1462                 PyFile_DecUseCount((PyFileObject *)self->file);
1463                 self->fp = NULL;
1464         }
1465         Util_DropReadAhead(self);
1466         Py_XDECREF(self->file);
1467         Py_TYPE(self)->tp_free((PyObject *)self);
1468 }
1469
1470 /* This is a hacked version of Python's fileobject.c:file_getiter(). */
1471 static PyObject *
1472 BZ2File_getiter(BZ2FileObject *self)
1473 {
1474         if (self->mode == MODE_CLOSED) {
1475                 PyErr_SetString(PyExc_ValueError,
1476                                 "I/O operation on closed file");
1477                 return NULL;
1478         }
1479         Py_INCREF((PyObject*)self);
1480         return (PyObject *)self;
1481 }
1482
1483 /* This is a hacked version of Python's fileobject.c:file_iternext(). */
1484 #define READAHEAD_BUFSIZE 8192
1485 static PyObject *
1486 BZ2File_iternext(BZ2FileObject *self)
1487 {
1488         PyStringObject* ret;
1489         ACQUIRE_LOCK(self);
1490         if (self->mode == MODE_CLOSED) {
1491                 RELEASE_LOCK(self);
1492                 PyErr_SetString(PyExc_ValueError,
1493                                 "I/O operation on closed file");
1494                 return NULL;
1495         }
1496         ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE);
1497         RELEASE_LOCK(self);
1498         if (ret == NULL || PyString_GET_SIZE(ret) == 0) {
1499                 Py_XDECREF(ret);
1500                 return NULL;
1501         }
1502         return (PyObject *)ret;
1503 }
1504
1505 /* ===================================================================== */
1506 /* BZ2File_Type definition. */
1507
1508 PyDoc_VAR(BZ2File__doc__) =
1509 PyDoc_STR(
1510 "BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
1511 \n\
1512 Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
1513 writing. When opened for writing, the file will be created if it doesn't\n\
1514 exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
1515 unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
1516 is given, must be a number between 1 and 9.\n\
1517 ")
1518 PyDoc_STR(
1519 "\n\
1520 Add a 'U' to mode to open the file for input with universal newline\n\
1521 support. Any line ending in the input file will be seen as a '\\n' in\n\
1522 Python. Also, a file so opened gains the attribute 'newlines'; the value\n\
1523 for this attribute is one of None (no newline read yet), '\\r', '\\n',\n\
1524 '\\r\\n' or a tuple containing all the newline types seen. Universal\n\
1525 newlines are available only when reading.\n\
1526 ")
1527 ;
1528
1529 static PyTypeObject BZ2File_Type = {
1530         PyVarObject_HEAD_INIT(NULL, 0)
1531         "bz2.BZ2File",          /*tp_name*/
1532         sizeof(BZ2FileObject),  /*tp_basicsize*/
1533         0,                      /*tp_itemsize*/
1534         (destructor)BZ2File_dealloc, /*tp_dealloc*/
1535         0,                      /*tp_print*/
1536         0,                      /*tp_getattr*/
1537         0,                      /*tp_setattr*/
1538         0,                      /*tp_compare*/
1539         0,                      /*tp_repr*/
1540         0,                      /*tp_as_number*/
1541         0,                      /*tp_as_sequence*/
1542         0,                      /*tp_as_mapping*/
1543         0,                      /*tp_hash*/
1544         0,                      /*tp_call*/
1545         0,                      /*tp_str*/
1546         PyObject_GenericGetAttr,/*tp_getattro*/
1547         PyObject_GenericSetAttr,/*tp_setattro*/
1548         0,                      /*tp_as_buffer*/
1549         Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1550         BZ2File__doc__,         /*tp_doc*/
1551         0,                      /*tp_traverse*/
1552         0,                      /*tp_clear*/
1553         0,                      /*tp_richcompare*/
1554         0,                      /*tp_weaklistoffset*/
1555         (getiterfunc)BZ2File_getiter, /*tp_iter*/
1556         (iternextfunc)BZ2File_iternext, /*tp_iternext*/
1557         BZ2File_methods,        /*tp_methods*/
1558         BZ2File_members,        /*tp_members*/
1559         BZ2File_getset,         /*tp_getset*/
1560         0,                      /*tp_base*/
1561         0,                      /*tp_dict*/
1562         0,                      /*tp_descr_get*/
1563         0,                      /*tp_descr_set*/
1564         0,                      /*tp_dictoffset*/
1565         (initproc)BZ2File_init, /*tp_init*/
1566         PyType_GenericAlloc,    /*tp_alloc*/
1567         PyType_GenericNew,      /*tp_new*/
1568         _PyObject_Del,          /*tp_free*/
1569         0,                      /*tp_is_gc*/
1570 };
1571
1572
1573 /* ===================================================================== */
1574 /* Methods of BZ2Comp. */
1575
1576 PyDoc_STRVAR(BZ2Comp_compress__doc__,
1577 "compress(data) -> string\n\
1578 \n\
1579 Provide more data to the compressor object. It will return chunks of\n\
1580 compressed data whenever possible. When you've finished providing data\n\
1581 to compress, call the flush() method to finish the compression process,\n\
1582 and return what is left in the internal buffers.\n\
1583 ");
1584
1585 static PyObject *
1586 BZ2Comp_compress(BZ2CompObject *self, PyObject *args)
1587 {
1588         Py_buffer pdata;
1589         char *data;
1590         int datasize;
1591         int bufsize = SMALLCHUNK;
1592         PY_LONG_LONG totalout;
1593         PyObject *ret = NULL;
1594         bz_stream *bzs = &self->bzs;
1595         int bzerror;
1596
1597         if (!PyArg_ParseTuple(args, "s*:compress", &pdata))
1598                 return NULL;
1599         data = pdata.buf;
1600         datasize = pdata.len;
1601
1602         if (datasize == 0) {
1603                 PyBuffer_Release(&pdata);
1604                 return PyString_FromString("");
1605         }
1606
1607         ACQUIRE_LOCK(self);
1608         if (!self->running) {
1609                 PyErr_SetString(PyExc_ValueError,
1610                                 "this object was already flushed");
1611                 goto error;
1612         }
1613
1614         ret = PyString_FromStringAndSize(NULL, bufsize);
1615         if (!ret)
1616                 goto error;
1617
1618         bzs->next_in = data;
1619         bzs->avail_in = datasize;
1620         bzs->next_out = BUF(ret);
1621         bzs->avail_out = bufsize;
1622
1623         totalout = BZS_TOTAL_OUT(bzs);
1624
1625         for (;;) {
1626                 Py_BEGIN_ALLOW_THREADS
1627                 bzerror = BZ2_bzCompress(bzs, BZ_RUN);
1628                 Py_END_ALLOW_THREADS
1629                 if (bzerror != BZ_RUN_OK) {
1630                         Util_CatchBZ2Error(bzerror);
1631                         goto error;
1632                 }
1633                 if (bzs->avail_in == 0)
1634                         break; /* no more input data */
1635                 if (bzs->avail_out == 0) {
1636                         bufsize = Util_NewBufferSize(bufsize);
1637                         if (_PyString_Resize(&ret, bufsize) < 0) {
1638                                 BZ2_bzCompressEnd(bzs);
1639                                 goto error;
1640                         }
1641                         bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1642                                                     - totalout);
1643                         bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1644                 }
1645         }
1646
1647         _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
1648
1649         RELEASE_LOCK(self);
1650         PyBuffer_Release(&pdata);
1651         return ret;
1652
1653 error:
1654         RELEASE_LOCK(self);
1655         PyBuffer_Release(&pdata);
1656         Py_XDECREF(ret);
1657         return NULL;
1658 }
1659
1660 PyDoc_STRVAR(BZ2Comp_flush__doc__,
1661 "flush() -> string\n\
1662 \n\
1663 Finish the compression process and return what is left in internal buffers.\n\
1664 You must not use the compressor object after calling this method.\n\
1665 ");
1666
1667 static PyObject *
1668 BZ2Comp_flush(BZ2CompObject *self)
1669 {
1670         int bufsize = SMALLCHUNK;
1671         PyObject *ret = NULL;
1672         bz_stream *bzs = &self->bzs;
1673         PY_LONG_LONG totalout;
1674         int bzerror;
1675
1676         ACQUIRE_LOCK(self);
1677         if (!self->running) {
1678                 PyErr_SetString(PyExc_ValueError, "object was already "
1679                                                   "flushed");
1680                 goto error;
1681         }
1682         self->running = 0;
1683
1684         ret = PyString_FromStringAndSize(NULL, bufsize);
1685         if (!ret)
1686                 goto error;
1687
1688         bzs->next_out = BUF(ret);
1689         bzs->avail_out = bufsize;
1690
1691         totalout = BZS_TOTAL_OUT(bzs);
1692
1693         for (;;) {
1694                 Py_BEGIN_ALLOW_THREADS
1695                 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1696                 Py_END_ALLOW_THREADS
1697                 if (bzerror == BZ_STREAM_END) {
1698                         break;
1699                 } else if (bzerror != BZ_FINISH_OK) {
1700                         Util_CatchBZ2Error(bzerror);
1701                         goto error;
1702                 }
1703                 if (bzs->avail_out == 0) {
1704                         bufsize = Util_NewBufferSize(bufsize);
1705                         if (_PyString_Resize(&ret, bufsize) < 0)
1706                                 goto error;
1707                         bzs->next_out = BUF(ret);
1708                         bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1709                                                     - totalout);
1710                         bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1711                 }
1712         }
1713
1714         if (bzs->avail_out != 0)
1715                 _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
1716
1717         RELEASE_LOCK(self);
1718         return ret;
1719
1720 error:
1721         RELEASE_LOCK(self);
1722         Py_XDECREF(ret);
1723         return NULL;
1724 }
1725
/* Method table installed as tp_methods of BZ2Comp_Type.  compress()
 * receives its argument through an argument tuple (METH_VARARGS); flush()
 * takes no arguments (METH_NOARGS).  The all-NULL entry ends the table. */
static PyMethodDef BZ2Comp_methods[] = {
        {"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS,
         BZ2Comp_compress__doc__},
        {"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS,
         BZ2Comp_flush__doc__},
        {NULL,          NULL}           /* sentinel */
};
1733
1734
1735 /* ===================================================================== */
1736 /* Slot definitions for BZ2Comp_Type. */
1737
1738 static int
1739 BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs)
1740 {
1741         int compresslevel = 9;
1742         int bzerror;
1743         static char *kwlist[] = {"compresslevel", 0};
1744
1745         if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor",
1746                                          kwlist, &compresslevel))
1747                 return -1;
1748
1749         if (compresslevel < 1 || compresslevel > 9) {
1750                 PyErr_SetString(PyExc_ValueError,
1751                                 "compresslevel must be between 1 and 9");
1752                 goto error;
1753         }
1754
1755 #ifdef WITH_THREAD
1756         self->lock = PyThread_allocate_lock();
1757         if (!self->lock) {
1758                 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1759                 goto error;
1760         }
1761 #endif
1762
1763         memset(&self->bzs, 0, sizeof(bz_stream));
1764         bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
1765         if (bzerror != BZ_OK) {
1766                 Util_CatchBZ2Error(bzerror);
1767                 goto error;
1768         }
1769
1770         self->running = 1;
1771
1772         return 0;
1773 error:
1774 #ifdef WITH_THREAD
1775         if (self->lock) {
1776                 PyThread_free_lock(self->lock);
1777                 self->lock = NULL;
1778         }
1779 #endif
1780         return -1;
1781 }
1782
1783 static void
1784 BZ2Comp_dealloc(BZ2CompObject *self)
1785 {
1786 #ifdef WITH_THREAD
1787         if (self->lock)
1788                 PyThread_free_lock(self->lock);
1789 #endif
1790         BZ2_bzCompressEnd(&self->bzs);
1791         Py_TYPE(self)->tp_free((PyObject *)self);
1792 }
1793
1794
1795 /* ===================================================================== */
1796 /* BZ2Comp_Type definition. */
1797
PyDoc_STRVAR(BZ2Comp__doc__,
"BZ2Compressor([compresslevel=9]) -> compressor object\n\
\n\
Create a new compressor object. This object may be used to compress\n\
data sequentially. If you want to compress data in one shot, use the\n\
compress() function instead. The compresslevel parameter, if given,\n\
must be a number between 1 and 9.\n\
");

/* Type object for bz2.BZ2Compressor.
 *
 * Instances are BZ2CompObject structures, set up by BZ2Comp_init and torn
 * down by BZ2Comp_dealloc; their methods come from BZ2Comp_methods.  The
 * type may be subclassed (Py_TPFLAGS_BASETYPE) and provides no
 * tp_traverse/tp_clear.  PyVarObject_HEAD_INIT(NULL, 0) leaves the type
 * pointer unset here; it is filled in during module initialisation
 * (initbz2). */
static PyTypeObject BZ2Comp_Type = {
        PyVarObject_HEAD_INIT(NULL, 0)
        "bz2.BZ2Compressor",    /*tp_name*/
        sizeof(BZ2CompObject),  /*tp_basicsize*/
        0,                      /*tp_itemsize*/
        (destructor)BZ2Comp_dealloc, /*tp_dealloc*/
        0,                      /*tp_print*/
        0,                      /*tp_getattr*/
        0,                      /*tp_setattr*/
        0,                      /*tp_compare*/
        0,                      /*tp_repr*/
        0,                      /*tp_as_number*/
        0,                      /*tp_as_sequence*/
        0,                      /*tp_as_mapping*/
        0,                      /*tp_hash*/
        0,                      /*tp_call*/
        0,                      /*tp_str*/
        PyObject_GenericGetAttr,/*tp_getattro*/
        PyObject_GenericSetAttr,/*tp_setattro*/
        0,                      /*tp_as_buffer*/
        Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
        BZ2Comp__doc__,         /*tp_doc*/
        0,                      /*tp_traverse*/
        0,                      /*tp_clear*/
        0,                      /*tp_richcompare*/
        0,                      /*tp_weaklistoffset*/
        0,                      /*tp_iter*/
        0,                      /*tp_iternext*/
        BZ2Comp_methods,        /*tp_methods*/
        0,                      /*tp_members*/
        0,                      /*tp_getset*/
        0,                      /*tp_base*/
        0,                      /*tp_dict*/
        0,                      /*tp_descr_get*/
        0,                      /*tp_descr_set*/
        0,                      /*tp_dictoffset*/
        (initproc)BZ2Comp_init, /*tp_init*/
        PyType_GenericAlloc,    /*tp_alloc*/
        PyType_GenericNew,      /*tp_new*/
        _PyObject_Del,          /*tp_free*/
        0,                      /*tp_is_gc*/
};
1849
1850
1851 /* ===================================================================== */
1852 /* Members of BZ2Decomp. */
1853
/* OFF(x) is redefined here to yield the byte offset of field x inside
 * BZ2DecompObject; any earlier definition is dropped first. */
#undef OFF
#define OFF(x) offsetof(BZ2DecompObject, x)

/* Member table installed as tp_members of BZ2Decomp_Type: exposes the
 * object's `unused_data` field as a read-only (RO) attribute of the same
 * name.  The {NULL} entry ends the table. */
static PyMemberDef BZ2Decomp_members[] = {
        {"unused_data", T_OBJECT, OFF(unused_data), RO},
        {NULL}  /* Sentinel */
};
1861
1862
1863 /* ===================================================================== */
1864 /* Methods of BZ2Decomp. */
1865
1866 PyDoc_STRVAR(BZ2Decomp_decompress__doc__,
1867 "decompress(data) -> string\n\
1868 \n\
1869 Provide more data to the decompressor object. It will return chunks\n\
1870 of decompressed data whenever possible. If you try to decompress data\n\
1871 after the end of stream is found, EOFError will be raised. If any data\n\
1872 was found after the end of stream, it'll be ignored and saved in\n\
1873 unused_data attribute.\n\
1874 ");
1875
1876 static PyObject *
1877 BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args)
1878 {
1879         Py_buffer pdata;
1880         char *data;
1881         int datasize;
1882         int bufsize = SMALLCHUNK;
1883         PY_LONG_LONG totalout;
1884         PyObject *ret = NULL;
1885         bz_stream *bzs = &self->bzs;
1886         int bzerror;
1887
1888         if (!PyArg_ParseTuple(args, "s*:decompress", &pdata))
1889                 return NULL;
1890         data = pdata.buf;
1891         datasize = pdata.len;
1892
1893         ACQUIRE_LOCK(self);
1894         if (!self->running) {
1895                 PyErr_SetString(PyExc_EOFError, "end of stream was "
1896                                                 "already found");
1897                 goto error;
1898         }
1899
1900         ret = PyString_FromStringAndSize(NULL, bufsize);
1901         if (!ret)
1902                 goto error;
1903
1904         bzs->next_in = data;
1905         bzs->avail_in = datasize;
1906         bzs->next_out = BUF(ret);
1907         bzs->avail_out = bufsize;
1908
1909         totalout = BZS_TOTAL_OUT(bzs);
1910
1911         for (;;) {
1912                 Py_BEGIN_ALLOW_THREADS
1913                 bzerror = BZ2_bzDecompress(bzs);
1914                 Py_END_ALLOW_THREADS
1915                 if (bzerror == BZ_STREAM_END) {
1916                         if (bzs->avail_in != 0) {
1917                                 Py_DECREF(self->unused_data);
1918                                 self->unused_data =
1919                                     PyString_FromStringAndSize(bzs->next_in,
1920                                                                bzs->avail_in);
1921                         }
1922                         self->running = 0;
1923                         break;
1924                 }
1925                 if (bzerror != BZ_OK) {
1926                         Util_CatchBZ2Error(bzerror);
1927                         goto error;
1928                 }
1929                 if (bzs->avail_in == 0)
1930                         break; /* no more input data */
1931                 if (bzs->avail_out == 0) {
1932                         bufsize = Util_NewBufferSize(bufsize);
1933                         if (_PyString_Resize(&ret, bufsize) < 0) {
1934                                 BZ2_bzDecompressEnd(bzs);
1935                                 goto error;
1936                         }
1937                         bzs->next_out = BUF(ret);
1938                         bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1939                                                     - totalout);
1940                         bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1941                 }
1942         }
1943
1944         if (bzs->avail_out != 0)
1945                 _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
1946
1947         RELEASE_LOCK(self);
1948         PyBuffer_Release(&pdata);
1949         return ret;
1950
1951 error:
1952         RELEASE_LOCK(self);
1953         PyBuffer_Release(&pdata);
1954         Py_XDECREF(ret);
1955         return NULL;
1956 }
1957
/* Method table installed as tp_methods of BZ2Decomp_Type: the single
 * method decompress() receives its argument through an argument tuple
 * (METH_VARARGS).  The all-NULL entry ends the table. */
static PyMethodDef BZ2Decomp_methods[] = {
        {"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_decompress__doc__},
        {NULL,          NULL}           /* sentinel */
};
1962
1963
1964 /* ===================================================================== */
1965 /* Slot definitions for BZ2Decomp_Type. */
1966
1967 static int
1968 BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs)
1969 {
1970         int bzerror;
1971
1972         if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
1973                 return -1;
1974
1975 #ifdef WITH_THREAD
1976         self->lock = PyThread_allocate_lock();
1977         if (!self->lock) {
1978                 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1979                 goto error;
1980         }
1981 #endif
1982
1983         self->unused_data = PyString_FromString("");
1984         if (!self->unused_data)
1985                 goto error;
1986
1987         memset(&self->bzs, 0, sizeof(bz_stream));
1988         bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
1989         if (bzerror != BZ_OK) {
1990                 Util_CatchBZ2Error(bzerror);
1991                 goto error;
1992         }
1993
1994         self->running = 1;
1995
1996         return 0;
1997
1998 error:
1999 #ifdef WITH_THREAD
2000         if (self->lock) {
2001                 PyThread_free_lock(self->lock);
2002                 self->lock = NULL;
2003         }
2004 #endif
2005         Py_CLEAR(self->unused_data);
2006         return -1;
2007 }
2008
2009 static void
2010 BZ2Decomp_dealloc(BZ2DecompObject *self)
2011 {
2012 #ifdef WITH_THREAD
2013         if (self->lock)
2014                 PyThread_free_lock(self->lock);
2015 #endif
2016         Py_XDECREF(self->unused_data);
2017         BZ2_bzDecompressEnd(&self->bzs);
2018         Py_TYPE(self)->tp_free((PyObject *)self);
2019 }
2020
2021
2022 /* ===================================================================== */
2023 /* BZ2Decomp_Type definition. */
2024
PyDoc_STRVAR(BZ2Decomp__doc__,
"BZ2Decompressor() -> decompressor object\n\
\n\
Create a new decompressor object. This object may be used to decompress\n\
data sequentially. If you want to decompress data in one shot, use the\n\
decompress() function instead.\n\
");

/* Type object for bz2.BZ2Decompressor.
 *
 * Instances are BZ2DecompObject structures, set up by BZ2Decomp_init and
 * torn down by BZ2Decomp_dealloc; methods come from BZ2Decomp_methods and
 * attributes from BZ2Decomp_members.  The type may be subclassed
 * (Py_TPFLAGS_BASETYPE) and provides no tp_traverse/tp_clear.
 * PyVarObject_HEAD_INIT(NULL, 0) leaves the type pointer unset here; it is
 * filled in during module initialisation (initbz2). */
static PyTypeObject BZ2Decomp_Type = {
        PyVarObject_HEAD_INIT(NULL, 0)
        "bz2.BZ2Decompressor",  /*tp_name*/
        sizeof(BZ2DecompObject), /*tp_basicsize*/
        0,                      /*tp_itemsize*/
        (destructor)BZ2Decomp_dealloc, /*tp_dealloc*/
        0,                      /*tp_print*/
        0,                      /*tp_getattr*/
        0,                      /*tp_setattr*/
        0,                      /*tp_compare*/
        0,                      /*tp_repr*/
        0,                      /*tp_as_number*/
        0,                      /*tp_as_sequence*/
        0,                      /*tp_as_mapping*/
        0,                      /*tp_hash*/
        0,                      /*tp_call*/
        0,                      /*tp_str*/
        PyObject_GenericGetAttr,/*tp_getattro*/
        PyObject_GenericSetAttr,/*tp_setattro*/
        0,                      /*tp_as_buffer*/
        Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
        BZ2Decomp__doc__,       /*tp_doc*/
        0,                      /*tp_traverse*/
        0,                      /*tp_clear*/
        0,                      /*tp_richcompare*/
        0,                      /*tp_weaklistoffset*/
        0,                      /*tp_iter*/
        0,                      /*tp_iternext*/
        BZ2Decomp_methods,      /*tp_methods*/
        BZ2Decomp_members,      /*tp_members*/
        0,                      /*tp_getset*/
        0,                      /*tp_base*/
        0,                      /*tp_dict*/
        0,                      /*tp_descr_get*/
        0,                      /*tp_descr_set*/
        0,                      /*tp_dictoffset*/
        (initproc)BZ2Decomp_init, /*tp_init*/
        PyType_GenericAlloc,    /*tp_alloc*/
        PyType_GenericNew,      /*tp_new*/
        _PyObject_Del,          /*tp_free*/
        0,                      /*tp_is_gc*/
};
2075
2076
2077 /* ===================================================================== */
2078 /* Module functions. */
2079
2080 PyDoc_STRVAR(bz2_compress__doc__,
2081 "compress(data [, compresslevel=9]) -> string\n\
2082 \n\
2083 Compress data in one shot. If you want to compress data sequentially,\n\
2084 use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
2085 given, must be a number between 1 and 9.\n\
2086 ");
2087
2088 static PyObject *
2089 bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs)
2090 {
2091         int compresslevel=9;
2092         Py_buffer pdata;
2093         char *data;
2094         int datasize;
2095         int bufsize;
2096         PyObject *ret = NULL;
2097         bz_stream _bzs;
2098         bz_stream *bzs = &_bzs;
2099         int bzerror;
2100         static char *kwlist[] = {"data", "compresslevel", 0};
2101
2102         if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|i",
2103                                          kwlist, &pdata,
2104                                          &compresslevel))
2105                 return NULL;
2106         data = pdata.buf;
2107         datasize = pdata.len;
2108
2109         if (compresslevel < 1 || compresslevel > 9) {
2110                 PyErr_SetString(PyExc_ValueError,
2111                                 "compresslevel must be between 1 and 9");
2112                 PyBuffer_Release(&pdata);
2113                 return NULL;
2114         }
2115
2116         /* Conforming to bz2 manual, this is large enough to fit compressed
2117          * data in one shot. We will check it later anyway. */
2118         bufsize = datasize + (datasize/100+1) + 600;
2119
2120         ret = PyString_FromStringAndSize(NULL, bufsize);
2121         if (!ret) {
2122                 PyBuffer_Release(&pdata);
2123                 return NULL;
2124         }
2125
2126         memset(bzs, 0, sizeof(bz_stream));
2127
2128         bzs->next_in = data;
2129         bzs->avail_in = datasize;
2130         bzs->next_out = BUF(ret);
2131         bzs->avail_out = bufsize;
2132
2133         bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0);
2134         if (bzerror != BZ_OK) {
2135                 Util_CatchBZ2Error(bzerror);
2136                 PyBuffer_Release(&pdata);
2137                 Py_DECREF(ret);
2138                 return NULL;
2139         }
2140
2141         for (;;) {
2142                 Py_BEGIN_ALLOW_THREADS
2143                 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
2144                 Py_END_ALLOW_THREADS
2145                 if (bzerror == BZ_STREAM_END) {
2146                         break;
2147                 } else if (bzerror != BZ_FINISH_OK) {
2148                         BZ2_bzCompressEnd(bzs);
2149                         Util_CatchBZ2Error(bzerror);
2150                         PyBuffer_Release(&pdata);
2151                         Py_DECREF(ret);
2152                         return NULL;
2153                 }
2154                 if (bzs->avail_out == 0) {
2155                         bufsize = Util_NewBufferSize(bufsize);
2156                         if (_PyString_Resize(&ret, bufsize) < 0) {
2157                                 BZ2_bzCompressEnd(bzs);
2158                                 PyBuffer_Release(&pdata);
2159                                 Py_DECREF(ret);
2160                                 return NULL;
2161                         }
2162                         bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2163                         bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2164                 }
2165         }
2166
2167         if (bzs->avail_out != 0)
2168                 _PyString_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs));
2169         BZ2_bzCompressEnd(bzs);
2170
2171         PyBuffer_Release(&pdata);
2172         return ret;
2173 }
2174
2175 PyDoc_STRVAR(bz2_decompress__doc__,
2176 "decompress(data) -> decompressed data\n\
2177 \n\
2178 Decompress data in one shot. If you want to decompress data sequentially,\n\
2179 use an instance of BZ2Decompressor instead.\n\
2180 ");
2181
2182 static PyObject *
2183 bz2_decompress(PyObject *self, PyObject *args)
2184 {
2185         Py_buffer pdata;
2186         char *data;
2187         int datasize;
2188         int bufsize = SMALLCHUNK;
2189         PyObject *ret;
2190         bz_stream _bzs;
2191         bz_stream *bzs = &_bzs;
2192         int bzerror;
2193
2194         if (!PyArg_ParseTuple(args, "s*:decompress", &pdata))
2195                 return NULL;
2196         data = pdata.buf;
2197         datasize = pdata.len;
2198
2199         if (datasize == 0) {
2200                 PyBuffer_Release(&pdata);
2201                 return PyString_FromString("");
2202         }
2203
2204         ret = PyString_FromStringAndSize(NULL, bufsize);
2205         if (!ret) {
2206                 PyBuffer_Release(&pdata);
2207                 return NULL;
2208         }
2209
2210         memset(bzs, 0, sizeof(bz_stream));
2211
2212         bzs->next_in = data;
2213         bzs->avail_in = datasize;
2214         bzs->next_out = BUF(ret);
2215         bzs->avail_out = bufsize;
2216
2217         bzerror = BZ2_bzDecompressInit(bzs, 0, 0);
2218         if (bzerror != BZ_OK) {
2219                 Util_CatchBZ2Error(bzerror);
2220                 Py_DECREF(ret);
2221                 PyBuffer_Release(&pdata);
2222                 return NULL;
2223         }
2224
2225         for (;;) {
2226                 Py_BEGIN_ALLOW_THREADS
2227                 bzerror = BZ2_bzDecompress(bzs);
2228                 Py_END_ALLOW_THREADS
2229                 if (bzerror == BZ_STREAM_END) {
2230                         break;
2231                 } else if (bzerror != BZ_OK) {
2232                         BZ2_bzDecompressEnd(bzs);
2233                         Util_CatchBZ2Error(bzerror);
2234                         PyBuffer_Release(&pdata);
2235                         Py_DECREF(ret);
2236                         return NULL;
2237                 }
2238                 if (bzs->avail_in == 0) {
2239                         BZ2_bzDecompressEnd(bzs);
2240                         PyErr_SetString(PyExc_ValueError,
2241                                         "couldn't find end of stream");
2242                         PyBuffer_Release(&pdata);
2243                         Py_DECREF(ret);
2244                         return NULL;
2245                 }
2246                 if (bzs->avail_out == 0) {
2247                         bufsize = Util_NewBufferSize(bufsize);
2248                         if (_PyString_Resize(&ret, bufsize) < 0) {
2249                                 BZ2_bzDecompressEnd(bzs);
2250                                 PyBuffer_Release(&pdata);
2251                                 Py_DECREF(ret);
2252                                 return NULL;
2253                         }
2254                         bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2255                         bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2256                 }
2257         }
2258
2259         if (bzs->avail_out != 0)
2260                 _PyString_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs));
2261         BZ2_bzDecompressEnd(bzs);
2262         PyBuffer_Release(&pdata);
2263
2264         return ret;
2265 }
2266
/* Module-level function table passed to Py_InitModule3 by initbz2.
 * compress() accepts positional and keyword arguments
 * (METH_VARARGS|METH_KEYWORDS); decompress() accepts positional arguments
 * only (METH_VARARGS).  The all-NULL entry ends the table. */
static PyMethodDef bz2_methods[] = {
        {"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS,
                bz2_compress__doc__},
        {"decompress", (PyCFunction) bz2_decompress, METH_VARARGS,
                bz2_decompress__doc__},
        {NULL,          NULL}           /* sentinel */
};
2274
2275 /* ===================================================================== */
2276 /* Initialization function. */
2277
2278 PyDoc_STRVAR(bz2__doc__,
2279 "The python bz2 module provides a comprehensive interface for\n\
2280 the bz2 compression library. It implements a complete file\n\
2281 interface, one shot (de)compression functions, and types for\n\
2282 sequential (de)compression.\n\
2283 ");
2284
2285 PyMODINIT_FUNC
2286 initbz2(void)
2287 {
2288         PyObject *m;
2289
2290         Py_TYPE(&BZ2File_Type) = &PyType_Type;
2291         Py_TYPE(&BZ2Comp_Type) = &PyType_Type;
2292         Py_TYPE(&BZ2Decomp_Type) = &PyType_Type;
2293
2294         m = Py_InitModule3("bz2", bz2_methods, bz2__doc__);
2295         if (m == NULL)
2296                 return;
2297
2298         PyModule_AddObject(m, "__author__", PyString_FromString(__author__));
2299
2300         Py_INCREF(&BZ2File_Type);
2301         PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type);
2302
2303         Py_INCREF(&BZ2Comp_Type);
2304         PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type);
2305
2306         Py_INCREF(&BZ2Decomp_Type);
2307         PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type);
2308 }