Modules/bz2module.c

   1 /*
   2
   3 python-bz2 - python bz2 library interface
   4
   5 Copyright (c) 2002  Gustavo Niemeyer <niemeyer@conectiva.com>
   6 Copyright (c) 2002  Python Software Foundation; All Rights Reserved
   7
   8 */
   9
  10 #include "Python.h"
  11 #include <stdio.h>
  12 #include <bzlib.h>
  13 #include "structmember.h"
  14
  15 #ifdef WITH_THREAD
  16 #include "pythread.h"
  17 #endif
  18
  19 static char __author__[] =
  20 "The bz2 python module was written by:\n\
  21 \n\
  22     Gustavo Niemeyer <niemeyer@conectiva.com>\n\
  23 ";
  24
  25 /* Our very own off_t-like type, 64-bit if possible */
  26 /* copied from Objects/fileobject.c */
  27 #if !defined(HAVE_LARGEFILE_SUPPORT)
  28 typedef off_t Py_off_t;
  29 #elif SIZEOF_OFF_T >= 8
  30 typedef off_t Py_off_t;
  31 #elif SIZEOF_FPOS_T >= 8
  32 typedef fpos_t Py_off_t;
  33 #else
  34 #error "Large file support, but neither off_t nor fpos_t is large enough."
  35 #endif
  36
  37 #define BUF(v) PyString_AS_STRING((PyStringObject *)v)
  38
  39 #define MODE_CLOSED   0
  40 #define MODE_READ     1
  41 #define MODE_READ_EOF 2
  42 #define MODE_WRITE    3
  43
  44 #define BZ2FileObject_Check(v)  (Py_TYPE(v) == &BZ2File_Type)
  45
  46
  47 #ifdef BZ_CONFIG_ERROR
  48
  49 #if SIZEOF_LONG >= 8
  50 #define BZS_TOTAL_OUT(bzs) \
  51         (((long)bzs->total_out_hi32 << 32) + bzs->total_out_lo32)
  52 #elif SIZEOF_LONG_LONG >= 8
  53 #define BZS_TOTAL_OUT(bzs) \
  54         (((PY_LONG_LONG)bzs->total_out_hi32 << 32) + bzs->total_out_lo32)
  55 #else
  56 #define BZS_TOTAL_OUT(bzs) \
  57         bzs->total_out_lo32
  58 #endif
  59
  60 #else /* ! BZ_CONFIG_ERROR */
  61
  62 #define BZ2_bzRead bzRead
  63 #define BZ2_bzReadOpen bzReadOpen
  64 #define BZ2_bzReadClose bzReadClose
  65 #define BZ2_bzWrite bzWrite
  66 #define BZ2_bzWriteOpen bzWriteOpen
  67 #define BZ2_bzWriteClose bzWriteClose
  68 #define BZ2_bzCompress bzCompress
  69 #define BZ2_bzCompressInit bzCompressInit
  70 #define BZ2_bzCompressEnd bzCompressEnd
  71 #define BZ2_bzDecompress bzDecompress
  72 #define BZ2_bzDecompressInit bzDecompressInit
  73 #define BZ2_bzDecompressEnd bzDecompressEnd
  74
  75 #define BZS_TOTAL_OUT(bzs) bzs->total_out
  76
  77 #endif /* ! BZ_CONFIG_ERROR */
  78
  79
  80 #ifdef WITH_THREAD
  81 #define ACQUIRE_LOCK(obj) do { \
  82         if (!PyThread_acquire_lock(obj->lock, 0)) { \
  83                 Py_BEGIN_ALLOW_THREADS \
  84                 PyThread_acquire_lock(obj->lock, 1); \
  85                 Py_END_ALLOW_THREADS \
  86         } } while(0)
  87 #define RELEASE_LOCK(obj) PyThread_release_lock(obj->lock)
  88 #else
  89 #define ACQUIRE_LOCK(obj)
  90 #define RELEASE_LOCK(obj)
  91 #endif
  92
  93 /* Bits in f_newlinetypes */
  94 #define NEWLINE_UNKNOWN 0       /* No newline seen, yet */
  95 #define NEWLINE_CR 1            /* \r newline seen */
  96 #define NEWLINE_LF 2            /* \n newline seen */
  97 #define NEWLINE_CRLF 4          /* \r\n newline seen */
  98
  99 /* ===================================================================== */
 100 /* Structure definitions. */
 101
 102 typedef struct {
 103         PyObject_HEAD
 104         PyObject *file;
 105
 106         char* f_buf;            /* Allocated readahead buffer */
 107         char* f_bufend;         /* Points after last occupied position */
 108         char* f_bufptr;         /* Current buffer position */
 109
 110         int f_softspace;        /* Flag used by 'print' command */
 111
 112         int f_univ_newline;     /* Handle any newline convention */
 113         int f_newlinetypes;     /* Types of newlines seen */
 114         int f_skipnextlf;       /* Skip next \n */
 115
 116         BZFILE *fp;
 117         int mode;
 118         Py_off_t pos;
 119         Py_off_t size;
 120 #ifdef WITH_THREAD
 121         PyThread_type_lock lock;
 122 #endif
 123 } BZ2FileObject;
 124
 125 typedef struct {
 126         PyObject_HEAD
 127         bz_stream bzs;
 128         int running;
 129 #ifdef WITH_THREAD
 130         PyThread_type_lock lock;
 131 #endif
 132 } BZ2CompObject;
 133
 134 typedef struct {
 135         PyObject_HEAD
 136         bz_stream bzs;
 137         int running;
 138         PyObject *unused_data;
 139 #ifdef WITH_THREAD
 140         PyThread_type_lock lock;
 141 #endif
 142 } BZ2DecompObject;
 143
 144 /* ===================================================================== */
 145 /* Utility functions. */
 146
 147 static int
 148 Util_CatchBZ2Error(int bzerror)
 149 {
 150         int ret = 0;
 151         switch(bzerror) {
 152                 case BZ_OK:
 153                 case BZ_STREAM_END:
 154                         break;
 155
 156 #ifdef BZ_CONFIG_ERROR
 157                 case BZ_CONFIG_ERROR:
 158                         PyErr_SetString(PyExc_SystemError,
 159                                         "the bz2 library was not compiled "
 160                                         "correctly");
 161                         ret = 1;
 162                         break;
 163 #endif
 164
 165                 case BZ_PARAM_ERROR:
 166                         PyErr_SetString(PyExc_ValueError,
 167                                         "the bz2 library has received wrong "
 168                                         "parameters");
 169                         ret = 1;
 170                         break;
 171
 172                 case BZ_MEM_ERROR:
 173                         PyErr_NoMemory();
 174                         ret = 1;
 175                         break;
 176
 177                 case BZ_DATA_ERROR:
 178                 case BZ_DATA_ERROR_MAGIC:
 179                         PyErr_SetString(PyExc_IOError, "invalid data stream");
 180                         ret = 1;
 181                         break;
 182
 183                 case BZ_IO_ERROR:
 184                         PyErr_SetString(PyExc_IOError, "unknown IO error");
 185                         ret = 1;
 186                         break;
 187
 188                 case BZ_UNEXPECTED_EOF:
 189                         PyErr_SetString(PyExc_EOFError,
 190                                         "compressed file ended before the "
 191                                         "logical end-of-stream was detected");
 192                         ret = 1;
 193                         break;
 194
 195                 case BZ_SEQUENCE_ERROR:
 196                         PyErr_SetString(PyExc_RuntimeError,
 197                                         "wrong sequence of bz2 library "
 198                                         "commands used");
 199                         ret = 1;
 200                         break;
 201         }
 202         return ret;
 203 }
 204
 205 #if BUFSIZ < 8192
 206 #define SMALLCHUNK 8192
 207 #else
 208 #define SMALLCHUNK BUFSIZ
 209 #endif
 210
 211 #if SIZEOF_INT < 4
 212 #define BIGCHUNK  (512 * 32)
 213 #else
 214 #define BIGCHUNK  (512 * 1024)
 215 #endif
 216
 217 /* This is a hacked version of Python's fileobject.c:new_buffersize(). */
 218 static size_t
 219 Util_NewBufferSize(size_t currentsize)
 220 {
 221         if (currentsize > SMALLCHUNK) {
 222                 /* Keep doubling until we reach BIGCHUNK;
 223                    then keep adding BIGCHUNK. */
 224                 if (currentsize <= BIGCHUNK)
 225                         return currentsize + currentsize;
 226                 else
 227                         return currentsize + BIGCHUNK;
 228         }
 229         return currentsize + SMALLCHUNK;
 230 }
 231
 232 /* This is a hacked version of Python's fileobject.c:get_line(). */
 233 static PyObject *
 234 Util_GetLine(BZ2FileObject *f, int n)
 235 {
 236         char c;
 237         char *buf, *end;
 238         size_t total_v_size;    /* total # of slots in buffer */
 239         size_t used_v_size;     /* # used slots in buffer */
 240         size_t increment;       /* amount to increment the buffer */
 241         PyObject *v;
 242         int bzerror;
 243         int bytes_read;
 244         int newlinetypes = f->f_newlinetypes;
 245         int skipnextlf = f->f_skipnextlf;
 246         int univ_newline = f->f_univ_newline;
 247
 248         total_v_size = n > 0 ? n : 100;
 249         v = PyString_FromStringAndSize((char *)NULL, total_v_size);
 250         if (v == NULL)
 251                 return NULL;
 252
 253         buf = BUF(v);
 254         end = buf + total_v_size;
 255
 256         for (;;) {
 257                 Py_BEGIN_ALLOW_THREADS
 258                 while (buf != end) {
 259                         bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1);
 260                         f->pos++;
 261                         if (bytes_read == 0) break;
 262                         if (univ_newline) {
 263                                 if (skipnextlf) {
 264                                         skipnextlf = 0;
 265                                         if (c == '\n') {
 266                                                 /* Seeing a \n here with skipnextlf true means we
 267                                                  * saw a \r before.
 268                                                  */
 269                                                 newlinetypes |= NEWLINE_CRLF;
 270                                                 if (bzerror != BZ_OK) break;
 271                                                 bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1);
 272                                                 f->pos++;
 273                                                 if (bytes_read == 0) break;
 274                                         } else {
 275                                                 newlinetypes |= NEWLINE_CR;
 276                                         }
 277                                 }
 278                                 if (c == '\r') {
 279                                         skipnextlf = 1;
 280                                         c = '\n';
 281                                 } else if (c == '\n')
 282                                         newlinetypes |= NEWLINE_LF;
 283                         }
 284                         *buf++ = c;
 285                         if (bzerror != BZ_OK || c == '\n') break;
 286                 }
 287                 if (univ_newline && bzerror == BZ_STREAM_END && skipnextlf)
 288                         newlinetypes |= NEWLINE_CR;
 289                 Py_END_ALLOW_THREADS
 290                 f->f_newlinetypes = newlinetypes;
 291                 f->f_skipnextlf = skipnextlf;
 292                 if (bzerror == BZ_STREAM_END) {
 293                         f->size = f->pos;
 294                         f->mode = MODE_READ_EOF;
 295                         break;
 296                 } else if (bzerror != BZ_OK) {
 297                         Util_CatchBZ2Error(bzerror);
 298                         Py_DECREF(v);
 299                         return NULL;
 300                 }
 301                 if (c == '\n')
 302                         break;
 303                 /* Must be because buf == end */
 304                 if (n > 0)
 305                         break;
 306                 used_v_size = total_v_size;
 307                 increment = total_v_size >> 2; /* mild exponential growth */
 308                 total_v_size += increment;
 309                 if (total_v_size > INT_MAX) {
 310                         PyErr_SetString(PyExc_OverflowError,
 311                             "line is longer than a Python string can hold");
 312                         Py_DECREF(v);
 313                         return NULL;
 314                 }
 315                 if (_PyString_Resize(&v, total_v_size) < 0)
 316                         return NULL;
 317                 buf = BUF(v) + used_v_size;
 318                 end = BUF(v) + total_v_size;
 319         }
 320
 321         used_v_size = buf - BUF(v);
 322         if (used_v_size != total_v_size)
 323                 _PyString_Resize(&v, used_v_size);
 324         return v;
 325 }
 326
 327 /* This is a hacked version of Python's
 328  * fileobject.c:Py_UniversalNewlineFread(). */
 329 size_t
 330 Util_UnivNewlineRead(int *bzerror, BZFILE *stream,
 331                      char* buf, size_t n, BZ2FileObject *f)
 332 {
 333         char *dst = buf;
 334         int newlinetypes, skipnextlf;
 335
 336         assert(buf != NULL);
 337         assert(stream != NULL);
 338
 339         if (!f->f_univ_newline)
 340                 return BZ2_bzRead(bzerror, stream, buf, n);
 341
 342         newlinetypes = f->f_newlinetypes;
 343         skipnextlf = f->f_skipnextlf;
 344
 345         /* Invariant:  n is the number of bytes remaining to be filled
 346          * in the buffer.
 347          */
 348         while (n) {
 349                 size_t nread;
 350                 int shortread;
 351                 char *src = dst;
 352
 353                 nread = BZ2_bzRead(bzerror, stream, dst, n);
 354                 assert(nread <= n);
 355                 n -= nread; /* assuming 1 byte out for each in; will adjust */
 356                 shortread = n != 0;     /* true iff EOF or error */
 357                 while (nread--) {
 358                         char c = *src++;
 359                         if (c == '\r') {
 360                                 /* Save as LF and set flag to skip next LF. */
 361                                 *dst++ = '\n';
 362                                 skipnextlf = 1;
 363                         }
 364                         else if (skipnextlf && c == '\n') {
 365                                 /* Skip LF, and remember we saw CR LF. */
 366                                 skipnextlf = 0;
 367                                 newlinetypes |= NEWLINE_CRLF;
 368                                 ++n;
 369                         }
 370                         else {
 371                                 /* Normal char to be stored in buffer.  Also
 372                                  * update the newlinetypes flag if either this
 373                                  * is an LF or the previous char was a CR.
 374                                  */
 375                                 if (c == '\n')
 376                                         newlinetypes |= NEWLINE_LF;
 377                                 else if (skipnextlf)
 378                                         newlinetypes |= NEWLINE_CR;
 379                                 *dst++ = c;
 380                                 skipnextlf = 0;
 381                         }
 382                 }
 383                 if (shortread) {
 384                         /* If this is EOF, update type flags. */
 385                         if (skipnextlf && *bzerror == BZ_STREAM_END)
 386                                 newlinetypes |= NEWLINE_CR;
 387                         break;
 388                 }
 389         }
 390         f->f_newlinetypes = newlinetypes;
 391         f->f_skipnextlf = skipnextlf;
 392         return dst - buf;
 393 }
 394
 395 /* This is a hacked version of Python's fileobject.c:drop_readahead(). */
 396 static void
 397 Util_DropReadAhead(BZ2FileObject *f)
 398 {
 399         if (f->f_buf != NULL) {
 400                 PyMem_Free(f->f_buf);
 401                 f->f_buf = NULL;
 402         }
 403 }
 404
 405 /* This is a hacked version of Python's fileobject.c:readahead(). */
 406 static int
 407 Util_ReadAhead(BZ2FileObject *f, int bufsize)
 408 {
 409         int chunksize;
 410         int bzerror;
 411
 412         if (f->f_buf != NULL) {
 413                 if((f->f_bufend - f->f_bufptr) >= 1)
 414                         return 0;
 415                 else
 416                         Util_DropReadAhead(f);
 417         }
 418         if (f->mode == MODE_READ_EOF) {
 419                 f->f_bufptr = f->f_buf;
 420                 f->f_bufend = f->f_buf;
 421                 return 0;
 422         }
 423         if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
 424                 PyErr_NoMemory();
 425                 return -1;
 426         }
 427         Py_BEGIN_ALLOW_THREADS
 428         chunksize = Util_UnivNewlineRead(&bzerror, f->fp, f->f_buf,
 429                                          bufsize, f);
 430         Py_END_ALLOW_THREADS
 431         f->pos += chunksize;
 432         if (bzerror == BZ_STREAM_END) {
 433                 f->size = f->pos;
 434                 f->mode = MODE_READ_EOF;
 435         } else if (bzerror != BZ_OK) {
 436                 Util_CatchBZ2Error(bzerror);
 437                 Util_DropReadAhead(f);
 438                 return -1;
 439         }
 440         f->f_bufptr = f->f_buf;
 441         f->f_bufend = f->f_buf + chunksize;
 442         return 0;
 443 }
 444
 445 /* This is a hacked version of Python's
 446  * fileobject.c:readahead_get_line_skip(). */
 447 static PyStringObject *
 448 Util_ReadAheadGetLineSkip(BZ2FileObject *f, int skip, int bufsize)
 449 {
 450         PyStringObject* s;
 451         char *bufptr;
 452         char *buf;
 453         int len;
 454
 455         if (f->f_buf == NULL)
 456                 if (Util_ReadAhead(f, bufsize) < 0)
 457                         return NULL;
 458
 459         len = f->f_bufend - f->f_bufptr;
 460         if (len == 0)
 461                 return (PyStringObject *)
 462                         PyString_FromStringAndSize(NULL, skip);
 463         bufptr = memchr(f->f_bufptr, '\n', len);
 464         if (bufptr != NULL) {
 465                 bufptr++;                       /* Count the '\n' */
 466                 len = bufptr - f->f_bufptr;
 467                 s = (PyStringObject *)
 468                         PyString_FromStringAndSize(NULL, skip+len);
 469                 if (s == NULL)
 470                         return NULL;
 471                 memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
 472                 f->f_bufptr = bufptr;
 473                 if (bufptr == f->f_bufend)
 474                         Util_DropReadAhead(f);
 475         } else {
 476                 bufptr = f->f_bufptr;
 477                 buf = f->f_buf;
 478                 f->f_buf = NULL;        /* Force new readahead buffer */
 479                 s = Util_ReadAheadGetLineSkip(f, skip+len,
 480                                               bufsize + (bufsize>>2));
 481                 if (s == NULL) {
 482                         PyMem_Free(buf);
 483                         return NULL;
 484                 }
 485                 memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
 486                 PyMem_Free(buf);
 487         }
 488         return s;
 489 }
 490
 491 /* ===================================================================== */
 492 /* Methods of BZ2File. */
 493
 494 PyDoc_STRVAR(BZ2File_read__doc__,
 495 "read([size]) -> string\n\
 496 \n\
 497 Read at most size uncompressed bytes, returned as a string. If the size\n\
 498 argument is negative or omitted, read until EOF is reached.\n\
 499 ");
 500
 501 /* This is a hacked version of Python's fileobject.c:file_read(). */
 502 static PyObject *
 503 BZ2File_read(BZ2FileObject *self, PyObject *args)
 504 {
 505         long bytesrequested = -1;
 506         size_t bytesread, buffersize, chunksize;
 507         int bzerror;
 508         PyObject *ret = NULL;
 509
 510         if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
 511                 return NULL;
 512
 513         ACQUIRE_LOCK(self);
 514         switch (self->mode) {
 515                 case MODE_READ:
 516                         break;
 517                 case MODE_READ_EOF:
 518                         ret = PyString_FromString("");
 519                         goto cleanup;
 520                 case MODE_CLOSED:
 521                         PyErr_SetString(PyExc_ValueError,
 522                                         "I/O operation on closed file");
 523                         goto cleanup;
 524                 default:
 525                         PyErr_SetString(PyExc_IOError,
 526                                         "file is not ready for reading");
 527                         goto cleanup;
 528         }
 529
 530         if (bytesrequested < 0)
 531                 buffersize = Util_NewBufferSize((size_t)0);
 532         else
 533                 buffersize = bytesrequested;
 534         if (buffersize > INT_MAX) {
 535                 PyErr_SetString(PyExc_OverflowError,
 536                                 "requested number of bytes is "
 537                                 "more than a Python string can hold");
 538                 goto cleanup;
 539         }
 540         ret = PyString_FromStringAndSize((char *)NULL, buffersize);
 541         if (ret == NULL)
 542                 goto cleanup;
 543         bytesread = 0;
 544
 545         for (;;) {
 546                 Py_BEGIN_ALLOW_THREADS
 547                 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
 548                                                  BUF(ret)+bytesread,
 549                                                  buffersize-bytesread,
 550                                                  self);
 551                 self->pos += chunksize;
 552                 Py_END_ALLOW_THREADS
 553                 bytesread += chunksize;
 554                 if (bzerror == BZ_STREAM_END) {
 555                         self->size = self->pos;
 556                         self->mode = MODE_READ_EOF;
 557                         break;
 558                 } else if (bzerror != BZ_OK) {
 559                         Util_CatchBZ2Error(bzerror);
 560                         Py_DECREF(ret);
 561                         ret = NULL;
 562                         goto cleanup;
 563                 }
 564                 if (bytesrequested < 0) {
 565                         buffersize = Util_NewBufferSize(buffersize);
 566                         if (_PyString_Resize(&ret, buffersize) < 0)
 567                                 goto cleanup;
 568                 } else {
 569                         break;
 570                 }
 571         }
 572         if (bytesread != buffersize)
 573                 _PyString_Resize(&ret, bytesread);
 574
 575 cleanup:
 576         RELEASE_LOCK(self);
 577         return ret;
 578 }
 579
 580 PyDoc_STRVAR(BZ2File_readline__doc__,
 581 "readline([size]) -> string\n\
 582 \n\
 583 Return the next line from the file, as a string, retaining newline.\n\
 584 A non-negative size argument will limit the maximum number of bytes to\n\
 585 return (an incomplete line may be returned then). Return an empty\n\
 586 string at EOF.\n\
 587 ");
 588
 589 static PyObject *
 590 BZ2File_readline(BZ2FileObject *self, PyObject *args)
 591 {
 592         PyObject *ret = NULL;
 593         int sizehint = -1;
 594
 595         if (!PyArg_ParseTuple(args, "|i:readline", &sizehint))
 596                 return NULL;
 597
 598         ACQUIRE_LOCK(self);
 599         switch (self->mode) {
 600                 case MODE_READ:
 601                         break;
 602                 case MODE_READ_EOF:
 603                         ret = PyString_FromString("");
 604                         goto cleanup;
 605                 case MODE_CLOSED:
 606                         PyErr_SetString(PyExc_ValueError,
 607                                         "I/O operation on closed file");
 608                         goto cleanup;
 609                 default:
 610                         PyErr_SetString(PyExc_IOError,
 611                                         "file is not ready for reading");
 612                         goto cleanup;
 613         }
 614
 615         if (sizehint == 0)
 616                 ret = PyString_FromString("");
 617         else
 618                 ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint);
 619
 620 cleanup:
 621         RELEASE_LOCK(self);
 622         return ret;
 623 }
 624
 625 PyDoc_STRVAR(BZ2File_readlines__doc__,
 626 "readlines([size]) -> list\n\
 627 \n\
 628 Call readline() repeatedly and return a list of lines read.\n\
 629 The optional size argument, if given, is an approximate bound on the\n\
 630 total number of bytes in the lines returned.\n\
 631 ");
 632
 633 /* This is a hacked version of Python's fileobject.c:file_readlines(). */
 634 static PyObject *
 635 BZ2File_readlines(BZ2FileObject *self, PyObject *args)
 636 {
 637         long sizehint = 0;
 638         PyObject *list = NULL;
 639         PyObject *line;
 640         char small_buffer[SMALLCHUNK];
 641         char *buffer = small_buffer;
 642         size_t buffersize = SMALLCHUNK;
 643         PyObject *big_buffer = NULL;
 644         size_t nfilled = 0;
 645         size_t nread;
 646         size_t totalread = 0;
 647         char *p, *q, *end;
 648         int err;
 649         int shortread = 0;
 650         int bzerror;
 651
 652         if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
 653                 return NULL;
 654
 655         ACQUIRE_LOCK(self);
 656         switch (self->mode) {
 657                 case MODE_READ:
 658                         break;
 659                 case MODE_READ_EOF:
 660                         list = PyList_New(0);
 661                         goto cleanup;
 662                 case MODE_CLOSED:
 663                         PyErr_SetString(PyExc_ValueError,
 664                                         "I/O operation on closed file");
 665                         goto cleanup;
 666                 default:
 667                         PyErr_SetString(PyExc_IOError,
 668                                         "file is not ready for reading");
 669                         goto cleanup;
 670         }
 671
 672         if ((list = PyList_New(0)) == NULL)
 673                 goto cleanup;
 674
 675         for (;;) {
 676                 Py_BEGIN_ALLOW_THREADS
 677                 nread = Util_UnivNewlineRead(&bzerror, self->fp,
 678                                              buffer+nfilled,
 679                                              buffersize-nfilled, self);
 680                 self->pos += nread;
 681                 Py_END_ALLOW_THREADS
 682                 if (bzerror == BZ_STREAM_END) {
 683                         self->size = self->pos;
 684                         self->mode = MODE_READ_EOF;
 685                         if (nread == 0) {
 686                                 sizehint = 0;
 687                                 break;
 688                         }
 689                         shortread = 1;
 690                 } else if (bzerror != BZ_OK) {
 691                         Util_CatchBZ2Error(bzerror);
 692                   error:
 693                         Py_DECREF(list);
 694                         list = NULL;
 695                         goto cleanup;
 696                 }
 697                 totalread += nread;
 698                 p = memchr(buffer+nfilled, '\n', nread);
 699                 if (!shortread && p == NULL) {
 700                         /* Need a larger buffer to fit this line */
 701                         nfilled += nread;
 702                         buffersize *= 2;
 703                         if (buffersize > INT_MAX) {
 704                                 PyErr_SetString(PyExc_OverflowError,
 705                                 "line is longer than a Python string can hold");
 706                                 goto error;
 707                         }
 708                         if (big_buffer == NULL) {
 709                                 /* Create the big buffer */
 710                                 big_buffer = PyString_FromStringAndSize(
 711                                         NULL, buffersize);
 712                                 if (big_buffer == NULL)
 713                                         goto error;
 714                                 buffer = PyString_AS_STRING(big_buffer);
 715                                 memcpy(buffer, small_buffer, nfilled);
 716                         }
 717                         else {
 718                                 /* Grow the big buffer */
 719                                 _PyString_Resize(&big_buffer, buffersize);
 720                                 buffer = PyString_AS_STRING(big_buffer);
 721                         }
 722                         continue;
 723                 }
 724                 end = buffer+nfilled+nread;
 725                 q = buffer;
 726                 while (p != NULL) {
 727                         /* Process complete lines */
 728                         p++;
 729                         line = PyString_FromStringAndSize(q, p-q);
 730                         if (line == NULL)
 731                                 goto error;
 732                         err = PyList_Append(list, line);
 733                         Py_DECREF(line);
 734                         if (err != 0)
 735                                 goto error;
 736                         q = p;
 737                         p = memchr(q, '\n', end-q);
 738                 }
 739                 /* Move the remaining incomplete line to the start */
 740                 nfilled = end-q;
 741                 memmove(buffer, q, nfilled);
 742                 if (sizehint > 0)
 743                         if (totalread >= (size_t)sizehint)
 744                                 break;
 745                 if (shortread) {
 746                         sizehint = 0;
 747                         break;
 748                 }
 749         }
 750         if (nfilled != 0) {
 751                 /* Partial last line */
 752                 line = PyString_FromStringAndSize(buffer, nfilled);
 753                 if (line == NULL)
 754                         goto error;
 755                 if (sizehint > 0) {
 756                         /* Need to complete the last line */
 757                         PyObject *rest = Util_GetLine(self, 0);
 758                         if (rest == NULL) {
 759                                 Py_DECREF(line);
 760                                 goto error;
 761                         }
 762                         PyString_Concat(&line, rest);
 763                         Py_DECREF(rest);
 764                         if (line == NULL)
 765                                 goto error;
 766                 }
 767                 err = PyList_Append(list, line);
 768                 Py_DECREF(line);
 769                 if (err != 0)
 770                         goto error;
 771         }
 772
 773   cleanup:
 774         RELEASE_LOCK(self);
 775         if (big_buffer) {
 776                 Py_DECREF(big_buffer);
 777         }
 778         return list;
 779 }
 780
 781 PyDoc_STRVAR(BZ2File_xreadlines__doc__,
 782 "xreadlines() -> self\n\
 783 \n\
 784 For backward compatibility. BZ2File objects now include the performance\n\
 785 optimizations previously implemented in the xreadlines module.\n\
 786 ");
 787
 788 PyDoc_STRVAR(BZ2File_write__doc__,
 789 "write(data) -> None\n\
 790 \n\
 791 Write the 'data' string to file. Note that due to buffering, close() may\n\
 792 be needed before the file on disk reflects the data written.\n\
 793 ");
 794
 795 /* This is a hacked version of Python's fileobject.c:file_write(). */
 796 static PyObject *
 797 BZ2File_write(BZ2FileObject *self, PyObject *args)
 798 {
 799         PyObject *ret = NULL;
 800         Py_buffer pbuf;
 801         char *buf;
 802         int len;
 803         int bzerror;
 804
 805         if (!PyArg_ParseTuple(args, "s*:write", &pbuf))
 806                 return NULL;
 807         buf = pbuf.buf;
 808         len = pbuf.len;
 809
 810         ACQUIRE_LOCK(self);
 811         switch (self->mode) {
 812                 case MODE_WRITE:
 813                         break;
 814
 815                 case MODE_CLOSED:
 816                         PyErr_SetString(PyExc_ValueError,
 817                                         "I/O operation on closed file");
 818                         goto cleanup;
 819
 820                 default:
 821                         PyErr_SetString(PyExc_IOError,
 822                                         "file is not ready for writing");
 823                         goto cleanup;
 824         }
 825
 826         self->f_softspace = 0;
 827
 828         Py_BEGIN_ALLOW_THREADS
 829         BZ2_bzWrite (&bzerror, self->fp, buf, len);
 830         self->pos += len;
 831         Py_END_ALLOW_THREADS
 832
 833         if (bzerror != BZ_OK) {
 834                 Util_CatchBZ2Error(bzerror);
 835                 goto cleanup;
 836         }
 837
 838         Py_INCREF(Py_None);
 839         ret = Py_None;
 840
 841 cleanup:
 842         PyBuffer_Release(&pbuf);
 843         RELEASE_LOCK(self);
 844         return ret;
 845 }
 846
 847 PyDoc_STRVAR(BZ2File_writelines__doc__,
 848 "writelines(sequence_of_strings) -> None\n\
 849 \n\
 850 Write the sequence of strings to the file. Note that newlines are not\n\
 851 added. The sequence can be any iterable object producing strings. This is\n\
 852 equivalent to calling write() for each string.\n\
 853 ");
 854
 855 /* This is a hacked version of Python's fileobject.c:file_writelines(). */
 856 static PyObject *
 857 BZ2File_writelines(BZ2FileObject *self, PyObject *seq)
 858 {
 859 #define CHUNKSIZE 1000
 860         PyObject *list = NULL;
 861         PyObject *iter = NULL;
 862         PyObject *ret = NULL;
 863         PyObject *line;
 864         int i, j, index, len, islist;
 865         int bzerror;
 866
 867         ACQUIRE_LOCK(self);
 868         switch (self->mode) {
 869                 case MODE_WRITE:
 870                         break;
 871
 872                 case MODE_CLOSED:
 873                         PyErr_SetString(PyExc_ValueError,
 874                                         "I/O operation on closed file");
 875                         goto error;
 876
 877                 default:
 878                         PyErr_SetString(PyExc_IOError,
 879                                         "file is not ready for writing");
 880                         goto error;
 881         }
 882
 883         islist = PyList_Check(seq);
 884         if  (!islist) {
 885                 iter = PyObject_GetIter(seq);
 886                 if (iter == NULL) {
 887                         PyErr_SetString(PyExc_TypeError,
 888                                 "writelines() requires an iterable argument");
 889                         goto error;
 890                 }
 891                 list = PyList_New(CHUNKSIZE);
 892                 if (list == NULL)
 893                         goto error;
 894         }
 895
 896         /* Strategy: slurp CHUNKSIZE lines into a private list,
 897            checking that they are all strings, then write that list
 898            without holding the interpreter lock, then come back for more. */
 899         for (index = 0; ; index += CHUNKSIZE) {
 900                 if (islist) {
 901                         Py_XDECREF(list);
 902                         list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
 903                         if (list == NULL)
 904                                 goto error;
 905                         j = PyList_GET_SIZE(list);
 906                 }
 907                 else {
 908                         for (j = 0; j < CHUNKSIZE; j++) {
 909                                 line = PyIter_Next(iter);
 910                                 if (line == NULL) {
 911                                         if (PyErr_Occurred())
 912                                                 goto error;
 913                                         break;
 914                                 }
 915                                 PyList_SetItem(list, j, line);
 916                         }
 917                 }
 918                 if (j == 0)
 919                         break;
 920
 921                 /* Check that all entries are indeed strings. If not,
 922                    apply the same rules as for file.write() and
 923                    convert the rets to strings. This is slow, but
 924                    seems to be the only way since all conversion APIs
 925                    could potentially execute Python code. */
 926                 for (i = 0; i < j; i++) {
 927                         PyObject *v = PyList_GET_ITEM(list, i);
 928                         if (!PyString_Check(v)) {
 929                                 const char *buffer;
 930                                 Py_ssize_t len;
 931                                 if (PyObject_AsCharBuffer(v, &buffer, &len)) {
 932                                         PyErr_SetString(PyExc_TypeError,
 933                                                         "writelines() "
 934                                                         "argument must be "
 935                                                         "a sequence of "
 936                                                         "strings");
 937                                         goto error;
 938                                 }
 939                                 line = PyString_FromStringAndSize(buffer,
 940                                                                   len);
 941                                 if (line == NULL)
 942                                         goto error;
 943                                 Py_DECREF(v);
 944                                 PyList_SET_ITEM(list, i, line);
 945                         }
 946                 }
 947
 948                 self->f_softspace = 0;
 949
 950                 /* Since we are releasing the global lock, the
 951                    following code may *not* execute Python code. */
 952                 Py_BEGIN_ALLOW_THREADS
 953                 for (i = 0; i < j; i++) {
 954                         line = PyList_GET_ITEM(list, i);
 955                         len = PyString_GET_SIZE(line);
 956                         BZ2_bzWrite (&bzerror, self->fp,
 957                                      PyString_AS_STRING(line), len);
 958                         if (bzerror != BZ_OK) {
 959                                 Py_BLOCK_THREADS
 960                                 Util_CatchBZ2Error(bzerror);
 961                                 goto error;
 962                         }
 963                 }
 964                 Py_END_ALLOW_THREADS
 965
 966                 if (j < CHUNKSIZE)
 967                         break;
 968         }
 969
 970         Py_INCREF(Py_None);
 971         ret = Py_None;
 972
 973   error:
 974         RELEASE_LOCK(self);
 975         Py_XDECREF(list);
 976         Py_XDECREF(iter);
 977         return ret;
 978 #undef CHUNKSIZE
 979 }
 980
PyDoc_STRVAR(BZ2File_seek__doc__,
"seek(offset [, whence]) -> None\n\
\n\
Move to new file position. Argument offset is a byte count. Optional\n\
argument whence defaults to 0 (offset from start of file, offset\n\
should be >= 0); other values are 1 (move relative to current position,\n\
positive or negative), and 2 (move relative to end of file, usually\n\
negative, although many platforms allow seeking beyond the end of a file).\n\
\n\
Note that seeking of bz2 files is emulated, and depending on the parameters\n\
the operation may be extremely slow.\n\
");

/* Emulated seek for a BZ2File that is in one of the read modes.
 *
 * args: (offset[, whence]).  offset is converted with PyInt_AsLong(), or
 *       with PyLong_AsLongLong() for long objects when
 *       HAVE_LARGEFILE_SUPPORT is defined.  whence is stored in `where`;
 *       only the values 1 and 2 are special-cased, anything else is
 *       handled like 0.
 * Returns a new reference to None, or NULL with an exception set
 * (ValueError for a closed file, IOError when not reading, or after
 * Util_CatchBZ2Error() has been called for a libbz2 error).
 *
 * Outline of the code below:
 *   1. Turn (offset, whence) into an absolute target position.  For
 *      whence == 2 with an unknown size (self->size == -1) the stream is
 *      read to its end first so that self->size can be recorded.
 *   2. If the target lies before self->pos, close the bz2 reader, seek
 *      the underlying file object to 0 and open a fresh reader.
 *   3. Read and discard data through a stack buffer until the target is
 *      reached or the stream ends.
 */
static PyObject *
BZ2File_seek(BZ2FileObject *self, PyObject *args)
{
        int where = 0;
        PyObject *offobj;
        Py_off_t offset;
        char small_buffer[SMALLCHUNK];
        char *buffer = small_buffer;
        size_t buffersize = SMALLCHUNK;
        Py_off_t bytesread = 0;
        size_t readsize;
        int chunksize;
        int bzerror;
        PyObject *ret = NULL;

        if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &where))
                return NULL;
#if !defined(HAVE_LARGEFILE_SUPPORT)
        offset = PyInt_AsLong(offobj);
#else
        offset = PyLong_Check(offobj) ?
                PyLong_AsLongLong(offobj) : PyInt_AsLong(offobj);
#endif
        /* The conversion functions report failure through the error
         * indicator, so that is what gets checked here. */
        if (PyErr_Occurred())
                return NULL;

        ACQUIRE_LOCK(self);
        /* Called before the mode check, so it runs even when the call is
         * about to be rejected. */
        Util_DropReadAhead(self);
        switch (self->mode) {
                case MODE_READ:
                case MODE_READ_EOF:
                        break;

                case MODE_CLOSED:
                        PyErr_SetString(PyExc_ValueError,
                                        "I/O operation on closed file");
                        goto cleanup;

                default:
                        PyErr_SetString(PyExc_IOError,
                                        "seek works only while reading");
                        goto cleanup;
        }

        if (where == 2) {
                if (self->size == -1) {
                        /* Size not known yet: read up to BZ_STREAM_END,
                         * discarding the data, to find out where the
                         * stream ends. */
                        assert(self->mode != MODE_READ_EOF);
                        for (;;) {
                                Py_BEGIN_ALLOW_THREADS
                                chunksize = Util_UnivNewlineRead(
                                                &bzerror, self->fp,
                                                buffer, buffersize,
                                                self);
                                self->pos += chunksize;
                                Py_END_ALLOW_THREADS

                                bytesread += chunksize;
                                if (bzerror == BZ_STREAM_END) {
                                        break;
                                } else if (bzerror != BZ_OK) {
                                        Util_CatchBZ2Error(bzerror);
                                        goto cleanup;
                                }
                        }
                        self->mode = MODE_READ_EOF;
                        self->size = self->pos;
                        /* Reset so the forward-skip loop further down
                         * starts counting from zero. */
                        bytesread = 0;
                }
                offset = self->size + offset;
        } else if (where == 1) {
                offset = self->pos + offset;
        }

        /* Before getting here, offset must be the absolute position the file
         * pointer should be set to. */

        if (offset >= self->pos) {
                /* we can move forward */
                offset -= self->pos;
        } else {
                /* we cannot move back, so rewind the stream */
                BZ2_bzReadClose(&bzerror, self->fp);
                if (self->fp) {
                        PyFile_DecUseCount((PyFileObject *)self->file);
                        self->fp = NULL;
                }
                if (bzerror != BZ_OK) {
                        Util_CatchBZ2Error(bzerror);
                        goto cleanup;
                }
                /* ret is only used as a temporary for the call result and
                 * is put back to NULL so a later failure returns NULL. */
                ret = PyObject_CallMethod(self->file, "seek", "(i)", 0);
                if (!ret)
                        goto cleanup;
                Py_DECREF(ret);
                ret = NULL;
                self->pos = 0;
                self->fp = BZ2_bzReadOpen(&bzerror, PyFile_AsFile(self->file),
                                          0, 0, NULL, 0);
                if (self->fp)
                        PyFile_IncUseCount((PyFileObject *)self->file);
                if (bzerror != BZ_OK) {
                        Util_CatchBZ2Error(bzerror);
                        goto cleanup;
                }
                self->mode = MODE_READ;
        }

        /* Nothing to skip: either the target is at or before the current
         * position (after a rewind offset is still the absolute target,
         * which may be <= 0), or the stream is already at its end. */
        if (offset <= 0 || self->mode == MODE_READ_EOF)
                goto exit;

        /* Before getting here, offset must be set to the number of bytes
         * to walk forward. */
        for (;;) {
                if (offset-bytesread > buffersize)
                        readsize = buffersize;
                else
                        /* offset might be wider than readsize, but the result
                         * of the subtraction is bound by buffersize (see the
                         * condition above). buffersize is 8192. */
                        readsize = (size_t)(offset-bytesread);
                Py_BEGIN_ALLOW_THREADS
                chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
                                                 buffer, readsize, self);
                self->pos += chunksize;
                Py_END_ALLOW_THREADS
                bytesread += chunksize;
                if (bzerror == BZ_STREAM_END) {
                        /* Hit the end before reaching the target: record
                         * the size that was just discovered. */
                        self->size = self->pos;
                        self->mode = MODE_READ_EOF;
                        break;
                } else if (bzerror != BZ_OK) {
                        Util_CatchBZ2Error(bzerror);
                        goto cleanup;
                }
                if (bytesread == offset)
                        break;
        }

exit:
        Py_INCREF(Py_None);
        ret = Py_None;

cleanup:
        /* Reached on success and on failure; ret is NULL on failure. */
        RELEASE_LOCK(self);
        return ret;
}
1140
1141 PyDoc_STRVAR(BZ2File_tell__doc__,
1142 "tell() -> int\n\
1143 \n\
1144 Return the current file position, an integer (may be a long integer).\n\
1145 ");
1146
1147 static PyObject *
1148 BZ2File_tell(BZ2FileObject *self, PyObject *args)
1149 {
1150         PyObject *ret = NULL;
1151
1152         if (self->mode == MODE_CLOSED) {
1153                 PyErr_SetString(PyExc_ValueError,
1154                                 "I/O operation on closed file");
1155                 goto cleanup;
1156         }
1157
1158 #if !defined(HAVE_LARGEFILE_SUPPORT)
1159         ret = PyInt_FromLong(self->pos);
1160 #else
1161         ret = PyLong_FromLongLong(self->pos);
1162 #endif
1163
1164 cleanup:
1165         return ret;
1166 }
1167
1168 PyDoc_STRVAR(BZ2File_close__doc__,
1169 "close() -> None or (perhaps) an integer\n\
1170 \n\
1171 Close the file. Sets data attribute .closed to true. A closed file\n\
1172 cannot be used for further I/O operations. close() may be called more\n\
1173 than once without error.\n\
1174 ");
1175
1176 static PyObject *
1177 BZ2File_close(BZ2FileObject *self)
1178 {
1179         PyObject *ret = NULL;
1180         int bzerror = BZ_OK;
1181
1182         ACQUIRE_LOCK(self);
1183         switch (self->mode) {
1184                 case MODE_READ:
1185                 case MODE_READ_EOF:
1186                         BZ2_bzReadClose(&bzerror, self->fp);
1187                         break;
1188                 case MODE_WRITE:
1189                         BZ2_bzWriteClose(&bzerror, self->fp,
1190                                          0, NULL, NULL);
1191                         break;
1192         }
1193         if (self->fp) {
1194                 PyFile_DecUseCount((PyFileObject *)self->file);
1195                 self->fp = NULL;
1196         }
1197         self->mode = MODE_CLOSED;
1198         ret = PyObject_CallMethod(self->file, "close", NULL);
1199         if (bzerror != BZ_OK) {
1200                 Util_CatchBZ2Error(bzerror);
1201                 Py_XDECREF(ret);
1202                 ret = NULL;
1203         }
1204
1205         RELEASE_LOCK(self);
1206         return ret;
1207 }
1208
1209 PyDoc_STRVAR(BZ2File_enter_doc,
1210 "__enter__() -> self.");
1211
1212 static PyObject *
1213 BZ2File_enter(BZ2FileObject *self)
1214 {
1215         if (self->mode == MODE_CLOSED) {
1216                 PyErr_SetString(PyExc_ValueError,
1217                         "I/O operation on closed file");
1218                 return NULL;
1219         }
1220         Py_INCREF(self);
1221         return (PyObject *) self;
1222 }
1223
1224 PyDoc_STRVAR(BZ2File_exit_doc,
1225 "__exit__(*excinfo) -> None.  Closes the file.");
1226
1227 static PyObject *
1228 BZ2File_exit(BZ2FileObject *self, PyObject *args)
1229 {
1230         PyObject *ret = PyObject_CallMethod((PyObject *) self, "close", NULL);
1231         if (!ret)
1232                 /* If error occurred, pass through */
1233                 return NULL;
1234         Py_DECREF(ret);
1235         Py_RETURN_NONE;
1236 }
1237
1238
1239 static PyObject *BZ2File_getiter(BZ2FileObject *self);
1240
1241 static PyMethodDef BZ2File_methods[] = {
1242         {"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__},
1243         {"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__doc__},
1244         {"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readlines__doc__},
1245         {"xreadlines", (PyCFunction)BZ2File_getiter, METH_VARARGS, BZ2File_xreadlines__doc__},
1246         {"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__},
1247         {"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__doc__},
1248         {"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__},
1249         {"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__},
1250         {"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__},
1251         {"__enter__", (PyCFunction)BZ2File_enter, METH_NOARGS, BZ2File_enter_doc},
1252         {"__exit__", (PyCFunction)BZ2File_exit, METH_VARARGS, BZ2File_exit_doc},
1253         {NULL,          NULL}           /* sentinel */
1254 };
1255
1256
1257 /* ===================================================================== */
1258 /* Getters and setters of BZ2File. */
1259
1260 /* This is a hacked version of Python's fileobject.c:get_newlines(). */
1261 static PyObject *
1262 BZ2File_get_newlines(BZ2FileObject *self, void *closure)
1263 {
1264         switch (self->f_newlinetypes) {
1265         case NEWLINE_UNKNOWN:
1266                 Py_INCREF(Py_None);
1267                 return Py_None;
1268         case NEWLINE_CR:
1269                 return PyString_FromString("\r");
1270         case NEWLINE_LF:
1271                 return PyString_FromString("\n");
1272         case NEWLINE_CR|NEWLINE_LF:
1273                 return Py_BuildValue("(ss)", "\r", "\n");
1274         case NEWLINE_CRLF:
1275                 return PyString_FromString("\r\n");
1276         case NEWLINE_CR|NEWLINE_CRLF:
1277                 return Py_BuildValue("(ss)", "\r", "\r\n");
1278         case NEWLINE_LF|NEWLINE_CRLF:
1279                 return Py_BuildValue("(ss)", "\n", "\r\n");
1280         case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF:
1281                 return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
1282         default:
1283                 PyErr_Format(PyExc_SystemError,
1284                              "Unknown newlines value 0x%x\n",
1285                              self->f_newlinetypes);
1286                 return NULL;
1287         }
1288 }
1289
1290 static PyObject *
1291 BZ2File_get_closed(BZ2FileObject *self, void *closure)
1292 {
1293         return PyInt_FromLong(self->mode == MODE_CLOSED);
1294 }
1295
1296 static PyObject *
1297 BZ2File_get_mode(BZ2FileObject *self, void *closure)
1298 {
1299         return PyObject_GetAttrString(self->file, "mode");
1300 }
1301
1302 static PyObject *
1303 BZ2File_get_name(BZ2FileObject *self, void *closure)
1304 {
1305         return PyObject_GetAttrString(self->file, "name");
1306 }
1307
1308 static PyGetSetDef BZ2File_getset[] = {
1309         {"closed", (getter)BZ2File_get_closed, NULL,
1310                         "True if the file is closed"},
1311         {"newlines", (getter)BZ2File_get_newlines, NULL,
1312                         "end-of-line convention used in this file"},
1313         {"mode", (getter)BZ2File_get_mode, NULL,
1314                         "file mode ('r', 'w', or 'U')"},
1315         {"name", (getter)BZ2File_get_name, NULL,
1316                         "file name"},
1317         {NULL}  /* Sentinel */
1318 };
1319
1320
1321 /* ===================================================================== */
1322 /* Members of BZ2File_Type. */
1323
1324 #undef OFF
1325 #define OFF(x) offsetof(BZ2FileObject, x)
1326
1327 static PyMemberDef BZ2File_members[] = {
1328         {"softspace",   T_INT,          OFF(f_softspace), 0,
1329          "flag indicating that a space needs to be printed; used by print"},
1330         {NULL}  /* Sentinel */
1331 };
1332
1333 /* ===================================================================== */
1334 /* Slot definitions for BZ2File_Type. */
1335
1336 static int
1337 BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
1338 {
1339         static char *kwlist[] = {"filename", "mode", "buffering",
1340                                        "compresslevel", 0};
1341         PyObject *name;
1342         char *mode = "r";
1343         int buffering = -1;
1344         int compresslevel = 9;
1345         int bzerror;
1346         int mode_char = 0;
1347
1348         self->size = -1;
1349
1350         if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|sii:BZ2File",
1351                                          kwlist, &name, &mode, &buffering,
1352                                          &compresslevel))
1353                 return -1;
1354
1355         if (compresslevel < 1 || compresslevel > 9) {
1356                 PyErr_SetString(PyExc_ValueError,
1357                                 "compresslevel must be between 1 and 9");
1358                 return -1;
1359         }
1360
1361         for (;;) {
1362                 int error = 0;
1363                 switch (*mode) {
1364                         case 'r':
1365                         case 'w':
1366                                 if (mode_char)
1367                                         error = 1;
1368                                 mode_char = *mode;
1369                                 break;
1370
1371                         case 'b':
1372                                 break;
1373
1374                         case 'U':
1375 #ifdef __VMS
1376                                 self->f_univ_newline = 0;
1377 #else
1378                                 self->f_univ_newline = 1;
1379 #endif
1380                                 break;
1381
1382                         default:
1383                                 error = 1;
1384                                 break;
1385                 }
1386                 if (error) {
1387                         PyErr_Format(PyExc_ValueError,
1388                                      "invalid mode char %c", *mode);
1389                         return -1;
1390                 }
1391                 mode++;
1392                 if (*mode == '\0')
1393                         break;
1394         }
1395
1396         if (mode_char == 0) {
1397                 mode_char = 'r';
1398         }
1399
1400         mode = (mode_char == 'r') ? "rb" : "wb";
1401
1402         self->file = PyObject_CallFunction((PyObject*)&PyFile_Type, "(Osi)",
1403                                            name, mode, buffering);
1404         if (self->file == NULL)
1405                 return -1;
1406
1407         /* From now on, we have stuff to dealloc, so jump to error label
1408          * instead of returning */
1409
1410 #ifdef WITH_THREAD
1411         self->lock = PyThread_allocate_lock();
1412         if (!self->lock) {
1413                 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1414                 goto error;
1415         }
1416 #endif
1417
1418         if (mode_char == 'r')
1419                 self->fp = BZ2_bzReadOpen(&bzerror,
1420                                           PyFile_AsFile(self->file),
1421                                           0, 0, NULL, 0);
1422         else
1423                 self->fp = BZ2_bzWriteOpen(&bzerror,
1424                                            PyFile_AsFile(self->file),
1425                                            compresslevel, 0, 0);
1426
1427         if (bzerror != BZ_OK) {
1428                 Util_CatchBZ2Error(bzerror);
1429                 goto error;
1430         }
1431         PyFile_IncUseCount((PyFileObject *)self->file);
1432
1433         self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE;
1434
1435         return 0;
1436
1437 error:
1438         Py_CLEAR(self->file);
1439 #ifdef WITH_THREAD
1440         if (self->lock) {
1441                 PyThread_free_lock(self->lock);
1442                 self->lock = NULL;
1443         }
1444 #endif
1445         return -1;
1446 }
1447
1448 static void
1449 BZ2File_dealloc(BZ2FileObject *self)
1450 {
1451         int bzerror;
1452 #ifdef WITH_THREAD
1453         if (self->lock)
1454                 PyThread_free_lock(self->lock);
1455 #endif
1456         switch (self->mode) {
1457                 case MODE_READ:
1458                 case MODE_READ_EOF:
1459                         BZ2_bzReadClose(&bzerror, self->fp);
1460                         break;
1461                 case MODE_WRITE:
1462                         BZ2_bzWriteClose(&bzerror, self->fp,
1463                                          0, NULL, NULL);
1464                         break;
1465         }
1466         if (self->fp) {
1467                 PyFile_DecUseCount((PyFileObject *)self->file);
1468                 self->fp = NULL;
1469         }
1470         Util_DropReadAhead(self);
1471         Py_XDECREF(self->file);
1472         Py_TYPE(self)->tp_free((PyObject *)self);
1473 }
1474
1475 /* This is a hacked version of Python's fileobject.c:file_getiter(). */
1476 static PyObject *
1477 BZ2File_getiter(BZ2FileObject *self)
1478 {
1479         if (self->mode == MODE_CLOSED) {
1480                 PyErr_SetString(PyExc_ValueError,
1481                                 "I/O operation on closed file");
1482                 return NULL;
1483         }
1484         Py_INCREF((PyObject*)self);
1485         return (PyObject *)self;
1486 }
1487
1488 /* This is a hacked version of Python's fileobject.c:file_iternext(). */
1489 #define READAHEAD_BUFSIZE 8192
1490 static PyObject *
1491 BZ2File_iternext(BZ2FileObject *self)
1492 {
1493         PyStringObject* ret;
1494         ACQUIRE_LOCK(self);
1495         if (self->mode == MODE_CLOSED) {
1496                 RELEASE_LOCK(self);
1497                 PyErr_SetString(PyExc_ValueError,
1498                                 "I/O operation on closed file");
1499                 return NULL;
1500         }
1501         ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE);
1502         RELEASE_LOCK(self);
1503         if (ret == NULL || PyString_GET_SIZE(ret) == 0) {
1504                 Py_XDECREF(ret);
1505                 return NULL;
1506         }
1507         return (PyObject *)ret;
1508 }
1509
1510 /* ===================================================================== */
1511 /* BZ2File_Type definition. */
1512
/* Type docstring of BZ2File, built from two PyDoc_STR() pieces.
 * NOTE(review): the signature line advertises buffering=0, while
 * BZ2File_init() initialises its local default to -1 -- confirm which of
 * the two is intended. */
PyDoc_VAR(BZ2File__doc__) =
PyDoc_STR(
"BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
\n\
Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
writing. When opened for writing, the file will be created if it doesn't\n\
exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
is given, must be a number between 1 and 9.\n\
")
PyDoc_STR(
"\n\
Add a 'U' to mode to open the file for input with universal newline\n\
support. Any line ending in the input file will be seen as a '\\n' in\n\
Python. Also, a file so opened gains the attribute 'newlines'; the value\n\
for this attribute is one of None (no newline read yet), '\\r', '\\n',\n\
'\\r\\n' or a tuple containing all the newline types seen. Universal\n\
newlines are available only when reading.\n\
")
;
1533
/* Type object for bz2.BZ2File.  The initializer is positional, so the
 * order of the entries must follow the PyTypeObject layout; each entry is
 * labelled with the slot it fills.  The type can be subclassed
 * (Py_TPFLAGS_BASETYPE) and is its own iterator (tp_iter / tp_iternext). */
static PyTypeObject BZ2File_Type = {
        PyVarObject_HEAD_INIT(NULL, 0)
        "bz2.BZ2File",          /*tp_name*/
        sizeof(BZ2FileObject),  /*tp_basicsize*/
        0,                      /*tp_itemsize*/
        (destructor)BZ2File_dealloc, /*tp_dealloc*/
        0,                      /*tp_print*/
        0,                      /*tp_getattr*/
        0,                      /*tp_setattr*/
        0,                      /*tp_compare*/
        0,                      /*tp_repr*/
        0,                      /*tp_as_number*/
        0,                      /*tp_as_sequence*/
        0,                      /*tp_as_mapping*/
        0,                      /*tp_hash*/
        0,                      /*tp_call*/
        0,                      /*tp_str*/
        PyObject_GenericGetAttr,/*tp_getattro*/
        PyObject_GenericSetAttr,/*tp_setattro*/
        0,                      /*tp_as_buffer*/
        Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
        BZ2File__doc__,         /*tp_doc*/
        0,                      /*tp_traverse*/
        0,                      /*tp_clear*/
        0,                      /*tp_richcompare*/
        0,                      /*tp_weaklistoffset*/
        (getiterfunc)BZ2File_getiter, /*tp_iter*/
        (iternextfunc)BZ2File_iternext, /*tp_iternext*/
        BZ2File_methods,        /*tp_methods*/
        BZ2File_members,        /*tp_members*/
        BZ2File_getset,         /*tp_getset*/
        0,                      /*tp_base*/
        0,                      /*tp_dict*/
        0,                      /*tp_descr_get*/
        0,                      /*tp_descr_set*/
        0,                      /*tp_dictoffset*/
        (initproc)BZ2File_init, /*tp_init*/
        PyType_GenericAlloc,    /*tp_alloc*/
        PyType_GenericNew,      /*tp_new*/
        _PyObject_Del,          /*tp_free*/
        0,                      /*tp_is_gc*/
};
1576
1577
1578 /* ===================================================================== */
1579 /* Methods of BZ2Comp. */
1580
1581 PyDoc_STRVAR(BZ2Comp_compress__doc__,
1582 "compress(data) -> string\n\
1583 \n\
1584 Provide more data to the compressor object. It will return chunks of\n\
1585 compressed data whenever possible. When you've finished providing data\n\
1586 to compress, call the flush() method to finish the compression process,\n\
1587 and return what is left in the internal buffers.\n\
1588 ");
1589
1590 static PyObject *
1591 BZ2Comp_compress(BZ2CompObject *self, PyObject *args)
1592 {
1593         Py_buffer pdata;
1594         char *data;
1595         int datasize;
1596         int bufsize = SMALLCHUNK;
1597         PY_LONG_LONG totalout;
1598         PyObject *ret = NULL;
1599         bz_stream *bzs = &self->bzs;
1600         int bzerror;
1601
1602         if (!PyArg_ParseTuple(args, "s*:compress", &pdata))
1603                 return NULL;
1604         data = pdata.buf;
1605         datasize = pdata.len;
1606
1607         if (datasize == 0) {
1608                 PyBuffer_Release(&pdata);
1609                 return PyString_FromString("");
1610         }
1611
1612         ACQUIRE_LOCK(self);
1613         if (!self->running) {
1614                 PyErr_SetString(PyExc_ValueError,
1615                                 "this object was already flushed");
1616                 goto error;
1617         }
1618
1619         ret = PyString_FromStringAndSize(NULL, bufsize);
1620         if (!ret)
1621                 goto error;
1622
1623         bzs->next_in = data;
1624         bzs->avail_in = datasize;
1625         bzs->next_out = BUF(ret);
1626         bzs->avail_out = bufsize;
1627
1628         totalout = BZS_TOTAL_OUT(bzs);
1629
1630         for (;;) {
1631                 Py_BEGIN_ALLOW_THREADS
1632                 bzerror = BZ2_bzCompress(bzs, BZ_RUN);
1633                 Py_END_ALLOW_THREADS
1634                 if (bzerror != BZ_RUN_OK) {
1635                         Util_CatchBZ2Error(bzerror);
1636                         goto error;
1637                 }
1638                 if (bzs->avail_in == 0)
1639                         break; /* no more input data */
1640                 if (bzs->avail_out == 0) {
1641                         bufsize = Util_NewBufferSize(bufsize);
1642                         if (_PyString_Resize(&ret, bufsize) < 0) {
1643                                 BZ2_bzCompressEnd(bzs);
1644                                 goto error;
1645                         }
1646                         bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1647                                                     - totalout);
1648                         bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1649                 }
1650         }
1651
1652         _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
1653
1654         RELEASE_LOCK(self);
1655         PyBuffer_Release(&pdata);
1656         return ret;
1657
1658 error:
1659         RELEASE_LOCK(self);
1660         PyBuffer_Release(&pdata);
1661         Py_XDECREF(ret);
1662         return NULL;
1663 }
1664
1665 PyDoc_STRVAR(BZ2Comp_flush__doc__,
1666 "flush() -> string\n\
1667 \n\
1668 Finish the compression process and return what is left in internal buffers.\n\
1669 You must not use the compressor object after calling this method.\n\
1670 ");
1671
1672 static PyObject *
1673 BZ2Comp_flush(BZ2CompObject *self)
1674 {
1675         int bufsize = SMALLCHUNK;
1676         PyObject *ret = NULL;
1677         bz_stream *bzs = &self->bzs;
1678         PY_LONG_LONG totalout;
1679         int bzerror;
1680
1681         ACQUIRE_LOCK(self);
1682         if (!self->running) {
1683                 PyErr_SetString(PyExc_ValueError, "object was already "
1684                                                   "flushed");
1685                 goto error;
1686         }
1687         self->running = 0;
1688
1689         ret = PyString_FromStringAndSize(NULL, bufsize);
1690         if (!ret)
1691                 goto error;
1692
1693         bzs->next_out = BUF(ret);
1694         bzs->avail_out = bufsize;
1695
1696         totalout = BZS_TOTAL_OUT(bzs);
1697
1698         for (;;) {
1699                 Py_BEGIN_ALLOW_THREADS
1700                 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1701                 Py_END_ALLOW_THREADS
1702                 if (bzerror == BZ_STREAM_END) {
1703                         break;
1704                 } else if (bzerror != BZ_FINISH_OK) {
1705                         Util_CatchBZ2Error(bzerror);
1706                         goto error;
1707                 }
1708                 if (bzs->avail_out == 0) {
1709                         bufsize = Util_NewBufferSize(bufsize);
1710                         if (_PyString_Resize(&ret, bufsize) < 0)
1711                                 goto error;
1712                         bzs->next_out = BUF(ret);
1713                         bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1714                                                     - totalout);
1715                         bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1716                 }
1717         }
1718
1719         if (bzs->avail_out != 0)
1720                 _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
1721
1722         RELEASE_LOCK(self);
1723         return ret;
1724
1725 error:
1726         RELEASE_LOCK(self);
1727         Py_XDECREF(ret);
1728         return NULL;
1729 }
1730
1731 static PyMethodDef BZ2Comp_methods[] = {
1732         {"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS,
1733          BZ2Comp_compress__doc__},
1734         {"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS,
1735          BZ2Comp_flush__doc__},
1736         {NULL,          NULL}           /* sentinel */
1737 };
1738
1739
1740 /* ===================================================================== */
1741 /* Slot definitions for BZ2Comp_Type. */
1742
1743 static int
1744 BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs)
1745 {
1746         int compresslevel = 9;
1747         int bzerror;
1748         static char *kwlist[] = {"compresslevel", 0};
1749
1750         if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor",
1751                                          kwlist, &compresslevel))
1752                 return -1;
1753
1754         if (compresslevel < 1 || compresslevel > 9) {
1755                 PyErr_SetString(PyExc_ValueError,
1756                                 "compresslevel must be between 1 and 9");
1757                 goto error;
1758         }
1759
1760 #ifdef WITH_THREAD
1761         self->lock = PyThread_allocate_lock();
1762         if (!self->lock) {
1763                 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1764                 goto error;
1765         }
1766 #endif
1767
1768         memset(&self->bzs, 0, sizeof(bz_stream));
1769         bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
1770         if (bzerror != BZ_OK) {
1771                 Util_CatchBZ2Error(bzerror);
1772                 goto error;
1773         }
1774
1775         self->running = 1;
1776
1777         return 0;
1778 error:
1779 #ifdef WITH_THREAD
1780         if (self->lock) {
1781                 PyThread_free_lock(self->lock);
1782                 self->lock = NULL;
1783         }
1784 #endif
1785         return -1;
1786 }
1787
1788 static void
1789 BZ2Comp_dealloc(BZ2CompObject *self)
1790 {
1791 #ifdef WITH_THREAD
1792         if (self->lock)
1793                 PyThread_free_lock(self->lock);
1794 #endif
1795         BZ2_bzCompressEnd(&self->bzs);
1796         Py_TYPE(self)->tp_free((PyObject *)self);
1797 }
1798
1799
1800 /* ===================================================================== */
1801 /* BZ2Comp_Type definition. */
1802
/* Docstring shown for the bz2.BZ2Compressor type (used as tp_doc below). */
PyDoc_STRVAR(BZ2Comp__doc__,
"BZ2Compressor([compresslevel=9]) -> compressor object\n\
\n\
Create a new compressor object. This object may be used to compress\n\
data sequentially. If you want to compress data in one shot, use the\n\
compress() function instead. The compresslevel parameter, if given,\n\
must be a number between 1 and 9.\n\
");

/*
 * Type object for bz2.BZ2Compressor.
 *
 * The initializer is positional: every entry must stay in the slot order
 * named by the trailing comments.  Instances are allocated with the
 * generic alloc/new functions, set up by BZ2Comp_init and torn down by
 * BZ2Comp_dealloc; attribute access uses the generic getattr/setattr
 * implementations and the methods come from BZ2Comp_methods.
 * Py_TPFLAGS_BASETYPE is set, so the type can be subclassed.
 */
static PyTypeObject BZ2Comp_Type = {
        PyVarObject_HEAD_INIT(NULL, 0)
        "bz2.BZ2Compressor",    /*tp_name*/
        sizeof(BZ2CompObject),  /*tp_basicsize*/
        0,                      /*tp_itemsize*/
        (destructor)BZ2Comp_dealloc, /*tp_dealloc*/
        0,                      /*tp_print*/
        0,                      /*tp_getattr*/
        0,                      /*tp_setattr*/
        0,                      /*tp_compare*/
        0,                      /*tp_repr*/
        0,                      /*tp_as_number*/
        0,                      /*tp_as_sequence*/
        0,                      /*tp_as_mapping*/
        0,                      /*tp_hash*/
        0,                      /*tp_call*/
        0,                      /*tp_str*/
        PyObject_GenericGetAttr,/*tp_getattro*/
        PyObject_GenericSetAttr,/*tp_setattro*/
        0,                      /*tp_as_buffer*/
        Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
        BZ2Comp__doc__,         /*tp_doc*/
        0,                      /*tp_traverse*/
        0,                      /*tp_clear*/
        0,                      /*tp_richcompare*/
        0,                      /*tp_weaklistoffset*/
        0,                      /*tp_iter*/
        0,                      /*tp_iternext*/
        BZ2Comp_methods,        /*tp_methods*/
        0,                      /*tp_members*/
        0,                      /*tp_getset*/
        0,                      /*tp_base*/
        0,                      /*tp_dict*/
        0,                      /*tp_descr_get*/
        0,                      /*tp_descr_set*/
        0,                      /*tp_dictoffset*/
        (initproc)BZ2Comp_init, /*tp_init*/
        PyType_GenericAlloc,    /*tp_alloc*/
        PyType_GenericNew,      /*tp_new*/
        _PyObject_Del,          /*tp_free*/
        0,                      /*tp_is_gc*/
};
1854
1855
1856 /* ===================================================================== */
1857 /* Members of BZ2Decomp. */
1858
1859 #undef OFF
1860 #define OFF(x) offsetof(BZ2DecompObject, x)
1861
1862 static PyMemberDef BZ2Decomp_members[] = {
1863         {"unused_data", T_OBJECT, OFF(unused_data), RO},
1864         {NULL}  /* Sentinel */
1865 };
1866
1867
1868 /* ===================================================================== */
1869 /* Methods of BZ2Decomp. */
1870
1871 PyDoc_STRVAR(BZ2Decomp_decompress__doc__,
1872 "decompress(data) -> string\n\
1873 \n\
1874 Provide more data to the decompressor object. It will return chunks\n\
1875 of decompressed data whenever possible. If you try to decompress data\n\
1876 after the end of stream is found, EOFError will be raised. If any data\n\
1877 was found after the end of stream, it'll be ignored and saved in\n\
1878 unused_data attribute.\n\
1879 ");
1880
1881 static PyObject *
1882 BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args)
1883 {
1884         Py_buffer pdata;
1885         char *data;
1886         int datasize;
1887         int bufsize = SMALLCHUNK;
1888         PY_LONG_LONG totalout;
1889         PyObject *ret = NULL;
1890         bz_stream *bzs = &self->bzs;
1891         int bzerror;
1892
1893         if (!PyArg_ParseTuple(args, "s*:decompress", &pdata))
1894                 return NULL;
1895         data = pdata.buf;
1896         datasize = pdata.len;
1897
1898         ACQUIRE_LOCK(self);
1899         if (!self->running) {
1900                 PyErr_SetString(PyExc_EOFError, "end of stream was "
1901                                                 "already found");
1902                 goto error;
1903         }
1904
1905         ret = PyString_FromStringAndSize(NULL, bufsize);
1906         if (!ret)
1907                 goto error;
1908
1909         bzs->next_in = data;
1910         bzs->avail_in = datasize;
1911         bzs->next_out = BUF(ret);
1912         bzs->avail_out = bufsize;
1913
1914         totalout = BZS_TOTAL_OUT(bzs);
1915
1916         for (;;) {
1917                 Py_BEGIN_ALLOW_THREADS
1918                 bzerror = BZ2_bzDecompress(bzs);
1919                 Py_END_ALLOW_THREADS
1920                 if (bzerror == BZ_STREAM_END) {
1921                         if (bzs->avail_in != 0) {
1922                                 Py_DECREF(self->unused_data);
1923                                 self->unused_data =
1924                                     PyString_FromStringAndSize(bzs->next_in,
1925                                                                bzs->avail_in);
1926                         }
1927                         self->running = 0;
1928                         break;
1929                 }
1930                 if (bzerror != BZ_OK) {
1931                         Util_CatchBZ2Error(bzerror);
1932                         goto error;
1933                 }
1934                 if (bzs->avail_in == 0)
1935                         break; /* no more input data */
1936                 if (bzs->avail_out == 0) {
1937                         bufsize = Util_NewBufferSize(bufsize);
1938                         if (_PyString_Resize(&ret, bufsize) < 0) {
1939                                 BZ2_bzDecompressEnd(bzs);
1940                                 goto error;
1941                         }
1942                         bzs->next_out = BUF(ret);
1943                         bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1944                                                     - totalout);
1945                         bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1946                 }
1947         }
1948
1949         if (bzs->avail_out != 0)
1950                 _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
1951
1952         RELEASE_LOCK(self);
1953         PyBuffer_Release(&pdata);
1954         return ret;
1955
1956 error:
1957         RELEASE_LOCK(self);
1958         PyBuffer_Release(&pdata);
1959         Py_XDECREF(ret);
1960         return NULL;
1961 }
1962
1963 static PyMethodDef BZ2Decomp_methods[] = {
1964         {"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_decompress__doc__},
1965         {NULL,          NULL}           /* sentinel */
1966 };
1967
1968
1969 /* ===================================================================== */
1970 /* Slot definitions for BZ2Decomp_Type. */
1971
1972 static int
1973 BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs)
1974 {
1975         int bzerror;
1976
1977         if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
1978                 return -1;
1979
1980 #ifdef WITH_THREAD
1981         self->lock = PyThread_allocate_lock();
1982         if (!self->lock) {
1983                 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1984                 goto error;
1985         }
1986 #endif
1987
1988         self->unused_data = PyString_FromString("");
1989         if (!self->unused_data)
1990                 goto error;
1991
1992         memset(&self->bzs, 0, sizeof(bz_stream));
1993         bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
1994         if (bzerror != BZ_OK) {
1995                 Util_CatchBZ2Error(bzerror);
1996                 goto error;
1997         }
1998
1999         self->running = 1;
2000
2001         return 0;
2002
2003 error:
2004 #ifdef WITH_THREAD
2005         if (self->lock) {
2006                 PyThread_free_lock(self->lock);
2007                 self->lock = NULL;
2008         }
2009 #endif
2010         Py_CLEAR(self->unused_data);
2011         return -1;
2012 }
2013
2014 static void
2015 BZ2Decomp_dealloc(BZ2DecompObject *self)
2016 {
2017 #ifdef WITH_THREAD
2018         if (self->lock)
2019                 PyThread_free_lock(self->lock);
2020 #endif
2021         Py_XDECREF(self->unused_data);
2022         BZ2_bzDecompressEnd(&self->bzs);
2023         Py_TYPE(self)->tp_free((PyObject *)self);
2024 }
2025
2026
2027 /* ===================================================================== */
2028 /* BZ2Decomp_Type definition. */
2029
/* Docstring shown for the bz2.BZ2Decompressor type (used as tp_doc below). */
PyDoc_STRVAR(BZ2Decomp__doc__,
"BZ2Decompressor() -> decompressor object\n\
\n\
Create a new decompressor object. This object may be used to decompress\n\
data sequentially. If you want to decompress data in one shot, use the\n\
decompress() function instead.\n\
");

/*
 * Type object for bz2.BZ2Decompressor.
 *
 * The initializer is positional: every entry must stay in the slot order
 * named by the trailing comments.  Instances are allocated with the
 * generic alloc/new functions, set up by BZ2Decomp_init and torn down by
 * BZ2Decomp_dealloc; attribute access uses the generic getattr/setattr
 * implementations, methods come from BZ2Decomp_methods and data
 * attributes from BZ2Decomp_members.  Py_TPFLAGS_BASETYPE is set, so the
 * type can be subclassed.
 */
static PyTypeObject BZ2Decomp_Type = {
        PyVarObject_HEAD_INIT(NULL, 0)
        "bz2.BZ2Decompressor",  /*tp_name*/
        sizeof(BZ2DecompObject), /*tp_basicsize*/
        0,                      /*tp_itemsize*/
        (destructor)BZ2Decomp_dealloc, /*tp_dealloc*/
        0,                      /*tp_print*/
        0,                      /*tp_getattr*/
        0,                      /*tp_setattr*/
        0,                      /*tp_compare*/
        0,                      /*tp_repr*/
        0,                      /*tp_as_number*/
        0,                      /*tp_as_sequence*/
        0,                      /*tp_as_mapping*/
        0,                      /*tp_hash*/
        0,                      /*tp_call*/
        0,                      /*tp_str*/
        PyObject_GenericGetAttr,/*tp_getattro*/
        PyObject_GenericSetAttr,/*tp_setattro*/
        0,                      /*tp_as_buffer*/
        Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
        BZ2Decomp__doc__,       /*tp_doc*/
        0,                      /*tp_traverse*/
        0,                      /*tp_clear*/
        0,                      /*tp_richcompare*/
        0,                      /*tp_weaklistoffset*/
        0,                      /*tp_iter*/
        0,                      /*tp_iternext*/
        BZ2Decomp_methods,      /*tp_methods*/
        BZ2Decomp_members,      /*tp_members*/
        0,                      /*tp_getset*/
        0,                      /*tp_base*/
        0,                      /*tp_dict*/
        0,                      /*tp_descr_get*/
        0,                      /*tp_descr_set*/
        0,                      /*tp_dictoffset*/
        (initproc)BZ2Decomp_init, /*tp_init*/
        PyType_GenericAlloc,    /*tp_alloc*/
        PyType_GenericNew,      /*tp_new*/
        _PyObject_Del,          /*tp_free*/
        0,                      /*tp_is_gc*/
};
2080
2081
2082 /* ===================================================================== */
2083 /* Module functions. */
2084
2085 PyDoc_STRVAR(bz2_compress__doc__,
2086 "compress(data [, compresslevel=9]) -> string\n\
2087 \n\
2088 Compress data in one shot. If you want to compress data sequentially,\n\
2089 use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
2090 given, must be a number between 1 and 9.\n\
2091 ");
2092
2093 static PyObject *
2094 bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs)
2095 {
2096         int compresslevel=9;
2097         Py_buffer pdata;
2098         char *data;
2099         int datasize;
2100         int bufsize;
2101         PyObject *ret = NULL;
2102         bz_stream _bzs;
2103         bz_stream *bzs = &_bzs;
2104         int bzerror;
2105         static char *kwlist[] = {"data", "compresslevel", 0};
2106
2107         if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|i",
2108                                          kwlist, &pdata,
2109                                          &compresslevel))
2110                 return NULL;
2111         data = pdata.buf;
2112         datasize = pdata.len;
2113
2114         if (compresslevel < 1 || compresslevel > 9) {
2115                 PyErr_SetString(PyExc_ValueError,
2116                                 "compresslevel must be between 1 and 9");
2117                 PyBuffer_Release(&pdata);
2118                 return NULL;
2119         }
2120
2121         /* Conforming to bz2 manual, this is large enough to fit compressed
2122          * data in one shot. We will check it later anyway. */
2123         bufsize = datasize + (datasize/100+1) + 600;
2124
2125         ret = PyString_FromStringAndSize(NULL, bufsize);
2126         if (!ret) {
2127                 PyBuffer_Release(&pdata);
2128                 return NULL;
2129         }
2130
2131         memset(bzs, 0, sizeof(bz_stream));
2132
2133         bzs->next_in = data;
2134         bzs->avail_in = datasize;
2135         bzs->next_out = BUF(ret);
2136         bzs->avail_out = bufsize;
2137
2138         bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0);
2139         if (bzerror != BZ_OK) {
2140                 Util_CatchBZ2Error(bzerror);
2141                 PyBuffer_Release(&pdata);
2142                 Py_DECREF(ret);
2143                 return NULL;
2144         }
2145
2146         for (;;) {
2147                 Py_BEGIN_ALLOW_THREADS
2148                 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
2149                 Py_END_ALLOW_THREADS
2150                 if (bzerror == BZ_STREAM_END) {
2151                         break;
2152                 } else if (bzerror != BZ_FINISH_OK) {
2153                         BZ2_bzCompressEnd(bzs);
2154                         Util_CatchBZ2Error(bzerror);
2155                         PyBuffer_Release(&pdata);
2156                         Py_DECREF(ret);
2157                         return NULL;
2158                 }
2159                 if (bzs->avail_out == 0) {
2160                         bufsize = Util_NewBufferSize(bufsize);
2161                         if (_PyString_Resize(&ret, bufsize) < 0) {
2162                                 BZ2_bzCompressEnd(bzs);
2163                                 PyBuffer_Release(&pdata);
2164                                 Py_DECREF(ret);
2165                                 return NULL;
2166                         }
2167                         bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2168                         bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2169                 }
2170         }
2171
2172         if (bzs->avail_out != 0)
2173                 _PyString_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs));
2174         BZ2_bzCompressEnd(bzs);
2175
2176         PyBuffer_Release(&pdata);
2177         return ret;
2178 }
2179
2180 PyDoc_STRVAR(bz2_decompress__doc__,
2181 "decompress(data) -> decompressed data\n\
2182 \n\
2183 Decompress data in one shot. If you want to decompress data sequentially,\n\
2184 use an instance of BZ2Decompressor instead.\n\
2185 ");
2186
2187 static PyObject *
2188 bz2_decompress(PyObject *self, PyObject *args)
2189 {
2190         Py_buffer pdata;
2191         char *data;
2192         int datasize;
2193         int bufsize = SMALLCHUNK;
2194         PyObject *ret;
2195         bz_stream _bzs;
2196         bz_stream *bzs = &_bzs;
2197         int bzerror;
2198
2199         if (!PyArg_ParseTuple(args, "s*:decompress", &pdata))
2200                 return NULL;
2201         data = pdata.buf;
2202         datasize = pdata.len;
2203
2204         if (datasize == 0) {
2205                 PyBuffer_Release(&pdata);
2206                 return PyString_FromString("");
2207         }
2208
2209         ret = PyString_FromStringAndSize(NULL, bufsize);
2210         if (!ret) {
2211                 PyBuffer_Release(&pdata);
2212                 return NULL;
2213         }
2214
2215         memset(bzs, 0, sizeof(bz_stream));
2216
2217         bzs->next_in = data;
2218         bzs->avail_in = datasize;
2219         bzs->next_out = BUF(ret);
2220         bzs->avail_out = bufsize;
2221
2222         bzerror = BZ2_bzDecompressInit(bzs, 0, 0);
2223         if (bzerror != BZ_OK) {
2224                 Util_CatchBZ2Error(bzerror);
2225                 Py_DECREF(ret);
2226                 PyBuffer_Release(&pdata);
2227                 return NULL;
2228         }
2229
2230         for (;;) {
2231                 Py_BEGIN_ALLOW_THREADS
2232                 bzerror = BZ2_bzDecompress(bzs);
2233                 Py_END_ALLOW_THREADS
2234                 if (bzerror == BZ_STREAM_END) {
2235                         break;
2236                 } else if (bzerror != BZ_OK) {
2237                         BZ2_bzDecompressEnd(bzs);
2238                         Util_CatchBZ2Error(bzerror);
2239                         PyBuffer_Release(&pdata);
2240                         Py_DECREF(ret);
2241                         return NULL;
2242                 }
2243                 if (bzs->avail_in == 0) {
2244                         BZ2_bzDecompressEnd(bzs);
2245                         PyErr_SetString(PyExc_ValueError,
2246                                         "couldn't find end of stream");
2247                         PyBuffer_Release(&pdata);
2248                         Py_DECREF(ret);
2249                         return NULL;
2250                 }
2251                 if (bzs->avail_out == 0) {
2252                         bufsize = Util_NewBufferSize(bufsize);
2253                         if (_PyString_Resize(&ret, bufsize) < 0) {
2254                                 BZ2_bzDecompressEnd(bzs);
2255                                 PyBuffer_Release(&pdata);
2256                                 Py_DECREF(ret);
2257                                 return NULL;
2258                         }
2259                         bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2260                         bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2261                 }
2262         }
2263
2264         if (bzs->avail_out != 0)
2265                 _PyString_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs));
2266         BZ2_bzDecompressEnd(bzs);
2267         PyBuffer_Release(&pdata);
2268
2269         return ret;
2270 }
2271
2272 static PyMethodDef bz2_methods[] = {
2273         {"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS,
2274                 bz2_compress__doc__},
2275         {"decompress", (PyCFunction) bz2_decompress, METH_VARARGS,
2276                 bz2_decompress__doc__},
2277         {NULL,          NULL}           /* sentinel */
2278 };
2279
2280 /* ===================================================================== */
2281 /* Initialization function. */
2282
2283 PyDoc_STRVAR(bz2__doc__,
2284 "The python bz2 module provides a comprehensive interface for\n\
2285 the bz2 compression library. It implements a complete file\n\
2286 interface, one shot (de)compression functions, and types for\n\
2287 sequential (de)compression.\n\
2288 ");
2289
2290 PyMODINIT_FUNC
2291 initbz2(void)
2292 {
2293         PyObject *m;
2294
2295         Py_TYPE(&BZ2File_Type) = &PyType_Type;
2296         Py_TYPE(&BZ2Comp_Type) = &PyType_Type;
2297         Py_TYPE(&BZ2Decomp_Type) = &PyType_Type;
2298
2299         m = Py_InitModule3("bz2", bz2_methods, bz2__doc__);
2300         if (m == NULL)
2301                 return;
2302
2303         PyModule_AddObject(m, "__author__", PyString_FromString(__author__));
2304
2305         Py_INCREF(&BZ2File_Type);
2306         PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type);
2307
2308         Py_INCREF(&BZ2Comp_Type);
2309         PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type);
2310
2311         Py_INCREF(&BZ2Decomp_Type);
2312         PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type);
2313 }