Modules/bz2module.c

   1 /*
   2
   3 python-bz2 - python bz2 library interface
   4
   5 Copyright (c) 2002  Gustavo Niemeyer <niemeyer@conectiva.com>
   6 Copyright (c) 2002  Python Software Foundation; All Rights Reserved
   7
   8 */
   9
  10 #include "Python.h"
  11 #include <stdio.h>
  12 #include <bzlib.h>
  13 #include "structmember.h"
  14
  15 #ifdef WITH_THREAD
  16 #include "pythread.h"
  17 #endif
  18
  19 static char __author__[] =
  20 "The bz2 python module was written by:\n\
  21 \n\
  22     Gustavo Niemeyer <niemeyer@conectiva.com>\n\
  23 ";
  24
  25 /* Our very own off_t-like type, 64-bit if possible */
  26 /* copied from Objects/fileobject.c */
  27 #if !defined(HAVE_LARGEFILE_SUPPORT)
  28 typedef off_t Py_off_t;
  29 #elif SIZEOF_OFF_T >= 8
  30 typedef off_t Py_off_t;
  31 #elif SIZEOF_FPOS_T >= 8
  32 typedef fpos_t Py_off_t;
  33 #else
  34 #error "Large file support, but neither off_t nor fpos_t is large enough."
  35 #endif
  36
  37 #define BUF(v) PyString_AS_STRING((PyStringObject *)v)
  38
  39 #define MODE_CLOSED   0
  40 #define MODE_READ     1
  41 #define MODE_READ_EOF 2
  42 #define MODE_WRITE    3
  43
  44 #define BZ2FileObject_Check(v)  (Py_TYPE(v) == &BZ2File_Type)
  45
  46
  47 #ifdef BZ_CONFIG_ERROR
  48
  49 #if SIZEOF_LONG >= 8
  50 #define BZS_TOTAL_OUT(bzs) \
  51         (((long)bzs->total_out_hi32 << 32) + bzs->total_out_lo32)
  52 #elif SIZEOF_LONG_LONG >= 8
  53 #define BZS_TOTAL_OUT(bzs) \
  54         (((PY_LONG_LONG)bzs->total_out_hi32 << 32) + bzs->total_out_lo32)
  55 #else
  56 #define BZS_TOTAL_OUT(bzs) \
  57         bzs->total_out_lo32
  58 #endif
  59
  60 #else /* ! BZ_CONFIG_ERROR */
  61
  62 #define BZ2_bzRead bzRead
  63 #define BZ2_bzReadOpen bzReadOpen
  64 #define BZ2_bzReadClose bzReadClose
  65 #define BZ2_bzWrite bzWrite
  66 #define BZ2_bzWriteOpen bzWriteOpen
  67 #define BZ2_bzWriteClose bzWriteClose
  68 #define BZ2_bzCompress bzCompress
  69 #define BZ2_bzCompressInit bzCompressInit
  70 #define BZ2_bzCompressEnd bzCompressEnd
  71 #define BZ2_bzDecompress bzDecompress
  72 #define BZ2_bzDecompressInit bzDecompressInit
  73 #define BZ2_bzDecompressEnd bzDecompressEnd
  74
  75 #define BZS_TOTAL_OUT(bzs) bzs->total_out
  76
  77 #endif /* ! BZ_CONFIG_ERROR */
  78
  79
  80 #ifdef WITH_THREAD
  81 #define ACQUIRE_LOCK(obj) PyThread_acquire_lock(obj->lock, 1)
  82 #define RELEASE_LOCK(obj) PyThread_release_lock(obj->lock)
  83 #else
  84 #define ACQUIRE_LOCK(obj)
  85 #define RELEASE_LOCK(obj)
  86 #endif
  87
  88 /* Bits in f_newlinetypes */
  89 #define NEWLINE_UNKNOWN 0       /* No newline seen, yet */
  90 #define NEWLINE_CR 1            /* \r newline seen */
  91 #define NEWLINE_LF 2            /* \n newline seen */
  92 #define NEWLINE_CRLF 4          /* \r\n newline seen */
  93
  94 /* ===================================================================== */
  95 /* Structure definitions. */
  96
  97 typedef struct {
  98         PyObject_HEAD
  99         PyObject *file;
 100
 101         char* f_buf;            /* Allocated readahead buffer */
 102         char* f_bufend;         /* Points after last occupied position */
 103         char* f_bufptr;         /* Current buffer position */
 104
 105         int f_softspace;        /* Flag used by 'print' command */
 106
 107         int f_univ_newline;     /* Handle any newline convention */
 108         int f_newlinetypes;     /* Types of newlines seen */
 109         int f_skipnextlf;       /* Skip next \n */
 110
 111         BZFILE *fp;
 112         int mode;
 113         Py_off_t pos;
 114         Py_off_t size;
 115 #ifdef WITH_THREAD
 116         PyThread_type_lock lock;
 117 #endif
 118 } BZ2FileObject;
 119
 120 typedef struct {
 121         PyObject_HEAD
 122         bz_stream bzs;
 123         int running;
 124 #ifdef WITH_THREAD
 125         PyThread_type_lock lock;
 126 #endif
 127 } BZ2CompObject;
 128
 129 typedef struct {
 130         PyObject_HEAD
 131         bz_stream bzs;
 132         int running;
 133         PyObject *unused_data;
 134 #ifdef WITH_THREAD
 135         PyThread_type_lock lock;
 136 #endif
 137 } BZ2DecompObject;
 138
 139 /* ===================================================================== */
 140 /* Utility functions. */
 141
 142 static int
 143 Util_CatchBZ2Error(int bzerror)
 144 {
 145         int ret = 0;
 146         switch(bzerror) {
 147                 case BZ_OK:
 148                 case BZ_STREAM_END:
 149                         break;
 150
 151 #ifdef BZ_CONFIG_ERROR
 152                 case BZ_CONFIG_ERROR:
 153                         PyErr_SetString(PyExc_SystemError,
 154                                         "the bz2 library was not compiled "
 155                                         "correctly");
 156                         ret = 1;
 157                         break;
 158 #endif
 159
 160                 case BZ_PARAM_ERROR:
 161                         PyErr_SetString(PyExc_ValueError,
 162                                         "the bz2 library has received wrong "
 163                                         "parameters");
 164                         ret = 1;
 165                         break;
 166
 167                 case BZ_MEM_ERROR:
 168                         PyErr_NoMemory();
 169                         ret = 1;
 170                         break;
 171
 172                 case BZ_DATA_ERROR:
 173                 case BZ_DATA_ERROR_MAGIC:
 174                         PyErr_SetString(PyExc_IOError, "invalid data stream");
 175                         ret = 1;
 176                         break;
 177
 178                 case BZ_IO_ERROR:
 179                         PyErr_SetString(PyExc_IOError, "unknown IO error");
 180                         ret = 1;
 181                         break;
 182
 183                 case BZ_UNEXPECTED_EOF:
 184                         PyErr_SetString(PyExc_EOFError,
 185                                         "compressed file ended before the "
 186                                         "logical end-of-stream was detected");
 187                         ret = 1;
 188                         break;
 189
 190                 case BZ_SEQUENCE_ERROR:
 191                         PyErr_SetString(PyExc_RuntimeError,
 192                                         "wrong sequence of bz2 library "
 193                                         "commands used");
 194                         ret = 1;
 195                         break;
 196         }
 197         return ret;
 198 }
 199
 200 #if BUFSIZ < 8192
 201 #define SMALLCHUNK 8192
 202 #else
 203 #define SMALLCHUNK BUFSIZ
 204 #endif
 205
 206 #if SIZEOF_INT < 4
 207 #define BIGCHUNK  (512 * 32)
 208 #else
 209 #define BIGCHUNK  (512 * 1024)
 210 #endif
 211
 212 /* This is a hacked version of Python's fileobject.c:new_buffersize(). */
 213 static size_t
 214 Util_NewBufferSize(size_t currentsize)
 215 {
 216         if (currentsize > SMALLCHUNK) {
 217                 /* Keep doubling until we reach BIGCHUNK;
 218                    then keep adding BIGCHUNK. */
 219                 if (currentsize <= BIGCHUNK)
 220                         return currentsize + currentsize;
 221                 else
 222                         return currentsize + BIGCHUNK;
 223         }
 224         return currentsize + SMALLCHUNK;
 225 }
 226
 227 /* This is a hacked version of Python's fileobject.c:get_line(). */
 228 static PyObject *
 229 Util_GetLine(BZ2FileObject *f, int n)
 230 {
 231         char c;
 232         char *buf, *end;
 233         size_t total_v_size;    /* total # of slots in buffer */
 234         size_t used_v_size;     /* # used slots in buffer */
 235         size_t increment;       /* amount to increment the buffer */
 236         PyObject *v;
 237         int bzerror;
 238         int bytes_read;
 239         int newlinetypes = f->f_newlinetypes;
 240         int skipnextlf = f->f_skipnextlf;
 241         int univ_newline = f->f_univ_newline;
 242
 243         total_v_size = n > 0 ? n : 100;
 244         v = PyString_FromStringAndSize((char *)NULL, total_v_size);
 245         if (v == NULL)
 246                 return NULL;
 247
 248         buf = BUF(v);
 249         end = buf + total_v_size;
 250
 251         for (;;) {
 252                 Py_BEGIN_ALLOW_THREADS
 253                 while (buf != end) {
 254                         bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1);
 255                         f->pos++;
 256                         if (bytes_read == 0) break;
 257                         if (univ_newline) {
 258                                 if (skipnextlf) {
 259                                         skipnextlf = 0;
 260                                         if (c == '\n') {
 261                                                 /* Seeing a \n here with skipnextlf true means we
 262                                                  * saw a \r before.
 263                                                  */
 264                                                 newlinetypes |= NEWLINE_CRLF;
 265                                                 if (bzerror != BZ_OK) break;
 266                                                 bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1);
 267                                                 f->pos++;
 268                                                 if (bytes_read == 0) break;
 269                                         } else {
 270                                                 newlinetypes |= NEWLINE_CR;
 271                                         }
 272                                 }
 273                                 if (c == '\r') {
 274                                         skipnextlf = 1;
 275                                         c = '\n';
 276                                 } else if (c == '\n')
 277                                         newlinetypes |= NEWLINE_LF;
 278                         }
 279                         *buf++ = c;
 280                         if (bzerror != BZ_OK || c == '\n') break;
 281                 }
 282                 if (univ_newline && bzerror == BZ_STREAM_END && skipnextlf)
 283                         newlinetypes |= NEWLINE_CR;
 284                 Py_END_ALLOW_THREADS
 285                 f->f_newlinetypes = newlinetypes;
 286                 f->f_skipnextlf = skipnextlf;
 287                 if (bzerror == BZ_STREAM_END) {
 288                         f->size = f->pos;
 289                         f->mode = MODE_READ_EOF;
 290                         break;
 291                 } else if (bzerror != BZ_OK) {
 292                         Util_CatchBZ2Error(bzerror);
 293                         Py_DECREF(v);
 294                         return NULL;
 295                 }
 296                 if (c == '\n')
 297                         break;
 298                 /* Must be because buf == end */
 299                 if (n > 0)
 300                         break;
 301                 used_v_size = total_v_size;
 302                 increment = total_v_size >> 2; /* mild exponential growth */
 303                 total_v_size += increment;
 304                 if (total_v_size > INT_MAX) {
 305                         PyErr_SetString(PyExc_OverflowError,
 306                             "line is longer than a Python string can hold");
 307                         Py_DECREF(v);
 308                         return NULL;
 309                 }
 310                 if (_PyString_Resize(&v, total_v_size) < 0)
 311                         return NULL;
 312                 buf = BUF(v) + used_v_size;
 313                 end = BUF(v) + total_v_size;
 314         }
 315
 316         used_v_size = buf - BUF(v);
 317         if (used_v_size != total_v_size)
 318                 _PyString_Resize(&v, used_v_size);
 319         return v;
 320 }
 321
 322 /* This is a hacked version of Python's
 323  * fileobject.c:Py_UniversalNewlineFread(). */
 324 size_t
 325 Util_UnivNewlineRead(int *bzerror, BZFILE *stream,
 326                      char* buf, size_t n, BZ2FileObject *f)
 327 {
 328         char *dst = buf;
 329         int newlinetypes, skipnextlf;
 330
 331         assert(buf != NULL);
 332         assert(stream != NULL);
 333
 334         if (!f->f_univ_newline)
 335                 return BZ2_bzRead(bzerror, stream, buf, n);
 336
 337         newlinetypes = f->f_newlinetypes;
 338         skipnextlf = f->f_skipnextlf;
 339
 340         /* Invariant:  n is the number of bytes remaining to be filled
 341          * in the buffer.
 342          */
 343         while (n) {
 344                 size_t nread;
 345                 int shortread;
 346                 char *src = dst;
 347
 348                 nread = BZ2_bzRead(bzerror, stream, dst, n);
 349                 assert(nread <= n);
 350                 n -= nread; /* assuming 1 byte out for each in; will adjust */
 351                 shortread = n != 0;     /* true iff EOF or error */
 352                 while (nread--) {
 353                         char c = *src++;
 354                         if (c == '\r') {
 355                                 /* Save as LF and set flag to skip next LF. */
 356                                 *dst++ = '\n';
 357                                 skipnextlf = 1;
 358                         }
 359                         else if (skipnextlf && c == '\n') {
 360                                 /* Skip LF, and remember we saw CR LF. */
 361                                 skipnextlf = 0;
 362                                 newlinetypes |= NEWLINE_CRLF;
 363                                 ++n;
 364                         }
 365                         else {
 366                                 /* Normal char to be stored in buffer.  Also
 367                                  * update the newlinetypes flag if either this
 368                                  * is an LF or the previous char was a CR.
 369                                  */
 370                                 if (c == '\n')
 371                                         newlinetypes |= NEWLINE_LF;
 372                                 else if (skipnextlf)
 373                                         newlinetypes |= NEWLINE_CR;
 374                                 *dst++ = c;
 375                                 skipnextlf = 0;
 376                         }
 377                 }
 378                 if (shortread) {
 379                         /* If this is EOF, update type flags. */
 380                         if (skipnextlf && *bzerror == BZ_STREAM_END)
 381                                 newlinetypes |= NEWLINE_CR;
 382                         break;
 383                 }
 384         }
 385         f->f_newlinetypes = newlinetypes;
 386         f->f_skipnextlf = skipnextlf;
 387         return dst - buf;
 388 }
 389
 390 /* This is a hacked version of Python's fileobject.c:drop_readahead(). */
 391 static void
 392 Util_DropReadAhead(BZ2FileObject *f)
 393 {
 394         if (f->f_buf != NULL) {
 395                 PyMem_Free(f->f_buf);
 396                 f->f_buf = NULL;
 397         }
 398 }
 399
 400 /* This is a hacked version of Python's fileobject.c:readahead(). */
 401 static int
 402 Util_ReadAhead(BZ2FileObject *f, int bufsize)
 403 {
 404         int chunksize;
 405         int bzerror;
 406
 407         if (f->f_buf != NULL) {
 408                 if((f->f_bufend - f->f_bufptr) >= 1)
 409                         return 0;
 410                 else
 411                         Util_DropReadAhead(f);
 412         }
 413         if (f->mode == MODE_READ_EOF) {
 414                 f->f_bufptr = f->f_buf;
 415                 f->f_bufend = f->f_buf;
 416                 return 0;
 417         }
 418         if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
 419                 return -1;
 420         }
 421         Py_BEGIN_ALLOW_THREADS
 422         chunksize = Util_UnivNewlineRead(&bzerror, f->fp, f->f_buf,
 423                                          bufsize, f);
 424         Py_END_ALLOW_THREADS
 425         f->pos += chunksize;
 426         if (bzerror == BZ_STREAM_END) {
 427                 f->size = f->pos;
 428                 f->mode = MODE_READ_EOF;
 429         } else if (bzerror != BZ_OK) {
 430                 Util_CatchBZ2Error(bzerror);
 431                 Util_DropReadAhead(f);
 432                 return -1;
 433         }
 434         f->f_bufptr = f->f_buf;
 435         f->f_bufend = f->f_buf + chunksize;
 436         return 0;
 437 }
 438
 439 /* This is a hacked version of Python's
 440  * fileobject.c:readahead_get_line_skip(). */
 441 static PyStringObject *
 442 Util_ReadAheadGetLineSkip(BZ2FileObject *f, int skip, int bufsize)
 443 {
 444         PyStringObject* s;
 445         char *bufptr;
 446         char *buf;
 447         int len;
 448
 449         if (f->f_buf == NULL)
 450                 if (Util_ReadAhead(f, bufsize) < 0)
 451                         return NULL;
 452
 453         len = f->f_bufend - f->f_bufptr;
 454         if (len == 0)
 455                 return (PyStringObject *)
 456                         PyString_FromStringAndSize(NULL, skip);
 457         bufptr = memchr(f->f_bufptr, '\n', len);
 458         if (bufptr != NULL) {
 459                 bufptr++;                       /* Count the '\n' */
 460                 len = bufptr - f->f_bufptr;
 461                 s = (PyStringObject *)
 462                         PyString_FromStringAndSize(NULL, skip+len);
 463                 if (s == NULL)
 464                         return NULL;
 465                 memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
 466                 f->f_bufptr = bufptr;
 467                 if (bufptr == f->f_bufend)
 468                         Util_DropReadAhead(f);
 469         } else {
 470                 bufptr = f->f_bufptr;
 471                 buf = f->f_buf;
 472                 f->f_buf = NULL;        /* Force new readahead buffer */
 473                 s = Util_ReadAheadGetLineSkip(f, skip+len,
 474                                               bufsize + (bufsize>>2));
 475                 if (s == NULL) {
 476                         PyMem_Free(buf);
 477                         return NULL;
 478                 }
 479                 memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
 480                 PyMem_Free(buf);
 481         }
 482         return s;
 483 }
 484
 485 /* ===================================================================== */
 486 /* Methods of BZ2File. */
 487
 488 PyDoc_STRVAR(BZ2File_read__doc__,
 489 "read([size]) -> string\n\
 490 \n\
 491 Read at most size uncompressed bytes, returned as a string. If the size\n\
 492 argument is negative or omitted, read until EOF is reached.\n\
 493 ");
 494
 495 /* This is a hacked version of Python's fileobject.c:file_read(). */
 496 static PyObject *
 497 BZ2File_read(BZ2FileObject *self, PyObject *args)
 498 {
 499         long bytesrequested = -1;
 500         size_t bytesread, buffersize, chunksize;
 501         int bzerror;
 502         PyObject *ret = NULL;
 503
 504         if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
 505                 return NULL;
 506
 507         ACQUIRE_LOCK(self);
 508         switch (self->mode) {
 509                 case MODE_READ:
 510                         break;
 511                 case MODE_READ_EOF:
 512                         ret = PyString_FromString("");
 513                         goto cleanup;
 514                 case MODE_CLOSED:
 515                         PyErr_SetString(PyExc_ValueError,
 516                                         "I/O operation on closed file");
 517                         goto cleanup;
 518                 default:
 519                         PyErr_SetString(PyExc_IOError,
 520                                         "file is not ready for reading");
 521                         goto cleanup;
 522         }
 523
 524         if (bytesrequested < 0)
 525                 buffersize = Util_NewBufferSize((size_t)0);
 526         else
 527                 buffersize = bytesrequested;
 528         if (buffersize > INT_MAX) {
 529                 PyErr_SetString(PyExc_OverflowError,
 530                                 "requested number of bytes is "
 531                                 "more than a Python string can hold");
 532                 goto cleanup;
 533         }
 534         ret = PyString_FromStringAndSize((char *)NULL, buffersize);
 535         if (ret == NULL)
 536                 goto cleanup;
 537         bytesread = 0;
 538
 539         for (;;) {
 540                 Py_BEGIN_ALLOW_THREADS
 541                 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
 542                                                  BUF(ret)+bytesread,
 543                                                  buffersize-bytesread,
 544                                                  self);
 545                 self->pos += chunksize;
 546                 Py_END_ALLOW_THREADS
 547                 bytesread += chunksize;
 548                 if (bzerror == BZ_STREAM_END) {
 549                         self->size = self->pos;
 550                         self->mode = MODE_READ_EOF;
 551                         break;
 552                 } else if (bzerror != BZ_OK) {
 553                         Util_CatchBZ2Error(bzerror);
 554                         Py_DECREF(ret);
 555                         ret = NULL;
 556                         goto cleanup;
 557                 }
 558                 if (bytesrequested < 0) {
 559                         buffersize = Util_NewBufferSize(buffersize);
 560                         if (_PyString_Resize(&ret, buffersize) < 0)
 561                                 goto cleanup;
 562                 } else {
 563                         break;
 564                 }
 565         }
 566         if (bytesread != buffersize)
 567                 _PyString_Resize(&ret, bytesread);
 568
 569 cleanup:
 570         RELEASE_LOCK(self);
 571         return ret;
 572 }
 573
 574 PyDoc_STRVAR(BZ2File_readline__doc__,
 575 "readline([size]) -> string\n\
 576 \n\
 577 Return the next line from the file, as a string, retaining newline.\n\
 578 A non-negative size argument will limit the maximum number of bytes to\n\
 579 return (an incomplete line may be returned then). Return an empty\n\
 580 string at EOF.\n\
 581 ");
 582
 583 static PyObject *
 584 BZ2File_readline(BZ2FileObject *self, PyObject *args)
 585 {
 586         PyObject *ret = NULL;
 587         int sizehint = -1;
 588
 589         if (!PyArg_ParseTuple(args, "|i:readline", &sizehint))
 590                 return NULL;
 591
 592         ACQUIRE_LOCK(self);
 593         switch (self->mode) {
 594                 case MODE_READ:
 595                         break;
 596                 case MODE_READ_EOF:
 597                         ret = PyString_FromString("");
 598                         goto cleanup;
 599                 case MODE_CLOSED:
 600                         PyErr_SetString(PyExc_ValueError,
 601                                         "I/O operation on closed file");
 602                         goto cleanup;
 603                 default:
 604                         PyErr_SetString(PyExc_IOError,
 605                                         "file is not ready for reading");
 606                         goto cleanup;
 607         }
 608
 609         if (sizehint == 0)
 610                 ret = PyString_FromString("");
 611         else
 612                 ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint);
 613
 614 cleanup:
 615         RELEASE_LOCK(self);
 616         return ret;
 617 }
 618
 619 PyDoc_STRVAR(BZ2File_readlines__doc__,
 620 "readlines([size]) -> list\n\
 621 \n\
 622 Call readline() repeatedly and return a list of lines read.\n\
 623 The optional size argument, if given, is an approximate bound on the\n\
 624 total number of bytes in the lines returned.\n\
 625 ");
 626
 627 /* This is a hacked version of Python's fileobject.c:file_readlines(). */
 628 static PyObject *
 629 BZ2File_readlines(BZ2FileObject *self, PyObject *args)
 630 {
 631         long sizehint = 0;
 632         PyObject *list = NULL;
 633         PyObject *line;
 634         char small_buffer[SMALLCHUNK];
 635         char *buffer = small_buffer;
 636         size_t buffersize = SMALLCHUNK;
 637         PyObject *big_buffer = NULL;
 638         size_t nfilled = 0;
 639         size_t nread;
 640         size_t totalread = 0;
 641         char *p, *q, *end;
 642         int err;
 643         int shortread = 0;
 644         int bzerror;
 645
 646         if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
 647                 return NULL;
 648
 649         ACQUIRE_LOCK(self);
 650         switch (self->mode) {
 651                 case MODE_READ:
 652                         break;
 653                 case MODE_READ_EOF:
 654                         list = PyList_New(0);
 655                         goto cleanup;
 656                 case MODE_CLOSED:
 657                         PyErr_SetString(PyExc_ValueError,
 658                                         "I/O operation on closed file");
 659                         goto cleanup;
 660                 default:
 661                         PyErr_SetString(PyExc_IOError,
 662                                         "file is not ready for reading");
 663                         goto cleanup;
 664         }
 665
 666         if ((list = PyList_New(0)) == NULL)
 667                 goto cleanup;
 668
 669         for (;;) {
 670                 Py_BEGIN_ALLOW_THREADS
 671                 nread = Util_UnivNewlineRead(&bzerror, self->fp,
 672                                              buffer+nfilled,
 673                                              buffersize-nfilled, self);
 674                 self->pos += nread;
 675                 Py_END_ALLOW_THREADS
 676                 if (bzerror == BZ_STREAM_END) {
 677                         self->size = self->pos;
 678                         self->mode = MODE_READ_EOF;
 679                         if (nread == 0) {
 680                                 sizehint = 0;
 681                                 break;
 682                         }
 683                         shortread = 1;
 684                 } else if (bzerror != BZ_OK) {
 685                         Util_CatchBZ2Error(bzerror);
 686                   error:
 687                         Py_DECREF(list);
 688                         list = NULL;
 689                         goto cleanup;
 690                 }
 691                 totalread += nread;
 692                 p = memchr(buffer+nfilled, '\n', nread);
 693                 if (!shortread && p == NULL) {
 694                         /* Need a larger buffer to fit this line */
 695                         nfilled += nread;
 696                         buffersize *= 2;
 697                         if (buffersize > INT_MAX) {
 698                                 PyErr_SetString(PyExc_OverflowError,
 699                                 "line is longer than a Python string can hold");
 700                                 goto error;
 701                         }
 702                         if (big_buffer == NULL) {
 703                                 /* Create the big buffer */
 704                                 big_buffer = PyString_FromStringAndSize(
 705                                         NULL, buffersize);
 706                                 if (big_buffer == NULL)
 707                                         goto error;
 708                                 buffer = PyString_AS_STRING(big_buffer);
 709                                 memcpy(buffer, small_buffer, nfilled);
 710                         }
 711                         else {
 712                                 /* Grow the big buffer */
 713                                 _PyString_Resize(&big_buffer, buffersize);
 714                                 buffer = PyString_AS_STRING(big_buffer);
 715                         }
 716                         continue;
 717                 }
 718                 end = buffer+nfilled+nread;
 719                 q = buffer;
 720                 while (p != NULL) {
 721                         /* Process complete lines */
 722                         p++;
 723                         line = PyString_FromStringAndSize(q, p-q);
 724                         if (line == NULL)
 725                                 goto error;
 726                         err = PyList_Append(list, line);
 727                         Py_DECREF(line);
 728                         if (err != 0)
 729                                 goto error;
 730                         q = p;
 731                         p = memchr(q, '\n', end-q);
 732                 }
 733                 /* Move the remaining incomplete line to the start */
 734                 nfilled = end-q;
 735                 memmove(buffer, q, nfilled);
 736                 if (sizehint > 0)
 737                         if (totalread >= (size_t)sizehint)
 738                                 break;
 739                 if (shortread) {
 740                         sizehint = 0;
 741                         break;
 742                 }
 743         }
 744         if (nfilled != 0) {
 745                 /* Partial last line */
 746                 line = PyString_FromStringAndSize(buffer, nfilled);
 747                 if (line == NULL)
 748                         goto error;
 749                 if (sizehint > 0) {
 750                         /* Need to complete the last line */
 751                         PyObject *rest = Util_GetLine(self, 0);
 752                         if (rest == NULL) {
 753                                 Py_DECREF(line);
 754                                 goto error;
 755                         }
 756                         PyString_Concat(&line, rest);
 757                         Py_DECREF(rest);
 758                         if (line == NULL)
 759                                 goto error;
 760                 }
 761                 err = PyList_Append(list, line);
 762                 Py_DECREF(line);
 763                 if (err != 0)
 764                         goto error;
 765         }
 766
 767   cleanup:
 768         RELEASE_LOCK(self);
 769         if (big_buffer) {
 770                 Py_DECREF(big_buffer);
 771         }
 772         return list;
 773 }
 774
 775 PyDoc_STRVAR(BZ2File_xreadlines__doc__,
 776 "xreadlines() -> self\n\
 777 \n\
 778 For backward compatibility. BZ2File objects now include the performance\n\
 779 optimizations previously implemented in the xreadlines module.\n\
 780 ");
 781
 782 PyDoc_STRVAR(BZ2File_write__doc__,
 783 "write(data) -> None\n\
 784 \n\
 785 Write the 'data' string to file. Note that due to buffering, close() may\n\
 786 be needed before the file on disk reflects the data written.\n\
 787 ");
 788
 789 /* This is a hacked version of Python's fileobject.c:file_write(). */
 790 static PyObject *
 791 BZ2File_write(BZ2FileObject *self, PyObject *args)
 792 {
 793         PyObject *ret = NULL;
 794         char *buf;
 795         int len;
 796         int bzerror;
 797
 798         if (!PyArg_ParseTuple(args, "s#:write", &buf, &len))
 799                 return NULL;
 800
 801         ACQUIRE_LOCK(self);
 802         switch (self->mode) {
 803                 case MODE_WRITE:
 804                         break;
 805
 806                 case MODE_CLOSED:
 807                         PyErr_SetString(PyExc_ValueError,
 808                                         "I/O operation on closed file");
 809                         goto cleanup;
 810
 811                 default:
 812                         PyErr_SetString(PyExc_IOError,
 813                                         "file is not ready for writing");
 814                         goto cleanup;
 815         }
 816
 817         self->f_softspace = 0;
 818
 819         Py_BEGIN_ALLOW_THREADS
 820         BZ2_bzWrite (&bzerror, self->fp, buf, len);
 821         self->pos += len;
 822         Py_END_ALLOW_THREADS
 823
 824         if (bzerror != BZ_OK) {
 825                 Util_CatchBZ2Error(bzerror);
 826                 goto cleanup;
 827         }
 828
 829         Py_INCREF(Py_None);
 830         ret = Py_None;
 831
 832 cleanup:
 833         RELEASE_LOCK(self);
 834         return ret;
 835 }
 836
 837 PyDoc_STRVAR(BZ2File_writelines__doc__,
 838 "writelines(sequence_of_strings) -> None\n\
 839 \n\
 840 Write the sequence of strings to the file. Note that newlines are not\n\
 841 added. The sequence can be any iterable object producing strings. This is\n\
 842 equivalent to calling write() for each string.\n\
 843 ");
 844
 845 /* This is a hacked version of Python's fileobject.c:file_writelines(). */
 846 static PyObject *
 847 BZ2File_writelines(BZ2FileObject *self, PyObject *seq)
 848 {
 849 #define CHUNKSIZE 1000
 850         PyObject *list = NULL;
 851         PyObject *iter = NULL;
 852         PyObject *ret = NULL;
 853         PyObject *line;
 854         int i, j, index, len, islist;
 855         int bzerror;
 856
 857         ACQUIRE_LOCK(self);
 858         switch (self->mode) {
 859                 case MODE_WRITE:
 860                         break;
 861
 862                 case MODE_CLOSED:
 863                         PyErr_SetString(PyExc_ValueError,
 864                                         "I/O operation on closed file");
 865                         goto error;
 866
 867                 default:
 868                         PyErr_SetString(PyExc_IOError,
 869                                         "file is not ready for writing");
 870                         goto error;
 871         }
 872
 873         islist = PyList_Check(seq);
 874         if  (!islist) {
 875                 iter = PyObject_GetIter(seq);
 876                 if (iter == NULL) {
 877                         PyErr_SetString(PyExc_TypeError,
 878                                 "writelines() requires an iterable argument");
 879                         goto error;
 880                 }
 881                 list = PyList_New(CHUNKSIZE);
 882                 if (list == NULL)
 883                         goto error;
 884         }
 885
 886         /* Strategy: slurp CHUNKSIZE lines into a private list,
 887            checking that they are all strings, then write that list
 888            without holding the interpreter lock, then come back for more. */
 889         for (index = 0; ; index += CHUNKSIZE) {
 890                 if (islist) {
 891                         Py_XDECREF(list);
 892                         list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
 893                         if (list == NULL)
 894                                 goto error;
 895                         j = PyList_GET_SIZE(list);
 896                 }
 897                 else {
 898                         for (j = 0; j < CHUNKSIZE; j++) {
 899                                 line = PyIter_Next(iter);
 900                                 if (line == NULL) {
 901                                         if (PyErr_Occurred())
 902                                                 goto error;
 903                                         break;
 904                                 }
 905                                 PyList_SetItem(list, j, line);
 906                         }
 907                 }
 908                 if (j == 0)
 909                         break;
 910
 911                 /* Check that all entries are indeed strings. If not,
 912                    apply the same rules as for file.write() and
 913                    convert the rets to strings. This is slow, but
 914                    seems to be the only way since all conversion APIs
 915                    could potentially execute Python code. */
 916                 for (i = 0; i < j; i++) {
 917                         PyObject *v = PyList_GET_ITEM(list, i);
 918                         if (!PyString_Check(v)) {
 919                                 const char *buffer;
 920                                 Py_ssize_t len;
 921                                 if (PyObject_AsCharBuffer(v, &buffer, &len)) {
 922                                         PyErr_SetString(PyExc_TypeError,
 923                                                         "writelines() "
 924                                                         "argument must be "
 925                                                         "a sequence of "
 926                                                         "strings");
 927                                         goto error;
 928                                 }
 929                                 line = PyString_FromStringAndSize(buffer,
 930                                                                   len);
 931                                 if (line == NULL)
 932                                         goto error;
 933                                 Py_DECREF(v);
 934                                 PyList_SET_ITEM(list, i, line);
 935                         }
 936                 }
 937
 938                 self->f_softspace = 0;
 939
 940                 /* Since we are releasing the global lock, the
 941                    following code may *not* execute Python code. */
 942                 Py_BEGIN_ALLOW_THREADS
 943                 for (i = 0; i < j; i++) {
 944                         line = PyList_GET_ITEM(list, i);
 945                         len = PyString_GET_SIZE(line);
 946                         BZ2_bzWrite (&bzerror, self->fp,
 947                                      PyString_AS_STRING(line), len);
 948                         if (bzerror != BZ_OK) {
 949                                 Py_BLOCK_THREADS
 950                                 Util_CatchBZ2Error(bzerror);
 951                                 goto error;
 952                         }
 953                 }
 954                 Py_END_ALLOW_THREADS
 955
 956                 if (j < CHUNKSIZE)
 957                         break;
 958         }
 959
 960         Py_INCREF(Py_None);
 961         ret = Py_None;
 962
 963   error:
 964         RELEASE_LOCK(self);
 965         Py_XDECREF(list);
 966         Py_XDECREF(iter);
 967         return ret;
 968 #undef CHUNKSIZE
 969 }
 970
 971 PyDoc_STRVAR(BZ2File_seek__doc__,
 972 "seek(offset [, whence]) -> None\n\
 973 \n\
 974 Move to new file position. Argument offset is a byte count. Optional\n\
 975 argument whence defaults to 0 (offset from start of file, offset\n\
 976 should be >= 0); other values are 1 (move relative to current position,\n\
 977 positive or negative), and 2 (move relative to end of file, usually\n\
 978 negative, although many platforms allow seeking beyond the end of a file).\n\
 979 \n\
 980 Note that seeking of bz2 files is emulated, and depending on the parameters\n\
 981 the operation may be extremely slow.\n\
 982 ");
 983
 984 static PyObject *
 985 BZ2File_seek(BZ2FileObject *self, PyObject *args)
 986 {
 987         int where = 0;
 988         PyObject *offobj;
 989         Py_off_t offset;
 990         char small_buffer[SMALLCHUNK];
 991         char *buffer = small_buffer;
 992         size_t buffersize = SMALLCHUNK;
 993         Py_off_t bytesread = 0;
 994         size_t readsize;
 995         int chunksize;
 996         int bzerror;
 997         PyObject *ret = NULL;
 998
 999         if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &where))
1000                 return NULL;
1001 #if !defined(HAVE_LARGEFILE_SUPPORT)
1002         offset = PyInt_AsLong(offobj);
1003 #else
1004         offset = PyLong_Check(offobj) ?
1005                 PyLong_AsLongLong(offobj) : PyInt_AsLong(offobj);
1006 #endif
1007         if (PyErr_Occurred())
1008                 return NULL;
1009
1010         ACQUIRE_LOCK(self);
1011         Util_DropReadAhead(self);
1012         switch (self->mode) {
1013                 case MODE_READ:
1014                 case MODE_READ_EOF:
1015                         break;
1016
1017                 case MODE_CLOSED:
1018                         PyErr_SetString(PyExc_ValueError,
1019                                         "I/O operation on closed file");
1020                         goto cleanup;
1021
1022                 default:
1023                         PyErr_SetString(PyExc_IOError,
1024                                         "seek works only while reading");
1025                         goto cleanup;
1026         }
1027
1028         if (where == 2) {
1029                 if (self->size == -1) {
1030                         assert(self->mode != MODE_READ_EOF);
1031                         for (;;) {
1032                                 Py_BEGIN_ALLOW_THREADS
1033                                 chunksize = Util_UnivNewlineRead(
1034                                                 &bzerror, self->fp,
1035                                                 buffer, buffersize,
1036                                                 self);
1037                                 self->pos += chunksize;
1038                                 Py_END_ALLOW_THREADS
1039
1040                                 bytesread += chunksize;
1041                                 if (bzerror == BZ_STREAM_END) {
1042                                         break;
1043                                 } else if (bzerror != BZ_OK) {
1044                                         Util_CatchBZ2Error(bzerror);
1045                                         goto cleanup;
1046                                 }
1047                         }
1048                         self->mode = MODE_READ_EOF;
1049                         self->size = self->pos;
1050                         bytesread = 0;
1051                 }
1052                 offset = self->size + offset;
1053         } else if (where == 1) {
1054                 offset = self->pos + offset;
1055         }
1056
1057         /* Before getting here, offset must be the absolute position the file
1058          * pointer should be set to. */
1059
1060         if (offset >= self->pos) {
1061                 /* we can move forward */
1062                 offset -= self->pos;
1063         } else {
1064                 /* we cannot move back, so rewind the stream */
1065                 BZ2_bzReadClose(&bzerror, self->fp);
1066                 if (bzerror != BZ_OK) {
1067                         Util_CatchBZ2Error(bzerror);
1068                         goto cleanup;
1069                 }
1070                 ret = PyObject_CallMethod(self->file, "seek", "(i)", 0);
1071                 if (!ret)
1072                         goto cleanup;
1073                 Py_DECREF(ret);
1074                 ret = NULL;
1075                 self->pos = 0;
1076                 self->fp = BZ2_bzReadOpen(&bzerror, PyFile_AsFile(self->file),
1077                                           0, 0, NULL, 0);
1078                 if (bzerror != BZ_OK) {
1079                         Util_CatchBZ2Error(bzerror);
1080                         goto cleanup;
1081                 }
1082                 self->mode = MODE_READ;
1083         }
1084
1085         if (offset <= 0 || self->mode == MODE_READ_EOF)
1086                 goto exit;
1087
1088         /* Before getting here, offset must be set to the number of bytes
1089          * to walk forward. */
1090         for (;;) {
1091                 if (offset-bytesread > buffersize)
1092                         readsize = buffersize;
1093                 else
1094                         /* offset might be wider that readsize, but the result
1095                          * of the subtraction is bound by buffersize (see the
1096                          * condition above). buffersize is 8192. */
1097                         readsize = (size_t)(offset-bytesread);
1098                 Py_BEGIN_ALLOW_THREADS
1099                 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
1100                                                  buffer, readsize, self);
1101                 self->pos += chunksize;
1102                 Py_END_ALLOW_THREADS
1103                 bytesread += chunksize;
1104                 if (bzerror == BZ_STREAM_END) {
1105                         self->size = self->pos;
1106                         self->mode = MODE_READ_EOF;
1107                         break;
1108                 } else if (bzerror != BZ_OK) {
1109                         Util_CatchBZ2Error(bzerror);
1110                         goto cleanup;
1111                 }
1112                 if (bytesread == offset)
1113                         break;
1114         }
1115
1116 exit:
1117         Py_INCREF(Py_None);
1118         ret = Py_None;
1119
1120 cleanup:
1121         RELEASE_LOCK(self);
1122         return ret;
1123 }
1124
1125 PyDoc_STRVAR(BZ2File_tell__doc__,
1126 "tell() -> int\n\
1127 \n\
1128 Return the current file position, an integer (may be a long integer).\n\
1129 ");
1130
1131 static PyObject *
1132 BZ2File_tell(BZ2FileObject *self, PyObject *args)
1133 {
1134         PyObject *ret = NULL;
1135
1136         if (self->mode == MODE_CLOSED) {
1137                 PyErr_SetString(PyExc_ValueError,
1138                                 "I/O operation on closed file");
1139                 goto cleanup;
1140         }
1141
1142 #if !defined(HAVE_LARGEFILE_SUPPORT)
1143         ret = PyInt_FromLong(self->pos);
1144 #else
1145         ret = PyLong_FromLongLong(self->pos);
1146 #endif
1147
1148 cleanup:
1149         return ret;
1150 }
1151
1152 PyDoc_STRVAR(BZ2File_close__doc__,
1153 "close() -> None or (perhaps) an integer\n\
1154 \n\
1155 Close the file. Sets data attribute .closed to true. A closed file\n\
1156 cannot be used for further I/O operations. close() may be called more\n\
1157 than once without error.\n\
1158 ");
1159
1160 static PyObject *
1161 BZ2File_close(BZ2FileObject *self)
1162 {
1163         PyObject *ret = NULL;
1164         int bzerror = BZ_OK;
1165
1166         ACQUIRE_LOCK(self);
1167         switch (self->mode) {
1168                 case MODE_READ:
1169                 case MODE_READ_EOF:
1170                         BZ2_bzReadClose(&bzerror, self->fp);
1171                         break;
1172                 case MODE_WRITE:
1173                         BZ2_bzWriteClose(&bzerror, self->fp,
1174                                          0, NULL, NULL);
1175                         break;
1176         }
1177         self->mode = MODE_CLOSED;
1178         ret = PyObject_CallMethod(self->file, "close", NULL);
1179         if (bzerror != BZ_OK) {
1180                 Util_CatchBZ2Error(bzerror);
1181                 Py_XDECREF(ret);
1182                 ret = NULL;
1183         }
1184
1185         RELEASE_LOCK(self);
1186         return ret;
1187 }
1188
1189 static PyObject *BZ2File_getiter(BZ2FileObject *self);
1190
1191 static PyMethodDef BZ2File_methods[] = {
1192         {"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__},
1193         {"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__doc__},
1194         {"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readlines__doc__},
1195         {"xreadlines", (PyCFunction)BZ2File_getiter, METH_VARARGS, BZ2File_xreadlines__doc__},
1196         {"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__},
1197         {"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__doc__},
1198         {"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__},
1199         {"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__},
1200         {"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__},
1201         {NULL,          NULL}           /* sentinel */
1202 };
1203
1204
1205 /* ===================================================================== */
1206 /* Getters and setters of BZ2File. */
1207
1208 /* This is a hacked version of Python's fileobject.c:get_newlines(). */
1209 static PyObject *
1210 BZ2File_get_newlines(BZ2FileObject *self, void *closure)
1211 {
1212         switch (self->f_newlinetypes) {
1213         case NEWLINE_UNKNOWN:
1214                 Py_INCREF(Py_None);
1215                 return Py_None;
1216         case NEWLINE_CR:
1217                 return PyString_FromString("\r");
1218         case NEWLINE_LF:
1219                 return PyString_FromString("\n");
1220         case NEWLINE_CR|NEWLINE_LF:
1221                 return Py_BuildValue("(ss)", "\r", "\n");
1222         case NEWLINE_CRLF:
1223                 return PyString_FromString("\r\n");
1224         case NEWLINE_CR|NEWLINE_CRLF:
1225                 return Py_BuildValue("(ss)", "\r", "\r\n");
1226         case NEWLINE_LF|NEWLINE_CRLF:
1227                 return Py_BuildValue("(ss)", "\n", "\r\n");
1228         case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF:
1229                 return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
1230         default:
1231                 PyErr_Format(PyExc_SystemError,
1232                              "Unknown newlines value 0x%x\n",
1233                              self->f_newlinetypes);
1234                 return NULL;
1235         }
1236 }
1237
1238 static PyObject *
1239 BZ2File_get_closed(BZ2FileObject *self, void *closure)
1240 {
1241         return PyInt_FromLong(self->mode == MODE_CLOSED);
1242 }
1243
1244 static PyObject *
1245 BZ2File_get_mode(BZ2FileObject *self, void *closure)
1246 {
1247         return PyObject_GetAttrString(self->file, "mode");
1248 }
1249
1250 static PyObject *
1251 BZ2File_get_name(BZ2FileObject *self, void *closure)
1252 {
1253         return PyObject_GetAttrString(self->file, "name");
1254 }
1255
1256 static PyGetSetDef BZ2File_getset[] = {
1257         {"closed", (getter)BZ2File_get_closed, NULL,
1258                         "True if the file is closed"},
1259         {"newlines", (getter)BZ2File_get_newlines, NULL,
1260                         "end-of-line convention used in this file"},
1261         {"mode", (getter)BZ2File_get_mode, NULL,
1262                         "file mode ('r', 'w', or 'U')"},
1263         {"name", (getter)BZ2File_get_name, NULL,
1264                         "file name"},
1265         {NULL}  /* Sentinel */
1266 };
1267
1268
1269 /* ===================================================================== */
1270 /* Members of BZ2File_Type. */
1271
1272 #undef OFF
1273 #define OFF(x) offsetof(BZ2FileObject, x)
1274
1275 static PyMemberDef BZ2File_members[] = {
1276         {"softspace",   T_INT,          OFF(f_softspace), 0,
1277          "flag indicating that a space needs to be printed; used by print"},
1278         {NULL}  /* Sentinel */
1279 };
1280
1281 /* ===================================================================== */
1282 /* Slot definitions for BZ2File_Type. */
1283
1284 static int
1285 BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
1286 {
1287         static char *kwlist[] = {"filename", "mode", "buffering",
1288                                        "compresslevel", 0};
1289         PyObject *name;
1290         char *mode = "r";
1291         int buffering = -1;
1292         int compresslevel = 9;
1293         int bzerror;
1294         int mode_char = 0;
1295
1296         self->size = -1;
1297
1298         if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|sii:BZ2File",
1299                                          kwlist, &name, &mode, &buffering,
1300                                          &compresslevel))
1301                 return -1;
1302
1303         if (compresslevel < 1 || compresslevel > 9) {
1304                 PyErr_SetString(PyExc_ValueError,
1305                                 "compresslevel must be between 1 and 9");
1306                 return -1;
1307         }
1308
1309         for (;;) {
1310                 int error = 0;
1311                 switch (*mode) {
1312                         case 'r':
1313                         case 'w':
1314                                 if (mode_char)
1315                                         error = 1;
1316                                 mode_char = *mode;
1317                                 break;
1318
1319                         case 'b':
1320                                 break;
1321
1322                         case 'U':
1323 #ifdef __VMS
1324                                 self->f_univ_newline = 0;
1325 #else
1326                                 self->f_univ_newline = 1;
1327 #endif
1328                                 break;
1329
1330                         default:
1331                                 error = 1;
1332                                 break;
1333                 }
1334                 if (error) {
1335                         PyErr_Format(PyExc_ValueError,
1336                                      "invalid mode char %c", *mode);
1337                         return -1;
1338                 }
1339                 mode++;
1340                 if (*mode == '\0')
1341                         break;
1342         }
1343
1344         if (mode_char == 0) {
1345                 mode_char = 'r';
1346         }
1347
1348         mode = (mode_char == 'r') ? "rb" : "wb";
1349
1350         self->file = PyObject_CallFunction((PyObject*)&PyFile_Type, "(Osi)",
1351                                            name, mode, buffering);
1352         if (self->file == NULL)
1353                 return -1;
1354
1355         /* From now on, we have stuff to dealloc, so jump to error label
1356          * instead of returning */
1357
1358 #ifdef WITH_THREAD
1359         self->lock = PyThread_allocate_lock();
1360         if (!self->lock) {
1361                 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1362                 goto error;
1363         }
1364 #endif
1365
1366         if (mode_char == 'r')
1367                 self->fp = BZ2_bzReadOpen(&bzerror,
1368                                           PyFile_AsFile(self->file),
1369                                           0, 0, NULL, 0);
1370         else
1371                 self->fp = BZ2_bzWriteOpen(&bzerror,
1372                                            PyFile_AsFile(self->file),
1373                                            compresslevel, 0, 0);
1374
1375         if (bzerror != BZ_OK) {
1376                 Util_CatchBZ2Error(bzerror);
1377                 goto error;
1378         }
1379
1380         self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE;
1381
1382         return 0;
1383
1384 error:
1385         Py_CLEAR(self->file);
1386 #ifdef WITH_THREAD
1387         if (self->lock) {
1388                 PyThread_free_lock(self->lock);
1389                 self->lock = NULL;
1390         }
1391 #endif
1392         return -1;
1393 }
1394
1395 static void
1396 BZ2File_dealloc(BZ2FileObject *self)
1397 {
1398         int bzerror;
1399 #ifdef WITH_THREAD
1400         if (self->lock)
1401                 PyThread_free_lock(self->lock);
1402 #endif
1403         switch (self->mode) {
1404                 case MODE_READ:
1405                 case MODE_READ_EOF:
1406                         BZ2_bzReadClose(&bzerror, self->fp);
1407                         break;
1408                 case MODE_WRITE:
1409                         BZ2_bzWriteClose(&bzerror, self->fp,
1410                                          0, NULL, NULL);
1411                         break;
1412         }
1413         Util_DropReadAhead(self);
1414         Py_XDECREF(self->file);
1415         Py_TYPE(self)->tp_free((PyObject *)self);
1416 }
1417
1418 /* This is a hacked version of Python's fileobject.c:file_getiter(). */
1419 static PyObject *
1420 BZ2File_getiter(BZ2FileObject *self)
1421 {
1422         if (self->mode == MODE_CLOSED) {
1423                 PyErr_SetString(PyExc_ValueError,
1424                                 "I/O operation on closed file");
1425                 return NULL;
1426         }
1427         Py_INCREF((PyObject*)self);
1428         return (PyObject *)self;
1429 }
1430
1431 /* This is a hacked version of Python's fileobject.c:file_iternext(). */
1432 #define READAHEAD_BUFSIZE 8192
1433 static PyObject *
1434 BZ2File_iternext(BZ2FileObject *self)
1435 {
1436         PyStringObject* ret;
1437         ACQUIRE_LOCK(self);
1438         if (self->mode == MODE_CLOSED) {
1439                 PyErr_SetString(PyExc_ValueError,
1440                                 "I/O operation on closed file");
1441                 return NULL;
1442         }
1443         ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE);
1444         RELEASE_LOCK(self);
1445         if (ret == NULL || PyString_GET_SIZE(ret) == 0) {
1446                 Py_XDECREF(ret);
1447                 return NULL;
1448         }
1449         return (PyObject *)ret;
1450 }
1451
1452 /* ===================================================================== */
1453 /* BZ2File_Type definition. */
1454
1455 PyDoc_VAR(BZ2File__doc__) =
1456 PyDoc_STR(
1457 "BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
1458 \n\
1459 Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
1460 writing. When opened for writing, the file will be created if it doesn't\n\
1461 exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
1462 unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
1463 is given, must be a number between 1 and 9.\n\
1464 ")
1465 PyDoc_STR(
1466 "\n\
1467 Add a 'U' to mode to open the file for input with universal newline\n\
1468 support. Any line ending in the input file will be seen as a '\\n' in\n\
1469 Python. Also, a file so opened gains the attribute 'newlines'; the value\n\
1470 for this attribute is one of None (no newline read yet), '\\r', '\\n',\n\
1471 '\\r\\n' or a tuple containing all the newline types seen. Universal\n\
1472 newlines are available only when reading.\n\
1473 ")
1474 ;
1475
1476 static PyTypeObject BZ2File_Type = {
1477         PyVarObject_HEAD_INIT(NULL, 0)
1478         "bz2.BZ2File",          /*tp_name*/
1479         sizeof(BZ2FileObject),  /*tp_basicsize*/
1480         0,                      /*tp_itemsize*/
1481         (destructor)BZ2File_dealloc, /*tp_dealloc*/
1482         0,                      /*tp_print*/
1483         0,                      /*tp_getattr*/
1484         0,                      /*tp_setattr*/
1485         0,                      /*tp_compare*/
1486         0,                      /*tp_repr*/
1487         0,                      /*tp_as_number*/
1488         0,                      /*tp_as_sequence*/
1489         0,                      /*tp_as_mapping*/
1490         0,                      /*tp_hash*/
1491         0,                      /*tp_call*/
1492         0,                      /*tp_str*/
1493         PyObject_GenericGetAttr,/*tp_getattro*/
1494         PyObject_GenericSetAttr,/*tp_setattro*/
1495         0,                      /*tp_as_buffer*/
1496         Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1497         BZ2File__doc__,         /*tp_doc*/
1498         0,                      /*tp_traverse*/
1499         0,                      /*tp_clear*/
1500         0,                      /*tp_richcompare*/
1501         0,                      /*tp_weaklistoffset*/
1502         (getiterfunc)BZ2File_getiter, /*tp_iter*/
1503         (iternextfunc)BZ2File_iternext, /*tp_iternext*/
1504         BZ2File_methods,        /*tp_methods*/
1505         BZ2File_members,        /*tp_members*/
1506         BZ2File_getset,         /*tp_getset*/
1507         0,                      /*tp_base*/
1508         0,                      /*tp_dict*/
1509         0,                      /*tp_descr_get*/
1510         0,                      /*tp_descr_set*/
1511         0,                      /*tp_dictoffset*/
1512         (initproc)BZ2File_init, /*tp_init*/
1513         PyType_GenericAlloc,    /*tp_alloc*/
1514         PyType_GenericNew,      /*tp_new*/
1515         _PyObject_Del,          /*tp_free*/
1516         0,                      /*tp_is_gc*/
1517 };
1518
1519
1520 /* ===================================================================== */
1521 /* Methods of BZ2Comp. */
1522
1523 PyDoc_STRVAR(BZ2Comp_compress__doc__,
1524 "compress(data) -> string\n\
1525 \n\
1526 Provide more data to the compressor object. It will return chunks of\n\
1527 compressed data whenever possible. When you've finished providing data\n\
1528 to compress, call the flush() method to finish the compression process,\n\
1529 and return what is left in the internal buffers.\n\
1530 ");
1531
1532 static PyObject *
1533 BZ2Comp_compress(BZ2CompObject *self, PyObject *args)
1534 {
1535         char *data;
1536         int datasize;
1537         int bufsize = SMALLCHUNK;
1538         PY_LONG_LONG totalout;
1539         PyObject *ret = NULL;
1540         bz_stream *bzs = &self->bzs;
1541         int bzerror;
1542
1543         if (!PyArg_ParseTuple(args, "s#:compress", &data, &datasize))
1544                 return NULL;
1545
1546         if (datasize == 0)
1547                 return PyString_FromString("");
1548
1549         ACQUIRE_LOCK(self);
1550         if (!self->running) {
1551                 PyErr_SetString(PyExc_ValueError,
1552                                 "this object was already flushed");
1553                 goto error;
1554         }
1555
1556         ret = PyString_FromStringAndSize(NULL, bufsize);
1557         if (!ret)
1558                 goto error;
1559
1560         bzs->next_in = data;
1561         bzs->avail_in = datasize;
1562         bzs->next_out = BUF(ret);
1563         bzs->avail_out = bufsize;
1564
1565         totalout = BZS_TOTAL_OUT(bzs);
1566
1567         for (;;) {
1568                 Py_BEGIN_ALLOW_THREADS
1569                 bzerror = BZ2_bzCompress(bzs, BZ_RUN);
1570                 Py_END_ALLOW_THREADS
1571                 if (bzerror != BZ_RUN_OK) {
1572                         Util_CatchBZ2Error(bzerror);
1573                         goto error;
1574                 }
1575                 if (bzs->avail_in == 0)
1576                         break; /* no more input data */
1577                 if (bzs->avail_out == 0) {
1578                         bufsize = Util_NewBufferSize(bufsize);
1579                         if (_PyString_Resize(&ret, bufsize) < 0) {
1580                                 BZ2_bzCompressEnd(bzs);
1581                                 goto error;
1582                         }
1583                         bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1584                                                     - totalout);
1585                         bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1586                 }
1587         }
1588
1589         _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
1590
1591         RELEASE_LOCK(self);
1592         return ret;
1593
1594 error:
1595         RELEASE_LOCK(self);
1596         Py_XDECREF(ret);
1597         return NULL;
1598 }
1599
1600 PyDoc_STRVAR(BZ2Comp_flush__doc__,
1601 "flush() -> string\n\
1602 \n\
1603 Finish the compression process and return what is left in internal buffers.\n\
1604 You must not use the compressor object after calling this method.\n\
1605 ");
1606
1607 static PyObject *
1608 BZ2Comp_flush(BZ2CompObject *self)
1609 {
1610         int bufsize = SMALLCHUNK;
1611         PyObject *ret = NULL;
1612         bz_stream *bzs = &self->bzs;
1613         PY_LONG_LONG totalout;
1614         int bzerror;
1615
1616         ACQUIRE_LOCK(self);
1617         if (!self->running) {
1618                 PyErr_SetString(PyExc_ValueError, "object was already "
1619                                                   "flushed");
1620                 goto error;
1621         }
1622         self->running = 0;
1623
1624         ret = PyString_FromStringAndSize(NULL, bufsize);
1625         if (!ret)
1626                 goto error;
1627
1628         bzs->next_out = BUF(ret);
1629         bzs->avail_out = bufsize;
1630
1631         totalout = BZS_TOTAL_OUT(bzs);
1632
1633         for (;;) {
1634                 Py_BEGIN_ALLOW_THREADS
1635                 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1636                 Py_END_ALLOW_THREADS
1637                 if (bzerror == BZ_STREAM_END) {
1638                         break;
1639                 } else if (bzerror != BZ_FINISH_OK) {
1640                         Util_CatchBZ2Error(bzerror);
1641                         goto error;
1642                 }
1643                 if (bzs->avail_out == 0) {
1644                         bufsize = Util_NewBufferSize(bufsize);
1645                         if (_PyString_Resize(&ret, bufsize) < 0)
1646                                 goto error;
1647                         bzs->next_out = BUF(ret);
1648                         bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1649                                                     - totalout);
1650                         bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1651                 }
1652         }
1653
1654         if (bzs->avail_out != 0)
1655                 _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
1656
1657         RELEASE_LOCK(self);
1658         return ret;
1659
1660 error:
1661         RELEASE_LOCK(self);
1662         Py_XDECREF(ret);
1663         return NULL;
1664 }
1665
1666 static PyMethodDef BZ2Comp_methods[] = {
1667         {"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS,
1668          BZ2Comp_compress__doc__},
1669         {"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS,
1670          BZ2Comp_flush__doc__},
1671         {NULL,          NULL}           /* sentinel */
1672 };
1673
1674
1675 /* ===================================================================== */
1676 /* Slot definitions for BZ2Comp_Type. */
1677
1678 static int
1679 BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs)
1680 {
1681         int compresslevel = 9;
1682         int bzerror;
1683         static char *kwlist[] = {"compresslevel", 0};
1684
1685         if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor",
1686                                          kwlist, &compresslevel))
1687                 return -1;
1688
1689         if (compresslevel < 1 || compresslevel > 9) {
1690                 PyErr_SetString(PyExc_ValueError,
1691                                 "compresslevel must be between 1 and 9");
1692                 goto error;
1693         }
1694
1695 #ifdef WITH_THREAD
1696         self->lock = PyThread_allocate_lock();
1697         if (!self->lock) {
1698                 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1699                 goto error;
1700         }
1701 #endif
1702
1703         memset(&self->bzs, 0, sizeof(bz_stream));
1704         bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
1705         if (bzerror != BZ_OK) {
1706                 Util_CatchBZ2Error(bzerror);
1707                 goto error;
1708         }
1709
1710         self->running = 1;
1711
1712         return 0;
1713 error:
1714 #ifdef WITH_THREAD
1715         if (self->lock) {
1716                 PyThread_free_lock(self->lock);
1717                 self->lock = NULL;
1718         }
1719 #endif
1720         return -1;
1721 }
1722
1723 static void
1724 BZ2Comp_dealloc(BZ2CompObject *self)
1725 {
1726 #ifdef WITH_THREAD
1727         if (self->lock)
1728                 PyThread_free_lock(self->lock);
1729 #endif
1730         BZ2_bzCompressEnd(&self->bzs);
1731         Py_TYPE(self)->tp_free((PyObject *)self);
1732 }
1733
1734
1735 /* ===================================================================== */
1736 /* BZ2Comp_Type definition. */
1737
1738 PyDoc_STRVAR(BZ2Comp__doc__,
1739 "BZ2Compressor([compresslevel=9]) -> compressor object\n\
1740 \n\
1741 Create a new compressor object. This object may be used to compress\n\
1742 data sequentially. If you want to compress data in one shot, use the\n\
1743 compress() function instead. The compresslevel parameter, if given,\n\
1744 must be a number between 1 and 9.\n\
1745 ");
1746
1747 static PyTypeObject BZ2Comp_Type = {
1748         PyVarObject_HEAD_INIT(NULL, 0)
1749         "bz2.BZ2Compressor",    /*tp_name*/
1750         sizeof(BZ2CompObject),  /*tp_basicsize*/
1751         0,                      /*tp_itemsize*/
1752         (destructor)BZ2Comp_dealloc, /*tp_dealloc*/
1753         0,                      /*tp_print*/
1754         0,                      /*tp_getattr*/
1755         0,                      /*tp_setattr*/
1756         0,                      /*tp_compare*/
1757         0,                      /*tp_repr*/
1758         0,                      /*tp_as_number*/
1759         0,                      /*tp_as_sequence*/
1760         0,                      /*tp_as_mapping*/
1761         0,                      /*tp_hash*/
1762         0,                      /*tp_call*/
1763         0,                      /*tp_str*/
1764         PyObject_GenericGetAttr,/*tp_getattro*/
1765         PyObject_GenericSetAttr,/*tp_setattro*/
1766         0,                      /*tp_as_buffer*/
1767         Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1768         BZ2Comp__doc__,         /*tp_doc*/
1769         0,                      /*tp_traverse*/
1770         0,                      /*tp_clear*/
1771         0,                      /*tp_richcompare*/
1772         0,                      /*tp_weaklistoffset*/
1773         0,                      /*tp_iter*/
1774         0,                      /*tp_iternext*/
1775         BZ2Comp_methods,        /*tp_methods*/
1776         0,                      /*tp_members*/
1777         0,                      /*tp_getset*/
1778         0,                      /*tp_base*/
1779         0,                      /*tp_dict*/
1780         0,                      /*tp_descr_get*/
1781         0,                      /*tp_descr_set*/
1782         0,                      /*tp_dictoffset*/
1783         (initproc)BZ2Comp_init, /*tp_init*/
1784         PyType_GenericAlloc,    /*tp_alloc*/
1785         PyType_GenericNew,      /*tp_new*/
1786         _PyObject_Del,          /*tp_free*/
1787         0,                      /*tp_is_gc*/
1788 };
1789
1790
1791 /* ===================================================================== */
1792 /* Members of BZ2Decomp. */
1793
1794 #undef OFF
1795 #define OFF(x) offsetof(BZ2DecompObject, x)
1796
1797 static PyMemberDef BZ2Decomp_members[] = {
1798         {"unused_data", T_OBJECT, OFF(unused_data), RO},
1799         {NULL}  /* Sentinel */
1800 };
1801
1802
1803 /* ===================================================================== */
1804 /* Methods of BZ2Decomp. */
1805
1806 PyDoc_STRVAR(BZ2Decomp_decompress__doc__,
1807 "decompress(data) -> string\n\
1808 \n\
1809 Provide more data to the decompressor object. It will return chunks\n\
1810 of decompressed data whenever possible. If you try to decompress data\n\
1811 after the end of stream is found, EOFError will be raised. If any data\n\
1812 was found after the end of stream, it'll be ignored and saved in\n\
1813 unused_data attribute.\n\
1814 ");
1815
1816 static PyObject *
1817 BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args)
1818 {
1819         char *data;
1820         int datasize;
1821         int bufsize = SMALLCHUNK;
1822         PY_LONG_LONG totalout;
1823         PyObject *ret = NULL;
1824         bz_stream *bzs = &self->bzs;
1825         int bzerror;
1826
1827         if (!PyArg_ParseTuple(args, "s#:decompress", &data, &datasize))
1828                 return NULL;
1829
1830         ACQUIRE_LOCK(self);
1831         if (!self->running) {
1832                 PyErr_SetString(PyExc_EOFError, "end of stream was "
1833                                                 "already found");
1834                 goto error;
1835         }
1836
1837         ret = PyString_FromStringAndSize(NULL, bufsize);
1838         if (!ret)
1839                 goto error;
1840
1841         bzs->next_in = data;
1842         bzs->avail_in = datasize;
1843         bzs->next_out = BUF(ret);
1844         bzs->avail_out = bufsize;
1845
1846         totalout = BZS_TOTAL_OUT(bzs);
1847
1848         for (;;) {
1849                 Py_BEGIN_ALLOW_THREADS
1850                 bzerror = BZ2_bzDecompress(bzs);
1851                 Py_END_ALLOW_THREADS
1852                 if (bzerror == BZ_STREAM_END) {
1853                         if (bzs->avail_in != 0) {
1854                                 Py_DECREF(self->unused_data);
1855                                 self->unused_data =
1856                                     PyString_FromStringAndSize(bzs->next_in,
1857                                                                bzs->avail_in);
1858                         }
1859                         self->running = 0;
1860                         break;
1861                 }
1862                 if (bzerror != BZ_OK) {
1863                         Util_CatchBZ2Error(bzerror);
1864                         goto error;
1865                 }
1866                 if (bzs->avail_in == 0)
1867                         break; /* no more input data */
1868                 if (bzs->avail_out == 0) {
1869                         bufsize = Util_NewBufferSize(bufsize);
1870                         if (_PyString_Resize(&ret, bufsize) < 0) {
1871                                 BZ2_bzDecompressEnd(bzs);
1872                                 goto error;
1873                         }
1874                         bzs->next_out = BUF(ret);
1875                         bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1876                                                     - totalout);
1877                         bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1878                 }
1879         }
1880
1881         if (bzs->avail_out != 0)
1882                 _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
1883
1884         RELEASE_LOCK(self);
1885         return ret;
1886
1887 error:
1888         RELEASE_LOCK(self);
1889         Py_XDECREF(ret);
1890         return NULL;
1891 }
1892
1893 static PyMethodDef BZ2Decomp_methods[] = {
1894         {"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_decompress__doc__},
1895         {NULL,          NULL}           /* sentinel */
1896 };
1897
1898
1899 /* ===================================================================== */
1900 /* Slot definitions for BZ2Decomp_Type. */
1901
1902 static int
1903 BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs)
1904 {
1905         int bzerror;
1906
1907         if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
1908                 return -1;
1909
1910 #ifdef WITH_THREAD
1911         self->lock = PyThread_allocate_lock();
1912         if (!self->lock) {
1913                 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1914                 goto error;
1915         }
1916 #endif
1917
1918         self->unused_data = PyString_FromString("");
1919         if (!self->unused_data)
1920                 goto error;
1921
1922         memset(&self->bzs, 0, sizeof(bz_stream));
1923         bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
1924         if (bzerror != BZ_OK) {
1925                 Util_CatchBZ2Error(bzerror);
1926                 goto error;
1927         }
1928
1929         self->running = 1;
1930
1931         return 0;
1932
1933 error:
1934 #ifdef WITH_THREAD
1935         if (self->lock) {
1936                 PyThread_free_lock(self->lock);
1937                 self->lock = NULL;
1938         }
1939 #endif
1940         Py_CLEAR(self->unused_data);
1941         return -1;
1942 }
1943
1944 static void
1945 BZ2Decomp_dealloc(BZ2DecompObject *self)
1946 {
1947 #ifdef WITH_THREAD
1948         if (self->lock)
1949                 PyThread_free_lock(self->lock);
1950 #endif
1951         Py_XDECREF(self->unused_data);
1952         BZ2_bzDecompressEnd(&self->bzs);
1953         Py_TYPE(self)->tp_free((PyObject *)self);
1954 }
1955
1956
1957 /* ===================================================================== */
1958 /* BZ2Decomp_Type definition. */
1959
1960 PyDoc_STRVAR(BZ2Decomp__doc__,
1961 "BZ2Decompressor() -> decompressor object\n\
1962 \n\
1963 Create a new decompressor object. This object may be used to decompress\n\
1964 data sequentially. If you want to decompress data in one shot, use the\n\
1965 decompress() function instead.\n\
1966 ");
1967
1968 static PyTypeObject BZ2Decomp_Type = {
1969         PyVarObject_HEAD_INIT(NULL, 0)
1970         "bz2.BZ2Decompressor",  /*tp_name*/
1971         sizeof(BZ2DecompObject), /*tp_basicsize*/
1972         0,                      /*tp_itemsize*/
1973         (destructor)BZ2Decomp_dealloc, /*tp_dealloc*/
1974         0,                      /*tp_print*/
1975         0,                      /*tp_getattr*/
1976         0,                      /*tp_setattr*/
1977         0,                      /*tp_compare*/
1978         0,                      /*tp_repr*/
1979         0,                      /*tp_as_number*/
1980         0,                      /*tp_as_sequence*/
1981         0,                      /*tp_as_mapping*/
1982         0,                      /*tp_hash*/
1983         0,                      /*tp_call*/
1984         0,                      /*tp_str*/
1985         PyObject_GenericGetAttr,/*tp_getattro*/
1986         PyObject_GenericSetAttr,/*tp_setattro*/
1987         0,                      /*tp_as_buffer*/
1988         Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1989         BZ2Decomp__doc__,       /*tp_doc*/
1990         0,                      /*tp_traverse*/
1991         0,                      /*tp_clear*/
1992         0,                      /*tp_richcompare*/
1993         0,                      /*tp_weaklistoffset*/
1994         0,                      /*tp_iter*/
1995         0,                      /*tp_iternext*/
1996         BZ2Decomp_methods,      /*tp_methods*/
1997         BZ2Decomp_members,      /*tp_members*/
1998         0,                      /*tp_getset*/
1999         0,                      /*tp_base*/
2000         0,                      /*tp_dict*/
2001         0,                      /*tp_descr_get*/
2002         0,                      /*tp_descr_set*/
2003         0,                      /*tp_dictoffset*/
2004         (initproc)BZ2Decomp_init, /*tp_init*/
2005         PyType_GenericAlloc,    /*tp_alloc*/
2006         PyType_GenericNew,      /*tp_new*/
2007         _PyObject_Del,          /*tp_free*/
2008         0,                      /*tp_is_gc*/
2009 };
2010
2011
2012 /* ===================================================================== */
2013 /* Module functions. */
2014
2015 PyDoc_STRVAR(bz2_compress__doc__,
2016 "compress(data [, compresslevel=9]) -> string\n\
2017 \n\
2018 Compress data in one shot. If you want to compress data sequentially,\n\
2019 use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
2020 given, must be a number between 1 and 9.\n\
2021 ");
2022
2023 static PyObject *
2024 bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs)
2025 {
2026         int compresslevel=9;
2027         char *data;
2028         int datasize;
2029         int bufsize;
2030         PyObject *ret = NULL;
2031         bz_stream _bzs;
2032         bz_stream *bzs = &_bzs;
2033         int bzerror;
2034         static char *kwlist[] = {"data", "compresslevel", 0};
2035
2036         if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|i",
2037                                          kwlist, &data, &datasize,
2038                                          &compresslevel))
2039                 return NULL;
2040
2041         if (compresslevel < 1 || compresslevel > 9) {
2042                 PyErr_SetString(PyExc_ValueError,
2043                                 "compresslevel must be between 1 and 9");
2044                 return NULL;
2045         }
2046
2047         /* Conforming to bz2 manual, this is large enough to fit compressed
2048          * data in one shot. We will check it later anyway. */
2049         bufsize = datasize + (datasize/100+1) + 600;
2050
2051         ret = PyString_FromStringAndSize(NULL, bufsize);
2052         if (!ret)
2053                 return NULL;
2054
2055         memset(bzs, 0, sizeof(bz_stream));
2056
2057         bzs->next_in = data;
2058         bzs->avail_in = datasize;
2059         bzs->next_out = BUF(ret);
2060         bzs->avail_out = bufsize;
2061
2062         bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0);
2063         if (bzerror != BZ_OK) {
2064                 Util_CatchBZ2Error(bzerror);
2065                 Py_DECREF(ret);
2066                 return NULL;
2067         }
2068
2069         for (;;) {
2070                 Py_BEGIN_ALLOW_THREADS
2071                 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
2072                 Py_END_ALLOW_THREADS
2073                 if (bzerror == BZ_STREAM_END) {
2074                         break;
2075                 } else if (bzerror != BZ_FINISH_OK) {
2076                         BZ2_bzCompressEnd(bzs);
2077                         Util_CatchBZ2Error(bzerror);
2078                         Py_DECREF(ret);
2079                         return NULL;
2080                 }
2081                 if (bzs->avail_out == 0) {
2082                         bufsize = Util_NewBufferSize(bufsize);
2083                         if (_PyString_Resize(&ret, bufsize) < 0) {
2084                                 BZ2_bzCompressEnd(bzs);
2085                                 Py_DECREF(ret);
2086                                 return NULL;
2087                         }
2088                         bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2089                         bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2090                 }
2091         }
2092
2093         if (bzs->avail_out != 0)
2094                 _PyString_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs));
2095         BZ2_bzCompressEnd(bzs);
2096
2097         return ret;
2098 }
2099
2100 PyDoc_STRVAR(bz2_decompress__doc__,
2101 "decompress(data) -> decompressed data\n\
2102 \n\
2103 Decompress data in one shot. If you want to decompress data sequentially,\n\
2104 use an instance of BZ2Decompressor instead.\n\
2105 ");
2106
2107 static PyObject *
2108 bz2_decompress(PyObject *self, PyObject *args)
2109 {
2110         char *data;
2111         int datasize;
2112         int bufsize = SMALLCHUNK;
2113         PyObject *ret;
2114         bz_stream _bzs;
2115         bz_stream *bzs = &_bzs;
2116         int bzerror;
2117
2118         if (!PyArg_ParseTuple(args, "s#:decompress", &data, &datasize))
2119                 return NULL;
2120
2121         if (datasize == 0)
2122                 return PyString_FromString("");
2123
2124         ret = PyString_FromStringAndSize(NULL, bufsize);
2125         if (!ret)
2126                 return NULL;
2127
2128         memset(bzs, 0, sizeof(bz_stream));
2129
2130         bzs->next_in = data;
2131         bzs->avail_in = datasize;
2132         bzs->next_out = BUF(ret);
2133         bzs->avail_out = bufsize;
2134
2135         bzerror = BZ2_bzDecompressInit(bzs, 0, 0);
2136         if (bzerror != BZ_OK) {
2137                 Util_CatchBZ2Error(bzerror);
2138                 Py_DECREF(ret);
2139                 return NULL;
2140         }
2141
2142         for (;;) {
2143                 Py_BEGIN_ALLOW_THREADS
2144                 bzerror = BZ2_bzDecompress(bzs);
2145                 Py_END_ALLOW_THREADS
2146                 if (bzerror == BZ_STREAM_END) {
2147                         break;
2148                 } else if (bzerror != BZ_OK) {
2149                         BZ2_bzDecompressEnd(bzs);
2150                         Util_CatchBZ2Error(bzerror);
2151                         Py_DECREF(ret);
2152                         return NULL;
2153                 }
2154                 if (bzs->avail_in == 0) {
2155                         BZ2_bzDecompressEnd(bzs);
2156                         PyErr_SetString(PyExc_ValueError,
2157                                         "couldn't find end of stream");
2158                         Py_DECREF(ret);
2159                         return NULL;
2160                 }
2161                 if (bzs->avail_out == 0) {
2162                         bufsize = Util_NewBufferSize(bufsize);
2163                         if (_PyString_Resize(&ret, bufsize) < 0) {
2164                                 BZ2_bzDecompressEnd(bzs);
2165                                 Py_DECREF(ret);
2166                                 return NULL;
2167                         }
2168                         bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2169                         bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2170                 }
2171         }
2172
2173         if (bzs->avail_out != 0)
2174                 _PyString_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs));
2175         BZ2_bzDecompressEnd(bzs);
2176
2177         return ret;
2178 }
2179
2180 static PyMethodDef bz2_methods[] = {
2181         {"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS,
2182                 bz2_compress__doc__},
2183         {"decompress", (PyCFunction) bz2_decompress, METH_VARARGS,
2184                 bz2_decompress__doc__},
2185         {NULL,          NULL}           /* sentinel */
2186 };
2187
2188 /* ===================================================================== */
2189 /* Initialization function. */
2190
2191 PyDoc_STRVAR(bz2__doc__,
2192 "The python bz2 module provides a comprehensive interface for\n\
2193 the bz2 compression library. It implements a complete file\n\
2194 interface, one shot (de)compression functions, and types for\n\
2195 sequential (de)compression.\n\
2196 ");
2197
2198 PyMODINIT_FUNC
2199 initbz2(void)
2200 {
2201         PyObject *m;
2202
2203         Py_TYPE(&BZ2File_Type) = &PyType_Type;
2204         Py_TYPE(&BZ2Comp_Type) = &PyType_Type;
2205         Py_TYPE(&BZ2Decomp_Type) = &PyType_Type;
2206
2207         m = Py_InitModule3("bz2", bz2_methods, bz2__doc__);
2208         if (m == NULL)
2209                 return;
2210
2211         PyModule_AddObject(m, "__author__", PyString_FromString(__author__));
2212
2213         Py_INCREF(&BZ2File_Type);
2214         PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type);
2215
2216         Py_INCREF(&BZ2Comp_Type);
2217         PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type);
2218
2219         Py_INCREF(&BZ2Decomp_Type);
2220         PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type);
2221 }