Modules/bz2module.c

   1 /*
   2
   3 python-bz2 - python bz2 library interface
   4
   5 Copyright (c) 2002  Gustavo Niemeyer <niemeyer@conectiva.com>
   6 Copyright (c) 2002  Python Software Foundation; All Rights Reserved
   7
   8 */
   9
  10 #include "Python.h"
  11 #include <stdio.h>
  12 #include <bzlib.h>
  13 #include "structmember.h"
  14
  15 #ifdef WITH_THREAD
  16 #include "pythread.h"
  17 #endif
  18
  19 static char __author__[] =
  20 "The bz2 python module was written by:\n\
  21 \n\
  22     Gustavo Niemeyer <niemeyer@conectiva.com>\n\
  23 ";
  24
  25 /* Our very own off_t-like type, 64-bit if possible */
  26 /* copied from Objects/fileobject.c */
  27 #if !defined(HAVE_LARGEFILE_SUPPORT)
  28 typedef off_t Py_off_t;
  29 #elif SIZEOF_OFF_T >= 8
  30 typedef off_t Py_off_t;
  31 #elif SIZEOF_FPOS_T >= 8
  32 typedef fpos_t Py_off_t;
  33 #else
  34 #error "Large file support, but neither off_t nor fpos_t is large enough."
  35 #endif
  36
  37 #define BUF(v) PyString_AS_STRING((PyStringObject *)v)
  38
  39 #define MODE_CLOSED   0
  40 #define MODE_READ     1
  41 #define MODE_READ_EOF 2
  42 #define MODE_WRITE    3
  43
  44 #define BZ2FileObject_Check(v)  ((v)->ob_type == &BZ2File_Type)
  45
  46
  47 #ifdef BZ_CONFIG_ERROR
  48
  49 #if SIZEOF_LONG >= 8
  50 #define BZS_TOTAL_OUT(bzs) \
  51         (((long)bzs->total_out_hi32 << 32) + bzs->total_out_lo32)
  52 #elif SIZEOF_LONG_LONG >= 8
  53 #define BZS_TOTAL_OUT(bzs) \
  54         (((PY_LONG_LONG)bzs->total_out_hi32 << 32) + bzs->total_out_lo32)
  55 #else
  56 #define BZS_TOTAL_OUT(bzs) \
  57         bzs->total_out_lo32
  58 #endif
  59
  60 #else /* ! BZ_CONFIG_ERROR */
  61
  62 #define BZ2_bzRead bzRead
  63 #define BZ2_bzReadOpen bzReadOpen
  64 #define BZ2_bzReadClose bzReadClose
  65 #define BZ2_bzWrite bzWrite
  66 #define BZ2_bzWriteOpen bzWriteOpen
  67 #define BZ2_bzWriteClose bzWriteClose
  68 #define BZ2_bzCompress bzCompress
  69 #define BZ2_bzCompressInit bzCompressInit
  70 #define BZ2_bzCompressEnd bzCompressEnd
  71 #define BZ2_bzDecompress bzDecompress
  72 #define BZ2_bzDecompressInit bzDecompressInit
  73 #define BZ2_bzDecompressEnd bzDecompressEnd
  74
  75 #define BZS_TOTAL_OUT(bzs) bzs->total_out
  76
  77 #endif /* ! BZ_CONFIG_ERROR */
  78
  79
  80 #ifdef WITH_THREAD
  81 #define ACQUIRE_LOCK(obj) PyThread_acquire_lock(obj->lock, 1)
  82 #define RELEASE_LOCK(obj) PyThread_release_lock(obj->lock)
  83 #else
  84 #define ACQUIRE_LOCK(obj)
  85 #define RELEASE_LOCK(obj)
  86 #endif
  87
  88 /* Bits in f_newlinetypes */
  89 #define NEWLINE_UNKNOWN 0       /* No newline seen, yet */
  90 #define NEWLINE_CR 1            /* \r newline seen */
  91 #define NEWLINE_LF 2            /* \n newline seen */
  92 #define NEWLINE_CRLF 4          /* \r\n newline seen */
  93
  94 /* ===================================================================== */
  95 /* Structure definitions. */
  96
  97 typedef struct {
  98         PyObject_HEAD
  99         PyObject *file;
 100
 101         char* f_buf;            /* Allocated readahead buffer */
 102         char* f_bufend;         /* Points after last occupied position */
 103         char* f_bufptr;         /* Current buffer position */
 104
 105         int f_softspace;        /* Flag used by 'print' command */
 106
 107         int f_univ_newline;     /* Handle any newline convention */
 108         int f_newlinetypes;     /* Types of newlines seen */
 109         int f_skipnextlf;       /* Skip next \n */
 110
 111         BZFILE *fp;
 112         int mode;
 113         Py_off_t pos;
 114         Py_off_t size;
 115 #ifdef WITH_THREAD
 116         PyThread_type_lock lock;
 117 #endif
 118 } BZ2FileObject;
 119
 120 typedef struct {
 121         PyObject_HEAD
 122         bz_stream bzs;
 123         int running;
 124 #ifdef WITH_THREAD
 125         PyThread_type_lock lock;
 126 #endif
 127 } BZ2CompObject;
 128
 129 typedef struct {
 130         PyObject_HEAD
 131         bz_stream bzs;
 132         int running;
 133         PyObject *unused_data;
 134 #ifdef WITH_THREAD
 135         PyThread_type_lock lock;
 136 #endif
 137 } BZ2DecompObject;
 138
 139 /* ===================================================================== */
 140 /* Utility functions. */
 141
 142 static int
 143 Util_CatchBZ2Error(int bzerror)
 144 {
 145         int ret = 0;
 146         switch(bzerror) {
 147                 case BZ_OK:
 148                 case BZ_STREAM_END:
 149                         break;
 150
 151 #ifdef BZ_CONFIG_ERROR
 152                 case BZ_CONFIG_ERROR:
 153                         PyErr_SetString(PyExc_SystemError,
 154                                         "the bz2 library was not compiled "
 155                                         "correctly");
 156                         ret = 1;
 157                         break;
 158 #endif
 159
 160                 case BZ_PARAM_ERROR:
 161                         PyErr_SetString(PyExc_ValueError,
 162                                         "the bz2 library has received wrong "
 163                                         "parameters");
 164                         ret = 1;
 165                         break;
 166
 167                 case BZ_MEM_ERROR:
 168                         PyErr_NoMemory();
 169                         ret = 1;
 170                         break;
 171
 172                 case BZ_DATA_ERROR:
 173                 case BZ_DATA_ERROR_MAGIC:
 174                         PyErr_SetString(PyExc_IOError, "invalid data stream");
 175                         ret = 1;
 176                         break;
 177
 178                 case BZ_IO_ERROR:
 179                         PyErr_SetString(PyExc_IOError, "unknown IO error");
 180                         ret = 1;
 181                         break;
 182
 183                 case BZ_UNEXPECTED_EOF:
 184                         PyErr_SetString(PyExc_EOFError,
 185                                         "compressed file ended before the "
 186                                         "logical end-of-stream was detected");
 187                         ret = 1;
 188                         break;
 189
 190                 case BZ_SEQUENCE_ERROR:
 191                         PyErr_SetString(PyExc_RuntimeError,
 192                                         "wrong sequence of bz2 library "
 193                                         "commands used");
 194                         ret = 1;
 195                         break;
 196         }
 197         return ret;
 198 }
 199
 200 #if BUFSIZ < 8192
 201 #define SMALLCHUNK 8192
 202 #else
 203 #define SMALLCHUNK BUFSIZ
 204 #endif
 205
 206 #if SIZEOF_INT < 4
 207 #define BIGCHUNK  (512 * 32)
 208 #else
 209 #define BIGCHUNK  (512 * 1024)
 210 #endif
 211
 212 /* This is a hacked version of Python's fileobject.c:new_buffersize(). */
 213 static size_t
 214 Util_NewBufferSize(size_t currentsize)
 215 {
 216         if (currentsize > SMALLCHUNK) {
 217                 /* Keep doubling until we reach BIGCHUNK;
 218                    then keep adding BIGCHUNK. */
 219                 if (currentsize <= BIGCHUNK)
 220                         return currentsize + currentsize;
 221                 else
 222                         return currentsize + BIGCHUNK;
 223         }
 224         return currentsize + SMALLCHUNK;
 225 }
 226
 227 /* This is a hacked version of Python's fileobject.c:get_line(). */
 228 static PyObject *
 229 Util_GetLine(BZ2FileObject *f, int n)
 230 {
 231         char c;
 232         char *buf, *end;
 233         size_t total_v_size;    /* total # of slots in buffer */
 234         size_t used_v_size;     /* # used slots in buffer */
 235         size_t increment;       /* amount to increment the buffer */
 236         PyObject *v;
 237         int bzerror;
 238         int newlinetypes = f->f_newlinetypes;
 239         int skipnextlf = f->f_skipnextlf;
 240         int univ_newline = f->f_univ_newline;
 241
 242         total_v_size = n > 0 ? n : 100;
 243         v = PyString_FromStringAndSize((char *)NULL, total_v_size);
 244         if (v == NULL)
 245                 return NULL;
 246
 247         buf = BUF(v);
 248         end = buf + total_v_size;
 249
 250         for (;;) {
 251                 Py_BEGIN_ALLOW_THREADS
 252                 if (univ_newline) {
 253                         while (1) {
 254                                 BZ2_bzRead(&bzerror, f->fp, &c, 1);
 255                                 f->pos++;
 256                                 if (bzerror != BZ_OK || buf == end)
 257                                         break;
 258                                 if (skipnextlf) {
 259                                         skipnextlf = 0;
 260                                         if (c == '\n') {
 261                                                 /* Seeing a \n here with
 262                                                  * skipnextlf true means we
 263                                                  * saw a \r before.
 264                                                  */
 265                                                 newlinetypes |= NEWLINE_CRLF;
 266                                                 BZ2_bzRead(&bzerror, f->fp,
 267                                                            &c, 1);
 268                                                 if (bzerror != BZ_OK)
 269                                                         break;
 270                                         } else {
 271                                                 newlinetypes |= NEWLINE_CR;
 272                                         }
 273                                 }
 274                                 if (c == '\r') {
 275                                         skipnextlf = 1;
 276                                         c = '\n';
 277                                 } else if ( c == '\n')
 278                                         newlinetypes |= NEWLINE_LF;
 279                                 *buf++ = c;
 280                                 if (c == '\n') break;
 281                         }
 282                         if (bzerror == BZ_STREAM_END && skipnextlf)
 283                                 newlinetypes |= NEWLINE_CR;
 284                 } else /* If not universal newlines use the normal loop */
 285                         do {
 286                                 BZ2_bzRead(&bzerror, f->fp, &c, 1);
 287                                 f->pos++;
 288                                 *buf++ = c;
 289                         } while (bzerror == BZ_OK && c != '\n' && buf != end);
 290                 Py_END_ALLOW_THREADS
 291                 f->f_newlinetypes = newlinetypes;
 292                 f->f_skipnextlf = skipnextlf;
 293                 if (bzerror == BZ_STREAM_END) {
 294                         f->size = f->pos;
 295                         f->mode = MODE_READ_EOF;
 296                         break;
 297                 } else if (bzerror != BZ_OK) {
 298                         Util_CatchBZ2Error(bzerror);
 299                         Py_DECREF(v);
 300                         return NULL;
 301                 }
 302                 if (c == '\n')
 303                         break;
 304                 /* Must be because buf == end */
 305                 if (n > 0)
 306                         break;
 307                 used_v_size = total_v_size;
 308                 increment = total_v_size >> 2; /* mild exponential growth */
 309                 total_v_size += increment;
 310                 if (total_v_size > INT_MAX) {
 311                         PyErr_SetString(PyExc_OverflowError,
 312                             "line is longer than a Python string can hold");
 313                         Py_DECREF(v);
 314                         return NULL;
 315                 }
 316                 if (_PyString_Resize(&v, total_v_size) < 0)
 317                         return NULL;
 318                 buf = BUF(v) + used_v_size;
 319                 end = BUF(v) + total_v_size;
 320         }
 321
 322         used_v_size = buf - BUF(v);
 323         if (used_v_size != total_v_size)
 324                 _PyString_Resize(&v, used_v_size);
 325         return v;
 326 }
 327
 328 /* This is a hacked version of Python's
 329  * fileobject.c:Py_UniversalNewlineFread(). */
 330 size_t
 331 Util_UnivNewlineRead(int *bzerror, BZFILE *stream,
 332                      char* buf, size_t n, BZ2FileObject *f)
 333 {
 334         char *dst = buf;
 335         int newlinetypes, skipnextlf;
 336
 337         assert(buf != NULL);
 338         assert(stream != NULL);
 339
 340         if (!f->f_univ_newline)
 341                 return BZ2_bzRead(bzerror, stream, buf, n);
 342
 343         newlinetypes = f->f_newlinetypes;
 344         skipnextlf = f->f_skipnextlf;
 345
 346         /* Invariant:  n is the number of bytes remaining to be filled
 347          * in the buffer.
 348          */
 349         while (n) {
 350                 size_t nread;
 351                 int shortread;
 352                 char *src = dst;
 353
 354                 nread = BZ2_bzRead(bzerror, stream, dst, n);
 355                 assert(nread <= n);
 356                 n -= nread; /* assuming 1 byte out for each in; will adjust */
 357                 shortread = n != 0;     /* true iff EOF or error */
 358                 while (nread--) {
 359                         char c = *src++;
 360                         if (c == '\r') {
 361                                 /* Save as LF and set flag to skip next LF. */
 362                                 *dst++ = '\n';
 363                                 skipnextlf = 1;
 364                         }
 365                         else if (skipnextlf && c == '\n') {
 366                                 /* Skip LF, and remember we saw CR LF. */
 367                                 skipnextlf = 0;
 368                                 newlinetypes |= NEWLINE_CRLF;
 369                                 ++n;
 370                         }
 371                         else {
 372                                 /* Normal char to be stored in buffer.  Also
 373                                  * update the newlinetypes flag if either this
 374                                  * is an LF or the previous char was a CR.
 375                                  */
 376                                 if (c == '\n')
 377                                         newlinetypes |= NEWLINE_LF;
 378                                 else if (skipnextlf)
 379                                         newlinetypes |= NEWLINE_CR;
 380                                 *dst++ = c;
 381                                 skipnextlf = 0;
 382                         }
 383                 }
 384                 if (shortread) {
 385                         /* If this is EOF, update type flags. */
 386                         if (skipnextlf && *bzerror == BZ_STREAM_END)
 387                                 newlinetypes |= NEWLINE_CR;
 388                         break;
 389                 }
 390         }
 391         f->f_newlinetypes = newlinetypes;
 392         f->f_skipnextlf = skipnextlf;
 393         return dst - buf;
 394 }
 395
 396 /* This is a hacked version of Python's fileobject.c:drop_readahead(). */
 397 static void
 398 Util_DropReadAhead(BZ2FileObject *f)
 399 {
 400         if (f->f_buf != NULL) {
 401                 PyMem_Free(f->f_buf);
 402                 f->f_buf = NULL;
 403         }
 404 }
 405
 406 /* This is a hacked version of Python's fileobject.c:readahead(). */
 407 static int
 408 Util_ReadAhead(BZ2FileObject *f, int bufsize)
 409 {
 410         int chunksize;
 411         int bzerror;
 412
 413         if (f->f_buf != NULL) {
 414                 if((f->f_bufend - f->f_bufptr) >= 1)
 415                         return 0;
 416                 else
 417                         Util_DropReadAhead(f);
 418         }
 419         if (f->mode == MODE_READ_EOF) {
 420                 f->f_bufptr = f->f_buf;
 421                 f->f_bufend = f->f_buf;
 422                 return 0;
 423         }
 424         if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
 425                 return -1;
 426         }
 427         Py_BEGIN_ALLOW_THREADS
 428         chunksize = Util_UnivNewlineRead(&bzerror, f->fp, f->f_buf,
 429                                          bufsize, f);
 430         Py_END_ALLOW_THREADS
 431         f->pos += chunksize;
 432         if (bzerror == BZ_STREAM_END) {
 433                 f->size = f->pos;
 434                 f->mode = MODE_READ_EOF;
 435         } else if (bzerror != BZ_OK) {
 436                 Util_CatchBZ2Error(bzerror);
 437                 Util_DropReadAhead(f);
 438                 return -1;
 439         }
 440         f->f_bufptr = f->f_buf;
 441         f->f_bufend = f->f_buf + chunksize;
 442         return 0;
 443 }
 444
 445 /* This is a hacked version of Python's
 446  * fileobject.c:readahead_get_line_skip(). */
 447 static PyStringObject *
 448 Util_ReadAheadGetLineSkip(BZ2FileObject *f, int skip, int bufsize)
 449 {
 450         PyStringObject* s;
 451         char *bufptr;
 452         char *buf;
 453         int len;
 454
 455         if (f->f_buf == NULL)
 456                 if (Util_ReadAhead(f, bufsize) < 0)
 457                         return NULL;
 458
 459         len = f->f_bufend - f->f_bufptr;
 460         if (len == 0)
 461                 return (PyStringObject *)
 462                         PyString_FromStringAndSize(NULL, skip);
 463         bufptr = memchr(f->f_bufptr, '\n', len);
 464         if (bufptr != NULL) {
 465                 bufptr++;                       /* Count the '\n' */
 466                 len = bufptr - f->f_bufptr;
 467                 s = (PyStringObject *)
 468                         PyString_FromStringAndSize(NULL, skip+len);
 469                 if (s == NULL)
 470                         return NULL;
 471                 memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
 472                 f->f_bufptr = bufptr;
 473                 if (bufptr == f->f_bufend)
 474                         Util_DropReadAhead(f);
 475         } else {
 476                 bufptr = f->f_bufptr;
 477                 buf = f->f_buf;
 478                 f->f_buf = NULL;        /* Force new readahead buffer */
 479                 s = Util_ReadAheadGetLineSkip(f, skip+len,
 480                                               bufsize + (bufsize>>2));
 481                 if (s == NULL) {
 482                         PyMem_Free(buf);
 483                         return NULL;
 484                 }
 485                 memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
 486                 PyMem_Free(buf);
 487         }
 488         return s;
 489 }
 490
 491 /* ===================================================================== */
 492 /* Methods of BZ2File. */
 493
 494 PyDoc_STRVAR(BZ2File_read__doc__,
 495 "read([size]) -> string\n\
 496 \n\
 497 Read at most size uncompressed bytes, returned as a string. If the size\n\
 498 argument is negative or omitted, read until EOF is reached.\n\
 499 ");
 500
 501 /* This is a hacked version of Python's fileobject.c:file_read(). */
 502 static PyObject *
 503 BZ2File_read(BZ2FileObject *self, PyObject *args)
 504 {
 505         long bytesrequested = -1;
 506         size_t bytesread, buffersize, chunksize;
 507         int bzerror;
 508         PyObject *ret = NULL;
 509
 510         if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
 511                 return NULL;
 512
 513         ACQUIRE_LOCK(self);
 514         switch (self->mode) {
 515                 case MODE_READ:
 516                         break;
 517                 case MODE_READ_EOF:
 518                         ret = PyString_FromString("");
 519                         goto cleanup;
 520                 case MODE_CLOSED:
 521                         PyErr_SetString(PyExc_ValueError,
 522                                         "I/O operation on closed file");
 523                         goto cleanup;
 524                 default:
 525                         PyErr_SetString(PyExc_IOError,
 526                                         "file is not ready for reading");
 527                         goto cleanup;
 528         }
 529
 530         if (bytesrequested < 0)
 531                 buffersize = Util_NewBufferSize((size_t)0);
 532         else
 533                 buffersize = bytesrequested;
 534         if (buffersize > INT_MAX) {
 535                 PyErr_SetString(PyExc_OverflowError,
 536                                 "requested number of bytes is "
 537                                 "more than a Python string can hold");
 538                 goto cleanup;
 539         }
 540         ret = PyString_FromStringAndSize((char *)NULL, buffersize);
 541         if (ret == NULL)
 542                 goto cleanup;
 543         bytesread = 0;
 544
 545         for (;;) {
 546                 Py_BEGIN_ALLOW_THREADS
 547                 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
 548                                                  BUF(ret)+bytesread,
 549                                                  buffersize-bytesread,
 550                                                  self);
 551                 self->pos += chunksize;
 552                 Py_END_ALLOW_THREADS
 553                 bytesread += chunksize;
 554                 if (bzerror == BZ_STREAM_END) {
 555                         self->size = self->pos;
 556                         self->mode = MODE_READ_EOF;
 557                         break;
 558                 } else if (bzerror != BZ_OK) {
 559                         Util_CatchBZ2Error(bzerror);
 560                         Py_DECREF(ret);
 561                         ret = NULL;
 562                         goto cleanup;
 563                 }
 564                 if (bytesrequested < 0) {
 565                         buffersize = Util_NewBufferSize(buffersize);
 566                         if (_PyString_Resize(&ret, buffersize) < 0)
 567                                 goto cleanup;
 568                 } else {
 569                         break;
 570                 }
 571         }
 572         if (bytesread != buffersize)
 573                 _PyString_Resize(&ret, bytesread);
 574
 575 cleanup:
 576         RELEASE_LOCK(self);
 577         return ret;
 578 }
 579
 580 PyDoc_STRVAR(BZ2File_readline__doc__,
 581 "readline([size]) -> string\n\
 582 \n\
 583 Return the next line from the file, as a string, retaining newline.\n\
 584 A non-negative size argument will limit the maximum number of bytes to\n\
 585 return (an incomplete line may be returned then). Return an empty\n\
 586 string at EOF.\n\
 587 ");
 588
 589 static PyObject *
 590 BZ2File_readline(BZ2FileObject *self, PyObject *args)
 591 {
 592         PyObject *ret = NULL;
 593         int sizehint = -1;
 594
 595         if (!PyArg_ParseTuple(args, "|i:readline", &sizehint))
 596                 return NULL;
 597
 598         ACQUIRE_LOCK(self);
 599         switch (self->mode) {
 600                 case MODE_READ:
 601                         break;
 602                 case MODE_READ_EOF:
 603                         ret = PyString_FromString("");
 604                         goto cleanup;
 605                 case MODE_CLOSED:
 606                         PyErr_SetString(PyExc_ValueError,
 607                                         "I/O operation on closed file");
 608                         goto cleanup;
 609                 default:
 610                         PyErr_SetString(PyExc_IOError,
 611                                         "file is not ready for reading");
 612                         goto cleanup;
 613         }
 614
 615         if (sizehint == 0)
 616                 ret = PyString_FromString("");
 617         else
 618                 ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint);
 619
 620 cleanup:
 621         RELEASE_LOCK(self);
 622         return ret;
 623 }
 624
 625 PyDoc_STRVAR(BZ2File_readlines__doc__,
 626 "readlines([size]) -> list\n\
 627 \n\
 628 Call readline() repeatedly and return a list of lines read.\n\
 629 The optional size argument, if given, is an approximate bound on the\n\
 630 total number of bytes in the lines returned.\n\
 631 ");
 632
 633 /* This is a hacked version of Python's fileobject.c:file_readlines(). */
 634 static PyObject *
 635 BZ2File_readlines(BZ2FileObject *self, PyObject *args)
 636 {
 637         long sizehint = 0;
 638         PyObject *list = NULL;
 639         PyObject *line;
 640         char small_buffer[SMALLCHUNK];
 641         char *buffer = small_buffer;
 642         size_t buffersize = SMALLCHUNK;
 643         PyObject *big_buffer = NULL;
 644         size_t nfilled = 0;
 645         size_t nread;
 646         size_t totalread = 0;
 647         char *p, *q, *end;
 648         int err;
 649         int shortread = 0;
 650         int bzerror;
 651
 652         if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
 653                 return NULL;
 654
 655         ACQUIRE_LOCK(self);
 656         switch (self->mode) {
 657                 case MODE_READ:
 658                         break;
 659                 case MODE_READ_EOF:
 660                         list = PyList_New(0);
 661                         goto cleanup;
 662                 case MODE_CLOSED:
 663                         PyErr_SetString(PyExc_ValueError,
 664                                         "I/O operation on closed file");
 665                         goto cleanup;
 666                 default:
 667                         PyErr_SetString(PyExc_IOError,
 668                                         "file is not ready for reading");
 669                         goto cleanup;
 670         }
 671
 672         if ((list = PyList_New(0)) == NULL)
 673                 goto cleanup;
 674
 675         for (;;) {
 676                 Py_BEGIN_ALLOW_THREADS
 677                 nread = Util_UnivNewlineRead(&bzerror, self->fp,
 678                                              buffer+nfilled,
 679                                              buffersize-nfilled, self);
 680                 self->pos += nread;
 681                 Py_END_ALLOW_THREADS
 682                 if (bzerror == BZ_STREAM_END) {
 683                         self->size = self->pos;
 684                         self->mode = MODE_READ_EOF;
 685                         if (nread == 0) {
 686                                 sizehint = 0;
 687                                 break;
 688                         }
 689                         shortread = 1;
 690                 } else if (bzerror != BZ_OK) {
 691                         Util_CatchBZ2Error(bzerror);
 692                   error:
 693                         Py_DECREF(list);
 694                         list = NULL;
 695                         goto cleanup;
 696                 }
 697                 totalread += nread;
 698                 p = memchr(buffer+nfilled, '\n', nread);
 699                 if (!shortread && p == NULL) {
 700                         /* Need a larger buffer to fit this line */
 701                         nfilled += nread;
 702                         buffersize *= 2;
 703                         if (buffersize > INT_MAX) {
 704                                 PyErr_SetString(PyExc_OverflowError,
 705                                 "line is longer than a Python string can hold");
 706                                 goto error;
 707                         }
 708                         if (big_buffer == NULL) {
 709                                 /* Create the big buffer */
 710                                 big_buffer = PyString_FromStringAndSize(
 711                                         NULL, buffersize);
 712                                 if (big_buffer == NULL)
 713                                         goto error;
 714                                 buffer = PyString_AS_STRING(big_buffer);
 715                                 memcpy(buffer, small_buffer, nfilled);
 716                         }
 717                         else {
 718                                 /* Grow the big buffer */
 719                                 _PyString_Resize(&big_buffer, buffersize);
 720                                 buffer = PyString_AS_STRING(big_buffer);
 721                         }
 722                         continue;
 723                 }
 724                 end = buffer+nfilled+nread;
 725                 q = buffer;
 726                 while (p != NULL) {
 727                         /* Process complete lines */
 728                         p++;
 729                         line = PyString_FromStringAndSize(q, p-q);
 730                         if (line == NULL)
 731                                 goto error;
 732                         err = PyList_Append(list, line);
 733                         Py_DECREF(line);
 734                         if (err != 0)
 735                                 goto error;
 736                         q = p;
 737                         p = memchr(q, '\n', end-q);
 738                 }
 739                 /* Move the remaining incomplete line to the start */
 740                 nfilled = end-q;
 741                 memmove(buffer, q, nfilled);
 742                 if (sizehint > 0)
 743                         if (totalread >= (size_t)sizehint)
 744                                 break;
 745                 if (shortread) {
 746                         sizehint = 0;
 747                         break;
 748                 }
 749         }
 750         if (nfilled != 0) {
 751                 /* Partial last line */
 752                 line = PyString_FromStringAndSize(buffer, nfilled);
 753                 if (line == NULL)
 754                         goto error;
 755                 if (sizehint > 0) {
 756                         /* Need to complete the last line */
 757                         PyObject *rest = Util_GetLine(self, 0);
 758                         if (rest == NULL) {
 759                                 Py_DECREF(line);
 760                                 goto error;
 761                         }
 762                         PyString_Concat(&line, rest);
 763                         Py_DECREF(rest);
 764                         if (line == NULL)
 765                                 goto error;
 766                 }
 767                 err = PyList_Append(list, line);
 768                 Py_DECREF(line);
 769                 if (err != 0)
 770                         goto error;
 771         }
 772
 773   cleanup:
 774         RELEASE_LOCK(self);
 775         if (big_buffer) {
 776                 Py_DECREF(big_buffer);
 777         }
 778         return list;
 779 }
 780
 781 PyDoc_STRVAR(BZ2File_xreadlines__doc__,
 782 "xreadlines() -> self\n\
 783 \n\
 784 For backward compatibility. BZ2File objects now include the performance\n\
 785 optimizations previously implemented in the xreadlines module.\n\
 786 ");
 787
 788 PyDoc_STRVAR(BZ2File_write__doc__,
 789 "write(data) -> None\n\
 790 \n\
 791 Write the 'data' string to file. Note that due to buffering, close() may\n\
 792 be needed before the file on disk reflects the data written.\n\
 793 ");
 794
 795 /* This is a hacked version of Python's fileobject.c:file_write(). */
 796 static PyObject *
 797 BZ2File_write(BZ2FileObject *self, PyObject *args)
 798 {
 799         PyObject *ret = NULL;
 800         char *buf;
 801         int len;
 802         int bzerror;
 803
 804         if (!PyArg_ParseTuple(args, "s#:write", &buf, &len))
 805                 return NULL;
 806
 807         ACQUIRE_LOCK(self);
 808         switch (self->mode) {
 809                 case MODE_WRITE:
 810                         break;
 811
 812                 case MODE_CLOSED:
 813                         PyErr_SetString(PyExc_ValueError,
 814                                         "I/O operation on closed file");
 815                         goto cleanup;;
 816
 817                 default:
 818                         PyErr_SetString(PyExc_IOError,
 819                                         "file is not ready for writing");
 820                         goto cleanup;;
 821         }
 822
 823         self->f_softspace = 0;
 824
 825         Py_BEGIN_ALLOW_THREADS
 826         BZ2_bzWrite (&bzerror, self->fp, buf, len);
 827         self->pos += len;
 828         Py_END_ALLOW_THREADS
 829
 830         if (bzerror != BZ_OK) {
 831                 Util_CatchBZ2Error(bzerror);
 832                 goto cleanup;
 833         }
 834
 835         Py_INCREF(Py_None);
 836         ret = Py_None;
 837
 838 cleanup:
 839         RELEASE_LOCK(self);
 840         return ret;
 841 }
 842
 843 PyDoc_STRVAR(BZ2File_writelines__doc__,
 844 "writelines(sequence_of_strings) -> None\n\
 845 \n\
 846 Write the sequence of strings to the file. Note that newlines are not\n\
 847 added. The sequence can be any iterable object producing strings. This is\n\
 848 equivalent to calling write() for each string.\n\
 849 ");
 850
 851 /* This is a hacked version of Python's fileobject.c:file_writelines(). */
 852 static PyObject *
 853 BZ2File_writelines(BZ2FileObject *self, PyObject *seq)
 854 {
 855 #define CHUNKSIZE 1000
 856         PyObject *list = NULL;
 857         PyObject *iter = NULL;
 858         PyObject *ret = NULL;
 859         PyObject *line;
 860         int i, j, index, len, islist;
 861         int bzerror;
 862
 863         ACQUIRE_LOCK(self);
 864         islist = PyList_Check(seq);
 865         if  (!islist) {
 866                 iter = PyObject_GetIter(seq);
 867                 if (iter == NULL) {
 868                         PyErr_SetString(PyExc_TypeError,
 869                                 "writelines() requires an iterable argument");
 870                         goto error;
 871                 }
 872                 list = PyList_New(CHUNKSIZE);
 873                 if (list == NULL)
 874                         goto error;
 875         }
 876
 877         /* Strategy: slurp CHUNKSIZE lines into a private list,
 878            checking that they are all strings, then write that list
 879            without holding the interpreter lock, then come back for more. */
 880         for (index = 0; ; index += CHUNKSIZE) {
 881                 if (islist) {
 882                         Py_XDECREF(list);
 883                         list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
 884                         if (list == NULL)
 885                                 goto error;
 886                         j = PyList_GET_SIZE(list);
 887                 }
 888                 else {
 889                         for (j = 0; j < CHUNKSIZE; j++) {
 890                                 line = PyIter_Next(iter);
 891                                 if (line == NULL) {
 892                                         if (PyErr_Occurred())
 893                                                 goto error;
 894                                         break;
 895                                 }
 896                                 PyList_SetItem(list, j, line);
 897                         }
 898                 }
 899                 if (j == 0)
 900                         break;
 901
 902                 /* Check that all entries are indeed strings. If not,
 903                    apply the same rules as for file.write() and
 904                    convert the rets to strings. This is slow, but
 905                    seems to be the only way since all conversion APIs
 906                    could potentially execute Python code. */
 907                 for (i = 0; i < j; i++) {
 908                         PyObject *v = PyList_GET_ITEM(list, i);
 909                         if (!PyString_Check(v)) {
 910                                 const char *buffer;
 911                                 int len;
 912                                 if (PyObject_AsCharBuffer(v, &buffer, &len)) {
 913                                         PyErr_SetString(PyExc_TypeError,
 914                                                         "writelines() "
 915                                                         "argument must be "
 916                                                         "a sequence of "
 917                                                         "strings");
 918                                         goto error;
 919                                 }
 920                                 line = PyString_FromStringAndSize(buffer,
 921                                                                   len);
 922                                 if (line == NULL)
 923                                         goto error;
 924                                 Py_DECREF(v);
 925                                 PyList_SET_ITEM(list, i, line);
 926                         }
 927                 }
 928
 929                 self->f_softspace = 0;
 930
 931                 /* Since we are releasing the global lock, the
 932                    following code may *not* execute Python code. */
 933                 Py_BEGIN_ALLOW_THREADS
 934                 for (i = 0; i < j; i++) {
 935                         line = PyList_GET_ITEM(list, i);
 936                         len = PyString_GET_SIZE(line);
 937                         BZ2_bzWrite (&bzerror, self->fp,
 938                                      PyString_AS_STRING(line), len);
 939                         if (bzerror != BZ_OK) {
 940                                 Py_BLOCK_THREADS
 941                                 Util_CatchBZ2Error(bzerror);
 942                                 goto error;
 943                         }
 944                 }
 945                 Py_END_ALLOW_THREADS
 946
 947                 if (j < CHUNKSIZE)
 948                         break;
 949         }
 950
 951         Py_INCREF(Py_None);
 952         ret = Py_None;
 953
 954   error:
 955         RELEASE_LOCK(self);
 956         Py_XDECREF(list);
 957         Py_XDECREF(iter);
 958         return ret;
 959 #undef CHUNKSIZE
 960 }
 961
 962 PyDoc_STRVAR(BZ2File_seek__doc__,
 963 "seek(offset [, whence]) -> None\n\
 964 \n\
 965 Move to new file position. Argument offset is a byte count. Optional\n\
 966 argument whence defaults to 0 (offset from start of file, offset\n\
 967 should be >= 0); other values are 1 (move relative to current position,\n\
 968 positive or negative), and 2 (move relative to end of file, usually\n\
 969 negative, although many platforms allow seeking beyond the end of a file).\n\
 970 \n\
 971 Note that seeking of bz2 files is emulated, and depending on the parameters\n\
 972 the operation may be extremely slow.\n\
 973 ");
 974
 975 static PyObject *
 976 BZ2File_seek(BZ2FileObject *self, PyObject *args)
 977 {
 978         int where = 0;
 979         PyObject *offobj;
 980         Py_off_t offset;
 981         char small_buffer[SMALLCHUNK];
 982         char *buffer = small_buffer;
 983         size_t buffersize = SMALLCHUNK;
 984         int bytesread = 0;
 985         size_t readsize;
 986         int chunksize;
 987         int bzerror;
 988         int rewind = 0;
 989         PyObject *ret = NULL;
 990
 991         if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &where))
 992                 return NULL;
 993 #if !defined(HAVE_LARGEFILE_SUPPORT)
 994         offset = PyInt_AsLong(offobj);
 995 #else
 996         offset = PyLong_Check(offobj) ?
 997                 PyLong_AsLongLong(offobj) : PyInt_AsLong(offobj);
 998 #endif
 999         if (PyErr_Occurred())
1000                 return NULL;
1001
1002         ACQUIRE_LOCK(self);
1003         Util_DropReadAhead(self);
1004         switch (self->mode) {
1005                 case MODE_READ:
1006                 case MODE_READ_EOF:
1007                         break;
1008
1009                 case MODE_CLOSED:
1010                         PyErr_SetString(PyExc_ValueError,
1011                                         "I/O operation on closed file");
1012                         goto cleanup;;
1013
1014                 default:
1015                         PyErr_SetString(PyExc_IOError,
1016                                         "seek works only while reading");
1017                         goto cleanup;;
1018         }
1019
1020         if (offset < 0) {
1021                 if (where == 1) {
1022                         offset = self->pos + offset;
1023                         rewind = 1;
1024                 } else if (where == 2) {
1025                         if (self->size == -1) {
1026                                 assert(self->mode != MODE_READ_EOF);
1027                                 for (;;) {
1028                                         Py_BEGIN_ALLOW_THREADS
1029                                         chunksize = Util_UnivNewlineRead(
1030                                                         &bzerror, self->fp,
1031                                                         buffer, buffersize,
1032                                                         self);
1033                                         self->pos += chunksize;
1034                                         Py_END_ALLOW_THREADS
1035
1036                                         bytesread += chunksize;
1037                                         if (bzerror == BZ_STREAM_END) {
1038                                                 break;
1039                                         } else if (bzerror != BZ_OK) {
1040                                                 Util_CatchBZ2Error(bzerror);
1041                                                 goto cleanup;
1042                                         }
1043                                 }
1044                                 self->mode = MODE_READ_EOF;
1045                                 self->size = self->pos;
1046                                 bytesread = 0;
1047                         }
1048                         offset = self->size + offset;
1049                         if (offset >= self->pos)
1050                                 offset -= self->pos;
1051                         else
1052                                 rewind = 1;
1053                 }
1054                 if (offset < 0)
1055                         offset = 0;
1056         } else if (where == 0) {
1057                 if (offset >= self->pos)
1058                         offset -= self->pos;
1059                 else
1060                         rewind = 1;
1061         }
1062
1063         if (rewind) {
1064                 BZ2_bzReadClose(&bzerror, self->fp);
1065                 if (bzerror != BZ_OK) {
1066                         Util_CatchBZ2Error(bzerror);
1067                         goto cleanup;
1068                 }
1069                 ret = PyObject_CallMethod(self->file, "seek", "(i)", 0);
1070                 if (!ret)
1071                         goto cleanup;
1072                 Py_DECREF(ret);
1073                 ret = NULL;
1074                 self->pos = 0;
1075                 self->fp = BZ2_bzReadOpen(&bzerror, PyFile_AsFile(self->file),
1076                                           0, 0, NULL, 0);
1077                 if (bzerror != BZ_OK) {
1078                         Util_CatchBZ2Error(bzerror);
1079                         goto cleanup;
1080                 }
1081                 self->mode = MODE_READ;
1082         } else if (self->mode == MODE_READ_EOF) {
1083                 goto exit;
1084         }
1085
1086         if (offset == 0)
1087                 goto exit;
1088
1089         /* Before getting here, offset must be set to the number of bytes
1090          * to walk forward. */
1091         for (;;) {
1092                 if (offset-bytesread > buffersize)
1093                         readsize = buffersize;
1094                 else
1095                         /* offset might be wider that readsize, but the result
1096                          * of the subtraction is bound by buffersize (see the
1097                          * condition above). buffersize is 8192. */
1098                         readsize = (size_t)(offset-bytesread);
1099                 Py_BEGIN_ALLOW_THREADS
1100                 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
1101                                                  buffer, readsize, self);
1102                 self->pos += chunksize;
1103                 Py_END_ALLOW_THREADS
1104                 bytesread += chunksize;
1105                 if (bzerror == BZ_STREAM_END) {
1106                         self->size = self->pos;
1107                         self->mode = MODE_READ_EOF;
1108                         break;
1109                 } else if (bzerror != BZ_OK) {
1110                         Util_CatchBZ2Error(bzerror);
1111                         goto cleanup;
1112                 }
1113                 if (bytesread == offset)
1114                         break;
1115         }
1116
1117 exit:
1118         Py_INCREF(Py_None);
1119         ret = Py_None;
1120
1121 cleanup:
1122         RELEASE_LOCK(self);
1123         return ret;
1124 }
1125
1126 PyDoc_STRVAR(BZ2File_tell__doc__,
1127 "tell() -> int\n\
1128 \n\
1129 Return the current file position, an integer (may be a long integer).\n\
1130 ");
1131
1132 static PyObject *
1133 BZ2File_tell(BZ2FileObject *self, PyObject *args)
1134 {
1135         PyObject *ret = NULL;
1136
1137         if (self->mode == MODE_CLOSED) {
1138                 PyErr_SetString(PyExc_ValueError,
1139                                 "I/O operation on closed file");
1140                 goto cleanup;
1141         }
1142
1143 #if !defined(HAVE_LARGEFILE_SUPPORT)
1144         ret = PyInt_FromLong(self->pos);
1145 #else
1146         ret = PyLong_FromLongLong(self->pos);
1147 #endif
1148
1149 cleanup:
1150         return ret;
1151 }
1152
1153 PyDoc_STRVAR(BZ2File_close__doc__,
1154 "close() -> None or (perhaps) an integer\n\
1155 \n\
1156 Close the file. Sets data attribute .closed to true. A closed file\n\
1157 cannot be used for further I/O operations. close() may be called more\n\
1158 than once without error.\n\
1159 ");
1160
1161 static PyObject *
1162 BZ2File_close(BZ2FileObject *self)
1163 {
1164         PyObject *ret = NULL;
1165         int bzerror = BZ_OK;
1166
1167         ACQUIRE_LOCK(self);
1168         switch (self->mode) {
1169                 case MODE_READ:
1170                 case MODE_READ_EOF:
1171                         BZ2_bzReadClose(&bzerror, self->fp);
1172                         break;
1173                 case MODE_WRITE:
1174                         BZ2_bzWriteClose(&bzerror, self->fp,
1175                                          0, NULL, NULL);
1176                         break;
1177         }
1178         self->mode = MODE_CLOSED;
1179         ret = PyObject_CallMethod(self->file, "close", NULL);
1180         if (bzerror != BZ_OK) {
1181                 Util_CatchBZ2Error(bzerror);
1182                 Py_XDECREF(ret);
1183                 ret = NULL;
1184         }
1185
1186         RELEASE_LOCK(self);
1187         return ret;
1188 }
1189
/* Forward declaration: BZ2File_getiter is referenced by the "xreadlines"
 * entry of the table below. */
static PyObject *BZ2File_getiter(BZ2FileObject *self);

/* Method table for BZ2File objects: name, C function, calling convention
 * flag and docstring, closed by a NULL sentinel entry.
 * NOTE(review): "xreadlines" is registered with METH_VARARGS although
 * BZ2File_getiter is declared with only a self parameter, so arguments
 * passed to xreadlines() look like they are silently ignored -- confirm
 * whether METH_NOARGS was intended. */
static PyMethodDef BZ2File_methods[] = {
        {"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__},
        {"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__doc__},
        {"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readlines__doc__},
        {"xreadlines", (PyCFunction)BZ2File_getiter, METH_VARARGS, BZ2File_xreadlines__doc__},
        {"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__},
        {"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__doc__},
        {"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__},
        {"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__},
        {"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__},
        {NULL,          NULL}           /* sentinel */
};
1204
1205
1206 /* ===================================================================== */
1207 /* Getters and setters of BZ2File. */
1208
1209 /* This is a hacked version of Python's fileobject.c:get_newlines(). */
1210 static PyObject *
1211 BZ2File_get_newlines(BZ2FileObject *self, void *closure)
1212 {
1213         switch (self->f_newlinetypes) {
1214         case NEWLINE_UNKNOWN:
1215                 Py_INCREF(Py_None);
1216                 return Py_None;
1217         case NEWLINE_CR:
1218                 return PyString_FromString("\r");
1219         case NEWLINE_LF:
1220                 return PyString_FromString("\n");
1221         case NEWLINE_CR|NEWLINE_LF:
1222                 return Py_BuildValue("(ss)", "\r", "\n");
1223         case NEWLINE_CRLF:
1224                 return PyString_FromString("\r\n");
1225         case NEWLINE_CR|NEWLINE_CRLF:
1226                 return Py_BuildValue("(ss)", "\r", "\r\n");
1227         case NEWLINE_LF|NEWLINE_CRLF:
1228                 return Py_BuildValue("(ss)", "\n", "\r\n");
1229         case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF:
1230                 return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
1231         default:
1232                 PyErr_Format(PyExc_SystemError,
1233                              "Unknown newlines value 0x%x\n",
1234                              self->f_newlinetypes);
1235                 return NULL;
1236         }
1237 }
1238
1239 static PyObject *
1240 BZ2File_get_closed(BZ2FileObject *self, void *closure)
1241 {
1242         return PyInt_FromLong(self->mode == MODE_CLOSED);
1243 }
1244
1245 static PyObject *
1246 BZ2File_get_mode(BZ2FileObject *self, void *closure)
1247 {
1248         return PyObject_GetAttrString(self->file, "mode");
1249 }
1250
1251 static PyObject *
1252 BZ2File_get_name(BZ2FileObject *self, void *closure)
1253 {
1254         return PyObject_GetAttrString(self->file, "name");
1255 }
1256
/* Computed attributes of BZ2File objects. Every entry supplies a getter and
 * a NULL setter; the table ends with a NULL sentinel entry. */
static PyGetSetDef BZ2File_getset[] = {
        {"closed", (getter)BZ2File_get_closed, NULL,
                        "True if the file is closed"},
        {"newlines", (getter)BZ2File_get_newlines, NULL,
                        "end-of-line convention used in this file"},
        {"mode", (getter)BZ2File_get_mode, NULL,
                        "file mode ('r', 'w', or 'U')"},
        {"name", (getter)BZ2File_get_name, NULL,
                        "file name"},
        {NULL}  /* Sentinel */
};
1268
1269
1270 /* ===================================================================== */
1271 /* Members of BZ2File_Type. */
1272
/* OFF(x) is the byte offset of field x inside BZ2FileObject; any earlier
 * definition of OFF is dropped first. */
#undef OFF
#define OFF(x) offsetof(BZ2FileObject, x)

/* Struct fields exposed directly as attributes: only "softspace", an int
 * stored in f_softspace. The table ends with a NULL sentinel entry. */
static PyMemberDef BZ2File_members[] = {
        {"softspace",   T_INT,          OFF(f_softspace), 0,
         "flag indicating that a space needs to be printed; used by print"},
        {NULL}  /* Sentinel */
};
1281
1282 /* ===================================================================== */
1283 /* Slot definitions for BZ2File_Type. */
1284
1285 static int
1286 BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
1287 {
1288         static const char *kwlist[] = {"filename", "mode", "buffering",
1289                                        "compresslevel", 0};
1290         PyObject *name;
1291         char *mode = "r";
1292         int buffering = -1;
1293         int compresslevel = 9;
1294         int bzerror;
1295         int mode_char = 0;
1296
1297         self->size = -1;
1298
1299         if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|sii:BZ2File",
1300                                          kwlist, &name, &mode, &buffering,
1301                                          &compresslevel))
1302                 return -1;
1303
1304         if (compresslevel < 1 || compresslevel > 9) {
1305                 PyErr_SetString(PyExc_ValueError,
1306                                 "compresslevel must be between 1 and 9");
1307                 return -1;
1308         }
1309
1310         for (;;) {
1311                 int error = 0;
1312                 switch (*mode) {
1313                         case 'r':
1314                         case 'w':
1315                                 if (mode_char)
1316                                         error = 1;
1317                                 mode_char = *mode;
1318                                 break;
1319
1320                         case 'b':
1321                                 break;
1322
1323                         case 'U':
1324                                 self->f_univ_newline = 1;
1325                                 break;
1326
1327                         default:
1328                                 error = 1;
1329                                 break;
1330                 }
1331                 if (error) {
1332                         PyErr_Format(PyExc_ValueError,
1333                                      "invalid mode char %c", *mode);
1334                         return -1;
1335                 }
1336                 mode++;
1337                 if (*mode == '\0')
1338                         break;
1339         }
1340
1341         if (mode_char == 0) {
1342                 mode_char = 'r';
1343         }
1344
1345         mode = (mode_char == 'r') ? "rb" : "wb";
1346
1347         self->file = PyObject_CallFunction((PyObject*)&PyFile_Type, "(Osi)",
1348                                            name, mode, buffering);
1349         if (self->file == NULL)
1350                 return -1;
1351
1352         /* From now on, we have stuff to dealloc, so jump to error label
1353          * instead of returning */
1354
1355 #ifdef WITH_THREAD
1356         self->lock = PyThread_allocate_lock();
1357         if (!self->lock)
1358                 goto error;
1359 #endif
1360
1361         if (mode_char == 'r')
1362                 self->fp = BZ2_bzReadOpen(&bzerror,
1363                                           PyFile_AsFile(self->file),
1364                                           0, 0, NULL, 0);
1365         else
1366                 self->fp = BZ2_bzWriteOpen(&bzerror,
1367                                            PyFile_AsFile(self->file),
1368                                            compresslevel, 0, 0);
1369
1370         if (bzerror != BZ_OK) {
1371                 Util_CatchBZ2Error(bzerror);
1372                 goto error;
1373         }
1374
1375         self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE;
1376
1377         return 0;
1378
1379 error:
1380         Py_DECREF(self->file);
1381 #ifdef WITH_THREAD
1382         if (self->lock)
1383                 PyThread_free_lock(self->lock);
1384 #endif
1385         return -1;
1386 }
1387
1388 static void
1389 BZ2File_dealloc(BZ2FileObject *self)
1390 {
1391         int bzerror;
1392 #ifdef WITH_THREAD
1393         if (self->lock)
1394                 PyThread_free_lock(self->lock);
1395 #endif
1396         switch (self->mode) {
1397                 case MODE_READ:
1398                 case MODE_READ_EOF:
1399                         BZ2_bzReadClose(&bzerror, self->fp);
1400                         break;
1401                 case MODE_WRITE:
1402                         BZ2_bzWriteClose(&bzerror, self->fp,
1403                                          0, NULL, NULL);
1404                         break;
1405         }
1406         Util_DropReadAhead(self);
1407         Py_XDECREF(self->file);
1408         self->ob_type->tp_free((PyObject *)self);
1409 }
1410
1411 /* This is a hacked version of Python's fileobject.c:file_getiter(). */
1412 static PyObject *
1413 BZ2File_getiter(BZ2FileObject *self)
1414 {
1415         if (self->mode == MODE_CLOSED) {
1416                 PyErr_SetString(PyExc_ValueError,
1417                                 "I/O operation on closed file");
1418                 return NULL;
1419         }
1420         Py_INCREF((PyObject*)self);
1421         return (PyObject *)self;
1422 }
1423
1424 /* This is a hacked version of Python's fileobject.c:file_iternext(). */
1425 #define READAHEAD_BUFSIZE 8192
1426 static PyObject *
1427 BZ2File_iternext(BZ2FileObject *self)
1428 {
1429         PyStringObject* ret;
1430         ACQUIRE_LOCK(self);
1431         if (self->mode == MODE_CLOSED) {
1432                 PyErr_SetString(PyExc_ValueError,
1433                                 "I/O operation on closed file");
1434                 return NULL;
1435         }
1436         ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE);
1437         RELEASE_LOCK(self);
1438         if (ret == NULL || PyString_GET_SIZE(ret) == 0) {
1439                 Py_XDECREF(ret);
1440                 return NULL;
1441         }
1442         return (PyObject *)ret;
1443 }
1444
1445 /* ===================================================================== */
1446 /* BZ2File_Type definition. */
1447
/* Type docstring for BZ2File, assembled from two adjacent PyDoc_STR pieces.
 * NOTE(review): the signature line advertises buffering=0, while
 * BZ2File_init initialises its local default to -1 -- confirm which value
 * the documentation should state. */
PyDoc_VAR(BZ2File__doc__) =
PyDoc_STR(
"BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
\n\
Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
writing. When opened for writing, the file will be created if it doesn't\n\
exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
is given, must be a number between 1 and 9.\n\
")
PyDoc_STR(
"\n\
Add a 'U' to mode to open the file for input with universal newline\n\
support. Any line ending in the input file will be seen as a '\\n' in\n\
Python. Also, a file so opened gains the attribute 'newlines'; the value\n\
for this attribute is one of None (no newline read yet), '\\r', '\\n',\n\
'\\r\\n' or a tuple containing all the newline types seen. Universal\n\
newlines are available only when reading.\n\
")
;
1468
/* Type object for bz2.BZ2File. The initializer is positional, so the order
 * of the entries must not change; each line names the slot it fills.
 * Non-zero slots: deallocation, generic attribute get/set, the type
 * docstring, iterator support (tp_iter / tp_iternext), the method, member
 * and getset tables, tp_init, and the generic alloc/new/free functions.
 * Py_TPFLAGS_BASETYPE is set, so the type can be subclassed. */
static PyTypeObject BZ2File_Type = {
        PyObject_HEAD_INIT(NULL)
        0,                      /*ob_size*/
        "bz2.BZ2File",          /*tp_name*/
        sizeof(BZ2FileObject),  /*tp_basicsize*/
        0,                      /*tp_itemsize*/
        (destructor)BZ2File_dealloc, /*tp_dealloc*/
        0,                      /*tp_print*/
        0,                      /*tp_getattr*/
        0,                      /*tp_setattr*/
        0,                      /*tp_compare*/
        0,                      /*tp_repr*/
        0,                      /*tp_as_number*/
        0,                      /*tp_as_sequence*/
        0,                      /*tp_as_mapping*/
        0,                      /*tp_hash*/
        0,                      /*tp_call*/
        0,                      /*tp_str*/
        PyObject_GenericGetAttr,/*tp_getattro*/
        PyObject_GenericSetAttr,/*tp_setattro*/
        0,                      /*tp_as_buffer*/
        Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
        BZ2File__doc__,         /*tp_doc*/
        0,                      /*tp_traverse*/
        0,                      /*tp_clear*/
        0,                      /*tp_richcompare*/
        0,                      /*tp_weaklistoffset*/
        (getiterfunc)BZ2File_getiter, /*tp_iter*/
        (iternextfunc)BZ2File_iternext, /*tp_iternext*/
        BZ2File_methods,        /*tp_methods*/
        BZ2File_members,        /*tp_members*/
        BZ2File_getset,         /*tp_getset*/
        0,                      /*tp_base*/
        0,                      /*tp_dict*/
        0,                      /*tp_descr_get*/
        0,                      /*tp_descr_set*/
        0,                      /*tp_dictoffset*/
        (initproc)BZ2File_init, /*tp_init*/
        PyType_GenericAlloc,    /*tp_alloc*/
        PyType_GenericNew,      /*tp_new*/
        _PyObject_Del,          /*tp_free*/
        0,                      /*tp_is_gc*/
};
1512
1513
1514 /* ===================================================================== */
1515 /* Methods of BZ2Comp. */
1516
1517 PyDoc_STRVAR(BZ2Comp_compress__doc__,
1518 "compress(data) -> string\n\
1519 \n\
1520 Provide more data to the compressor object. It will return chunks of\n\
1521 compressed data whenever possible. When you've finished providing data\n\
1522 to compress, call the flush() method to finish the compression process,\n\
1523 and return what is left in the internal buffers.\n\
1524 ");
1525
1526 static PyObject *
1527 BZ2Comp_compress(BZ2CompObject *self, PyObject *args)
1528 {
1529         char *data;
1530         int datasize;
1531         int bufsize = SMALLCHUNK;
1532         PY_LONG_LONG totalout;
1533         PyObject *ret = NULL;
1534         bz_stream *bzs = &self->bzs;
1535         int bzerror;
1536
1537         if (!PyArg_ParseTuple(args, "s#:compress", &data, &datasize))
1538                 return NULL;
1539
1540         if (datasize == 0)
1541                 return PyString_FromString("");
1542
1543         ACQUIRE_LOCK(self);
1544         if (!self->running) {
1545                 PyErr_SetString(PyExc_ValueError,
1546                                 "this object was already flushed");
1547                 goto error;
1548         }
1549
1550         ret = PyString_FromStringAndSize(NULL, bufsize);
1551         if (!ret)
1552                 goto error;
1553
1554         bzs->next_in = data;
1555         bzs->avail_in = datasize;
1556         bzs->next_out = BUF(ret);
1557         bzs->avail_out = bufsize;
1558
1559         totalout = BZS_TOTAL_OUT(bzs);
1560
1561         for (;;) {
1562                 Py_BEGIN_ALLOW_THREADS
1563                 bzerror = BZ2_bzCompress(bzs, BZ_RUN);
1564                 Py_END_ALLOW_THREADS
1565                 if (bzerror != BZ_RUN_OK) {
1566                         Util_CatchBZ2Error(bzerror);
1567                         goto error;
1568                 }
1569                 if (bzs->avail_out == 0) {
1570                         bufsize = Util_NewBufferSize(bufsize);
1571                         if (_PyString_Resize(&ret, bufsize) < 0) {
1572                                 BZ2_bzCompressEnd(bzs);
1573                                 goto error;
1574                         }
1575                         bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1576                                                     - totalout);
1577                         bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1578                 } else if (bzs->avail_in == 0) {
1579                         break;
1580                 }
1581         }
1582
1583         _PyString_Resize(&ret, (int)(BZS_TOTAL_OUT(bzs) - totalout));
1584
1585         RELEASE_LOCK(self);
1586         return ret;
1587
1588 error:
1589         RELEASE_LOCK(self);
1590         Py_XDECREF(ret);
1591         return NULL;
1592 }
1593
1594 PyDoc_STRVAR(BZ2Comp_flush__doc__,
1595 "flush() -> string\n\
1596 \n\
1597 Finish the compression process and return what is left in internal buffers.\n\
1598 You must not use the compressor object after calling this method.\n\
1599 ");
1600
1601 static PyObject *
1602 BZ2Comp_flush(BZ2CompObject *self)
1603 {
1604         int bufsize = SMALLCHUNK;
1605         PyObject *ret = NULL;
1606         bz_stream *bzs = &self->bzs;
1607         PY_LONG_LONG totalout;
1608         int bzerror;
1609
1610         ACQUIRE_LOCK(self);
1611         if (!self->running) {
1612                 PyErr_SetString(PyExc_ValueError, "object was already "
1613                                                   "flushed");
1614                 goto error;
1615         }
1616         self->running = 0;
1617
1618         ret = PyString_FromStringAndSize(NULL, bufsize);
1619         if (!ret)
1620                 goto error;
1621
1622         bzs->next_out = BUF(ret);
1623         bzs->avail_out = bufsize;
1624
1625         totalout = BZS_TOTAL_OUT(bzs);
1626
1627         for (;;) {
1628                 Py_BEGIN_ALLOW_THREADS
1629                 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1630                 Py_END_ALLOW_THREADS
1631                 if (bzerror == BZ_STREAM_END) {
1632                         break;
1633                 } else if (bzerror != BZ_FINISH_OK) {
1634                         Util_CatchBZ2Error(bzerror);
1635                         goto error;
1636                 }
1637                 if (bzs->avail_out == 0) {
1638                         bufsize = Util_NewBufferSize(bufsize);
1639                         if (_PyString_Resize(&ret, bufsize) < 0)
1640                                 goto error;
1641                         bzs->next_out = BUF(ret);
1642                         bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1643                                                     - totalout);
1644                         bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1645                 }
1646         }
1647
1648         if (bzs->avail_out != 0)
1649                 _PyString_Resize(&ret, (int)(BZS_TOTAL_OUT(bzs) - totalout));
1650
1651         RELEASE_LOCK(self);
1652         return ret;
1653
1654 error:
1655         RELEASE_LOCK(self);
1656         Py_XDECREF(ret);
1657         return NULL;
1658 }
1659
1660 static PyMethodDef BZ2Comp_methods[] = {
1661         {"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS,
1662          BZ2Comp_compress__doc__},
1663         {"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS,
1664          BZ2Comp_flush__doc__},
1665         {NULL,          NULL}           /* sentinel */
1666 };
1667
1668
1669 /* ===================================================================== */
1670 /* Slot definitions for BZ2Comp_Type. */
1671
1672 static int
1673 BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs)
1674 {
1675         int compresslevel = 9;
1676         int bzerror;
1677         static const char *kwlist[] = {"compresslevel", 0};
1678
1679         if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor",
1680                                          kwlist, &compresslevel))
1681                 return -1;
1682
1683         if (compresslevel < 1 || compresslevel > 9) {
1684                 PyErr_SetString(PyExc_ValueError,
1685                                 "compresslevel must be between 1 and 9");
1686                 goto error;
1687         }
1688
1689 #ifdef WITH_THREAD
1690         self->lock = PyThread_allocate_lock();
1691         if (!self->lock)
1692                 goto error;
1693 #endif
1694
1695         memset(&self->bzs, 0, sizeof(bz_stream));
1696         bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
1697         if (bzerror != BZ_OK) {
1698                 Util_CatchBZ2Error(bzerror);
1699                 goto error;
1700         }
1701
1702         self->running = 1;
1703
1704         return 0;
1705 error:
1706 #ifdef WITH_THREAD
1707         if (self->lock)
1708                 PyThread_free_lock(self->lock);
1709 #endif
1710         return -1;
1711 }
1712
1713 static void
1714 BZ2Comp_dealloc(BZ2CompObject *self)
1715 {
1716 #ifdef WITH_THREAD
1717         if (self->lock)
1718                 PyThread_free_lock(self->lock);
1719 #endif
1720         BZ2_bzCompressEnd(&self->bzs);
1721         self->ob_type->tp_free((PyObject *)self);
1722 }
1723
1724
1725 /* ===================================================================== */
1726 /* BZ2Comp_Type definition. */
1727
1728 PyDoc_STRVAR(BZ2Comp__doc__,
1729 "BZ2Compressor([compresslevel=9]) -> compressor object\n\
1730 \n\
1731 Create a new compressor object. This object may be used to compress\n\
1732 data sequentially. If you want to compress data in one shot, use the\n\
1733 compress() function instead. The compresslevel parameter, if given,\n\
1734 must be a number between 1 and 9.\n\
1735 ");
1736
1737 static PyTypeObject BZ2Comp_Type = {
1738         PyObject_HEAD_INIT(NULL)
1739         0,                      /*ob_size*/
1740         "bz2.BZ2Compressor",    /*tp_name*/
1741         sizeof(BZ2CompObject),  /*tp_basicsize*/
1742         0,                      /*tp_itemsize*/
1743         (destructor)BZ2Comp_dealloc, /*tp_dealloc*/
1744         0,                      /*tp_print*/
1745         0,                      /*tp_getattr*/
1746         0,                      /*tp_setattr*/
1747         0,                      /*tp_compare*/
1748         0,                      /*tp_repr*/
1749         0,                      /*tp_as_number*/
1750         0,                      /*tp_as_sequence*/
1751         0,                      /*tp_as_mapping*/
1752         0,                      /*tp_hash*/
1753         0,                      /*tp_call*/
1754         0,                      /*tp_str*/
1755         PyObject_GenericGetAttr,/*tp_getattro*/
1756         PyObject_GenericSetAttr,/*tp_setattro*/
1757         0,                      /*tp_as_buffer*/
1758         Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1759         BZ2Comp__doc__,         /*tp_doc*/
1760         0,                      /*tp_traverse*/
1761         0,                      /*tp_clear*/
1762         0,                      /*tp_richcompare*/
1763         0,                      /*tp_weaklistoffset*/
1764         0,                      /*tp_iter*/
1765         0,                      /*tp_iternext*/
1766         BZ2Comp_methods,        /*tp_methods*/
1767         0,                      /*tp_members*/
1768         0,                      /*tp_getset*/
1769         0,                      /*tp_base*/
1770         0,                      /*tp_dict*/
1771         0,                      /*tp_descr_get*/
1772         0,                      /*tp_descr_set*/
1773         0,                      /*tp_dictoffset*/
1774         (initproc)BZ2Comp_init, /*tp_init*/
1775         PyType_GenericAlloc,    /*tp_alloc*/
1776         PyType_GenericNew,      /*tp_new*/
1777         _PyObject_Del,          /*tp_free*/
1778         0,                      /*tp_is_gc*/
1779 };
1780
1781
1782 /* ===================================================================== */
1783 /* Members of BZ2Decomp. */
1784
1785 #undef OFF
1786 #define OFF(x) offsetof(BZ2DecompObject, x)
1787
1788 static PyMemberDef BZ2Decomp_members[] = {
1789         {"unused_data", T_OBJECT, OFF(unused_data), RO},
1790         {NULL}  /* Sentinel */
1791 };
1792
1793
1794 /* ===================================================================== */
1795 /* Methods of BZ2Decomp. */
1796
1797 PyDoc_STRVAR(BZ2Decomp_decompress__doc__,
1798 "decompress(data) -> string\n\
1799 \n\
1800 Provide more data to the decompressor object. It will return chunks\n\
1801 of decompressed data whenever possible. If you try to decompress data\n\
1802 after the end of stream is found, EOFError will be raised. If any data\n\
1803 was found after the end of stream, it'll be ignored and saved in\n\
1804 unused_data attribute.\n\
1805 ");
1806
1807 static PyObject *
1808 BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args)
1809 {
1810         char *data;
1811         int datasize;
1812         int bufsize = SMALLCHUNK;
1813         PY_LONG_LONG totalout;
1814         PyObject *ret = NULL;
1815         bz_stream *bzs = &self->bzs;
1816         int bzerror;
1817
1818         if (!PyArg_ParseTuple(args, "s#:decompress", &data, &datasize))
1819                 return NULL;
1820
1821         ACQUIRE_LOCK(self);
1822         if (!self->running) {
1823                 PyErr_SetString(PyExc_EOFError, "end of stream was "
1824                                                 "already found");
1825                 goto error;
1826         }
1827
1828         ret = PyString_FromStringAndSize(NULL, bufsize);
1829         if (!ret)
1830                 goto error;
1831
1832         bzs->next_in = data;
1833         bzs->avail_in = datasize;
1834         bzs->next_out = BUF(ret);
1835         bzs->avail_out = bufsize;
1836
1837         totalout = BZS_TOTAL_OUT(bzs);
1838
1839         for (;;) {
1840                 Py_BEGIN_ALLOW_THREADS
1841                 bzerror = BZ2_bzDecompress(bzs);
1842                 Py_END_ALLOW_THREADS
1843                 if (bzerror == BZ_STREAM_END) {
1844                         if (bzs->avail_in != 0) {
1845                                 Py_DECREF(self->unused_data);
1846                                 self->unused_data =
1847                                     PyString_FromStringAndSize(bzs->next_in,
1848                                                                bzs->avail_in);
1849                         }
1850                         self->running = 0;
1851                         break;
1852                 }
1853                 if (bzerror != BZ_OK) {
1854                         Util_CatchBZ2Error(bzerror);
1855                         goto error;
1856                 }
1857                 if (bzs->avail_out == 0) {
1858                         bufsize = Util_NewBufferSize(bufsize);
1859                         if (_PyString_Resize(&ret, bufsize) < 0) {
1860                                 BZ2_bzDecompressEnd(bzs);
1861                                 goto error;
1862                         }
1863                         bzs->next_out = BUF(ret);
1864                         bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1865                                                     - totalout);
1866                         bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1867                 } else if (bzs->avail_in == 0) {
1868                         break;
1869                 }
1870         }
1871
1872         if (bzs->avail_out != 0)
1873                 _PyString_Resize(&ret, (int)(BZS_TOTAL_OUT(bzs) - totalout));
1874
1875         RELEASE_LOCK(self);
1876         return ret;
1877
1878 error:
1879         RELEASE_LOCK(self);
1880         Py_XDECREF(ret);
1881         return NULL;
1882 }
1883
1884 static PyMethodDef BZ2Decomp_methods[] = {
1885         {"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_decompress__doc__},
1886         {NULL,          NULL}           /* sentinel */
1887 };
1888
1889
1890 /* ===================================================================== */
1891 /* Slot definitions for BZ2Decomp_Type. */
1892
1893 static int
1894 BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs)
1895 {
1896         int bzerror;
1897
1898         if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
1899                 return -1;
1900
1901 #ifdef WITH_THREAD
1902         self->lock = PyThread_allocate_lock();
1903         if (!self->lock)
1904                 goto error;
1905 #endif
1906
1907         self->unused_data = PyString_FromString("");
1908         if (!self->unused_data)
1909                 goto error;
1910
1911         memset(&self->bzs, 0, sizeof(bz_stream));
1912         bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
1913         if (bzerror != BZ_OK) {
1914                 Util_CatchBZ2Error(bzerror);
1915                 goto error;
1916         }
1917
1918         self->running = 1;
1919
1920         return 0;
1921
1922 error:
1923 #ifdef WITH_THREAD
1924         if (self->lock)
1925                 PyThread_free_lock(self->lock);
1926 #endif
1927         Py_XDECREF(self->unused_data);
1928         return -1;
1929 }
1930
1931 static void
1932 BZ2Decomp_dealloc(BZ2DecompObject *self)
1933 {
1934 #ifdef WITH_THREAD
1935         if (self->lock)
1936                 PyThread_free_lock(self->lock);
1937 #endif
1938         Py_XDECREF(self->unused_data);
1939         BZ2_bzDecompressEnd(&self->bzs);
1940         self->ob_type->tp_free((PyObject *)self);
1941 }
1942
1943
1944 /* ===================================================================== */
1945 /* BZ2Decomp_Type definition. */
1946
1947 PyDoc_STRVAR(BZ2Decomp__doc__,
1948 "BZ2Decompressor() -> decompressor object\n\
1949 \n\
1950 Create a new decompressor object. This object may be used to decompress\n\
1951 data sequentially. If you want to decompress data in one shot, use the\n\
1952 decompress() function instead.\n\
1953 ");
1954
1955 static PyTypeObject BZ2Decomp_Type = {
1956         PyObject_HEAD_INIT(NULL)
1957         0,                      /*ob_size*/
1958         "bz2.BZ2Decompressor",  /*tp_name*/
1959         sizeof(BZ2DecompObject), /*tp_basicsize*/
1960         0,                      /*tp_itemsize*/
1961         (destructor)BZ2Decomp_dealloc, /*tp_dealloc*/
1962         0,                      /*tp_print*/
1963         0,                      /*tp_getattr*/
1964         0,                      /*tp_setattr*/
1965         0,                      /*tp_compare*/
1966         0,                      /*tp_repr*/
1967         0,                      /*tp_as_number*/
1968         0,                      /*tp_as_sequence*/
1969         0,                      /*tp_as_mapping*/
1970         0,                      /*tp_hash*/
1971         0,                      /*tp_call*/
1972         0,                      /*tp_str*/
1973         PyObject_GenericGetAttr,/*tp_getattro*/
1974         PyObject_GenericSetAttr,/*tp_setattro*/
1975         0,                      /*tp_as_buffer*/
1976         Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1977         BZ2Decomp__doc__,       /*tp_doc*/
1978         0,                      /*tp_traverse*/
1979         0,                      /*tp_clear*/
1980         0,                      /*tp_richcompare*/
1981         0,                      /*tp_weaklistoffset*/
1982         0,                      /*tp_iter*/
1983         0,                      /*tp_iternext*/
1984         BZ2Decomp_methods,      /*tp_methods*/
1985         BZ2Decomp_members,      /*tp_members*/
1986         0,                      /*tp_getset*/
1987         0,                      /*tp_base*/
1988         0,                      /*tp_dict*/
1989         0,                      /*tp_descr_get*/
1990         0,                      /*tp_descr_set*/
1991         0,                      /*tp_dictoffset*/
1992         (initproc)BZ2Decomp_init, /*tp_init*/
1993         PyType_GenericAlloc,    /*tp_alloc*/
1994         PyType_GenericNew,      /*tp_new*/
1995         _PyObject_Del,          /*tp_free*/
1996         0,                      /*tp_is_gc*/
1997 };
1998
1999
2000 /* ===================================================================== */
2001 /* Module functions. */
2002
2003 PyDoc_STRVAR(bz2_compress__doc__,
2004 "compress(data [, compresslevel=9]) -> string\n\
2005 \n\
2006 Compress data in one shot. If you want to compress data sequentially,\n\
2007 use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
2008 given, must be a number between 1 and 9.\n\
2009 ");
2010
2011 static PyObject *
2012 bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs)
2013 {
2014         int compresslevel=9;
2015         char *data;
2016         int datasize;
2017         int bufsize;
2018         PyObject *ret = NULL;
2019         bz_stream _bzs;
2020         bz_stream *bzs = &_bzs;
2021         int bzerror;
2022         static const char *kwlist[] = {"data", "compresslevel", 0};
2023
2024         if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|i",
2025                                          kwlist, &data, &datasize,
2026                                          &compresslevel))
2027                 return NULL;
2028
2029         if (compresslevel < 1 || compresslevel > 9) {
2030                 PyErr_SetString(PyExc_ValueError,
2031                                 "compresslevel must be between 1 and 9");
2032                 return NULL;
2033         }
2034
2035         /* Conforming to bz2 manual, this is large enough to fit compressed
2036          * data in one shot. We will check it later anyway. */
2037         bufsize = datasize + (datasize/100+1) + 600;
2038
2039         ret = PyString_FromStringAndSize(NULL, bufsize);
2040         if (!ret)
2041                 return NULL;
2042
2043         memset(bzs, 0, sizeof(bz_stream));
2044
2045         bzs->next_in = data;
2046         bzs->avail_in = datasize;
2047         bzs->next_out = BUF(ret);
2048         bzs->avail_out = bufsize;
2049
2050         bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0);
2051         if (bzerror != BZ_OK) {
2052                 Util_CatchBZ2Error(bzerror);
2053                 Py_DECREF(ret);
2054                 return NULL;
2055         }
2056
2057         for (;;) {
2058                 Py_BEGIN_ALLOW_THREADS
2059                 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
2060                 Py_END_ALLOW_THREADS
2061                 if (bzerror == BZ_STREAM_END) {
2062                         break;
2063                 } else if (bzerror != BZ_FINISH_OK) {
2064                         BZ2_bzCompressEnd(bzs);
2065                         Util_CatchBZ2Error(bzerror);
2066                         Py_DECREF(ret);
2067                         return NULL;
2068                 }
2069                 if (bzs->avail_out == 0) {
2070                         bufsize = Util_NewBufferSize(bufsize);
2071                         if (_PyString_Resize(&ret, bufsize) < 0) {
2072                                 BZ2_bzCompressEnd(bzs);
2073                                 Py_DECREF(ret);
2074                                 return NULL;
2075                         }
2076                         bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2077                         bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2078                 }
2079         }
2080
2081         if (bzs->avail_out != 0)
2082                 _PyString_Resize(&ret, (int)BZS_TOTAL_OUT(bzs));
2083         BZ2_bzCompressEnd(bzs);
2084
2085         return ret;
2086 }
2087
2088 PyDoc_STRVAR(bz2_decompress__doc__,
2089 "decompress(data) -> decompressed data\n\
2090 \n\
2091 Decompress data in one shot. If you want to decompress data sequentially,\n\
2092 use an instance of BZ2Decompressor instead.\n\
2093 ");
2094
2095 static PyObject *
2096 bz2_decompress(PyObject *self, PyObject *args)
2097 {
2098         char *data;
2099         int datasize;
2100         int bufsize = SMALLCHUNK;
2101         PyObject *ret;
2102         bz_stream _bzs;
2103         bz_stream *bzs = &_bzs;
2104         int bzerror;
2105
2106         if (!PyArg_ParseTuple(args, "s#:decompress", &data, &datasize))
2107                 return NULL;
2108
2109         if (datasize == 0)
2110                 return PyString_FromString("");
2111
2112         ret = PyString_FromStringAndSize(NULL, bufsize);
2113         if (!ret)
2114                 return NULL;
2115
2116         memset(bzs, 0, sizeof(bz_stream));
2117
2118         bzs->next_in = data;
2119         bzs->avail_in = datasize;
2120         bzs->next_out = BUF(ret);
2121         bzs->avail_out = bufsize;
2122
2123         bzerror = BZ2_bzDecompressInit(bzs, 0, 0);
2124         if (bzerror != BZ_OK) {
2125                 Util_CatchBZ2Error(bzerror);
2126                 Py_DECREF(ret);
2127                 return NULL;
2128         }
2129
2130         for (;;) {
2131                 Py_BEGIN_ALLOW_THREADS
2132                 bzerror = BZ2_bzDecompress(bzs);
2133                 Py_END_ALLOW_THREADS
2134                 if (bzerror == BZ_STREAM_END) {
2135                         break;
2136                 } else if (bzerror != BZ_OK) {
2137                         BZ2_bzDecompressEnd(bzs);
2138                         Util_CatchBZ2Error(bzerror);
2139                         Py_DECREF(ret);
2140                         return NULL;
2141                 }
2142                 if (bzs->avail_out == 0) {
2143                         bufsize = Util_NewBufferSize(bufsize);
2144                         if (_PyString_Resize(&ret, bufsize) < 0) {
2145                                 BZ2_bzDecompressEnd(bzs);
2146                                 Py_DECREF(ret);
2147                                 return NULL;
2148                         }
2149                         bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2150                         bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2151                 } else if (bzs->avail_in == 0) {
2152                         BZ2_bzDecompressEnd(bzs);
2153                         PyErr_SetString(PyExc_ValueError,
2154                                         "couldn't find end of stream");
2155                         Py_DECREF(ret);
2156                         return NULL;
2157                 }
2158         }
2159
2160         if (bzs->avail_out != 0)
2161                 _PyString_Resize(&ret, (int)BZS_TOTAL_OUT(bzs));
2162         BZ2_bzDecompressEnd(bzs);
2163
2164         return ret;
2165 }
2166
2167 static PyMethodDef bz2_methods[] = {
2168         {"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS,
2169                 bz2_compress__doc__},
2170         {"decompress", (PyCFunction) bz2_decompress, METH_VARARGS,
2171                 bz2_decompress__doc__},
2172         {NULL,          NULL}           /* sentinel */
2173 };
2174
2175 /* ===================================================================== */
2176 /* Initialization function. */
2177
2178 PyDoc_STRVAR(bz2__doc__,
2179 "The python bz2 module provides a comprehensive interface for\n\
2180 the bz2 compression library. It implements a complete file\n\
2181 interface, one shot (de)compression functions, and types for\n\
2182 sequential (de)compression.\n\
2183 ");
2184
2185 PyMODINIT_FUNC
2186 initbz2(void)
2187 {
2188         PyObject *m;
2189
2190         BZ2File_Type.ob_type = &PyType_Type;
2191         BZ2Comp_Type.ob_type = &PyType_Type;
2192         BZ2Decomp_Type.ob_type = &PyType_Type;
2193
2194         m = Py_InitModule3("bz2", bz2_methods, bz2__doc__);
2195         if (m == NULL)
2196                 return;
2197
2198         PyModule_AddObject(m, "__author__", PyString_FromString(__author__));
2199
2200         Py_INCREF(&BZ2File_Type);
2201         PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type);
2202
2203         Py_INCREF(&BZ2Comp_Type);
2204         PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type);
2205
2206         Py_INCREF(&BZ2Decomp_Type);
2207         PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type);
2208 }