Modules/bz2module.c

   1 /*
   2
   3 python-bz2 - python bz2 library interface
   4
   5 Copyright (c) 2002  Gustavo Niemeyer <niemeyer@conectiva.com>
   6 Copyright (c) 2002  Python Software Foundation; All Rights Reserved
   7
   8 */
   9
  10 #include "Python.h"
  11 #include <stdio.h>
  12 #include <bzlib.h>
  13 #include "structmember.h"
  14
  15 #ifdef WITH_THREAD
  16 #include "pythread.h"
  17 #endif
  18
  19 static char __author__[] =
  20 "The bz2 python module was written by:\n\
  21 \n\
  22     Gustavo Niemeyer <niemeyer@conectiva.com>\n\
  23 ";
  24
  25 /* Our very own off_t-like type, 64-bit if possible */
  26 /* copied from Objects/fileobject.c */
  27 #if !defined(HAVE_LARGEFILE_SUPPORT)
  28 typedef off_t Py_off_t;
  29 #elif SIZEOF_OFF_T >= 8
  30 typedef off_t Py_off_t;
  31 #elif SIZEOF_FPOS_T >= 8
  32 typedef fpos_t Py_off_t;
  33 #else
  34 #error "Large file support, but neither off_t nor fpos_t is large enough."
  35 #endif
  36
  37 #define BUF(v) PyString_AS_STRING((PyStringObject *)v)
  38
  39 #define MODE_CLOSED   0
  40 #define MODE_READ     1
  41 #define MODE_READ_EOF 2
  42 #define MODE_WRITE    3
  43
  44 #define BZ2FileObject_Check(v)  ((v)->ob_type == &BZ2File_Type)
  45
  46
  47 #ifdef BZ_CONFIG_ERROR
  48
  49 #if SIZEOF_LONG >= 8
  50 #define BZS_TOTAL_OUT(bzs) \
  51         (((long)bzs->total_out_hi32 << 32) + bzs->total_out_lo32)
  52 #elif SIZEOF_LONG_LONG >= 8
  53 #define BZS_TOTAL_OUT(bzs) \
  54         (((PY_LONG_LONG)bzs->total_out_hi32 << 32) + bzs->total_out_lo32)
  55 #else
  56 #define BZS_TOTAL_OUT(bzs) \
  57         bzs->total_out_lo32
  58 #endif
  59
  60 #else /* ! BZ_CONFIG_ERROR */
  61
  62 #define BZ2_bzRead bzRead
  63 #define BZ2_bzReadOpen bzReadOpen
  64 #define BZ2_bzReadClose bzReadClose
  65 #define BZ2_bzWrite bzWrite
  66 #define BZ2_bzWriteOpen bzWriteOpen
  67 #define BZ2_bzWriteClose bzWriteClose
  68 #define BZ2_bzCompress bzCompress
  69 #define BZ2_bzCompressInit bzCompressInit
  70 #define BZ2_bzCompressEnd bzCompressEnd
  71 #define BZ2_bzDecompress bzDecompress
  72 #define BZ2_bzDecompressInit bzDecompressInit
  73 #define BZ2_bzDecompressEnd bzDecompressEnd
  74
  75 #define BZS_TOTAL_OUT(bzs) bzs->total_out
  76
  77 #endif /* ! BZ_CONFIG_ERROR */
  78
  79
  80 #ifdef WITH_THREAD
  81 #define ACQUIRE_LOCK(obj) PyThread_acquire_lock(obj->lock, 1)
  82 #define RELEASE_LOCK(obj) PyThread_release_lock(obj->lock)
  83 #else
  84 #define ACQUIRE_LOCK(obj)
  85 #define RELEASE_LOCK(obj)
  86 #endif
  87
  88 /* Bits in f_newlinetypes */
  89 #define NEWLINE_UNKNOWN 0       /* No newline seen, yet */
  90 #define NEWLINE_CR 1            /* \r newline seen */
  91 #define NEWLINE_LF 2            /* \n newline seen */
  92 #define NEWLINE_CRLF 4          /* \r\n newline seen */
  93
  94 /* ===================================================================== */
  95 /* Structure definitions. */
  96
  97 typedef struct {
  98         PyObject_HEAD
  99         PyObject *file;
 100
 101         char* f_buf;            /* Allocated readahead buffer */
 102         char* f_bufend;         /* Points after last occupied position */
 103         char* f_bufptr;         /* Current buffer position */
 104
 105         int f_softspace;        /* Flag used by 'print' command */
 106
 107         int f_univ_newline;     /* Handle any newline convention */
 108         int f_newlinetypes;     /* Types of newlines seen */
 109         int f_skipnextlf;       /* Skip next \n */
 110
 111         BZFILE *fp;
 112         int mode;
 113         Py_off_t pos;
 114         Py_off_t size;
 115 #ifdef WITH_THREAD
 116         PyThread_type_lock lock;
 117 #endif
 118 } BZ2FileObject;
 119
 120 typedef struct {
 121         PyObject_HEAD
 122         bz_stream bzs;
 123         int running;
 124 #ifdef WITH_THREAD
 125         PyThread_type_lock lock;
 126 #endif
 127 } BZ2CompObject;
 128
 129 typedef struct {
 130         PyObject_HEAD
 131         bz_stream bzs;
 132         int running;
 133         PyObject *unused_data;
 134 #ifdef WITH_THREAD
 135         PyThread_type_lock lock;
 136 #endif
 137 } BZ2DecompObject;
 138
 139 /* ===================================================================== */
 140 /* Utility functions. */
 141
 142 static int
 143 Util_CatchBZ2Error(int bzerror)
 144 {
 145         int ret = 0;
 146         switch(bzerror) {
 147                 case BZ_OK:
 148                 case BZ_STREAM_END:
 149                         break;
 150
 151 #ifdef BZ_CONFIG_ERROR
 152                 case BZ_CONFIG_ERROR:
 153                         PyErr_SetString(PyExc_SystemError,
 154                                         "the bz2 library was not compiled "
 155                                         "correctly");
 156                         ret = 1;
 157                         break;
 158 #endif
 159
 160                 case BZ_PARAM_ERROR:
 161                         PyErr_SetString(PyExc_ValueError,
 162                                         "the bz2 library has received wrong "
 163                                         "parameters");
 164                         ret = 1;
 165                         break;
 166
 167                 case BZ_MEM_ERROR:
 168                         PyErr_NoMemory();
 169                         ret = 1;
 170                         break;
 171
 172                 case BZ_DATA_ERROR:
 173                 case BZ_DATA_ERROR_MAGIC:
 174                         PyErr_SetString(PyExc_IOError, "invalid data stream");
 175                         ret = 1;
 176                         break;
 177
 178                 case BZ_IO_ERROR:
 179                         PyErr_SetString(PyExc_IOError, "unknown IO error");
 180                         ret = 1;
 181                         break;
 182
 183                 case BZ_UNEXPECTED_EOF:
 184                         PyErr_SetString(PyExc_EOFError,
 185                                         "compressed file ended before the "
 186                                         "logical end-of-stream was detected");
 187                         ret = 1;
 188                         break;
 189
 190                 case BZ_SEQUENCE_ERROR:
 191                         PyErr_SetString(PyExc_RuntimeError,
 192                                         "wrong sequence of bz2 library "
 193                                         "commands used");
 194                         ret = 1;
 195                         break;
 196         }
 197         return ret;
 198 }
 199
 200 #if BUFSIZ < 8192
 201 #define SMALLCHUNK 8192
 202 #else
 203 #define SMALLCHUNK BUFSIZ
 204 #endif
 205
 206 #if SIZEOF_INT < 4
 207 #define BIGCHUNK  (512 * 32)
 208 #else
 209 #define BIGCHUNK  (512 * 1024)
 210 #endif
 211
 212 /* This is a hacked version of Python's fileobject.c:new_buffersize(). */
 213 static size_t
 214 Util_NewBufferSize(size_t currentsize)
 215 {
 216         if (currentsize > SMALLCHUNK) {
 217                 /* Keep doubling until we reach BIGCHUNK;
 218                    then keep adding BIGCHUNK. */
 219                 if (currentsize <= BIGCHUNK)
 220                         return currentsize + currentsize;
 221                 else
 222                         return currentsize + BIGCHUNK;
 223         }
 224         return currentsize + SMALLCHUNK;
 225 }
 226
 227 /* This is a hacked version of Python's fileobject.c:get_line(). */
 228 static PyObject *
 229 Util_GetLine(BZ2FileObject *f, int n)
 230 {
 231         char c;
 232         char *buf, *end;
 233         size_t total_v_size;    /* total # of slots in buffer */
 234         size_t used_v_size;     /* # used slots in buffer */
 235         size_t increment;       /* amount to increment the buffer */
 236         PyObject *v;
 237         int bzerror;
 238         int newlinetypes = f->f_newlinetypes;
 239         int skipnextlf = f->f_skipnextlf;
 240         int univ_newline = f->f_univ_newline;
 241
 242         total_v_size = n > 0 ? n : 100;
 243         v = PyString_FromStringAndSize((char *)NULL, total_v_size);
 244         if (v == NULL)
 245                 return NULL;
 246
 247         buf = BUF(v);
 248         end = buf + total_v_size;
 249
 250         for (;;) {
 251                 Py_BEGIN_ALLOW_THREADS
 252                 if (univ_newline) {
 253                         while (1) {
 254                                 BZ2_bzRead(&bzerror, f->fp, &c, 1);
 255                                 f->pos++;
 256                                 if (bzerror != BZ_OK || buf == end)
 257                                         break;
 258                                 if (skipnextlf) {
 259                                         skipnextlf = 0;
 260                                         if (c == '\n') {
 261                                                 /* Seeing a \n here with
 262                                                  * skipnextlf true means we
 263                                                  * saw a \r before.
 264                                                  */
 265                                                 newlinetypes |= NEWLINE_CRLF;
 266                                                 BZ2_bzRead(&bzerror, f->fp,
 267                                                            &c, 1);
 268                                                 if (bzerror != BZ_OK)
 269                                                         break;
 270                                         } else {
 271                                                 newlinetypes |= NEWLINE_CR;
 272                                         }
 273                                 }
 274                                 if (c == '\r') {
 275                                         skipnextlf = 1;
 276                                         c = '\n';
 277                                 } else if ( c == '\n')
 278                                         newlinetypes |= NEWLINE_LF;
 279                                 *buf++ = c;
 280                                 if (c == '\n') break;
 281                         }
 282                         if (bzerror == BZ_STREAM_END && skipnextlf)
 283                                 newlinetypes |= NEWLINE_CR;
 284                 } else /* If not universal newlines use the normal loop */
 285                         do {
 286                                 BZ2_bzRead(&bzerror, f->fp, &c, 1);
 287                                 f->pos++;
 288                                 *buf++ = c;
 289                         } while (bzerror == BZ_OK && c != '\n' && buf != end);
 290                 Py_END_ALLOW_THREADS
 291                 f->f_newlinetypes = newlinetypes;
 292                 f->f_skipnextlf = skipnextlf;
 293                 if (bzerror == BZ_STREAM_END) {
 294                         f->size = f->pos;
 295                         f->mode = MODE_READ_EOF;
 296                         break;
 297                 } else if (bzerror != BZ_OK) {
 298                         Util_CatchBZ2Error(bzerror);
 299                         Py_DECREF(v);
 300                         return NULL;
 301                 }
 302                 if (c == '\n')
 303                         break;
 304                 /* Must be because buf == end */
 305                 if (n > 0)
 306                         break;
 307                 used_v_size = total_v_size;
 308                 increment = total_v_size >> 2; /* mild exponential growth */
 309                 total_v_size += increment;
 310                 if (total_v_size > INT_MAX) {
 311                         PyErr_SetString(PyExc_OverflowError,
 312                             "line is longer than a Python string can hold");
 313                         Py_DECREF(v);
 314                         return NULL;
 315                 }
 316                 if (_PyString_Resize(&v, total_v_size) < 0)
 317                         return NULL;
 318                 buf = BUF(v) + used_v_size;
 319                 end = BUF(v) + total_v_size;
 320         }
 321
 322         used_v_size = buf - BUF(v);
 323         if (used_v_size != total_v_size)
 324                 _PyString_Resize(&v, used_v_size);
 325         return v;
 326 }
 327
 328 /* This is a hacked version of Python's
 329  * fileobject.c:Py_UniversalNewlineFread(). */
 330 size_t
 331 Util_UnivNewlineRead(int *bzerror, BZFILE *stream,
 332                      char* buf, size_t n, BZ2FileObject *f)
 333 {
 334         char *dst = buf;
 335         int newlinetypes, skipnextlf;
 336
 337         assert(buf != NULL);
 338         assert(stream != NULL);
 339
 340         if (!f->f_univ_newline)
 341                 return BZ2_bzRead(bzerror, stream, buf, n);
 342
 343         newlinetypes = f->f_newlinetypes;
 344         skipnextlf = f->f_skipnextlf;
 345
 346         /* Invariant:  n is the number of bytes remaining to be filled
 347          * in the buffer.
 348          */
 349         while (n) {
 350                 size_t nread;
 351                 int shortread;
 352                 char *src = dst;
 353
 354                 nread = BZ2_bzRead(bzerror, stream, dst, n);
 355                 assert(nread <= n);
 356                 n -= nread; /* assuming 1 byte out for each in; will adjust */
 357                 shortread = n != 0;     /* true iff EOF or error */
 358                 while (nread--) {
 359                         char c = *src++;
 360                         if (c == '\r') {
 361                                 /* Save as LF and set flag to skip next LF. */
 362                                 *dst++ = '\n';
 363                                 skipnextlf = 1;
 364                         }
 365                         else if (skipnextlf && c == '\n') {
 366                                 /* Skip LF, and remember we saw CR LF. */
 367                                 skipnextlf = 0;
 368                                 newlinetypes |= NEWLINE_CRLF;
 369                                 ++n;
 370                         }
 371                         else {
 372                                 /* Normal char to be stored in buffer.  Also
 373                                  * update the newlinetypes flag if either this
 374                                  * is an LF or the previous char was a CR.
 375                                  */
 376                                 if (c == '\n')
 377                                         newlinetypes |= NEWLINE_LF;
 378                                 else if (skipnextlf)
 379                                         newlinetypes |= NEWLINE_CR;
 380                                 *dst++ = c;
 381                                 skipnextlf = 0;
 382                         }
 383                 }
 384                 if (shortread) {
 385                         /* If this is EOF, update type flags. */
 386                         if (skipnextlf && *bzerror == BZ_STREAM_END)
 387                                 newlinetypes |= NEWLINE_CR;
 388                         break;
 389                 }
 390         }
 391         f->f_newlinetypes = newlinetypes;
 392         f->f_skipnextlf = skipnextlf;
 393         return dst - buf;
 394 }
 395
 396 /* This is a hacked version of Python's fileobject.c:drop_readahead(). */
 397 static void
 398 Util_DropReadAhead(BZ2FileObject *f)
 399 {
 400         if (f->f_buf != NULL) {
 401                 PyMem_Free(f->f_buf);
 402                 f->f_buf = NULL;
 403         }
 404 }
 405
 406 /* This is a hacked version of Python's fileobject.c:readahead(). */
 407 static int
 408 Util_ReadAhead(BZ2FileObject *f, int bufsize)
 409 {
 410         int chunksize;
 411         int bzerror;
 412
 413         if (f->f_buf != NULL) {
 414                 if((f->f_bufend - f->f_bufptr) >= 1)
 415                         return 0;
 416                 else
 417                         Util_DropReadAhead(f);
 418         }
 419         if (f->mode == MODE_READ_EOF) {
 420                 f->f_bufptr = f->f_buf;
 421                 f->f_bufend = f->f_buf;
 422                 return 0;
 423         }
 424         if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
 425                 return -1;
 426         }
 427         Py_BEGIN_ALLOW_THREADS
 428         chunksize = Util_UnivNewlineRead(&bzerror, f->fp, f->f_buf,
 429                                          bufsize, f);
 430         Py_END_ALLOW_THREADS
 431         f->pos += chunksize;
 432         if (bzerror == BZ_STREAM_END) {
 433                 f->size = f->pos;
 434                 f->mode = MODE_READ_EOF;
 435         } else if (bzerror != BZ_OK) {
 436                 Util_CatchBZ2Error(bzerror);
 437                 Util_DropReadAhead(f);
 438                 return -1;
 439         }
 440         f->f_bufptr = f->f_buf;
 441         f->f_bufend = f->f_buf + chunksize;
 442         return 0;
 443 }
 444
 445 /* This is a hacked version of Python's
 446  * fileobject.c:readahead_get_line_skip(). */
 447 static PyStringObject *
 448 Util_ReadAheadGetLineSkip(BZ2FileObject *f, int skip, int bufsize)
 449 {
 450         PyStringObject* s;
 451         char *bufptr;
 452         char *buf;
 453         int len;
 454
 455         if (f->f_buf == NULL)
 456                 if (Util_ReadAhead(f, bufsize) < 0)
 457                         return NULL;
 458
 459         len = f->f_bufend - f->f_bufptr;
 460         if (len == 0)
 461                 return (PyStringObject *)
 462                         PyString_FromStringAndSize(NULL, skip);
 463         bufptr = memchr(f->f_bufptr, '\n', len);
 464         if (bufptr != NULL) {
 465                 bufptr++;                       /* Count the '\n' */
 466                 len = bufptr - f->f_bufptr;
 467                 s = (PyStringObject *)
 468                         PyString_FromStringAndSize(NULL, skip+len);
 469                 if (s == NULL)
 470                         return NULL;
 471                 memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
 472                 f->f_bufptr = bufptr;
 473                 if (bufptr == f->f_bufend)
 474                         Util_DropReadAhead(f);
 475         } else {
 476                 bufptr = f->f_bufptr;
 477                 buf = f->f_buf;
 478                 f->f_buf = NULL;        /* Force new readahead buffer */
 479                 s = Util_ReadAheadGetLineSkip(f, skip+len,
 480                                               bufsize + (bufsize>>2));
 481                 if (s == NULL) {
 482                         PyMem_Free(buf);
 483                         return NULL;
 484                 }
 485                 memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
 486                 PyMem_Free(buf);
 487         }
 488         return s;
 489 }
 490
 491 /* ===================================================================== */
 492 /* Methods of BZ2File. */
 493
 494 PyDoc_STRVAR(BZ2File_read__doc__,
 495 "read([size]) -> string\n\
 496 \n\
 497 Read at most size uncompressed bytes, returned as a string. If the size\n\
 498 argument is negative or omitted, read until EOF is reached.\n\
 499 ");
 500
 501 /* This is a hacked version of Python's fileobject.c:file_read(). */
 502 static PyObject *
 503 BZ2File_read(BZ2FileObject *self, PyObject *args)
 504 {
 505         long bytesrequested = -1;
 506         size_t bytesread, buffersize, chunksize;
 507         int bzerror;
 508         PyObject *ret = NULL;
 509
 510         if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
 511                 return NULL;
 512
 513         ACQUIRE_LOCK(self);
 514         switch (self->mode) {
 515                 case MODE_READ:
 516                         break;
 517                 case MODE_READ_EOF:
 518                         ret = PyString_FromString("");
 519                         goto cleanup;
 520                 case MODE_CLOSED:
 521                         PyErr_SetString(PyExc_ValueError,
 522                                         "I/O operation on closed file");
 523                         goto cleanup;
 524                 default:
 525                         PyErr_SetString(PyExc_IOError,
 526                                         "file is not ready for reading");
 527                         goto cleanup;
 528         }
 529
 530         if (bytesrequested < 0)
 531                 buffersize = Util_NewBufferSize((size_t)0);
 532         else
 533                 buffersize = bytesrequested;
 534         if (buffersize > INT_MAX) {
 535                 PyErr_SetString(PyExc_OverflowError,
 536                                 "requested number of bytes is "
 537                                 "more than a Python string can hold");
 538                 goto cleanup;
 539         }
 540         ret = PyString_FromStringAndSize((char *)NULL, buffersize);
 541         if (ret == NULL)
 542                 goto cleanup;
 543         bytesread = 0;
 544
 545         for (;;) {
 546                 Py_BEGIN_ALLOW_THREADS
 547                 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
 548                                                  BUF(ret)+bytesread,
 549                                                  buffersize-bytesread,
 550                                                  self);
 551                 self->pos += chunksize;
 552                 Py_END_ALLOW_THREADS
 553                 bytesread += chunksize;
 554                 if (bzerror == BZ_STREAM_END) {
 555                         self->size = self->pos;
 556                         self->mode = MODE_READ_EOF;
 557                         break;
 558                 } else if (bzerror != BZ_OK) {
 559                         Util_CatchBZ2Error(bzerror);
 560                         Py_DECREF(ret);
 561                         ret = NULL;
 562                         goto cleanup;
 563                 }
 564                 if (bytesrequested < 0) {
 565                         buffersize = Util_NewBufferSize(buffersize);
 566                         if (_PyString_Resize(&ret, buffersize) < 0)
 567                                 goto cleanup;
 568                 } else {
 569                         break;
 570                 }
 571         }
 572         if (bytesread != buffersize)
 573                 _PyString_Resize(&ret, bytesread);
 574
 575 cleanup:
 576         RELEASE_LOCK(self);
 577         return ret;
 578 }
 579
 580 PyDoc_STRVAR(BZ2File_readline__doc__,
 581 "readline([size]) -> string\n\
 582 \n\
 583 Return the next line from the file, as a string, retaining newline.\n\
 584 A non-negative size argument will limit the maximum number of bytes to\n\
 585 return (an incomplete line may be returned then). Return an empty\n\
 586 string at EOF.\n\
 587 ");
 588
 589 static PyObject *
 590 BZ2File_readline(BZ2FileObject *self, PyObject *args)
 591 {
 592         PyObject *ret = NULL;
 593         int sizehint = -1;
 594
 595         if (!PyArg_ParseTuple(args, "|i:readline", &sizehint))
 596                 return NULL;
 597
 598         ACQUIRE_LOCK(self);
 599         switch (self->mode) {
 600                 case MODE_READ:
 601                         break;
 602                 case MODE_READ_EOF:
 603                         ret = PyString_FromString("");
 604                         goto cleanup;
 605                 case MODE_CLOSED:
 606                         PyErr_SetString(PyExc_ValueError,
 607                                         "I/O operation on closed file");
 608                         goto cleanup;
 609                 default:
 610                         PyErr_SetString(PyExc_IOError,
 611                                         "file is not ready for reading");
 612                         goto cleanup;
 613         }
 614
 615         if (sizehint == 0)
 616                 ret = PyString_FromString("");
 617         else
 618                 ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint);
 619
 620 cleanup:
 621         RELEASE_LOCK(self);
 622         return ret;
 623 }
 624
 625 PyDoc_STRVAR(BZ2File_readlines__doc__,
 626 "readlines([size]) -> list\n\
 627 \n\
 628 Call readline() repeatedly and return a list of lines read.\n\
 629 The optional size argument, if given, is an approximate bound on the\n\
 630 total number of bytes in the lines returned.\n\
 631 ");
 632
 633 /* This is a hacked version of Python's fileobject.c:file_readlines(). */
 634 static PyObject *
 635 BZ2File_readlines(BZ2FileObject *self, PyObject *args)
 636 {
 637         long sizehint = 0;
 638         PyObject *list = NULL;
 639         PyObject *line;
 640         char small_buffer[SMALLCHUNK];
 641         char *buffer = small_buffer;
 642         size_t buffersize = SMALLCHUNK;
 643         PyObject *big_buffer = NULL;
 644         size_t nfilled = 0;
 645         size_t nread;
 646         size_t totalread = 0;
 647         char *p, *q, *end;
 648         int err;
 649         int shortread = 0;
 650         int bzerror;
 651
 652         if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
 653                 return NULL;
 654
 655         ACQUIRE_LOCK(self);
 656         switch (self->mode) {
 657                 case MODE_READ:
 658                         break;
 659                 case MODE_READ_EOF:
 660                         list = PyList_New(0);
 661                         goto cleanup;
 662                 case MODE_CLOSED:
 663                         PyErr_SetString(PyExc_ValueError,
 664                                         "I/O operation on closed file");
 665                         goto cleanup;
 666                 default:
 667                         PyErr_SetString(PyExc_IOError,
 668                                         "file is not ready for reading");
 669                         goto cleanup;
 670         }
 671
 672         if ((list = PyList_New(0)) == NULL)
 673                 goto cleanup;
 674
 675         for (;;) {
 676                 Py_BEGIN_ALLOW_THREADS
 677                 nread = Util_UnivNewlineRead(&bzerror, self->fp,
 678                                              buffer+nfilled,
 679                                              buffersize-nfilled, self);
 680                 self->pos += nread;
 681                 Py_END_ALLOW_THREADS
 682                 if (bzerror == BZ_STREAM_END) {
 683                         self->size = self->pos;
 684                         self->mode = MODE_READ_EOF;
 685                         if (nread == 0) {
 686                                 sizehint = 0;
 687                                 break;
 688                         }
 689                         shortread = 1;
 690                 } else if (bzerror != BZ_OK) {
 691                         Util_CatchBZ2Error(bzerror);
 692                   error:
 693                         Py_DECREF(list);
 694                         list = NULL;
 695                         goto cleanup;
 696                 }
 697                 totalread += nread;
 698                 p = memchr(buffer+nfilled, '\n', nread);
 699                 if (!shortread && p == NULL) {
 700                         /* Need a larger buffer to fit this line */
 701                         nfilled += nread;
 702                         buffersize *= 2;
 703                         if (buffersize > INT_MAX) {
 704                                 PyErr_SetString(PyExc_OverflowError,
 705                                 "line is longer than a Python string can hold");
 706                                 goto error;
 707                         }
 708                         if (big_buffer == NULL) {
 709                                 /* Create the big buffer */
 710                                 big_buffer = PyString_FromStringAndSize(
 711                                         NULL, buffersize);
 712                                 if (big_buffer == NULL)
 713                                         goto error;
 714                                 buffer = PyString_AS_STRING(big_buffer);
 715                                 memcpy(buffer, small_buffer, nfilled);
 716                         }
 717                         else {
 718                                 /* Grow the big buffer */
 719                                 _PyString_Resize(&big_buffer, buffersize);
 720                                 buffer = PyString_AS_STRING(big_buffer);
 721                         }
 722                         continue;
 723                 }
 724                 end = buffer+nfilled+nread;
 725                 q = buffer;
 726                 while (p != NULL) {
 727                         /* Process complete lines */
 728                         p++;
 729                         line = PyString_FromStringAndSize(q, p-q);
 730                         if (line == NULL)
 731                                 goto error;
 732                         err = PyList_Append(list, line);
 733                         Py_DECREF(line);
 734                         if (err != 0)
 735                                 goto error;
 736                         q = p;
 737                         p = memchr(q, '\n', end-q);
 738                 }
 739                 /* Move the remaining incomplete line to the start */
 740                 nfilled = end-q;
 741                 memmove(buffer, q, nfilled);
 742                 if (sizehint > 0)
 743                         if (totalread >= (size_t)sizehint)
 744                                 break;
 745                 if (shortread) {
 746                         sizehint = 0;
 747                         break;
 748                 }
 749         }
 750         if (nfilled != 0) {
 751                 /* Partial last line */
 752                 line = PyString_FromStringAndSize(buffer, nfilled);
 753                 if (line == NULL)
 754                         goto error;
 755                 if (sizehint > 0) {
 756                         /* Need to complete the last line */
 757                         PyObject *rest = Util_GetLine(self, 0);
 758                         if (rest == NULL) {
 759                                 Py_DECREF(line);
 760                                 goto error;
 761                         }
 762                         PyString_Concat(&line, rest);
 763                         Py_DECREF(rest);
 764                         if (line == NULL)
 765                                 goto error;
 766                 }
 767                 err = PyList_Append(list, line);
 768                 Py_DECREF(line);
 769                 if (err != 0)
 770                         goto error;
 771         }
 772
 773   cleanup:
 774         RELEASE_LOCK(self);
 775         if (big_buffer) {
 776                 Py_DECREF(big_buffer);
 777         }
 778         return list;
 779 }
 780
 781 PyDoc_STRVAR(BZ2File_xreadlines__doc__,
 782 "xreadlines() -> self\n\
 783 \n\
 784 For backward compatibility. BZ2File objects now include the performance\n\
 785 optimizations previously implemented in the xreadlines module.\n\
 786 ");
 787
 788 PyDoc_STRVAR(BZ2File_write__doc__,
 789 "write(data) -> None\n\
 790 \n\
 791 Write the 'data' string to file. Note that due to buffering, close() may\n\
 792 be needed before the file on disk reflects the data written.\n\
 793 ");
 794
 795 /* This is a hacked version of Python's fileobject.c:file_write(). */
 796 static PyObject *
 797 BZ2File_write(BZ2FileObject *self, PyObject *args)
 798 {
 799         PyObject *ret = NULL;
 800         char *buf;
 801         int len;
 802         int bzerror;
 803
 804         if (!PyArg_ParseTuple(args, "s#:write", &buf, &len))
 805                 return NULL;
 806
 807         ACQUIRE_LOCK(self);
 808         switch (self->mode) {
 809                 case MODE_WRITE:
 810                         break;
 811
 812                 case MODE_CLOSED:
 813                         PyErr_SetString(PyExc_ValueError,
 814                                         "I/O operation on closed file");
 815                         goto cleanup;
 816
 817                 default:
 818                         PyErr_SetString(PyExc_IOError,
 819                                         "file is not ready for writing");
 820                         goto cleanup;
 821         }
 822
 823         self->f_softspace = 0;
 824
 825         Py_BEGIN_ALLOW_THREADS
 826         BZ2_bzWrite (&bzerror, self->fp, buf, len);
 827         self->pos += len;
 828         Py_END_ALLOW_THREADS
 829
 830         if (bzerror != BZ_OK) {
 831                 Util_CatchBZ2Error(bzerror);
 832                 goto cleanup;
 833         }
 834
 835         Py_INCREF(Py_None);
 836         ret = Py_None;
 837
 838 cleanup:
 839         RELEASE_LOCK(self);
 840         return ret;
 841 }
 842
 843 PyDoc_STRVAR(BZ2File_writelines__doc__,
 844 "writelines(sequence_of_strings) -> None\n\
 845 \n\
 846 Write the sequence of strings to the file. Note that newlines are not\n\
 847 added. The sequence can be any iterable object producing strings. This is\n\
 848 equivalent to calling write() for each string.\n\
 849 ");
 850
 851 /* This is a hacked version of Python's fileobject.c:file_writelines(). */
 852 static PyObject *
 853 BZ2File_writelines(BZ2FileObject *self, PyObject *seq)
 854 {
 855 #define CHUNKSIZE 1000
 856         PyObject *list = NULL;
 857         PyObject *iter = NULL;
 858         PyObject *ret = NULL;
 859         PyObject *line;
 860         int i, j, index, len, islist;
 861         int bzerror;
 862
 863         ACQUIRE_LOCK(self);
 864         switch (self->mode) {
 865                 case MODE_WRITE:
 866                         break;
 867
 868                 case MODE_CLOSED:
 869                         PyErr_SetString(PyExc_ValueError,
 870                                         "I/O operation on closed file");
 871                         goto error;
 872
 873                 default:
 874                         PyErr_SetString(PyExc_IOError,
 875                                         "file is not ready for writing");
 876                         goto error;
 877         }
 878
 879         islist = PyList_Check(seq);
 880         if  (!islist) {
 881                 iter = PyObject_GetIter(seq);
 882                 if (iter == NULL) {
 883                         PyErr_SetString(PyExc_TypeError,
 884                                 "writelines() requires an iterable argument");
 885                         goto error;
 886                 }
 887                 list = PyList_New(CHUNKSIZE);
 888                 if (list == NULL)
 889                         goto error;
 890         }
 891
 892         /* Strategy: slurp CHUNKSIZE lines into a private list,
 893            checking that they are all strings, then write that list
 894            without holding the interpreter lock, then come back for more. */
 895         for (index = 0; ; index += CHUNKSIZE) {
 896                 if (islist) {
 897                         Py_XDECREF(list);
 898                         list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
 899                         if (list == NULL)
 900                                 goto error;
 901                         j = PyList_GET_SIZE(list);
 902                 }
 903                 else {
 904                         for (j = 0; j < CHUNKSIZE; j++) {
 905                                 line = PyIter_Next(iter);
 906                                 if (line == NULL) {
 907                                         if (PyErr_Occurred())
 908                                                 goto error;
 909                                         break;
 910                                 }
 911                                 PyList_SetItem(list, j, line);
 912                         }
 913                 }
 914                 if (j == 0)
 915                         break;
 916
 917                 /* Check that all entries are indeed strings. If not,
 918                    apply the same rules as for file.write() and
 919                    convert the rets to strings. This is slow, but
 920                    seems to be the only way since all conversion APIs
 921                    could potentially execute Python code. */
 922                 for (i = 0; i < j; i++) {
 923                         PyObject *v = PyList_GET_ITEM(list, i);
 924                         if (!PyString_Check(v)) {
 925                                 const char *buffer;
 926                                 Py_ssize_t len;
 927                                 if (PyObject_AsCharBuffer(v, &buffer, &len)) {
 928                                         PyErr_SetString(PyExc_TypeError,
 929                                                         "writelines() "
 930                                                         "argument must be "
 931                                                         "a sequence of "
 932                                                         "strings");
 933                                         goto error;
 934                                 }
 935                                 line = PyString_FromStringAndSize(buffer,
 936                                                                   len);
 937                                 if (line == NULL)
 938                                         goto error;
 939                                 Py_DECREF(v);
 940                                 PyList_SET_ITEM(list, i, line);
 941                         }
 942                 }
 943
 944                 self->f_softspace = 0;
 945
 946                 /* Since we are releasing the global lock, the
 947                    following code may *not* execute Python code. */
 948                 Py_BEGIN_ALLOW_THREADS
 949                 for (i = 0; i < j; i++) {
 950                         line = PyList_GET_ITEM(list, i);
 951                         len = PyString_GET_SIZE(line);
 952                         BZ2_bzWrite (&bzerror, self->fp,
 953                                      PyString_AS_STRING(line), len);
 954                         if (bzerror != BZ_OK) {
 955                                 Py_BLOCK_THREADS
 956                                 Util_CatchBZ2Error(bzerror);
 957                                 goto error;
 958                         }
 959                 }
 960                 Py_END_ALLOW_THREADS
 961
 962                 if (j < CHUNKSIZE)
 963                         break;
 964         }
 965
 966         Py_INCREF(Py_None);
 967         ret = Py_None;
 968
 969   error:
 970         RELEASE_LOCK(self);
 971         Py_XDECREF(list);
 972         Py_XDECREF(iter);
 973         return ret;
 974 #undef CHUNKSIZE
 975 }
 976
 977 PyDoc_STRVAR(BZ2File_seek__doc__,
 978 "seek(offset [, whence]) -> None\n\
 979 \n\
 980 Move to new file position. Argument offset is a byte count. Optional\n\
 981 argument whence defaults to 0 (offset from start of file, offset\n\
 982 should be >= 0); other values are 1 (move relative to current position,\n\
 983 positive or negative), and 2 (move relative to end of file, usually\n\
 984 negative, although many platforms allow seeking beyond the end of a file).\n\
 985 \n\
 986 Note that seeking of bz2 files is emulated, and depending on the parameters\n\
 987 the operation may be extremely slow.\n\
 988 ");
 989
 990 static PyObject *
 991 BZ2File_seek(BZ2FileObject *self, PyObject *args)
 992 {
 993         int where = 0;
 994         PyObject *offobj;
 995         Py_off_t offset;
 996         char small_buffer[SMALLCHUNK];
 997         char *buffer = small_buffer;
 998         size_t buffersize = SMALLCHUNK;
 999         Py_off_t bytesread = 0;
1000         size_t readsize;
1001         int chunksize;
1002         int bzerror;
1003         PyObject *ret = NULL;
1004
1005         if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &where))
1006                 return NULL;
1007 #if !defined(HAVE_LARGEFILE_SUPPORT)
1008         offset = PyInt_AsLong(offobj);
1009 #else
1010         offset = PyLong_Check(offobj) ?
1011                 PyLong_AsLongLong(offobj) : PyInt_AsLong(offobj);
1012 #endif
1013         if (PyErr_Occurred())
1014                 return NULL;
1015
1016         ACQUIRE_LOCK(self);
1017         Util_DropReadAhead(self);
1018         switch (self->mode) {
1019                 case MODE_READ:
1020                 case MODE_READ_EOF:
1021                         break;
1022
1023                 case MODE_CLOSED:
1024                         PyErr_SetString(PyExc_ValueError,
1025                                         "I/O operation on closed file");
1026                         goto cleanup;
1027
1028                 default:
1029                         PyErr_SetString(PyExc_IOError,
1030                                         "seek works only while reading");
1031                         goto cleanup;
1032         }
1033
1034         if (where == 2) {
1035                 if (self->size == -1) {
1036                         assert(self->mode != MODE_READ_EOF);
1037                         for (;;) {
1038                                 Py_BEGIN_ALLOW_THREADS
1039                                 chunksize = Util_UnivNewlineRead(
1040                                                 &bzerror, self->fp,
1041                                                 buffer, buffersize,
1042                                                 self);
1043                                 self->pos += chunksize;
1044                                 Py_END_ALLOW_THREADS
1045
1046                                 bytesread += chunksize;
1047                                 if (bzerror == BZ_STREAM_END) {
1048                                         break;
1049                                 } else if (bzerror != BZ_OK) {
1050                                         Util_CatchBZ2Error(bzerror);
1051                                         goto cleanup;
1052                                 }
1053                         }
1054                         self->mode = MODE_READ_EOF;
1055                         self->size = self->pos;
1056                         bytesread = 0;
1057                 }
1058                 offset = self->size + offset;
1059         } else if (where == 1) {
1060                 offset = self->pos + offset;
1061         }
1062
1063         /* Before getting here, offset must be the absolute position the file
1064          * pointer should be set to. */
1065
1066         if (offset >= self->pos) {
1067                 /* we can move forward */
1068                 offset -= self->pos;
1069         } else {
1070                 /* we cannot move back, so rewind the stream */
1071                 BZ2_bzReadClose(&bzerror, self->fp);
1072                 if (bzerror != BZ_OK) {
1073                         Util_CatchBZ2Error(bzerror);
1074                         goto cleanup;
1075                 }
1076                 ret = PyObject_CallMethod(self->file, "seek", "(i)", 0);
1077                 if (!ret)
1078                         goto cleanup;
1079                 Py_DECREF(ret);
1080                 ret = NULL;
1081                 self->pos = 0;
1082                 self->fp = BZ2_bzReadOpen(&bzerror, PyFile_AsFile(self->file),
1083                                           0, 0, NULL, 0);
1084                 if (bzerror != BZ_OK) {
1085                         Util_CatchBZ2Error(bzerror);
1086                         goto cleanup;
1087                 }
1088                 self->mode = MODE_READ;
1089         }
1090
1091         if (offset <= 0 || self->mode == MODE_READ_EOF)
1092                 goto exit;
1093
1094         /* Before getting here, offset must be set to the number of bytes
1095          * to walk forward. */
1096         for (;;) {
1097                 if (offset-bytesread > buffersize)
1098                         readsize = buffersize;
1099                 else
1100                         /* offset might be wider that readsize, but the result
1101                          * of the subtraction is bound by buffersize (see the
1102                          * condition above). buffersize is 8192. */
1103                         readsize = (size_t)(offset-bytesread);
1104                 Py_BEGIN_ALLOW_THREADS
1105                 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
1106                                                  buffer, readsize, self);
1107                 self->pos += chunksize;
1108                 Py_END_ALLOW_THREADS
1109                 bytesread += chunksize;
1110                 if (bzerror == BZ_STREAM_END) {
1111                         self->size = self->pos;
1112                         self->mode = MODE_READ_EOF;
1113                         break;
1114                 } else if (bzerror != BZ_OK) {
1115                         Util_CatchBZ2Error(bzerror);
1116                         goto cleanup;
1117                 }
1118                 if (bytesread == offset)
1119                         break;
1120         }
1121
1122 exit:
1123         Py_INCREF(Py_None);
1124         ret = Py_None;
1125
1126 cleanup:
1127         RELEASE_LOCK(self);
1128         return ret;
1129 }
1130
1131 PyDoc_STRVAR(BZ2File_tell__doc__,
1132 "tell() -> int\n\
1133 \n\
1134 Return the current file position, an integer (may be a long integer).\n\
1135 ");
1136
1137 static PyObject *
1138 BZ2File_tell(BZ2FileObject *self, PyObject *args)
1139 {
1140         PyObject *ret = NULL;
1141
1142         if (self->mode == MODE_CLOSED) {
1143                 PyErr_SetString(PyExc_ValueError,
1144                                 "I/O operation on closed file");
1145                 goto cleanup;
1146         }
1147
1148 #if !defined(HAVE_LARGEFILE_SUPPORT)
1149         ret = PyInt_FromLong(self->pos);
1150 #else
1151         ret = PyLong_FromLongLong(self->pos);
1152 #endif
1153
1154 cleanup:
1155         return ret;
1156 }
1157
1158 PyDoc_STRVAR(BZ2File_close__doc__,
1159 "close() -> None or (perhaps) an integer\n\
1160 \n\
1161 Close the file. Sets data attribute .closed to true. A closed file\n\
1162 cannot be used for further I/O operations. close() may be called more\n\
1163 than once without error.\n\
1164 ");
1165
1166 static PyObject *
1167 BZ2File_close(BZ2FileObject *self)
1168 {
1169         PyObject *ret = NULL;
1170         int bzerror = BZ_OK;
1171
1172         ACQUIRE_LOCK(self);
1173         switch (self->mode) {
1174                 case MODE_READ:
1175                 case MODE_READ_EOF:
1176                         BZ2_bzReadClose(&bzerror, self->fp);
1177                         break;
1178                 case MODE_WRITE:
1179                         BZ2_bzWriteClose(&bzerror, self->fp,
1180                                          0, NULL, NULL);
1181                         break;
1182         }
1183         self->mode = MODE_CLOSED;
1184         ret = PyObject_CallMethod(self->file, "close", NULL);
1185         if (bzerror != BZ_OK) {
1186                 Util_CatchBZ2Error(bzerror);
1187                 Py_XDECREF(ret);
1188                 ret = NULL;
1189         }
1190
1191         RELEASE_LOCK(self);
1192         return ret;
1193 }
1194
/* Forward declaration: BZ2File_getiter is defined after this table but is
 * referenced by the "xreadlines" entry below. */
static PyObject *BZ2File_getiter(BZ2FileObject *self);

/* Method table of BZ2File objects: Python-visible name, C implementation,
 * calling convention flags and docstring for each method. */
static PyMethodDef BZ2File_methods[] = {
        {"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__},
        {"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__doc__},
        {"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readlines__doc__},
        /* NOTE(review): BZ2File_getiter takes only 'self', yet it is
         * registered as METH_VARARGS, so any arguments passed to
         * xreadlines() are never parsed or rejected -- consider whether
         * METH_NOARGS was intended. */
        {"xreadlines", (PyCFunction)BZ2File_getiter, METH_VARARGS, BZ2File_xreadlines__doc__},
        {"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__},
        {"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__doc__},
        {"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__},
        {"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__},
        {"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__},
        {NULL,          NULL}           /* sentinel */
};
1209
1210
1211 /* ===================================================================== */
1212 /* Getters and setters of BZ2File. */
1213
1214 /* This is a hacked version of Python's fileobject.c:get_newlines(). */
1215 static PyObject *
1216 BZ2File_get_newlines(BZ2FileObject *self, void *closure)
1217 {
1218         switch (self->f_newlinetypes) {
1219         case NEWLINE_UNKNOWN:
1220                 Py_INCREF(Py_None);
1221                 return Py_None;
1222         case NEWLINE_CR:
1223                 return PyString_FromString("\r");
1224         case NEWLINE_LF:
1225                 return PyString_FromString("\n");
1226         case NEWLINE_CR|NEWLINE_LF:
1227                 return Py_BuildValue("(ss)", "\r", "\n");
1228         case NEWLINE_CRLF:
1229                 return PyString_FromString("\r\n");
1230         case NEWLINE_CR|NEWLINE_CRLF:
1231                 return Py_BuildValue("(ss)", "\r", "\r\n");
1232         case NEWLINE_LF|NEWLINE_CRLF:
1233                 return Py_BuildValue("(ss)", "\n", "\r\n");
1234         case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF:
1235                 return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
1236         default:
1237                 PyErr_Format(PyExc_SystemError,
1238                              "Unknown newlines value 0x%x\n",
1239                              self->f_newlinetypes);
1240                 return NULL;
1241         }
1242 }
1243
1244 static PyObject *
1245 BZ2File_get_closed(BZ2FileObject *self, void *closure)
1246 {
1247         return PyInt_FromLong(self->mode == MODE_CLOSED);
1248 }
1249
1250 static PyObject *
1251 BZ2File_get_mode(BZ2FileObject *self, void *closure)
1252 {
1253         return PyObject_GetAttrString(self->file, "mode");
1254 }
1255
1256 static PyObject *
1257 BZ2File_get_name(BZ2FileObject *self, void *closure)
1258 {
1259         return PyObject_GetAttrString(self->file, "name");
1260 }
1261
/* Computed attributes of BZ2File objects. Every entry has a getter and a
 * NULL setter: attribute name, getter, setter, docstring. */
static PyGetSetDef BZ2File_getset[] = {
        {"closed", (getter)BZ2File_get_closed, NULL,
                        "True if the file is closed"},
        {"newlines", (getter)BZ2File_get_newlines, NULL,
                        "end-of-line convention used in this file"},
        /* 'mode' and 'name' are read from the wrapped file object. */
        {"mode", (getter)BZ2File_get_mode, NULL,
                        "file mode ('r', 'w', or 'U')"},
        {"name", (getter)BZ2File_get_name, NULL,
                        "file name"},
        {NULL}  /* Sentinel */
};
1273
1274
1275 /* ===================================================================== */
1276 /* Members of BZ2File_Type. */
1277
/* OFF(x): byte offset of field x inside BZ2FileObject, for the member
 * table below. Any earlier definition of OFF is discarded first. */
#undef OFF
#define OFF(x) offsetof(BZ2FileObject, x)

/* Struct fields exposed directly as Python attributes. */
static PyMemberDef BZ2File_members[] = {
        /* Flags value 0: no restriction flags are set on this member. */
        {"softspace",   T_INT,          OFF(f_softspace), 0,
         "flag indicating that a space needs to be printed; used by print"},
        {NULL}  /* Sentinel */
};
1286
1287 /* ===================================================================== */
1288 /* Slot definitions for BZ2File_Type. */
1289
1290 static int
1291 BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
1292 {
1293         static char *kwlist[] = {"filename", "mode", "buffering",
1294                                        "compresslevel", 0};
1295         PyObject *name;
1296         char *mode = "r";
1297         int buffering = -1;
1298         int compresslevel = 9;
1299         int bzerror;
1300         int mode_char = 0;
1301
1302         self->size = -1;
1303
1304         if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|sii:BZ2File",
1305                                          kwlist, &name, &mode, &buffering,
1306                                          &compresslevel))
1307                 return -1;
1308
1309         if (compresslevel < 1 || compresslevel > 9) {
1310                 PyErr_SetString(PyExc_ValueError,
1311                                 "compresslevel must be between 1 and 9");
1312                 return -1;
1313         }
1314
1315         for (;;) {
1316                 int error = 0;
1317                 switch (*mode) {
1318                         case 'r':
1319                         case 'w':
1320                                 if (mode_char)
1321                                         error = 1;
1322                                 mode_char = *mode;
1323                                 break;
1324
1325                         case 'b':
1326                                 break;
1327
1328                         case 'U':
1329 #ifdef __VMS
1330                                 self->f_univ_newline = 0;
1331 #else
1332                                 self->f_univ_newline = 1;
1333 #endif
1334                                 break;
1335
1336                         default:
1337                                 error = 1;
1338                                 break;
1339                 }
1340                 if (error) {
1341                         PyErr_Format(PyExc_ValueError,
1342                                      "invalid mode char %c", *mode);
1343                         return -1;
1344                 }
1345                 mode++;
1346                 if (*mode == '\0')
1347                         break;
1348         }
1349
1350         if (mode_char == 0) {
1351                 mode_char = 'r';
1352         }
1353
1354         mode = (mode_char == 'r') ? "rb" : "wb";
1355
1356         self->file = PyObject_CallFunction((PyObject*)&PyFile_Type, "(Osi)",
1357                                            name, mode, buffering);
1358         if (self->file == NULL)
1359                 return -1;
1360
1361         /* From now on, we have stuff to dealloc, so jump to error label
1362          * instead of returning */
1363
1364 #ifdef WITH_THREAD
1365         self->lock = PyThread_allocate_lock();
1366         if (!self->lock) {
1367                 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1368                 goto error;
1369         }
1370 #endif
1371
1372         if (mode_char == 'r')
1373                 self->fp = BZ2_bzReadOpen(&bzerror,
1374                                           PyFile_AsFile(self->file),
1375                                           0, 0, NULL, 0);
1376         else
1377                 self->fp = BZ2_bzWriteOpen(&bzerror,
1378                                            PyFile_AsFile(self->file),
1379                                            compresslevel, 0, 0);
1380
1381         if (bzerror != BZ_OK) {
1382                 Util_CatchBZ2Error(bzerror);
1383                 goto error;
1384         }
1385
1386         self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE;
1387
1388         return 0;
1389
1390 error:
1391         Py_CLEAR(self->file);
1392 #ifdef WITH_THREAD
1393         if (self->lock) {
1394                 PyThread_free_lock(self->lock);
1395                 self->lock = NULL;
1396         }
1397 #endif
1398         return -1;
1399 }
1400
1401 static void
1402 BZ2File_dealloc(BZ2FileObject *self)
1403 {
1404         int bzerror;
1405 #ifdef WITH_THREAD
1406         if (self->lock)
1407                 PyThread_free_lock(self->lock);
1408 #endif
1409         switch (self->mode) {
1410                 case MODE_READ:
1411                 case MODE_READ_EOF:
1412                         BZ2_bzReadClose(&bzerror, self->fp);
1413                         break;
1414                 case MODE_WRITE:
1415                         BZ2_bzWriteClose(&bzerror, self->fp,
1416                                          0, NULL, NULL);
1417                         break;
1418         }
1419         Util_DropReadAhead(self);
1420         Py_XDECREF(self->file);
1421         self->ob_type->tp_free((PyObject *)self);
1422 }
1423
1424 /* This is a hacked version of Python's fileobject.c:file_getiter(). */
1425 static PyObject *
1426 BZ2File_getiter(BZ2FileObject *self)
1427 {
1428         if (self->mode == MODE_CLOSED) {
1429                 PyErr_SetString(PyExc_ValueError,
1430                                 "I/O operation on closed file");
1431                 return NULL;
1432         }
1433         Py_INCREF((PyObject*)self);
1434         return (PyObject *)self;
1435 }
1436
1437 /* This is a hacked version of Python's fileobject.c:file_iternext(). */
1438 #define READAHEAD_BUFSIZE 8192
1439 static PyObject *
1440 BZ2File_iternext(BZ2FileObject *self)
1441 {
1442         PyStringObject* ret;
1443         ACQUIRE_LOCK(self);
1444         if (self->mode == MODE_CLOSED) {
1445                 PyErr_SetString(PyExc_ValueError,
1446                                 "I/O operation on closed file");
1447                 return NULL;
1448         }
1449         ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE);
1450         RELEASE_LOCK(self);
1451         if (ret == NULL || PyString_GET_SIZE(ret) == 0) {
1452                 Py_XDECREF(ret);
1453                 return NULL;
1454         }
1455         return (PyObject *)ret;
1456 }
1457
1458 /* ===================================================================== */
1459 /* BZ2File_Type definition. */
1460
1461 PyDoc_VAR(BZ2File__doc__) =
1462 PyDoc_STR(
1463 "BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
1464 \n\
1465 Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
1466 writing. When opened for writing, the file will be created if it doesn't\n\
1467 exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
1468 unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
1469 is given, must be a number between 1 and 9.\n\
1470 ")
1471 PyDoc_STR(
1472 "\n\
1473 Add a 'U' to mode to open the file for input with universal newline\n\
1474 support. Any line ending in the input file will be seen as a '\\n' in\n\
1475 Python. Also, a file so opened gains the attribute 'newlines'; the value\n\
1476 for this attribute is one of None (no newline read yet), '\\r', '\\n',\n\
1477 '\\r\\n' or a tuple containing all the newline types seen. Universal\n\
1478 newlines are available only when reading.\n\
1479 ")
1480 ;
1481
/* Type object for bz2.BZ2File. The initializer is positional; each slot
 * is labelled with the PyTypeObject field it fills. The type can be
 * subclassed (Py_TPFLAGS_BASETYPE), is its own iterator (tp_iter and
 * tp_iternext), and uses the generic attribute, allocation and
 * construction routines together with BZ2File_init and BZ2File_dealloc. */
static PyTypeObject BZ2File_Type = {
        PyObject_HEAD_INIT(NULL)
        0,                      /*ob_size*/
        "bz2.BZ2File",          /*tp_name*/
        sizeof(BZ2FileObject),  /*tp_basicsize*/
        0,                      /*tp_itemsize*/
        (destructor)BZ2File_dealloc, /*tp_dealloc*/
        0,                      /*tp_print*/
        0,                      /*tp_getattr*/
        0,                      /*tp_setattr*/
        0,                      /*tp_compare*/
        0,                      /*tp_repr*/
        0,                      /*tp_as_number*/
        0,                      /*tp_as_sequence*/
        0,                      /*tp_as_mapping*/
        0,                      /*tp_hash*/
        0,                      /*tp_call*/
        0,                      /*tp_str*/
        PyObject_GenericGetAttr,/*tp_getattro*/
        PyObject_GenericSetAttr,/*tp_setattro*/
        0,                      /*tp_as_buffer*/
        Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
        BZ2File__doc__,         /*tp_doc*/
        0,                      /*tp_traverse*/
        0,                      /*tp_clear*/
        0,                      /*tp_richcompare*/
        0,                      /*tp_weaklistoffset*/
        (getiterfunc)BZ2File_getiter, /*tp_iter*/
        (iternextfunc)BZ2File_iternext, /*tp_iternext*/
        BZ2File_methods,        /*tp_methods*/
        BZ2File_members,        /*tp_members*/
        BZ2File_getset,         /*tp_getset*/
        0,                      /*tp_base*/
        0,                      /*tp_dict*/
        0,                      /*tp_descr_get*/
        0,                      /*tp_descr_set*/
        0,                      /*tp_dictoffset*/
        (initproc)BZ2File_init, /*tp_init*/
        PyType_GenericAlloc,    /*tp_alloc*/
        PyType_GenericNew,      /*tp_new*/
        _PyObject_Del,          /*tp_free*/
        0,                      /*tp_is_gc*/
};
1525
1526
1527 /* ===================================================================== */
1528 /* Methods of BZ2Comp. */
1529
1530 PyDoc_STRVAR(BZ2Comp_compress__doc__,
1531 "compress(data) -> string\n\
1532 \n\
1533 Provide more data to the compressor object. It will return chunks of\n\
1534 compressed data whenever possible. When you've finished providing data\n\
1535 to compress, call the flush() method to finish the compression process,\n\
1536 and return what is left in the internal buffers.\n\
1537 ");
1538
1539 static PyObject *
1540 BZ2Comp_compress(BZ2CompObject *self, PyObject *args)
1541 {
1542         char *data;
1543         int datasize;
1544         int bufsize = SMALLCHUNK;
1545         PY_LONG_LONG totalout;
1546         PyObject *ret = NULL;
1547         bz_stream *bzs = &self->bzs;
1548         int bzerror;
1549
1550         if (!PyArg_ParseTuple(args, "s#:compress", &data, &datasize))
1551                 return NULL;
1552
1553         if (datasize == 0)
1554                 return PyString_FromString("");
1555
1556         ACQUIRE_LOCK(self);
1557         if (!self->running) {
1558                 PyErr_SetString(PyExc_ValueError,
1559                                 "this object was already flushed");
1560                 goto error;
1561         }
1562
1563         ret = PyString_FromStringAndSize(NULL, bufsize);
1564         if (!ret)
1565                 goto error;
1566
1567         bzs->next_in = data;
1568         bzs->avail_in = datasize;
1569         bzs->next_out = BUF(ret);
1570         bzs->avail_out = bufsize;
1571
1572         totalout = BZS_TOTAL_OUT(bzs);
1573
1574         for (;;) {
1575                 Py_BEGIN_ALLOW_THREADS
1576                 bzerror = BZ2_bzCompress(bzs, BZ_RUN);
1577                 Py_END_ALLOW_THREADS
1578                 if (bzerror != BZ_RUN_OK) {
1579                         Util_CatchBZ2Error(bzerror);
1580                         goto error;
1581                 }
1582                 if (bzs->avail_in == 0)
1583                         break; /* no more input data */
1584                 if (bzs->avail_out == 0) {
1585                         bufsize = Util_NewBufferSize(bufsize);
1586                         if (_PyString_Resize(&ret, bufsize) < 0) {
1587                                 BZ2_bzCompressEnd(bzs);
1588                                 goto error;
1589                         }
1590                         bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1591                                                     - totalout);
1592                         bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1593                 }
1594         }
1595
1596         _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
1597
1598         RELEASE_LOCK(self);
1599         return ret;
1600
1601 error:
1602         RELEASE_LOCK(self);
1603         Py_XDECREF(ret);
1604         return NULL;
1605 }
1606
1607 PyDoc_STRVAR(BZ2Comp_flush__doc__,
1608 "flush() -> string\n\
1609 \n\
1610 Finish the compression process and return what is left in internal buffers.\n\
1611 You must not use the compressor object after calling this method.\n\
1612 ");
1613
1614 static PyObject *
1615 BZ2Comp_flush(BZ2CompObject *self)
1616 {
1617         int bufsize = SMALLCHUNK;
1618         PyObject *ret = NULL;
1619         bz_stream *bzs = &self->bzs;
1620         PY_LONG_LONG totalout;
1621         int bzerror;
1622
1623         ACQUIRE_LOCK(self);
1624         if (!self->running) {
1625                 PyErr_SetString(PyExc_ValueError, "object was already "
1626                                                   "flushed");
1627                 goto error;
1628         }
1629         self->running = 0;
1630
1631         ret = PyString_FromStringAndSize(NULL, bufsize);
1632         if (!ret)
1633                 goto error;
1634
1635         bzs->next_out = BUF(ret);
1636         bzs->avail_out = bufsize;
1637
1638         totalout = BZS_TOTAL_OUT(bzs);
1639
1640         for (;;) {
1641                 Py_BEGIN_ALLOW_THREADS
1642                 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1643                 Py_END_ALLOW_THREADS
1644                 if (bzerror == BZ_STREAM_END) {
1645                         break;
1646                 } else if (bzerror != BZ_FINISH_OK) {
1647                         Util_CatchBZ2Error(bzerror);
1648                         goto error;
1649                 }
1650                 if (bzs->avail_out == 0) {
1651                         bufsize = Util_NewBufferSize(bufsize);
1652                         if (_PyString_Resize(&ret, bufsize) < 0)
1653                                 goto error;
1654                         bzs->next_out = BUF(ret);
1655                         bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1656                                                     - totalout);
1657                         bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1658                 }
1659         }
1660
1661         if (bzs->avail_out != 0)
1662                 _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
1663
1664         RELEASE_LOCK(self);
1665         return ret;
1666
1667 error:
1668         RELEASE_LOCK(self);
1669         Py_XDECREF(ret);
1670         return NULL;
1671 }
1672
1673 static PyMethodDef BZ2Comp_methods[] = {
1674         {"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS,
1675          BZ2Comp_compress__doc__},
1676         {"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS,
1677          BZ2Comp_flush__doc__},
1678         {NULL,          NULL}           /* sentinel */
1679 };
1680
1681
1682 /* ===================================================================== */
1683 /* Slot definitions for BZ2Comp_Type. */
1684
1685 static int
1686 BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs)
1687 {
1688         int compresslevel = 9;
1689         int bzerror;
1690         static char *kwlist[] = {"compresslevel", 0};
1691
1692         if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor",
1693                                          kwlist, &compresslevel))
1694                 return -1;
1695
1696         if (compresslevel < 1 || compresslevel > 9) {
1697                 PyErr_SetString(PyExc_ValueError,
1698                                 "compresslevel must be between 1 and 9");
1699                 goto error;
1700         }
1701
1702 #ifdef WITH_THREAD
1703         self->lock = PyThread_allocate_lock();
1704         if (!self->lock) {
1705                 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1706                 goto error;
1707         }
1708 #endif
1709
1710         memset(&self->bzs, 0, sizeof(bz_stream));
1711         bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
1712         if (bzerror != BZ_OK) {
1713                 Util_CatchBZ2Error(bzerror);
1714                 goto error;
1715         }
1716
1717         self->running = 1;
1718
1719         return 0;
1720 error:
1721 #ifdef WITH_THREAD
1722         if (self->lock) {
1723                 PyThread_free_lock(self->lock);
1724                 self->lock = NULL;
1725         }
1726 #endif
1727         return -1;
1728 }
1729
1730 static void
1731 BZ2Comp_dealloc(BZ2CompObject *self)
1732 {
1733 #ifdef WITH_THREAD
1734         if (self->lock)
1735                 PyThread_free_lock(self->lock);
1736 #endif
1737         BZ2_bzCompressEnd(&self->bzs);
1738         self->ob_type->tp_free((PyObject *)self);
1739 }
1740
1741
1742 /* ===================================================================== */
1743 /* BZ2Comp_Type definition. */
1744
1745 PyDoc_STRVAR(BZ2Comp__doc__,
1746 "BZ2Compressor([compresslevel=9]) -> compressor object\n\
1747 \n\
1748 Create a new compressor object. This object may be used to compress\n\
1749 data sequentially. If you want to compress data in one shot, use the\n\
1750 compress() function instead. The compresslevel parameter, if given,\n\
1751 must be a number between 1 and 9.\n\
1752 ");
1753
1754 static PyTypeObject BZ2Comp_Type = {
1755         PyObject_HEAD_INIT(NULL)
1756         0,                      /*ob_size*/
1757         "bz2.BZ2Compressor",    /*tp_name*/
1758         sizeof(BZ2CompObject),  /*tp_basicsize*/
1759         0,                      /*tp_itemsize*/
1760         (destructor)BZ2Comp_dealloc, /*tp_dealloc*/
1761         0,                      /*tp_print*/
1762         0,                      /*tp_getattr*/
1763         0,                      /*tp_setattr*/
1764         0,                      /*tp_compare*/
1765         0,                      /*tp_repr*/
1766         0,                      /*tp_as_number*/
1767         0,                      /*tp_as_sequence*/
1768         0,                      /*tp_as_mapping*/
1769         0,                      /*tp_hash*/
1770         0,                      /*tp_call*/
1771         0,                      /*tp_str*/
1772         PyObject_GenericGetAttr,/*tp_getattro*/
1773         PyObject_GenericSetAttr,/*tp_setattro*/
1774         0,                      /*tp_as_buffer*/
1775         Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1776         BZ2Comp__doc__,         /*tp_doc*/
1777         0,                      /*tp_traverse*/
1778         0,                      /*tp_clear*/
1779         0,                      /*tp_richcompare*/
1780         0,                      /*tp_weaklistoffset*/
1781         0,                      /*tp_iter*/
1782         0,                      /*tp_iternext*/
1783         BZ2Comp_methods,        /*tp_methods*/
1784         0,                      /*tp_members*/
1785         0,                      /*tp_getset*/
1786         0,                      /*tp_base*/
1787         0,                      /*tp_dict*/
1788         0,                      /*tp_descr_get*/
1789         0,                      /*tp_descr_set*/
1790         0,                      /*tp_dictoffset*/
1791         (initproc)BZ2Comp_init, /*tp_init*/
1792         PyType_GenericAlloc,    /*tp_alloc*/
1793         PyType_GenericNew,      /*tp_new*/
1794         _PyObject_Del,          /*tp_free*/
1795         0,                      /*tp_is_gc*/
1796 };
1797
1798
1799 /* ===================================================================== */
1800 /* Members of BZ2Decomp. */
1801
1802 #undef OFF
1803 #define OFF(x) offsetof(BZ2DecompObject, x)
1804
1805 static PyMemberDef BZ2Decomp_members[] = {
1806         {"unused_data", T_OBJECT, OFF(unused_data), RO},
1807         {NULL}  /* Sentinel */
1808 };
1809
1810
1811 /* ===================================================================== */
1812 /* Methods of BZ2Decomp. */
1813
1814 PyDoc_STRVAR(BZ2Decomp_decompress__doc__,
1815 "decompress(data) -> string\n\
1816 \n\
1817 Provide more data to the decompressor object. It will return chunks\n\
1818 of decompressed data whenever possible. If you try to decompress data\n\
1819 after the end of stream is found, EOFError will be raised. If any data\n\
1820 was found after the end of stream, it'll be ignored and saved in\n\
1821 unused_data attribute.\n\
1822 ");
1823
1824 static PyObject *
1825 BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args)
1826 {
1827         char *data;
1828         int datasize;
1829         int bufsize = SMALLCHUNK;
1830         PY_LONG_LONG totalout;
1831         PyObject *ret = NULL;
1832         bz_stream *bzs = &self->bzs;
1833         int bzerror;
1834
1835         if (!PyArg_ParseTuple(args, "s#:decompress", &data, &datasize))
1836                 return NULL;
1837
1838         ACQUIRE_LOCK(self);
1839         if (!self->running) {
1840                 PyErr_SetString(PyExc_EOFError, "end of stream was "
1841                                                 "already found");
1842                 goto error;
1843         }
1844
1845         ret = PyString_FromStringAndSize(NULL, bufsize);
1846         if (!ret)
1847                 goto error;
1848
1849         bzs->next_in = data;
1850         bzs->avail_in = datasize;
1851         bzs->next_out = BUF(ret);
1852         bzs->avail_out = bufsize;
1853
1854         totalout = BZS_TOTAL_OUT(bzs);
1855
1856         for (;;) {
1857                 Py_BEGIN_ALLOW_THREADS
1858                 bzerror = BZ2_bzDecompress(bzs);
1859                 Py_END_ALLOW_THREADS
1860                 if (bzerror == BZ_STREAM_END) {
1861                         if (bzs->avail_in != 0) {
1862                                 Py_DECREF(self->unused_data);
1863                                 self->unused_data =
1864                                     PyString_FromStringAndSize(bzs->next_in,
1865                                                                bzs->avail_in);
1866                         }
1867                         self->running = 0;
1868                         break;
1869                 }
1870                 if (bzerror != BZ_OK) {
1871                         Util_CatchBZ2Error(bzerror);
1872                         goto error;
1873                 }
1874                 if (bzs->avail_in == 0)
1875                         break; /* no more input data */
1876                 if (bzs->avail_out == 0) {
1877                         bufsize = Util_NewBufferSize(bufsize);
1878                         if (_PyString_Resize(&ret, bufsize) < 0) {
1879                                 BZ2_bzDecompressEnd(bzs);
1880                                 goto error;
1881                         }
1882                         bzs->next_out = BUF(ret);
1883                         bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1884                                                     - totalout);
1885                         bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1886                 }
1887         }
1888
1889         if (bzs->avail_out != 0)
1890                 _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
1891
1892         RELEASE_LOCK(self);
1893         return ret;
1894
1895 error:
1896         RELEASE_LOCK(self);
1897         Py_XDECREF(ret);
1898         return NULL;
1899 }
1900
1901 static PyMethodDef BZ2Decomp_methods[] = {
1902         {"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_decompress__doc__},
1903         {NULL,          NULL}           /* sentinel */
1904 };
1905
1906
1907 /* ===================================================================== */
1908 /* Slot definitions for BZ2Decomp_Type. */
1909
1910 static int
1911 BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs)
1912 {
1913         int bzerror;
1914
1915         if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
1916                 return -1;
1917
1918 #ifdef WITH_THREAD
1919         self->lock = PyThread_allocate_lock();
1920         if (!self->lock) {
1921                 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1922                 goto error;
1923         }
1924 #endif
1925
1926         self->unused_data = PyString_FromString("");
1927         if (!self->unused_data)
1928                 goto error;
1929
1930         memset(&self->bzs, 0, sizeof(bz_stream));
1931         bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
1932         if (bzerror != BZ_OK) {
1933                 Util_CatchBZ2Error(bzerror);
1934                 goto error;
1935         }
1936
1937         self->running = 1;
1938
1939         return 0;
1940
1941 error:
1942 #ifdef WITH_THREAD
1943         if (self->lock) {
1944                 PyThread_free_lock(self->lock);
1945                 self->lock = NULL;
1946         }
1947 #endif
1948         Py_CLEAR(self->unused_data);
1949         return -1;
1950 }
1951
1952 static void
1953 BZ2Decomp_dealloc(BZ2DecompObject *self)
1954 {
1955 #ifdef WITH_THREAD
1956         if (self->lock)
1957                 PyThread_free_lock(self->lock);
1958 #endif
1959         Py_XDECREF(self->unused_data);
1960         BZ2_bzDecompressEnd(&self->bzs);
1961         self->ob_type->tp_free((PyObject *)self);
1962 }
1963
1964
1965 /* ===================================================================== */
1966 /* BZ2Decomp_Type definition. */
1967
1968 PyDoc_STRVAR(BZ2Decomp__doc__,
1969 "BZ2Decompressor() -> decompressor object\n\
1970 \n\
1971 Create a new decompressor object. This object may be used to decompress\n\
1972 data sequentially. If you want to decompress data in one shot, use the\n\
1973 decompress() function instead.\n\
1974 ");
1975
1976 static PyTypeObject BZ2Decomp_Type = {
1977         PyObject_HEAD_INIT(NULL)
1978         0,                      /*ob_size*/
1979         "bz2.BZ2Decompressor",  /*tp_name*/
1980         sizeof(BZ2DecompObject), /*tp_basicsize*/
1981         0,                      /*tp_itemsize*/
1982         (destructor)BZ2Decomp_dealloc, /*tp_dealloc*/
1983         0,                      /*tp_print*/
1984         0,                      /*tp_getattr*/
1985         0,                      /*tp_setattr*/
1986         0,                      /*tp_compare*/
1987         0,                      /*tp_repr*/
1988         0,                      /*tp_as_number*/
1989         0,                      /*tp_as_sequence*/
1990         0,                      /*tp_as_mapping*/
1991         0,                      /*tp_hash*/
1992         0,                      /*tp_call*/
1993         0,                      /*tp_str*/
1994         PyObject_GenericGetAttr,/*tp_getattro*/
1995         PyObject_GenericSetAttr,/*tp_setattro*/
1996         0,                      /*tp_as_buffer*/
1997         Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1998         BZ2Decomp__doc__,       /*tp_doc*/
1999         0,                      /*tp_traverse*/
2000         0,                      /*tp_clear*/
2001         0,                      /*tp_richcompare*/
2002         0,                      /*tp_weaklistoffset*/
2003         0,                      /*tp_iter*/
2004         0,                      /*tp_iternext*/
2005         BZ2Decomp_methods,      /*tp_methods*/
2006         BZ2Decomp_members,      /*tp_members*/
2007         0,                      /*tp_getset*/
2008         0,                      /*tp_base*/
2009         0,                      /*tp_dict*/
2010         0,                      /*tp_descr_get*/
2011         0,                      /*tp_descr_set*/
2012         0,                      /*tp_dictoffset*/
2013         (initproc)BZ2Decomp_init, /*tp_init*/
2014         PyType_GenericAlloc,    /*tp_alloc*/
2015         PyType_GenericNew,      /*tp_new*/
2016         _PyObject_Del,          /*tp_free*/
2017         0,                      /*tp_is_gc*/
2018 };
2019
2020
2021 /* ===================================================================== */
2022 /* Module functions. */
2023
2024 PyDoc_STRVAR(bz2_compress__doc__,
2025 "compress(data [, compresslevel=9]) -> string\n\
2026 \n\
2027 Compress data in one shot. If you want to compress data sequentially,\n\
2028 use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
2029 given, must be a number between 1 and 9.\n\
2030 ");
2031
2032 static PyObject *
2033 bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs)
2034 {
2035         int compresslevel=9;
2036         char *data;
2037         int datasize;
2038         int bufsize;
2039         PyObject *ret = NULL;
2040         bz_stream _bzs;
2041         bz_stream *bzs = &_bzs;
2042         int bzerror;
2043         static char *kwlist[] = {"data", "compresslevel", 0};
2044
2045         if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|i",
2046                                          kwlist, &data, &datasize,
2047                                          &compresslevel))
2048                 return NULL;
2049
2050         if (compresslevel < 1 || compresslevel > 9) {
2051                 PyErr_SetString(PyExc_ValueError,
2052                                 "compresslevel must be between 1 and 9");
2053                 return NULL;
2054         }
2055
2056         /* Conforming to bz2 manual, this is large enough to fit compressed
2057          * data in one shot. We will check it later anyway. */
2058         bufsize = datasize + (datasize/100+1) + 600;
2059
2060         ret = PyString_FromStringAndSize(NULL, bufsize);
2061         if (!ret)
2062                 return NULL;
2063
2064         memset(bzs, 0, sizeof(bz_stream));
2065
2066         bzs->next_in = data;
2067         bzs->avail_in = datasize;
2068         bzs->next_out = BUF(ret);
2069         bzs->avail_out = bufsize;
2070
2071         bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0);
2072         if (bzerror != BZ_OK) {
2073                 Util_CatchBZ2Error(bzerror);
2074                 Py_DECREF(ret);
2075                 return NULL;
2076         }
2077
2078         for (;;) {
2079                 Py_BEGIN_ALLOW_THREADS
2080                 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
2081                 Py_END_ALLOW_THREADS
2082                 if (bzerror == BZ_STREAM_END) {
2083                         break;
2084                 } else if (bzerror != BZ_FINISH_OK) {
2085                         BZ2_bzCompressEnd(bzs);
2086                         Util_CatchBZ2Error(bzerror);
2087                         Py_DECREF(ret);
2088                         return NULL;
2089                 }
2090                 if (bzs->avail_out == 0) {
2091                         bufsize = Util_NewBufferSize(bufsize);
2092                         if (_PyString_Resize(&ret, bufsize) < 0) {
2093                                 BZ2_bzCompressEnd(bzs);
2094                                 Py_DECREF(ret);
2095                                 return NULL;
2096                         }
2097                         bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2098                         bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2099                 }
2100         }
2101
2102         if (bzs->avail_out != 0)
2103                 _PyString_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs));
2104         BZ2_bzCompressEnd(bzs);
2105
2106         return ret;
2107 }
2108
2109 PyDoc_STRVAR(bz2_decompress__doc__,
2110 "decompress(data) -> decompressed data\n\
2111 \n\
2112 Decompress data in one shot. If you want to decompress data sequentially,\n\
2113 use an instance of BZ2Decompressor instead.\n\
2114 ");
2115
2116 static PyObject *
2117 bz2_decompress(PyObject *self, PyObject *args)
2118 {
2119         char *data;
2120         int datasize;
2121         int bufsize = SMALLCHUNK;
2122         PyObject *ret;
2123         bz_stream _bzs;
2124         bz_stream *bzs = &_bzs;
2125         int bzerror;
2126
2127         if (!PyArg_ParseTuple(args, "s#:decompress", &data, &datasize))
2128                 return NULL;
2129
2130         if (datasize == 0)
2131                 return PyString_FromString("");
2132
2133         ret = PyString_FromStringAndSize(NULL, bufsize);
2134         if (!ret)
2135                 return NULL;
2136
2137         memset(bzs, 0, sizeof(bz_stream));
2138
2139         bzs->next_in = data;
2140         bzs->avail_in = datasize;
2141         bzs->next_out = BUF(ret);
2142         bzs->avail_out = bufsize;
2143
2144         bzerror = BZ2_bzDecompressInit(bzs, 0, 0);
2145         if (bzerror != BZ_OK) {
2146                 Util_CatchBZ2Error(bzerror);
2147                 Py_DECREF(ret);
2148                 return NULL;
2149         }
2150
2151         for (;;) {
2152                 Py_BEGIN_ALLOW_THREADS
2153                 bzerror = BZ2_bzDecompress(bzs);
2154                 Py_END_ALLOW_THREADS
2155                 if (bzerror == BZ_STREAM_END) {
2156                         break;
2157                 } else if (bzerror != BZ_OK) {
2158                         BZ2_bzDecompressEnd(bzs);
2159                         Util_CatchBZ2Error(bzerror);
2160                         Py_DECREF(ret);
2161                         return NULL;
2162                 }
2163                 if (bzs->avail_in == 0) {
2164                         BZ2_bzDecompressEnd(bzs);
2165                         PyErr_SetString(PyExc_ValueError,
2166                                         "couldn't find end of stream");
2167                         Py_DECREF(ret);
2168                         return NULL;
2169                 }
2170                 if (bzs->avail_out == 0) {
2171                         bufsize = Util_NewBufferSize(bufsize);
2172                         if (_PyString_Resize(&ret, bufsize) < 0) {
2173                                 BZ2_bzDecompressEnd(bzs);
2174                                 Py_DECREF(ret);
2175                                 return NULL;
2176                         }
2177                         bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2178                         bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2179                 }
2180         }
2181
2182         if (bzs->avail_out != 0)
2183                 _PyString_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs));
2184         BZ2_bzDecompressEnd(bzs);
2185
2186         return ret;
2187 }
2188
2189 static PyMethodDef bz2_methods[] = {
2190         {"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS,
2191                 bz2_compress__doc__},
2192         {"decompress", (PyCFunction) bz2_decompress, METH_VARARGS,
2193                 bz2_decompress__doc__},
2194         {NULL,          NULL}           /* sentinel */
2195 };
2196
2197 /* ===================================================================== */
2198 /* Initialization function. */
2199
2200 PyDoc_STRVAR(bz2__doc__,
2201 "The python bz2 module provides a comprehensive interface for\n\
2202 the bz2 compression library. It implements a complete file\n\
2203 interface, one shot (de)compression functions, and types for\n\
2204 sequential (de)compression.\n\
2205 ");
2206
2207 PyMODINIT_FUNC
2208 initbz2(void)
2209 {
2210         PyObject *m;
2211
2212         BZ2File_Type.ob_type = &PyType_Type;
2213         BZ2Comp_Type.ob_type = &PyType_Type;
2214         BZ2Decomp_Type.ob_type = &PyType_Type;
2215
2216         m = Py_InitModule3("bz2", bz2_methods, bz2__doc__);
2217         if (m == NULL)
2218                 return;
2219
2220         PyModule_AddObject(m, "__author__", PyString_FromString(__author__));
2221
2222         Py_INCREF(&BZ2File_Type);
2223         PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type);
2224
2225         Py_INCREF(&BZ2Comp_Type);
2226         PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type);
2227
2228         Py_INCREF(&BZ2Decomp_Type);
2229         PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type);
2230 }