Modules/binascii.c

   1 /*
   2 ** Routines to represent binary data in ASCII and vice-versa
   3 **
   4 ** This module currently supports the following encodings:
   5 ** uuencode:
   6 **      each line encodes 45 bytes (except possibly the last)
   7 **      First char encodes (binary) length, rest data
   8 **      each char encodes 6 bits, as follows:
   9 **      binary: 01234567 abcdefgh ijklmnop
  10 **      ascii:  012345 67abcd efghij klmnop
  11 **      ASCII encoding method is "excess-space": 000000 is encoded as ' ', etc.
  12 **      short binary data is zero-extended (so the bits are always in the
  13 **      right place), this does *not* reflect in the length.
  14 ** base64:
  15 **      Line breaks are insignificant, but lines are at most 76 chars
  16 **      each char encodes 6 bits, in similar order as uuencode/hqx. Encoding
  17 **      is done via a table.
  18 **      Short binary data is filled (in ASCII) with '='.
  19 ** hqx:
  20 **      File starts with introductory text, real data starts and ends
  21 **      with colons.
  22 **      Data consists of three similar parts: info, datafork, resourcefork.
  23 **      Each part is protected (at the end) with a 16-bit crc
  24 **      The binary data is run-length encoded, and then ascii-fied:
  25 **      binary: 01234567 abcdefgh ijklmnop
  26 **      ascii:  012345 67abcd efghij klmnop
  27 **      ASCII encoding is table-driven, see the code.
  28 **      Short binary data results in the runt ascii-byte being output with
  29 **      the bits in the right place.
  30 **
  31 ** While I was reading dozens of programs that encode or decode the formats
  32 ** here (documentation? hihi:-) I have formulated Jansen's Observation:
  33 **
  34 **      Programs that encode binary data in ASCII are written in
  35 **      such a style that they are as unreadable as possible. Devices used
  36 **      include unnecessary global variables, burying important tables
  37 **      in unrelated sourcefiles, putting functions in include files,
  38 **      using seemingly-descriptive variable names for different purposes,
  39 **      calls to empty subroutines and a host of others.
  40 **
  41 ** I have attempted to break with this tradition, but I guess that that
  42 ** does make the performance sub-optimal. Oh well, too bad...
  43 **
  44 ** Jack Jansen, CWI, July 1995.
  45 **
  46 ** Added support for quoted-printable encoding, based on rfc 1521 et al
  47 ** quoted-printable encoding specifies that non printable characters (anything
  48 ** below 32 and above 126) be encoded as =XX where XX is the hexadecimal value
  49 ** of the character.  It also specifies some other behavior to enable 8bit data
  50 ** in a mail message with little difficulty (maximum line sizes, protecting
  51 ** some cases of whitespace, etc).
  52 **
  53 ** Brandon Long, September 2001.
  54 */
  55
  56 #define PY_SSIZE_T_CLEAN
  57
  58 #include "Python.h"
  59 #ifdef USE_ZLIB_CRC32
  60 #include "zlib.h"
  61 #endif
  62
  63 static PyObject *Error;        /* exception passed to PyErr_SetString() for malformed input; initialised outside this part of the file */
  64 static PyObject *Incomplete;   /* exception set when encoded input stops mid-item (see a2b_hqx); initialised outside this part of the file */
  65
  66 /*
  67 ** hqx lookup table, ascii->binary.
  68 */
  69
  70 #define RUNCHAR 0x90   /* marker byte of the hqx run-length coding (escaped as RUNCHAR,0 by rlecode_hqx) */
  71
  72 #define DONE 0x7F      /* table_a2b_hqx sentinel: the terminating ':' */
  73 #define SKIP 0x7E      /* table_a2b_hqx sentinel: '\n' and '\r', ignored by the decoder */
  74 #define FAIL 0x7D      /* table_a2b_hqx sentinel: byte that is not legal in hqx data */
  75
  76 static unsigned char table_a2b_hqx[256] = {   /* indexed by input byte: its 6-bit value (0x00-0x3F), or DONE/SKIP/FAIL */
  77 /*       ^@    ^A    ^B    ^C    ^D    ^E    ^F    ^G   */
  78 /* 0*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
  79 /*       \b    \t    \n    ^K    ^L    \r    ^N    ^O   */
  80 /* 1*/  FAIL, FAIL, SKIP, FAIL, FAIL, SKIP, FAIL, FAIL,
  81 /*       ^P    ^Q    ^R    ^S    ^T    ^U    ^V    ^W   */
  82 /* 2*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
  83 /*       ^X    ^Y    ^Z    ^[    ^\    ^]    ^^    ^_   */
  84 /* 3*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
  85 /*              !     "     #     $     %     &     '   */
  86 /* 4*/  FAIL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
  87 /*        (     )     *     +     ,     -     .     /   */
  88 /* 5*/  0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, FAIL, FAIL,
  89 /*        0     1     2     3     4     5     6     7   */
  90 /* 6*/  0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, FAIL,
  91 /*        8     9     :     ;     <     =     >     ?   */
  92 /* 7*/  0x14, 0x15, DONE, FAIL, FAIL, FAIL, FAIL, FAIL,
  93 /*        @     A     B     C     D     E     F     G   */
  94 /* 8*/  0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D,
  95 /*        H     I     J     K     L     M     N     O   */
  96 /* 9*/  0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, FAIL,
  97 /*        P     Q     R     S     T     U     V     W   */
  98 /*10*/  0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, FAIL,
  99 /*        X     Y     Z     [     \     ]     ^     _   */
 100 /*11*/  0x2C, 0x2D, 0x2E, 0x2F, FAIL, FAIL, FAIL, FAIL,
 101 /*        `     a     b     c     d     e     f     g   */
 102 /*12*/  0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, FAIL,
 103 /*        h     i     j     k     l     m     n     o   */
 104 /*13*/  0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, FAIL, FAIL,
 105 /*        p     q     r     s     t     u     v     w   */
 106 /*14*/  0x3D, 0x3E, 0x3F, FAIL, FAIL, FAIL, FAIL, FAIL,
 107 /*        x     y     z     {     |     }     ~    ^?   */
 108 /*15*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 109 /*16*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 110         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 111         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 112         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 113         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 114         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 115         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 116         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 117         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 118         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 119         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 120         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 121         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 122         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 123         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 124         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 125 };
 126
 127 static unsigned char table_b2a_hqx[] =   /* hqx encode table: 6-bit value -> output character (indexed by b2a_hqx) */
 128 "!\"#$%&'()*+,-012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr";
 129
 130 static char table_a2b_base64[] = {   /* base64 decode table for 7-bit codes 0..127: 6-bit value, or -1 for a character outside the alphabet */
 131         -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
 132         -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
 133         -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
 134         52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1, 0,-1,-1, /* Note PAD->0 */
 135         -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
 136         15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
 137         -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
 138         41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
 139 };
 140
 141 #define BASE64_PAD '='   /* padding character that fills the last base64 quad */
 142
 143 /* Max binary chunk size; limited only by available memory (the bound keeps b2a_base64's bin_len*2 + 3 allocation within Py_ssize_t) */
 144 #define BASE64_MAXBIN ((PY_SSIZE_T_MAX - 3) / 2)
 145
 146 static unsigned char table_b2a_base64[] =   /* base64 encode table: 6-bit value -> output character (indexed by b2a_base64) */
 147 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
 148
 149
 150
 151 static unsigned short crctab_hqx[256] = {   /* 256-entry 16-bit lookup table; NOTE(review): presumably the byte-wise table for the hqx 16-bit CRC (entry 1 is 0x1021) - its user is outside this part of the file, confirm there */
 152         0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
 153         0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,
 154         0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6,
 155         0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de,
 156         0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485,
 157         0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d,
 158         0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4,
 159         0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc,
 160         0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823,
 161         0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b,
 162         0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12,
 163         0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a,
 164         0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41,
 165         0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49,
 166         0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70,
 167         0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78,
 168         0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f,
 169         0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067,
 170         0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e,
 171         0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256,
 172         0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d,
 173         0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
 174         0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c,
 175         0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634,
 176         0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab,
 177         0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3,
 178         0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a,
 179         0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92,
 180         0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9,
 181         0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1,
 182         0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8,
 183         0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0,
 184 };
 185
 186 PyDoc_STRVAR(doc_a2b_uu, "(ascii) -> bin. Decode a line of uuencoded data");
 187
 188 static PyObject *
 189 binascii_a2b_uu(PyObject *self, PyObject *args)
 190 {
 191         Py_buffer pascii;
 192         unsigned char *ascii_data, *bin_data;
 193         int leftbits = 0;
 194         unsigned char this_ch;
 195         unsigned int leftchar = 0;
 196         PyObject *rv;
 197         Py_ssize_t ascii_len, bin_len;
 198
 199         if ( !PyArg_ParseTuple(args, "y*:a2b_uu", &pascii) )
 200                 return NULL;
 201         ascii_data = pascii.buf;
 202         ascii_len = pascii.len;
 203
 204         assert(ascii_len >= 0);
 205
 206         /* First byte: binary data length (in bytes) */
 207         bin_len = (*ascii_data++ - ' ') & 077;
 208         ascii_len--;
 209
 210         /* Allocate the buffer */
 211         if ( (rv=PyBytes_FromStringAndSize(NULL, bin_len)) == NULL ) {
 212                 PyBuffer_Release(&pascii);
 213                 return NULL;
 214         }
 215         bin_data = (unsigned char *)PyBytes_AS_STRING(rv);
 216
 217         for( ; bin_len > 0 ; ascii_len--, ascii_data++ ) {
 218                 /* XXX is it really best to add NULs if there's no more data */
 219                 this_ch = (ascii_len > 0) ? *ascii_data : 0;
 220                 if ( this_ch == '\n' || this_ch == '\r' || ascii_len <= 0) {
 221                         /*
 222                         ** Whitespace. Assume some spaces got eaten at
 223                         ** end-of-line. (We check this later)
 224                         */
 225                         this_ch = 0;
 226                 } else {
 227                         /* Check the character for legality
 228                         ** The 64 in stead of the expected 63 is because
 229                         ** there are a few uuencodes out there that use
 230                         ** '`' as zero instead of space.
 231                         */
 232                         if ( this_ch < ' ' || this_ch > (' ' + 64)) {
 233                                 PyErr_SetString(Error, "Illegal char");
 234                                 PyBuffer_Release(&pascii);
 235                                 Py_DECREF(rv);
 236                                 return NULL;
 237                         }
 238                         this_ch = (this_ch - ' ') & 077;
 239                 }
 240                 /*
 241                 ** Shift it in on the low end, and see if there's
 242                 ** a byte ready for output.
 243                 */
 244                 leftchar = (leftchar << 6) | (this_ch);
 245                 leftbits += 6;
 246                 if ( leftbits >= 8 ) {
 247                         leftbits -= 8;
 248                         *bin_data++ = (leftchar >> leftbits) & 0xff;
 249                         leftchar &= ((1 << leftbits) - 1);
 250                         bin_len--;
 251                 }
 252         }
 253         /*
 254         ** Finally, check that if there's anything left on the line
 255         ** that it's whitespace only.
 256         */
 257         while( ascii_len-- > 0 ) {
 258                 this_ch = *ascii_data++;
 259                 /* Extra '`' may be written as padding in some cases */
 260                 if ( this_ch != ' ' && this_ch != ' '+64 &&
 261                      this_ch != '\n' && this_ch != '\r' ) {
 262                         PyErr_SetString(Error, "Trailing garbage");
 263                         PyBuffer_Release(&pascii);
 264                         Py_DECREF(rv);
 265                         return NULL;
 266                 }
 267         }
 268         PyBuffer_Release(&pascii);
 269         return rv;
 270 }
 271
 272 PyDoc_STRVAR(doc_b2a_uu, "(bin) -> ascii. Uuencode line of data");
 273
 274 static PyObject *
 275 binascii_b2a_uu(PyObject *self, PyObject *args)
 276 {
 277         Py_buffer pbin;
 278         unsigned char *ascii_data, *bin_data;
 279         int leftbits = 0;
 280         unsigned char this_ch;
 281         unsigned int leftchar = 0;
 282         PyObject *rv;
 283         Py_ssize_t bin_len;
 284
 285         if ( !PyArg_ParseTuple(args, "y*:b2a_uu", &pbin) )
 286                 return NULL;
 287         bin_data = pbin.buf;
 288         bin_len = pbin.len;
 289         if ( bin_len > 45 ) {
 290                 /* The 45 is a limit that appears in all uuencode's */
 291                 PyErr_SetString(Error, "At most 45 bytes at once");
 292                 PyBuffer_Release(&pbin);
 293                 return NULL;
 294         }
 295
 296         /* We're lazy and allocate to much (fixed up later) */
 297         if ( (rv=PyBytes_FromStringAndSize(NULL, bin_len*2+2)) == NULL ) {
 298                 PyBuffer_Release(&pbin);
 299                 return NULL;
 300         }
 301         ascii_data = (unsigned char *)PyBytes_AS_STRING(rv);
 302
 303         /* Store the length */
 304         *ascii_data++ = ' ' + (bin_len & 077);
 305
 306         for( ; bin_len > 0 || leftbits != 0 ; bin_len--, bin_data++ ) {
 307                 /* Shift the data (or padding) into our buffer */
 308                 if ( bin_len > 0 )      /* Data */
 309                         leftchar = (leftchar << 8) | *bin_data;
 310                 else                    /* Padding */
 311                         leftchar <<= 8;
 312                 leftbits += 8;
 313
 314                 /* See if there are 6-bit groups ready */
 315                 while ( leftbits >= 6 ) {
 316                         this_ch = (leftchar >> (leftbits-6)) & 0x3f;
 317                         leftbits -= 6;
 318                         *ascii_data++ = this_ch + ' ';
 319                 }
 320         }
 321         *ascii_data++ = '\n';   /* Append a courtesy newline */
 322
 323         if (_PyBytes_Resize(&rv,
 324                            (ascii_data -
 325                             (unsigned char *)PyBytes_AS_STRING(rv))) < 0) {
 326                 Py_DECREF(rv);
 327                 rv = NULL;
 328         }
 329         PyBuffer_Release(&pbin);
 330         return rv;
 331 }
 332
 333
 334 static int
 335 binascii_find_valid(unsigned char *s, Py_ssize_t slen, int num)
 336 {
 337         /* Finds & returns the (num+1)th
 338         ** valid character for base64, or -1 if none.
 339         */
 340
 341         int ret = -1;
 342         unsigned char c, b64val;
 343
 344         while ((slen > 0) && (ret == -1)) {
 345                 c = *s;
 346                 b64val = table_a2b_base64[c & 0x7f];
 347                 if ( ((c <= 0x7f) && (b64val != (unsigned char)-1)) ) {
 348                         if (num == 0)
 349                                 ret = *s;
 350                         num--;
 351                 }
 352
 353                 s++;
 354                 slen--;
 355         }
 356         return ret;
 357 }
 358
 359 PyDoc_STRVAR(doc_a2b_base64, "(ascii) -> bin. Decode a line of base64 data");
 360
 361 static PyObject *
 362 binascii_a2b_base64(PyObject *self, PyObject *args)
 363 {
 364         Py_buffer pascii;
 365         unsigned char *ascii_data, *bin_data;
 366         int leftbits = 0;
 367         unsigned char this_ch;
 368         unsigned int leftchar = 0;
 369         PyObject *rv;
 370         Py_ssize_t ascii_len, bin_len;
 371         int quad_pos = 0;
 372
 373         if ( !PyArg_ParseTuple(args, "y*:a2b_base64", &pascii) )
 374                 return NULL;
 375         ascii_data = pascii.buf;
 376         ascii_len = pascii.len;
 377
 378         assert(ascii_len >= 0);
 379
 380         if (ascii_len > PY_SSIZE_T_MAX - 3) {
 381                 PyBuffer_Release(&pascii);
 382                 return PyErr_NoMemory();
 383         }
 384
 385         bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
 386
 387         /* Allocate the buffer */
 388         if ( (rv=PyBytes_FromStringAndSize(NULL, bin_len)) == NULL ) {
 389                 PyBuffer_Release(&pascii);
 390                 return NULL;
 391         }
 392         bin_data = (unsigned char *)PyBytes_AS_STRING(rv);
 393         bin_len = 0;
 394
 395         for( ; ascii_len > 0; ascii_len--, ascii_data++) {
 396                 this_ch = *ascii_data;
 397
 398                 if (this_ch > 0x7f ||
 399                     this_ch == '\r' || this_ch == '\n' || this_ch == ' ')
 400                         continue;
 401
 402                 /* Check for pad sequences and ignore
 403                 ** the invalid ones.
 404                 */
 405                 if (this_ch == BASE64_PAD) {
 406                         if ( (quad_pos < 2) ||
 407                              ((quad_pos == 2) &&
 408                               (binascii_find_valid(ascii_data, ascii_len, 1)
 409                                != BASE64_PAD)) )
 410                         {
 411                                 continue;
 412                         }
 413                         else {
 414                                 /* A pad sequence means no more input.
 415                                 ** We've already interpreted the data
 416                                 ** from the quad at this point.
 417                                 */
 418                                 leftbits = 0;
 419                                 break;
 420                         }
 421                 }
 422
 423                 this_ch = table_a2b_base64[*ascii_data];
 424                 if ( this_ch == (unsigned char) -1 )
 425                         continue;
 426
 427                 /*
 428                 ** Shift it in on the low end, and see if there's
 429                 ** a byte ready for output.
 430                 */
 431                 quad_pos = (quad_pos + 1) & 0x03;
 432                 leftchar = (leftchar << 6) | (this_ch);
 433                 leftbits += 6;
 434
 435                 if ( leftbits >= 8 ) {
 436                         leftbits -= 8;
 437                         *bin_data++ = (leftchar >> leftbits) & 0xff;
 438                         bin_len++;
 439                         leftchar &= ((1 << leftbits) - 1);
 440                 }
 441         }
 442
 443         if (leftbits != 0) {
 444                 PyBuffer_Release(&pascii);
 445                 PyErr_SetString(Error, "Incorrect padding");
 446                 Py_DECREF(rv);
 447                 return NULL;
 448         }
 449
 450         /* And set string size correctly. If the result string is empty
 451         ** (because the input was all invalid) return the shared empty
 452         ** string instead; _PyBytes_Resize() won't do this for us.
 453         */
 454         if (bin_len > 0) {
 455                 if (_PyBytes_Resize(&rv, bin_len) < 0) {
 456                         Py_DECREF(rv);
 457                         rv = NULL;
 458                 }
 459         }
 460         else {
 461                 Py_DECREF(rv);
 462                 rv = PyBytes_FromStringAndSize("", 0);
 463         }
 464         PyBuffer_Release(&pascii);
 465         return rv;
 466 }
 467
 468 PyDoc_STRVAR(doc_b2a_base64, "(bin) -> ascii. Base64-code line of data");
 469
 470 static PyObject *
 471 binascii_b2a_base64(PyObject *self, PyObject *args)
 472 {
 473         Py_buffer pbuf;
 474         unsigned char *ascii_data, *bin_data;
 475         int leftbits = 0;
 476         unsigned char this_ch;
 477         unsigned int leftchar = 0;
 478         PyObject *rv;
 479         Py_ssize_t bin_len;
 480
 481         if ( !PyArg_ParseTuple(args, "y*:b2a_base64", &pbuf) )
 482                 return NULL;
 483         bin_data = pbuf.buf;
 484         bin_len = pbuf.len;
 485
 486         assert(bin_len >= 0);
 487
 488         if ( bin_len > BASE64_MAXBIN ) {
 489                 PyErr_SetString(Error, "Too much data for base64 line");
 490                 PyBuffer_Release(&pbuf);
 491                 return NULL;
 492         }
 493
 494         /* We're lazy and allocate too much (fixed up later).
 495            "+3" leaves room for up to two pad characters and a trailing
 496            newline.  Note that 'b' gets encoded as 'Yg==\n' (1 in, 5 out). */
 497         if ( (rv=PyBytes_FromStringAndSize(NULL, bin_len*2 + 3)) == NULL ) {
 498                 PyBuffer_Release(&pbuf);
 499                 return NULL;
 500         }
 501         ascii_data = (unsigned char *)PyBytes_AS_STRING(rv);
 502
 503         for( ; bin_len > 0 ; bin_len--, bin_data++ ) {
 504                 /* Shift the data into our buffer */
 505                 leftchar = (leftchar << 8) | *bin_data;
 506                 leftbits += 8;
 507
 508                 /* See if there are 6-bit groups ready */
 509                 while ( leftbits >= 6 ) {
 510                         this_ch = (leftchar >> (leftbits-6)) & 0x3f;
 511                         leftbits -= 6;
 512                         *ascii_data++ = table_b2a_base64[this_ch];
 513                 }
 514         }
 515         if ( leftbits == 2 ) {
 516                 *ascii_data++ = table_b2a_base64[(leftchar&3) << 4];
 517                 *ascii_data++ = BASE64_PAD;
 518                 *ascii_data++ = BASE64_PAD;
 519         } else if ( leftbits == 4 ) {
 520                 *ascii_data++ = table_b2a_base64[(leftchar&0xf) << 2];
 521                 *ascii_data++ = BASE64_PAD;
 522         }
 523         *ascii_data++ = '\n';   /* Append a courtesy newline */
 524
 525         if (_PyBytes_Resize(&rv,
 526                            (ascii_data -
 527                             (unsigned char *)PyBytes_AS_STRING(rv))) < 0) {
 528                 Py_DECREF(rv);
 529                 rv = NULL;
 530         }
 531         PyBuffer_Release(&pbuf);
 532         return rv;
 533 }
 534
 535 PyDoc_STRVAR(doc_a2b_hqx, "ascii -> bin, done. Decode .hqx coding");
 536
 537 static PyObject *
 538 binascii_a2b_hqx(PyObject *self, PyObject *args)
 539 {
 540         unsigned char *ascii_data, *bin_data;
 541         int leftbits = 0;
 542         unsigned char this_ch;
 543         unsigned int leftchar = 0;
 544         PyObject *rv;
 545         Py_ssize_t len;
 546         int done = 0;
 547
 548         if ( !PyArg_ParseTuple(args, "t#:a2b_hqx", &ascii_data, &len) )
 549                 return NULL;
 550
 551         assert(len >= 0);
 552
 553         if (len > PY_SSIZE_T_MAX - 2)
 554                 return PyErr_NoMemory();
 555
 556         /* Allocate a string that is too big (fixed later)
 557            Add two to the initial length to prevent interning which
 558            would preclude subsequent resizing.  */
 559         if ( (rv=PyBytes_FromStringAndSize(NULL, len+2)) == NULL )
 560                 return NULL;
 561         bin_data = (unsigned char *)PyBytes_AS_STRING(rv);
 562
 563         for( ; len > 0 ; len--, ascii_data++ ) {
 564                 /* Get the byte and look it up */
 565                 this_ch = table_a2b_hqx[*ascii_data];
 566                 if ( this_ch == SKIP )
 567                         continue;
 568                 if ( this_ch == FAIL ) {
 569                         PyErr_SetString(Error, "Illegal char");
 570                         Py_DECREF(rv);
 571                         return NULL;
 572                 }
 573                 if ( this_ch == DONE ) {
 574                         /* The terminating colon */
 575                         done = 1;
 576                         break;
 577                 }
 578
 579                 /* Shift it into the buffer and see if any bytes are ready */
 580                 leftchar = (leftchar << 6) | (this_ch);
 581                 leftbits += 6;
 582                 if ( leftbits >= 8 ) {
 583                         leftbits -= 8;
 584                         *bin_data++ = (leftchar >> leftbits) & 0xff;
 585                         leftchar &= ((1 << leftbits) - 1);
 586                 }
 587         }
 588
 589         if ( leftbits && !done ) {
 590                 PyErr_SetString(Incomplete,
 591                                 "String has incomplete number of bytes");
 592                 Py_DECREF(rv);
 593                 return NULL;
 594         }
 595         if (_PyBytes_Resize(&rv,
 596                            (bin_data -
 597                             (unsigned char *)PyBytes_AS_STRING(rv))) < 0) {
 598                 Py_DECREF(rv);
 599                 rv = NULL;
 600         }
 601         if (rv) {
 602                 PyObject *rrv = Py_BuildValue("Oi", rv, done);
 603                 Py_DECREF(rv);
 604                 return rrv;
 605         }
 606
 607         return NULL;
 608 }
 609
 610 PyDoc_STRVAR(doc_rlecode_hqx, "Binhex RLE-code binary data");
 611
 612 static PyObject *
 613 binascii_rlecode_hqx(PyObject *self, PyObject *args)
 614 {
 615         Py_buffer pbuf;
 616         unsigned char *in_data, *out_data;
 617         PyObject *rv;
 618         unsigned char ch;
 619         Py_ssize_t in, inend, len;
 620
 621         if ( !PyArg_ParseTuple(args, "y*:rlecode_hqx", &pbuf) )
 622                 return NULL;
 623         in_data = pbuf.buf;
 624         len = pbuf.len;
 625
 626         assert(len >= 0);
 627
 628         if (len > PY_SSIZE_T_MAX / 2 - 2) {
 629                 PyBuffer_Release(&pbuf);
 630                 return PyErr_NoMemory();
 631         }
 632
 633         /* Worst case: output is twice as big as input (fixed later) */
 634         if ( (rv=PyBytes_FromStringAndSize(NULL, len*2+2)) == NULL ) {
 635                 PyBuffer_Release(&pbuf);
 636                 return NULL;
 637         }
 638         out_data = (unsigned char *)PyBytes_AS_STRING(rv);
 639
 640         for( in=0; in<len; in++) {
 641                 ch = in_data[in];
 642                 if ( ch == RUNCHAR ) {
 643                         /* RUNCHAR. Escape it. */
 644                         *out_data++ = RUNCHAR;
 645                         *out_data++ = 0;
 646                 } else {
 647                         /* Check how many following are the same */
 648                         for(inend=in+1;
 649                             inend<len && in_data[inend] == ch &&
 650                                     inend < in+255;
 651                             inend++) ;
 652                         if ( inend - in > 3 ) {
 653                                 /* More than 3 in a row. Output RLE. */
 654                                 *out_data++ = ch;
 655                                 *out_data++ = RUNCHAR;
 656                                 *out_data++ = inend-in;
 657                                 in = inend-1;
 658                         } else {
 659                                 /* Less than 3. Output the byte itself */
 660                                 *out_data++ = ch;
 661                         }
 662                 }
 663         }
 664         if (_PyBytes_Resize(&rv,
 665                            (out_data -
 666                             (unsigned char *)PyBytes_AS_STRING(rv))) < 0) {
 667                 Py_DECREF(rv);
 668                 rv = NULL;
 669         }
 670         PyBuffer_Release(&pbuf);
 671         return rv;
 672 }
 673
 674 PyDoc_STRVAR(doc_b2a_hqx, "Encode .hqx data");
 675
 676 static PyObject *
 677 binascii_b2a_hqx(PyObject *self, PyObject *args)
 678 {
 679         Py_buffer pbin;
 680         unsigned char *ascii_data, *bin_data;
 681         int leftbits = 0;
 682         unsigned char this_ch;
 683         unsigned int leftchar = 0;
 684         PyObject *rv;
 685         Py_ssize_t len;
 686
 687         if ( !PyArg_ParseTuple(args, "y*:b2a_hqx", &pbin) )
 688                 return NULL;
 689         bin_data = pbin.buf;
 690         len = pbin.len;
 691
 692         assert(len >= 0);
 693
 694         if (len > PY_SSIZE_T_MAX / 2 - 2) {
 695                 PyBuffer_Release(&pbin);
 696                 return PyErr_NoMemory();
 697         }
 698
 699         /* Allocate a buffer that is at least large enough */
 700         if ( (rv=PyBytes_FromStringAndSize(NULL, len*2+2)) == NULL ) {
 701                 PyBuffer_Release(&pbin);
 702                 return NULL;
 703         }
 704         ascii_data = (unsigned char *)PyBytes_AS_STRING(rv);
 705
 706         for( ; len > 0 ; len--, bin_data++ ) {
 707                 /* Shift into our buffer, and output any 6bits ready */
 708                 leftchar = (leftchar << 8) | *bin_data;
 709                 leftbits += 8;
 710                 while ( leftbits >= 6 ) {
 711                         this_ch = (leftchar >> (leftbits-6)) & 0x3f;
 712                         leftbits -= 6;
 713                         *ascii_data++ = table_b2a_hqx[this_ch];
 714                 }
 715         }
 716         /* Output a possible runt byte */
 717         if ( leftbits ) {
 718                 leftchar <<= (6-leftbits);
 719                 *ascii_data++ = table_b2a_hqx[leftchar & 0x3f];
 720         }
 721         if (_PyBytes_Resize(&rv,
 722                            (ascii_data -
 723                             (unsigned char *)PyBytes_AS_STRING(rv))) < 0) {
 724                 Py_DECREF(rv);
 725                 rv = NULL;
 726         }
 727         PyBuffer_Release(&pbin);
 728         return rv;
 729 }
 730
 731 PyDoc_STRVAR(doc_rledecode_hqx, "Decode hexbin RLE-coded string");
 732
 733 static PyObject *
 734 binascii_rledecode_hqx(PyObject *self, PyObject *args)
 735 {
 736         Py_buffer pin;
 737         unsigned char *in_data, *out_data;
 738         unsigned char in_byte, in_repeat;
 739         PyObject *rv;
 740         Py_ssize_t in_len, out_len, out_len_left;
 741
 742         if ( !PyArg_ParseTuple(args, "s*:rledecode_hqx", &pin) )
 743                 return NULL;
 744         in_data = pin.buf;
 745         in_len = pin.len;
 746
 747         assert(in_len >= 0);
 748
 749         /* Empty string is a special case */
 750         if ( in_len == 0 ) {
 751                 PyBuffer_Release(&pin);
 752                 return PyBytes_FromStringAndSize("", 0);
 753         }
 754         else if (in_len > PY_SSIZE_T_MAX / 2) {
 755                 PyBuffer_Release(&pin);
 756                 return PyErr_NoMemory();
 757         }
 758
 759         /* Allocate a buffer of reasonable size. Resized when needed */
 760         out_len = in_len*2;
 761         if ( (rv=PyBytes_FromStringAndSize(NULL, out_len)) == NULL ) {
 762                 PyBuffer_Release(&pin);
 763                 return NULL;
 764         }
 765         out_len_left = out_len;
 766         out_data = (unsigned char *)PyBytes_AS_STRING(rv);
 767
 768         /*
 769         ** We need two macros here to get/put bytes and handle
 770         ** end-of-buffer for input and output strings.
 771         */
 772 #define INBYTE(b) \
 773         do { \
 774                  if ( --in_len < 0 ) { \
 775                            PyErr_SetString(Incomplete, ""); \
 776                            Py_DECREF(rv); \
 777                            PyBuffer_Release(&pin); \
 778                            return NULL; \
 779                  } \
 780                  b = *in_data++; \
 781         } while(0)
 782
 783 #define OUTBYTE(b) \
 784         do { \
 785                  if ( --out_len_left < 0 ) { \
 786                           if ( out_len > PY_SSIZE_T_MAX / 2) return PyErr_NoMemory(); \
 787                           if (_PyBytes_Resize(&rv, 2*out_len) < 0) \
 788                             { Py_DECREF(rv); PyBuffer_Release(&pin); return NULL; } \
 789                           out_data = (unsigned char *)PyBytes_AS_STRING(rv) \
 790                                                                  + out_len; \
 791                           out_len_left = out_len-1; \
 792                           out_len = out_len * 2; \
 793                  } \
 794                  *out_data++ = b; \
 795         } while(0)
 796
 797                 /*
 798                 ** Handle first byte separately (since we have to get angry
 799                 ** in case of an orphaned RLE code).
 800                 */
 801                 INBYTE(in_byte);
 802
 803         if (in_byte == RUNCHAR) {
 804                 INBYTE(in_repeat);
 805                 if (in_repeat != 0) {
 806                         /* Note Error, not Incomplete (which is at the end
 807                         ** of the string only). This is a programmer error.
 808                         */
 809                         PyErr_SetString(Error, "Orphaned RLE code at start");
 810                         PyBuffer_Release(&pin);
 811                         Py_DECREF(rv);
 812                         return NULL;
 813                 }
 814                 OUTBYTE(RUNCHAR);
 815         } else {
 816                 OUTBYTE(in_byte);
 817         }
 818
 819         while( in_len > 0 ) {
 820                 INBYTE(in_byte);
 821
 822                 if (in_byte == RUNCHAR) {
 823                         INBYTE(in_repeat);
 824                         if ( in_repeat == 0 ) {
 825                                 /* Just an escaped RUNCHAR value */
 826                                 OUTBYTE(RUNCHAR);
 827                         } else {
 828                                 /* Pick up value and output a sequence of it */
 829                                 in_byte = out_data[-1];
 830                                 while ( --in_repeat > 0 )
 831                                         OUTBYTE(in_byte);
 832                         }
 833                 } else {
 834                         /* Normal byte */
 835                         OUTBYTE(in_byte);
 836                 }
 837         }
 838         if (_PyBytes_Resize(&rv,
 839                            (out_data -
 840                             (unsigned char *)PyBytes_AS_STRING(rv))) < 0) {
 841                 Py_DECREF(rv);
 842                 rv = NULL;
 843         }
 844         PyBuffer_Release(&pin);
 845         return rv;
 846 }
 847
 848 PyDoc_STRVAR(doc_crc_hqx,
 849 "(data, oldcrc) -> newcrc. Compute hqx CRC incrementally");
 850
 851 static PyObject *
 852 binascii_crc_hqx(PyObject *self, PyObject *args)
 853 {
 854         Py_buffer pin;
 855         unsigned char *bin_data;
 856         unsigned int crc;
 857         Py_ssize_t len;
 858
 859         if ( !PyArg_ParseTuple(args, "y*i:crc_hqx", &pin, &crc) )
 860                 return NULL;
 861         bin_data = pin.buf;
 862         len = pin.len;
 863
 864         while(len-- > 0) {
 865                 crc=((crc<<8)&0xff00)^crctab_hqx[((crc>>8)&0xff)^*bin_data++];
 866         }
 867
 868         PyBuffer_Release(&pin);
 869         return Py_BuildValue("i", crc);
 870 }
 871
 872 PyDoc_STRVAR(doc_crc32,
 873 "(data, oldcrc = 0) -> newcrc. Compute CRC-32 incrementally");  /* docstring for binascii_crc32, whichever of the two implementations below is compiled in */
 874
 875 #ifdef USE_ZLIB_CRC32
 876 /* This was taken from zlibmodule.c PyZlib_crc32 (but is PY_SSIZE_T_CLEAN) */
 877 static PyObject *
 878 binascii_crc32(PyObject *self, PyObject *args)
 879 {
 880     unsigned int crc32val = 0;  /* crc32(0L, Z_NULL, 0) */
 881     Py_buffer pbuf;
 882     Byte *buf;
 883     Py_ssize_t len;
 884     int signed_val;
 885
 886     if (!PyArg_ParseTuple(args, "y*|I:crc32", &pbuf, &crc32val))
 887         return NULL;
 888     buf = (Byte*)pbuf.buf;
 889     len = pbuf.len;
 890     signed_val = crc32(crc32val, buf, len);
 891     PyBuffer_Release(&pbuf);
 892     return PyLong_FromUnsignedLong(signed_val & 0xffffffffU);
 893 }
 894 #else  /* USE_ZLIB_CRC32 */
 895 /*  Crc - 32 BIT ANSI X3.66 CRC checksum files
 896     Also known as: ISO 3307
 897 **********************************************************************|
 898 *                                                                    *|
 899 * Demonstration program to compute the 32-bit CRC used as the frame  *|
 900 * check sequence in ADCCP (ANSI X3.66, also known as FIPS PUB 71     *|
 901 * and FED-STD-1003, the U.S. versions of CCITT's X.25 link-level     *|
 902 * protocol).  The 32-bit FCS was added via the Federal Register,     *|
 903 * 1 June 1982, p.23798.  I presume but don't know for certain that   *|
 904 * this polynomial is or will be included in CCITT V.41, which        *|
 905 * defines the 16-bit CRC (often called CRC-CCITT) polynomial.  FIPS  *|
 906 * PUB 78 says that the 32-bit FCS reduces otherwise undetected       *|
 907 * errors by a factor of 10^-5 over 16-bit FCS.                       *|
 908 *                                                                    *|
 909 **********************************************************************|
 910
 911  Copyright (C) 1986 Gary S. Brown.  You may use this program, or
 912  code or tables extracted from it, as desired without restriction.
 913
 914  First, the polynomial itself and its table of feedback terms.  The
 915  polynomial is
 916  X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
 917  Note that we take it "backwards" and put the highest-order term in
 918  the lowest-order bit.  The X^32 term is "implied"; the LSB is the
 919  X^31 term, etc.  The X^0 term (usually shown as "+1") results in
 920  the MSB being 1.
 921
 922  Note that the usual hardware shift register implementation, which
 923  is what we're using (we're merely optimizing it by doing eight-bit
 924  chunks at a time) shifts bits into the lowest-order term.  In our
 925  implementation, that means shifting towards the right.  Why do we
 926  do it this way?  Because the calculated CRC must be transmitted in
 927  order from highest-order term to lowest-order term.  UARTs transmit
 928  characters in order from LSB to MSB.  By storing the CRC this way,
 929  we hand it to the UART in the order low-byte to high-byte; the UART
 930  sends each low-bit to high-bit; and the result is transmission bit
 931  by bit from highest- to lowest-order term without requiring any bit
 932  shuffling on our part.  Reception works similarly.
 933
 934  The feedback terms table consists of 256, 32-bit entries.  Notes:
 935
 936   1. The table can be generated at runtime if desired; code to do so
 937      is shown later.  It might not be obvious, but the feedback
 938      terms simply represent the results of eight shift/xor opera-
 939      tions for all combinations of data and CRC register values.
 940
 941   2. The CRC accumulation logic is the same for all CRC polynomials,
 942      be they sixteen or thirty-two bits wide.  You simply choose the
 943      appropriate table.  Alternatively, because the table can be
 944      generated at runtime, you can start by generating the table for
 945      the polynomial in question and use exactly the same "updcrc",
 946      if your application needn't simultaneously handle two CRC
 947      polynomials.  (Note, however, that XMODEM is strange.)
 948
 949   3. For 16-bit CRCs, the table entries need be only 16 bits wide;
 950      of course, 32-bit entries work OK if the high 16 bits are zero.
 951
 952   4. The values must be right-shifted by eight bits by the "updcrc"
 953      logic; the shift must be unsigned (bring in zeroes).  On some
 954      hardware you could probably optimize the shift in assembler by
 955      using byte-swap instructions.
 956 ********************************************************************/
 957
 958 static unsigned int crc_32_tab[256] = {
 959 0x00000000U, 0x77073096U, 0xee0e612cU, 0x990951baU, 0x076dc419U,
 960 0x706af48fU, 0xe963a535U, 0x9e6495a3U, 0x0edb8832U, 0x79dcb8a4U,
 961 0xe0d5e91eU, 0x97d2d988U, 0x09b64c2bU, 0x7eb17cbdU, 0xe7b82d07U,
 962 0x90bf1d91U, 0x1db71064U, 0x6ab020f2U, 0xf3b97148U, 0x84be41deU,
 963 0x1adad47dU, 0x6ddde4ebU, 0xf4d4b551U, 0x83d385c7U, 0x136c9856U,
 964 0x646ba8c0U, 0xfd62f97aU, 0x8a65c9ecU, 0x14015c4fU, 0x63066cd9U,
 965 0xfa0f3d63U, 0x8d080df5U, 0x3b6e20c8U, 0x4c69105eU, 0xd56041e4U,
 966 0xa2677172U, 0x3c03e4d1U, 0x4b04d447U, 0xd20d85fdU, 0xa50ab56bU,
 967 0x35b5a8faU, 0x42b2986cU, 0xdbbbc9d6U, 0xacbcf940U, 0x32d86ce3U,
 968 0x45df5c75U, 0xdcd60dcfU, 0xabd13d59U, 0x26d930acU, 0x51de003aU,
 969 0xc8d75180U, 0xbfd06116U, 0x21b4f4b5U, 0x56b3c423U, 0xcfba9599U,
 970 0xb8bda50fU, 0x2802b89eU, 0x5f058808U, 0xc60cd9b2U, 0xb10be924U,
 971 0x2f6f7c87U, 0x58684c11U, 0xc1611dabU, 0xb6662d3dU, 0x76dc4190U,
 972 0x01db7106U, 0x98d220bcU, 0xefd5102aU, 0x71b18589U, 0x06b6b51fU,
 973 0x9fbfe4a5U, 0xe8b8d433U, 0x7807c9a2U, 0x0f00f934U, 0x9609a88eU,
 974 0xe10e9818U, 0x7f6a0dbbU, 0x086d3d2dU, 0x91646c97U, 0xe6635c01U,
 975 0x6b6b51f4U, 0x1c6c6162U, 0x856530d8U, 0xf262004eU, 0x6c0695edU,
 976 0x1b01a57bU, 0x8208f4c1U, 0xf50fc457U, 0x65b0d9c6U, 0x12b7e950U,
 977 0x8bbeb8eaU, 0xfcb9887cU, 0x62dd1ddfU, 0x15da2d49U, 0x8cd37cf3U,
 978 0xfbd44c65U, 0x4db26158U, 0x3ab551ceU, 0xa3bc0074U, 0xd4bb30e2U,
 979 0x4adfa541U, 0x3dd895d7U, 0xa4d1c46dU, 0xd3d6f4fbU, 0x4369e96aU,
 980 0x346ed9fcU, 0xad678846U, 0xda60b8d0U, 0x44042d73U, 0x33031de5U,
 981 0xaa0a4c5fU, 0xdd0d7cc9U, 0x5005713cU, 0x270241aaU, 0xbe0b1010U,
 982 0xc90c2086U, 0x5768b525U, 0x206f85b3U, 0xb966d409U, 0xce61e49fU,
 983 0x5edef90eU, 0x29d9c998U, 0xb0d09822U, 0xc7d7a8b4U, 0x59b33d17U,
 984 0x2eb40d81U, 0xb7bd5c3bU, 0xc0ba6cadU, 0xedb88320U, 0x9abfb3b6U,
 985 0x03b6e20cU, 0x74b1d29aU, 0xead54739U, 0x9dd277afU, 0x04db2615U,
 986 0x73dc1683U, 0xe3630b12U, 0x94643b84U, 0x0d6d6a3eU, 0x7a6a5aa8U,
 987 0xe40ecf0bU, 0x9309ff9dU, 0x0a00ae27U, 0x7d079eb1U, 0xf00f9344U,
 988 0x8708a3d2U, 0x1e01f268U, 0x6906c2feU, 0xf762575dU, 0x806567cbU,
 989 0x196c3671U, 0x6e6b06e7U, 0xfed41b76U, 0x89d32be0U, 0x10da7a5aU,
 990 0x67dd4accU, 0xf9b9df6fU, 0x8ebeeff9U, 0x17b7be43U, 0x60b08ed5U,
 991 0xd6d6a3e8U, 0xa1d1937eU, 0x38d8c2c4U, 0x4fdff252U, 0xd1bb67f1U,
 992 0xa6bc5767U, 0x3fb506ddU, 0x48b2364bU, 0xd80d2bdaU, 0xaf0a1b4cU,
 993 0x36034af6U, 0x41047a60U, 0xdf60efc3U, 0xa867df55U, 0x316e8eefU,
 994 0x4669be79U, 0xcb61b38cU, 0xbc66831aU, 0x256fd2a0U, 0x5268e236U,
 995 0xcc0c7795U, 0xbb0b4703U, 0x220216b9U, 0x5505262fU, 0xc5ba3bbeU,
 996 0xb2bd0b28U, 0x2bb45a92U, 0x5cb36a04U, 0xc2d7ffa7U, 0xb5d0cf31U,
 997 0x2cd99e8bU, 0x5bdeae1dU, 0x9b64c2b0U, 0xec63f226U, 0x756aa39cU,
 998 0x026d930aU, 0x9c0906a9U, 0xeb0e363fU, 0x72076785U, 0x05005713U,
 999 0x95bf4a82U, 0xe2b87a14U, 0x7bb12baeU, 0x0cb61b38U, 0x92d28e9bU,
1000 0xe5d5be0dU, 0x7cdcefb7U, 0x0bdbdf21U, 0x86d3d2d4U, 0xf1d4e242U,
1001 0x68ddb3f8U, 0x1fda836eU, 0x81be16cdU, 0xf6b9265bU, 0x6fb077e1U,
1002 0x18b74777U, 0x88085ae6U, 0xff0f6a70U, 0x66063bcaU, 0x11010b5cU,
1003 0x8f659effU, 0xf862ae69U, 0x616bffd3U, 0x166ccf45U, 0xa00ae278U,
1004 0xd70dd2eeU, 0x4e048354U, 0x3903b3c2U, 0xa7672661U, 0xd06016f7U,
1005 0x4969474dU, 0x3e6e77dbU, 0xaed16a4aU, 0xd9d65adcU, 0x40df0b66U,
1006 0x37d83bf0U, 0xa9bcae53U, 0xdebb9ec5U, 0x47b2cf7fU, 0x30b5ffe9U,
1007 0xbdbdf21cU, 0xcabac28aU, 0x53b39330U, 0x24b4a3a6U, 0xbad03605U,
1008 0xcdd70693U, 0x54de5729U, 0x23d967bfU, 0xb3667a2eU, 0xc4614ab8U,
1009 0x5d681b02U, 0x2a6f2b94U, 0xb40bbe37U, 0xc30c8ea1U, 0x5a05df1bU,
1010 0x2d02ef8dU
1011 };
1012
1013 static PyObject *
1014 binascii_crc32(PyObject *self, PyObject *args)
1015 { /* By Jim Ahlstrom; All rights transferred to CNRI */
1016         Py_buffer pbin;
1017         unsigned char *bin_data;
1018         unsigned int crc = 0;   /* initial value of CRC */
1019         Py_ssize_t len;
1020         unsigned int result;
1021
1022         if ( !PyArg_ParseTuple(args, "y*|I:crc32", &pbin, &crc) )
1023                 return NULL;
1024         bin_data = pbin.buf;
1025         len = pbin.len;
1026
1027         crc = ~ crc;
1028         while (len-- > 0) {
1029                 crc = crc_32_tab[(crc ^ *bin_data++) & 0xff] ^ (crc >> 8);
1030                 /* Note:  (crc >> 8) MUST zero fill on left */
1031         }
1032
1033         result = (crc ^ 0xFFFFFFFF);
1034         PyBuffer_Release(&pbin);
1035         return PyLong_FromUnsignedLong(result & 0xffffffff);
1036 }
1037 #endif  /* USE_ZLIB_CRC32 */
1038
1039
1040 static PyObject *
1041 binascii_hexlify(PyObject *self, PyObject *args)
1042 {
1043         Py_buffer parg;
1044         char* argbuf;
1045         Py_ssize_t arglen;
1046         PyObject *retval;
1047         char* retbuf;
1048         Py_ssize_t i, j;
1049
1050         if (!PyArg_ParseTuple(args, "y*:b2a_hex", &parg))
1051                 return NULL;
1052         argbuf = parg.buf;
1053         arglen = parg.len;
1054
1055         assert(arglen >= 0);
1056         if (arglen > PY_SSIZE_T_MAX / 2) {
1057                 PyBuffer_Release(&parg);
1058                 return PyErr_NoMemory();
1059         }
1060
1061         retval = PyBytes_FromStringAndSize(NULL, arglen*2);
1062         if (!retval) {
1063                 PyBuffer_Release(&parg);
1064                 return NULL;
1065         }
1066         retbuf = PyBytes_AS_STRING(retval);
1067
1068         /* make hex version of string, taken from shamodule.c */
1069         for (i=j=0; i < arglen; i++) {
1070                 char c;
1071                 c = (argbuf[i] >> 4) & 0xf;
1072                 c = (c>9) ? c+'a'-10 : c + '0';
1073                 retbuf[j++] = c;
1074                 c = argbuf[i] & 0xf;
1075                 c = (c>9) ? c+'a'-10 : c + '0';
1076                 retbuf[j++] = c;
1077         }
1078         PyBuffer_Release(&parg);
1079         return retval;
1080 }
1081
1082 PyDoc_STRVAR(doc_hexlify,
1083 "b2a_hex(data) -> s; Hexadecimal representation of binary data.\n\
1084 \n\
1085 This function is also available as \"hexlify()\".");
1086
1087
/*
 * Convert one hexadecimal digit character to its numeric value.
 *
 * c: character code of the digit ('0'-'9', 'a'-'f' or 'A'-'F').
 *
 * Returns the value 0..15, or -1 when c is not a hexadecimal digit.
 *
 * Plain range comparisons are used instead of the <ctype.h> classifiers so
 * the result does not depend on the current locale and is well defined for
 * every int argument, including values outside the unsigned char range.
 */
static int
to_int(int c)
{
        if (c >= '0' && c <= '9')
                return c - '0';
        if (c >= 'a' && c <= 'f')
                return c - 'a' + 10;
        if (c >= 'A' && c <= 'F')
                return c - 'A' + 10;
        return -1;
}
1101
1102
1103 static PyObject *
1104 binascii_unhexlify(PyObject *self, PyObject *args)
1105 {
1106         Py_buffer parg;
1107         char* argbuf;
1108         Py_ssize_t arglen;
1109         PyObject *retval;
1110         char* retbuf;
1111         Py_ssize_t i, j;
1112
1113         if (!PyArg_ParseTuple(args, "s*:a2b_hex", &parg))
1114                 return NULL;
1115         argbuf = parg.buf;
1116         arglen = parg.len;
1117
1118         assert(arglen >= 0);
1119
1120         /* XXX What should we do about strings with an odd length?  Should
1121          * we add an implicit leading zero, or a trailing zero?  For now,
1122          * raise an exception.
1123          */
1124         if (arglen % 2) {
1125                 PyBuffer_Release(&parg);
1126                 PyErr_SetString(Error, "Odd-length string");
1127                 return NULL;
1128         }
1129
1130         retval = PyBytes_FromStringAndSize(NULL, (arglen/2));
1131         if (!retval) {
1132                 PyBuffer_Release(&parg);
1133                 return NULL;
1134         }
1135         retbuf = PyBytes_AS_STRING(retval);
1136
1137         for (i=j=0; i < arglen; i += 2) {
1138                 int top = to_int(Py_CHARMASK(argbuf[i]));
1139                 int bot = to_int(Py_CHARMASK(argbuf[i+1]));
1140                 if (top == -1 || bot == -1) {
1141                         PyErr_SetString(Error,
1142                                         "Non-hexadecimal digit found");
1143                         goto finally;
1144                 }
1145                 retbuf[j++] = (top << 4) + bot;
1146         }
1147         PyBuffer_Release(&parg);
1148         return retval;
1149
1150   finally:
1151         PyBuffer_Release(&parg);
1152         Py_DECREF(retval);
1153         return NULL;
1154 }
1155
1156 PyDoc_STRVAR(doc_unhexlify,
1157 "a2b_hex(hexstr) -> s; Binary data of hexadecimal representation.\n\
1158 \n\
1159 hexstr must contain an even number of hex digits (upper or lower case).\n\
1160 This function is also available as \"unhexlify()\"");
1161
/* Value of each 7-bit character code as a hexadecimal digit, or -1 when
 * the character is not one.  Both 'A'-'F' and 'a'-'f' map to 10..15.
 * The table is read-only, hence const. */
static const int table_hex[128] = {
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
   0, 1, 2, 3,  4, 5, 6, 7,  8, 9,-1,-1, -1,-1,-1,-1,
  -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
};

/* Look up the hex-digit value of c.  The table has only 128 entries, so
 * the caller must guarantee 0 <= c < 128 (no bounds check is done here). */
#define hexval(c) table_hex[(unsigned int)(c)]

/* Line-length limit used by the quoted-printable encoder when deciding
 * where to insert soft line breaks. */
#define MAXLINESIZE 76
1176
1177 PyDoc_STRVAR(doc_a2b_qp, "Decode a string of qp-encoded data");
1178
1179 static PyObject*
1180 binascii_a2b_qp(PyObject *self, PyObject *args, PyObject *kwargs)
1181 {
1182         Py_ssize_t in, out;
1183         char ch;
1184         Py_buffer pdata;
1185         unsigned char *data, *odata;
1186         Py_ssize_t datalen = 0;
1187         PyObject *rv;
1188         static char *kwlist[] = {"data", "header", NULL};
1189         int header = 0;
1190
1191         if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|i", kwlist, &pdata,
1192               &header))
1193                 return NULL;
1194         data = pdata.buf;
1195         datalen = pdata.len;
1196
1197         /* We allocate the output same size as input, this is overkill.
1198          * The previous implementation used calloc() so we'll zero out the
1199          * memory here too, since PyMem_Malloc() does not guarantee that.
1200          */
1201         odata = (unsigned char *) PyMem_Malloc(datalen);
1202         if (odata == NULL) {
1203                 PyBuffer_Release(&pdata);
1204                 PyErr_NoMemory();
1205                 return NULL;
1206         }
1207         memset(odata, 0, datalen);
1208
1209         in = out = 0;
1210         while (in < datalen) {
1211                 if (data[in] == '=') {
1212                         in++;
1213                         if (in >= datalen) break;
1214                         /* Soft line breaks */
1215                         if ((data[in] == '\n') || (data[in] == '\r')) {
1216                                 if (data[in] != '\n') {
1217                                         while (in < datalen && data[in] != '\n') in++;
1218                                 }
1219                                 if (in < datalen) in++;
1220                         }
1221                         else if (data[in] == '=') {
1222                                 /* broken case from broken python qp */
1223                                 odata[out++] = '=';
1224                                 in++;
1225                         }
1226                         else if (((data[in] >= 'A' && data[in] <= 'F') ||
1227                                   (data[in] >= 'a' && data[in] <= 'f') ||
1228                                   (data[in] >= '0' && data[in] <= '9')) &&
1229                                  ((data[in+1] >= 'A' && data[in+1] <= 'F') ||
1230                                   (data[in+1] >= 'a' && data[in+1] <= 'f') ||
1231                                   (data[in+1] >= '0' && data[in+1] <= '9'))) {
1232                                 /* hexval */
1233                                 ch = hexval(data[in]) << 4;
1234                                 in++;
1235                                 ch |= hexval(data[in]);
1236                                 in++;
1237                                 odata[out++] = ch;
1238                         }
1239                         else {
1240                           odata[out++] = '=';
1241                         }
1242                 }
1243                 else if (header && data[in] == '_') {
1244                         odata[out++] = ' ';
1245                         in++;
1246                 }
1247                 else {
1248                         odata[out] = data[in];
1249                         in++;
1250                         out++;
1251                 }
1252         }
1253         if ((rv = PyBytes_FromStringAndSize((char *)odata, out)) == NULL) {
1254                 PyBuffer_Release(&pdata);
1255                 PyMem_Free(odata);
1256                 return NULL;
1257         }
1258         PyBuffer_Release(&pdata);
1259         PyMem_Free(odata);
1260         return rv;
1261 }
1262
/*
 * Write the two uppercase hexadecimal digits of ch into s[0] (high nibble)
 * and s[1] (low nibble).  No terminator is written.  Always returns 0.
 */
static int
to_hex (unsigned char ch, unsigned char *s)
{
        static const char digits[] = "0123456789ABCDEF";

        s[0] = digits[(ch >> 4) & 0x0f];
        s[1] = digits[ch & 0x0f];
        return 0;
}
1273
1274 PyDoc_STRVAR(doc_b2a_qp,
1275 "b2a_qp(data, quotetabs=0, istext=1, header=0) -> s; \n\
1276  Encode a string using quoted-printable encoding. \n\
1277 \n\
1278 On encoding, when istext is set, newlines are not encoded, and white \n\
1279 space at end of lines is.  When istext is not set, \\r and \\n (CR/LF) are \n\
1280 both encoded.  When quotetabs is set, space and tabs are encoded.");
1281
1282 /* XXX: This is ridiculously complicated to be backward compatible
1283  * (mostly) with the quopri module.  It doesn't re-create the quopri
1284  * module bug where text ending in CRLF has the CR encoded */
1285 static PyObject*
1286 binascii_b2a_qp (PyObject *self, PyObject *args, PyObject *kwargs)
1287 {
1288         Py_ssize_t in, out;
1289         Py_buffer pdata;
1290         unsigned char *data, *odata;
1291         Py_ssize_t datalen = 0, odatalen = 0;
1292         PyObject *rv;
1293         unsigned int linelen = 0;
1294         static char *kwlist[] = {"data", "quotetabs", "istext",
1295                                        "header", NULL};
1296         int istext = 1;
1297         int quotetabs = 0;
1298         int header = 0;
1299         unsigned char ch;
1300         int crlf = 0;
1301         unsigned char *p;
1302
1303         if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|iii", kwlist, &pdata,
1304               &quotetabs, &istext, &header))
1305                 return NULL;
1306         data = pdata.buf;
1307         datalen = pdata.len;
1308
1309         /* See if this string is using CRLF line ends */
1310         /* XXX: this function has the side effect of converting all of
1311          * the end of lines to be the same depending on this detection
1312          * here */
1313         p = (unsigned char *) memchr(data, '\n', datalen);
1314         if ((p != NULL) && (p > data) && (*(p-1) == '\r'))
1315                 crlf = 1;
1316
1317         /* First, scan to see how many characters need to be encoded */
1318         in = 0;
1319         while (in < datalen) {
1320                 if ((data[in] > 126) ||
1321                     (data[in] == '=') ||
1322                     (header && data[in] == '_') ||
1323                     ((data[in] == '.') && (linelen == 0) &&
1324                      (data[in+1] == '\n' || data[in+1] == '\r' || data[in+1] == 0)) ||
1325                     (!istext && ((data[in] == '\r') || (data[in] == '\n'))) ||
1326                     ((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) ||
1327                     ((data[in] < 33) &&
1328                      (data[in] != '\r') && (data[in] != '\n') &&
1329                      (quotetabs ||
1330                         (!quotetabs && ((data[in] != '\t') && (data[in] != ' '))))))
1331                 {
1332                         if ((linelen + 3) >= MAXLINESIZE) {
1333                                 linelen = 0;
1334                                 if (crlf)
1335                                         odatalen += 3;
1336                                 else
1337                                         odatalen += 2;
1338                         }
1339                         linelen += 3;
1340                         odatalen += 3;
1341                         in++;
1342                 }
1343                 else {
1344                         if (istext &&
1345                             ((data[in] == '\n') ||
1346                              ((in+1 < datalen) && (data[in] == '\r') &&
1347                              (data[in+1] == '\n'))))
1348                         {
1349                                 linelen = 0;
1350                                 /* Protect against whitespace on end of line */
1351                                 if (in && ((data[in-1] == ' ') || (data[in-1] == '\t')))
1352                                         odatalen += 2;
1353                                 if (crlf)
1354                                         odatalen += 2;
1355                                 else
1356                                         odatalen += 1;
1357                                 if (data[in] == '\r')
1358                                         in += 2;
1359                                 else
1360                                         in++;
1361                         }
1362                         else {
1363                                 if ((in + 1 != datalen) &&
1364                                     (data[in+1] != '\n') &&
1365                                     (linelen + 1) >= MAXLINESIZE) {
1366                                         linelen = 0;
1367                                         if (crlf)
1368                                                 odatalen += 3;
1369                                         else
1370                                                 odatalen += 2;
1371                                 }
1372                                 linelen++;
1373                                 odatalen++;
1374                                 in++;
1375                         }
1376                 }
1377         }
1378
1379         /* We allocate the output same size as input, this is overkill.
1380          * The previous implementation used calloc() so we'll zero out the
1381          * memory here too, since PyMem_Malloc() does not guarantee that.
1382          */
1383         odata = (unsigned char *) PyMem_Malloc(odatalen);
1384         if (odata == NULL) {
1385                 PyBuffer_Release(&pdata);
1386                 PyErr_NoMemory();
1387                 return NULL;
1388         }
1389         memset(odata, 0, odatalen);
1390
1391         in = out = linelen = 0;
1392         while (in < datalen) {
1393                 if ((data[in] > 126) ||
1394                     (data[in] == '=') ||
1395                     (header && data[in] == '_') ||
1396                     ((data[in] == '.') && (linelen == 0) &&
1397                      (data[in+1] == '\n' || data[in+1] == '\r' || data[in+1] == 0)) ||
1398                     (!istext && ((data[in] == '\r') || (data[in] == '\n'))) ||
1399                     ((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) ||
1400                     ((data[in] < 33) &&
1401                      (data[in] != '\r') && (data[in] != '\n') &&
1402                      (quotetabs ||
1403                         (!quotetabs && ((data[in] != '\t') && (data[in] != ' '))))))
1404                 {
1405                         if ((linelen + 3 )>= MAXLINESIZE) {
1406                                 odata[out++] = '=';
1407                                 if (crlf) odata[out++] = '\r';
1408                                 odata[out++] = '\n';
1409                                 linelen = 0;
1410                         }
1411                         odata[out++] = '=';
1412                         to_hex(data[in], &odata[out]);
1413                         out += 2;
1414                         in++;
1415                         linelen += 3;
1416                 }
1417                 else {
1418                         if (istext &&
1419                             ((data[in] == '\n') ||
1420                              ((in+1 < datalen) && (data[in] == '\r') &&
1421                              (data[in+1] == '\n'))))
1422                         {
1423                                 linelen = 0;
1424                                 /* Protect against whitespace on end of line */
1425                                 if (out && ((odata[out-1] == ' ') || (odata[out-1] == '\t'))) {
1426                                         ch = odata[out-1];
1427                                         odata[out-1] = '=';
1428                                         to_hex(ch, &odata[out]);
1429                                         out += 2;
1430                                 }
1431
1432                                 if (crlf) odata[out++] = '\r';
1433                                 odata[out++] = '\n';
1434                                 if (data[in] == '\r')
1435                                         in += 2;
1436                                 else
1437                                         in++;
1438                         }
1439                         else {
1440                                 if ((in + 1 != datalen) &&
1441                                     (data[in+1] != '\n') &&
1442                                     (linelen + 1) >= MAXLINESIZE) {
1443                                         odata[out++] = '=';
1444                                         if (crlf) odata[out++] = '\r';
1445                                         odata[out++] = '\n';
1446                                         linelen = 0;
1447                                 }
1448                                 linelen++;
1449                                 if (header && data[in] == ' ') {
1450                                         odata[out++] = '_';
1451                                         in++;
1452                                 }
1453                                 else {
1454                                         odata[out++] = data[in++];
1455                                 }
1456                         }
1457                 }
1458         }
1459         if ((rv = PyBytes_FromStringAndSize((char *)odata, out)) == NULL) {
1460                 PyBuffer_Release(&pdata);
1461                 PyMem_Free(odata);
1462                 return NULL;
1463         }
1464         PyBuffer_Release(&pdata);
1465         PyMem_Free(odata);
1466         return rv;
1467 }
1468
1469 /* List of functions defined in the module */
1470
1471 static struct PyMethodDef binascii_module_methods[] = {
1472         {"a2b_uu",     binascii_a2b_uu,     METH_VARARGS, doc_a2b_uu},
1473         {"b2a_uu",     binascii_b2a_uu,     METH_VARARGS, doc_b2a_uu},
1474         {"a2b_base64", binascii_a2b_base64, METH_VARARGS, doc_a2b_base64},
1475         {"b2a_base64", binascii_b2a_base64, METH_VARARGS, doc_b2a_base64},
1476         {"a2b_hqx",    binascii_a2b_hqx,    METH_VARARGS, doc_a2b_hqx},
1477         {"b2a_hqx",    binascii_b2a_hqx,    METH_VARARGS, doc_b2a_hqx},
1478         {"b2a_hex",    binascii_hexlify,    METH_VARARGS, doc_hexlify},
1479         {"a2b_hex",    binascii_unhexlify,  METH_VARARGS, doc_unhexlify},
1480         {"hexlify",    binascii_hexlify,    METH_VARARGS, doc_hexlify},
1481         {"unhexlify",  binascii_unhexlify,  METH_VARARGS, doc_unhexlify},
1482         {"rlecode_hqx",   binascii_rlecode_hqx, METH_VARARGS, doc_rlecode_hqx},
1483         {"rledecode_hqx", binascii_rledecode_hqx, METH_VARARGS,
1484          doc_rledecode_hqx},
1485         {"crc_hqx",    binascii_crc_hqx,    METH_VARARGS, doc_crc_hqx},
1486         {"crc32",      binascii_crc32,      METH_VARARGS, doc_crc32},
1487         {"a2b_qp", (PyCFunction)binascii_a2b_qp, METH_VARARGS | METH_KEYWORDS,
1488           doc_a2b_qp},
1489         {"b2a_qp", (PyCFunction)binascii_b2a_qp, METH_VARARGS | METH_KEYWORDS,
1490           doc_b2a_qp},
1491         {NULL, NULL}                         /* sentinel */
1492 };
1493
1494
1495 /* Initialization function for the module (*must* be called PyInit_binascii) */
1496 PyDoc_STRVAR(doc_binascii, "Conversion between binary data and ASCII");
1497
1498
1499 static struct PyModuleDef binasciimodule = {
1500         PyModuleDef_HEAD_INIT,
1501         "binascii",
1502         doc_binascii,
1503         -1,
1504         binascii_module_methods,
1505         NULL,
1506         NULL,
1507         NULL,
1508         NULL
1509 };
1510
1511 PyMODINIT_FUNC
1512 PyInit_binascii(void)
1513 {
1514         PyObject *m, *d;
1515
1516         /* Create the module and add the functions */
1517         m = PyModule_Create(&binasciimodule);
1518         if (m == NULL)
1519                 return NULL;
1520
1521         d = PyModule_GetDict(m);
1522
1523         Error = PyErr_NewException("binascii.Error", PyExc_ValueError, NULL);
1524         PyDict_SetItemString(d, "Error", Error);
1525         Incomplete = PyErr_NewException("binascii.Incomplete", NULL, NULL);
1526         PyDict_SetItemString(d, "Incomplete", Incomplete);
1527         if (PyErr_Occurred()) {
1528                 Py_DECREF(m);
1529                 m = NULL;
1530         }
1531         return m;
1532 }