Modules/binascii.c

   1 /*
   2 ** Routines to represent binary data in ASCII and vice-versa
   3 **
   4 ** This module currently supports the following encodings:
   5 ** uuencode:
   6 **      each line encodes 45 bytes (except possibly the last)
   7 **      First char encodes (binary) length, rest data
   8 **      each char encodes 6 bits, as follows:
   9 **      binary: 01234567 abcdefgh ijklmnop
  10 **      ascii:  012345 67abcd efghij klmnop
  11 **      ASCII encoding method is "excess-space": 000000 is encoded as ' ', etc.
  12 **      short binary data is zero-extended (so the bits are always in the
  13 **      right place), this does *not* reflect in the length.
  14 ** base64:
  15 **      Line breaks are insignificant, but lines are at most 76 chars
  16 **      each char encodes 6 bits, in similar order as uucode/hqx. Encoding
  17 **      is done via a table.
  18 **      Short binary data is filled (in ASCII) with '='.
  19 ** hqx:
  20 **      File starts with introductory text, real data starts and ends
  21 **      with colons.
  22 **      Data consists of three similar parts: info, datafork, resourcefork.
  23 **      Each part is protected (at the end) with a 16-bit crc
  24 **      The binary data is run-length encoded, and then ascii-fied:
  25 **      binary: 01234567 abcdefgh ijklmnop
  26 **      ascii:  012345 67abcd efghij klmnop
  27 **      ASCII encoding is table-driven, see the code.
  28 **      Short binary data results in the runt ascii-byte being output with
  29 **      the bits in the right place.
  30 **
  31 ** While I was reading dozens of programs that encode or decode the formats
  32 ** here (documentation? hihi:-) I have formulated Jansen's Observation:
  33 **
  34 **      Programs that encode binary data in ASCII are written in
  35 **      such a style that they are as unreadable as possible. Devices used
  36 **      include unnecessary global variables, burying important tables
  37 **      in unrelated sourcefiles, putting functions in include files,
  38 **      using seemingly-descriptive variable names for different purposes,
  39 **      calls to empty subroutines and a host of others.
  40 **
  41 ** I have attempted to break with this tradition, but I guess that that
  42 ** does make the performance sub-optimal. Oh well, too bad...
  43 **
  44 ** Jack Jansen, CWI, July 1995.
  45 **
  46 ** Added support for quoted-printable encoding, based on rfc 1521 et al
  47 ** quoted-printable encoding specifies that non printable characters (anything
  48 ** below 32 and above 126) be encoded as =XX where XX is the hexadecimal value
  49 ** of the character.  It also specifies some other behavior to enable 8bit data
  50 ** in a mail message with little difficulty (maximum line sizes, protecting
  51 ** some cases of whitespace, etc).
  52 **
  53 ** Brandon Long, September 2001.
  54 */
  55
  56 #define PY_SSIZE_T_CLEAN
  57
  58 #include "Python.h"
  59 #ifdef USE_ZLIB_CRC32
  60 #include "zlib.h"
  61 #endif
  62
  63 static PyObject *Error;
  64 static PyObject *Incomplete;
  65
  66 /*
  67 ** hqx lookup table, ascii->binary.
  68 */
  69
  70 #define RUNCHAR 0x90
  71
  72 #define DONE 0x7F
  73 #define SKIP 0x7E
  74 #define FAIL 0x7D
  75
  76 static unsigned char table_a2b_hqx[256] = {
  77 /*       ^@    ^A    ^B    ^C    ^D    ^E    ^F    ^G   */
  78 /* 0*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
  79 /*       \b    \t    \n    ^K    ^L    \r    ^N    ^O   */
  80 /* 1*/  FAIL, FAIL, SKIP, FAIL, FAIL, SKIP, FAIL, FAIL,
  81 /*       ^P    ^Q    ^R    ^S    ^T    ^U    ^V    ^W   */
  82 /* 2*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
  83 /*       ^X    ^Y    ^Z    ^[    ^\    ^]    ^^    ^_   */
  84 /* 3*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
  85 /*              !     "     #     $     %     &     '   */
  86 /* 4*/  FAIL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
  87 /*        (     )     *     +     ,     -     .     /   */
  88 /* 5*/  0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, FAIL, FAIL,
  89 /*        0     1     2     3     4     5     6     7   */
  90 /* 6*/  0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, FAIL,
  91 /*        8     9     :     ;     <     =     >     ?   */
  92 /* 7*/  0x14, 0x15, DONE, FAIL, FAIL, FAIL, FAIL, FAIL,
  93 /*        @     A     B     C     D     E     F     G   */
  94 /* 8*/  0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D,
  95 /*        H     I     J     K     L     M     N     O   */
  96 /* 9*/  0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, FAIL,
  97 /*        P     Q     R     S     T     U     V     W   */
  98 /*10*/  0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, FAIL,
  99 /*        X     Y     Z     [     \     ]     ^     _   */
 100 /*11*/  0x2C, 0x2D, 0x2E, 0x2F, FAIL, FAIL, FAIL, FAIL,
 101 /*        `     a     b     c     d     e     f     g   */
 102 /*12*/  0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, FAIL,
 103 /*        h     i     j     k     l     m     n     o   */
 104 /*13*/  0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, FAIL, FAIL,
 105 /*        p     q     r     s     t     u     v     w   */
 106 /*14*/  0x3D, 0x3E, 0x3F, FAIL, FAIL, FAIL, FAIL, FAIL,
 107 /*        x     y     z     {     |     }     ~    ^?   */
 108 /*15*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 109 /*16*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 110         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 111         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 112         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 113         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 114         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 115         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 116         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 117         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 118         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 119         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 120         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 121         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 122         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 123         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 124         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 125 };
 126
 127 static unsigned char table_b2a_hqx[] =
 128 "!\"#$%&'()*+,-012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr";
 129
 130 static char table_a2b_base64[] = {
 131         -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
 132         -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
 133         -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
 134         52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1, 0,-1,-1, /* Note PAD->0 */
 135         -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
 136         15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
 137         -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
 138         41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
 139 };
 140
 141 #define BASE64_PAD '='
 142
 143 /* Max binary chunk size; limited only by available memory */
 144 #define BASE64_MAXBIN (PY_SSIZE_T_MAX/2 - sizeof(PyStringObject) - 3)
 145
 146 static unsigned char table_b2a_base64[] =
 147 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
 148
 149
 150
 151 static unsigned short crctab_hqx[256] = {
 152         0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
 153         0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,
 154         0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6,
 155         0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de,
 156         0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485,
 157         0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d,
 158         0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4,
 159         0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc,
 160         0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823,
 161         0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b,
 162         0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12,
 163         0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a,
 164         0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41,
 165         0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49,
 166         0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70,
 167         0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78,
 168         0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f,
 169         0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067,
 170         0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e,
 171         0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256,
 172         0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d,
 173         0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
 174         0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c,
 175         0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634,
 176         0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab,
 177         0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3,
 178         0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a,
 179         0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92,
 180         0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9,
 181         0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1,
 182         0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8,
 183         0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0,
 184 };
 185
 186 PyDoc_STRVAR(doc_a2b_uu, "(ascii) -> bin. Decode a line of uuencoded data");
 187
 188 static PyObject *
 189 binascii_a2b_uu(PyObject *self, PyObject *args)
 190 {
 191         unsigned char *ascii_data, *bin_data;
 192         int leftbits = 0;
 193         unsigned char this_ch;
 194         unsigned int leftchar = 0;
 195         PyObject *rv;
 196         Py_ssize_t ascii_len, bin_len;
 197
 198         if ( !PyArg_ParseTuple(args, "t#:a2b_uu", &ascii_data, &ascii_len) )
 199                 return NULL;
 200
 201         assert(ascii_len >= 0);
 202
 203         /* First byte: binary data length (in bytes) */
 204         bin_len = (*ascii_data++ - ' ') & 077;
 205         ascii_len--;
 206
 207         /* Allocate the buffer */
 208         if ( (rv=PyString_FromStringAndSize(NULL, bin_len)) == NULL )
 209                 return NULL;
 210         bin_data = (unsigned char *)PyString_AsString(rv);
 211
 212         for( ; bin_len > 0 ; ascii_len--, ascii_data++ ) {
 213                 /* XXX is it really best to add NULs if there's no more data */
 214                 this_ch = (ascii_len > 0) ? *ascii_data : 0;
 215                 if ( this_ch == '\n' || this_ch == '\r' || ascii_len <= 0) {
 216                         /*
 217                         ** Whitespace. Assume some spaces got eaten at
 218                         ** end-of-line. (We check this later)
 219                         */
 220                         this_ch = 0;
 221                 } else {
 222                         /* Check the character for legality
 223                         ** The 64 in stead of the expected 63 is because
 224                         ** there are a few uuencodes out there that use
 225                         ** '`' as zero instead of space.
 226                         */
 227                         if ( this_ch < ' ' || this_ch > (' ' + 64)) {
 228                                 PyErr_SetString(Error, "Illegal char");
 229                                 Py_DECREF(rv);
 230                                 return NULL;
 231                         }
 232                         this_ch = (this_ch - ' ') & 077;
 233                 }
 234                 /*
 235                 ** Shift it in on the low end, and see if there's
 236                 ** a byte ready for output.
 237                 */
 238                 leftchar = (leftchar << 6) | (this_ch);
 239                 leftbits += 6;
 240                 if ( leftbits >= 8 ) {
 241                         leftbits -= 8;
 242                         *bin_data++ = (leftchar >> leftbits) & 0xff;
 243                         leftchar &= ((1 << leftbits) - 1);
 244                         bin_len--;
 245                 }
 246         }
 247         /*
 248         ** Finally, check that if there's anything left on the line
 249         ** that it's whitespace only.
 250         */
 251         while( ascii_len-- > 0 ) {
 252                 this_ch = *ascii_data++;
 253                 /* Extra '`' may be written as padding in some cases */
 254                 if ( this_ch != ' ' && this_ch != ' '+64 &&
 255                      this_ch != '\n' && this_ch != '\r' ) {
 256                         PyErr_SetString(Error, "Trailing garbage");
 257                         Py_DECREF(rv);
 258                         return NULL;
 259                 }
 260         }
 261         return rv;
 262 }
 263
 264 PyDoc_STRVAR(doc_b2a_uu, "(bin) -> ascii. Uuencode line of data");
 265
 266 static PyObject *
 267 binascii_b2a_uu(PyObject *self, PyObject *args)
 268 {
 269         unsigned char *ascii_data, *bin_data;
 270         int leftbits = 0;
 271         unsigned char this_ch;
 272         unsigned int leftchar = 0;
 273         PyObject *rv;
 274         Py_ssize_t bin_len;
 275
 276         if ( !PyArg_ParseTuple(args, "s#:b2a_uu", &bin_data, &bin_len) )
 277                 return NULL;
 278         if ( bin_len > 45 ) {
 279                 /* The 45 is a limit that appears in all uuencode's */
 280                 PyErr_SetString(Error, "At most 45 bytes at once");
 281                 return NULL;
 282         }
 283
 284         /* We're lazy and allocate to much (fixed up later) */
 285         if ( (rv=PyString_FromStringAndSize(NULL, bin_len*2+2)) == NULL )
 286                 return NULL;
 287         ascii_data = (unsigned char *)PyString_AsString(rv);
 288
 289         /* Store the length */
 290         *ascii_data++ = ' ' + (bin_len & 077);
 291
 292         for( ; bin_len > 0 || leftbits != 0 ; bin_len--, bin_data++ ) {
 293                 /* Shift the data (or padding) into our buffer */
 294                 if ( bin_len > 0 )      /* Data */
 295                         leftchar = (leftchar << 8) | *bin_data;
 296                 else                    /* Padding */
 297                         leftchar <<= 8;
 298                 leftbits += 8;
 299
 300                 /* See if there are 6-bit groups ready */
 301                 while ( leftbits >= 6 ) {
 302                         this_ch = (leftchar >> (leftbits-6)) & 0x3f;
 303                         leftbits -= 6;
 304                         *ascii_data++ = this_ch + ' ';
 305                 }
 306         }
 307         *ascii_data++ = '\n';   /* Append a courtesy newline */
 308
 309         _PyString_Resize(&rv, (ascii_data -
 310                                (unsigned char *)PyString_AsString(rv)));
 311         return rv;
 312 }
 313
 314
 315 static int
 316 binascii_find_valid(unsigned char *s, Py_ssize_t slen, int num)
 317 {
 318         /* Finds & returns the (num+1)th
 319         ** valid character for base64, or -1 if none.
 320         */
 321
 322         int ret = -1;
 323         unsigned char c, b64val;
 324
 325         while ((slen > 0) && (ret == -1)) {
 326                 c = *s;
 327                 b64val = table_a2b_base64[c & 0x7f];
 328                 if ( ((c <= 0x7f) && (b64val != (unsigned char)-1)) ) {
 329                         if (num == 0)
 330                                 ret = *s;
 331                         num--;
 332                 }
 333
 334                 s++;
 335                 slen--;
 336         }
 337         return ret;
 338 }
 339
 340 PyDoc_STRVAR(doc_a2b_base64, "(ascii) -> bin. Decode a line of base64 data");
 341
 342 static PyObject *
 343 binascii_a2b_base64(PyObject *self, PyObject *args)
 344 {
 345         unsigned char *ascii_data, *bin_data;
 346         int leftbits = 0;
 347         unsigned char this_ch;
 348         unsigned int leftchar = 0;
 349         PyObject *rv;
 350         Py_ssize_t ascii_len, bin_len;
 351         int quad_pos = 0;
 352
 353         if ( !PyArg_ParseTuple(args, "t#:a2b_base64", &ascii_data, &ascii_len) )
 354                 return NULL;
 355
 356         assert(ascii_len >= 0);
 357
 358         if (ascii_len > PY_SSIZE_T_MAX - 3)
 359                 return PyErr_NoMemory();
 360
 361         bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
 362
 363         /* Allocate the buffer */
 364         if ( (rv=PyString_FromStringAndSize(NULL, bin_len)) == NULL )
 365                 return NULL;
 366         bin_data = (unsigned char *)PyString_AsString(rv);
 367         bin_len = 0;
 368
 369         for( ; ascii_len > 0; ascii_len--, ascii_data++) {
 370                 this_ch = *ascii_data;
 371
 372                 if (this_ch > 0x7f ||
 373                     this_ch == '\r' || this_ch == '\n' || this_ch == ' ')
 374                         continue;
 375
 376                 /* Check for pad sequences and ignore
 377                 ** the invalid ones.
 378                 */
 379                 if (this_ch == BASE64_PAD) {
 380                         if ( (quad_pos < 2) ||
 381                              ((quad_pos == 2) &&
 382                               (binascii_find_valid(ascii_data, ascii_len, 1)
 383                                != BASE64_PAD)) )
 384                         {
 385                                 continue;
 386                         }
 387                         else {
 388                                 /* A pad sequence means no more input.
 389                                 ** We've already interpreted the data
 390                                 ** from the quad at this point.
 391                                 */
 392                                 leftbits = 0;
 393                                 break;
 394                         }
 395                 }
 396
 397                 this_ch = table_a2b_base64[*ascii_data];
 398                 if ( this_ch == (unsigned char) -1 )
 399                         continue;
 400
 401                 /*
 402                 ** Shift it in on the low end, and see if there's
 403                 ** a byte ready for output.
 404                 */
 405                 quad_pos = (quad_pos + 1) & 0x03;
 406                 leftchar = (leftchar << 6) | (this_ch);
 407                 leftbits += 6;
 408
 409                 if ( leftbits >= 8 ) {
 410                         leftbits -= 8;
 411                         *bin_data++ = (leftchar >> leftbits) & 0xff;
 412                         bin_len++;
 413                         leftchar &= ((1 << leftbits) - 1);
 414                 }
 415         }
 416
 417         if (leftbits != 0) {
 418                 PyErr_SetString(Error, "Incorrect padding");
 419                 Py_DECREF(rv);
 420                 return NULL;
 421         }
 422
 423         /* And set string size correctly. If the result string is empty
 424         ** (because the input was all invalid) return the shared empty
 425         ** string instead; _PyString_Resize() won't do this for us.
 426         */
 427         if (bin_len > 0)
 428                 _PyString_Resize(&rv, bin_len);
 429         else {
 430                 Py_DECREF(rv);
 431                 rv = PyString_FromString("");
 432         }
 433         return rv;
 434 }
 435
 436 PyDoc_STRVAR(doc_b2a_base64, "(bin) -> ascii. Base64-code line of data");
 437
 438 static PyObject *
 439 binascii_b2a_base64(PyObject *self, PyObject *args)
 440 {
 441         unsigned char *ascii_data, *bin_data;
 442         int leftbits = 0;
 443         unsigned char this_ch;
 444         unsigned int leftchar = 0;
 445         PyObject *rv;
 446         Py_ssize_t bin_len;
 447
 448         if ( !PyArg_ParseTuple(args, "s#:b2a_base64", &bin_data, &bin_len) )
 449                 return NULL;
 450
 451         assert(bin_len >= 0);
 452
 453         if ( bin_len > BASE64_MAXBIN ) {
 454                 PyErr_SetString(Error, "Too much data for base64 line");
 455                 return NULL;
 456         }
 457
 458         /* We're lazy and allocate too much (fixed up later).
 459            "+3" leaves room for up to two pad characters and a trailing
 460            newline.  Note that 'b' gets encoded as 'Yg==\n' (1 in, 5 out). */
 461         if ( (rv=PyString_FromStringAndSize(NULL, bin_len*2 + 3)) == NULL )
 462                 return NULL;
 463         ascii_data = (unsigned char *)PyString_AsString(rv);
 464
 465         for( ; bin_len > 0 ; bin_len--, bin_data++ ) {
 466                 /* Shift the data into our buffer */
 467                 leftchar = (leftchar << 8) | *bin_data;
 468                 leftbits += 8;
 469
 470                 /* See if there are 6-bit groups ready */
 471                 while ( leftbits >= 6 ) {
 472                         this_ch = (leftchar >> (leftbits-6)) & 0x3f;
 473                         leftbits -= 6;
 474                         *ascii_data++ = table_b2a_base64[this_ch];
 475                 }
 476         }
 477         if ( leftbits == 2 ) {
 478                 *ascii_data++ = table_b2a_base64[(leftchar&3) << 4];
 479                 *ascii_data++ = BASE64_PAD;
 480                 *ascii_data++ = BASE64_PAD;
 481         } else if ( leftbits == 4 ) {
 482                 *ascii_data++ = table_b2a_base64[(leftchar&0xf) << 2];
 483                 *ascii_data++ = BASE64_PAD;
 484         }
 485         *ascii_data++ = '\n';   /* Append a courtesy newline */
 486
 487         _PyString_Resize(&rv, (ascii_data -
 488                                (unsigned char *)PyString_AsString(rv)));
 489         return rv;
 490 }
 491
 492 PyDoc_STRVAR(doc_a2b_hqx, "ascii -> bin, done. Decode .hqx coding");
 493
 494 static PyObject *
 495 binascii_a2b_hqx(PyObject *self, PyObject *args)
 496 {
 497         unsigned char *ascii_data, *bin_data;
 498         int leftbits = 0;
 499         unsigned char this_ch;
 500         unsigned int leftchar = 0;
 501         PyObject *rv;
 502         Py_ssize_t len;
 503         int done = 0;
 504
 505         if ( !PyArg_ParseTuple(args, "t#:a2b_hqx", &ascii_data, &len) )
 506                 return NULL;
 507
 508         assert(len >= 0);
 509
 510         if (len > PY_SSIZE_T_MAX - 2)
 511                 return PyErr_NoMemory();
 512
 513         /* Allocate a string that is too big (fixed later)
 514            Add two to the initial length to prevent interning which
 515            would preclude subsequent resizing.  */
 516         if ( (rv=PyString_FromStringAndSize(NULL, len+2)) == NULL )
 517                 return NULL;
 518         bin_data = (unsigned char *)PyString_AsString(rv);
 519
 520         for( ; len > 0 ; len--, ascii_data++ ) {
 521                 /* Get the byte and look it up */
 522                 this_ch = table_a2b_hqx[*ascii_data];
 523                 if ( this_ch == SKIP )
 524                         continue;
 525                 if ( this_ch == FAIL ) {
 526                         PyErr_SetString(Error, "Illegal char");
 527                         Py_DECREF(rv);
 528                         return NULL;
 529                 }
 530                 if ( this_ch == DONE ) {
 531                         /* The terminating colon */
 532                         done = 1;
 533                         break;
 534                 }
 535
 536                 /* Shift it into the buffer and see if any bytes are ready */
 537                 leftchar = (leftchar << 6) | (this_ch);
 538                 leftbits += 6;
 539                 if ( leftbits >= 8 ) {
 540                         leftbits -= 8;
 541                         *bin_data++ = (leftchar >> leftbits) & 0xff;
 542                         leftchar &= ((1 << leftbits) - 1);
 543                 }
 544         }
 545
 546         if ( leftbits && !done ) {
 547                 PyErr_SetString(Incomplete,
 548                                 "String has incomplete number of bytes");
 549                 Py_DECREF(rv);
 550                 return NULL;
 551         }
 552         _PyString_Resize(
 553                 &rv, (bin_data - (unsigned char *)PyString_AsString(rv)));
 554         if (rv) {
 555                 PyObject *rrv = Py_BuildValue("Oi", rv, done);
 556                 Py_DECREF(rv);
 557                 return rrv;
 558         }
 559
 560         return NULL;
 561 }
 562
 563 PyDoc_STRVAR(doc_rlecode_hqx, "Binhex RLE-code binary data");
 564
 565 static PyObject *
 566 binascii_rlecode_hqx(PyObject *self, PyObject *args)
 567 {
 568         unsigned char *in_data, *out_data;
 569         PyObject *rv;
 570         unsigned char ch;
 571         Py_ssize_t in, inend, len;
 572
 573         if ( !PyArg_ParseTuple(args, "s#:rlecode_hqx", &in_data, &len) )
 574                 return NULL;
 575
 576         assert(len >= 0);
 577
 578         if (len > PY_SSIZE_T_MAX / 2 - 2)
 579                 return PyErr_NoMemory();
 580
 581         /* Worst case: output is twice as big as input (fixed later) */
 582         if ( (rv=PyString_FromStringAndSize(NULL, len*2+2)) == NULL )
 583                 return NULL;
 584         out_data = (unsigned char *)PyString_AsString(rv);
 585
 586         for( in=0; in<len; in++) {
 587                 ch = in_data[in];
 588                 if ( ch == RUNCHAR ) {
 589                         /* RUNCHAR. Escape it. */
 590                         *out_data++ = RUNCHAR;
 591                         *out_data++ = 0;
 592                 } else {
 593                         /* Check how many following are the same */
 594                         for(inend=in+1;
 595                             inend<len && in_data[inend] == ch &&
 596                                     inend < in+255;
 597                             inend++) ;
 598                         if ( inend - in > 3 ) {
 599                                 /* More than 3 in a row. Output RLE. */
 600                                 *out_data++ = ch;
 601                                 *out_data++ = RUNCHAR;
 602                                 *out_data++ = inend-in;
 603                                 in = inend-1;
 604                         } else {
 605                                 /* Less than 3. Output the byte itself */
 606                                 *out_data++ = ch;
 607                         }
 608                 }
 609         }
 610         _PyString_Resize(&rv, (out_data -
 611                                (unsigned char *)PyString_AsString(rv)));
 612         return rv;
 613 }
 614
 615 PyDoc_STRVAR(doc_b2a_hqx, "Encode .hqx data");
 616
 617 static PyObject *
 618 binascii_b2a_hqx(PyObject *self, PyObject *args)
 619 {
 620         unsigned char *ascii_data, *bin_data;
 621         int leftbits = 0;
 622         unsigned char this_ch;
 623         unsigned int leftchar = 0;
 624         PyObject *rv;
 625         Py_ssize_t len;
 626
 627         if ( !PyArg_ParseTuple(args, "s#:b2a_hqx", &bin_data, &len) )
 628                 return NULL;
 629
 630         assert(len >= 0);
 631
 632         if (len > PY_SSIZE_T_MAX / 2 - 2)
 633                 return PyErr_NoMemory();
 634
 635         /* Allocate a buffer that is at least large enough */
 636         if ( (rv=PyString_FromStringAndSize(NULL, len*2+2)) == NULL )
 637                 return NULL;
 638         ascii_data = (unsigned char *)PyString_AsString(rv);
 639
 640         for( ; len > 0 ; len--, bin_data++ ) {
 641                 /* Shift into our buffer, and output any 6bits ready */
 642                 leftchar = (leftchar << 8) | *bin_data;
 643                 leftbits += 8;
 644                 while ( leftbits >= 6 ) {
 645                         this_ch = (leftchar >> (leftbits-6)) & 0x3f;
 646                         leftbits -= 6;
 647                         *ascii_data++ = table_b2a_hqx[this_ch];
 648                 }
 649         }
 650         /* Output a possible runt byte */
 651         if ( leftbits ) {
 652                 leftchar <<= (6-leftbits);
 653                 *ascii_data++ = table_b2a_hqx[leftchar & 0x3f];
 654         }
 655         _PyString_Resize(&rv, (ascii_data -
 656                                (unsigned char *)PyString_AsString(rv)));
 657         return rv;
 658 }
 659
 660 PyDoc_STRVAR(doc_rledecode_hqx, "Decode hexbin RLE-coded string");
 661
 662 static PyObject *
 663 binascii_rledecode_hqx(PyObject *self, PyObject *args)
 664 {
 665         unsigned char *in_data, *out_data;
 666         unsigned char in_byte, in_repeat;
 667         PyObject *rv;
 668         Py_ssize_t in_len, out_len, out_len_left;
 669
 670         if ( !PyArg_ParseTuple(args, "s#:rledecode_hqx", &in_data, &in_len) )
 671                 return NULL;
 672
 673         assert(in_len >= 0);
 674
 675         /* Empty string is a special case */
 676         if ( in_len == 0 )
 677                 return PyString_FromString("");
 678     else if (in_len > PY_SSIZE_T_MAX / 2)
 679         return PyErr_NoMemory();
 680
 681         /* Allocate a buffer of reasonable size. Resized when needed */
 682         out_len = in_len*2;
 683         if ( (rv=PyString_FromStringAndSize(NULL, out_len)) == NULL )
 684                 return NULL;
 685         out_len_left = out_len;
 686         out_data = (unsigned char *)PyString_AsString(rv);
 687
 688         /*
 689         ** We need two macros here to get/put bytes and handle
 690         ** end-of-buffer for input and output strings.
 691         */
 692 #define INBYTE(b) \
 693         do { \
 694                  if ( --in_len < 0 ) { \
 695                            PyErr_SetString(Incomplete, ""); \
 696                            Py_DECREF(rv); \
 697                            return NULL; \
 698                  } \
 699                  b = *in_data++; \
 700         } while(0)
 701
 702 #define OUTBYTE(b) \
 703         do { \
 704                  if ( --out_len_left < 0 ) { \
 705                           if ( out_len > PY_SSIZE_T_MAX / 2) return PyErr_NoMemory(); \
 706                           _PyString_Resize(&rv, 2*out_len); \
 707                           if ( rv == NULL ) return NULL; \
 708                           out_data = (unsigned char *)PyString_AsString(rv) \
 709                                                                  + out_len; \
 710                           out_len_left = out_len-1; \
 711                           out_len = out_len * 2; \
 712                  } \
 713                  *out_data++ = b; \
 714         } while(0)
 715
 716                 /*
 717                 ** Handle first byte separately (since we have to get angry
 718                 ** in case of an orphaned RLE code).
 719                 */
 720                 INBYTE(in_byte);
 721
 722         if (in_byte == RUNCHAR) {
 723                 INBYTE(in_repeat);
 724                 if (in_repeat != 0) {
 725                         /* Note Error, not Incomplete (which is at the end
 726                         ** of the string only). This is a programmer error.
 727                         */
 728                         PyErr_SetString(Error, "Orphaned RLE code at start");
 729                         Py_DECREF(rv);
 730                         return NULL;
 731                 }
 732                 OUTBYTE(RUNCHAR);
 733         } else {
 734                 OUTBYTE(in_byte);
 735         }
 736
 737         while( in_len > 0 ) {
 738                 INBYTE(in_byte);
 739
 740                 if (in_byte == RUNCHAR) {
 741                         INBYTE(in_repeat);
 742                         if ( in_repeat == 0 ) {
 743                                 /* Just an escaped RUNCHAR value */
 744                                 OUTBYTE(RUNCHAR);
 745                         } else {
 746                                 /* Pick up value and output a sequence of it */
 747                                 in_byte = out_data[-1];
 748                                 while ( --in_repeat > 0 )
 749                                         OUTBYTE(in_byte);
 750                         }
 751                 } else {
 752                         /* Normal byte */
 753                         OUTBYTE(in_byte);
 754                 }
 755         }
 756         _PyString_Resize(&rv, (out_data -
 757                                (unsigned char *)PyString_AsString(rv)));
 758         return rv;
 759 }
 760
 761 PyDoc_STRVAR(doc_crc_hqx,
 762 "(data, oldcrc) -> newcrc. Compute hqx CRC incrementally");
 763
 764 static PyObject *
 765 binascii_crc_hqx(PyObject *self, PyObject *args)
 766 {
 767         unsigned char *bin_data;
 768         unsigned int crc;
 769         Py_ssize_t len;
 770
 771         if ( !PyArg_ParseTuple(args, "s#i:crc_hqx", &bin_data, &len, &crc) )
 772                 return NULL;
 773
 774         while(len-- > 0) {
 775                 crc=((crc<<8)&0xff00)^crctab_hqx[((crc>>8)&0xff)^*bin_data++];
 776         }
 777
 778         return Py_BuildValue("i", crc);
 779 }
 780
 781 PyDoc_STRVAR(doc_crc32,
 782 "(data, oldcrc = 0) -> newcrc. Compute CRC-32 incrementally");
 783
 784 #ifdef USE_ZLIB_CRC32
 785 /* This was taken from zlibmodule.c PyZlib_crc32 (but is PY_SSIZE_T_CLEAN) */
 786 static PyObject *
 787 binascii_crc32(PyObject *self, PyObject *args)
 788 {
 789     unsigned int crc32val = 0;  /* crc32(0L, Z_NULL, 0) */
 790     Byte *buf;
 791     Py_ssize_t len;
 792     int signed_val;
 793
 794     if (!PyArg_ParseTuple(args, "s#|I:crc32", &buf, &len, &crc32val))
 795         return NULL;
 796     /* In Python 2.x we return a signed integer regardless of native platform
 797      * long size (the 32bit unsigned long is treated as 32-bit signed and sign
 798      * extended into a 64-bit long inside the integer object).  3.0 does the
 799      * right thing and returns unsigned. http://bugs.python.org/issue1202 */
 800     signed_val = crc32(crc32val, buf, len);
 801     return PyInt_FromLong(signed_val);
 802 }
 803 #else  /* USE_ZLIB_CRC32 */
 804 /*  Crc - 32 BIT ANSI X3.66 CRC checksum files
 805     Also known as: ISO 3307
 806 **********************************************************************|
 807 *                                                                    *|
 808 * Demonstration program to compute the 32-bit CRC used as the frame  *|
 809 * check sequence in ADCCP (ANSI X3.66, also known as FIPS PUB 71     *|
 810 * and FED-STD-1003, the U.S. versions of CCITT's X.25 link-level     *|
 811 * protocol).  The 32-bit FCS was added via the Federal Register,     *|
 812 * 1 June 1982, p.23798.  I presume but don't know for certain that   *|
 813 * this polynomial is or will be included in CCITT V.41, which        *|
 814 * defines the 16-bit CRC (often called CRC-CCITT) polynomial.  FIPS  *|
 815 * PUB 78 says that the 32-bit FCS reduces otherwise undetected       *|
 816 * errors by a factor of 10^-5 over 16-bit FCS.                       *|
 817 *                                                                    *|
 818 **********************************************************************|
 819
 820  Copyright (C) 1986 Gary S. Brown.  You may use this program, or
 821  code or tables extracted from it, as desired without restriction.
 822
 823  First, the polynomial itself and its table of feedback terms.  The
 824  polynomial is
 825  X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
 826  Note that we take it "backwards" and put the highest-order term in
 827  the lowest-order bit.  The X^32 term is "implied"; the LSB is the
 828  X^31 term, etc.  The X^0 term (usually shown as "+1") results in
 829  the MSB being 1.
 830
 831  Note that the usual hardware shift register implementation, which
 832  is what we're using (we're merely optimizing it by doing eight-bit
 833  chunks at a time) shifts bits into the lowest-order term.  In our
 834  implementation, that means shifting towards the right.  Why do we
 835  do it this way?  Because the calculated CRC must be transmitted in
 836  order from highest-order term to lowest-order term.  UARTs transmit
 837  characters in order from LSB to MSB.  By storing the CRC this way,
 838  we hand it to the UART in the order low-byte to high-byte; the UART
 839  sends each low-bit to high-bit; and the result is transmission bit
 840  by bit from highest- to lowest-order term without requiring any bit
 841  shuffling on our part.  Reception works similarly.
 842
 843  The feedback terms table consists of 256, 32-bit entries.  Notes:
 844
 845   1. The table can be generated at runtime if desired; code to do so
 846      is shown later.  It might not be obvious, but the feedback
 847      terms simply represent the results of eight shift/xor opera-
 848      tions for all combinations of data and CRC register values.
 849
 850   2. The CRC accumulation logic is the same for all CRC polynomials,
 851      be they sixteen or thirty-two bits wide.  You simply choose the
 852      appropriate table.  Alternatively, because the table can be
 853      generated at runtime, you can start by generating the table for
 854      the polynomial in question and use exactly the same "updcrc",
 855      if your application needn't simultaneously handle two CRC
 856      polynomials.  (Note, however, that XMODEM is strange.)
 857
 858   3. For 16-bit CRCs, the table entries need be only 16 bits wide;
 859      of course, 32-bit entries work OK if the high 16 bits are zero.
 860
 861   4. The values must be right-shifted by eight bits by the "updcrc"
 862      logic; the shift must be unsigned (bring in zeroes).  On some
 863      hardware you could probably optimize the shift in assembler by
 864      using byte-swap instructions.
 865 ********************************************************************/
 866
 867 static unsigned int crc_32_tab[256] = {
 868 0x00000000U, 0x77073096U, 0xee0e612cU, 0x990951baU, 0x076dc419U,
 869 0x706af48fU, 0xe963a535U, 0x9e6495a3U, 0x0edb8832U, 0x79dcb8a4U,
 870 0xe0d5e91eU, 0x97d2d988U, 0x09b64c2bU, 0x7eb17cbdU, 0xe7b82d07U,
 871 0x90bf1d91U, 0x1db71064U, 0x6ab020f2U, 0xf3b97148U, 0x84be41deU,
 872 0x1adad47dU, 0x6ddde4ebU, 0xf4d4b551U, 0x83d385c7U, 0x136c9856U,
 873 0x646ba8c0U, 0xfd62f97aU, 0x8a65c9ecU, 0x14015c4fU, 0x63066cd9U,
 874 0xfa0f3d63U, 0x8d080df5U, 0x3b6e20c8U, 0x4c69105eU, 0xd56041e4U,
 875 0xa2677172U, 0x3c03e4d1U, 0x4b04d447U, 0xd20d85fdU, 0xa50ab56bU,
 876 0x35b5a8faU, 0x42b2986cU, 0xdbbbc9d6U, 0xacbcf940U, 0x32d86ce3U,
 877 0x45df5c75U, 0xdcd60dcfU, 0xabd13d59U, 0x26d930acU, 0x51de003aU,
 878 0xc8d75180U, 0xbfd06116U, 0x21b4f4b5U, 0x56b3c423U, 0xcfba9599U,
 879 0xb8bda50fU, 0x2802b89eU, 0x5f058808U, 0xc60cd9b2U, 0xb10be924U,
 880 0x2f6f7c87U, 0x58684c11U, 0xc1611dabU, 0xb6662d3dU, 0x76dc4190U,
 881 0x01db7106U, 0x98d220bcU, 0xefd5102aU, 0x71b18589U, 0x06b6b51fU,
 882 0x9fbfe4a5U, 0xe8b8d433U, 0x7807c9a2U, 0x0f00f934U, 0x9609a88eU,
 883 0xe10e9818U, 0x7f6a0dbbU, 0x086d3d2dU, 0x91646c97U, 0xe6635c01U,
 884 0x6b6b51f4U, 0x1c6c6162U, 0x856530d8U, 0xf262004eU, 0x6c0695edU,
 885 0x1b01a57bU, 0x8208f4c1U, 0xf50fc457U, 0x65b0d9c6U, 0x12b7e950U,
 886 0x8bbeb8eaU, 0xfcb9887cU, 0x62dd1ddfU, 0x15da2d49U, 0x8cd37cf3U,
 887 0xfbd44c65U, 0x4db26158U, 0x3ab551ceU, 0xa3bc0074U, 0xd4bb30e2U,
 888 0x4adfa541U, 0x3dd895d7U, 0xa4d1c46dU, 0xd3d6f4fbU, 0x4369e96aU,
 889 0x346ed9fcU, 0xad678846U, 0xda60b8d0U, 0x44042d73U, 0x33031de5U,
 890 0xaa0a4c5fU, 0xdd0d7cc9U, 0x5005713cU, 0x270241aaU, 0xbe0b1010U,
 891 0xc90c2086U, 0x5768b525U, 0x206f85b3U, 0xb966d409U, 0xce61e49fU,
 892 0x5edef90eU, 0x29d9c998U, 0xb0d09822U, 0xc7d7a8b4U, 0x59b33d17U,
 893 0x2eb40d81U, 0xb7bd5c3bU, 0xc0ba6cadU, 0xedb88320U, 0x9abfb3b6U,
 894 0x03b6e20cU, 0x74b1d29aU, 0xead54739U, 0x9dd277afU, 0x04db2615U,
 895 0x73dc1683U, 0xe3630b12U, 0x94643b84U, 0x0d6d6a3eU, 0x7a6a5aa8U,
 896 0xe40ecf0bU, 0x9309ff9dU, 0x0a00ae27U, 0x7d079eb1U, 0xf00f9344U,
 897 0x8708a3d2U, 0x1e01f268U, 0x6906c2feU, 0xf762575dU, 0x806567cbU,
 898 0x196c3671U, 0x6e6b06e7U, 0xfed41b76U, 0x89d32be0U, 0x10da7a5aU,
 899 0x67dd4accU, 0xf9b9df6fU, 0x8ebeeff9U, 0x17b7be43U, 0x60b08ed5U,
 900 0xd6d6a3e8U, 0xa1d1937eU, 0x38d8c2c4U, 0x4fdff252U, 0xd1bb67f1U,
 901 0xa6bc5767U, 0x3fb506ddU, 0x48b2364bU, 0xd80d2bdaU, 0xaf0a1b4cU,
 902 0x36034af6U, 0x41047a60U, 0xdf60efc3U, 0xa867df55U, 0x316e8eefU,
 903 0x4669be79U, 0xcb61b38cU, 0xbc66831aU, 0x256fd2a0U, 0x5268e236U,
 904 0xcc0c7795U, 0xbb0b4703U, 0x220216b9U, 0x5505262fU, 0xc5ba3bbeU,
 905 0xb2bd0b28U, 0x2bb45a92U, 0x5cb36a04U, 0xc2d7ffa7U, 0xb5d0cf31U,
 906 0x2cd99e8bU, 0x5bdeae1dU, 0x9b64c2b0U, 0xec63f226U, 0x756aa39cU,
 907 0x026d930aU, 0x9c0906a9U, 0xeb0e363fU, 0x72076785U, 0x05005713U,
 908 0x95bf4a82U, 0xe2b87a14U, 0x7bb12baeU, 0x0cb61b38U, 0x92d28e9bU,
 909 0xe5d5be0dU, 0x7cdcefb7U, 0x0bdbdf21U, 0x86d3d2d4U, 0xf1d4e242U,
 910 0x68ddb3f8U, 0x1fda836eU, 0x81be16cdU, 0xf6b9265bU, 0x6fb077e1U,
 911 0x18b74777U, 0x88085ae6U, 0xff0f6a70U, 0x66063bcaU, 0x11010b5cU,
 912 0x8f659effU, 0xf862ae69U, 0x616bffd3U, 0x166ccf45U, 0xa00ae278U,
 913 0xd70dd2eeU, 0x4e048354U, 0x3903b3c2U, 0xa7672661U, 0xd06016f7U,
 914 0x4969474dU, 0x3e6e77dbU, 0xaed16a4aU, 0xd9d65adcU, 0x40df0b66U,
 915 0x37d83bf0U, 0xa9bcae53U, 0xdebb9ec5U, 0x47b2cf7fU, 0x30b5ffe9U,
 916 0xbdbdf21cU, 0xcabac28aU, 0x53b39330U, 0x24b4a3a6U, 0xbad03605U,
 917 0xcdd70693U, 0x54de5729U, 0x23d967bfU, 0xb3667a2eU, 0xc4614ab8U,
 918 0x5d681b02U, 0x2a6f2b94U, 0xb40bbe37U, 0xc30c8ea1U, 0x5a05df1bU,
 919 0x2d02ef8dU
 920 };
 921
 922 static PyObject *
 923 binascii_crc32(PyObject *self, PyObject *args)
 924 { /* By Jim Ahlstrom; All rights transferred to CNRI */
 925         unsigned char *bin_data;
 926         unsigned int crc = 0U;  /* initial value of CRC */
 927         Py_ssize_t len;
 928         int result;
 929
 930         if ( !PyArg_ParseTuple(args, "s#|I:crc32", &bin_data, &len, &crc) )
 931                 return NULL;
 932
 933         crc = ~ crc;
 934         while (len-- > 0)
 935                 crc = crc_32_tab[(crc ^ *bin_data++) & 0xffU] ^ (crc >> 8);
 936                 /* Note:  (crc >> 8) MUST zero fill on left */
 937
 938         result = (int)(crc ^ 0xFFFFFFFFU);
 939         return PyInt_FromLong(result);
 940 }
 941 #endif  /* USE_ZLIB_CRC32 */
 942
 943
 944 static PyObject *
 945 binascii_hexlify(PyObject *self, PyObject *args)
 946 {
 947         char* argbuf;
 948         Py_ssize_t arglen;
 949         PyObject *retval;
 950         char* retbuf;
 951         Py_ssize_t i, j;
 952
 953         if (!PyArg_ParseTuple(args, "s#:b2a_hex", &argbuf, &arglen))
 954                 return NULL;
 955
 956         assert(arglen >= 0);
 957         if (arglen > PY_SSIZE_T_MAX / 2)
 958                 return PyErr_NoMemory();
 959
 960         retval = PyString_FromStringAndSize(NULL, arglen*2);
 961         if (!retval)
 962                 return NULL;
 963         retbuf = PyString_AsString(retval);
 964         if (!retbuf)
 965                 goto finally;
 966
 967         /* make hex version of string, taken from shamodule.c */
 968         for (i=j=0; i < arglen; i++) {
 969                 char c;
 970                 c = (argbuf[i] >> 4) & 0xf;
 971                 c = (c>9) ? c+'a'-10 : c + '0';
 972                 retbuf[j++] = c;
 973                 c = argbuf[i] & 0xf;
 974                 c = (c>9) ? c+'a'-10 : c + '0';
 975                 retbuf[j++] = c;
 976         }
 977         return retval;
 978
 979   finally:
 980         Py_DECREF(retval);
 981         return NULL;
 982 }
 983
 984 PyDoc_STRVAR(doc_hexlify,
 985 "b2a_hex(data) -> s; Hexadecimal representation of binary data.\n\
 986 \n\
 987 This function is also available as \"hexlify()\".");
 988
 989
 990 static int
 991 to_int(int c)
 992 {
 993         if (isdigit(c))
 994                 return c - '0';
 995         else {
 996                 if (isupper(c))
 997                         c = tolower(c);
 998                 if (c >= 'a' && c <= 'f')
 999                         return c - 'a' + 10;
1000         }
1001         return -1;
1002 }
1003
1004
1005 static PyObject *
1006 binascii_unhexlify(PyObject *self, PyObject *args)
1007 {
1008         char* argbuf;
1009         Py_ssize_t arglen;
1010         PyObject *retval;
1011         char* retbuf;
1012         Py_ssize_t i, j;
1013
1014         if (!PyArg_ParseTuple(args, "s#:a2b_hex", &argbuf, &arglen))
1015                 return NULL;
1016
1017         assert(arglen >= 0);
1018
1019         /* XXX What should we do about strings with an odd length?  Should
1020          * we add an implicit leading zero, or a trailing zero?  For now,
1021          * raise an exception.
1022          */
1023         if (arglen % 2) {
1024                 PyErr_SetString(PyExc_TypeError, "Odd-length string");
1025                 return NULL;
1026         }
1027
1028         retval = PyString_FromStringAndSize(NULL, (arglen/2));
1029         if (!retval)
1030                 return NULL;
1031         retbuf = PyString_AsString(retval);
1032         if (!retbuf)
1033                 goto finally;
1034
1035         for (i=j=0; i < arglen; i += 2) {
1036                 int top = to_int(Py_CHARMASK(argbuf[i]));
1037                 int bot = to_int(Py_CHARMASK(argbuf[i+1]));
1038                 if (top == -1 || bot == -1) {
1039                         PyErr_SetString(PyExc_TypeError,
1040                                         "Non-hexadecimal digit found");
1041                         goto finally;
1042                 }
1043                 retbuf[j++] = (top << 4) + bot;
1044         }
1045         return retval;
1046
1047   finally:
1048         Py_DECREF(retval);
1049         return NULL;
1050 }
1051
1052 PyDoc_STRVAR(doc_unhexlify,
1053 "a2b_hex(hexstr) -> s; Binary data of hexadecimal representation.\n\
1054 \n\
1055 hexstr must contain an even number of hex digits (upper or lower case).\n\
1056 This function is also available as \"unhexlify()\"");
1057
/* Hex-digit value of every 7-bit character code, or -1 when the character
 * is not a hexadecimal digit.  One row per 16 character codes.
 */
static int table_hex[128] = {
  /* 0x00 */ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  /* 0x10 */ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  /* 0x20 */ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  /* '0'  */  0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1,
  /* '@'  */ -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  /* 'P'  */ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  /* '`'  */ -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  /* 'p'  */ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
};

/* Look up the hex value of c; c must be below 128. */
#define hexval(c) (table_hex[(unsigned int)(c)])

/* Longest line, in characters, produced by the quoted-printable encoder. */
#define MAXLINESIZE 76
1072
1073 PyDoc_STRVAR(doc_a2b_qp, "Decode a string of qp-encoded data");
1074
1075 static PyObject*
1076 binascii_a2b_qp(PyObject *self, PyObject *args, PyObject *kwargs)
1077 {
1078         Py_ssize_t in, out;
1079         char ch;
1080         unsigned char *data, *odata;
1081         Py_ssize_t datalen = 0;
1082         PyObject *rv;
1083         static char *kwlist[] = {"data", "header", NULL};
1084         int header = 0;
1085
1086         if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|i", kwlist, &data,
1087               &datalen, &header))
1088                 return NULL;
1089
1090         /* We allocate the output same size as input, this is overkill.
1091          * The previous implementation used calloc() so we'll zero out the
1092          * memory here too, since PyMem_Malloc() does not guarantee that.
1093          */
1094         odata = (unsigned char *) PyMem_Malloc(datalen);
1095         if (odata == NULL) {
1096                 PyErr_NoMemory();
1097                 return NULL;
1098         }
1099         memset(odata, 0, datalen);
1100
1101         in = out = 0;
1102         while (in < datalen) {
1103                 if (data[in] == '=') {
1104                         in++;
1105                         if (in >= datalen) break;
1106                         /* Soft line breaks */
1107                         if ((data[in] == '\n') || (data[in] == '\r')) {
1108                                 if (data[in] != '\n') {
1109                                         while (in < datalen && data[in] != '\n') in++;
1110                                 }
1111                                 if (in < datalen) in++;
1112                         }
1113                         else if (data[in] == '=') {
1114                                 /* broken case from broken python qp */
1115                                 odata[out++] = '=';
1116                                 in++;
1117                         }
1118                         else if (((data[in] >= 'A' && data[in] <= 'F') ||
1119                                   (data[in] >= 'a' && data[in] <= 'f') ||
1120                                   (data[in] >= '0' && data[in] <= '9')) &&
1121                                  ((data[in+1] >= 'A' && data[in+1] <= 'F') ||
1122                                   (data[in+1] >= 'a' && data[in+1] <= 'f') ||
1123                                   (data[in+1] >= '0' && data[in+1] <= '9'))) {
1124                                 /* hexval */
1125                                 ch = hexval(data[in]) << 4;
1126                                 in++;
1127                                 ch |= hexval(data[in]);
1128                                 in++;
1129                                 odata[out++] = ch;
1130                         }
1131                         else {
1132                           odata[out++] = '=';
1133                         }
1134                 }
1135                 else if (header && data[in] == '_') {
1136                         odata[out++] = ' ';
1137                         in++;
1138                 }
1139                 else {
1140                         odata[out] = data[in];
1141                         in++;
1142                         out++;
1143                 }
1144         }
1145         if ((rv = PyString_FromStringAndSize((char *)odata, out)) == NULL) {
1146                 PyMem_Free(odata);
1147                 return NULL;
1148         }
1149         PyMem_Free(odata);
1150         return rv;
1151 }
1152
/* Write the two uppercase hex digits of ch to s[0] (high nibble) and s[1]
 * (low nibble).  No terminator is written.  Always returns 0.
 */
static int
to_hex (unsigned char ch, unsigned char *s)
{
        static const char digits[] = "0123456789ABCDEF";

        s[0] = digits[(ch >> 4) & 0x0f];
        s[1] = digits[ch & 0x0f];
        return 0;
}
1163
1164 PyDoc_STRVAR(doc_b2a_qp,
1165 "b2a_qp(data, quotetabs=0, istext=1, header=0) -> s; \n\
1166  Encode a string using quoted-printable encoding. \n\
1167 \n\
1168 On encoding, when istext is set, newlines are not encoded, and white \n\
1169 space at end of lines is.  When istext is not set, \\r and \\n (CR/LF) are \n\
1170 both encoded.  When quotetabs is set, space and tabs are encoded.");
1171
1172 /* XXX: This is ridiculously complicated to be backward compatible
1173  * (mostly) with the quopri module.  It doesn't re-create the quopri
1174  * module bug where text ending in CRLF has the CR encoded */
1175 static PyObject*
1176 binascii_b2a_qp (PyObject *self, PyObject *args, PyObject *kwargs)
1177 {
1178         Py_ssize_t in, out;
1179         unsigned char *data, *odata;
1180         Py_ssize_t datalen = 0, odatalen = 0;
1181         PyObject *rv;
1182         unsigned int linelen = 0;
1183         static char *kwlist[] = {"data", "quotetabs", "istext",
1184                                        "header", NULL};
1185         int istext = 1;
1186         int quotetabs = 0;
1187         int header = 0;
1188         unsigned char ch;
1189         int crlf = 0;
1190         unsigned char *p;
1191
1192         if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|iii", kwlist, &data,
1193               &datalen, &quotetabs, &istext, &header))
1194                 return NULL;
1195
1196         /* See if this string is using CRLF line ends */
1197         /* XXX: this function has the side effect of converting all of
1198          * the end of lines to be the same depending on this detection
1199          * here */
1200         p = (unsigned char *) memchr(data, '\n', datalen);
1201         if ((p != NULL) && (p > data) && (*(p-1) == '\r'))
1202                 crlf = 1;
1203
1204         /* First, scan to see how many characters need to be encoded */
1205         in = 0;
1206         while (in < datalen) {
1207                 if ((data[in] > 126) ||
1208                     (data[in] == '=') ||
1209                     (header && data[in] == '_') ||
1210                     ((data[in] == '.') && (linelen == 0) &&
1211                      (data[in+1] == '\n' || data[in+1] == '\r' || data[in+1] == 0)) ||
1212                     (!istext && ((data[in] == '\r') || (data[in] == '\n'))) ||
1213                     ((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) ||
1214                     ((data[in] < 33) &&
1215                      (data[in] != '\r') && (data[in] != '\n') &&
1216                      (quotetabs ||
1217                         (!quotetabs && ((data[in] != '\t') && (data[in] != ' '))))))
1218                 {
1219                         if ((linelen + 3) >= MAXLINESIZE) {
1220                                 linelen = 0;
1221                                 if (crlf)
1222                                         odatalen += 3;
1223                                 else
1224                                         odatalen += 2;
1225                         }
1226                         linelen += 3;
1227                         odatalen += 3;
1228                         in++;
1229                 }
1230                 else {
1231                         if (istext &&
1232                             ((data[in] == '\n') ||
1233                              ((in+1 < datalen) && (data[in] == '\r') &&
1234                              (data[in+1] == '\n'))))
1235                         {
1236                                 linelen = 0;
1237                                 /* Protect against whitespace on end of line */
1238                                 if (in && ((data[in-1] == ' ') || (data[in-1] == '\t')))
1239                                         odatalen += 2;
1240                                 if (crlf)
1241                                         odatalen += 2;
1242                                 else
1243                                         odatalen += 1;
1244                                 if (data[in] == '\r')
1245                                         in += 2;
1246                                 else
1247                                         in++;
1248                         }
1249                         else {
1250                                 if ((in + 1 != datalen) &&
1251                                     (data[in+1] != '\n') &&
1252                                     (linelen + 1) >= MAXLINESIZE) {
1253                                         linelen = 0;
1254                                         if (crlf)
1255                                                 odatalen += 3;
1256                                         else
1257                                                 odatalen += 2;
1258                                 }
1259                                 linelen++;
1260                                 odatalen++;
1261                                 in++;
1262                         }
1263                 }
1264         }
1265
1266         /* We allocate the output same size as input, this is overkill.
1267          * The previous implementation used calloc() so we'll zero out the
1268          * memory here too, since PyMem_Malloc() does not guarantee that.
1269          */
1270         odata = (unsigned char *) PyMem_Malloc(odatalen);
1271         if (odata == NULL) {
1272                 PyErr_NoMemory();
1273                 return NULL;
1274         }
1275         memset(odata, 0, odatalen);
1276
1277         in = out = linelen = 0;
1278         while (in < datalen) {
1279                 if ((data[in] > 126) ||
1280                     (data[in] == '=') ||
1281                     (header && data[in] == '_') ||
1282                     ((data[in] == '.') && (linelen == 0) &&
1283                      (data[in+1] == '\n' || data[in+1] == '\r' || data[in+1] == 0)) ||
1284                     (!istext && ((data[in] == '\r') || (data[in] == '\n'))) ||
1285                     ((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) ||
1286                     ((data[in] < 33) &&
1287                      (data[in] != '\r') && (data[in] != '\n') &&
1288                      (quotetabs ||
1289                         (!quotetabs && ((data[in] != '\t') && (data[in] != ' '))))))
1290                 {
1291                         if ((linelen + 3 )>= MAXLINESIZE) {
1292                                 odata[out++] = '=';
1293                                 if (crlf) odata[out++] = '\r';
1294                                 odata[out++] = '\n';
1295                                 linelen = 0;
1296                         }
1297                         odata[out++] = '=';
1298                         to_hex(data[in], &odata[out]);
1299                         out += 2;
1300                         in++;
1301                         linelen += 3;
1302                 }
1303                 else {
1304                         if (istext &&
1305                             ((data[in] == '\n') ||
1306                              ((in+1 < datalen) && (data[in] == '\r') &&
1307                              (data[in+1] == '\n'))))
1308                         {
1309                                 linelen = 0;
1310                                 /* Protect against whitespace on end of line */
1311                                 if (out && ((odata[out-1] == ' ') || (odata[out-1] == '\t'))) {
1312                                         ch = odata[out-1];
1313                                         odata[out-1] = '=';
1314                                         to_hex(ch, &odata[out]);
1315                                         out += 2;
1316                                 }
1317
1318                                 if (crlf) odata[out++] = '\r';
1319                                 odata[out++] = '\n';
1320                                 if (data[in] == '\r')
1321                                         in += 2;
1322                                 else
1323                                         in++;
1324                         }
1325                         else {
1326                                 if ((in + 1 != datalen) &&
1327                                     (data[in+1] != '\n') &&
1328                                     (linelen + 1) >= MAXLINESIZE) {
1329                                         odata[out++] = '=';
1330                                         if (crlf) odata[out++] = '\r';
1331                                         odata[out++] = '\n';
1332                                         linelen = 0;
1333                                 }
1334                                 linelen++;
1335                                 if (header && data[in] == ' ') {
1336                                         odata[out++] = '_';
1337                                         in++;
1338                                 }
1339                                 else {
1340                                         odata[out++] = data[in++];
1341                                 }
1342                         }
1343                 }
1344         }
1345         if ((rv = PyString_FromStringAndSize((char *)odata, out)) == NULL) {
1346                 PyMem_Free(odata);
1347                 return NULL;
1348         }
1349         PyMem_Free(odata);
1350         return rv;
1351 }
1352
1353 /* List of functions defined in the module */
1354
1355 static struct PyMethodDef binascii_module_methods[] = {
1356         {"a2b_uu",     binascii_a2b_uu,     METH_VARARGS, doc_a2b_uu},
1357         {"b2a_uu",     binascii_b2a_uu,     METH_VARARGS, doc_b2a_uu},
1358         {"a2b_base64", binascii_a2b_base64, METH_VARARGS, doc_a2b_base64},
1359         {"b2a_base64", binascii_b2a_base64, METH_VARARGS, doc_b2a_base64},
1360         {"a2b_hqx",    binascii_a2b_hqx,    METH_VARARGS, doc_a2b_hqx},
1361         {"b2a_hqx",    binascii_b2a_hqx,    METH_VARARGS, doc_b2a_hqx},
1362         {"b2a_hex",    binascii_hexlify,    METH_VARARGS, doc_hexlify},
1363         {"a2b_hex",    binascii_unhexlify,  METH_VARARGS, doc_unhexlify},
1364         {"hexlify",    binascii_hexlify,    METH_VARARGS, doc_hexlify},
1365         {"unhexlify",  binascii_unhexlify,  METH_VARARGS, doc_unhexlify},
1366         {"rlecode_hqx",   binascii_rlecode_hqx, METH_VARARGS, doc_rlecode_hqx},
1367         {"rledecode_hqx", binascii_rledecode_hqx, METH_VARARGS,
1368          doc_rledecode_hqx},
1369         {"crc_hqx",    binascii_crc_hqx,    METH_VARARGS, doc_crc_hqx},
1370         {"crc32",      binascii_crc32,      METH_VARARGS, doc_crc32},
1371         {"a2b_qp", (PyCFunction)binascii_a2b_qp, METH_VARARGS | METH_KEYWORDS,
1372           doc_a2b_qp},
1373         {"b2a_qp", (PyCFunction)binascii_b2a_qp, METH_VARARGS | METH_KEYWORDS,
1374           doc_b2a_qp},
1375         {NULL, NULL}                         /* sentinel */
1376 };
1377
1378
1379 /* Initialization function for the module (*must* be called initbinascii) */
1380 PyDoc_STRVAR(doc_binascii, "Conversion between binary data and ASCII");
1381
1382 PyMODINIT_FUNC
1383 initbinascii(void)
1384 {
1385         PyObject *m, *d, *x;
1386
1387         /* Create the module and add the functions */
1388         m = Py_InitModule("binascii", binascii_module_methods);
1389         if (m == NULL)
1390                 return;
1391
1392         d = PyModule_GetDict(m);
1393         x = PyString_FromString(doc_binascii);
1394         PyDict_SetItemString(d, "__doc__", x);
1395         Py_XDECREF(x);
1396
1397         Error = PyErr_NewException("binascii.Error", NULL, NULL);
1398         PyDict_SetItemString(d, "Error", Error);
1399         Incomplete = PyErr_NewException("binascii.Incomplete", NULL, NULL);
1400         PyDict_SetItemString(d, "Incomplete", Incomplete);
1401 }