Modules/binascii.c

   1 /*
   2 ** Routines to represent binary data in ASCII and vice-versa
   3 **
   4 ** This module currently supports the following encodings:
   5 ** uuencode:
   6 **      each line encodes 45 bytes (except possibly the last)
   7 **      First char encodes (binary) length, rest data
   8 **      each char encodes 6 bits, as follows:
   9 **      binary: 01234567 abcdefgh ijklmnop
  10 **      ascii:  012345 67abcd efghij klmnop
  11 **      ASCII encoding method is "excess-space": 000000 is encoded as ' ', etc.
  12 **      short binary data is zero-extended (so the bits are always in the
  13 **      right place), this does *not* reflect in the length.
  14 ** base64:
  15 **      Line breaks are insignificant, but lines are at most 76 chars
  16 **      each char encodes 6 bits, in similar order as uucode/hqx. Encoding
  17 **      is done via a table.
  18 **      Short binary data is filled (in ASCII) with '='.
  19 ** hqx:
  20 **      File starts with introductory text, real data starts and ends
  21 **      with colons.
  22 **      Data consists of three similar parts: info, datafork, resourcefork.
  23 **      Each part is protected (at the end) with a 16-bit crc
  24 **      The binary data is run-length encoded, and then ascii-fied:
  25 **      binary: 01234567 abcdefgh ijklmnop
  26 **      ascii:  012345 67abcd efghij klmnop
  27 **      ASCII encoding is table-driven, see the code.
  28 **      Short binary data results in the runt ascii-byte being output with
  29 **      the bits in the right place.
  30 **
  31 ** While I was reading dozens of programs that encode or decode the formats
  32 ** here (documentation? hihi:-) I have formulated Jansen's Observation:
  33 **
  34 **      Programs that encode binary data in ASCII are written in
  35 **      such a style that they are as unreadable as possible. Devices used
  36 **      include unnecessary global variables, burying important tables
  37 **      in unrelated sourcefiles, putting functions in include files,
  38 **      using seemingly-descriptive variable names for different purposes,
  39 **      calls to empty subroutines and a host of others.
  40 **
  41 ** I have attempted to break with this tradition, but I guess that that
  42 ** does make the performance sub-optimal. Oh well, too bad...
  43 **
  44 ** Jack Jansen, CWI, July 1995.
  45 **
  46 ** Added support for quoted-printable encoding, based on rfc 1521 et al
  47 ** quoted-printable encoding specifies that non printable characters (anything
  48 ** below 32 and above 126) be encoded as =XX where XX is the hexadecimal value
  49 ** of the character.  It also specifies some other behavior to enable 8bit data
  50 ** in a mail message with little difficulty (maximum line sizes, protecting
  51 ** some cases of whitespace, etc).
  52 **
  53 ** Brandon Long, September 2001.
  54 */
  55
  56 #define PY_SSIZE_T_CLEAN
  57
  58 #include "Python.h"
  59
/* Exception objects used by the routines in this file: Error is set for
** malformed or over-long input, Incomplete when hqx input stops before a
** complete unit has been seen.  Only the declarations are visible here;
** presumably both are created during module initialisation -- TODO confirm.
*/
static PyObject *Error;
static PyObject *Incomplete;
  62
  63 /*
  64 ** hqx lookup table, ascii->binary.
  65 */
  66
  67 #define RUNCHAR 0x90
  68
  69 #define DONE 0x7F
  70 #define SKIP 0x7E
  71 #define FAIL 0x7D
  72
  73 static unsigned char table_a2b_hqx[256] = {
  74 /*       ^@    ^A    ^B    ^C    ^D    ^E    ^F    ^G   */
  75 /* 0*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
  76 /*       \b    \t    \n    ^K    ^L    \r    ^N    ^O   */
  77 /* 1*/  FAIL, FAIL, SKIP, FAIL, FAIL, SKIP, FAIL, FAIL,
  78 /*       ^P    ^Q    ^R    ^S    ^T    ^U    ^V    ^W   */
  79 /* 2*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
  80 /*       ^X    ^Y    ^Z    ^[    ^\    ^]    ^^    ^_   */
  81 /* 3*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
  82 /*              !     "     #     $     %     &     '   */
  83 /* 4*/  FAIL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
  84 /*        (     )     *     +     ,     -     .     /   */
  85 /* 5*/  0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, FAIL, FAIL,
  86 /*        0     1     2     3     4     5     6     7   */
  87 /* 6*/  0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, FAIL,
  88 /*        8     9     :     ;     <     =     >     ?   */
  89 /* 7*/  0x14, 0x15, DONE, FAIL, FAIL, FAIL, FAIL, FAIL,
  90 /*        @     A     B     C     D     E     F     G   */
  91 /* 8*/  0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D,
  92 /*        H     I     J     K     L     M     N     O   */
  93 /* 9*/  0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, FAIL,
  94 /*        P     Q     R     S     T     U     V     W   */
  95 /*10*/  0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, FAIL,
  96 /*        X     Y     Z     [     \     ]     ^     _   */
  97 /*11*/  0x2C, 0x2D, 0x2E, 0x2F, FAIL, FAIL, FAIL, FAIL,
  98 /*        `     a     b     c     d     e     f     g   */
  99 /*12*/  0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, FAIL,
 100 /*        h     i     j     k     l     m     n     o   */
 101 /*13*/  0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, FAIL, FAIL,
 102 /*        p     q     r     s     t     u     v     w   */
 103 /*14*/  0x3D, 0x3E, 0x3F, FAIL, FAIL, FAIL, FAIL, FAIL,
 104 /*        x     y     z     {     |     }     ~    ^?   */
 105 /*15*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 106 /*16*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 107         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 108         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 109         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 110         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 111         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 112         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 113         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 114         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 115         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 116         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 117         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 118         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 119         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 120         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 121         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 122 };
 123
 124 static unsigned char table_b2a_hqx[] =
 125 "!\"#$%&'()*+,-012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr";
 126
 127 static char table_a2b_base64[] = {
 128         -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
 129         -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
 130         -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
 131         52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1, 0,-1,-1, /* Note PAD->0 */
 132         -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
 133         15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
 134         -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
 135         41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
 136 };
 137
 138 #define BASE64_PAD '='
 139
 140 /* Max binary chunk size; limited only by available memory */
 141 #define BASE64_MAXBIN (INT_MAX/2 - sizeof(PyStringObject) - 3)
 142
 143 static unsigned char table_b2a_base64[] =
 144 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
 145
 146
 147
 148 static unsigned short crctab_hqx[256] = {
 149         0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
 150         0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,
 151         0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6,
 152         0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de,
 153         0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485,
 154         0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d,
 155         0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4,
 156         0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc,
 157         0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823,
 158         0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b,
 159         0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12,
 160         0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a,
 161         0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41,
 162         0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49,
 163         0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70,
 164         0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78,
 165         0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f,
 166         0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067,
 167         0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e,
 168         0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256,
 169         0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d,
 170         0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
 171         0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c,
 172         0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634,
 173         0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab,
 174         0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3,
 175         0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a,
 176         0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92,
 177         0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9,
 178         0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1,
 179         0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8,
 180         0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0,
 181 };
 182
 183 PyDoc_STRVAR(doc_a2b_uu, "(ascii) -> bin. Decode a line of uuencoded data");
 184
 185 static PyObject *
 186 binascii_a2b_uu(PyObject *self, PyObject *args)
 187 {
 188         unsigned char *ascii_data, *bin_data;
 189         int leftbits = 0;
 190         unsigned char this_ch;
 191         unsigned int leftchar = 0;
 192         PyObject *rv;
 193         Py_ssize_t ascii_len, bin_len;
 194
 195         if ( !PyArg_ParseTuple(args, "t#:a2b_uu", &ascii_data, &ascii_len) )
 196                 return NULL;
 197
 198         /* First byte: binary data length (in bytes) */
 199         bin_len = (*ascii_data++ - ' ') & 077;
 200         ascii_len--;
 201
 202         /* Allocate the buffer */
 203         if ( (rv=PyString_FromStringAndSize(NULL, bin_len)) == NULL )
 204                 return NULL;
 205         bin_data = (unsigned char *)PyString_AsString(rv);
 206
 207         for( ; bin_len > 0 ; ascii_len--, ascii_data++ ) {
 208                 /* XXX is it really best to add NULs if there's no more data */
 209                 this_ch = (ascii_len > 0) ? *ascii_data : 0;
 210                 if ( this_ch == '\n' || this_ch == '\r' || ascii_len <= 0) {
 211                         /*
 212                         ** Whitespace. Assume some spaces got eaten at
 213                         ** end-of-line. (We check this later)
 214                         */
 215                         this_ch = 0;
 216                 } else {
 217                         /* Check the character for legality
 218                         ** The 64 in stead of the expected 63 is because
 219                         ** there are a few uuencodes out there that use
 220                         ** '`' as zero instead of space.
 221                         */
 222                         if ( this_ch < ' ' || this_ch > (' ' + 64)) {
 223                                 PyErr_SetString(Error, "Illegal char");
 224                                 Py_DECREF(rv);
 225                                 return NULL;
 226                         }
 227                         this_ch = (this_ch - ' ') & 077;
 228                 }
 229                 /*
 230                 ** Shift it in on the low end, and see if there's
 231                 ** a byte ready for output.
 232                 */
 233                 leftchar = (leftchar << 6) | (this_ch);
 234                 leftbits += 6;
 235                 if ( leftbits >= 8 ) {
 236                         leftbits -= 8;
 237                         *bin_data++ = (leftchar >> leftbits) & 0xff;
 238                         leftchar &= ((1 << leftbits) - 1);
 239                         bin_len--;
 240                 }
 241         }
 242         /*
 243         ** Finally, check that if there's anything left on the line
 244         ** that it's whitespace only.
 245         */
 246         while( ascii_len-- > 0 ) {
 247                 this_ch = *ascii_data++;
 248                 /* Extra '`' may be written as padding in some cases */
 249                 if ( this_ch != ' ' && this_ch != ' '+64 &&
 250                      this_ch != '\n' && this_ch != '\r' ) {
 251                         PyErr_SetString(Error, "Trailing garbage");
 252                         Py_DECREF(rv);
 253                         return NULL;
 254                 }
 255         }
 256         return rv;
 257 }
 258
 259 PyDoc_STRVAR(doc_b2a_uu, "(bin) -> ascii. Uuencode line of data");
 260
 261 static PyObject *
 262 binascii_b2a_uu(PyObject *self, PyObject *args)
 263 {
 264         unsigned char *ascii_data, *bin_data;
 265         int leftbits = 0;
 266         unsigned char this_ch;
 267         unsigned int leftchar = 0;
 268         PyObject *rv;
 269         Py_ssize_t bin_len;
 270
 271         if ( !PyArg_ParseTuple(args, "s#:b2a_uu", &bin_data, &bin_len) )
 272                 return NULL;
 273         if ( bin_len > 45 ) {
 274                 /* The 45 is a limit that appears in all uuencode's */
 275                 PyErr_SetString(Error, "At most 45 bytes at once");
 276                 return NULL;
 277         }
 278
 279         /* We're lazy and allocate to much (fixed up later) */
 280         if ( (rv=PyString_FromStringAndSize(NULL, bin_len*2+2)) == NULL )
 281                 return NULL;
 282         ascii_data = (unsigned char *)PyString_AsString(rv);
 283
 284         /* Store the length */
 285         *ascii_data++ = ' ' + (bin_len & 077);
 286
 287         for( ; bin_len > 0 || leftbits != 0 ; bin_len--, bin_data++ ) {
 288                 /* Shift the data (or padding) into our buffer */
 289                 if ( bin_len > 0 )      /* Data */
 290                         leftchar = (leftchar << 8) | *bin_data;
 291                 else                    /* Padding */
 292                         leftchar <<= 8;
 293                 leftbits += 8;
 294
 295                 /* See if there are 6-bit groups ready */
 296                 while ( leftbits >= 6 ) {
 297                         this_ch = (leftchar >> (leftbits-6)) & 0x3f;
 298                         leftbits -= 6;
 299                         *ascii_data++ = this_ch + ' ';
 300                 }
 301         }
 302         *ascii_data++ = '\n';   /* Append a courtesy newline */
 303
 304         _PyString_Resize(&rv, (ascii_data -
 305                                (unsigned char *)PyString_AsString(rv)));
 306         return rv;
 307 }
 308
 309
 310 static int
 311 binascii_find_valid(unsigned char *s, Py_ssize_t slen, int num)
 312 {
 313         /* Finds & returns the (num+1)th
 314         ** valid character for base64, or -1 if none.
 315         */
 316
 317         int ret = -1;
 318         unsigned char c, b64val;
 319
 320         while ((slen > 0) && (ret == -1)) {
 321                 c = *s;
 322                 b64val = table_a2b_base64[c & 0x7f];
 323                 if ( ((c <= 0x7f) && (b64val != (unsigned char)-1)) ) {
 324                         if (num == 0)
 325                                 ret = *s;
 326                         num--;
 327                 }
 328
 329                 s++;
 330                 slen--;
 331         }
 332         return ret;
 333 }
 334
 335 PyDoc_STRVAR(doc_a2b_base64, "(ascii) -> bin. Decode a line of base64 data");
 336
 337 static PyObject *
 338 binascii_a2b_base64(PyObject *self, PyObject *args)
 339 {
 340         unsigned char *ascii_data, *bin_data;
 341         int leftbits = 0;
 342         unsigned char this_ch;
 343         unsigned int leftchar = 0;
 344         PyObject *rv;
 345         Py_ssize_t ascii_len, bin_len;
 346         int quad_pos = 0;
 347
 348         if ( !PyArg_ParseTuple(args, "t#:a2b_base64", &ascii_data, &ascii_len) )
 349                 return NULL;
 350
 351         bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
 352
 353         /* Allocate the buffer */
 354         if ( (rv=PyString_FromStringAndSize(NULL, bin_len)) == NULL )
 355                 return NULL;
 356         bin_data = (unsigned char *)PyString_AsString(rv);
 357         bin_len = 0;
 358
 359         for( ; ascii_len > 0; ascii_len--, ascii_data++) {
 360                 this_ch = *ascii_data;
 361
 362                 if (this_ch > 0x7f ||
 363                     this_ch == '\r' || this_ch == '\n' || this_ch == ' ')
 364                         continue;
 365
 366                 /* Check for pad sequences and ignore
 367                 ** the invalid ones.
 368                 */
 369                 if (this_ch == BASE64_PAD) {
 370                         if ( (quad_pos < 2) ||
 371                              ((quad_pos == 2) &&
 372                               (binascii_find_valid(ascii_data, ascii_len, 1)
 373                                != BASE64_PAD)) )
 374                         {
 375                                 continue;
 376                         }
 377                         else {
 378                                 /* A pad sequence means no more input.
 379                                 ** We've already interpreted the data
 380                                 ** from the quad at this point.
 381                                 */
 382                                 leftbits = 0;
 383                                 break;
 384                         }
 385                 }
 386
 387                 this_ch = table_a2b_base64[*ascii_data];
 388                 if ( this_ch == (unsigned char) -1 )
 389                         continue;
 390
 391                 /*
 392                 ** Shift it in on the low end, and see if there's
 393                 ** a byte ready for output.
 394                 */
 395                 quad_pos = (quad_pos + 1) & 0x03;
 396                 leftchar = (leftchar << 6) | (this_ch);
 397                 leftbits += 6;
 398
 399                 if ( leftbits >= 8 ) {
 400                         leftbits -= 8;
 401                         *bin_data++ = (leftchar >> leftbits) & 0xff;
 402                         bin_len++;
 403                         leftchar &= ((1 << leftbits) - 1);
 404                 }
 405         }
 406
 407         if (leftbits != 0) {
 408                 PyErr_SetString(Error, "Incorrect padding");
 409                 Py_DECREF(rv);
 410                 return NULL;
 411         }
 412
 413         /* And set string size correctly. If the result string is empty
 414         ** (because the input was all invalid) return the shared empty
 415         ** string instead; _PyString_Resize() won't do this for us.
 416         */
 417         if (bin_len > 0)
 418                 _PyString_Resize(&rv, bin_len);
 419         else {
 420                 Py_DECREF(rv);
 421                 rv = PyString_FromString("");
 422         }
 423         return rv;
 424 }
 425
 426 PyDoc_STRVAR(doc_b2a_base64, "(bin) -> ascii. Base64-code line of data");
 427
 428 static PyObject *
 429 binascii_b2a_base64(PyObject *self, PyObject *args)
 430 {
 431         unsigned char *ascii_data, *bin_data;
 432         int leftbits = 0;
 433         unsigned char this_ch;
 434         unsigned int leftchar = 0;
 435         PyObject *rv;
 436         Py_ssize_t bin_len;
 437
 438         if ( !PyArg_ParseTuple(args, "s#:b2a_base64", &bin_data, &bin_len) )
 439                 return NULL;
 440         if ( bin_len > BASE64_MAXBIN ) {
 441                 PyErr_SetString(Error, "Too much data for base64 line");
 442                 return NULL;
 443         }
 444
 445         /* We're lazy and allocate too much (fixed up later).
 446            "+3" leaves room for up to two pad characters and a trailing
 447            newline.  Note that 'b' gets encoded as 'Yg==\n' (1 in, 5 out). */
 448         if ( (rv=PyString_FromStringAndSize(NULL, bin_len*2 + 3)) == NULL )
 449                 return NULL;
 450         ascii_data = (unsigned char *)PyString_AsString(rv);
 451
 452         for( ; bin_len > 0 ; bin_len--, bin_data++ ) {
 453                 /* Shift the data into our buffer */
 454                 leftchar = (leftchar << 8) | *bin_data;
 455                 leftbits += 8;
 456
 457                 /* See if there are 6-bit groups ready */
 458                 while ( leftbits >= 6 ) {
 459                         this_ch = (leftchar >> (leftbits-6)) & 0x3f;
 460                         leftbits -= 6;
 461                         *ascii_data++ = table_b2a_base64[this_ch];
 462                 }
 463         }
 464         if ( leftbits == 2 ) {
 465                 *ascii_data++ = table_b2a_base64[(leftchar&3) << 4];
 466                 *ascii_data++ = BASE64_PAD;
 467                 *ascii_data++ = BASE64_PAD;
 468         } else if ( leftbits == 4 ) {
 469                 *ascii_data++ = table_b2a_base64[(leftchar&0xf) << 2];
 470                 *ascii_data++ = BASE64_PAD;
 471         }
 472         *ascii_data++ = '\n';   /* Append a courtesy newline */
 473
 474         _PyString_Resize(&rv, (ascii_data -
 475                                (unsigned char *)PyString_AsString(rv)));
 476         return rv;
 477 }
 478
 479 PyDoc_STRVAR(doc_a2b_hqx, "ascii -> bin, done. Decode .hqx coding");
 480
 481 static PyObject *
 482 binascii_a2b_hqx(PyObject *self, PyObject *args)
 483 {
 484         unsigned char *ascii_data, *bin_data;
 485         int leftbits = 0;
 486         unsigned char this_ch;
 487         unsigned int leftchar = 0;
 488         PyObject *rv;
 489         Py_ssize_t len;
 490         int done = 0;
 491
 492         if ( !PyArg_ParseTuple(args, "t#:a2b_hqx", &ascii_data, &len) )
 493                 return NULL;
 494
 495         /* Allocate a string that is too big (fixed later)
 496            Add two to the initial length to prevent interning which
 497            would preclude subsequent resizing.  */
 498         if ( (rv=PyString_FromStringAndSize(NULL, len+2)) == NULL )
 499                 return NULL;
 500         bin_data = (unsigned char *)PyString_AsString(rv);
 501
 502         for( ; len > 0 ; len--, ascii_data++ ) {
 503                 /* Get the byte and look it up */
 504                 this_ch = table_a2b_hqx[*ascii_data];
 505                 if ( this_ch == SKIP )
 506                         continue;
 507                 if ( this_ch == FAIL ) {
 508                         PyErr_SetString(Error, "Illegal char");
 509                         Py_DECREF(rv);
 510                         return NULL;
 511                 }
 512                 if ( this_ch == DONE ) {
 513                         /* The terminating colon */
 514                         done = 1;
 515                         break;
 516                 }
 517
 518                 /* Shift it into the buffer and see if any bytes are ready */
 519                 leftchar = (leftchar << 6) | (this_ch);
 520                 leftbits += 6;
 521                 if ( leftbits >= 8 ) {
 522                         leftbits -= 8;
 523                         *bin_data++ = (leftchar >> leftbits) & 0xff;
 524                         leftchar &= ((1 << leftbits) - 1);
 525                 }
 526         }
 527
 528         if ( leftbits && !done ) {
 529                 PyErr_SetString(Incomplete,
 530                                 "String has incomplete number of bytes");
 531                 Py_DECREF(rv);
 532                 return NULL;
 533         }
 534         _PyString_Resize(
 535                 &rv, (bin_data - (unsigned char *)PyString_AsString(rv)));
 536         if (rv) {
 537                 PyObject *rrv = Py_BuildValue("Oi", rv, done);
 538                 Py_DECREF(rv);
 539                 return rrv;
 540         }
 541
 542         return NULL;
 543 }
 544
 545 PyDoc_STRVAR(doc_rlecode_hqx, "Binhex RLE-code binary data");
 546
 547 static PyObject *
 548 binascii_rlecode_hqx(PyObject *self, PyObject *args)
 549 {
 550         unsigned char *in_data, *out_data;
 551         PyObject *rv;
 552         unsigned char ch;
 553         Py_ssize_t in, inend, len;
 554
 555         if ( !PyArg_ParseTuple(args, "s#:rlecode_hqx", &in_data, &len) )
 556                 return NULL;
 557
 558         /* Worst case: output is twice as big as input (fixed later) */
 559         if ( (rv=PyString_FromStringAndSize(NULL, len*2+2)) == NULL )
 560                 return NULL;
 561         out_data = (unsigned char *)PyString_AsString(rv);
 562
 563         for( in=0; in<len; in++) {
 564                 ch = in_data[in];
 565                 if ( ch == RUNCHAR ) {
 566                         /* RUNCHAR. Escape it. */
 567                         *out_data++ = RUNCHAR;
 568                         *out_data++ = 0;
 569                 } else {
 570                         /* Check how many following are the same */
 571                         for(inend=in+1;
 572                             inend<len && in_data[inend] == ch &&
 573                                     inend < in+255;
 574                             inend++) ;
 575                         if ( inend - in > 3 ) {
 576                                 /* More than 3 in a row. Output RLE. */
 577                                 *out_data++ = ch;
 578                                 *out_data++ = RUNCHAR;
 579                                 *out_data++ = inend-in;
 580                                 in = inend-1;
 581                         } else {
 582                                 /* Less than 3. Output the byte itself */
 583                                 *out_data++ = ch;
 584                         }
 585                 }
 586         }
 587         _PyString_Resize(&rv, (out_data -
 588                                (unsigned char *)PyString_AsString(rv)));
 589         return rv;
 590 }
 591
 592 PyDoc_STRVAR(doc_b2a_hqx, "Encode .hqx data");
 593
 594 static PyObject *
 595 binascii_b2a_hqx(PyObject *self, PyObject *args)
 596 {
 597         unsigned char *ascii_data, *bin_data;
 598         int leftbits = 0;
 599         unsigned char this_ch;
 600         unsigned int leftchar = 0;
 601         PyObject *rv;
 602         Py_ssize_t len;
 603
 604         if ( !PyArg_ParseTuple(args, "s#:b2a_hqx", &bin_data, &len) )
 605                 return NULL;
 606
 607         /* Allocate a buffer that is at least large enough */
 608         if ( (rv=PyString_FromStringAndSize(NULL, len*2+2)) == NULL )
 609                 return NULL;
 610         ascii_data = (unsigned char *)PyString_AsString(rv);
 611
 612         for( ; len > 0 ; len--, bin_data++ ) {
 613                 /* Shift into our buffer, and output any 6bits ready */
 614                 leftchar = (leftchar << 8) | *bin_data;
 615                 leftbits += 8;
 616                 while ( leftbits >= 6 ) {
 617                         this_ch = (leftchar >> (leftbits-6)) & 0x3f;
 618                         leftbits -= 6;
 619                         *ascii_data++ = table_b2a_hqx[this_ch];
 620                 }
 621         }
 622         /* Output a possible runt byte */
 623         if ( leftbits ) {
 624                 leftchar <<= (6-leftbits);
 625                 *ascii_data++ = table_b2a_hqx[leftchar & 0x3f];
 626         }
 627         _PyString_Resize(&rv, (ascii_data -
 628                                (unsigned char *)PyString_AsString(rv)));
 629         return rv;
 630 }
 631
 632 PyDoc_STRVAR(doc_rledecode_hqx, "Decode hexbin RLE-coded string");
 633
 634 static PyObject *
 635 binascii_rledecode_hqx(PyObject *self, PyObject *args)
 636 {
 637         unsigned char *in_data, *out_data;
 638         unsigned char in_byte, in_repeat;
 639         PyObject *rv;
 640         Py_ssize_t in_len, out_len, out_len_left;
 641
 642         if ( !PyArg_ParseTuple(args, "s#:rledecode_hqx", &in_data, &in_len) )
 643                 return NULL;
 644
 645         /* Empty string is a special case */
 646         if ( in_len == 0 )
 647                 return PyString_FromString("");
 648
 649         /* Allocate a buffer of reasonable size. Resized when needed */
 650         out_len = in_len*2;
 651         if ( (rv=PyString_FromStringAndSize(NULL, out_len)) == NULL )
 652                 return NULL;
 653         out_len_left = out_len;
 654         out_data = (unsigned char *)PyString_AsString(rv);
 655
 656         /*
 657         ** We need two macros here to get/put bytes and handle
 658         ** end-of-buffer for input and output strings.
 659         */
 660 #define INBYTE(b) \
 661         do { \
 662                  if ( --in_len < 0 ) { \
 663                            PyErr_SetString(Incomplete, ""); \
 664                            Py_DECREF(rv); \
 665                            return NULL; \
 666                  } \
 667                  b = *in_data++; \
 668         } while(0)
 669
 670 #define OUTBYTE(b) \
 671         do { \
 672                  if ( --out_len_left < 0 ) { \
 673                           _PyString_Resize(&rv, 2*out_len); \
 674                           if ( rv == NULL ) return NULL; \
 675                           out_data = (unsigned char *)PyString_AsString(rv) \
 676                                                                  + out_len; \
 677                           out_len_left = out_len-1; \
 678                           out_len = out_len * 2; \
 679                  } \
 680                  *out_data++ = b; \
 681         } while(0)
 682
 683                 /*
 684                 ** Handle first byte separately (since we have to get angry
 685                 ** in case of an orphaned RLE code).
 686                 */
 687                 INBYTE(in_byte);
 688
 689         if (in_byte == RUNCHAR) {
 690                 INBYTE(in_repeat);
 691                 if (in_repeat != 0) {
 692                         /* Note Error, not Incomplete (which is at the end
 693                         ** of the string only). This is a programmer error.
 694                         */
 695                         PyErr_SetString(Error, "Orphaned RLE code at start");
 696                         Py_DECREF(rv);
 697                         return NULL;
 698                 }
 699                 OUTBYTE(RUNCHAR);
 700         } else {
 701                 OUTBYTE(in_byte);
 702         }
 703
 704         while( in_len > 0 ) {
 705                 INBYTE(in_byte);
 706
 707                 if (in_byte == RUNCHAR) {
 708                         INBYTE(in_repeat);
 709                         if ( in_repeat == 0 ) {
 710                                 /* Just an escaped RUNCHAR value */
 711                                 OUTBYTE(RUNCHAR);
 712                         } else {
 713                                 /* Pick up value and output a sequence of it */
 714                                 in_byte = out_data[-1];
 715                                 while ( --in_repeat > 0 )
 716                                         OUTBYTE(in_byte);
 717                         }
 718                 } else {
 719                         /* Normal byte */
 720                         OUTBYTE(in_byte);
 721                 }
 722         }
 723         _PyString_Resize(&rv, (out_data -
 724                                (unsigned char *)PyString_AsString(rv)));
 725         return rv;
 726 }
 727
 728 PyDoc_STRVAR(doc_crc_hqx,
 729 "(data, oldcrc) -> newcrc. Compute hqx CRC incrementally");
 730
 731 static PyObject *
 732 binascii_crc_hqx(PyObject *self, PyObject *args)
 733 {
 734         unsigned char *bin_data;
 735         unsigned int crc;
 736         Py_ssize_t len;
 737
 738         if ( !PyArg_ParseTuple(args, "s#i:crc_hqx", &bin_data, &len, &crc) )
 739                 return NULL;
 740
 741         while(len--) {
 742                 crc=((crc<<8)&0xff00)^crctab_hqx[((crc>>8)&0xff)^*bin_data++];
 743         }
 744
 745         return Py_BuildValue("i", crc);
 746 }
 747
 748 PyDoc_STRVAR(doc_crc32,
 749 "(data, oldcrc = 0) -> newcrc. Compute CRC-32 incrementally");
 750
 751 /*  Crc - 32 BIT ANSI X3.66 CRC checksum files
 752     Also known as: ISO 3307
 753 **********************************************************************|
 754 *                                                                    *|
 755 * Demonstration program to compute the 32-bit CRC used as the frame  *|
 756 * check sequence in ADCCP (ANSI X3.66, also known as FIPS PUB 71     *|
 757 * and FED-STD-1003, the U.S. versions of CCITT's X.25 link-level     *|
 758 * protocol).  The 32-bit FCS was added via the Federal Register,     *|
 759 * 1 June 1982, p.23798.  I presume but don't know for certain that   *|
 760 * this polynomial is or will be included in CCITT V.41, which        *|
 761 * defines the 16-bit CRC (often called CRC-CCITT) polynomial.  FIPS  *|
 762 * PUB 78 says that the 32-bit FCS reduces otherwise undetected       *|
 763 * errors by a factor of 10^-5 over 16-bit FCS.                       *|
 764 *                                                                    *|
 765 **********************************************************************|
 766
 767  Copyright (C) 1986 Gary S. Brown.  You may use this program, or
 768  code or tables extracted from it, as desired without restriction.
 769
 770  First, the polynomial itself and its table of feedback terms.  The
 771  polynomial is
 772  X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
 773  Note that we take it "backwards" and put the highest-order term in
 774  the lowest-order bit.  The X^32 term is "implied"; the LSB is the
 775  X^31 term, etc.  The X^0 term (usually shown as "+1") results in
 776  the MSB being 1.
 777
 778  Note that the usual hardware shift register implementation, which
 779  is what we're using (we're merely optimizing it by doing eight-bit
 780  chunks at a time) shifts bits into the lowest-order term.  In our
 781  implementation, that means shifting towards the right.  Why do we
 782  do it this way?  Because the calculated CRC must be transmitted in
 783  order from highest-order term to lowest-order term.  UARTs transmit
 784  characters in order from LSB to MSB.  By storing the CRC this way,
 785  we hand it to the UART in the order low-byte to high-byte; the UART
 786  sends each low-bit to high-bit; and the result is transmission bit
 787  by bit from highest- to lowest-order term without requiring any bit
 788  shuffling on our part.  Reception works similarly.
 789
 790  The feedback terms table consists of 256, 32-bit entries.  Notes:
 791
 792   1. The table can be generated at runtime if desired; code to do so
 793      is shown later.  It might not be obvious, but the feedback
 794      terms simply represent the results of eight shift/xor opera-
 795      tions for all combinations of data and CRC register values.
 796
 797   2. The CRC accumulation logic is the same for all CRC polynomials,
 798      be they sixteen or thirty-two bits wide.  You simply choose the
 799      appropriate table.  Alternatively, because the table can be
 800      generated at runtime, you can start by generating the table for
 801      the polynomial in question and use exactly the same "updcrc",
 802      if your application needn't simultaneously handle two CRC
 803      polynomials.  (Note, however, that XMODEM is strange.)
 804
 805   3. For 16-bit CRCs, the table entries need be only 16 bits wide;
 806      of course, 32-bit entries work OK if the high 16 bits are zero.
 807
 808   4. The values must be right-shifted by eight bits by the "updcrc"
 809      logic; the shift must be unsigned (bring in zeroes).  On some
 810      hardware you could probably optimize the shift in assembler by
 811      using byte-swap instructions.
 812 ********************************************************************/
 813
 814 static unsigned long crc_32_tab[256] = {
 815 0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL,
 816 0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL,
 817 0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL,
 818 0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL,
 819 0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL,
 820 0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL,
 821 0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL,
 822 0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL,
 823 0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL,
 824 0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL,
 825 0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL,
 826 0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL,
 827 0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL,
 828 0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL,
 829 0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL,
 830 0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL,
 831 0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL,
 832 0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL,
 833 0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL,
 834 0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL,
 835 0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL,
 836 0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL,
 837 0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL,
 838 0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL,
 839 0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL,
 840 0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL,
 841 0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL,
 842 0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL,
 843 0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL,
 844 0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL,
 845 0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL,
 846 0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL,
 847 0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL,
 848 0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL,
 849 0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL,
 850 0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL,
 851 0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL,
 852 0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL,
 853 0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL,
 854 0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL,
 855 0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL,
 856 0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL,
 857 0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL,
 858 0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL,
 859 0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL,
 860 0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL,
 861 0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL,
 862 0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL,
 863 0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL,
 864 0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL,
 865 0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL,
 866 0x2d02ef8dUL
 867 };
 868
 869 static PyObject *
 870 binascii_crc32(PyObject *self, PyObject *args)
 871 { /* By Jim Ahlstrom; All rights transferred to CNRI */
 872         unsigned char *bin_data;
 873         unsigned long crc = 0UL;        /* initial value of CRC */
 874         Py_ssize_t len;
 875         long result;
 876
 877         if ( !PyArg_ParseTuple(args, "s#|l:crc32", &bin_data, &len, &crc) )
 878                 return NULL;
 879
 880         crc = ~ crc;
 881 #if SIZEOF_LONG > 4
 882         /* only want the trailing 32 bits */
 883         crc &= 0xFFFFFFFFUL;
 884 #endif
 885         while (len--)
 886                 crc = crc_32_tab[(crc ^ *bin_data++) & 0xffUL] ^ (crc >> 8);
 887                 /* Note:  (crc >> 8) MUST zero fill on left */
 888
 889         result = (long)(crc ^ 0xFFFFFFFFUL);
 890 #if SIZEOF_LONG > 4
 891         /* Extend the sign bit.  This is one way to ensure the result is the
 892          * same across platforms.  The other way would be to return an
 893          * unbounded unsigned long, but the evidence suggests that lots of
 894          * code outside this treats the result as if it were a signed 4-byte
 895          * integer.
 896          */
 897         result |= -(result & (1L << 31));
 898 #endif
 899         return PyInt_FromLong(result);
 900 }
 901
 902
 903 static PyObject *
 904 binascii_hexlify(PyObject *self, PyObject *args)
 905 {
 906         char* argbuf;
 907         Py_ssize_t arglen;
 908         PyObject *retval;
 909         char* retbuf;
 910         Py_ssize_t i, j;
 911
 912         if (!PyArg_ParseTuple(args, "s#:b2a_hex", &argbuf, &arglen))
 913                 return NULL;
 914
 915         retval = PyString_FromStringAndSize(NULL, arglen*2);
 916         if (!retval)
 917                 return NULL;
 918         retbuf = PyString_AsString(retval);
 919         if (!retbuf)
 920                 goto finally;
 921
 922         /* make hex version of string, taken from shamodule.c */
 923         for (i=j=0; i < arglen; i++) {
 924                 char c;
 925                 c = (argbuf[i] >> 4) & 0xf;
 926                 c = (c>9) ? c+'a'-10 : c + '0';
 927                 retbuf[j++] = c;
 928                 c = argbuf[i] & 0xf;
 929                 c = (c>9) ? c+'a'-10 : c + '0';
 930                 retbuf[j++] = c;
 931         }
 932         return retval;
 933
 934   finally:
 935         Py_DECREF(retval);
 936         return NULL;
 937 }
 938
 939 PyDoc_STRVAR(doc_hexlify,
 940 "b2a_hex(data) -> s; Hexadecimal representation of binary data.\n\
 941 \n\
 942 This function is also available as \"hexlify()\".");
 943
 944
 945 static int
 946 to_int(int c)
 947 {
 948         if (isdigit(c))
 949                 return c - '0';
 950         else {
 951                 if (isupper(c))
 952                         c = tolower(c);
 953                 if (c >= 'a' && c <= 'f')
 954                         return c - 'a' + 10;
 955         }
 956         return -1;
 957 }
 958
 959
 960 static PyObject *
 961 binascii_unhexlify(PyObject *self, PyObject *args)
 962 {
 963         char* argbuf;
 964         Py_ssize_t arglen;
 965         PyObject *retval;
 966         char* retbuf;
 967         Py_ssize_t i, j;
 968
 969         if (!PyArg_ParseTuple(args, "s#:a2b_hex", &argbuf, &arglen))
 970                 return NULL;
 971
 972         /* XXX What should we do about strings with an odd length?  Should
 973          * we add an implicit leading zero, or a trailing zero?  For now,
 974          * raise an exception.
 975          */
 976         if (arglen % 2) {
 977                 PyErr_SetString(PyExc_TypeError, "Odd-length string");
 978                 return NULL;
 979         }
 980
 981         retval = PyString_FromStringAndSize(NULL, (arglen/2));
 982         if (!retval)
 983                 return NULL;
 984         retbuf = PyString_AsString(retval);
 985         if (!retbuf)
 986                 goto finally;
 987
 988         for (i=j=0; i < arglen; i += 2) {
 989                 int top = to_int(Py_CHARMASK(argbuf[i]));
 990                 int bot = to_int(Py_CHARMASK(argbuf[i+1]));
 991                 if (top == -1 || bot == -1) {
 992                         PyErr_SetString(PyExc_TypeError,
 993                                         "Non-hexadecimal digit found");
 994                         goto finally;
 995                 }
 996                 retbuf[j++] = (top << 4) + bot;
 997         }
 998         return retval;
 999
1000   finally:
1001         Py_DECREF(retval);
1002         return NULL;
1003 }
1004
1005 PyDoc_STRVAR(doc_unhexlify,
1006 "a2b_hex(hexstr) -> s; Binary data of hexadecimal representation.\n\
1007 \n\
1008 hexstr must contain an even number of hex digits (upper or lower case).\n\
1009 This function is also available as \"unhexlify()\"");
1010
/* Value of each 7-bit character code when read as a hexadecimal digit:
 * 0-9 for '0'-'9', 10-15 for 'A'-'F' and 'a'-'f', and -1 for everything
 * else.  The table is only ever read, so it is declared const.
 */
static const int table_hex[128] = {
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
   0, 1, 2, 3,  4, 5, 6, 7,  8, 9,-1,-1, -1,-1,-1,-1,
  -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
};

/* Hex-digit value of character c.  The table has 128 entries, so c must
 * be in the range 0..127; callers are expected to have checked that c is
 * a hexadecimal digit first.
 */
#define hexval(c) (table_hex[(unsigned int)(c)])

/* Line-length limit applied by the quoted-printable encoder when it
 * decides where to insert soft line breaks.
 */
#define MAXLINESIZE 76
1025
1026 PyDoc_STRVAR(doc_a2b_qp, "Decode a string of qp-encoded data");
1027
1028 static PyObject*
1029 binascii_a2b_qp(PyObject *self, PyObject *args, PyObject *kwargs)
1030 {
1031         Py_ssize_t in, out;
1032         char ch;
1033         unsigned char *data, *odata;
1034         Py_ssize_t datalen = 0;
1035         PyObject *rv;
1036         static char *kwlist[] = {"data", "header", NULL};
1037         int header = 0;
1038
1039         if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|i", kwlist, &data,
1040               &datalen, &header))
1041                 return NULL;
1042
1043         /* We allocate the output same size as input, this is overkill.
1044          * The previous implementation used calloc() so we'll zero out the
1045          * memory here too, since PyMem_Malloc() does not guarantee that.
1046          */
1047         odata = (unsigned char *) PyMem_Malloc(datalen);
1048         if (odata == NULL) {
1049                 PyErr_NoMemory();
1050                 return NULL;
1051         }
1052         memset(odata, 0, datalen);
1053
1054         in = out = 0;
1055         while (in < datalen) {
1056                 if (data[in] == '=') {
1057                         in++;
1058                         if (in >= datalen) break;
1059                         /* Soft line breaks */
1060                         if ((data[in] == '\n') || (data[in] == '\r')) {
1061                                 if (data[in] != '\n') {
1062                                         while (in < datalen && data[in] != '\n') in++;
1063                                 }
1064                                 if (in < datalen) in++;
1065                         }
1066                         else if (data[in] == '=') {
1067                                 /* broken case from broken python qp */
1068                                 odata[out++] = '=';
1069                                 in++;
1070                         }
1071                         else if (((data[in] >= 'A' && data[in] <= 'F') ||
1072                                   (data[in] >= 'a' && data[in] <= 'f') ||
1073                                   (data[in] >= '0' && data[in] <= '9')) &&
1074                                  ((data[in+1] >= 'A' && data[in+1] <= 'F') ||
1075                                   (data[in+1] >= 'a' && data[in+1] <= 'f') ||
1076                                   (data[in+1] >= '0' && data[in+1] <= '9'))) {
1077                                 /* hexval */
1078                                 ch = hexval(data[in]) << 4;
1079                                 in++;
1080                                 ch |= hexval(data[in]);
1081                                 in++;
1082                                 odata[out++] = ch;
1083                         }
1084                         else {
1085                           odata[out++] = '=';
1086                         }
1087                 }
1088                 else if (header && data[in] == '_') {
1089                         odata[out++] = ' ';
1090                         in++;
1091                 }
1092                 else {
1093                         odata[out] = data[in];
1094                         in++;
1095                         out++;
1096                 }
1097         }
1098         if ((rv = PyString_FromStringAndSize((char *)odata, out)) == NULL) {
1099                 PyMem_Free(odata);
1100                 return NULL;
1101         }
1102         PyMem_Free(odata);
1103         return rv;
1104 }
1105
/* Write the two uppercase hexadecimal digits of ch to s[0] (high nibble)
 * and s[1] (low nibble).  No terminator is written.  Always returns 0.
 */
static int
to_hex (unsigned char ch, unsigned char *s)
{
        static const char digits[] = "0123456789ABCDEF";

        s[0] = digits[(ch >> 4) & 0x0f];
        s[1] = digits[ch & 0x0f];
        return 0;
}
1116
1117 PyDoc_STRVAR(doc_b2a_qp,
1118 "b2a_qp(data, quotetabs=0, istext=1, header=0) -> s; \n\
1119  Encode a string using quoted-printable encoding. \n\
1120 \n\
1121 On encoding, when istext is set, newlines are not encoded, and white \n\
1122 space at end of lines is.  When istext is not set, \\r and \\n (CR/LF) are \n\
1123 both encoded.  When quotetabs is set, space and tabs are encoded.");
1124
1125 /* XXX: This is ridiculously complicated to be backward compatible
1126  * (mostly) with the quopri module.  It doesn't re-create the quopri
1127  * module bug where text ending in CRLF has the CR encoded */
1128 static PyObject*
1129 binascii_b2a_qp (PyObject *self, PyObject *args, PyObject *kwargs)
1130 {
1131         Py_ssize_t in, out;
1132         unsigned char *data, *odata;
1133         Py_ssize_t datalen = 0, odatalen = 0;
1134         PyObject *rv;
1135         unsigned int linelen = 0;
1136         static char *kwlist[] = {"data", "quotetabs", "istext",
1137                                        "header", NULL};
1138         int istext = 1;
1139         int quotetabs = 0;
1140         int header = 0;
1141         unsigned char ch;
1142         int crlf = 0;
1143         unsigned char *p;
1144
1145         if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|iii", kwlist, &data,
1146               &datalen, &quotetabs, &istext, &header))
1147                 return NULL;
1148
1149         /* See if this string is using CRLF line ends */
1150         /* XXX: this function has the side effect of converting all of
1151          * the end of lines to be the same depending on this detection
1152          * here */
1153         p = (unsigned char *) memchr(data, '\n', datalen);
1154         if ((p != NULL) && (p > data) && (*(p-1) == '\r'))
1155                 crlf = 1;
1156
1157         /* First, scan to see how many characters need to be encoded */
1158         in = 0;
1159         while (in < datalen) {
1160                 if ((data[in] > 126) ||
1161                     (data[in] == '=') ||
1162                     (header && data[in] == '_') ||
1163                     ((data[in] == '.') && (linelen == 0) &&
1164                      (data[in+1] == '\n' || data[in+1] == '\r' || data[in+1] == 0)) ||
1165                     (!istext && ((data[in] == '\r') || (data[in] == '\n'))) ||
1166                     ((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) ||
1167                     ((data[in] < 33) &&
1168                      (data[in] != '\r') && (data[in] != '\n') &&
1169                      (quotetabs ||
1170                         (!quotetabs && ((data[in] != '\t') && (data[in] != ' '))))))
1171                 {
1172                         if ((linelen + 3) >= MAXLINESIZE) {
1173                                 linelen = 0;
1174                                 if (crlf)
1175                                         odatalen += 3;
1176                                 else
1177                                         odatalen += 2;
1178                         }
1179                         linelen += 3;
1180                         odatalen += 3;
1181                         in++;
1182                 }
1183                 else {
1184                         if (istext &&
1185                             ((data[in] == '\n') ||
1186                              ((in+1 < datalen) && (data[in] == '\r') &&
1187                              (data[in+1] == '\n'))))
1188                         {
1189                                 linelen = 0;
1190                                 /* Protect against whitespace on end of line */
1191                                 if (in && ((data[in-1] == ' ') || (data[in-1] == '\t')))
1192                                         odatalen += 2;
1193                                 if (crlf)
1194                                         odatalen += 2;
1195                                 else
1196                                         odatalen += 1;
1197                                 if (data[in] == '\r')
1198                                         in += 2;
1199                                 else
1200                                         in++;
1201                         }
1202                         else {
1203                                 if ((in + 1 != datalen) &&
1204                                     (data[in+1] != '\n') &&
1205                                     (linelen + 1) >= MAXLINESIZE) {
1206                                         linelen = 0;
1207                                         if (crlf)
1208                                                 odatalen += 3;
1209                                         else
1210                                                 odatalen += 2;
1211                                 }
1212                                 linelen++;
1213                                 odatalen++;
1214                                 in++;
1215                         }
1216                 }
1217         }
1218
1219         /* We allocate the output same size as input, this is overkill.
1220          * The previous implementation used calloc() so we'll zero out the
1221          * memory here too, since PyMem_Malloc() does not guarantee that.
1222          */
1223         odata = (unsigned char *) PyMem_Malloc(odatalen);
1224         if (odata == NULL) {
1225                 PyErr_NoMemory();
1226                 return NULL;
1227         }
1228         memset(odata, 0, odatalen);
1229
1230         in = out = linelen = 0;
1231         while (in < datalen) {
1232                 if ((data[in] > 126) ||
1233                     (data[in] == '=') ||
1234                     (header && data[in] == '_') ||
1235                     ((data[in] == '.') && (linelen == 0) &&
1236                      (data[in+1] == '\n' || data[in+1] == '\r' || data[in+1] == 0)) ||
1237                     (!istext && ((data[in] == '\r') || (data[in] == '\n'))) ||
1238                     ((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) ||
1239                     ((data[in] < 33) &&
1240                      (data[in] != '\r') && (data[in] != '\n') &&
1241                      (quotetabs ||
1242                         (!quotetabs && ((data[in] != '\t') && (data[in] != ' '))))))
1243                 {
1244                         if ((linelen + 3 )>= MAXLINESIZE) {
1245                                 odata[out++] = '=';
1246                                 if (crlf) odata[out++] = '\r';
1247                                 odata[out++] = '\n';
1248                                 linelen = 0;
1249                         }
1250                         odata[out++] = '=';
1251                         to_hex(data[in], &odata[out]);
1252                         out += 2;
1253                         in++;
1254                         linelen += 3;
1255                 }
1256                 else {
1257                         if (istext &&
1258                             ((data[in] == '\n') ||
1259                              ((in+1 < datalen) && (data[in] == '\r') &&
1260                              (data[in+1] == '\n'))))
1261                         {
1262                                 linelen = 0;
1263                                 /* Protect against whitespace on end of line */
1264                                 if (out && ((odata[out-1] == ' ') || (odata[out-1] == '\t'))) {
1265                                         ch = odata[out-1];
1266                                         odata[out-1] = '=';
1267                                         to_hex(ch, &odata[out]);
1268                                         out += 2;
1269                                 }
1270
1271                                 if (crlf) odata[out++] = '\r';
1272                                 odata[out++] = '\n';
1273                                 if (data[in] == '\r')
1274                                         in += 2;
1275                                 else
1276                                         in++;
1277                         }
1278                         else {
1279                                 if ((in + 1 != datalen) &&
1280                                     (data[in+1] != '\n') &&
1281                                     (linelen + 1) >= MAXLINESIZE) {
1282                                         odata[out++] = '=';
1283                                         if (crlf) odata[out++] = '\r';
1284                                         odata[out++] = '\n';
1285                                         linelen = 0;
1286                                 }
1287                                 linelen++;
1288                                 if (header && data[in] == ' ') {
1289                                         odata[out++] = '_';
1290                                         in++;
1291                                 }
1292                                 else {
1293                                         odata[out++] = data[in++];
1294                                 }
1295                         }
1296                 }
1297         }
1298         if ((rv = PyString_FromStringAndSize((char *)odata, out)) == NULL) {
1299                 PyMem_Free(odata);
1300                 return NULL;
1301         }
1302         PyMem_Free(odata);
1303         return rv;
1304 }
1305
1306 /* List of functions defined in the module */
1307
1308 static struct PyMethodDef binascii_module_methods[] = {
1309         {"a2b_uu",     binascii_a2b_uu,     METH_VARARGS, doc_a2b_uu},
1310         {"b2a_uu",     binascii_b2a_uu,     METH_VARARGS, doc_b2a_uu},
1311         {"a2b_base64", binascii_a2b_base64, METH_VARARGS, doc_a2b_base64},
1312         {"b2a_base64", binascii_b2a_base64, METH_VARARGS, doc_b2a_base64},
1313         {"a2b_hqx",    binascii_a2b_hqx,    METH_VARARGS, doc_a2b_hqx},
1314         {"b2a_hqx",    binascii_b2a_hqx,    METH_VARARGS, doc_b2a_hqx},
1315         {"b2a_hex",    binascii_hexlify,    METH_VARARGS, doc_hexlify},
1316         {"a2b_hex",    binascii_unhexlify,  METH_VARARGS, doc_unhexlify},
1317         {"hexlify",    binascii_hexlify,    METH_VARARGS, doc_hexlify},
1318         {"unhexlify",  binascii_unhexlify,  METH_VARARGS, doc_unhexlify},
1319         {"rlecode_hqx",   binascii_rlecode_hqx, METH_VARARGS, doc_rlecode_hqx},
1320         {"rledecode_hqx", binascii_rledecode_hqx, METH_VARARGS,
1321          doc_rledecode_hqx},
1322         {"crc_hqx",    binascii_crc_hqx,    METH_VARARGS, doc_crc_hqx},
1323         {"crc32",      binascii_crc32,      METH_VARARGS, doc_crc32},
1324         {"a2b_qp", (PyCFunction)binascii_a2b_qp, METH_VARARGS | METH_KEYWORDS,
1325           doc_a2b_qp},
1326         {"b2a_qp", (PyCFunction)binascii_b2a_qp, METH_VARARGS | METH_KEYWORDS,
1327           doc_b2a_qp},
1328         {NULL, NULL}                         /* sentinel */
1329 };
1330
1331
1332 /* Initialization function for the module (*must* be called initbinascii) */
1333 PyDoc_STRVAR(doc_binascii, "Conversion between binary data and ASCII");
1334
1335 PyMODINIT_FUNC
1336 initbinascii(void)
1337 {
1338         PyObject *m, *d, *x;
1339
1340         /* Create the module and add the functions */
1341         m = Py_InitModule("binascii", binascii_module_methods);
1342         if (m == NULL)
1343                 return;
1344
1345         d = PyModule_GetDict(m);
1346         x = PyString_FromString(doc_binascii);
1347         PyDict_SetItemString(d, "__doc__", x);
1348         Py_XDECREF(x);
1349
1350         Error = PyErr_NewException("binascii.Error", NULL, NULL);
1351         PyDict_SetItemString(d, "Error", Error);
1352         Incomplete = PyErr_NewException("binascii.Incomplete", NULL, NULL);
1353         PyDict_SetItemString(d, "Incomplete", Incomplete);
1354 }