Modules/binascii.c

   1 /*
   2 ** Routines to represent binary data in ASCII and vice-versa
   3 **
   4 ** This module currently supports the following encodings:
   5 ** uuencode:
   6 **      each line encodes 45 bytes (except possibly the last)
   7 **      First char encodes (binary) length, rest data
   8 **      each char encodes 6 bits, as follows:
   9 **      binary: 01234567 abcdefgh ijklmnop
  10 **      ascii:  012345 67abcd efghij klmnop
  11 **      ASCII encoding method is "excess-space": 000000 is encoded as ' ', etc.
  12 **      short binary data is zero-extended (so the bits are always in the
  13 **      right place), this does *not* reflect in the length.
  14 ** base64:
  15 **      Line breaks are insignificant, but lines are at most 76 chars
  16 **      each char encodes 6 bits, in similar order as uucode/hqx. Encoding
  17 **      is done via a table.
  18 **      Short binary data is filled (in ASCII) with '='.
  19 ** hqx:
  20 **      File starts with introductory text, real data starts and ends
  21 **      with colons.
  22 **      Data consists of three similar parts: info, datafork, resourcefork.
  23 **      Each part is protected (at the end) with a 16-bit crc
  24 **      The binary data is run-length encoded, and then ascii-fied:
  25 **      binary: 01234567 abcdefgh ijklmnop
  26 **      ascii:  012345 67abcd efghij klmnop
  27 **      ASCII encoding is table-driven, see the code.
  28 **      Short binary data results in the runt ascii-byte being output with
  29 **      the bits in the right place.
  30 **
  31 ** While I was reading dozens of programs that encode or decode the formats
  32 ** here (documentation? hihi:-) I have formulated Jansen's Observation:
  33 **
  34 **      Programs that encode binary data in ASCII are written in
  35 **      such a style that they are as unreadable as possible. Devices used
  36 **      include unnecessary global variables, burying important tables
  37 **      in unrelated sourcefiles, putting functions in include files,
  38 **      using seemingly-descriptive variable names for different purposes,
  39 **      calls to empty subroutines and a host of others.
  40 **
  41 ** I have attempted to break with this tradition, but I guess that that
  42 ** does make the performance sub-optimal. Oh well, too bad...
  43 **
  44 ** Jack Jansen, CWI, July 1995.
  45 **
  46 ** Added support for quoted-printable encoding, based on rfc 1521 et al
  47 ** quoted-printable encoding specifies that non printable characters (anything
  48 ** below 32 and above 126) be encoded as =XX where XX is the hexadecimal value
  49 ** of the character.  It also specifies some other behavior to enable 8bit data
  50 ** in a mail message with little difficulty (maximum line sizes, protecting
  51 ** some cases of whitespace, etc).
  52 **
  53 ** Brandon Long, September 2001.
  54 */
  55
  56
  57 #include "Python.h"
  58
   59 static PyObject *Error;         /* passed to PyErr_SetString() for malformed input, e.g. "Illegal char"; assigned outside this excerpt */
   60 static PyObject *Incomplete;    /* passed to PyErr_SetString() when hqx/RLE input stops too early; assigned outside this excerpt */
  61
  62 /*
  63 ** hqx lookup table, ascii->binary.
  64 */
  65
  66 #define RUNCHAR 0x90
  67
  68 #define DONE 0x7F
  69 #define SKIP 0x7E
  70 #define FAIL 0x7D
  71
  72 static unsigned char table_a2b_hqx[256] = {
  73 /*       ^@    ^A    ^B    ^C    ^D    ^E    ^F    ^G   */
  74 /* 0*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
  75 /*       \b    \t    \n    ^K    ^L    \r    ^N    ^O   */
  76 /* 1*/  FAIL, FAIL, SKIP, FAIL, FAIL, SKIP, FAIL, FAIL,
  77 /*       ^P    ^Q    ^R    ^S    ^T    ^U    ^V    ^W   */
  78 /* 2*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
  79 /*       ^X    ^Y    ^Z    ^[    ^\    ^]    ^^    ^_   */
  80 /* 3*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
  81 /*              !     "     #     $     %     &     '   */
  82 /* 4*/  FAIL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
  83 /*        (     )     *     +     ,     -     .     /   */
  84 /* 5*/  0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, FAIL, FAIL,
  85 /*        0     1     2     3     4     5     6     7   */
  86 /* 6*/  0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, FAIL,
  87 /*        8     9     :     ;     <     =     >     ?   */
  88 /* 7*/  0x14, 0x15, DONE, FAIL, FAIL, FAIL, FAIL, FAIL,
  89 /*        @     A     B     C     D     E     F     G   */
  90 /* 8*/  0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D,
  91 /*        H     I     J     K     L     M     N     O   */
  92 /* 9*/  0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, FAIL,
  93 /*        P     Q     R     S     T     U     V     W   */
  94 /*10*/  0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, FAIL,
  95 /*        X     Y     Z     [     \     ]     ^     _   */
  96 /*11*/  0x2C, 0x2D, 0x2E, 0x2F, FAIL, FAIL, FAIL, FAIL,
  97 /*        `     a     b     c     d     e     f     g   */
  98 /*12*/  0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, FAIL,
  99 /*        h     i     j     k     l     m     n     o   */
 100 /*13*/  0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, FAIL, FAIL,
 101 /*        p     q     r     s     t     u     v     w   */
 102 /*14*/  0x3D, 0x3E, 0x3F, FAIL, FAIL, FAIL, FAIL, FAIL,
 103 /*        x     y     z     {     |     }     ~    ^?   */
 104 /*15*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 105 /*16*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 106         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 107         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 108         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 109         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 110         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 111         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 112         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 113         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 114         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 115         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 116         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 117         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 118         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 119         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 120         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 121 };
 122
 123 static unsigned char table_b2a_hqx[] =
 124 "!\"#$%&'()*+,-012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr";
 125
 126 static char table_a2b_base64[] = {
 127         -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
 128         -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
 129         -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
 130         52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1, 0,-1,-1, /* Note PAD->0 */
 131         -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
 132         15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
 133         -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
 134         41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
 135 };
 136
 137 #define BASE64_PAD '='
 138
 139 /* Max binary chunk size; limited only by available memory */
 140 #define BASE64_MAXBIN (INT_MAX/2 - sizeof(PyStringObject) - 3)
 141
 142 static unsigned char table_b2a_base64[] =
 143 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
 144
 145
 146
 147 static unsigned short crctab_hqx[256] = {
 148         0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
 149         0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,
 150         0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6,
 151         0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de,
 152         0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485,
 153         0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d,
 154         0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4,
 155         0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc,
 156         0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823,
 157         0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b,
 158         0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12,
 159         0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a,
 160         0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41,
 161         0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49,
 162         0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70,
 163         0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78,
 164         0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f,
 165         0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067,
 166         0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e,
 167         0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256,
 168         0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d,
 169         0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
 170         0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c,
 171         0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634,
 172         0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab,
 173         0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3,
 174         0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a,
 175         0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92,
 176         0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9,
 177         0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1,
 178         0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8,
 179         0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0,
 180 };
 181
 182 PyDoc_STRVAR(doc_a2b_uu, "(ascii) -> bin. Decode a line of uuencoded data");
 183
 184 static PyObject *
 185 binascii_a2b_uu(PyObject *self, PyObject *args)
 186 {
 187         unsigned char *ascii_data, *bin_data;
 188         int leftbits = 0;
 189         unsigned char this_ch;
 190         unsigned int leftchar = 0;
 191         PyObject *rv;
 192         int ascii_len, bin_len;
 193
 194         if ( !PyArg_ParseTuple(args, "t#:a2b_uu", &ascii_data, &ascii_len) )
 195                 return NULL;
 196
 197         /* First byte: binary data length (in bytes) */
 198         bin_len = (*ascii_data++ - ' ') & 077;
 199         ascii_len--;
 200
 201         /* Allocate the buffer */
 202         if ( (rv=PyString_FromStringAndSize(NULL, bin_len)) == NULL )
 203                 return NULL;
 204         bin_data = (unsigned char *)PyString_AsString(rv);
 205
 206         for( ; bin_len > 0 ; ascii_len--, ascii_data++ ) {
 207                 /* XXX is it really best to add NULs if there's no more data */
 208                 this_ch = (ascii_len > 0) ? *ascii_data : 0;
 209                 if ( this_ch == '\n' || this_ch == '\r' || ascii_len <= 0) {
 210                         /*
 211                         ** Whitespace. Assume some spaces got eaten at
 212                         ** end-of-line. (We check this later)
 213                         */
 214                         this_ch = 0;
 215                 } else {
 216                         /* Check the character for legality
 217                         ** The 64 in stead of the expected 63 is because
 218                         ** there are a few uuencodes out there that use
 219                         ** '`' as zero instead of space.
 220                         */
 221                         if ( this_ch < ' ' || this_ch > (' ' + 64)) {
 222                                 PyErr_SetString(Error, "Illegal char");
 223                                 Py_DECREF(rv);
 224                                 return NULL;
 225                         }
 226                         this_ch = (this_ch - ' ') & 077;
 227                 }
 228                 /*
 229                 ** Shift it in on the low end, and see if there's
 230                 ** a byte ready for output.
 231                 */
 232                 leftchar = (leftchar << 6) | (this_ch);
 233                 leftbits += 6;
 234                 if ( leftbits >= 8 ) {
 235                         leftbits -= 8;
 236                         *bin_data++ = (leftchar >> leftbits) & 0xff;
 237                         leftchar &= ((1 << leftbits) - 1);
 238                         bin_len--;
 239                 }
 240         }
 241         /*
 242         ** Finally, check that if there's anything left on the line
 243         ** that it's whitespace only.
 244         */
 245         while( ascii_len-- > 0 ) {
 246                 this_ch = *ascii_data++;
 247                 /* Extra '`' may be written as padding in some cases */
 248                 if ( this_ch != ' ' && this_ch != ' '+64 &&
 249                      this_ch != '\n' && this_ch != '\r' ) {
 250                         PyErr_SetString(Error, "Trailing garbage");
 251                         Py_DECREF(rv);
 252                         return NULL;
 253                 }
 254         }
 255         return rv;
 256 }
 257
 258 PyDoc_STRVAR(doc_b2a_uu, "(bin) -> ascii. Uuencode line of data");
 259
 260 static PyObject *
 261 binascii_b2a_uu(PyObject *self, PyObject *args)
 262 {
 263         unsigned char *ascii_data, *bin_data;
 264         int leftbits = 0;
 265         unsigned char this_ch;
 266         unsigned int leftchar = 0;
 267         PyObject *rv;
 268         int bin_len;
 269
 270         if ( !PyArg_ParseTuple(args, "s#:b2a_uu", &bin_data, &bin_len) )
 271                 return NULL;
 272         if ( bin_len > 45 ) {
 273                 /* The 45 is a limit that appears in all uuencode's */
 274                 PyErr_SetString(Error, "At most 45 bytes at once");
 275                 return NULL;
 276         }
 277
 278         /* We're lazy and allocate to much (fixed up later) */
 279         if ( (rv=PyString_FromStringAndSize(NULL, bin_len*2+2)) == NULL )
 280                 return NULL;
 281         ascii_data = (unsigned char *)PyString_AsString(rv);
 282
 283         /* Store the length */
 284         *ascii_data++ = ' ' + (bin_len & 077);
 285
 286         for( ; bin_len > 0 || leftbits != 0 ; bin_len--, bin_data++ ) {
 287                 /* Shift the data (or padding) into our buffer */
 288                 if ( bin_len > 0 )      /* Data */
 289                         leftchar = (leftchar << 8) | *bin_data;
 290                 else                    /* Padding */
 291                         leftchar <<= 8;
 292                 leftbits += 8;
 293
 294                 /* See if there are 6-bit groups ready */
 295                 while ( leftbits >= 6 ) {
 296                         this_ch = (leftchar >> (leftbits-6)) & 0x3f;
 297                         leftbits -= 6;
 298                         *ascii_data++ = this_ch + ' ';
 299                 }
 300         }
 301         *ascii_data++ = '\n';   /* Append a courtesy newline */
 302
 303         _PyString_Resize(&rv, (ascii_data -
 304                                (unsigned char *)PyString_AsString(rv)));
 305         return rv;
 306 }
 307
 308
 309 static int
 310 binascii_find_valid(unsigned char *s, int slen, int num)
 311 {
 312         /* Finds & returns the (num+1)th
 313         ** valid character for base64, or -1 if none.
 314         */
 315
 316         int ret = -1;
 317         unsigned char c, b64val;
 318
 319         while ((slen > 0) && (ret == -1)) {
 320                 c = *s;
 321                 b64val = table_a2b_base64[c & 0x7f];
 322                 if ( ((c <= 0x7f) && (b64val != (unsigned char)-1)) ) {
 323                         if (num == 0)
 324                                 ret = *s;
 325                         num--;
 326                 }
 327
 328                 s++;
 329                 slen--;
 330         }
 331         return ret;
 332 }
 333
 334 PyDoc_STRVAR(doc_a2b_base64, "(ascii) -> bin. Decode a line of base64 data");
 335
 336 static PyObject *
 337 binascii_a2b_base64(PyObject *self, PyObject *args)
 338 {
 339         unsigned char *ascii_data, *bin_data;
 340         int leftbits = 0;
 341         unsigned char this_ch;
 342         unsigned int leftchar = 0;
 343         PyObject *rv;
 344         int ascii_len, bin_len;
 345         int quad_pos = 0;
 346
 347         if ( !PyArg_ParseTuple(args, "t#:a2b_base64", &ascii_data, &ascii_len) )
 348                 return NULL;
 349
 350         bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
 351
 352         /* Allocate the buffer */
 353         if ( (rv=PyString_FromStringAndSize(NULL, bin_len)) == NULL )
 354                 return NULL;
 355         bin_data = (unsigned char *)PyString_AsString(rv);
 356         bin_len = 0;
 357
 358         for( ; ascii_len > 0; ascii_len--, ascii_data++) {
 359                 this_ch = *ascii_data;
 360
 361                 if (this_ch > 0x7f ||
 362                     this_ch == '\r' || this_ch == '\n' || this_ch == ' ')
 363                         continue;
 364
 365                 /* Check for pad sequences and ignore
 366                 ** the invalid ones.
 367                 */
 368                 if (this_ch == BASE64_PAD) {
 369                         if ( (quad_pos < 2) ||
 370                              ((quad_pos == 2) &&
 371                               (binascii_find_valid(ascii_data, ascii_len, 1)
 372                                != BASE64_PAD)) )
 373                         {
 374                                 continue;
 375                         }
 376                         else {
 377                                 /* A pad sequence means no more input.
 378                                 ** We've already interpreted the data
 379                                 ** from the quad at this point.
 380                                 */
 381                                 leftbits = 0;
 382                                 break;
 383                         }
 384                 }
 385
 386                 this_ch = table_a2b_base64[*ascii_data];
 387                 if ( this_ch == (unsigned char) -1 )
 388                         continue;
 389
 390                 /*
 391                 ** Shift it in on the low end, and see if there's
 392                 ** a byte ready for output.
 393                 */
 394                 quad_pos = (quad_pos + 1) & 0x03;
 395                 leftchar = (leftchar << 6) | (this_ch);
 396                 leftbits += 6;
 397
 398                 if ( leftbits >= 8 ) {
 399                         leftbits -= 8;
 400                         *bin_data++ = (leftchar >> leftbits) & 0xff;
 401                         bin_len++;
 402                         leftchar &= ((1 << leftbits) - 1);
 403                 }
 404         }
 405
 406         if (leftbits != 0) {
 407                 PyErr_SetString(Error, "Incorrect padding");
 408                 Py_DECREF(rv);
 409                 return NULL;
 410         }
 411
 412         /* And set string size correctly. If the result string is empty
 413         ** (because the input was all invalid) return the shared empty
 414         ** string instead; _PyString_Resize() won't do this for us.
 415         */
 416         if (bin_len > 0)
 417                 _PyString_Resize(&rv, bin_len);
 418         else {
 419                 Py_DECREF(rv);
 420                 rv = PyString_FromString("");
 421         }
 422         return rv;
 423 }
 424
 425 PyDoc_STRVAR(doc_b2a_base64, "(bin) -> ascii. Base64-code line of data");
 426
 427 static PyObject *
 428 binascii_b2a_base64(PyObject *self, PyObject *args)
 429 {
 430         unsigned char *ascii_data, *bin_data;
 431         int leftbits = 0;
 432         unsigned char this_ch;
 433         unsigned int leftchar = 0;
 434         PyObject *rv;
 435         int bin_len;
 436
 437         if ( !PyArg_ParseTuple(args, "s#:b2a_base64", &bin_data, &bin_len) )
 438                 return NULL;
 439         if ( bin_len > BASE64_MAXBIN ) {
 440                 PyErr_SetString(Error, "Too much data for base64 line");
 441                 return NULL;
 442         }
 443
 444         /* We're lazy and allocate too much (fixed up later).
 445            "+3" leaves room for up to two pad characters and a trailing
 446            newline.  Note that 'b' gets encoded as 'Yg==\n' (1 in, 5 out). */
 447         if ( (rv=PyString_FromStringAndSize(NULL, bin_len*2 + 3)) == NULL )
 448                 return NULL;
 449         ascii_data = (unsigned char *)PyString_AsString(rv);
 450
 451         for( ; bin_len > 0 ; bin_len--, bin_data++ ) {
 452                 /* Shift the data into our buffer */
 453                 leftchar = (leftchar << 8) | *bin_data;
 454                 leftbits += 8;
 455
 456                 /* See if there are 6-bit groups ready */
 457                 while ( leftbits >= 6 ) {
 458                         this_ch = (leftchar >> (leftbits-6)) & 0x3f;
 459                         leftbits -= 6;
 460                         *ascii_data++ = table_b2a_base64[this_ch];
 461                 }
 462         }
 463         if ( leftbits == 2 ) {
 464                 *ascii_data++ = table_b2a_base64[(leftchar&3) << 4];
 465                 *ascii_data++ = BASE64_PAD;
 466                 *ascii_data++ = BASE64_PAD;
 467         } else if ( leftbits == 4 ) {
 468                 *ascii_data++ = table_b2a_base64[(leftchar&0xf) << 2];
 469                 *ascii_data++ = BASE64_PAD;
 470         }
 471         *ascii_data++ = '\n';   /* Append a courtesy newline */
 472
 473         _PyString_Resize(&rv, (ascii_data -
 474                                (unsigned char *)PyString_AsString(rv)));
 475         return rv;
 476 }
 477
 478 PyDoc_STRVAR(doc_a2b_hqx, "ascii -> bin, done. Decode .hqx coding");
 479
 480 static PyObject *
 481 binascii_a2b_hqx(PyObject *self, PyObject *args)
 482 {
 483         unsigned char *ascii_data, *bin_data;
 484         int leftbits = 0;
 485         unsigned char this_ch;
 486         unsigned int leftchar = 0;
 487         PyObject *rv;
 488         int len;
 489         int done = 0;
 490
 491         if ( !PyArg_ParseTuple(args, "t#:a2b_hqx", &ascii_data, &len) )
 492                 return NULL;
 493
 494         /* Allocate a string that is too big (fixed later)
 495            Add two to the initial length to prevent interning which
 496            would preclude subsequent resizing.  */
 497         if ( (rv=PyString_FromStringAndSize(NULL, len+2)) == NULL )
 498                 return NULL;
 499         bin_data = (unsigned char *)PyString_AsString(rv);
 500
 501         for( ; len > 0 ; len--, ascii_data++ ) {
 502                 /* Get the byte and look it up */
 503                 this_ch = table_a2b_hqx[*ascii_data];
 504                 if ( this_ch == SKIP )
 505                         continue;
 506                 if ( this_ch == FAIL ) {
 507                         PyErr_SetString(Error, "Illegal char");
 508                         Py_DECREF(rv);
 509                         return NULL;
 510                 }
 511                 if ( this_ch == DONE ) {
 512                         /* The terminating colon */
 513                         done = 1;
 514                         break;
 515                 }
 516
 517                 /* Shift it into the buffer and see if any bytes are ready */
 518                 leftchar = (leftchar << 6) | (this_ch);
 519                 leftbits += 6;
 520                 if ( leftbits >= 8 ) {
 521                         leftbits -= 8;
 522                         *bin_data++ = (leftchar >> leftbits) & 0xff;
 523                         leftchar &= ((1 << leftbits) - 1);
 524                 }
 525         }
 526
 527         if ( leftbits && !done ) {
 528                 PyErr_SetString(Incomplete,
 529                                 "String has incomplete number of bytes");
 530                 Py_DECREF(rv);
 531                 return NULL;
 532         }
 533         _PyString_Resize(
 534                 &rv, (bin_data - (unsigned char *)PyString_AsString(rv)));
 535         if (rv) {
 536                 PyObject *rrv = Py_BuildValue("Oi", rv, done);
 537                 Py_DECREF(rv);
 538                 return rrv;
 539         }
 540
 541         return NULL;
 542 }
 543
 544 PyDoc_STRVAR(doc_rlecode_hqx, "Binhex RLE-code binary data");
 545
 546 static PyObject *
 547 binascii_rlecode_hqx(PyObject *self, PyObject *args)
 548 {
 549         unsigned char *in_data, *out_data;
 550         PyObject *rv;
 551         unsigned char ch;
 552         int in, inend, len;
 553
 554         if ( !PyArg_ParseTuple(args, "s#:rlecode_hqx", &in_data, &len) )
 555                 return NULL;
 556
 557         /* Worst case: output is twice as big as input (fixed later) */
 558         if ( (rv=PyString_FromStringAndSize(NULL, len*2+2)) == NULL )
 559                 return NULL;
 560         out_data = (unsigned char *)PyString_AsString(rv);
 561
 562         for( in=0; in<len; in++) {
 563                 ch = in_data[in];
 564                 if ( ch == RUNCHAR ) {
 565                         /* RUNCHAR. Escape it. */
 566                         *out_data++ = RUNCHAR;
 567                         *out_data++ = 0;
 568                 } else {
 569                         /* Check how many following are the same */
 570                         for(inend=in+1;
 571                             inend<len && in_data[inend] == ch &&
 572                                     inend < in+255;
 573                             inend++) ;
 574                         if ( inend - in > 3 ) {
 575                                 /* More than 3 in a row. Output RLE. */
 576                                 *out_data++ = ch;
 577                                 *out_data++ = RUNCHAR;
 578                                 *out_data++ = inend-in;
 579                                 in = inend-1;
 580                         } else {
 581                                 /* Less than 3. Output the byte itself */
 582                                 *out_data++ = ch;
 583                         }
 584                 }
 585         }
 586         _PyString_Resize(&rv, (out_data -
 587                                (unsigned char *)PyString_AsString(rv)));
 588         return rv;
 589 }
 590
 591 PyDoc_STRVAR(doc_b2a_hqx, "Encode .hqx data");
 592
 593 static PyObject *
 594 binascii_b2a_hqx(PyObject *self, PyObject *args)
 595 {
 596         unsigned char *ascii_data, *bin_data;
 597         int leftbits = 0;
 598         unsigned char this_ch;
 599         unsigned int leftchar = 0;
 600         PyObject *rv;
 601         int len;
 602
 603         if ( !PyArg_ParseTuple(args, "s#:b2a_hqx", &bin_data, &len) )
 604                 return NULL;
 605
 606         /* Allocate a buffer that is at least large enough */
 607         if ( (rv=PyString_FromStringAndSize(NULL, len*2+2)) == NULL )
 608                 return NULL;
 609         ascii_data = (unsigned char *)PyString_AsString(rv);
 610
 611         for( ; len > 0 ; len--, bin_data++ ) {
 612                 /* Shift into our buffer, and output any 6bits ready */
 613                 leftchar = (leftchar << 8) | *bin_data;
 614                 leftbits += 8;
 615                 while ( leftbits >= 6 ) {
 616                         this_ch = (leftchar >> (leftbits-6)) & 0x3f;
 617                         leftbits -= 6;
 618                         *ascii_data++ = table_b2a_hqx[this_ch];
 619                 }
 620         }
 621         /* Output a possible runt byte */
 622         if ( leftbits ) {
 623                 leftchar <<= (6-leftbits);
 624                 *ascii_data++ = table_b2a_hqx[leftchar & 0x3f];
 625         }
 626         _PyString_Resize(&rv, (ascii_data -
 627                                (unsigned char *)PyString_AsString(rv)));
 628         return rv;
 629 }
 630
 631 PyDoc_STRVAR(doc_rledecode_hqx, "Decode hexbin RLE-coded string");
 632
 633 static PyObject *
 634 binascii_rledecode_hqx(PyObject *self, PyObject *args)
 635 {
 636         unsigned char *in_data, *out_data;
 637         unsigned char in_byte, in_repeat;
 638         PyObject *rv;
 639         int in_len, out_len, out_len_left;
 640
 641         if ( !PyArg_ParseTuple(args, "s#:rledecode_hqx", &in_data, &in_len) )
 642                 return NULL;
 643
 644         /* Empty string is a special case */
 645         if ( in_len == 0 )
 646                 return Py_BuildValue("s", "");
 647
 648         /* Allocate a buffer of reasonable size. Resized when needed */
 649         out_len = in_len*2;
 650         if ( (rv=PyString_FromStringAndSize(NULL, out_len)) == NULL )
 651                 return NULL;
 652         out_len_left = out_len;
 653         out_data = (unsigned char *)PyString_AsString(rv);
 654
 655         /*
 656         ** We need two macros here to get/put bytes and handle
 657         ** end-of-buffer for input and output strings.
 658         */
 659 #define INBYTE(b) \
 660         do { \
 661                  if ( --in_len < 0 ) { \
 662                            PyErr_SetString(Incomplete, ""); \
 663                            Py_DECREF(rv); \
 664                            return NULL; \
 665                  } \
 666                  b = *in_data++; \
 667         } while(0)
 668
 669 #define OUTBYTE(b) \
 670         do { \
 671                  if ( --out_len_left < 0 ) { \
 672                           _PyString_Resize(&rv, 2*out_len); \
 673                           if ( rv == NULL ) return NULL; \
 674                           out_data = (unsigned char *)PyString_AsString(rv) \
 675                                                                  + out_len; \
 676                           out_len_left = out_len-1; \
 677                           out_len = out_len * 2; \
 678                  } \
 679                  *out_data++ = b; \
 680         } while(0)
 681
 682                 /*
 683                 ** Handle first byte separately (since we have to get angry
 684                 ** in case of an orphaned RLE code).
 685                 */
 686                 INBYTE(in_byte);
 687
 688         if (in_byte == RUNCHAR) {
 689                 INBYTE(in_repeat);
 690                 if (in_repeat != 0) {
 691                         /* Note Error, not Incomplete (which is at the end
 692                         ** of the string only). This is a programmer error.
 693                         */
 694                         PyErr_SetString(Error, "Orphaned RLE code at start");
 695                         Py_DECREF(rv);
 696                         return NULL;
 697                 }
 698                 OUTBYTE(RUNCHAR);
 699         } else {
 700                 OUTBYTE(in_byte);
 701         }
 702
 703         while( in_len > 0 ) {
 704                 INBYTE(in_byte);
 705
 706                 if (in_byte == RUNCHAR) {
 707                         INBYTE(in_repeat);
 708                         if ( in_repeat == 0 ) {
 709                                 /* Just an escaped RUNCHAR value */
 710                                 OUTBYTE(RUNCHAR);
 711                         } else {
 712                                 /* Pick up value and output a sequence of it */
 713                                 in_byte = out_data[-1];
 714                                 while ( --in_repeat > 0 )
 715                                         OUTBYTE(in_byte);
 716                         }
 717                 } else {
 718                         /* Normal byte */
 719                         OUTBYTE(in_byte);
 720                 }
 721         }
 722         _PyString_Resize(&rv, (out_data -
 723                                (unsigned char *)PyString_AsString(rv)));
 724         return rv;
 725 }
 726
 727 PyDoc_STRVAR(doc_crc_hqx,
 728 "(data, oldcrc) -> newcrc. Compute hqx CRC incrementally");
 729
 730 static PyObject *
 731 binascii_crc_hqx(PyObject *self, PyObject *args)
 732 {
 733         unsigned char *bin_data;
 734         unsigned int crc;
 735         int len;
 736
 737         if ( !PyArg_ParseTuple(args, "s#i:crc_hqx", &bin_data, &len, &crc) )
 738                 return NULL;
 739
 740         while(len--) {
 741                 crc=((crc<<8)&0xff00)^crctab_hqx[((crc>>8)&0xff)^*bin_data++];
 742         }
 743
 744         return Py_BuildValue("i", crc);
 745 }
 746
 747 PyDoc_STRVAR(doc_crc32,
 748 "(data, oldcrc = 0) -> newcrc. Compute CRC-32 incrementally");
 749
 750 /*  Crc - 32 BIT ANSI X3.66 CRC checksum files
 751     Also known as: ISO 3307
 752 **********************************************************************|
 753 *                                                                    *|
 754 * Demonstration program to compute the 32-bit CRC used as the frame  *|
 755 * check sequence in ADCCP (ANSI X3.66, also known as FIPS PUB 71     *|
 756 * and FED-STD-1003, the U.S. versions of CCITT's X.25 link-level     *|
 757 * protocol).  The 32-bit FCS was added via the Federal Register,     *|
 758 * 1 June 1982, p.23798.  I presume but don't know for certain that   *|
 759 * this polynomial is or will be included in CCITT V.41, which        *|
 760 * defines the 16-bit CRC (often called CRC-CCITT) polynomial.  FIPS  *|
 761 * PUB 78 says that the 32-bit FCS reduces otherwise undetected       *|
 762 * errors by a factor of 10^-5 over 16-bit FCS.                       *|
 763 *                                                                    *|
 764 **********************************************************************|
 765
 766  Copyright (C) 1986 Gary S. Brown.  You may use this program, or
 767  code or tables extracted from it, as desired without restriction.
 768
 769  First, the polynomial itself and its table of feedback terms.  The
 770  polynomial is
 771  X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
 772  Note that we take it "backwards" and put the highest-order term in
 773  the lowest-order bit.  The X^32 term is "implied"; the LSB is the
 774  X^31 term, etc.  The X^0 term (usually shown as "+1") results in
 775  the MSB being 1.
 776
 777  Note that the usual hardware shift register implementation, which
 778  is what we're using (we're merely optimizing it by doing eight-bit
 779  chunks at a time) shifts bits into the lowest-order term.  In our
 780  implementation, that means shifting towards the right.  Why do we
 781  do it this way?  Because the calculated CRC must be transmitted in
 782  order from highest-order term to lowest-order term.  UARTs transmit
 783  characters in order from LSB to MSB.  By storing the CRC this way,
 784  we hand it to the UART in the order low-byte to high-byte; the UART
 785  sends each low-bit to high-bit; and the result is transmission bit
 786  by bit from highest- to lowest-order term without requiring any bit
 787  shuffling on our part.  Reception works similarly.
 788
 789  The feedback terms table consists of 256, 32-bit entries.  Notes:
 790
 791   1. The table can be generated at runtime if desired; code to do so
 792      is shown later.  It might not be obvious, but the feedback
 793      terms simply represent the results of eight shift/xor opera-
 794      tions for all combinations of data and CRC register values.
 795
 796   2. The CRC accumulation logic is the same for all CRC polynomials,
 797      be they sixteen or thirty-two bits wide.  You simply choose the
 798      appropriate table.  Alternatively, because the table can be
 799      generated at runtime, you can start by generating the table for
 800      the polynomial in question and use exactly the same "updcrc",
 801      if your application needn't simultaneously handle two CRC
 802      polynomials.  (Note, however, that XMODEM is strange.)
 803
 804   3. For 16-bit CRCs, the table entries need be only 16 bits wide;
 805      of course, 32-bit entries work OK if the high 16 bits are zero.
 806
 807   4. The values must be right-shifted by eight bits by the "updcrc"
 808      logic; the shift must be unsigned (bring in zeroes).  On some
 809      hardware you could probably optimize the shift in assembler by
 810      using byte-swap instructions.
 811 ********************************************************************/
 812
 813 static unsigned long crc_32_tab[256] = {
 814 0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL,
 815 0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL,
 816 0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL,
 817 0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL,
 818 0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL,
 819 0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL,
 820 0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL,
 821 0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL,
 822 0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL,
 823 0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL,
 824 0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL,
 825 0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL,
 826 0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL,
 827 0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL,
 828 0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL,
 829 0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL,
 830 0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL,
 831 0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL,
 832 0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL,
 833 0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL,
 834 0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL,
 835 0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL,
 836 0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL,
 837 0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL,
 838 0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL,
 839 0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL,
 840 0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL,
 841 0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL,
 842 0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL,
 843 0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL,
 844 0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL,
 845 0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL,
 846 0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL,
 847 0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL,
 848 0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL,
 849 0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL,
 850 0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL,
 851 0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL,
 852 0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL,
 853 0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL,
 854 0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL,
 855 0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL,
 856 0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL,
 857 0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL,
 858 0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL,
 859 0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL,
 860 0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL,
 861 0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL,
 862 0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL,
 863 0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL,
 864 0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL,
 865 0x2d02ef8dUL
 866 };
 867
 868 static PyObject *
 869 binascii_crc32(PyObject *self, PyObject *args)
 870 { /* By Jim Ahlstrom; All rights transferred to CNRI */
 871         unsigned char *bin_data;
 872         unsigned long crc = 0UL;        /* initial value of CRC */
 873         int len;
 874         long result;
 875
 876         if ( !PyArg_ParseTuple(args, "s#|l:crc32", &bin_data, &len, &crc) )
 877                 return NULL;
 878
 879         crc = ~ crc;
 880 #if SIZEOF_LONG > 4
 881         /* only want the trailing 32 bits */
 882         crc &= 0xFFFFFFFFUL;
 883 #endif
 884         while (len--)
 885                 crc = crc_32_tab[(crc ^ *bin_data++) & 0xffUL] ^ (crc >> 8);
 886                 /* Note:  (crc >> 8) MUST zero fill on left */
 887
 888         result = (long)(crc ^ 0xFFFFFFFFUL);
 889 #if SIZEOF_LONG > 4
 890         /* Extend the sign bit.  This is one way to ensure the result is the
 891          * same across platforms.  The other way would be to return an
 892          * unbounded unsigned long, but the evidence suggests that lots of
 893          * code outside this treats the result as if it were a signed 4-byte
 894          * integer.
 895          */
 896         result |= -(result & (1L << 31));
 897 #endif
 898         return PyInt_FromLong(result);
 899 }
 900
 901
 902 static PyObject *
 903 binascii_hexlify(PyObject *self, PyObject *args)
 904 {
 905         char* argbuf;
 906         int arglen;
 907         PyObject *retval;
 908         char* retbuf;
 909         int i, j;
 910
 911         if (!PyArg_ParseTuple(args, "t#:b2a_hex", &argbuf, &arglen))
 912                 return NULL;
 913
 914         retval = PyString_FromStringAndSize(NULL, arglen*2);
 915         if (!retval)
 916                 return NULL;
 917         retbuf = PyString_AsString(retval);
 918         if (!retbuf)
 919                 goto finally;
 920
 921         /* make hex version of string, taken from shamodule.c */
 922         for (i=j=0; i < arglen; i++) {
 923                 char c;
 924                 c = (argbuf[i] >> 4) & 0xf;
 925                 c = (c>9) ? c+'a'-10 : c + '0';
 926                 retbuf[j++] = c;
 927                 c = argbuf[i] & 0xf;
 928                 c = (c>9) ? c+'a'-10 : c + '0';
 929                 retbuf[j++] = c;
 930         }
 931         return retval;
 932
 933   finally:
 934         Py_DECREF(retval);
 935         return NULL;
 936 }
 937
 938 PyDoc_STRVAR(doc_hexlify,
 939 "b2a_hex(data) -> s; Hexadecimal representation of binary data.\n\
 940 \n\
 941 This function is also available as \"hexlify()\".");
 942
 943
 944 static int
 945 to_int(int c)
 946 {
 947         if (isdigit(c))
 948                 return c - '0';
 949         else {
 950                 if (isupper(c))
 951                         c = tolower(c);
 952                 if (c >= 'a' && c <= 'f')
 953                         return c - 'a' + 10;
 954         }
 955         return -1;
 956 }
 957
 958
 959 static PyObject *
 960 binascii_unhexlify(PyObject *self, PyObject *args)
 961 {
 962         char* argbuf;
 963         int arglen;
 964         PyObject *retval;
 965         char* retbuf;
 966         int i, j;
 967
 968         if (!PyArg_ParseTuple(args, "s#:a2b_hex", &argbuf, &arglen))
 969                 return NULL;
 970
 971         /* XXX What should we do about strings with an odd length?  Should
 972          * we add an implicit leading zero, or a trailing zero?  For now,
 973          * raise an exception.
 974          */
 975         if (arglen % 2) {
 976                 PyErr_SetString(PyExc_TypeError, "Odd-length string");
 977                 return NULL;
 978         }
 979
 980         retval = PyString_FromStringAndSize(NULL, (arglen/2));
 981         if (!retval)
 982                 return NULL;
 983         retbuf = PyString_AsString(retval);
 984         if (!retbuf)
 985                 goto finally;
 986
 987         for (i=j=0; i < arglen; i += 2) {
 988                 int top = to_int(Py_CHARMASK(argbuf[i]));
 989                 int bot = to_int(Py_CHARMASK(argbuf[i+1]));
 990                 if (top == -1 || bot == -1) {
 991                         PyErr_SetString(PyExc_TypeError,
 992                                         "Non-hexadecimal digit found");
 993                         goto finally;
 994                 }
 995                 retbuf[j++] = (top << 4) + bot;
 996         }
 997         return retval;
 998
 999   finally:
1000         Py_DECREF(retval);
1001         return NULL;
1002 }
1003
1004 PyDoc_STRVAR(doc_unhexlify,
1005 "a2b_hex(hexstr) -> s; Binary data of hexadecimal representation.\n\
1006 \n\
1007 hexstr must contain an even number of hex digits (upper or lower case).\n\
1008 This function is also available as \"unhexlify()\"");
1009
/* Value of each 7-bit ASCII character when read as a hexadecimal digit
 * ('0'-'9', 'A'-'F', 'a'-'f'), or -1 for every other character.  The
 * table is read-only, hence const.
 */
static const int table_hex[128] = {
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
   0, 1, 2, 3,  4, 5, 6, 7,  8, 9,-1,-1, -1,-1,-1,-1,
  -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
};

/* Look up the hex value of character c.  The table has 128 entries, so
 * c must be in the range 0..127; callers are expected to have checked
 * that c is a hex digit.  The expansion is parenthesized so it stays one
 * operand inside larger expressions.
 */
#define hexval(c) (table_hex[(unsigned int)(c)])

/* Line-length limit used by the quoted-printable encoder. */
#define MAXLINESIZE 76
1024
1025 PyDoc_STRVAR(doc_a2b_qp, "Decode a string of qp-encoded data");
1026
1027 static PyObject*
1028 binascii_a2b_qp(PyObject *self, PyObject *args, PyObject *kwargs)
1029 {
1030         unsigned int in, out;
1031         char ch;
1032         unsigned char *data, *odata;
1033         unsigned int datalen = 0;
1034         PyObject *rv;
1035         static const char *kwlist[] = {"data", "header", NULL};
1036         int header = 0;
1037
1038         if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|i", kwlist, &data,
1039               &datalen, &header))
1040                 return NULL;
1041
1042         /* We allocate the output same size as input, this is overkill.
1043          * The previous implementation used calloc() so we'll zero out the
1044          * memory here too, since PyMem_Malloc() does not guarantee that.
1045          */
1046         odata = (unsigned char *) PyMem_Malloc(datalen);
1047         if (odata == NULL) {
1048                 PyErr_NoMemory();
1049                 return NULL;
1050         }
1051         memset(odata, 0, datalen);
1052
1053         in = out = 0;
1054         while (in < datalen) {
1055                 if (data[in] == '=') {
1056                         in++;
1057                         if (in >= datalen) break;
1058                         /* Soft line breaks */
1059                         if ((data[in] == '\n') || (data[in] == '\r') ||
1060                             (data[in] == ' ') || (data[in] == '\t')) {
1061                                 if (data[in] != '\n') {
1062                                         while (in < datalen && data[in] != '\n') in++;
1063                                 }
1064                                 if (in < datalen) in++;
1065                         }
1066                         else if (data[in] == '=') {
1067                                 /* broken case from broken python qp */
1068                                 odata[out++] = '=';
1069                                 in++;
1070                         }
1071                         else if (((data[in] >= 'A' && data[in] <= 'F') ||
1072                                   (data[in] >= 'a' && data[in] <= 'f') ||
1073                                   (data[in] >= '0' && data[in] <= '9')) &&
1074                                  ((data[in+1] >= 'A' && data[in+1] <= 'F') ||
1075                                   (data[in+1] >= 'a' && data[in+1] <= 'f') ||
1076                                   (data[in+1] >= '0' && data[in+1] <= '9'))) {
1077                                 /* hexval */
1078                                 ch = hexval(data[in]) << 4;
1079                                 in++;
1080                                 ch |= hexval(data[in]);
1081                                 in++;
1082                                 odata[out++] = ch;
1083                         }
1084                         else {
1085                           odata[out++] = '=';
1086                         }
1087                 }
1088                 else if (header && data[in] == '_') {
1089                         odata[out++] = ' ';
1090                         in++;
1091                 }
1092                 else {
1093                         odata[out] = data[in];
1094                         in++;
1095                         out++;
1096                 }
1097         }
1098         if ((rv = PyString_FromStringAndSize((char *)odata, out)) == NULL) {
1099                 PyMem_Free(odata);
1100                 return NULL;
1101         }
1102         PyMem_Free(odata);
1103         return rv;
1104 }
1105
/* Write the two uppercase hexadecimal digits of `ch` to s[0] (high
 * nibble) and s[1] (low nibble).  No terminator is written.  Always
 * returns 0.
 */
static int
to_hex (unsigned char ch, unsigned char *s)
{
        static const char digits[] = "0123456789ABCDEF";
        unsigned int high = (ch / 16u) % 16u;
        unsigned int low = ch % 16u;

        s[0] = digits[high];
        s[1] = digits[low];
        return 0;
}
1116
1117 PyDoc_STRVAR(doc_b2a_qp,
1118 "b2a_qp(data, quotetabs=0, istext=1, header=0) -> s; \n\
1119  Encode a string using quoted-printable encoding. \n\
1120 \n\
1121 On encoding, when istext is set, newlines are not encoded, and white \n\
1122 space at end of lines is.  When istext is not set, \\r and \\n (CR/LF) are \n\
1123 both encoded.  When quotetabs is set, space and tabs are encoded.");
1124
1125 /* XXX: This is ridiculously complicated to be backward compatible
1126  * (mostly) with the quopri module.  It doesn't re-create the quopri
1127  * module bug where text ending in CRLF has the CR encoded */
1128 static PyObject*
1129 binascii_b2a_qp (PyObject *self, PyObject *args, PyObject *kwargs)
1130 {
1131         unsigned int in, out;
1132         unsigned char *data, *odata;
1133         unsigned int datalen = 0, odatalen = 0;
1134         PyObject *rv;
1135         unsigned int linelen = 0;
1136         static const char *kwlist[] = {"data", "quotetabs", "istext",
1137                                        "header", NULL};
1138         int istext = 1;
1139         int quotetabs = 0;
1140         int header = 0;
1141         unsigned char ch;
1142         int crlf = 0;
1143         unsigned char *p;
1144
1145         if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|iii", kwlist, &data,
1146               &datalen, &quotetabs, &istext, &header))
1147                 return NULL;
1148
1149         /* See if this string is using CRLF line ends */
1150         /* XXX: this function has the side effect of converting all of
1151          * the end of lines to be the same depending on this detection
1152          * here */
1153         p = (unsigned char *) strchr((char *)data, '\n');
1154         if ((p != NULL) && (p > data) && (*(p-1) == '\r'))
1155                 crlf = 1;
1156
1157         /* First, scan to see how many characters need to be encoded */
1158         in = 0;
1159         while (in < datalen) {
1160                 if ((data[in] > 126) ||
1161                     (data[in] == '=') ||
1162                     (header && data[in] == '_') ||
1163                     ((data[in] == '.') && (linelen == 1)) ||
1164                     (!istext && ((data[in] == '\r') || (data[in] == '\n'))) ||
1165                     ((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) ||
1166                     ((data[in] < 33) &&
1167                      (data[in] != '\r') && (data[in] != '\n') &&
1168                      (quotetabs && ((data[in] != '\t') || (data[in] != ' ')))))
1169                 {
1170                         if ((linelen + 3) >= MAXLINESIZE) {
1171                                 linelen = 0;
1172                                 if (crlf)
1173                                         odatalen += 3;
1174                                 else
1175                                         odatalen += 2;
1176                         }
1177                         linelen += 3;
1178                         odatalen += 3;
1179                         in++;
1180                 }
1181                 else {
1182                         if (istext &&
1183                             ((data[in] == '\n') ||
1184                              ((in+1 < datalen) && (data[in] == '\r') &&
1185                              (data[in+1] == '\n'))))
1186                         {
1187                                 linelen = 0;
1188                                 /* Protect against whitespace on end of line */
1189                                 if (in && ((data[in-1] == ' ') || (data[in-1] == '\t')))
1190                                         odatalen += 2;
1191                                 if (crlf)
1192                                         odatalen += 2;
1193                                 else
1194                                         odatalen += 1;
1195                                 if (data[in] == '\r')
1196                                         in += 2;
1197                                 else
1198                                         in++;
1199                         }
1200                         else {
1201                                 if ((in + 1 != datalen) &&
1202                                     (data[in+1] != '\n') &&
1203                                     (linelen + 1) >= MAXLINESIZE) {
1204                                         linelen = 0;
1205                                         if (crlf)
1206                                                 odatalen += 3;
1207                                         else
1208                                                 odatalen += 2;
1209                                 }
1210                                 linelen++;
1211                                 odatalen++;
1212                                 in++;
1213                         }
1214                 }
1215         }
1216
1217         /* We allocate the output same size as input, this is overkill.
1218          * The previous implementation used calloc() so we'll zero out the
1219          * memory here too, since PyMem_Malloc() does not guarantee that.
1220          */
1221         odata = (unsigned char *) PyMem_Malloc(odatalen);
1222         if (odata == NULL) {
1223                 PyErr_NoMemory();
1224                 return NULL;
1225         }
1226         memset(odata, 0, odatalen);
1227
1228         in = out = linelen = 0;
1229         while (in < datalen) {
1230                 if ((data[in] > 126) ||
1231                     (data[in] == '=') ||
1232                     (header && data[in] == '_') ||
1233                     ((data[in] == '.') && (linelen == 1)) ||
1234                     (!istext && ((data[in] == '\r') || (data[in] == '\n'))) ||
1235                     ((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) ||
1236                     ((data[in] < 33) &&
1237                      (data[in] != '\r') && (data[in] != '\n') &&
1238                      (quotetabs && ((data[in] != '\t') || (data[in] != ' ')))))
1239                 {
1240                         if ((linelen + 3 )>= MAXLINESIZE) {
1241                                 odata[out++] = '=';
1242                                 if (crlf) odata[out++] = '\r';
1243                                 odata[out++] = '\n';
1244                                 linelen = 0;
1245                         }
1246                         odata[out++] = '=';
1247                         to_hex(data[in], &odata[out]);
1248                         out += 2;
1249                         in++;
1250                         linelen += 3;
1251                 }
1252                 else {
1253                         if (istext &&
1254                             ((data[in] == '\n') ||
1255                              ((in+1 < datalen) && (data[in] == '\r') &&
1256                              (data[in+1] == '\n'))))
1257                         {
1258                                 linelen = 0;
1259                                 /* Protect against whitespace on end of line */
1260                                 if (out && ((odata[out-1] == ' ') || (odata[out-1] == '\t'))) {
1261                                         ch = odata[out-1];
1262                                         odata[out-1] = '=';
1263                                         to_hex(ch, &odata[out]);
1264                                         out += 2;
1265                                 }
1266
1267                                 if (crlf) odata[out++] = '\r';
1268                                 odata[out++] = '\n';
1269                                 if (data[in] == '\r')
1270                                         in += 2;
1271                                 else
1272                                         in++;
1273                         }
1274                         else {
1275                                 if ((in + 1 != datalen) &&
1276                                     (data[in+1] != '\n') &&
1277                                     (linelen + 1) >= MAXLINESIZE) {
1278                                         odata[out++] = '=';
1279                                         if (crlf) odata[out++] = '\r';
1280                                         odata[out++] = '\n';
1281                                         linelen = 0;
1282                                 }
1283                                 linelen++;
1284                                 if (header && data[in] == ' ') {
1285                                         odata[out++] = '_';
1286                                         in++;
1287                                 }
1288                                 else {
1289                                         odata[out++] = data[in++];
1290                                 }
1291                         }
1292                 }
1293         }
1294         if ((rv = PyString_FromStringAndSize((char *)odata, out)) == NULL) {
1295                 PyMem_Free(odata);
1296                 return NULL;
1297         }
1298         PyMem_Free(odata);
1299         return rv;
1300 }
1301
1302 /* List of functions defined in the module */
1303
1304 static struct PyMethodDef binascii_module_methods[] = {
1305         {"a2b_uu",     binascii_a2b_uu,     METH_VARARGS, doc_a2b_uu},
1306         {"b2a_uu",     binascii_b2a_uu,     METH_VARARGS, doc_b2a_uu},
1307         {"a2b_base64", binascii_a2b_base64, METH_VARARGS, doc_a2b_base64},
1308         {"b2a_base64", binascii_b2a_base64, METH_VARARGS, doc_b2a_base64},
1309         {"a2b_hqx",    binascii_a2b_hqx,    METH_VARARGS, doc_a2b_hqx},
1310         {"b2a_hqx",    binascii_b2a_hqx,    METH_VARARGS, doc_b2a_hqx},
1311         {"b2a_hex",    binascii_hexlify,    METH_VARARGS, doc_hexlify},
1312         {"a2b_hex",    binascii_unhexlify,  METH_VARARGS, doc_unhexlify},
1313         {"hexlify",    binascii_hexlify,    METH_VARARGS, doc_hexlify},
1314         {"unhexlify",  binascii_unhexlify,  METH_VARARGS, doc_unhexlify},
1315         {"rlecode_hqx",   binascii_rlecode_hqx, METH_VARARGS, doc_rlecode_hqx},
1316         {"rledecode_hqx", binascii_rledecode_hqx, METH_VARARGS,
1317          doc_rledecode_hqx},
1318         {"crc_hqx",    binascii_crc_hqx,    METH_VARARGS, doc_crc_hqx},
1319         {"crc32",      binascii_crc32,      METH_VARARGS, doc_crc32},
1320         {"a2b_qp", (PyCFunction)binascii_a2b_qp, METH_VARARGS | METH_KEYWORDS,
1321           doc_a2b_qp},
1322         {"b2a_qp", (PyCFunction)binascii_b2a_qp, METH_VARARGS | METH_KEYWORDS,
1323           doc_b2a_qp},
1324         {NULL, NULL}                         /* sentinel */
1325 };
1326
1327
1328 /* Initialization function for the module (*must* be called initbinascii) */
1329 PyDoc_STRVAR(doc_binascii, "Conversion between binary data and ASCII");
1330
1331 PyMODINIT_FUNC
1332 initbinascii(void)
1333 {
1334         PyObject *m, *d, *x;
1335
1336         /* Create the module and add the functions */
1337         m = Py_InitModule("binascii", binascii_module_methods);
1338         if (m == NULL)
1339                 return;
1340
1341         d = PyModule_GetDict(m);
1342         x = PyString_FromString(doc_binascii);
1343         PyDict_SetItemString(d, "__doc__", x);
1344         Py_XDECREF(x);
1345
1346         Error = PyErr_NewException("binascii.Error", NULL, NULL);
1347         PyDict_SetItemString(d, "Error", Error);
1348         Incomplete = PyErr_NewException("binascii.Incomplete", NULL, NULL);
1349         PyDict_SetItemString(d, "Incomplete", Incomplete);
1350 }