Modules/binascii.c

   1 /*
   2 ** Routines to represent binary data in ASCII and vice-versa
   3 **
   4 ** This module currently supports the following encodings:
   5 ** uuencode:
   6 **      each line encodes 45 bytes (except possibly the last)
   7 **      First char encodes (binary) length, rest data
   8 **      each char encodes 6 bits, as follows:
   9 **      binary: 01234567 abcdefgh ijklmnop
  10 **      ascii:  012345 67abcd efghij klmnop
  11 **      ASCII encoding method is "excess-space": 000000 is encoded as ' ', etc.
  12 **      short binary data is zero-extended (so the bits are always in the
  13 **      right place), this does *not* reflect in the length.
  14 ** base64:
  15 **      Line breaks are insignificant, but lines are at most 76 chars
  16 **      each char encodes 6 bits, in similar order as uucode/hqx. Encoding
  17 **      is done via a table.
  18 **      Short binary data is filled (in ASCII) with '='.
  19 ** hqx:
  20 **      File starts with introductory text, real data starts and ends
  21 **      with colons.
  22 **      Data consists of three similar parts: info, datafork, resourcefork.
  23 **      Each part is protected (at the end) with a 16-bit crc
  24 **      The binary data is run-length encoded, and then ascii-fied:
  25 **      binary: 01234567 abcdefgh ijklmnop
  26 **      ascii:  012345 67abcd efghij klmnop
  27 **      ASCII encoding is table-driven, see the code.
  28 **      Short binary data results in the runt ascii-byte being output with
  29 **      the bits in the right place.
  30 **
  31 ** While I was reading dozens of programs that encode or decode the formats
  32 ** here (documentation? hihi:-) I have formulated Jansen's Observation:
  33 **
  34 **      Programs that encode binary data in ASCII are written in
  35 **      such a style that they are as unreadable as possible. Devices used
  36 **      include unnecessary global variables, burying important tables
  37 **      in unrelated sourcefiles, putting functions in include files,
  38 **      using seemingly-descriptive variable names for different purposes,
  39 **      calls to empty subroutines and a host of others.
  40 **
  41 ** I have attempted to break with this tradition, but I guess that that
  42 ** does make the performance sub-optimal. Oh well, too bad...
  43 **
  44 ** Jack Jansen, CWI, July 1995.
  45 **
  46 ** Added support for quoted-printable encoding, based on rfc 1521 et al
  47 ** quoted-printable encoding specifies that non printable characters (anything
  48 ** below 32 and above 126) be encoded as =XX where XX is the hexadecimal value
  49 ** of the character.  It also specifies some other behavior to enable 8bit data
  50 ** in a mail message with little difficulty (maximum line sizes, protecting
  51 ** some cases of whitespace, etc).
  52 **
  53 ** Brandon Long, September 2001.
  54 */
  55
  56 #define PY_SSIZE_T_CLEAN
  57
  58 #include "Python.h"
  59 #ifdef USE_ZLIB_CRC32
  60 #include "zlib.h"
  61 #endif
  62
  63 static PyObject *Error;
  64 static PyObject *Incomplete;
  65
  66 /*
  67 ** hqx lookup table, ascii->binary.
  68 */
  69
  70 #define RUNCHAR 0x90
  71
  72 #define DONE 0x7F
  73 #define SKIP 0x7E
  74 #define FAIL 0x7D
  75
  76 static unsigned char table_a2b_hqx[256] = {
  77 /*       ^@    ^A    ^B    ^C    ^D    ^E    ^F    ^G   */
  78 /* 0*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
  79 /*       \b    \t    \n    ^K    ^L    \r    ^N    ^O   */
  80 /* 1*/  FAIL, FAIL, SKIP, FAIL, FAIL, SKIP, FAIL, FAIL,
  81 /*       ^P    ^Q    ^R    ^S    ^T    ^U    ^V    ^W   */
  82 /* 2*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
  83 /*       ^X    ^Y    ^Z    ^[    ^\    ^]    ^^    ^_   */
  84 /* 3*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
  85 /*              !     "     #     $     %     &     '   */
  86 /* 4*/  FAIL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
  87 /*        (     )     *     +     ,     -     .     /   */
  88 /* 5*/  0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, FAIL, FAIL,
  89 /*        0     1     2     3     4     5     6     7   */
  90 /* 6*/  0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, FAIL,
  91 /*        8     9     :     ;     <     =     >     ?   */
  92 /* 7*/  0x14, 0x15, DONE, FAIL, FAIL, FAIL, FAIL, FAIL,
  93 /*        @     A     B     C     D     E     F     G   */
  94 /* 8*/  0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D,
  95 /*        H     I     J     K     L     M     N     O   */
  96 /* 9*/  0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, FAIL,
  97 /*        P     Q     R     S     T     U     V     W   */
  98 /*10*/  0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, FAIL,
  99 /*        X     Y     Z     [     \     ]     ^     _   */
 100 /*11*/  0x2C, 0x2D, 0x2E, 0x2F, FAIL, FAIL, FAIL, FAIL,
 101 /*        `     a     b     c     d     e     f     g   */
 102 /*12*/  0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, FAIL,
 103 /*        h     i     j     k     l     m     n     o   */
 104 /*13*/  0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, FAIL, FAIL,
 105 /*        p     q     r     s     t     u     v     w   */
 106 /*14*/  0x3D, 0x3E, 0x3F, FAIL, FAIL, FAIL, FAIL, FAIL,
 107 /*        x     y     z     {     |     }     ~    ^?   */
 108 /*15*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 109 /*16*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 110         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 111         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 112         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 113         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 114         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 115         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 116         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 117         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 118         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 119         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 120         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 121         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 122         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 123         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 124         FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
 125 };
 126
 127 static unsigned char table_b2a_hqx[] =
 128 "!\"#$%&'()*+,-012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr";
 129
 130 static char table_a2b_base64[] = {
 131         -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
 132         -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
 133         -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
 134         52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1, 0,-1,-1, /* Note PAD->0 */
 135         -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
 136         15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
 137         -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
 138         41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
 139 };
 140
 141 #define BASE64_PAD '='
 142
 143 /* Max binary chunk size; limited only by available memory */
 144 #define BASE64_MAXBIN (INT_MAX/2 - sizeof(PyBytesObject) - 3)
 145
 146 static unsigned char table_b2a_base64[] =
 147 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
 148
 149
 150
 151 static unsigned short crctab_hqx[256] = {
 152         0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
 153         0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,
 154         0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6,
 155         0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de,
 156         0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485,
 157         0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d,
 158         0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4,
 159         0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc,
 160         0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823,
 161         0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b,
 162         0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12,
 163         0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a,
 164         0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41,
 165         0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49,
 166         0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70,
 167         0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78,
 168         0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f,
 169         0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067,
 170         0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e,
 171         0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256,
 172         0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d,
 173         0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
 174         0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c,
 175         0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634,
 176         0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab,
 177         0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3,
 178         0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a,
 179         0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92,
 180         0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9,
 181         0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1,
 182         0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8,
 183         0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0,
 184 };
 185
 186 PyDoc_STRVAR(doc_a2b_uu, "(ascii) -> bin. Decode a line of uuencoded data");
 187
 188 static PyObject *
 189 binascii_a2b_uu(PyObject *self, PyObject *args)
 190 {
 191         unsigned char *ascii_data, *bin_data;
 192         int leftbits = 0;
 193         unsigned char this_ch;
 194         unsigned int leftchar = 0;
 195         PyObject *rv;
 196         Py_ssize_t ascii_len, bin_len;
 197
 198         if ( !PyArg_ParseTuple(args, "t#:a2b_uu", &ascii_data, &ascii_len) )
 199                 return NULL;
 200
 201         /* First byte: binary data length (in bytes) */
 202         bin_len = (*ascii_data++ - ' ') & 077;
 203         ascii_len--;
 204
 205         /* Allocate the buffer */
 206         if ( (rv=PyBytes_FromStringAndSize(NULL, bin_len)) == NULL )
 207                 return NULL;
 208         bin_data = (unsigned char *)PyBytes_AsString(rv);
 209
 210         for( ; bin_len > 0 ; ascii_len--, ascii_data++ ) {
 211                 /* XXX is it really best to add NULs if there's no more data */
 212                 this_ch = (ascii_len > 0) ? *ascii_data : 0;
 213                 if ( this_ch == '\n' || this_ch == '\r' || ascii_len <= 0) {
 214                         /*
 215                         ** Whitespace. Assume some spaces got eaten at
 216                         ** end-of-line. (We check this later)
 217                         */
 218                         this_ch = 0;
 219                 } else {
 220                         /* Check the character for legality
 221                         ** The 64 in stead of the expected 63 is because
 222                         ** there are a few uuencodes out there that use
 223                         ** '`' as zero instead of space.
 224                         */
 225                         if ( this_ch < ' ' || this_ch > (' ' + 64)) {
 226                                 PyErr_SetString(Error, "Illegal char");
 227                                 Py_DECREF(rv);
 228                                 return NULL;
 229                         }
 230                         this_ch = (this_ch - ' ') & 077;
 231                 }
 232                 /*
 233                 ** Shift it in on the low end, and see if there's
 234                 ** a byte ready for output.
 235                 */
 236                 leftchar = (leftchar << 6) | (this_ch);
 237                 leftbits += 6;
 238                 if ( leftbits >= 8 ) {
 239                         leftbits -= 8;
 240                         *bin_data++ = (leftchar >> leftbits) & 0xff;
 241                         leftchar &= ((1 << leftbits) - 1);
 242                         bin_len--;
 243                 }
 244         }
 245         /*
 246         ** Finally, check that if there's anything left on the line
 247         ** that it's whitespace only.
 248         */
 249         while( ascii_len-- > 0 ) {
 250                 this_ch = *ascii_data++;
 251                 /* Extra '`' may be written as padding in some cases */
 252                 if ( this_ch != ' ' && this_ch != ' '+64 &&
 253                      this_ch != '\n' && this_ch != '\r' ) {
 254                         PyErr_SetString(Error, "Trailing garbage");
 255                         Py_DECREF(rv);
 256                         return NULL;
 257                 }
 258         }
 259         return rv;
 260 }
 261
 262 PyDoc_STRVAR(doc_b2a_uu, "(bin) -> ascii. Uuencode line of data");
 263
 264 static PyObject *
 265 binascii_b2a_uu(PyObject *self, PyObject *args)
 266 {
 267         unsigned char *ascii_data, *bin_data;
 268         int leftbits = 0;
 269         unsigned char this_ch;
 270         unsigned int leftchar = 0;
 271         PyObject *rv;
 272         Py_ssize_t bin_len;
 273
 274         if ( !PyArg_ParseTuple(args, "s#:b2a_uu", &bin_data, &bin_len) )
 275                 return NULL;
 276         if ( bin_len > 45 ) {
 277                 /* The 45 is a limit that appears in all uuencode's */
 278                 PyErr_SetString(Error, "At most 45 bytes at once");
 279                 return NULL;
 280         }
 281
 282         /* We're lazy and allocate to much (fixed up later) */
 283         if ( (rv=PyBytes_FromStringAndSize(NULL, bin_len*2+2)) == NULL )
 284                 return NULL;
 285         ascii_data = (unsigned char *)PyBytes_AsString(rv);
 286
 287         /* Store the length */
 288         *ascii_data++ = ' ' + (bin_len & 077);
 289
 290         for( ; bin_len > 0 || leftbits != 0 ; bin_len--, bin_data++ ) {
 291                 /* Shift the data (or padding) into our buffer */
 292                 if ( bin_len > 0 )      /* Data */
 293                         leftchar = (leftchar << 8) | *bin_data;
 294                 else                    /* Padding */
 295                         leftchar <<= 8;
 296                 leftbits += 8;
 297
 298                 /* See if there are 6-bit groups ready */
 299                 while ( leftbits >= 6 ) {
 300                         this_ch = (leftchar >> (leftbits-6)) & 0x3f;
 301                         leftbits -= 6;
 302                         *ascii_data++ = this_ch + ' ';
 303                 }
 304         }
 305         *ascii_data++ = '\n';   /* Append a courtesy newline */
 306
 307         _PyBytes_Resize(&rv, (ascii_data -
 308                                (unsigned char *)PyBytes_AsString(rv)));
 309         return rv;
 310 }
 311
 312
 313 static int
 314 binascii_find_valid(unsigned char *s, Py_ssize_t slen, int num)
 315 {
 316         /* Finds & returns the (num+1)th
 317         ** valid character for base64, or -1 if none.
 318         */
 319
 320         int ret = -1;
 321         unsigned char c, b64val;
 322
 323         while ((slen > 0) && (ret == -1)) {
 324                 c = *s;
 325                 b64val = table_a2b_base64[c & 0x7f];
 326                 if ( ((c <= 0x7f) && (b64val != (unsigned char)-1)) ) {
 327                         if (num == 0)
 328                                 ret = *s;
 329                         num--;
 330                 }
 331
 332                 s++;
 333                 slen--;
 334         }
 335         return ret;
 336 }
 337
 338 PyDoc_STRVAR(doc_a2b_base64, "(ascii) -> bin. Decode a line of base64 data");
 339
 340 static PyObject *
 341 binascii_a2b_base64(PyObject *self, PyObject *args)
 342 {
 343         unsigned char *ascii_data, *bin_data;
 344         int leftbits = 0;
 345         unsigned char this_ch;
 346         unsigned int leftchar = 0;
 347         PyObject *rv;
 348         Py_ssize_t ascii_len, bin_len;
 349         int quad_pos = 0;
 350
 351         if ( !PyArg_ParseTuple(args, "t#:a2b_base64", &ascii_data, &ascii_len) )
 352                 return NULL;
 353
 354         bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
 355
 356         /* Allocate the buffer */
 357         if ( (rv=PyBytes_FromStringAndSize(NULL, bin_len)) == NULL )
 358                 return NULL;
 359         bin_data = (unsigned char *)PyBytes_AsString(rv);
 360         bin_len = 0;
 361
 362         for( ; ascii_len > 0; ascii_len--, ascii_data++) {
 363                 this_ch = *ascii_data;
 364
 365                 if (this_ch > 0x7f ||
 366                     this_ch == '\r' || this_ch == '\n' || this_ch == ' ')
 367                         continue;
 368
 369                 /* Check for pad sequences and ignore
 370                 ** the invalid ones.
 371                 */
 372                 if (this_ch == BASE64_PAD) {
 373                         if ( (quad_pos < 2) ||
 374                              ((quad_pos == 2) &&
 375                               (binascii_find_valid(ascii_data, ascii_len, 1)
 376                                != BASE64_PAD)) )
 377                         {
 378                                 continue;
 379                         }
 380                         else {
 381                                 /* A pad sequence means no more input.
 382                                 ** We've already interpreted the data
 383                                 ** from the quad at this point.
 384                                 */
 385                                 leftbits = 0;
 386                                 break;
 387                         }
 388                 }
 389
 390                 this_ch = table_a2b_base64[*ascii_data];
 391                 if ( this_ch == (unsigned char) -1 )
 392                         continue;
 393
 394                 /*
 395                 ** Shift it in on the low end, and see if there's
 396                 ** a byte ready for output.
 397                 */
 398                 quad_pos = (quad_pos + 1) & 0x03;
 399                 leftchar = (leftchar << 6) | (this_ch);
 400                 leftbits += 6;
 401
 402                 if ( leftbits >= 8 ) {
 403                         leftbits -= 8;
 404                         *bin_data++ = (leftchar >> leftbits) & 0xff;
 405                         bin_len++;
 406                         leftchar &= ((1 << leftbits) - 1);
 407                 }
 408         }
 409
 410         if (leftbits != 0) {
 411                 PyErr_SetString(Error, "Incorrect padding");
 412                 Py_DECREF(rv);
 413                 return NULL;
 414         }
 415
 416         /* And set string size correctly. If the result string is empty
 417         ** (because the input was all invalid) return the shared empty
 418         ** string instead; _PyBytes_Resize() won't do this for us.
 419         */
 420         if (bin_len > 0)
 421                 _PyBytes_Resize(&rv, bin_len);
 422         else {
 423                 Py_DECREF(rv);
 424                 rv = PyBytes_FromString("");
 425         }
 426         return rv;
 427 }
 428
 429 PyDoc_STRVAR(doc_b2a_base64, "(bin) -> ascii. Base64-code line of data");
 430
 431 static PyObject *
 432 binascii_b2a_base64(PyObject *self, PyObject *args)
 433 {
 434         unsigned char *ascii_data, *bin_data;
 435         int leftbits = 0;
 436         unsigned char this_ch;
 437         unsigned int leftchar = 0;
 438         PyObject *rv;
 439         Py_ssize_t bin_len;
 440
 441         if ( !PyArg_ParseTuple(args, "s#:b2a_base64", &bin_data, &bin_len) )
 442                 return NULL;
 443         if ( bin_len > BASE64_MAXBIN ) {
 444                 PyErr_SetString(Error, "Too much data for base64 line");
 445                 return NULL;
 446         }
 447
 448         /* We're lazy and allocate too much (fixed up later).
 449            "+3" leaves room for up to two pad characters and a trailing
 450            newline.  Note that 'b' gets encoded as 'Yg==\n' (1 in, 5 out). */
 451         if ( (rv=PyBytes_FromStringAndSize(NULL, bin_len*2 + 3)) == NULL )
 452                 return NULL;
 453         ascii_data = (unsigned char *)PyBytes_AsString(rv);
 454
 455         for( ; bin_len > 0 ; bin_len--, bin_data++ ) {
 456                 /* Shift the data into our buffer */
 457                 leftchar = (leftchar << 8) | *bin_data;
 458                 leftbits += 8;
 459
 460                 /* See if there are 6-bit groups ready */
 461                 while ( leftbits >= 6 ) {
 462                         this_ch = (leftchar >> (leftbits-6)) & 0x3f;
 463                         leftbits -= 6;
 464                         *ascii_data++ = table_b2a_base64[this_ch];
 465                 }
 466         }
 467         if ( leftbits == 2 ) {
 468                 *ascii_data++ = table_b2a_base64[(leftchar&3) << 4];
 469                 *ascii_data++ = BASE64_PAD;
 470                 *ascii_data++ = BASE64_PAD;
 471         } else if ( leftbits == 4 ) {
 472                 *ascii_data++ = table_b2a_base64[(leftchar&0xf) << 2];
 473                 *ascii_data++ = BASE64_PAD;
 474         }
 475         *ascii_data++ = '\n';   /* Append a courtesy newline */
 476
 477         _PyBytes_Resize(&rv, (ascii_data -
 478                                (unsigned char *)PyBytes_AsString(rv)));
 479         return rv;
 480 }
 481
 482 PyDoc_STRVAR(doc_a2b_hqx, "ascii -> bin, done. Decode .hqx coding");
 483
 484 static PyObject *
 485 binascii_a2b_hqx(PyObject *self, PyObject *args)
 486 {
 487         unsigned char *ascii_data, *bin_data;
 488         int leftbits = 0;
 489         unsigned char this_ch;
 490         unsigned int leftchar = 0;
 491         PyObject *rv;
 492         Py_ssize_t len;
 493         int done = 0;
 494
 495         if ( !PyArg_ParseTuple(args, "t#:a2b_hqx", &ascii_data, &len) )
 496                 return NULL;
 497
 498         /* Allocate a string that is too big (fixed later)
 499            Add two to the initial length to prevent interning which
 500            would preclude subsequent resizing.  */
 501         if ( (rv=PyBytes_FromStringAndSize(NULL, len+2)) == NULL )
 502                 return NULL;
 503         bin_data = (unsigned char *)PyBytes_AsString(rv);
 504
 505         for( ; len > 0 ; len--, ascii_data++ ) {
 506                 /* Get the byte and look it up */
 507                 this_ch = table_a2b_hqx[*ascii_data];
 508                 if ( this_ch == SKIP )
 509                         continue;
 510                 if ( this_ch == FAIL ) {
 511                         PyErr_SetString(Error, "Illegal char");
 512                         Py_DECREF(rv);
 513                         return NULL;
 514                 }
 515                 if ( this_ch == DONE ) {
 516                         /* The terminating colon */
 517                         done = 1;
 518                         break;
 519                 }
 520
 521                 /* Shift it into the buffer and see if any bytes are ready */
 522                 leftchar = (leftchar << 6) | (this_ch);
 523                 leftbits += 6;
 524                 if ( leftbits >= 8 ) {
 525                         leftbits -= 8;
 526                         *bin_data++ = (leftchar >> leftbits) & 0xff;
 527                         leftchar &= ((1 << leftbits) - 1);
 528                 }
 529         }
 530
 531         if ( leftbits && !done ) {
 532                 PyErr_SetString(Incomplete,
 533                                 "String has incomplete number of bytes");
 534                 Py_DECREF(rv);
 535                 return NULL;
 536         }
 537         _PyBytes_Resize(
 538                 &rv, (bin_data - (unsigned char *)PyBytes_AsString(rv)));
 539         if (rv) {
 540                 PyObject *rrv = Py_BuildValue("Oi", rv, done);
 541                 Py_DECREF(rv);
 542                 return rrv;
 543         }
 544
 545         return NULL;
 546 }
 547
 548 PyDoc_STRVAR(doc_rlecode_hqx, "Binhex RLE-code binary data");
 549
 550 static PyObject *
 551 binascii_rlecode_hqx(PyObject *self, PyObject *args)
 552 {
 553         unsigned char *in_data, *out_data;
 554         PyObject *rv;
 555         unsigned char ch;
 556         Py_ssize_t in, inend, len;
 557
 558         if ( !PyArg_ParseTuple(args, "s#:rlecode_hqx", &in_data, &len) )
 559                 return NULL;
 560
 561         /* Worst case: output is twice as big as input (fixed later) */
 562         if ( (rv=PyBytes_FromStringAndSize(NULL, len*2+2)) == NULL )
 563                 return NULL;
 564         out_data = (unsigned char *)PyBytes_AsString(rv);
 565
 566         for( in=0; in<len; in++) {
 567                 ch = in_data[in];
 568                 if ( ch == RUNCHAR ) {
 569                         /* RUNCHAR. Escape it. */
 570                         *out_data++ = RUNCHAR;
 571                         *out_data++ = 0;
 572                 } else {
 573                         /* Check how many following are the same */
 574                         for(inend=in+1;
 575                             inend<len && in_data[inend] == ch &&
 576                                     inend < in+255;
 577                             inend++) ;
 578                         if ( inend - in > 3 ) {
 579                                 /* More than 3 in a row. Output RLE. */
 580                                 *out_data++ = ch;
 581                                 *out_data++ = RUNCHAR;
 582                                 *out_data++ = inend-in;
 583                                 in = inend-1;
 584                         } else {
 585                                 /* Less than 3. Output the byte itself */
 586                                 *out_data++ = ch;
 587                         }
 588                 }
 589         }
 590         _PyBytes_Resize(&rv, (out_data -
 591                                (unsigned char *)PyBytes_AsString(rv)));
 592         return rv;
 593 }
 594
 595 PyDoc_STRVAR(doc_b2a_hqx, "Encode .hqx data");
 596
 597 static PyObject *
 598 binascii_b2a_hqx(PyObject *self, PyObject *args)
 599 {
 600         unsigned char *ascii_data, *bin_data;
 601         int leftbits = 0;
 602         unsigned char this_ch;
 603         unsigned int leftchar = 0;
 604         PyObject *rv;
 605         Py_ssize_t len;
 606
 607         if ( !PyArg_ParseTuple(args, "s#:b2a_hqx", &bin_data, &len) )
 608                 return NULL;
 609
 610         /* Allocate a buffer that is at least large enough */
 611         if ( (rv=PyBytes_FromStringAndSize(NULL, len*2+2)) == NULL )
 612                 return NULL;
 613         ascii_data = (unsigned char *)PyBytes_AsString(rv);
 614
 615         for( ; len > 0 ; len--, bin_data++ ) {
 616                 /* Shift into our buffer, and output any 6bits ready */
 617                 leftchar = (leftchar << 8) | *bin_data;
 618                 leftbits += 8;
 619                 while ( leftbits >= 6 ) {
 620                         this_ch = (leftchar >> (leftbits-6)) & 0x3f;
 621                         leftbits -= 6;
 622                         *ascii_data++ = table_b2a_hqx[this_ch];
 623                 }
 624         }
 625         /* Output a possible runt byte */
 626         if ( leftbits ) {
 627                 leftchar <<= (6-leftbits);
 628                 *ascii_data++ = table_b2a_hqx[leftchar & 0x3f];
 629         }
 630         _PyBytes_Resize(&rv, (ascii_data -
 631                                (unsigned char *)PyBytes_AsString(rv)));
 632         return rv;
 633 }
 634
 635 PyDoc_STRVAR(doc_rledecode_hqx, "Decode hexbin RLE-coded string");
 636
 637 static PyObject *
 638 binascii_rledecode_hqx(PyObject *self, PyObject *args)
 639 {
 640         unsigned char *in_data, *out_data;
 641         unsigned char in_byte, in_repeat;
 642         PyObject *rv;
 643         Py_ssize_t in_len, out_len, out_len_left;
 644
 645         if ( !PyArg_ParseTuple(args, "s#:rledecode_hqx", &in_data, &in_len) )
 646                 return NULL;
 647
 648         /* Empty string is a special case */
 649         if ( in_len == 0 )
 650                 return PyBytes_FromString("");
 651
 652         /* Allocate a buffer of reasonable size. Resized when needed */
 653         out_len = in_len*2;
 654         if ( (rv=PyBytes_FromStringAndSize(NULL, out_len)) == NULL )
 655                 return NULL;
 656         out_len_left = out_len;
 657         out_data = (unsigned char *)PyBytes_AsString(rv);
 658
 659         /*
 660         ** We need two macros here to get/put bytes and handle
 661         ** end-of-buffer for input and output strings.
 662         */
 663 #define INBYTE(b) \
 664         do { \
 665                  if ( --in_len < 0 ) { \
 666                            PyErr_SetString(Incomplete, ""); \
 667                            Py_DECREF(rv); \
 668                            return NULL; \
 669                  } \
 670                  b = *in_data++; \
 671         } while(0)
 672
 673 #define OUTBYTE(b) \
 674         do { \
 675                  if ( --out_len_left < 0 ) { \
 676                           _PyBytes_Resize(&rv, 2*out_len); \
 677                           if ( rv == NULL ) return NULL; \
 678                           out_data = (unsigned char *)PyBytes_AsString(rv) \
 679                                                                  + out_len; \
 680                           out_len_left = out_len-1; \
 681                           out_len = out_len * 2; \
 682                  } \
 683                  *out_data++ = b; \
 684         } while(0)
 685
 686                 /*
 687                 ** Handle first byte separately (since we have to get angry
 688                 ** in case of an orphaned RLE code).
 689                 */
 690                 INBYTE(in_byte);
 691
 692         if (in_byte == RUNCHAR) {
 693                 INBYTE(in_repeat);
 694                 if (in_repeat != 0) {
 695                         /* Note Error, not Incomplete (which is at the end
 696                         ** of the string only). This is a programmer error.
 697                         */
 698                         PyErr_SetString(Error, "Orphaned RLE code at start");
 699                         Py_DECREF(rv);
 700                         return NULL;
 701                 }
 702                 OUTBYTE(RUNCHAR);
 703         } else {
 704                 OUTBYTE(in_byte);
 705         }
 706
 707         while( in_len > 0 ) {
 708                 INBYTE(in_byte);
 709
 710                 if (in_byte == RUNCHAR) {
 711                         INBYTE(in_repeat);
 712                         if ( in_repeat == 0 ) {
 713                                 /* Just an escaped RUNCHAR value */
 714                                 OUTBYTE(RUNCHAR);
 715                         } else {
 716                                 /* Pick up value and output a sequence of it */
 717                                 in_byte = out_data[-1];
 718                                 while ( --in_repeat > 0 )
 719                                         OUTBYTE(in_byte);
 720                         }
 721                 } else {
 722                         /* Normal byte */
 723                         OUTBYTE(in_byte);
 724                 }
 725         }
 726         _PyBytes_Resize(&rv, (out_data -
 727                                (unsigned char *)PyBytes_AsString(rv)));
 728         return rv;
 729 }
 730
 731 PyDoc_STRVAR(doc_crc_hqx,
 732 "(data, oldcrc) -> newcrc. Compute hqx CRC incrementally");
 733
 734 static PyObject *
 735 binascii_crc_hqx(PyObject *self, PyObject *args)
 736 {
 737         unsigned char *bin_data;
 738         unsigned int crc;
 739         Py_ssize_t len;
 740
 741         if ( !PyArg_ParseTuple(args, "s#i:crc_hqx", &bin_data, &len, &crc) )
 742                 return NULL;
 743
 744         while(len--) {
 745                 crc=((crc<<8)&0xff00)^crctab_hqx[((crc>>8)&0xff)^*bin_data++];
 746         }
 747
 748         return Py_BuildValue("i", crc);
 749 }
 750
 751 PyDoc_STRVAR(doc_crc32,
 752 "(data, oldcrc = 0) -> newcrc. Compute CRC-32 incrementally");
 753
 754 #ifdef USE_ZLIB_CRC32
 755 /* This was taken from zlibmodule.c PyZlib_crc32 (but is PY_SSIZE_T_CLEAN) */
 756 static PyObject *
 757 binascii_crc32(PyObject *self, PyObject *args)
 758 {
 759     unsigned int crc32val = 0;  /* crc32(0L, Z_NULL, 0) */
 760     Byte *buf;
 761     Py_ssize_t len;
 762     int signed_val;
 763
 764     if (!PyArg_ParseTuple(args, "s#|I:crc32", &buf, &len, &crc32val))
 765         return NULL;
 766     /* In Python 2.x we return a signed integer regardless of native platform
 767      * long size (the 32bit unsigned long is treated as 32-bit signed and sign
 768      * extended into a 64-bit long inside the integer object).  3.0 does the
 769      * right thing and returns unsigned. http://bugs.python.org/issue1202 */
 770     signed_val = crc32(crc32val, buf, len);
 771     return PyInt_FromLong(signed_val);
 772 }
 773 #else  /* USE_ZLIB_CRC32 */
 774 /*  Crc - 32 BIT ANSI X3.66 CRC checksum files
 775     Also known as: ISO 3307
 776 **********************************************************************|
 777 *                                                                    *|
 778 * Demonstration program to compute the 32-bit CRC used as the frame  *|
 779 * check sequence in ADCCP (ANSI X3.66, also known as FIPS PUB 71     *|
 780 * and FED-STD-1003, the U.S. versions of CCITT's X.25 link-level     *|
 781 * protocol).  The 32-bit FCS was added via the Federal Register,     *|
 782 * 1 June 1982, p.23798.  I presume but don't know for certain that   *|
 783 * this polynomial is or will be included in CCITT V.41, which        *|
 784 * defines the 16-bit CRC (often called CRC-CCITT) polynomial.  FIPS  *|
 785 * PUB 78 says that the 32-bit FCS reduces otherwise undetected       *|
 786 * errors by a factor of 10^-5 over 16-bit FCS.                       *|
 787 *                                                                    *|
 788 **********************************************************************|
 789
 790  Copyright (C) 1986 Gary S. Brown.  You may use this program, or
 791  code or tables extracted from it, as desired without restriction.
 792
 793  First, the polynomial itself and its table of feedback terms.  The
 794  polynomial is
 795  X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
 796  Note that we take it "backwards" and put the highest-order term in
 797  the lowest-order bit.  The X^32 term is "implied"; the LSB is the
 798  X^31 term, etc.  The X^0 term (usually shown as "+1") results in
 799  the MSB being 1.
 800
 801  Note that the usual hardware shift register implementation, which
 802  is what we're using (we're merely optimizing it by doing eight-bit
 803  chunks at a time) shifts bits into the lowest-order term.  In our
 804  implementation, that means shifting towards the right.  Why do we
 805  do it this way?  Because the calculated CRC must be transmitted in
 806  order from highest-order term to lowest-order term.  UARTs transmit
 807  characters in order from LSB to MSB.  By storing the CRC this way,
 808  we hand it to the UART in the order low-byte to high-byte; the UART
 809  sends each low-bit to high-bit; and the result is transmission bit
 810  by bit from highest- to lowest-order term without requiring any bit
 811  shuffling on our part.  Reception works similarly.
 812
 813  The feedback terms table consists of 256, 32-bit entries.  Notes:
 814
 815   1. The table can be generated at runtime if desired; code to do so
 816      is shown later.  It might not be obvious, but the feedback
 817      terms simply represent the results of eight shift/xor opera-
 818      tions for all combinations of data and CRC register values.
 819
 820   2. The CRC accumulation logic is the same for all CRC polynomials,
 821      be they sixteen or thirty-two bits wide.  You simply choose the
 822      appropriate table.  Alternatively, because the table can be
 823      generated at runtime, you can start by generating the table for
 824      the polynomial in question and use exactly the same "updcrc",
 825      if your application needn't simultaneously handle two CRC
 826      polynomials.  (Note, however, that XMODEM is strange.)
 827
 828   3. For 16-bit CRCs, the table entries need be only 16 bits wide;
 829      of course, 32-bit entries work OK if the high 16 bits are zero.
 830
 831   4. The values must be right-shifted by eight bits by the "updcrc"
 832      logic; the shift must be unsigned (bring in zeroes).  On some
 833      hardware you could probably optimize the shift in assembler by
 834      using byte-swap instructions.
 835 ********************************************************************/
 836
 837 static unsigned int crc_32_tab[256] = {
 838 0x00000000U, 0x77073096U, 0xee0e612cU, 0x990951baU, 0x076dc419U,
 839 0x706af48fU, 0xe963a535U, 0x9e6495a3U, 0x0edb8832U, 0x79dcb8a4U,
 840 0xe0d5e91eU, 0x97d2d988U, 0x09b64c2bU, 0x7eb17cbdU, 0xe7b82d07U,
 841 0x90bf1d91U, 0x1db71064U, 0x6ab020f2U, 0xf3b97148U, 0x84be41deU,
 842 0x1adad47dU, 0x6ddde4ebU, 0xf4d4b551U, 0x83d385c7U, 0x136c9856U,
 843 0x646ba8c0U, 0xfd62f97aU, 0x8a65c9ecU, 0x14015c4fU, 0x63066cd9U,
 844 0xfa0f3d63U, 0x8d080df5U, 0x3b6e20c8U, 0x4c69105eU, 0xd56041e4U,
 845 0xa2677172U, 0x3c03e4d1U, 0x4b04d447U, 0xd20d85fdU, 0xa50ab56bU,
 846 0x35b5a8faU, 0x42b2986cU, 0xdbbbc9d6U, 0xacbcf940U, 0x32d86ce3U,
 847 0x45df5c75U, 0xdcd60dcfU, 0xabd13d59U, 0x26d930acU, 0x51de003aU,
 848 0xc8d75180U, 0xbfd06116U, 0x21b4f4b5U, 0x56b3c423U, 0xcfba9599U,
 849 0xb8bda50fU, 0x2802b89eU, 0x5f058808U, 0xc60cd9b2U, 0xb10be924U,
 850 0x2f6f7c87U, 0x58684c11U, 0xc1611dabU, 0xb6662d3dU, 0x76dc4190U,
 851 0x01db7106U, 0x98d220bcU, 0xefd5102aU, 0x71b18589U, 0x06b6b51fU,
 852 0x9fbfe4a5U, 0xe8b8d433U, 0x7807c9a2U, 0x0f00f934U, 0x9609a88eU,
 853 0xe10e9818U, 0x7f6a0dbbU, 0x086d3d2dU, 0x91646c97U, 0xe6635c01U,
 854 0x6b6b51f4U, 0x1c6c6162U, 0x856530d8U, 0xf262004eU, 0x6c0695edU,
 855 0x1b01a57bU, 0x8208f4c1U, 0xf50fc457U, 0x65b0d9c6U, 0x12b7e950U,
 856 0x8bbeb8eaU, 0xfcb9887cU, 0x62dd1ddfU, 0x15da2d49U, 0x8cd37cf3U,
 857 0xfbd44c65U, 0x4db26158U, 0x3ab551ceU, 0xa3bc0074U, 0xd4bb30e2U,
 858 0x4adfa541U, 0x3dd895d7U, 0xa4d1c46dU, 0xd3d6f4fbU, 0x4369e96aU,
 859 0x346ed9fcU, 0xad678846U, 0xda60b8d0U, 0x44042d73U, 0x33031de5U,
 860 0xaa0a4c5fU, 0xdd0d7cc9U, 0x5005713cU, 0x270241aaU, 0xbe0b1010U,
 861 0xc90c2086U, 0x5768b525U, 0x206f85b3U, 0xb966d409U, 0xce61e49fU,
 862 0x5edef90eU, 0x29d9c998U, 0xb0d09822U, 0xc7d7a8b4U, 0x59b33d17U,
 863 0x2eb40d81U, 0xb7bd5c3bU, 0xc0ba6cadU, 0xedb88320U, 0x9abfb3b6U,
 864 0x03b6e20cU, 0x74b1d29aU, 0xead54739U, 0x9dd277afU, 0x04db2615U,
 865 0x73dc1683U, 0xe3630b12U, 0x94643b84U, 0x0d6d6a3eU, 0x7a6a5aa8U,
 866 0xe40ecf0bU, 0x9309ff9dU, 0x0a00ae27U, 0x7d079eb1U, 0xf00f9344U,
 867 0x8708a3d2U, 0x1e01f268U, 0x6906c2feU, 0xf762575dU, 0x806567cbU,
 868 0x196c3671U, 0x6e6b06e7U, 0xfed41b76U, 0x89d32be0U, 0x10da7a5aU,
 869 0x67dd4accU, 0xf9b9df6fU, 0x8ebeeff9U, 0x17b7be43U, 0x60b08ed5U,
 870 0xd6d6a3e8U, 0xa1d1937eU, 0x38d8c2c4U, 0x4fdff252U, 0xd1bb67f1U,
 871 0xa6bc5767U, 0x3fb506ddU, 0x48b2364bU, 0xd80d2bdaU, 0xaf0a1b4cU,
 872 0x36034af6U, 0x41047a60U, 0xdf60efc3U, 0xa867df55U, 0x316e8eefU,
 873 0x4669be79U, 0xcb61b38cU, 0xbc66831aU, 0x256fd2a0U, 0x5268e236U,
 874 0xcc0c7795U, 0xbb0b4703U, 0x220216b9U, 0x5505262fU, 0xc5ba3bbeU,
 875 0xb2bd0b28U, 0x2bb45a92U, 0x5cb36a04U, 0xc2d7ffa7U, 0xb5d0cf31U,
 876 0x2cd99e8bU, 0x5bdeae1dU, 0x9b64c2b0U, 0xec63f226U, 0x756aa39cU,
 877 0x026d930aU, 0x9c0906a9U, 0xeb0e363fU, 0x72076785U, 0x05005713U,
 878 0x95bf4a82U, 0xe2b87a14U, 0x7bb12baeU, 0x0cb61b38U, 0x92d28e9bU,
 879 0xe5d5be0dU, 0x7cdcefb7U, 0x0bdbdf21U, 0x86d3d2d4U, 0xf1d4e242U,
 880 0x68ddb3f8U, 0x1fda836eU, 0x81be16cdU, 0xf6b9265bU, 0x6fb077e1U,
 881 0x18b74777U, 0x88085ae6U, 0xff0f6a70U, 0x66063bcaU, 0x11010b5cU,
 882 0x8f659effU, 0xf862ae69U, 0x616bffd3U, 0x166ccf45U, 0xa00ae278U,
 883 0xd70dd2eeU, 0x4e048354U, 0x3903b3c2U, 0xa7672661U, 0xd06016f7U,
 884 0x4969474dU, 0x3e6e77dbU, 0xaed16a4aU, 0xd9d65adcU, 0x40df0b66U,
 885 0x37d83bf0U, 0xa9bcae53U, 0xdebb9ec5U, 0x47b2cf7fU, 0x30b5ffe9U,
 886 0xbdbdf21cU, 0xcabac28aU, 0x53b39330U, 0x24b4a3a6U, 0xbad03605U,
 887 0xcdd70693U, 0x54de5729U, 0x23d967bfU, 0xb3667a2eU, 0xc4614ab8U,
 888 0x5d681b02U, 0x2a6f2b94U, 0xb40bbe37U, 0xc30c8ea1U, 0x5a05df1bU,
 889 0x2d02ef8dU
 890 };
 891
 892 static PyObject *
 893 binascii_crc32(PyObject *self, PyObject *args)
 894 { /* By Jim Ahlstrom; All rights transferred to CNRI */
 895         unsigned char *bin_data;
 896         unsigned int crc = 0U;  /* initial value of CRC */
 897         Py_ssize_t len;
 898         int result;
 899
 900         if ( !PyArg_ParseTuple(args, "s#|I:crc32", &bin_data, &len, &crc) )
 901                 return NULL;
 902
 903         crc = ~ crc;
 904         while (len--)
 905                 crc = crc_32_tab[(crc ^ *bin_data++) & 0xffU] ^ (crc >> 8);
 906                 /* Note:  (crc >> 8) MUST zero fill on left */
 907
 908         result = (int)(crc ^ 0xFFFFFFFFU);
 909         return PyInt_FromLong(result);
 910 }
 911 #endif  /* USE_ZLIB_CRC32 */
 912
 913
 914 static PyObject *
 915 binascii_hexlify(PyObject *self, PyObject *args)
 916 {
 917         char* argbuf;
 918         Py_ssize_t arglen;
 919         PyObject *retval;
 920         char* retbuf;
 921         Py_ssize_t i, j;
 922
 923         if (!PyArg_ParseTuple(args, "s#:b2a_hex", &argbuf, &arglen))
 924                 return NULL;
 925
 926         retval = PyBytes_FromStringAndSize(NULL, arglen*2);
 927         if (!retval)
 928                 return NULL;
 929         retbuf = PyBytes_AsString(retval);
 930         if (!retbuf)
 931                 goto finally;
 932
 933         /* make hex version of string, taken from shamodule.c */
 934         for (i=j=0; i < arglen; i++) {
 935                 char c;
 936                 c = (argbuf[i] >> 4) & 0xf;
 937                 c = (c>9) ? c+'a'-10 : c + '0';
 938                 retbuf[j++] = c;
 939                 c = argbuf[i] & 0xf;
 940                 c = (c>9) ? c+'a'-10 : c + '0';
 941                 retbuf[j++] = c;
 942         }
 943         return retval;
 944
 945   finally:
 946         Py_DECREF(retval);
 947         return NULL;
 948 }
 949
 950 PyDoc_STRVAR(doc_hexlify,
 951 "b2a_hex(data) -> s; Hexadecimal representation of binary data.\n\
 952 \n\
 953 This function is also available as \"hexlify()\".");
 954
 955
 956 static int
 957 to_int(int c)
 958 {
 959         if (isdigit(c))
 960                 return c - '0';
 961         else {
 962                 if (isupper(c))
 963                         c = tolower(c);
 964                 if (c >= 'a' && c <= 'f')
 965                         return c - 'a' + 10;
 966         }
 967         return -1;
 968 }
 969
 970
 971 static PyObject *
 972 binascii_unhexlify(PyObject *self, PyObject *args)
 973 {
 974         char* argbuf;
 975         Py_ssize_t arglen;
 976         PyObject *retval;
 977         char* retbuf;
 978         Py_ssize_t i, j;
 979
 980         if (!PyArg_ParseTuple(args, "s#:a2b_hex", &argbuf, &arglen))
 981                 return NULL;
 982
 983         /* XXX What should we do about strings with an odd length?  Should
 984          * we add an implicit leading zero, or a trailing zero?  For now,
 985          * raise an exception.
 986          */
 987         if (arglen % 2) {
 988                 PyErr_SetString(PyExc_TypeError, "Odd-length string");
 989                 return NULL;
 990         }
 991
 992         retval = PyBytes_FromStringAndSize(NULL, (arglen/2));
 993         if (!retval)
 994                 return NULL;
 995         retbuf = PyBytes_AsString(retval);
 996         if (!retbuf)
 997                 goto finally;
 998
 999         for (i=j=0; i < arglen; i += 2) {
1000                 int top = to_int(Py_CHARMASK(argbuf[i]));
1001                 int bot = to_int(Py_CHARMASK(argbuf[i+1]));
1002                 if (top == -1 || bot == -1) {
1003                         PyErr_SetString(PyExc_TypeError,
1004                                         "Non-hexadecimal digit found");
1005                         goto finally;
1006                 }
1007                 retbuf[j++] = (top << 4) + bot;
1008         }
1009         return retval;
1010
1011   finally:
1012         Py_DECREF(retval);
1013         return NULL;
1014 }
1015
1016 PyDoc_STRVAR(doc_unhexlify,
1017 "a2b_hex(hexstr) -> s; Binary data of hexadecimal representation.\n\
1018 \n\
1019 hexstr must contain an even number of hex digits (upper or lower case).\n\
1020 This function is also available as \"unhexlify()\"");
1021
/*
 * Value of each 7-bit character when read as a hexadecimal digit
 * ('0'-'9', 'A'-'F', 'a'-'f' map to 0..15), or -1 if it is not one.
 * The table is only ever read, so it is declared const.
 */
static const int table_hex[128] = {
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
   0, 1, 2, 3,  4, 5, 6, 7,  8, 9,-1,-1, -1,-1,-1,-1,
  -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
};

/* Look up a hex digit's value.  The table has 128 entries, so c must be
 * in the range 0..127; callers are responsible for that. */
#define hexval(c) (table_hex[(unsigned int)(c)])
1034
1035 #define MAXLINESIZE 76
1036
1037 PyDoc_STRVAR(doc_a2b_qp, "Decode a string of qp-encoded data");
1038
1039 static PyObject*
1040 binascii_a2b_qp(PyObject *self, PyObject *args, PyObject *kwargs)
1041 {
1042         Py_ssize_t in, out;
1043         char ch;
1044         unsigned char *data, *odata;
1045         Py_ssize_t datalen = 0;
1046         PyObject *rv;
1047         static char *kwlist[] = {"data", "header", NULL};
1048         int header = 0;
1049
1050         if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|i", kwlist, &data,
1051               &datalen, &header))
1052                 return NULL;
1053
1054         /* We allocate the output same size as input, this is overkill.
1055          * The previous implementation used calloc() so we'll zero out the
1056          * memory here too, since PyMem_Malloc() does not guarantee that.
1057          */
1058         odata = (unsigned char *) PyMem_Malloc(datalen);
1059         if (odata == NULL) {
1060                 PyErr_NoMemory();
1061                 return NULL;
1062         }
1063         memset(odata, 0, datalen);
1064
1065         in = out = 0;
1066         while (in < datalen) {
1067                 if (data[in] == '=') {
1068                         in++;
1069                         if (in >= datalen) break;
1070                         /* Soft line breaks */
1071                         if ((data[in] == '\n') || (data[in] == '\r')) {
1072                                 if (data[in] != '\n') {
1073                                         while (in < datalen && data[in] != '\n') in++;
1074                                 }
1075                                 if (in < datalen) in++;
1076                         }
1077                         else if (data[in] == '=') {
1078                                 /* broken case from broken python qp */
1079                                 odata[out++] = '=';
1080                                 in++;
1081                         }
1082                         else if (((data[in] >= 'A' && data[in] <= 'F') ||
1083                                   (data[in] >= 'a' && data[in] <= 'f') ||
1084                                   (data[in] >= '0' && data[in] <= '9')) &&
1085                                  ((data[in+1] >= 'A' && data[in+1] <= 'F') ||
1086                                   (data[in+1] >= 'a' && data[in+1] <= 'f') ||
1087                                   (data[in+1] >= '0' && data[in+1] <= '9'))) {
1088                                 /* hexval */
1089                                 ch = hexval(data[in]) << 4;
1090                                 in++;
1091                                 ch |= hexval(data[in]);
1092                                 in++;
1093                                 odata[out++] = ch;
1094                         }
1095                         else {
1096                           odata[out++] = '=';
1097                         }
1098                 }
1099                 else if (header && data[in] == '_') {
1100                         odata[out++] = ' ';
1101                         in++;
1102                 }
1103                 else {
1104                         odata[out] = data[in];
1105                         in++;
1106                         out++;
1107                 }
1108         }
1109         if ((rv = PyBytes_FromStringAndSize((char *)odata, out)) == NULL) {
1110                 PyMem_Free(odata);
1111                 return NULL;
1112         }
1113         PyMem_Free(odata);
1114         return rv;
1115 }
1116
/*
 * Write the two uppercase hexadecimal digits of ch into s[0] (high
 * nibble) and s[1] (low nibble).  No terminator is written.
 * Always returns 0.
 */
static int
to_hex (unsigned char ch, unsigned char *s)
{
    static const char digits[] = "0123456789ABCDEF";

    s[0] = digits[(ch >> 4) & 0x0F];
    s[1] = digits[ch & 0x0F];
    return 0;
}
1127
1128 PyDoc_STRVAR(doc_b2a_qp,
1129 "b2a_qp(data, quotetabs=0, istext=1, header=0) -> s; \n\
1130  Encode a string using quoted-printable encoding. \n\
1131 \n\
1132 On encoding, when istext is set, newlines are not encoded, and white \n\
1133 space at end of lines is.  When istext is not set, \\r and \\n (CR/LF) are \n\
1134 both encoded.  When quotetabs is set, space and tabs are encoded.");
1135
1136 /* XXX: This is ridiculously complicated to be backward compatible
1137  * (mostly) with the quopri module.  It doesn't re-create the quopri
1138  * module bug where text ending in CRLF has the CR encoded */
1139 static PyObject*
1140 binascii_b2a_qp (PyObject *self, PyObject *args, PyObject *kwargs)
1141 {
1142         Py_ssize_t in, out;
1143         unsigned char *data, *odata;
1144         Py_ssize_t datalen = 0, odatalen = 0;
1145         PyObject *rv;
1146         unsigned int linelen = 0;
1147         static char *kwlist[] = {"data", "quotetabs", "istext",
1148                                        "header", NULL};
1149         int istext = 1;
1150         int quotetabs = 0;
1151         int header = 0;
1152         unsigned char ch;
1153         int crlf = 0;
1154         unsigned char *p;
1155
1156         if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|iii", kwlist, &data,
1157               &datalen, &quotetabs, &istext, &header))
1158                 return NULL;
1159
1160         /* See if this string is using CRLF line ends */
1161         /* XXX: this function has the side effect of converting all of
1162          * the end of lines to be the same depending on this detection
1163          * here */
1164         p = (unsigned char *) memchr(data, '\n', datalen);
1165         if ((p != NULL) && (p > data) && (*(p-1) == '\r'))
1166                 crlf = 1;
1167
1168         /* First, scan to see how many characters need to be encoded */
1169         in = 0;
1170         while (in < datalen) {
1171                 if ((data[in] > 126) ||
1172                     (data[in] == '=') ||
1173                     (header && data[in] == '_') ||
1174                     ((data[in] == '.') && (linelen == 0) &&
1175                      (data[in+1] == '\n' || data[in+1] == '\r' || data[in+1] == 0)) ||
1176                     (!istext && ((data[in] == '\r') || (data[in] == '\n'))) ||
1177                     ((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) ||
1178                     ((data[in] < 33) &&
1179                      (data[in] != '\r') && (data[in] != '\n') &&
1180                      (quotetabs ||
1181                         (!quotetabs && ((data[in] != '\t') && (data[in] != ' '))))))
1182                 {
1183                         if ((linelen + 3) >= MAXLINESIZE) {
1184                                 linelen = 0;
1185                                 if (crlf)
1186                                         odatalen += 3;
1187                                 else
1188                                         odatalen += 2;
1189                         }
1190                         linelen += 3;
1191                         odatalen += 3;
1192                         in++;
1193                 }
1194                 else {
1195                         if (istext &&
1196                             ((data[in] == '\n') ||
1197                              ((in+1 < datalen) && (data[in] == '\r') &&
1198                              (data[in+1] == '\n'))))
1199                         {
1200                                 linelen = 0;
1201                                 /* Protect against whitespace on end of line */
1202                                 if (in && ((data[in-1] == ' ') || (data[in-1] == '\t')))
1203                                         odatalen += 2;
1204                                 if (crlf)
1205                                         odatalen += 2;
1206                                 else
1207                                         odatalen += 1;
1208                                 if (data[in] == '\r')
1209                                         in += 2;
1210                                 else
1211                                         in++;
1212                         }
1213                         else {
1214                                 if ((in + 1 != datalen) &&
1215                                     (data[in+1] != '\n') &&
1216                                     (linelen + 1) >= MAXLINESIZE) {
1217                                         linelen = 0;
1218                                         if (crlf)
1219                                                 odatalen += 3;
1220                                         else
1221                                                 odatalen += 2;
1222                                 }
1223                                 linelen++;
1224                                 odatalen++;
1225                                 in++;
1226                         }
1227                 }
1228         }
1229
1230         /* We allocate the output same size as input, this is overkill.
1231          * The previous implementation used calloc() so we'll zero out the
1232          * memory here too, since PyMem_Malloc() does not guarantee that.
1233          */
1234         odata = (unsigned char *) PyMem_Malloc(odatalen);
1235         if (odata == NULL) {
1236                 PyErr_NoMemory();
1237                 return NULL;
1238         }
1239         memset(odata, 0, odatalen);
1240
1241         in = out = linelen = 0;
1242         while (in < datalen) {
1243                 if ((data[in] > 126) ||
1244                     (data[in] == '=') ||
1245                     (header && data[in] == '_') ||
1246                     ((data[in] == '.') && (linelen == 0) &&
1247                      (data[in+1] == '\n' || data[in+1] == '\r' || data[in+1] == 0)) ||
1248                     (!istext && ((data[in] == '\r') || (data[in] == '\n'))) ||
1249                     ((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) ||
1250                     ((data[in] < 33) &&
1251                      (data[in] != '\r') && (data[in] != '\n') &&
1252                      (quotetabs ||
1253                         (!quotetabs && ((data[in] != '\t') && (data[in] != ' '))))))
1254                 {
1255                         if ((linelen + 3 )>= MAXLINESIZE) {
1256                                 odata[out++] = '=';
1257                                 if (crlf) odata[out++] = '\r';
1258                                 odata[out++] = '\n';
1259                                 linelen = 0;
1260                         }
1261                         odata[out++] = '=';
1262                         to_hex(data[in], &odata[out]);
1263                         out += 2;
1264                         in++;
1265                         linelen += 3;
1266                 }
1267                 else {
1268                         if (istext &&
1269                             ((data[in] == '\n') ||
1270                              ((in+1 < datalen) && (data[in] == '\r') &&
1271                              (data[in+1] == '\n'))))
1272                         {
1273                                 linelen = 0;
1274                                 /* Protect against whitespace on end of line */
1275                                 if (out && ((odata[out-1] == ' ') || (odata[out-1] == '\t'))) {
1276                                         ch = odata[out-1];
1277                                         odata[out-1] = '=';
1278                                         to_hex(ch, &odata[out]);
1279                                         out += 2;
1280                                 }
1281
1282                                 if (crlf) odata[out++] = '\r';
1283                                 odata[out++] = '\n';
1284                                 if (data[in] == '\r')
1285                                         in += 2;
1286                                 else
1287                                         in++;
1288                         }
1289                         else {
1290                                 if ((in + 1 != datalen) &&
1291                                     (data[in+1] != '\n') &&
1292                                     (linelen + 1) >= MAXLINESIZE) {
1293                                         odata[out++] = '=';
1294                                         if (crlf) odata[out++] = '\r';
1295                                         odata[out++] = '\n';
1296                                         linelen = 0;
1297                                 }
1298                                 linelen++;
1299                                 if (header && data[in] == ' ') {
1300                                         odata[out++] = '_';
1301                                         in++;
1302                                 }
1303                                 else {
1304                                         odata[out++] = data[in++];
1305                                 }
1306                         }
1307                 }
1308         }
1309         if ((rv = PyBytes_FromStringAndSize((char *)odata, out)) == NULL) {
1310                 PyMem_Free(odata);
1311                 return NULL;
1312         }
1313         PyMem_Free(odata);
1314         return rv;
1315 }
1316
1317 /* List of functions defined in the module */
1318
1319 static struct PyMethodDef binascii_module_methods[] = {
1320         {"a2b_uu",     binascii_a2b_uu,     METH_VARARGS, doc_a2b_uu},
1321         {"b2a_uu",     binascii_b2a_uu,     METH_VARARGS, doc_b2a_uu},
1322         {"a2b_base64", binascii_a2b_base64, METH_VARARGS, doc_a2b_base64},
1323         {"b2a_base64", binascii_b2a_base64, METH_VARARGS, doc_b2a_base64},
1324         {"a2b_hqx",    binascii_a2b_hqx,    METH_VARARGS, doc_a2b_hqx},
1325         {"b2a_hqx",    binascii_b2a_hqx,    METH_VARARGS, doc_b2a_hqx},
1326         {"b2a_hex",    binascii_hexlify,    METH_VARARGS, doc_hexlify},
1327         {"a2b_hex",    binascii_unhexlify,  METH_VARARGS, doc_unhexlify},
1328         {"hexlify",    binascii_hexlify,    METH_VARARGS, doc_hexlify},
1329         {"unhexlify",  binascii_unhexlify,  METH_VARARGS, doc_unhexlify},
1330         {"rlecode_hqx",   binascii_rlecode_hqx, METH_VARARGS, doc_rlecode_hqx},
1331         {"rledecode_hqx", binascii_rledecode_hqx, METH_VARARGS,
1332          doc_rledecode_hqx},
1333         {"crc_hqx",    binascii_crc_hqx,    METH_VARARGS, doc_crc_hqx},
1334         {"crc32",      binascii_crc32,      METH_VARARGS, doc_crc32},
1335         {"a2b_qp", (PyCFunction)binascii_a2b_qp, METH_VARARGS | METH_KEYWORDS,
1336           doc_a2b_qp},
1337         {"b2a_qp", (PyCFunction)binascii_b2a_qp, METH_VARARGS | METH_KEYWORDS,
1338           doc_b2a_qp},
1339         {NULL, NULL}                         /* sentinel */
1340 };
1341
1342
1343 /* Initialization function for the module (*must* be called initbinascii) */
1344 PyDoc_STRVAR(doc_binascii, "Conversion between binary data and ASCII");
1345
1346 PyMODINIT_FUNC
1347 initbinascii(void)
1348 {
1349         PyObject *m, *d, *x;
1350
1351         /* Create the module and add the functions */
1352         m = Py_InitModule("binascii", binascii_module_methods);
1353         if (m == NULL)
1354                 return;
1355
1356         d = PyModule_GetDict(m);
1357         x = PyBytes_FromString(doc_binascii);
1358         PyDict_SetItemString(d, "__doc__", x);
1359         Py_XDECREF(x);
1360
1361         Error = PyErr_NewException("binascii.Error", NULL, NULL);
1362         PyDict_SetItemString(d, "Error", Error);
1363         Incomplete = PyErr_NewException("binascii.Incomplete", NULL, NULL);
1364         PyDict_SetItemString(d, "Incomplete", Incomplete);
1365 }