Added information on function name added to LogRecord, and the 'extra' keyword parameter.
[python.git] / Modules / binascii.c
blob4a2c268f2b9170af378eda9939063ba9926ba550
1 /*
2 ** Routines to represent binary data in ASCII and vice-versa
3 **
4 ** This module currently supports the following encodings:
5 ** uuencode:
6 ** each line encodes 45 bytes (except possibly the last)
7 ** First char encodes (binary) length, rest data
8 ** each char encodes 6 bits, as follows:
9 ** binary: 01234567 abcdefgh ijklmnop
10 ** ascii: 012345 67abcd efghij klmnop
11 ** ASCII encoding method is "excess-space": 000000 is encoded as ' ', etc.
12 ** short binary data is zero-extended (so the bits are always in the
13 ** right place), this does *not* reflect in the length.
14 ** base64:
15 ** Line breaks are insignificant, but lines are at most 76 chars
16 ** each char encodes 6 bits, in similar order as uucode/hqx. Encoding
17 ** is done via a table.
18 ** Short binary data is filled (in ASCII) with '='.
19 ** hqx:
20 ** File starts with introductory text, real data starts and ends
21 ** with colons.
22 ** Data consists of three similar parts: info, datafork, resourcefork.
23 ** Each part is protected (at the end) with a 16-bit crc
24 ** The binary data is run-length encoded, and then ascii-fied:
25 ** binary: 01234567 abcdefgh ijklmnop
26 ** ascii: 012345 67abcd efghij klmnop
27 ** ASCII encoding is table-driven, see the code.
28 ** Short binary data results in the runt ascii-byte being output with
29 ** the bits in the right place.
31 ** While I was reading dozens of programs that encode or decode the formats
32 ** here (documentation? hihi:-) I have formulated Jansen's Observation:
34 ** Programs that encode binary data in ASCII are written in
35 ** such a style that they are as unreadable as possible. Devices used
36 ** include unnecessary global variables, burying important tables
37 ** in unrelated sourcefiles, putting functions in include files,
38 ** using seemingly-descriptive variable names for different purposes,
39 ** calls to empty subroutines and a host of others.
41 ** I have attempted to break with this tradition, but I guess that that
42 ** does make the performance sub-optimal. Oh well, too bad...
44 ** Jack Jansen, CWI, July 1995.
46 ** Added support for quoted-printable encoding, based on rfc 1521 et al
47 ** quoted-printable encoding specifies that non printable characters (anything
48 ** below 32 and above 126) be encoded as =XX where XX is the hexadecimal value
49 ** of the character. It also specifies some other behavior to enable 8bit data
50 ** in a mail message with little difficulty (maximum line sizes, protecting
51 ** some cases of whitespace, etc).
53 ** Brandon Long, September 2001.
57 #include "Python.h"
59 static PyObject *Error;
60 static PyObject *Incomplete;
63 ** hqx lookup table, ascii->binary.
66 #define RUNCHAR 0x90
68 #define DONE 0x7F
69 #define SKIP 0x7E
70 #define FAIL 0x7D
72 static unsigned char table_a2b_hqx[256] = {
73 /* ^@ ^A ^B ^C ^D ^E ^F ^G */
74 /* 0*/ FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
75 /* \b \t \n ^K ^L \r ^N ^O */
76 /* 1*/ FAIL, FAIL, SKIP, FAIL, FAIL, SKIP, FAIL, FAIL,
77 /* ^P ^Q ^R ^S ^T ^U ^V ^W */
78 /* 2*/ FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
79 /* ^X ^Y ^Z ^[ ^\ ^] ^^ ^_ */
80 /* 3*/ FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
81 /* ! " # $ % & ' */
82 /* 4*/ FAIL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
83 /* ( ) * + , - . / */
84 /* 5*/ 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, FAIL, FAIL,
85 /* 0 1 2 3 4 5 6 7 */
86 /* 6*/ 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, FAIL,
87 /* 8 9 : ; < = > ? */
88 /* 7*/ 0x14, 0x15, DONE, FAIL, FAIL, FAIL, FAIL, FAIL,
89 /* @ A B C D E F G */
90 /* 8*/ 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D,
91 /* H I J K L M N O */
92 /* 9*/ 0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, FAIL,
93 /* P Q R S T U V W */
94 /*10*/ 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, FAIL,
95 /* X Y Z [ \ ] ^ _ */
96 /*11*/ 0x2C, 0x2D, 0x2E, 0x2F, FAIL, FAIL, FAIL, FAIL,
97 /* ` a b c d e f g */
98 /*12*/ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, FAIL,
99 /* h i j k l m n o */
100 /*13*/ 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, FAIL, FAIL,
101 /* p q r s t u v w */
102 /*14*/ 0x3D, 0x3E, 0x3F, FAIL, FAIL, FAIL, FAIL, FAIL,
103 /* x y z { | } ~ ^? */
104 /*15*/ FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
105 /*16*/ FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
106 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
107 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
108 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
109 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
110 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
111 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
112 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
113 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
114 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
115 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
116 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
117 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
118 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
119 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
120 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
123 static unsigned char table_b2a_hqx[] =
124 "!\"#$%&'()*+,-012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr";
126 static char table_a2b_base64[] = {
127 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
128 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
129 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
130 52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1, 0,-1,-1, /* Note PAD->0 */
131 -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10, 11,12,13,14,
132 15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
133 -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
134 41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
137 #define BASE64_PAD '='
139 /* Max binary chunk size; limited only by available memory */
140 #define BASE64_MAXBIN (INT_MAX/2 - sizeof(PyStringObject) - 3)
142 static unsigned char table_b2a_base64[] =
143 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
147 static unsigned short crctab_hqx[256] = {
148 0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
149 0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,
150 0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6,
151 0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de,
152 0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485,
153 0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d,
154 0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4,
155 0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc,
156 0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823,
157 0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b,
158 0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12,
159 0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a,
160 0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41,
161 0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49,
162 0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70,
163 0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78,
164 0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f,
165 0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067,
166 0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e,
167 0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256,
168 0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d,
169 0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
170 0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c,
171 0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634,
172 0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab,
173 0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3,
174 0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a,
175 0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92,
176 0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9,
177 0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1,
178 0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8,
179 0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0,
182 PyDoc_STRVAR(doc_a2b_uu, "(ascii) -> bin. Decode a line of uuencoded data");
184 static PyObject *
185 binascii_a2b_uu(PyObject *self, PyObject *args)
187 unsigned char *ascii_data, *bin_data;
188 int leftbits = 0;
189 unsigned char this_ch;
190 unsigned int leftchar = 0;
191 PyObject *rv;
192 int ascii_len, bin_len;
194 if ( !PyArg_ParseTuple(args, "t#:a2b_uu", &ascii_data, &ascii_len) )
195 return NULL;
197 /* First byte: binary data length (in bytes) */
198 bin_len = (*ascii_data++ - ' ') & 077;
199 ascii_len--;
201 /* Allocate the buffer */
202 if ( (rv=PyString_FromStringAndSize(NULL, bin_len)) == NULL )
203 return NULL;
204 bin_data = (unsigned char *)PyString_AsString(rv);
206 for( ; bin_len > 0 ; ascii_len--, ascii_data++ ) {
207 /* XXX is it really best to add NULs if there's no more data */
208 this_ch = (ascii_len > 0) ? *ascii_data : 0;
209 if ( this_ch == '\n' || this_ch == '\r' || ascii_len <= 0) {
211 ** Whitespace. Assume some spaces got eaten at
212 ** end-of-line. (We check this later)
214 this_ch = 0;
215 } else {
216 /* Check the character for legality
217 ** The 64 in stead of the expected 63 is because
218 ** there are a few uuencodes out there that use
219 ** '`' as zero instead of space.
221 if ( this_ch < ' ' || this_ch > (' ' + 64)) {
222 PyErr_SetString(Error, "Illegal char");
223 Py_DECREF(rv);
224 return NULL;
226 this_ch = (this_ch - ' ') & 077;
229 ** Shift it in on the low end, and see if there's
230 ** a byte ready for output.
232 leftchar = (leftchar << 6) | (this_ch);
233 leftbits += 6;
234 if ( leftbits >= 8 ) {
235 leftbits -= 8;
236 *bin_data++ = (leftchar >> leftbits) & 0xff;
237 leftchar &= ((1 << leftbits) - 1);
238 bin_len--;
242 ** Finally, check that if there's anything left on the line
243 ** that it's whitespace only.
245 while( ascii_len-- > 0 ) {
246 this_ch = *ascii_data++;
247 /* Extra '`' may be written as padding in some cases */
248 if ( this_ch != ' ' && this_ch != ' '+64 &&
249 this_ch != '\n' && this_ch != '\r' ) {
250 PyErr_SetString(Error, "Trailing garbage");
251 Py_DECREF(rv);
252 return NULL;
255 return rv;
258 PyDoc_STRVAR(doc_b2a_uu, "(bin) -> ascii. Uuencode line of data");
260 static PyObject *
261 binascii_b2a_uu(PyObject *self, PyObject *args)
263 unsigned char *ascii_data, *bin_data;
264 int leftbits = 0;
265 unsigned char this_ch;
266 unsigned int leftchar = 0;
267 PyObject *rv;
268 int bin_len;
270 if ( !PyArg_ParseTuple(args, "s#:b2a_uu", &bin_data, &bin_len) )
271 return NULL;
272 if ( bin_len > 45 ) {
273 /* The 45 is a limit that appears in all uuencode's */
274 PyErr_SetString(Error, "At most 45 bytes at once");
275 return NULL;
278 /* We're lazy and allocate to much (fixed up later) */
279 if ( (rv=PyString_FromStringAndSize(NULL, bin_len*2+2)) == NULL )
280 return NULL;
281 ascii_data = (unsigned char *)PyString_AsString(rv);
283 /* Store the length */
284 *ascii_data++ = ' ' + (bin_len & 077);
286 for( ; bin_len > 0 || leftbits != 0 ; bin_len--, bin_data++ ) {
287 /* Shift the data (or padding) into our buffer */
288 if ( bin_len > 0 ) /* Data */
289 leftchar = (leftchar << 8) | *bin_data;
290 else /* Padding */
291 leftchar <<= 8;
292 leftbits += 8;
294 /* See if there are 6-bit groups ready */
295 while ( leftbits >= 6 ) {
296 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
297 leftbits -= 6;
298 *ascii_data++ = this_ch + ' ';
301 *ascii_data++ = '\n'; /* Append a courtesy newline */
303 _PyString_Resize(&rv, (ascii_data -
304 (unsigned char *)PyString_AsString(rv)));
305 return rv;
309 static int
310 binascii_find_valid(unsigned char *s, int slen, int num)
312 /* Finds & returns the (num+1)th
313 ** valid character for base64, or -1 if none.
316 int ret = -1;
317 unsigned char c, b64val;
319 while ((slen > 0) && (ret == -1)) {
320 c = *s;
321 b64val = table_a2b_base64[c & 0x7f];
322 if ( ((c <= 0x7f) && (b64val != (unsigned char)-1)) ) {
323 if (num == 0)
324 ret = *s;
325 num--;
328 s++;
329 slen--;
331 return ret;
334 PyDoc_STRVAR(doc_a2b_base64, "(ascii) -> bin. Decode a line of base64 data");
336 static PyObject *
337 binascii_a2b_base64(PyObject *self, PyObject *args)
339 unsigned char *ascii_data, *bin_data;
340 int leftbits = 0;
341 unsigned char this_ch;
342 unsigned int leftchar = 0;
343 PyObject *rv;
344 int ascii_len, bin_len;
345 int quad_pos = 0;
347 if ( !PyArg_ParseTuple(args, "t#:a2b_base64", &ascii_data, &ascii_len) )
348 return NULL;
350 bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
352 /* Allocate the buffer */
353 if ( (rv=PyString_FromStringAndSize(NULL, bin_len)) == NULL )
354 return NULL;
355 bin_data = (unsigned char *)PyString_AsString(rv);
356 bin_len = 0;
358 for( ; ascii_len > 0; ascii_len--, ascii_data++) {
359 this_ch = *ascii_data;
361 if (this_ch > 0x7f ||
362 this_ch == '\r' || this_ch == '\n' || this_ch == ' ')
363 continue;
365 /* Check for pad sequences and ignore
366 ** the invalid ones.
368 if (this_ch == BASE64_PAD) {
369 if ( (quad_pos < 2) ||
370 ((quad_pos == 2) &&
371 (binascii_find_valid(ascii_data, ascii_len, 1)
372 != BASE64_PAD)) )
374 continue;
376 else {
377 /* A pad sequence means no more input.
378 ** We've already interpreted the data
379 ** from the quad at this point.
381 leftbits = 0;
382 break;
386 this_ch = table_a2b_base64[*ascii_data];
387 if ( this_ch == (unsigned char) -1 )
388 continue;
391 ** Shift it in on the low end, and see if there's
392 ** a byte ready for output.
394 quad_pos = (quad_pos + 1) & 0x03;
395 leftchar = (leftchar << 6) | (this_ch);
396 leftbits += 6;
398 if ( leftbits >= 8 ) {
399 leftbits -= 8;
400 *bin_data++ = (leftchar >> leftbits) & 0xff;
401 bin_len++;
402 leftchar &= ((1 << leftbits) - 1);
406 if (leftbits != 0) {
407 PyErr_SetString(Error, "Incorrect padding");
408 Py_DECREF(rv);
409 return NULL;
412 /* And set string size correctly. If the result string is empty
413 ** (because the input was all invalid) return the shared empty
414 ** string instead; _PyString_Resize() won't do this for us.
416 if (bin_len > 0)
417 _PyString_Resize(&rv, bin_len);
418 else {
419 Py_DECREF(rv);
420 rv = PyString_FromString("");
422 return rv;
425 PyDoc_STRVAR(doc_b2a_base64, "(bin) -> ascii. Base64-code line of data");
427 static PyObject *
428 binascii_b2a_base64(PyObject *self, PyObject *args)
430 unsigned char *ascii_data, *bin_data;
431 int leftbits = 0;
432 unsigned char this_ch;
433 unsigned int leftchar = 0;
434 PyObject *rv;
435 int bin_len;
437 if ( !PyArg_ParseTuple(args, "s#:b2a_base64", &bin_data, &bin_len) )
438 return NULL;
439 if ( bin_len > BASE64_MAXBIN ) {
440 PyErr_SetString(Error, "Too much data for base64 line");
441 return NULL;
444 /* We're lazy and allocate too much (fixed up later).
445 "+3" leaves room for up to two pad characters and a trailing
446 newline. Note that 'b' gets encoded as 'Yg==\n' (1 in, 5 out). */
447 if ( (rv=PyString_FromStringAndSize(NULL, bin_len*2 + 3)) == NULL )
448 return NULL;
449 ascii_data = (unsigned char *)PyString_AsString(rv);
451 for( ; bin_len > 0 ; bin_len--, bin_data++ ) {
452 /* Shift the data into our buffer */
453 leftchar = (leftchar << 8) | *bin_data;
454 leftbits += 8;
456 /* See if there are 6-bit groups ready */
457 while ( leftbits >= 6 ) {
458 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
459 leftbits -= 6;
460 *ascii_data++ = table_b2a_base64[this_ch];
463 if ( leftbits == 2 ) {
464 *ascii_data++ = table_b2a_base64[(leftchar&3) << 4];
465 *ascii_data++ = BASE64_PAD;
466 *ascii_data++ = BASE64_PAD;
467 } else if ( leftbits == 4 ) {
468 *ascii_data++ = table_b2a_base64[(leftchar&0xf) << 2];
469 *ascii_data++ = BASE64_PAD;
471 *ascii_data++ = '\n'; /* Append a courtesy newline */
473 _PyString_Resize(&rv, (ascii_data -
474 (unsigned char *)PyString_AsString(rv)));
475 return rv;
478 PyDoc_STRVAR(doc_a2b_hqx, "ascii -> bin, done. Decode .hqx coding");
480 static PyObject *
481 binascii_a2b_hqx(PyObject *self, PyObject *args)
483 unsigned char *ascii_data, *bin_data;
484 int leftbits = 0;
485 unsigned char this_ch;
486 unsigned int leftchar = 0;
487 PyObject *rv;
488 int len;
489 int done = 0;
491 if ( !PyArg_ParseTuple(args, "t#:a2b_hqx", &ascii_data, &len) )
492 return NULL;
494 /* Allocate a string that is too big (fixed later)
495 Add two to the initial length to prevent interning which
496 would preclude subsequent resizing. */
497 if ( (rv=PyString_FromStringAndSize(NULL, len+2)) == NULL )
498 return NULL;
499 bin_data = (unsigned char *)PyString_AsString(rv);
501 for( ; len > 0 ; len--, ascii_data++ ) {
502 /* Get the byte and look it up */
503 this_ch = table_a2b_hqx[*ascii_data];
504 if ( this_ch == SKIP )
505 continue;
506 if ( this_ch == FAIL ) {
507 PyErr_SetString(Error, "Illegal char");
508 Py_DECREF(rv);
509 return NULL;
511 if ( this_ch == DONE ) {
512 /* The terminating colon */
513 done = 1;
514 break;
517 /* Shift it into the buffer and see if any bytes are ready */
518 leftchar = (leftchar << 6) | (this_ch);
519 leftbits += 6;
520 if ( leftbits >= 8 ) {
521 leftbits -= 8;
522 *bin_data++ = (leftchar >> leftbits) & 0xff;
523 leftchar &= ((1 << leftbits) - 1);
527 if ( leftbits && !done ) {
528 PyErr_SetString(Incomplete,
529 "String has incomplete number of bytes");
530 Py_DECREF(rv);
531 return NULL;
533 _PyString_Resize(
534 &rv, (bin_data - (unsigned char *)PyString_AsString(rv)));
535 if (rv) {
536 PyObject *rrv = Py_BuildValue("Oi", rv, done);
537 Py_DECREF(rv);
538 return rrv;
541 return NULL;
544 PyDoc_STRVAR(doc_rlecode_hqx, "Binhex RLE-code binary data");
546 static PyObject *
547 binascii_rlecode_hqx(PyObject *self, PyObject *args)
549 unsigned char *in_data, *out_data;
550 PyObject *rv;
551 unsigned char ch;
552 int in, inend, len;
554 if ( !PyArg_ParseTuple(args, "s#:rlecode_hqx", &in_data, &len) )
555 return NULL;
557 /* Worst case: output is twice as big as input (fixed later) */
558 if ( (rv=PyString_FromStringAndSize(NULL, len*2+2)) == NULL )
559 return NULL;
560 out_data = (unsigned char *)PyString_AsString(rv);
562 for( in=0; in<len; in++) {
563 ch = in_data[in];
564 if ( ch == RUNCHAR ) {
565 /* RUNCHAR. Escape it. */
566 *out_data++ = RUNCHAR;
567 *out_data++ = 0;
568 } else {
569 /* Check how many following are the same */
570 for(inend=in+1;
571 inend<len && in_data[inend] == ch &&
572 inend < in+255;
573 inend++) ;
574 if ( inend - in > 3 ) {
575 /* More than 3 in a row. Output RLE. */
576 *out_data++ = ch;
577 *out_data++ = RUNCHAR;
578 *out_data++ = inend-in;
579 in = inend-1;
580 } else {
581 /* Less than 3. Output the byte itself */
582 *out_data++ = ch;
586 _PyString_Resize(&rv, (out_data -
587 (unsigned char *)PyString_AsString(rv)));
588 return rv;
591 PyDoc_STRVAR(doc_b2a_hqx, "Encode .hqx data");
593 static PyObject *
594 binascii_b2a_hqx(PyObject *self, PyObject *args)
596 unsigned char *ascii_data, *bin_data;
597 int leftbits = 0;
598 unsigned char this_ch;
599 unsigned int leftchar = 0;
600 PyObject *rv;
601 int len;
603 if ( !PyArg_ParseTuple(args, "s#:b2a_hqx", &bin_data, &len) )
604 return NULL;
606 /* Allocate a buffer that is at least large enough */
607 if ( (rv=PyString_FromStringAndSize(NULL, len*2+2)) == NULL )
608 return NULL;
609 ascii_data = (unsigned char *)PyString_AsString(rv);
611 for( ; len > 0 ; len--, bin_data++ ) {
612 /* Shift into our buffer, and output any 6bits ready */
613 leftchar = (leftchar << 8) | *bin_data;
614 leftbits += 8;
615 while ( leftbits >= 6 ) {
616 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
617 leftbits -= 6;
618 *ascii_data++ = table_b2a_hqx[this_ch];
621 /* Output a possible runt byte */
622 if ( leftbits ) {
623 leftchar <<= (6-leftbits);
624 *ascii_data++ = table_b2a_hqx[leftchar & 0x3f];
626 _PyString_Resize(&rv, (ascii_data -
627 (unsigned char *)PyString_AsString(rv)));
628 return rv;
631 PyDoc_STRVAR(doc_rledecode_hqx, "Decode hexbin RLE-coded string");
633 static PyObject *
634 binascii_rledecode_hqx(PyObject *self, PyObject *args)
636 unsigned char *in_data, *out_data;
637 unsigned char in_byte, in_repeat;
638 PyObject *rv;
639 int in_len, out_len, out_len_left;
641 if ( !PyArg_ParseTuple(args, "s#:rledecode_hqx", &in_data, &in_len) )
642 return NULL;
644 /* Empty string is a special case */
645 if ( in_len == 0 )
646 return Py_BuildValue("s", "");
648 /* Allocate a buffer of reasonable size. Resized when needed */
649 out_len = in_len*2;
650 if ( (rv=PyString_FromStringAndSize(NULL, out_len)) == NULL )
651 return NULL;
652 out_len_left = out_len;
653 out_data = (unsigned char *)PyString_AsString(rv);
656 ** We need two macros here to get/put bytes and handle
657 ** end-of-buffer for input and output strings.
659 #define INBYTE(b) \
660 do { \
661 if ( --in_len < 0 ) { \
662 PyErr_SetString(Incomplete, ""); \
663 Py_DECREF(rv); \
664 return NULL; \
666 b = *in_data++; \
667 } while(0)
669 #define OUTBYTE(b) \
670 do { \
671 if ( --out_len_left < 0 ) { \
672 _PyString_Resize(&rv, 2*out_len); \
673 if ( rv == NULL ) return NULL; \
674 out_data = (unsigned char *)PyString_AsString(rv) \
675 + out_len; \
676 out_len_left = out_len-1; \
677 out_len = out_len * 2; \
679 *out_data++ = b; \
680 } while(0)
683 ** Handle first byte separately (since we have to get angry
684 ** in case of an orphaned RLE code).
686 INBYTE(in_byte);
688 if (in_byte == RUNCHAR) {
689 INBYTE(in_repeat);
690 if (in_repeat != 0) {
691 /* Note Error, not Incomplete (which is at the end
692 ** of the string only). This is a programmer error.
694 PyErr_SetString(Error, "Orphaned RLE code at start");
695 Py_DECREF(rv);
696 return NULL;
698 OUTBYTE(RUNCHAR);
699 } else {
700 OUTBYTE(in_byte);
703 while( in_len > 0 ) {
704 INBYTE(in_byte);
706 if (in_byte == RUNCHAR) {
707 INBYTE(in_repeat);
708 if ( in_repeat == 0 ) {
709 /* Just an escaped RUNCHAR value */
710 OUTBYTE(RUNCHAR);
711 } else {
712 /* Pick up value and output a sequence of it */
713 in_byte = out_data[-1];
714 while ( --in_repeat > 0 )
715 OUTBYTE(in_byte);
717 } else {
718 /* Normal byte */
719 OUTBYTE(in_byte);
722 _PyString_Resize(&rv, (out_data -
723 (unsigned char *)PyString_AsString(rv)));
724 return rv;
727 PyDoc_STRVAR(doc_crc_hqx,
728 "(data, oldcrc) -> newcrc. Compute hqx CRC incrementally");
730 static PyObject *
731 binascii_crc_hqx(PyObject *self, PyObject *args)
733 unsigned char *bin_data;
734 unsigned int crc;
735 int len;
737 if ( !PyArg_ParseTuple(args, "s#i:crc_hqx", &bin_data, &len, &crc) )
738 return NULL;
740 while(len--) {
741 crc=((crc<<8)&0xff00)^crctab_hqx[((crc>>8)&0xff)^*bin_data++];
744 return Py_BuildValue("i", crc);
747 PyDoc_STRVAR(doc_crc32,
748 "(data, oldcrc = 0) -> newcrc. Compute CRC-32 incrementally");
750 /* Crc - 32 BIT ANSI X3.66 CRC checksum files
751 Also known as: ISO 3307
752 **********************************************************************|
753 * *|
754 * Demonstration program to compute the 32-bit CRC used as the frame *|
755 * check sequence in ADCCP (ANSI X3.66, also known as FIPS PUB 71 *|
756 * and FED-STD-1003, the U.S. versions of CCITT's X.25 link-level *|
757 * protocol). The 32-bit FCS was added via the Federal Register, *|
758 * 1 June 1982, p.23798. I presume but don't know for certain that *|
759 * this polynomial is or will be included in CCITT V.41, which *|
760 * defines the 16-bit CRC (often called CRC-CCITT) polynomial. FIPS *|
761 * PUB 78 says that the 32-bit FCS reduces otherwise undetected *|
762 * errors by a factor of 10^-5 over 16-bit FCS. *|
763 * *|
764 **********************************************************************|
766 Copyright (C) 1986 Gary S. Brown. You may use this program, or
767 code or tables extracted from it, as desired without restriction.
769 First, the polynomial itself and its table of feedback terms. The
770 polynomial is
771 X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
772 Note that we take it "backwards" and put the highest-order term in
773 the lowest-order bit. The X^32 term is "implied"; the LSB is the
774 X^31 term, etc. The X^0 term (usually shown as "+1") results in
775 the MSB being 1.
777 Note that the usual hardware shift register implementation, which
778 is what we're using (we're merely optimizing it by doing eight-bit
779 chunks at a time) shifts bits into the lowest-order term. In our
780 implementation, that means shifting towards the right. Why do we
781 do it this way? Because the calculated CRC must be transmitted in
782 order from highest-order term to lowest-order term. UARTs transmit
783 characters in order from LSB to MSB. By storing the CRC this way,
784 we hand it to the UART in the order low-byte to high-byte; the UART
785 sends each low-bit to high-bit; and the result is transmission bit
786 by bit from highest- to lowest-order term without requiring any bit
787 shuffling on our part. Reception works similarly.
789 The feedback terms table consists of 256, 32-bit entries. Notes:
791 1. The table can be generated at runtime if desired; code to do so
792 is shown later. It might not be obvious, but the feedback
793 terms simply represent the results of eight shift/xor opera-
794 tions for all combinations of data and CRC register values.
796 2. The CRC accumulation logic is the same for all CRC polynomials,
797 be they sixteen or thirty-two bits wide. You simply choose the
798 appropriate table. Alternatively, because the table can be
799 generated at runtime, you can start by generating the table for
800 the polynomial in question and use exactly the same "updcrc",
801 if your application needn't simultaneously handle two CRC
802 polynomials. (Note, however, that XMODEM is strange.)
804 3. For 16-bit CRCs, the table entries need be only 16 bits wide;
805 of course, 32-bit entries work OK if the high 16 bits are zero.
807 4. The values must be right-shifted by eight bits by the "updcrc"
808 logic; the shift must be unsigned (bring in zeroes). On some
809 hardware you could probably optimize the shift in assembler by
810 using byte-swap instructions.
811 ********************************************************************/
813 static unsigned long crc_32_tab[256] = {
814 0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL,
815 0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL,
816 0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL,
817 0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL,
818 0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL,
819 0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL,
820 0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL,
821 0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL,
822 0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL,
823 0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL,
824 0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL,
825 0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL,
826 0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL,
827 0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL,
828 0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL,
829 0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL,
830 0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL,
831 0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL,
832 0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL,
833 0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL,
834 0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL,
835 0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL,
836 0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL,
837 0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL,
838 0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL,
839 0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL,
840 0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL,
841 0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL,
842 0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL,
843 0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL,
844 0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL,
845 0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL,
846 0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL,
847 0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL,
848 0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL,
849 0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL,
850 0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL,
851 0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL,
852 0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL,
853 0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL,
854 0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL,
855 0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL,
856 0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL,
857 0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL,
858 0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL,
859 0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL,
860 0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL,
861 0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL,
862 0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL,
863 0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL,
864 0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL,
865 0x2d02ef8dUL
868 static PyObject *
869 binascii_crc32(PyObject *self, PyObject *args)
870 { /* By Jim Ahlstrom; All rights transferred to CNRI */
871 unsigned char *bin_data;
872 unsigned long crc = 0UL; /* initial value of CRC */
873 int len;
874 long result;
876 if ( !PyArg_ParseTuple(args, "s#|l:crc32", &bin_data, &len, &crc) )
877 return NULL;
879 crc = ~ crc;
880 #if SIZEOF_LONG > 4
881 /* only want the trailing 32 bits */
882 crc &= 0xFFFFFFFFUL;
883 #endif
884 while (len--)
885 crc = crc_32_tab[(crc ^ *bin_data++) & 0xffUL] ^ (crc >> 8);
886 /* Note: (crc >> 8) MUST zero fill on left */
888 result = (long)(crc ^ 0xFFFFFFFFUL);
889 #if SIZEOF_LONG > 4
890 /* Extend the sign bit. This is one way to ensure the result is the
891 * same across platforms. The other way would be to return an
892 * unbounded unsigned long, but the evidence suggests that lots of
893 * code outside this treats the result as if it were a signed 4-byte
894 * integer.
896 result |= -(result & (1L << 31));
897 #endif
898 return PyInt_FromLong(result);
902 static PyObject *
903 binascii_hexlify(PyObject *self, PyObject *args)
905 char* argbuf;
906 int arglen;
907 PyObject *retval;
908 char* retbuf;
909 int i, j;
911 if (!PyArg_ParseTuple(args, "t#:b2a_hex", &argbuf, &arglen))
912 return NULL;
914 retval = PyString_FromStringAndSize(NULL, arglen*2);
915 if (!retval)
916 return NULL;
917 retbuf = PyString_AsString(retval);
918 if (!retbuf)
919 goto finally;
921 /* make hex version of string, taken from shamodule.c */
922 for (i=j=0; i < arglen; i++) {
923 char c;
924 c = (argbuf[i] >> 4) & 0xf;
925 c = (c>9) ? c+'a'-10 : c + '0';
926 retbuf[j++] = c;
927 c = argbuf[i] & 0xf;
928 c = (c>9) ? c+'a'-10 : c + '0';
929 retbuf[j++] = c;
931 return retval;
933 finally:
934 Py_DECREF(retval);
935 return NULL;
938 PyDoc_STRVAR(doc_hexlify,
939 "b2a_hex(data) -> s; Hexadecimal representation of binary data.\n\
941 This function is also available as \"hexlify()\".");
/*
 * Convert one hexadecimal digit character to its numeric value.
 * Accepts '0'-'9' and 'a'-'f' in either case; returns -1 for anything else.
 */
static int
to_int(int c)
{
    int lowered;

    if (isdigit(c)) {
        return c - '0';
    }

    /* Fold upper case onto lower case so a single range test suffices. */
    lowered = isupper(c) ? tolower(c) : c;
    if (lowered < 'a' || lowered > 'f') {
        return -1;
    }
    return lowered - 'a' + 10;
}
959 static PyObject *
960 binascii_unhexlify(PyObject *self, PyObject *args)
962 char* argbuf;
963 int arglen;
964 PyObject *retval;
965 char* retbuf;
966 int i, j;
968 if (!PyArg_ParseTuple(args, "s#:a2b_hex", &argbuf, &arglen))
969 return NULL;
971 /* XXX What should we do about strings with an odd length? Should
972 * we add an implicit leading zero, or a trailing zero? For now,
973 * raise an exception.
975 if (arglen % 2) {
976 PyErr_SetString(PyExc_TypeError, "Odd-length string");
977 return NULL;
980 retval = PyString_FromStringAndSize(NULL, (arglen/2));
981 if (!retval)
982 return NULL;
983 retbuf = PyString_AsString(retval);
984 if (!retbuf)
985 goto finally;
987 for (i=j=0; i < arglen; i += 2) {
988 int top = to_int(Py_CHARMASK(argbuf[i]));
989 int bot = to_int(Py_CHARMASK(argbuf[i+1]));
990 if (top == -1 || bot == -1) {
991 PyErr_SetString(PyExc_TypeError,
992 "Non-hexadecimal digit found");
993 goto finally;
995 retbuf[j++] = (top << 4) + bot;
997 return retval;
999 finally:
1000 Py_DECREF(retval);
1001 return NULL;
1004 PyDoc_STRVAR(doc_unhexlify,
1005 "a2b_hex(hexstr) -> s; Binary data of hexadecimal representation.\n\
1007 hexstr must contain an even number of hex digits (upper or lower case).\n\
1008 This function is also available as \"unhexlify()\"");
/* Value of each 7-bit ASCII character when read as a hexadecimal digit,
 * or -1 when the character is not one.  One row per 16 code points.
 * The table is never written, so it is declared const.
 */
static const int table_hex[128] = {
  /* 0x00 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  /* 0x10 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  /* 0x20 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  /* '0'-'9' */ 0, 1, 2, 3,  4, 5, 6, 7,  8, 9,-1,-1, -1,-1,-1,-1,
  /* 'A'-'F' */ -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  /* 0x50 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  /* 'a'-'f' */ -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  /* 0x70 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
};

/* Look up the hex value of character c.  The table has only 128 entries,
 * so the caller must ensure c is in the range 0..127. */
#define hexval(c) table_hex[(unsigned int)(c)]

/* Longest line the quoted-printable encoder will produce. */
#define MAXLINESIZE 76
1025 PyDoc_STRVAR(doc_a2b_qp, "Decode a string of qp-encoded data");
1027 static PyObject*
1028 binascii_a2b_qp(PyObject *self, PyObject *args, PyObject *kwargs)
1030 unsigned int in, out;
1031 char ch;
1032 unsigned char *data, *odata;
1033 unsigned int datalen = 0;
1034 PyObject *rv;
1035 static const char *kwlist[] = {"data", "header", NULL};
1036 int header = 0;
1038 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|i", kwlist, &data,
1039 &datalen, &header))
1040 return NULL;
1042 /* We allocate the output same size as input, this is overkill.
1043 * The previous implementation used calloc() so we'll zero out the
1044 * memory here too, since PyMem_Malloc() does not guarantee that.
1046 odata = (unsigned char *) PyMem_Malloc(datalen);
1047 if (odata == NULL) {
1048 PyErr_NoMemory();
1049 return NULL;
1051 memset(odata, 0, datalen);
1053 in = out = 0;
1054 while (in < datalen) {
1055 if (data[in] == '=') {
1056 in++;
1057 if (in >= datalen) break;
1058 /* Soft line breaks */
1059 if ((data[in] == '\n') || (data[in] == '\r') ||
1060 (data[in] == ' ') || (data[in] == '\t')) {
1061 if (data[in] != '\n') {
1062 while (in < datalen && data[in] != '\n') in++;
1064 if (in < datalen) in++;
1066 else if (data[in] == '=') {
1067 /* broken case from broken python qp */
1068 odata[out++] = '=';
1069 in++;
1071 else if (((data[in] >= 'A' && data[in] <= 'F') ||
1072 (data[in] >= 'a' && data[in] <= 'f') ||
1073 (data[in] >= '0' && data[in] <= '9')) &&
1074 ((data[in+1] >= 'A' && data[in+1] <= 'F') ||
1075 (data[in+1] >= 'a' && data[in+1] <= 'f') ||
1076 (data[in+1] >= '0' && data[in+1] <= '9'))) {
1077 /* hexval */
1078 ch = hexval(data[in]) << 4;
1079 in++;
1080 ch |= hexval(data[in]);
1081 in++;
1082 odata[out++] = ch;
1084 else {
1085 odata[out++] = '=';
1088 else if (header && data[in] == '_') {
1089 odata[out++] = ' ';
1090 in++;
1092 else {
1093 odata[out] = data[in];
1094 in++;
1095 out++;
1098 if ((rv = PyString_FromStringAndSize((char *)odata, out)) == NULL) {
1099 PyMem_Free(odata);
1100 return NULL;
1102 PyMem_Free(odata);
1103 return rv;
/*
 * Write the two uppercase hexadecimal digits of `ch` into s[0] (high
 * nibble) and s[1] (low nibble).  No terminator is written.
 * Always returns 0.
 */
static int
to_hex (unsigned char ch, unsigned char *s)
{
    static const char digits[] = "0123456789ABCDEF";

    s[0] = digits[(ch >> 4) & 0x0F];
    s[1] = digits[ch & 0x0F];
    return 0;
}
1117 PyDoc_STRVAR(doc_b2a_qp,
1118 "b2a_qp(data, quotetabs=0, istext=1, header=0) -> s; \n\
1119 Encode a string using quoted-printable encoding. \n\
1121 On encoding, when istext is set, newlines are not encoded, and white \n\
1122 space at end of lines is. When istext is not set, \\r and \\n (CR/LF) are \n\
1123 both encoded. When quotetabs is set, space and tabs are encoded.");
1125 /* XXX: This is ridiculously complicated to be backward compatible
1126 * (mostly) with the quopri module. It doesn't re-create the quopri
1127 * module bug where text ending in CRLF has the CR encoded */
1128 static PyObject*
1129 binascii_b2a_qp (PyObject *self, PyObject *args, PyObject *kwargs)
1131 unsigned int in, out;
1132 unsigned char *data, *odata;
1133 unsigned int datalen = 0, odatalen = 0;
1134 PyObject *rv;
1135 unsigned int linelen = 0;
1136 static const char *kwlist[] = {"data", "quotetabs", "istext",
1137 "header", NULL};
1138 int istext = 1;
1139 int quotetabs = 0;
1140 int header = 0;
1141 unsigned char ch;
1142 int crlf = 0;
1143 unsigned char *p;
1145 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|iii", kwlist, &data,
1146 &datalen, &quotetabs, &istext, &header))
1147 return NULL;
1149 /* See if this string is using CRLF line ends */
1150 /* XXX: this function has the side effect of converting all of
1151 * the end of lines to be the same depending on this detection
1152 * here */
1153 p = (unsigned char *) strchr((char *)data, '\n');
1154 if ((p != NULL) && (p > data) && (*(p-1) == '\r'))
1155 crlf = 1;
1157 /* First, scan to see how many characters need to be encoded */
1158 in = 0;
1159 while (in < datalen) {
1160 if ((data[in] > 126) ||
1161 (data[in] == '=') ||
1162 (header && data[in] == '_') ||
1163 ((data[in] == '.') && (linelen == 1)) ||
1164 (!istext && ((data[in] == '\r') || (data[in] == '\n'))) ||
1165 ((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) ||
1166 ((data[in] < 33) &&
1167 (data[in] != '\r') && (data[in] != '\n') &&
1168 (quotetabs && ((data[in] != '\t') || (data[in] != ' ')))))
1170 if ((linelen + 3) >= MAXLINESIZE) {
1171 linelen = 0;
1172 if (crlf)
1173 odatalen += 3;
1174 else
1175 odatalen += 2;
1177 linelen += 3;
1178 odatalen += 3;
1179 in++;
1181 else {
1182 if (istext &&
1183 ((data[in] == '\n') ||
1184 ((in+1 < datalen) && (data[in] == '\r') &&
1185 (data[in+1] == '\n'))))
1187 linelen = 0;
1188 /* Protect against whitespace on end of line */
1189 if (in && ((data[in-1] == ' ') || (data[in-1] == '\t')))
1190 odatalen += 2;
1191 if (crlf)
1192 odatalen += 2;
1193 else
1194 odatalen += 1;
1195 if (data[in] == '\r')
1196 in += 2;
1197 else
1198 in++;
1200 else {
1201 if ((in + 1 != datalen) &&
1202 (data[in+1] != '\n') &&
1203 (linelen + 1) >= MAXLINESIZE) {
1204 linelen = 0;
1205 if (crlf)
1206 odatalen += 3;
1207 else
1208 odatalen += 2;
1210 linelen++;
1211 odatalen++;
1212 in++;
1217 /* We allocate the output same size as input, this is overkill.
1218 * The previous implementation used calloc() so we'll zero out the
1219 * memory here too, since PyMem_Malloc() does not guarantee that.
1221 odata = (unsigned char *) PyMem_Malloc(odatalen);
1222 if (odata == NULL) {
1223 PyErr_NoMemory();
1224 return NULL;
1226 memset(odata, 0, odatalen);
1228 in = out = linelen = 0;
1229 while (in < datalen) {
1230 if ((data[in] > 126) ||
1231 (data[in] == '=') ||
1232 (header && data[in] == '_') ||
1233 ((data[in] == '.') && (linelen == 1)) ||
1234 (!istext && ((data[in] == '\r') || (data[in] == '\n'))) ||
1235 ((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) ||
1236 ((data[in] < 33) &&
1237 (data[in] != '\r') && (data[in] != '\n') &&
1238 (quotetabs && ((data[in] != '\t') || (data[in] != ' ')))))
1240 if ((linelen + 3 )>= MAXLINESIZE) {
1241 odata[out++] = '=';
1242 if (crlf) odata[out++] = '\r';
1243 odata[out++] = '\n';
1244 linelen = 0;
1246 odata[out++] = '=';
1247 to_hex(data[in], &odata[out]);
1248 out += 2;
1249 in++;
1250 linelen += 3;
1252 else {
1253 if (istext &&
1254 ((data[in] == '\n') ||
1255 ((in+1 < datalen) && (data[in] == '\r') &&
1256 (data[in+1] == '\n'))))
1258 linelen = 0;
1259 /* Protect against whitespace on end of line */
1260 if (out && ((odata[out-1] == ' ') || (odata[out-1] == '\t'))) {
1261 ch = odata[out-1];
1262 odata[out-1] = '=';
1263 to_hex(ch, &odata[out]);
1264 out += 2;
1267 if (crlf) odata[out++] = '\r';
1268 odata[out++] = '\n';
1269 if (data[in] == '\r')
1270 in += 2;
1271 else
1272 in++;
1274 else {
1275 if ((in + 1 != datalen) &&
1276 (data[in+1] != '\n') &&
1277 (linelen + 1) >= MAXLINESIZE) {
1278 odata[out++] = '=';
1279 if (crlf) odata[out++] = '\r';
1280 odata[out++] = '\n';
1281 linelen = 0;
1283 linelen++;
1284 if (header && data[in] == ' ') {
1285 odata[out++] = '_';
1286 in++;
1288 else {
1289 odata[out++] = data[in++];
1294 if ((rv = PyString_FromStringAndSize((char *)odata, out)) == NULL) {
1295 PyMem_Free(odata);
1296 return NULL;
1298 PyMem_Free(odata);
1299 return rv;
1302 /* List of functions defined in the module */
1304 static struct PyMethodDef binascii_module_methods[] = {
1305 {"a2b_uu", binascii_a2b_uu, METH_VARARGS, doc_a2b_uu},
1306 {"b2a_uu", binascii_b2a_uu, METH_VARARGS, doc_b2a_uu},
1307 {"a2b_base64", binascii_a2b_base64, METH_VARARGS, doc_a2b_base64},
1308 {"b2a_base64", binascii_b2a_base64, METH_VARARGS, doc_b2a_base64},
1309 {"a2b_hqx", binascii_a2b_hqx, METH_VARARGS, doc_a2b_hqx},
1310 {"b2a_hqx", binascii_b2a_hqx, METH_VARARGS, doc_b2a_hqx},
1311 {"b2a_hex", binascii_hexlify, METH_VARARGS, doc_hexlify},
1312 {"a2b_hex", binascii_unhexlify, METH_VARARGS, doc_unhexlify},
1313 {"hexlify", binascii_hexlify, METH_VARARGS, doc_hexlify},
1314 {"unhexlify", binascii_unhexlify, METH_VARARGS, doc_unhexlify},
1315 {"rlecode_hqx", binascii_rlecode_hqx, METH_VARARGS, doc_rlecode_hqx},
1316 {"rledecode_hqx", binascii_rledecode_hqx, METH_VARARGS,
1317 doc_rledecode_hqx},
1318 {"crc_hqx", binascii_crc_hqx, METH_VARARGS, doc_crc_hqx},
1319 {"crc32", binascii_crc32, METH_VARARGS, doc_crc32},
1320 {"a2b_qp", (PyCFunction)binascii_a2b_qp, METH_VARARGS | METH_KEYWORDS,
1321 doc_a2b_qp},
1322 {"b2a_qp", (PyCFunction)binascii_b2a_qp, METH_VARARGS | METH_KEYWORDS,
1323 doc_b2a_qp},
1324 {NULL, NULL} /* sentinel */
1328 /* Initialization function for the module (*must* be called initbinascii) */
1329 PyDoc_STRVAR(doc_binascii, "Conversion between binary data and ASCII");
1331 PyMODINIT_FUNC
1332 initbinascii(void)
1334 PyObject *m, *d, *x;
1336 /* Create the module and add the functions */
1337 m = Py_InitModule("binascii", binascii_module_methods);
1338 if (m == NULL)
1339 return;
1341 d = PyModule_GetDict(m);
1342 x = PyString_FromString(doc_binascii);
1343 PyDict_SetItemString(d, "__doc__", x);
1344 Py_XDECREF(x);
1346 Error = PyErr_NewException("binascii.Error", NULL, NULL);
1347 PyDict_SetItemString(d, "Error", Error);
1348 Incomplete = PyErr_NewException("binascii.Incomplete", NULL, NULL);
1349 PyDict_SetItemString(d, "Incomplete", Incomplete);