Added section on passing contextual information to logging and documentation for...
[python.git] / Modules / binascii.c
blob00f950d19da426bc4d282004f3be270b0e1ee248
1 /*
2 ** Routines to represent binary data in ASCII and vice-versa
3 **
4 ** This module currently supports the following encodings:
5 ** uuencode:
6 ** each line encodes 45 bytes (except possibly the last)
7 ** First char encodes (binary) length, rest data
8 ** each char encodes 6 bits, as follows:
9 ** binary: 01234567 abcdefgh ijklmnop
10 ** ascii: 012345 67abcd efghij klmnop
11 ** ASCII encoding method is "excess-space": 000000 is encoded as ' ', etc.
12 ** short binary data is zero-extended (so the bits are always in the
13 ** right place), this does *not* reflect in the length.
14 ** base64:
15 ** Line breaks are insignificant, but lines are at most 76 chars
16 ** each char encodes 6 bits, in similar order as uucode/hqx. Encoding
17 ** is done via a table.
18 ** Short binary data is filled (in ASCII) with '='.
19 ** hqx:
20 ** File starts with introductory text, real data starts and ends
21 ** with colons.
22 ** Data consists of three similar parts: info, datafork, resourcefork.
23 ** Each part is protected (at the end) with a 16-bit crc
24 ** The binary data is run-length encoded, and then ascii-fied:
25 ** binary: 01234567 abcdefgh ijklmnop
26 ** ascii: 012345 67abcd efghij klmnop
27 ** ASCII encoding is table-driven, see the code.
28 ** Short binary data results in the runt ascii-byte being output with
29 ** the bits in the right place.
31 ** While I was reading dozens of programs that encode or decode the formats
32 ** here (documentation? hihi:-) I have formulated Jansen's Observation:
34 ** Programs that encode binary data in ASCII are written in
35 ** such a style that they are as unreadable as possible. Devices used
36 ** include unnecessary global variables, burying important tables
37 ** in unrelated sourcefiles, putting functions in include files,
38 ** using seemingly-descriptive variable names for different purposes,
39 ** calls to empty subroutines and a host of others.
41 ** I have attempted to break with this tradition, but I guess that that
42 ** does make the performance sub-optimal. Oh well, too bad...
44 ** Jack Jansen, CWI, July 1995.
46 ** Added support for quoted-printable encoding, based on rfc 1521 et al
47 ** quoted-printable encoding specifies that non printable characters (anything
48 ** below 32 and above 126) be encoded as =XX where XX is the hexadecimal value
49 ** of the character. It also specifies some other behavior to enable 8bit data
50 ** in a mail message with little difficulty (maximum line sizes, protecting
51 ** some cases of whitespace, etc).
53 ** Brandon Long, September 2001.
56 #define PY_SSIZE_T_CLEAN
58 #include "Python.h"
60 static PyObject *Error;
61 static PyObject *Incomplete;
/*
** hqx lookup table, ascii->binary.
**
** Indexed by input byte. Each entry is either the 6-bit value of that
** character (0x00..0x3F) or one of the markers below:
**   DONE - the terminating colon
**   SKIP - a character to be ignored (newline, carriage return)
**   FAIL - a character that is not legal in hqx data
*/

#define RUNCHAR 0x90

#define DONE 0x7F
#define SKIP 0x7E
#define FAIL 0x7D

static const unsigned char table_a2b_hqx[256] = {
/*       ^@    ^A    ^B    ^C    ^D    ^E    ^F    ^G   */
/* 0*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       \b    \t    \n    ^K    ^L    \r    ^N    ^O   */
/* 1*/  FAIL, FAIL, SKIP, FAIL, FAIL, SKIP, FAIL, FAIL,
/*       ^P    ^Q    ^R    ^S    ^T    ^U    ^V    ^W   */
/* 2*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       ^X    ^Y    ^Z    ^[    ^\    ^]    ^^    ^_   */
/* 3*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*              !     "     #     $     %     &     '   */
/* 4*/  FAIL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
/*        (     )     *     +     ,     -     .     /   */
/* 5*/  0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, FAIL, FAIL,
/*        0     1     2     3     4     5     6     7   */
/* 6*/  0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, FAIL,
/*        8     9     :     ;     <     =     >     ?   */
/* 7*/  0x14, 0x15, DONE, FAIL, FAIL, FAIL, FAIL, FAIL,
/*        @     A     B     C     D     E     F     G   */
/* 8*/  0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D,
/*        H     I     J     K     L     M     N     O   */
/* 9*/  0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, FAIL,
/*        P     Q     R     S     T     U     V     W   */
/*10*/  0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, FAIL,
/*        X     Y     Z     [     \     ]     ^     _   */
/*11*/  0x2C, 0x2D, 0x2E, 0x2F, FAIL, FAIL, FAIL, FAIL,
/*        `     a     b     c     d     e     f     g   */
/*12*/  0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, FAIL,
/*        h     i     j     k     l     m     n     o   */
/*13*/  0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, FAIL, FAIL,
/*        p     q     r     s     t     u     v     w   */
/*14*/  0x3D, 0x3E, 0x3F, FAIL, FAIL, FAIL, FAIL, FAIL,
/*        x     y     z     {     |     }     ~    ^?   */
/*15*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*16*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
};
/* hqx lookup table, binary->ascii: maps a 6-bit value to its character. */
static const unsigned char table_b2a_hqx[] =
"!\"#$%&'()*+,-012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr";
/*
** base64 lookup table, ascii->binary, indexed by a 7-bit character.
** -1 marks a character that is not part of the base64 alphabet.
** The pad character '=' maps to 0 (see the note on its row).
*/
static const char table_a2b_base64[] = {
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
    52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1, 0,-1,-1, /* Note PAD->0 */
    -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
    15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
    -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
    41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
};
138 #define BASE64_PAD '='
140 /* Max binary chunk size; limited only by available memory */
141 #define BASE64_MAXBIN (INT_MAX/2 - sizeof(PyStringObject) - 3)
/* base64 lookup table, binary->ascii: maps a 6-bit value to its character. */
static const unsigned char table_b2a_base64[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
/*
** Lookup table for the 16-bit hqx CRC, one entry per byte value.
** Used by binascii_crc_hqx() to fold in one input byte per step.
*/
static const unsigned short crctab_hqx[256] = {
    0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
    0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,
    0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6,
    0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de,
    0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485,
    0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d,
    0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4,
    0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc,
    0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823,
    0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b,
    0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12,
    0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a,
    0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41,
    0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49,
    0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70,
    0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78,
    0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f,
    0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067,
    0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e,
    0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256,
    0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d,
    0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
    0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c,
    0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634,
    0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab,
    0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3,
    0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a,
    0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92,
    0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9,
    0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1,
    0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8,
    0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0,
};
183 PyDoc_STRVAR(doc_a2b_uu, "(ascii) -> bin. Decode a line of uuencoded data");
185 static PyObject *
186 binascii_a2b_uu(PyObject *self, PyObject *args)
188 unsigned char *ascii_data, *bin_data;
189 int leftbits = 0;
190 unsigned char this_ch;
191 unsigned int leftchar = 0;
192 PyObject *rv;
193 Py_ssize_t ascii_len, bin_len;
195 if ( !PyArg_ParseTuple(args, "t#:a2b_uu", &ascii_data, &ascii_len) )
196 return NULL;
198 /* First byte: binary data length (in bytes) */
199 bin_len = (*ascii_data++ - ' ') & 077;
200 ascii_len--;
202 /* Allocate the buffer */
203 if ( (rv=PyString_FromStringAndSize(NULL, bin_len)) == NULL )
204 return NULL;
205 bin_data = (unsigned char *)PyString_AsString(rv);
207 for( ; bin_len > 0 ; ascii_len--, ascii_data++ ) {
208 /* XXX is it really best to add NULs if there's no more data */
209 this_ch = (ascii_len > 0) ? *ascii_data : 0;
210 if ( this_ch == '\n' || this_ch == '\r' || ascii_len <= 0) {
212 ** Whitespace. Assume some spaces got eaten at
213 ** end-of-line. (We check this later)
215 this_ch = 0;
216 } else {
217 /* Check the character for legality
218 ** The 64 in stead of the expected 63 is because
219 ** there are a few uuencodes out there that use
220 ** '`' as zero instead of space.
222 if ( this_ch < ' ' || this_ch > (' ' + 64)) {
223 PyErr_SetString(Error, "Illegal char");
224 Py_DECREF(rv);
225 return NULL;
227 this_ch = (this_ch - ' ') & 077;
230 ** Shift it in on the low end, and see if there's
231 ** a byte ready for output.
233 leftchar = (leftchar << 6) | (this_ch);
234 leftbits += 6;
235 if ( leftbits >= 8 ) {
236 leftbits -= 8;
237 *bin_data++ = (leftchar >> leftbits) & 0xff;
238 leftchar &= ((1 << leftbits) - 1);
239 bin_len--;
243 ** Finally, check that if there's anything left on the line
244 ** that it's whitespace only.
246 while( ascii_len-- > 0 ) {
247 this_ch = *ascii_data++;
248 /* Extra '`' may be written as padding in some cases */
249 if ( this_ch != ' ' && this_ch != ' '+64 &&
250 this_ch != '\n' && this_ch != '\r' ) {
251 PyErr_SetString(Error, "Trailing garbage");
252 Py_DECREF(rv);
253 return NULL;
256 return rv;
259 PyDoc_STRVAR(doc_b2a_uu, "(bin) -> ascii. Uuencode line of data");
261 static PyObject *
262 binascii_b2a_uu(PyObject *self, PyObject *args)
264 unsigned char *ascii_data, *bin_data;
265 int leftbits = 0;
266 unsigned char this_ch;
267 unsigned int leftchar = 0;
268 PyObject *rv;
269 Py_ssize_t bin_len;
271 if ( !PyArg_ParseTuple(args, "s#:b2a_uu", &bin_data, &bin_len) )
272 return NULL;
273 if ( bin_len > 45 ) {
274 /* The 45 is a limit that appears in all uuencode's */
275 PyErr_SetString(Error, "At most 45 bytes at once");
276 return NULL;
279 /* We're lazy and allocate to much (fixed up later) */
280 if ( (rv=PyString_FromStringAndSize(NULL, bin_len*2+2)) == NULL )
281 return NULL;
282 ascii_data = (unsigned char *)PyString_AsString(rv);
284 /* Store the length */
285 *ascii_data++ = ' ' + (bin_len & 077);
287 for( ; bin_len > 0 || leftbits != 0 ; bin_len--, bin_data++ ) {
288 /* Shift the data (or padding) into our buffer */
289 if ( bin_len > 0 ) /* Data */
290 leftchar = (leftchar << 8) | *bin_data;
291 else /* Padding */
292 leftchar <<= 8;
293 leftbits += 8;
295 /* See if there are 6-bit groups ready */
296 while ( leftbits >= 6 ) {
297 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
298 leftbits -= 6;
299 *ascii_data++ = this_ch + ' ';
302 *ascii_data++ = '\n'; /* Append a courtesy newline */
304 _PyString_Resize(&rv, (ascii_data -
305 (unsigned char *)PyString_AsString(rv)));
306 return rv;
310 static int
311 binascii_find_valid(unsigned char *s, Py_ssize_t slen, int num)
313 /* Finds & returns the (num+1)th
314 ** valid character for base64, or -1 if none.
317 int ret = -1;
318 unsigned char c, b64val;
320 while ((slen > 0) && (ret == -1)) {
321 c = *s;
322 b64val = table_a2b_base64[c & 0x7f];
323 if ( ((c <= 0x7f) && (b64val != (unsigned char)-1)) ) {
324 if (num == 0)
325 ret = *s;
326 num--;
329 s++;
330 slen--;
332 return ret;
335 PyDoc_STRVAR(doc_a2b_base64, "(ascii) -> bin. Decode a line of base64 data");
337 static PyObject *
338 binascii_a2b_base64(PyObject *self, PyObject *args)
340 unsigned char *ascii_data, *bin_data;
341 int leftbits = 0;
342 unsigned char this_ch;
343 unsigned int leftchar = 0;
344 PyObject *rv;
345 Py_ssize_t ascii_len, bin_len;
346 int quad_pos = 0;
348 if ( !PyArg_ParseTuple(args, "t#:a2b_base64", &ascii_data, &ascii_len) )
349 return NULL;
351 bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
353 /* Allocate the buffer */
354 if ( (rv=PyString_FromStringAndSize(NULL, bin_len)) == NULL )
355 return NULL;
356 bin_data = (unsigned char *)PyString_AsString(rv);
357 bin_len = 0;
359 for( ; ascii_len > 0; ascii_len--, ascii_data++) {
360 this_ch = *ascii_data;
362 if (this_ch > 0x7f ||
363 this_ch == '\r' || this_ch == '\n' || this_ch == ' ')
364 continue;
366 /* Check for pad sequences and ignore
367 ** the invalid ones.
369 if (this_ch == BASE64_PAD) {
370 if ( (quad_pos < 2) ||
371 ((quad_pos == 2) &&
372 (binascii_find_valid(ascii_data, ascii_len, 1)
373 != BASE64_PAD)) )
375 continue;
377 else {
378 /* A pad sequence means no more input.
379 ** We've already interpreted the data
380 ** from the quad at this point.
382 leftbits = 0;
383 break;
387 this_ch = table_a2b_base64[*ascii_data];
388 if ( this_ch == (unsigned char) -1 )
389 continue;
392 ** Shift it in on the low end, and see if there's
393 ** a byte ready for output.
395 quad_pos = (quad_pos + 1) & 0x03;
396 leftchar = (leftchar << 6) | (this_ch);
397 leftbits += 6;
399 if ( leftbits >= 8 ) {
400 leftbits -= 8;
401 *bin_data++ = (leftchar >> leftbits) & 0xff;
402 bin_len++;
403 leftchar &= ((1 << leftbits) - 1);
407 if (leftbits != 0) {
408 PyErr_SetString(Error, "Incorrect padding");
409 Py_DECREF(rv);
410 return NULL;
413 /* And set string size correctly. If the result string is empty
414 ** (because the input was all invalid) return the shared empty
415 ** string instead; _PyString_Resize() won't do this for us.
417 if (bin_len > 0)
418 _PyString_Resize(&rv, bin_len);
419 else {
420 Py_DECREF(rv);
421 rv = PyString_FromString("");
423 return rv;
426 PyDoc_STRVAR(doc_b2a_base64, "(bin) -> ascii. Base64-code line of data");
428 static PyObject *
429 binascii_b2a_base64(PyObject *self, PyObject *args)
431 unsigned char *ascii_data, *bin_data;
432 int leftbits = 0;
433 unsigned char this_ch;
434 unsigned int leftchar = 0;
435 PyObject *rv;
436 Py_ssize_t bin_len;
438 if ( !PyArg_ParseTuple(args, "s#:b2a_base64", &bin_data, &bin_len) )
439 return NULL;
440 if ( bin_len > BASE64_MAXBIN ) {
441 PyErr_SetString(Error, "Too much data for base64 line");
442 return NULL;
445 /* We're lazy and allocate too much (fixed up later).
446 "+3" leaves room for up to two pad characters and a trailing
447 newline. Note that 'b' gets encoded as 'Yg==\n' (1 in, 5 out). */
448 if ( (rv=PyString_FromStringAndSize(NULL, bin_len*2 + 3)) == NULL )
449 return NULL;
450 ascii_data = (unsigned char *)PyString_AsString(rv);
452 for( ; bin_len > 0 ; bin_len--, bin_data++ ) {
453 /* Shift the data into our buffer */
454 leftchar = (leftchar << 8) | *bin_data;
455 leftbits += 8;
457 /* See if there are 6-bit groups ready */
458 while ( leftbits >= 6 ) {
459 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
460 leftbits -= 6;
461 *ascii_data++ = table_b2a_base64[this_ch];
464 if ( leftbits == 2 ) {
465 *ascii_data++ = table_b2a_base64[(leftchar&3) << 4];
466 *ascii_data++ = BASE64_PAD;
467 *ascii_data++ = BASE64_PAD;
468 } else if ( leftbits == 4 ) {
469 *ascii_data++ = table_b2a_base64[(leftchar&0xf) << 2];
470 *ascii_data++ = BASE64_PAD;
472 *ascii_data++ = '\n'; /* Append a courtesy newline */
474 _PyString_Resize(&rv, (ascii_data -
475 (unsigned char *)PyString_AsString(rv)));
476 return rv;
479 PyDoc_STRVAR(doc_a2b_hqx, "ascii -> bin, done. Decode .hqx coding");
481 static PyObject *
482 binascii_a2b_hqx(PyObject *self, PyObject *args)
484 unsigned char *ascii_data, *bin_data;
485 int leftbits = 0;
486 unsigned char this_ch;
487 unsigned int leftchar = 0;
488 PyObject *rv;
489 Py_ssize_t len;
490 int done = 0;
492 if ( !PyArg_ParseTuple(args, "t#:a2b_hqx", &ascii_data, &len) )
493 return NULL;
495 /* Allocate a string that is too big (fixed later)
496 Add two to the initial length to prevent interning which
497 would preclude subsequent resizing. */
498 if ( (rv=PyString_FromStringAndSize(NULL, len+2)) == NULL )
499 return NULL;
500 bin_data = (unsigned char *)PyString_AsString(rv);
502 for( ; len > 0 ; len--, ascii_data++ ) {
503 /* Get the byte and look it up */
504 this_ch = table_a2b_hqx[*ascii_data];
505 if ( this_ch == SKIP )
506 continue;
507 if ( this_ch == FAIL ) {
508 PyErr_SetString(Error, "Illegal char");
509 Py_DECREF(rv);
510 return NULL;
512 if ( this_ch == DONE ) {
513 /* The terminating colon */
514 done = 1;
515 break;
518 /* Shift it into the buffer and see if any bytes are ready */
519 leftchar = (leftchar << 6) | (this_ch);
520 leftbits += 6;
521 if ( leftbits >= 8 ) {
522 leftbits -= 8;
523 *bin_data++ = (leftchar >> leftbits) & 0xff;
524 leftchar &= ((1 << leftbits) - 1);
528 if ( leftbits && !done ) {
529 PyErr_SetString(Incomplete,
530 "String has incomplete number of bytes");
531 Py_DECREF(rv);
532 return NULL;
534 _PyString_Resize(
535 &rv, (bin_data - (unsigned char *)PyString_AsString(rv)));
536 if (rv) {
537 PyObject *rrv = Py_BuildValue("Oi", rv, done);
538 Py_DECREF(rv);
539 return rrv;
542 return NULL;
545 PyDoc_STRVAR(doc_rlecode_hqx, "Binhex RLE-code binary data");
547 static PyObject *
548 binascii_rlecode_hqx(PyObject *self, PyObject *args)
550 unsigned char *in_data, *out_data;
551 PyObject *rv;
552 unsigned char ch;
553 Py_ssize_t in, inend, len;
555 if ( !PyArg_ParseTuple(args, "s#:rlecode_hqx", &in_data, &len) )
556 return NULL;
558 /* Worst case: output is twice as big as input (fixed later) */
559 if ( (rv=PyString_FromStringAndSize(NULL, len*2+2)) == NULL )
560 return NULL;
561 out_data = (unsigned char *)PyString_AsString(rv);
563 for( in=0; in<len; in++) {
564 ch = in_data[in];
565 if ( ch == RUNCHAR ) {
566 /* RUNCHAR. Escape it. */
567 *out_data++ = RUNCHAR;
568 *out_data++ = 0;
569 } else {
570 /* Check how many following are the same */
571 for(inend=in+1;
572 inend<len && in_data[inend] == ch &&
573 inend < in+255;
574 inend++) ;
575 if ( inend - in > 3 ) {
576 /* More than 3 in a row. Output RLE. */
577 *out_data++ = ch;
578 *out_data++ = RUNCHAR;
579 *out_data++ = inend-in;
580 in = inend-1;
581 } else {
582 /* Less than 3. Output the byte itself */
583 *out_data++ = ch;
587 _PyString_Resize(&rv, (out_data -
588 (unsigned char *)PyString_AsString(rv)));
589 return rv;
592 PyDoc_STRVAR(doc_b2a_hqx, "Encode .hqx data");
594 static PyObject *
595 binascii_b2a_hqx(PyObject *self, PyObject *args)
597 unsigned char *ascii_data, *bin_data;
598 int leftbits = 0;
599 unsigned char this_ch;
600 unsigned int leftchar = 0;
601 PyObject *rv;
602 Py_ssize_t len;
604 if ( !PyArg_ParseTuple(args, "s#:b2a_hqx", &bin_data, &len) )
605 return NULL;
607 /* Allocate a buffer that is at least large enough */
608 if ( (rv=PyString_FromStringAndSize(NULL, len*2+2)) == NULL )
609 return NULL;
610 ascii_data = (unsigned char *)PyString_AsString(rv);
612 for( ; len > 0 ; len--, bin_data++ ) {
613 /* Shift into our buffer, and output any 6bits ready */
614 leftchar = (leftchar << 8) | *bin_data;
615 leftbits += 8;
616 while ( leftbits >= 6 ) {
617 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
618 leftbits -= 6;
619 *ascii_data++ = table_b2a_hqx[this_ch];
622 /* Output a possible runt byte */
623 if ( leftbits ) {
624 leftchar <<= (6-leftbits);
625 *ascii_data++ = table_b2a_hqx[leftchar & 0x3f];
627 _PyString_Resize(&rv, (ascii_data -
628 (unsigned char *)PyString_AsString(rv)));
629 return rv;
632 PyDoc_STRVAR(doc_rledecode_hqx, "Decode hexbin RLE-coded string");
634 static PyObject *
635 binascii_rledecode_hqx(PyObject *self, PyObject *args)
637 unsigned char *in_data, *out_data;
638 unsigned char in_byte, in_repeat;
639 PyObject *rv;
640 Py_ssize_t in_len, out_len, out_len_left;
642 if ( !PyArg_ParseTuple(args, "s#:rledecode_hqx", &in_data, &in_len) )
643 return NULL;
645 /* Empty string is a special case */
646 if ( in_len == 0 )
647 return PyString_FromString("");
649 /* Allocate a buffer of reasonable size. Resized when needed */
650 out_len = in_len*2;
651 if ( (rv=PyString_FromStringAndSize(NULL, out_len)) == NULL )
652 return NULL;
653 out_len_left = out_len;
654 out_data = (unsigned char *)PyString_AsString(rv);
657 ** We need two macros here to get/put bytes and handle
658 ** end-of-buffer for input and output strings.
660 #define INBYTE(b) \
661 do { \
662 if ( --in_len < 0 ) { \
663 PyErr_SetString(Incomplete, ""); \
664 Py_DECREF(rv); \
665 return NULL; \
667 b = *in_data++; \
668 } while(0)
670 #define OUTBYTE(b) \
671 do { \
672 if ( --out_len_left < 0 ) { \
673 _PyString_Resize(&rv, 2*out_len); \
674 if ( rv == NULL ) return NULL; \
675 out_data = (unsigned char *)PyString_AsString(rv) \
676 + out_len; \
677 out_len_left = out_len-1; \
678 out_len = out_len * 2; \
680 *out_data++ = b; \
681 } while(0)
684 ** Handle first byte separately (since we have to get angry
685 ** in case of an orphaned RLE code).
687 INBYTE(in_byte);
689 if (in_byte == RUNCHAR) {
690 INBYTE(in_repeat);
691 if (in_repeat != 0) {
692 /* Note Error, not Incomplete (which is at the end
693 ** of the string only). This is a programmer error.
695 PyErr_SetString(Error, "Orphaned RLE code at start");
696 Py_DECREF(rv);
697 return NULL;
699 OUTBYTE(RUNCHAR);
700 } else {
701 OUTBYTE(in_byte);
704 while( in_len > 0 ) {
705 INBYTE(in_byte);
707 if (in_byte == RUNCHAR) {
708 INBYTE(in_repeat);
709 if ( in_repeat == 0 ) {
710 /* Just an escaped RUNCHAR value */
711 OUTBYTE(RUNCHAR);
712 } else {
713 /* Pick up value and output a sequence of it */
714 in_byte = out_data[-1];
715 while ( --in_repeat > 0 )
716 OUTBYTE(in_byte);
718 } else {
719 /* Normal byte */
720 OUTBYTE(in_byte);
723 _PyString_Resize(&rv, (out_data -
724 (unsigned char *)PyString_AsString(rv)));
725 return rv;
728 PyDoc_STRVAR(doc_crc_hqx,
729 "(data, oldcrc) -> newcrc. Compute hqx CRC incrementally");
731 static PyObject *
732 binascii_crc_hqx(PyObject *self, PyObject *args)
734 unsigned char *bin_data;
735 unsigned int crc;
736 Py_ssize_t len;
738 if ( !PyArg_ParseTuple(args, "s#i:crc_hqx", &bin_data, &len, &crc) )
739 return NULL;
741 while(len--) {
742 crc=((crc<<8)&0xff00)^crctab_hqx[((crc>>8)&0xff)^*bin_data++];
745 return Py_BuildValue("i", crc);
748 PyDoc_STRVAR(doc_crc32,
749 "(data, oldcrc = 0) -> newcrc. Compute CRC-32 incrementally");
751 /* Crc - 32 BIT ANSI X3.66 CRC checksum files
752 Also known as: ISO 3307
753 **********************************************************************|
754 * *|
755 * Demonstration program to compute the 32-bit CRC used as the frame *|
756 * check sequence in ADCCP (ANSI X3.66, also known as FIPS PUB 71 *|
757 * and FED-STD-1003, the U.S. versions of CCITT's X.25 link-level *|
758 * protocol). The 32-bit FCS was added via the Federal Register, *|
759 * 1 June 1982, p.23798. I presume but don't know for certain that *|
760 * this polynomial is or will be included in CCITT V.41, which *|
761 * defines the 16-bit CRC (often called CRC-CCITT) polynomial. FIPS *|
762 * PUB 78 says that the 32-bit FCS reduces otherwise undetected *|
763 * errors by a factor of 10^-5 over 16-bit FCS. *|
764 * *|
765 **********************************************************************|
767 Copyright (C) 1986 Gary S. Brown. You may use this program, or
768 code or tables extracted from it, as desired without restriction.
770 First, the polynomial itself and its table of feedback terms. The
771 polynomial is
772 X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
773 Note that we take it "backwards" and put the highest-order term in
774 the lowest-order bit. The X^32 term is "implied"; the LSB is the
775 X^31 term, etc. The X^0 term (usually shown as "+1") results in
776 the MSB being 1.
778 Note that the usual hardware shift register implementation, which
779 is what we're using (we're merely optimizing it by doing eight-bit
780 chunks at a time) shifts bits into the lowest-order term. In our
781 implementation, that means shifting towards the right. Why do we
782 do it this way? Because the calculated CRC must be transmitted in
783 order from highest-order term to lowest-order term. UARTs transmit
784 characters in order from LSB to MSB. By storing the CRC this way,
785 we hand it to the UART in the order low-byte to high-byte; the UART
786 sends each low-bit to high-bit; and the result is transmission bit
787 by bit from highest- to lowest-order term without requiring any bit
788 shuffling on our part. Reception works similarly.
790 The feedback terms table consists of 256, 32-bit entries. Notes:
792 1. The table can be generated at runtime if desired; code to do so
793 is shown later. It might not be obvious, but the feedback
794 terms simply represent the results of eight shift/xor opera-
795 tions for all combinations of data and CRC register values.
797 2. The CRC accumulation logic is the same for all CRC polynomials,
798 be they sixteen or thirty-two bits wide. You simply choose the
799 appropriate table. Alternatively, because the table can be
800 generated at runtime, you can start by generating the table for
801 the polynomial in question and use exactly the same "updcrc",
802 if your application needn't simultaneously handle two CRC
803 polynomials. (Note, however, that XMODEM is strange.)
805 3. For 16-bit CRCs, the table entries need be only 16 bits wide;
806 of course, 32-bit entries work OK if the high 16 bits are zero.
808 4. The values must be right-shifted by eight bits by the "updcrc"
809 logic; the shift must be unsigned (bring in zeroes). On some
810 hardware you could probably optimize the shift in assembler by
811 using byte-swap instructions.
812 ********************************************************************/
/*
** CRC-32 feedback terms, one 32-bit entry per byte value.
** Used by binascii_crc32() to fold in one input byte per step.
*/
static const unsigned long crc_32_tab[256] = {
    0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL,
    0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL,
    0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL,
    0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL,
    0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL,
    0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL,
    0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL,
    0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL,
    0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL,
    0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL,
    0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL,
    0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL,
    0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL,
    0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL,
    0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL,
    0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL,
    0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL,
    0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL,
    0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL,
    0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL,
    0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL,
    0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL,
    0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL,
    0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL,
    0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL,
    0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL,
    0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL,
    0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL,
    0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL,
    0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL,
    0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL,
    0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL,
    0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL,
    0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL,
    0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL,
    0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL,
    0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL,
    0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL,
    0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL,
    0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL,
    0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL,
    0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL,
    0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL,
    0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL,
    0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL,
    0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL,
    0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL,
    0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL,
    0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL,
    0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL,
    0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL,
    0x2d02ef8dUL
};
869 static PyObject *
870 binascii_crc32(PyObject *self, PyObject *args)
871 { /* By Jim Ahlstrom; All rights transferred to CNRI */
872 unsigned char *bin_data;
873 unsigned long crc = 0UL; /* initial value of CRC */
874 Py_ssize_t len;
875 long result;
877 if ( !PyArg_ParseTuple(args, "s#|l:crc32", &bin_data, &len, &crc) )
878 return NULL;
880 crc = ~ crc;
881 #if SIZEOF_LONG > 4
882 /* only want the trailing 32 bits */
883 crc &= 0xFFFFFFFFUL;
884 #endif
885 while (len--)
886 crc = crc_32_tab[(crc ^ *bin_data++) & 0xffUL] ^ (crc >> 8);
887 /* Note: (crc >> 8) MUST zero fill on left */
889 result = (long)(crc ^ 0xFFFFFFFFUL);
890 #if SIZEOF_LONG > 4
891 /* Extend the sign bit. This is one way to ensure the result is the
892 * same across platforms. The other way would be to return an
893 * unbounded unsigned long, but the evidence suggests that lots of
894 * code outside this treats the result as if it were a signed 4-byte
895 * integer.
897 result |= -(result & (1L << 31));
898 #endif
899 return PyInt_FromLong(result);
903 static PyObject *
904 binascii_hexlify(PyObject *self, PyObject *args)
906 char* argbuf;
907 Py_ssize_t arglen;
908 PyObject *retval;
909 char* retbuf;
910 Py_ssize_t i, j;
912 if (!PyArg_ParseTuple(args, "s#:b2a_hex", &argbuf, &arglen))
913 return NULL;
915 retval = PyString_FromStringAndSize(NULL, arglen*2);
916 if (!retval)
917 return NULL;
918 retbuf = PyString_AsString(retval);
919 if (!retbuf)
920 goto finally;
922 /* make hex version of string, taken from shamodule.c */
923 for (i=j=0; i < arglen; i++) {
924 char c;
925 c = (argbuf[i] >> 4) & 0xf;
926 c = (c>9) ? c+'a'-10 : c + '0';
927 retbuf[j++] = c;
928 c = argbuf[i] & 0xf;
929 c = (c>9) ? c+'a'-10 : c + '0';
930 retbuf[j++] = c;
932 return retval;
934 finally:
935 Py_DECREF(retval);
936 return NULL;
/* Docstring attached to both "b2a_hex" and "hexlify" in the module's
 * method table.
 * NOTE(review): the embedded listing numbers jump from 940 to 942, so one
 * source line of this string literal appears to be missing from this copy
 * -- confirm against the upstream file before relying on the exact text.
 */
939 PyDoc_STRVAR(doc_hexlify,
940 "b2a_hex(data) -> s; Hexadecimal representation of binary data.\n\
942 This function is also available as \"hexlify()\".");
/* Convert one hexadecimal digit to its numeric value.
 *
 * c: character code of the digit.
 *
 * Returns 0..15 for '0'-'9', 'a'-'f' and 'A'-'F', and -1 for every other
 * value of c.  Only plain range comparisons are used, so the result does
 * not depend on the current locale and any int (including negative
 * values and values above 255) is a valid argument.
 */
static int
to_int(int c)
{
    if (c >= '0' && c <= '9')
        return c - '0';
    if (c >= 'a' && c <= 'f')
        return c - 'a' + 10;
    if (c >= 'A' && c <= 'F')
        return c - 'A' + 10;
    return -1;
}
960 static PyObject *
961 binascii_unhexlify(PyObject *self, PyObject *args)
963 char* argbuf;
964 Py_ssize_t arglen;
965 PyObject *retval;
966 char* retbuf;
967 Py_ssize_t i, j;
969 if (!PyArg_ParseTuple(args, "s#:a2b_hex", &argbuf, &arglen))
970 return NULL;
972 /* XXX What should we do about strings with an odd length? Should
973 * we add an implicit leading zero, or a trailing zero? For now,
974 * raise an exception.
976 if (arglen % 2) {
977 PyErr_SetString(PyExc_TypeError, "Odd-length string");
978 return NULL;
981 retval = PyString_FromStringAndSize(NULL, (arglen/2));
982 if (!retval)
983 return NULL;
984 retbuf = PyString_AsString(retval);
985 if (!retbuf)
986 goto finally;
988 for (i=j=0; i < arglen; i += 2) {
989 int top = to_int(Py_CHARMASK(argbuf[i]));
990 int bot = to_int(Py_CHARMASK(argbuf[i+1]));
991 if (top == -1 || bot == -1) {
992 PyErr_SetString(PyExc_TypeError,
993 "Non-hexadecimal digit found");
994 goto finally;
996 retbuf[j++] = (top << 4) + bot;
998 return retval;
1000 finally:
1001 Py_DECREF(retval);
1002 return NULL;
/* Docstring attached to both "a2b_hex" and "unhexlify" in the module's
 * method table.
 * NOTE(review): the embedded listing numbers jump from 1006 to 1008, so
 * one source line of this string literal appears to be missing from this
 * copy -- confirm against the upstream file.
 */
1005 PyDoc_STRVAR(doc_unhexlify,
1006 "a2b_hex(hexstr) -> s; Binary data of hexadecimal representation.\n\
1008 hexstr must contain an even number of hex digits (upper or lower case).\n\
1009 This function is also available as \"unhexlify()\"");
/* Hex digit values indexed by character code (128 entries, codes 0..127).
 * Codes 48..57 ('0'-'9') map to 0..9; codes 65..70 ('A'-'F') and 97..102
 * ('a'-'f') map to 10..15; every other entry is -1.
 * The table has no entries for codes >= 128, so it must not be indexed
 * with such values.
 */
1011 static int table_hex[128] = {
1012 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
1013 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
1014 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
1015 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1, -1,-1,-1,-1,
1016 -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
1017 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
1018 -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
1019 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
/* Look up the value of hex digit c in table_hex.  The macro performs no
 * bounds check and table_hex has 128 entries, so the caller must make
 * sure c is below 128 (a2b_qp only uses it after range-checking c as a
 * hex digit). */
1022 #define hexval(c) table_hex[(unsigned int)(c)]
/* Line-length threshold that b2a_qp compares the running line length
 * against when deciding to insert a soft line break. */
1024 #define MAXLINESIZE 76
/* Docstring attached to "a2b_qp" in the module's method table. */
1026 PyDoc_STRVAR(doc_a2b_qp, "Decode a string of qp-encoded data");
1028 static PyObject*
1029 binascii_a2b_qp(PyObject *self, PyObject *args, PyObject *kwargs)
1031 Py_ssize_t in, out;
1032 char ch;
1033 unsigned char *data, *odata;
1034 Py_ssize_t datalen = 0;
1035 PyObject *rv;
1036 static char *kwlist[] = {"data", "header", NULL};
1037 int header = 0;
1039 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|i", kwlist, &data,
1040 &datalen, &header))
1041 return NULL;
1043 /* We allocate the output same size as input, this is overkill.
1044 * The previous implementation used calloc() so we'll zero out the
1045 * memory here too, since PyMem_Malloc() does not guarantee that.
1047 odata = (unsigned char *) PyMem_Malloc(datalen);
1048 if (odata == NULL) {
1049 PyErr_NoMemory();
1050 return NULL;
1052 memset(odata, 0, datalen);
1054 in = out = 0;
1055 while (in < datalen) {
1056 if (data[in] == '=') {
1057 in++;
1058 if (in >= datalen) break;
1059 /* Soft line breaks */
1060 if ((data[in] == '\n') || (data[in] == '\r')) {
1061 if (data[in] != '\n') {
1062 while (in < datalen && data[in] != '\n') in++;
1064 if (in < datalen) in++;
1066 else if (data[in] == '=') {
1067 /* broken case from broken python qp */
1068 odata[out++] = '=';
1069 in++;
1071 else if (((data[in] >= 'A' && data[in] <= 'F') ||
1072 (data[in] >= 'a' && data[in] <= 'f') ||
1073 (data[in] >= '0' && data[in] <= '9')) &&
1074 ((data[in+1] >= 'A' && data[in+1] <= 'F') ||
1075 (data[in+1] >= 'a' && data[in+1] <= 'f') ||
1076 (data[in+1] >= '0' && data[in+1] <= '9'))) {
1077 /* hexval */
1078 ch = hexval(data[in]) << 4;
1079 in++;
1080 ch |= hexval(data[in]);
1081 in++;
1082 odata[out++] = ch;
1084 else {
1085 odata[out++] = '=';
1088 else if (header && data[in] == '_') {
1089 odata[out++] = ' ';
1090 in++;
1092 else {
1093 odata[out] = data[in];
1094 in++;
1095 out++;
1098 if ((rv = PyString_FromStringAndSize((char *)odata, out)) == NULL) {
1099 PyMem_Free(odata);
1100 return NULL;
1102 PyMem_Free(odata);
1103 return rv;
/* Store the two uppercase hexadecimal digits of ch in s[0] (high nibble)
 * and s[1] (low nibble).  Exactly two bytes are written and no
 * terminator is added.  Always returns 0.
 */
static int
to_hex (unsigned char ch, unsigned char *s)
{
    static const char digits[] = "0123456789ABCDEF";

    s[0] = digits[(ch >> 4) & 0x0F];
    s[1] = digits[ch & 0x0F];
    return 0;
}
/* Docstring attached to "b2a_qp" in the module's method table.
 * NOTE(review): the embedded listing numbers jump from 1119 to 1121, so
 * one source line of this string literal appears to be missing from this
 * copy -- confirm against the upstream file.
 */
1117 PyDoc_STRVAR(doc_b2a_qp,
1118 "b2a_qp(data, quotetabs=0, istext=1, header=0) -> s; \n\
1119 Encode a string using quoted-printable encoding. \n\
1121 On encoding, when istext is set, newlines are not encoded, and white \n\
1122 space at end of lines is. When istext is not set, \\r and \\n (CR/LF) are \n\
1123 both encoded. When quotetabs is set, space and tabs are encoded.");
1125 /* XXX: This is ridiculously complicated to be backward compatible
1126 * (mostly) with the quopri module. It doesn't re-create the quopri
1127 * module bug where text ending in CRLF has the CR encoded */
1128 static PyObject*
1129 binascii_b2a_qp (PyObject *self, PyObject *args, PyObject *kwargs)
1131 Py_ssize_t in, out;
1132 unsigned char *data, *odata;
1133 Py_ssize_t datalen = 0, odatalen = 0;
1134 PyObject *rv;
1135 unsigned int linelen = 0;
1136 static char *kwlist[] = {"data", "quotetabs", "istext",
1137 "header", NULL};
1138 int istext = 1;
1139 int quotetabs = 0;
1140 int header = 0;
1141 unsigned char ch;
1142 int crlf = 0;
1143 unsigned char *p;
1145 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|iii", kwlist, &data,
1146 &datalen, &quotetabs, &istext, &header))
1147 return NULL;
1149 /* See if this string is using CRLF line ends */
1150 /* XXX: this function has the side effect of converting all of
1151 * the end of lines to be the same depending on this detection
1152 * here */
1153 p = (unsigned char *) memchr(data, '\n', datalen);
1154 if ((p != NULL) && (p > data) && (*(p-1) == '\r'))
1155 crlf = 1;
1157 /* First, scan to see how many characters need to be encoded */
1158 in = 0;
1159 while (in < datalen) {
1160 if ((data[in] > 126) ||
1161 (data[in] == '=') ||
1162 (header && data[in] == '_') ||
1163 ((data[in] == '.') && (linelen == 0) &&
1164 (data[in+1] == '\n' || data[in+1] == '\r' || data[in+1] == 0)) ||
1165 (!istext && ((data[in] == '\r') || (data[in] == '\n'))) ||
1166 ((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) ||
1167 ((data[in] < 33) &&
1168 (data[in] != '\r') && (data[in] != '\n') &&
1169 (quotetabs ||
1170 (!quotetabs && ((data[in] != '\t') && (data[in] != ' '))))))
1172 if ((linelen + 3) >= MAXLINESIZE) {
1173 linelen = 0;
1174 if (crlf)
1175 odatalen += 3;
1176 else
1177 odatalen += 2;
1179 linelen += 3;
1180 odatalen += 3;
1181 in++;
1183 else {
1184 if (istext &&
1185 ((data[in] == '\n') ||
1186 ((in+1 < datalen) && (data[in] == '\r') &&
1187 (data[in+1] == '\n'))))
1189 linelen = 0;
1190 /* Protect against whitespace on end of line */
1191 if (in && ((data[in-1] == ' ') || (data[in-1] == '\t')))
1192 odatalen += 2;
1193 if (crlf)
1194 odatalen += 2;
1195 else
1196 odatalen += 1;
1197 if (data[in] == '\r')
1198 in += 2;
1199 else
1200 in++;
1202 else {
1203 if ((in + 1 != datalen) &&
1204 (data[in+1] != '\n') &&
1205 (linelen + 1) >= MAXLINESIZE) {
1206 linelen = 0;
1207 if (crlf)
1208 odatalen += 3;
1209 else
1210 odatalen += 2;
1212 linelen++;
1213 odatalen++;
1214 in++;
1219 /* We allocate the output same size as input, this is overkill.
1220 * The previous implementation used calloc() so we'll zero out the
1221 * memory here too, since PyMem_Malloc() does not guarantee that.
1223 odata = (unsigned char *) PyMem_Malloc(odatalen);
1224 if (odata == NULL) {
1225 PyErr_NoMemory();
1226 return NULL;
1228 memset(odata, 0, odatalen);
1230 in = out = linelen = 0;
1231 while (in < datalen) {
1232 if ((data[in] > 126) ||
1233 (data[in] == '=') ||
1234 (header && data[in] == '_') ||
1235 ((data[in] == '.') && (linelen == 0) &&
1236 (data[in+1] == '\n' || data[in+1] == '\r' || data[in+1] == 0)) ||
1237 (!istext && ((data[in] == '\r') || (data[in] == '\n'))) ||
1238 ((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) ||
1239 ((data[in] < 33) &&
1240 (data[in] != '\r') && (data[in] != '\n') &&
1241 (quotetabs ||
1242 (!quotetabs && ((data[in] != '\t') && (data[in] != ' '))))))
1244 if ((linelen + 3 )>= MAXLINESIZE) {
1245 odata[out++] = '=';
1246 if (crlf) odata[out++] = '\r';
1247 odata[out++] = '\n';
1248 linelen = 0;
1250 odata[out++] = '=';
1251 to_hex(data[in], &odata[out]);
1252 out += 2;
1253 in++;
1254 linelen += 3;
1256 else {
1257 if (istext &&
1258 ((data[in] == '\n') ||
1259 ((in+1 < datalen) && (data[in] == '\r') &&
1260 (data[in+1] == '\n'))))
1262 linelen = 0;
1263 /* Protect against whitespace on end of line */
1264 if (out && ((odata[out-1] == ' ') || (odata[out-1] == '\t'))) {
1265 ch = odata[out-1];
1266 odata[out-1] = '=';
1267 to_hex(ch, &odata[out]);
1268 out += 2;
1271 if (crlf) odata[out++] = '\r';
1272 odata[out++] = '\n';
1273 if (data[in] == '\r')
1274 in += 2;
1275 else
1276 in++;
1278 else {
1279 if ((in + 1 != datalen) &&
1280 (data[in+1] != '\n') &&
1281 (linelen + 1) >= MAXLINESIZE) {
1282 odata[out++] = '=';
1283 if (crlf) odata[out++] = '\r';
1284 odata[out++] = '\n';
1285 linelen = 0;
1287 linelen++;
1288 if (header && data[in] == ' ') {
1289 odata[out++] = '_';
1290 in++;
1292 else {
1293 odata[out++] = data[in++];
1298 if ((rv = PyString_FromStringAndSize((char *)odata, out)) == NULL) {
1299 PyMem_Free(odata);
1300 return NULL;
1302 PyMem_Free(odata);
1303 return rv;
1306 /* List of functions defined in the module */
/* Each entry is {Python-level name, C implementation, calling-convention
 * flags, docstring}; the table ends with a {NULL, NULL} sentinel.
 * "hexlify"/"unhexlify" are registered with the same C function and
 * docstring as "b2a_hex"/"a2b_hex".  a2b_qp and b2a_qp take keyword
 * arguments, hence the PyCFunction cast and METH_VARARGS | METH_KEYWORDS.
 */
1308 static struct PyMethodDef binascii_module_methods[] = {
1309 {"a2b_uu", binascii_a2b_uu, METH_VARARGS, doc_a2b_uu},
1310 {"b2a_uu", binascii_b2a_uu, METH_VARARGS, doc_b2a_uu},
1311 {"a2b_base64", binascii_a2b_base64, METH_VARARGS, doc_a2b_base64},
1312 {"b2a_base64", binascii_b2a_base64, METH_VARARGS, doc_b2a_base64},
1313 {"a2b_hqx", binascii_a2b_hqx, METH_VARARGS, doc_a2b_hqx},
1314 {"b2a_hqx", binascii_b2a_hqx, METH_VARARGS, doc_b2a_hqx},
1315 {"b2a_hex", binascii_hexlify, METH_VARARGS, doc_hexlify},
1316 {"a2b_hex", binascii_unhexlify, METH_VARARGS, doc_unhexlify},
1317 {"hexlify", binascii_hexlify, METH_VARARGS, doc_hexlify},
1318 {"unhexlify", binascii_unhexlify, METH_VARARGS, doc_unhexlify},
1319 {"rlecode_hqx", binascii_rlecode_hqx, METH_VARARGS, doc_rlecode_hqx},
1320 {"rledecode_hqx", binascii_rledecode_hqx, METH_VARARGS,
1321 doc_rledecode_hqx},
1322 {"crc_hqx", binascii_crc_hqx, METH_VARARGS, doc_crc_hqx},
1323 {"crc32", binascii_crc32, METH_VARARGS, doc_crc32},
1324 {"a2b_qp", (PyCFunction)binascii_a2b_qp, METH_VARARGS | METH_KEYWORDS,
1325 doc_a2b_qp},
1326 {"b2a_qp", (PyCFunction)binascii_b2a_qp, METH_VARARGS | METH_KEYWORDS,
1327 doc_b2a_qp},
1328 {NULL, NULL} /* sentinel */
1332 /* Initialization function for the module (*must* be called initbinascii) */
/* The visible body creates the "binascii" module from
 * binascii_module_methods, stores doc_binascii under "__doc__" in the
 * module dict, then creates the binascii.Error and binascii.Incomplete
 * exception classes, keeps them in the file-level Error / Incomplete
 * variables and publishes them in the module dict.
 * NOTE(review): in the lines shown, the results of PyString_FromString,
 * PyErr_NewException and PyDict_SetItemString are not checked; only the
 * Py_InitModule result is.  Confirm whether failures there need handling.
 */
1333 PyDoc_STRVAR(doc_binascii, "Conversion between binary data and ASCII");
1335 PyMODINIT_FUNC
1336 initbinascii(void)
1338 PyObject *m, *d, *x;
1340 /* Create the module and add the functions */
1341 m = Py_InitModule("binascii", binascii_module_methods);
1342 if (m == NULL)
1343 return;
/* Attach the module docstring; x is released with Py_XDECREF, which
 * accepts NULL. */
1345 d = PyModule_GetDict(m);
1346 x = PyString_FromString(doc_binascii);
1347 PyDict_SetItemString(d, "__doc__", x);
1348 Py_XDECREF(x);
/* Create and register the module's two exception classes. */
1350 Error = PyErr_NewException("binascii.Error", NULL, NULL);
1351 PyDict_SetItemString(d, "Error", Error);
1352 Incomplete = PyErr_NewException("binascii.Incomplete", NULL, NULL);
1353 PyDict_SetItemString(d, "Incomplete", Incomplete);