issue5063: Fixes for building RPM on CentOS plus misc .spec file enhancements.
[python.git] / Modules / binascii.c
blobcf027431fa6d2d2fe4349ec390790eee9ce5bdea
1 /*
2 ** Routines to represent binary data in ASCII and vice-versa
3 **
4 ** This module currently supports the following encodings:
5 ** uuencode:
6 ** each line encodes 45 bytes (except possibly the last)
7 ** First char encodes (binary) length, rest data
8 ** each char encodes 6 bits, as follows:
9 ** binary: 01234567 abcdefgh ijklmnop
10 ** ascii: 012345 67abcd efghij klmnop
11 ** ASCII encoding method is "excess-space": 000000 is encoded as ' ', etc.
12 ** short binary data is zero-extended (so the bits are always in the
13 ** right place), this does *not* reflect in the length.
14 ** base64:
15 ** Line breaks are insignificant, but lines are at most 76 chars
16 ** each char encodes 6 bits, in similar order as uuencode/hqx. Encoding
17 ** is done via a table.
18 ** Short binary data is filled (in ASCII) with '='.
19 ** hqx:
20 ** File starts with introductory text, real data starts and ends
21 ** with colons.
22 ** Data consists of three similar parts: info, datafork, resourcefork.
23 ** Each part is protected (at the end) with a 16-bit crc
24 ** The binary data is run-length encoded, and then ascii-fied:
25 ** binary: 01234567 abcdefgh ijklmnop
26 ** ascii: 012345 67abcd efghij klmnop
27 ** ASCII encoding is table-driven, see the code.
28 ** Short binary data results in the runt ascii-byte being output with
29 ** the bits in the right place.
31 ** While I was reading dozens of programs that encode or decode the formats
32 ** here (documentation? hihi:-) I have formulated Jansen's Observation:
34 ** Programs that encode binary data in ASCII are written in
35 ** such a style that they are as unreadable as possible. Devices used
36 ** include unnecessary global variables, burying important tables
37 ** in unrelated sourcefiles, putting functions in include files,
38 ** using seemingly-descriptive variable names for different purposes,
39 ** calls to empty subroutines and a host of others.
41 ** I have attempted to break with this tradition, but I guess that that
42 ** does make the performance sub-optimal. Oh well, too bad...
44 ** Jack Jansen, CWI, July 1995.
46 ** Added support for quoted-printable encoding, based on rfc 1521 et al
47 ** quoted-printable encoding specifies that non printable characters (anything
48 ** below 32 and above 126) be encoded as =XX where XX is the hexadecimal value
49 ** of the character. It also specifies some other behavior to enable 8bit data
50 ** in a mail message with little difficulty (maximum line sizes, protecting
51 ** some cases of whitespace, etc).
53 ** Brandon Long, September 2001.
56 #define PY_SSIZE_T_CLEAN
58 #include "Python.h"
59 #ifdef USE_ZLIB_CRC32
60 #include "zlib.h"
61 #endif
63 static PyObject *Error;
64 static PyObject *Incomplete;
/*
** hqx lookup table, ascii->binary.
**
** Indexed by the input byte.  Each entry is either the 6-bit value the
** character stands for (0x00..0x3F) or one of the marker codes below.
*/

/* Marker byte used by the binhex run-length coding routines. */
#define RUNCHAR 0x90

/* Marker codes stored in table_a2b_hqx for non-data characters. */
#define DONE 0x7F   /* ':'  -- the terminating colon */
#define SKIP 0x7E   /* '\n' and '\r' -- silently ignored */
#define FAIL 0x7D   /* any other character -- illegal in hqx data */

/* The table is only ever read, so it is declared const. */
static const unsigned char table_a2b_hqx[256] = {
/*       ^@    ^A    ^B    ^C    ^D    ^E    ^F    ^G   */
/* 0*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       \b    \t    \n    ^K    ^L    \r    ^N    ^O   */
/* 1*/  FAIL, FAIL, SKIP, FAIL, FAIL, SKIP, FAIL, FAIL,
/*       ^P    ^Q    ^R    ^S    ^T    ^U    ^V    ^W   */
/* 2*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       ^X    ^Y    ^Z    ^[    ^\    ^]    ^^    ^_   */
/* 3*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*              !     "     #     $     %     &     '   */
/* 4*/  FAIL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
/*        (     )     *     +     ,     -     .     /   */
/* 5*/  0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, FAIL, FAIL,
/*        0     1     2     3     4     5     6     7   */
/* 6*/  0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, FAIL,
/*        8     9     :     ;     <     =     >     ?   */
/* 7*/  0x14, 0x15, DONE, FAIL, FAIL, FAIL, FAIL, FAIL,
/*        @     A     B     C     D     E     F     G   */
/* 8*/  0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D,
/*        H     I     J     K     L     M     N     O   */
/* 9*/  0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, FAIL,
/*        P     Q     R     S     T     U     V     W   */
/*10*/  0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, FAIL,
/*        X     Y     Z     [     \     ]     ^     _   */
/*11*/  0x2C, 0x2D, 0x2E, 0x2F, FAIL, FAIL, FAIL, FAIL,
/*        `     a     b     c     d     e     f     g   */
/*12*/  0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, FAIL,
/*        h     i     j     k     l     m     n     o   */
/*13*/  0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, FAIL, FAIL,
/*        p     q     r     s     t     u     v     w   */
/*14*/  0x3D, 0x3E, 0x3F, FAIL, FAIL, FAIL, FAIL, FAIL,
/*        x     y     z     {     |     }     ~    ^?   */
/*15*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*16*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
};
/*
** hqx lookup table, binary->ascii: entry i is the character that encodes
** the 6-bit value i.  Read-only, hence const.
*/
static const unsigned char table_b2a_hqx[] =
"!\"#$%&'()*+,-012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr";
/*
** base64 lookup table, ascii->binary, indexed by a 7-bit character code.
** Entries are the 6-bit value of the character, or -1 for characters that
** are not part of the base64 alphabet.  The pad character '=' maps to 0.
** Read-only, hence const.
*/
static const char table_a2b_base64[] = {
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
    52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1, 0,-1,-1, /* Note PAD->0 */
    -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
    15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
    -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
    41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
};
/* Character used to pad short base64 quads. */
#define BASE64_PAD '='

/* Max binary chunk size; limited only by available memory */
#define BASE64_MAXBIN (PY_SSIZE_T_MAX/2 - sizeof(PyStringObject) - 3)

/*
** base64 lookup table, binary->ascii: entry i is the character that
** encodes the 6-bit value i.  Read-only, hence const.
*/
static const unsigned char table_b2a_base64[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
/*
** Byte-wise lookup table for the 16-bit hqx CRC; crc_hqx indexes it with
** the high byte of the running CRC xor'ed with the next data byte.
** Read-only, hence const.
*/
static const unsigned short crctab_hqx[256] = {
    0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
    0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,
    0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6,
    0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de,
    0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485,
    0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d,
    0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4,
    0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc,
    0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823,
    0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b,
    0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12,
    0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a,
    0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41,
    0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49,
    0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70,
    0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78,
    0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f,
    0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067,
    0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e,
    0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256,
    0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d,
    0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
    0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c,
    0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634,
    0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab,
    0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3,
    0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a,
    0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92,
    0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9,
    0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1,
    0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8,
    0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0,
};
186 PyDoc_STRVAR(doc_a2b_uu, "(ascii) -> bin. Decode a line of uuencoded data");
188 static PyObject *
189 binascii_a2b_uu(PyObject *self, PyObject *args)
191 Py_buffer pascii;
192 unsigned char *ascii_data, *bin_data;
193 int leftbits = 0;
194 unsigned char this_ch;
195 unsigned int leftchar = 0;
196 PyObject *rv;
197 Py_ssize_t ascii_len, bin_len;
199 if ( !PyArg_ParseTuple(args, "s*:a2b_uu", &pascii) )
200 return NULL;
201 ascii_data = pascii.buf;
202 ascii_len = pascii.len;
204 assert(ascii_len >= 0);
206 /* First byte: binary data length (in bytes) */
207 bin_len = (*ascii_data++ - ' ') & 077;
208 ascii_len--;
210 /* Allocate the buffer */
211 if ( (rv=PyString_FromStringAndSize(NULL, bin_len)) == NULL ) {
212 PyBuffer_Release(&pascii);
213 return NULL;
215 bin_data = (unsigned char *)PyString_AS_STRING(rv);
217 for( ; bin_len > 0 ; ascii_len--, ascii_data++ ) {
218 /* XXX is it really best to add NULs if there's no more data */
219 this_ch = (ascii_len > 0) ? *ascii_data : 0;
220 if ( this_ch == '\n' || this_ch == '\r' || ascii_len <= 0) {
222 ** Whitespace. Assume some spaces got eaten at
223 ** end-of-line. (We check this later)
225 this_ch = 0;
226 } else {
227 /* Check the character for legality
228 ** The 64 in stead of the expected 63 is because
229 ** there are a few uuencodes out there that use
230 ** '`' as zero instead of space.
232 if ( this_ch < ' ' || this_ch > (' ' + 64)) {
233 PyErr_SetString(Error, "Illegal char");
234 PyBuffer_Release(&pascii);
235 Py_DECREF(rv);
236 return NULL;
238 this_ch = (this_ch - ' ') & 077;
241 ** Shift it in on the low end, and see if there's
242 ** a byte ready for output.
244 leftchar = (leftchar << 6) | (this_ch);
245 leftbits += 6;
246 if ( leftbits >= 8 ) {
247 leftbits -= 8;
248 *bin_data++ = (leftchar >> leftbits) & 0xff;
249 leftchar &= ((1 << leftbits) - 1);
250 bin_len--;
254 ** Finally, check that if there's anything left on the line
255 ** that it's whitespace only.
257 while( ascii_len-- > 0 ) {
258 this_ch = *ascii_data++;
259 /* Extra '`' may be written as padding in some cases */
260 if ( this_ch != ' ' && this_ch != ' '+64 &&
261 this_ch != '\n' && this_ch != '\r' ) {
262 PyErr_SetString(Error, "Trailing garbage");
263 PyBuffer_Release(&pascii);
264 Py_DECREF(rv);
265 return NULL;
268 PyBuffer_Release(&pascii);
269 return rv;
272 PyDoc_STRVAR(doc_b2a_uu, "(bin) -> ascii. Uuencode line of data");
274 static PyObject *
275 binascii_b2a_uu(PyObject *self, PyObject *args)
277 Py_buffer pbin;
278 unsigned char *ascii_data, *bin_data;
279 int leftbits = 0;
280 unsigned char this_ch;
281 unsigned int leftchar = 0;
282 PyObject *rv;
283 Py_ssize_t bin_len;
285 if ( !PyArg_ParseTuple(args, "s*:b2a_uu", &pbin) )
286 return NULL;
287 bin_data = pbin.buf;
288 bin_len = pbin.len;
289 if ( bin_len > 45 ) {
290 /* The 45 is a limit that appears in all uuencode's */
291 PyErr_SetString(Error, "At most 45 bytes at once");
292 PyBuffer_Release(&pbin);
293 return NULL;
296 /* We're lazy and allocate to much (fixed up later) */
297 if ( (rv=PyString_FromStringAndSize(NULL, 2 + (bin_len+2)/3*4)) == NULL ) {
298 PyBuffer_Release(&pbin);
299 return NULL;
301 ascii_data = (unsigned char *)PyString_AS_STRING(rv);
303 /* Store the length */
304 *ascii_data++ = ' ' + (bin_len & 077);
306 for( ; bin_len > 0 || leftbits != 0 ; bin_len--, bin_data++ ) {
307 /* Shift the data (or padding) into our buffer */
308 if ( bin_len > 0 ) /* Data */
309 leftchar = (leftchar << 8) | *bin_data;
310 else /* Padding */
311 leftchar <<= 8;
312 leftbits += 8;
314 /* See if there are 6-bit groups ready */
315 while ( leftbits >= 6 ) {
316 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
317 leftbits -= 6;
318 *ascii_data++ = this_ch + ' ';
321 *ascii_data++ = '\n'; /* Append a courtesy newline */
323 if (_PyString_Resize(&rv,
324 (ascii_data -
325 (unsigned char *)PyString_AS_STRING(rv))) < 0) {
326 Py_DECREF(rv);
327 rv = NULL;
329 PyBuffer_Release(&pbin);
330 return rv;
334 static int
335 binascii_find_valid(unsigned char *s, Py_ssize_t slen, int num)
337 /* Finds & returns the (num+1)th
338 ** valid character for base64, or -1 if none.
341 int ret = -1;
342 unsigned char c, b64val;
344 while ((slen > 0) && (ret == -1)) {
345 c = *s;
346 b64val = table_a2b_base64[c & 0x7f];
347 if ( ((c <= 0x7f) && (b64val != (unsigned char)-1)) ) {
348 if (num == 0)
349 ret = *s;
350 num--;
353 s++;
354 slen--;
356 return ret;
359 PyDoc_STRVAR(doc_a2b_base64, "(ascii) -> bin. Decode a line of base64 data");
361 static PyObject *
362 binascii_a2b_base64(PyObject *self, PyObject *args)
364 Py_buffer pascii;
365 unsigned char *ascii_data, *bin_data;
366 int leftbits = 0;
367 unsigned char this_ch;
368 unsigned int leftchar = 0;
369 PyObject *rv;
370 Py_ssize_t ascii_len, bin_len;
371 int quad_pos = 0;
373 if ( !PyArg_ParseTuple(args, "s*:a2b_base64", &pascii) )
374 return NULL;
375 ascii_data = pascii.buf;
376 ascii_len = pascii.len;
378 assert(ascii_len >= 0);
380 if (ascii_len > PY_SSIZE_T_MAX - 3) {
381 PyBuffer_Release(&pascii);
382 return PyErr_NoMemory();
385 bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
387 /* Allocate the buffer */
388 if ( (rv=PyString_FromStringAndSize(NULL, bin_len)) == NULL ) {
389 PyBuffer_Release(&pascii);
390 return NULL;
392 bin_data = (unsigned char *)PyString_AS_STRING(rv);
393 bin_len = 0;
395 for( ; ascii_len > 0; ascii_len--, ascii_data++) {
396 this_ch = *ascii_data;
398 if (this_ch > 0x7f ||
399 this_ch == '\r' || this_ch == '\n' || this_ch == ' ')
400 continue;
402 /* Check for pad sequences and ignore
403 ** the invalid ones.
405 if (this_ch == BASE64_PAD) {
406 if ( (quad_pos < 2) ||
407 ((quad_pos == 2) &&
408 (binascii_find_valid(ascii_data, ascii_len, 1)
409 != BASE64_PAD)) )
411 continue;
413 else {
414 /* A pad sequence means no more input.
415 ** We've already interpreted the data
416 ** from the quad at this point.
418 leftbits = 0;
419 break;
423 this_ch = table_a2b_base64[*ascii_data];
424 if ( this_ch == (unsigned char) -1 )
425 continue;
428 ** Shift it in on the low end, and see if there's
429 ** a byte ready for output.
431 quad_pos = (quad_pos + 1) & 0x03;
432 leftchar = (leftchar << 6) | (this_ch);
433 leftbits += 6;
435 if ( leftbits >= 8 ) {
436 leftbits -= 8;
437 *bin_data++ = (leftchar >> leftbits) & 0xff;
438 bin_len++;
439 leftchar &= ((1 << leftbits) - 1);
443 if (leftbits != 0) {
444 PyBuffer_Release(&pascii);
445 PyErr_SetString(Error, "Incorrect padding");
446 Py_DECREF(rv);
447 return NULL;
450 /* And set string size correctly. If the result string is empty
451 ** (because the input was all invalid) return the shared empty
452 ** string instead; _PyString_Resize() won't do this for us.
454 if (bin_len > 0) {
455 if (_PyString_Resize(&rv, bin_len) < 0) {
456 Py_DECREF(rv);
457 rv = NULL;
460 else {
461 Py_DECREF(rv);
462 rv = PyString_FromStringAndSize("", 0);
464 PyBuffer_Release(&pascii);
465 return rv;
468 PyDoc_STRVAR(doc_b2a_base64, "(bin) -> ascii. Base64-code line of data");
470 static PyObject *
471 binascii_b2a_base64(PyObject *self, PyObject *args)
473 Py_buffer pbuf;
474 unsigned char *ascii_data, *bin_data;
475 int leftbits = 0;
476 unsigned char this_ch;
477 unsigned int leftchar = 0;
478 PyObject *rv;
479 Py_ssize_t bin_len;
481 if ( !PyArg_ParseTuple(args, "s*:b2a_base64", &pbuf) )
482 return NULL;
483 bin_data = pbuf.buf;
484 bin_len = pbuf.len;
486 assert(bin_len >= 0);
488 if ( bin_len > BASE64_MAXBIN ) {
489 PyErr_SetString(Error, "Too much data for base64 line");
490 PyBuffer_Release(&pbuf);
491 return NULL;
494 /* We're lazy and allocate too much (fixed up later).
495 "+3" leaves room for up to two pad characters and a trailing
496 newline. Note that 'b' gets encoded as 'Yg==\n' (1 in, 5 out). */
497 if ( (rv=PyString_FromStringAndSize(NULL, bin_len*2 + 3)) == NULL ) {
498 PyBuffer_Release(&pbuf);
499 return NULL;
501 ascii_data = (unsigned char *)PyString_AS_STRING(rv);
503 for( ; bin_len > 0 ; bin_len--, bin_data++ ) {
504 /* Shift the data into our buffer */
505 leftchar = (leftchar << 8) | *bin_data;
506 leftbits += 8;
508 /* See if there are 6-bit groups ready */
509 while ( leftbits >= 6 ) {
510 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
511 leftbits -= 6;
512 *ascii_data++ = table_b2a_base64[this_ch];
515 if ( leftbits == 2 ) {
516 *ascii_data++ = table_b2a_base64[(leftchar&3) << 4];
517 *ascii_data++ = BASE64_PAD;
518 *ascii_data++ = BASE64_PAD;
519 } else if ( leftbits == 4 ) {
520 *ascii_data++ = table_b2a_base64[(leftchar&0xf) << 2];
521 *ascii_data++ = BASE64_PAD;
523 *ascii_data++ = '\n'; /* Append a courtesy newline */
525 if (_PyString_Resize(&rv,
526 (ascii_data -
527 (unsigned char *)PyString_AS_STRING(rv))) < 0) {
528 Py_DECREF(rv);
529 rv = NULL;
531 PyBuffer_Release(&pbuf);
532 return rv;
535 PyDoc_STRVAR(doc_a2b_hqx, "ascii -> bin, done. Decode .hqx coding");
537 static PyObject *
538 binascii_a2b_hqx(PyObject *self, PyObject *args)
540 unsigned char *ascii_data, *bin_data;
541 int leftbits = 0;
542 unsigned char this_ch;
543 unsigned int leftchar = 0;
544 PyObject *rv;
545 Py_ssize_t len;
546 int done = 0;
548 if ( !PyArg_ParseTuple(args, "t#:a2b_hqx", &ascii_data, &len) )
549 return NULL;
551 assert(len >= 0);
553 if (len > PY_SSIZE_T_MAX - 2)
554 return PyErr_NoMemory();
556 /* Allocate a string that is too big (fixed later)
557 Add two to the initial length to prevent interning which
558 would preclude subsequent resizing. */
559 if ( (rv=PyString_FromStringAndSize(NULL, len+2)) == NULL )
560 return NULL;
561 bin_data = (unsigned char *)PyString_AS_STRING(rv);
563 for( ; len > 0 ; len--, ascii_data++ ) {
564 /* Get the byte and look it up */
565 this_ch = table_a2b_hqx[*ascii_data];
566 if ( this_ch == SKIP )
567 continue;
568 if ( this_ch == FAIL ) {
569 PyErr_SetString(Error, "Illegal char");
570 Py_DECREF(rv);
571 return NULL;
573 if ( this_ch == DONE ) {
574 /* The terminating colon */
575 done = 1;
576 break;
579 /* Shift it into the buffer and see if any bytes are ready */
580 leftchar = (leftchar << 6) | (this_ch);
581 leftbits += 6;
582 if ( leftbits >= 8 ) {
583 leftbits -= 8;
584 *bin_data++ = (leftchar >> leftbits) & 0xff;
585 leftchar &= ((1 << leftbits) - 1);
589 if ( leftbits && !done ) {
590 PyErr_SetString(Incomplete,
591 "String has incomplete number of bytes");
592 Py_DECREF(rv);
593 return NULL;
595 if (_PyString_Resize(&rv,
596 (bin_data -
597 (unsigned char *)PyString_AS_STRING(rv))) < 0) {
598 Py_DECREF(rv);
599 rv = NULL;
601 if (rv) {
602 PyObject *rrv = Py_BuildValue("Oi", rv, done);
603 Py_DECREF(rv);
604 return rrv;
607 return NULL;
610 PyDoc_STRVAR(doc_rlecode_hqx, "Binhex RLE-code binary data");
612 static PyObject *
613 binascii_rlecode_hqx(PyObject *self, PyObject *args)
615 Py_buffer pbuf;
616 unsigned char *in_data, *out_data;
617 PyObject *rv;
618 unsigned char ch;
619 Py_ssize_t in, inend, len;
621 if ( !PyArg_ParseTuple(args, "s*:rlecode_hqx", &pbuf) )
622 return NULL;
623 in_data = pbuf.buf;
624 len = pbuf.len;
626 assert(len >= 0);
628 if (len > PY_SSIZE_T_MAX / 2 - 2) {
629 PyBuffer_Release(&pbuf);
630 return PyErr_NoMemory();
633 /* Worst case: output is twice as big as input (fixed later) */
634 if ( (rv=PyString_FromStringAndSize(NULL, len*2+2)) == NULL ) {
635 PyBuffer_Release(&pbuf);
636 return NULL;
638 out_data = (unsigned char *)PyString_AS_STRING(rv);
640 for( in=0; in<len; in++) {
641 ch = in_data[in];
642 if ( ch == RUNCHAR ) {
643 /* RUNCHAR. Escape it. */
644 *out_data++ = RUNCHAR;
645 *out_data++ = 0;
646 } else {
647 /* Check how many following are the same */
648 for(inend=in+1;
649 inend<len && in_data[inend] == ch &&
650 inend < in+255;
651 inend++) ;
652 if ( inend - in > 3 ) {
653 /* More than 3 in a row. Output RLE. */
654 *out_data++ = ch;
655 *out_data++ = RUNCHAR;
656 *out_data++ = inend-in;
657 in = inend-1;
658 } else {
659 /* Less than 3. Output the byte itself */
660 *out_data++ = ch;
664 if (_PyString_Resize(&rv,
665 (out_data -
666 (unsigned char *)PyString_AS_STRING(rv))) < 0) {
667 Py_DECREF(rv);
668 rv = NULL;
670 PyBuffer_Release(&pbuf);
671 return rv;
674 PyDoc_STRVAR(doc_b2a_hqx, "Encode .hqx data");
676 static PyObject *
677 binascii_b2a_hqx(PyObject *self, PyObject *args)
679 Py_buffer pbin;
680 unsigned char *ascii_data, *bin_data;
681 int leftbits = 0;
682 unsigned char this_ch;
683 unsigned int leftchar = 0;
684 PyObject *rv;
685 Py_ssize_t len;
687 if ( !PyArg_ParseTuple(args, "s*:b2a_hqx", &pbin) )
688 return NULL;
689 bin_data = pbin.buf;
690 len = pbin.len;
692 assert(len >= 0);
694 if (len > PY_SSIZE_T_MAX / 2 - 2) {
695 PyBuffer_Release(&pbin);
696 return PyErr_NoMemory();
699 /* Allocate a buffer that is at least large enough */
700 if ( (rv=PyString_FromStringAndSize(NULL, len*2+2)) == NULL ) {
701 PyBuffer_Release(&pbin);
702 return NULL;
704 ascii_data = (unsigned char *)PyString_AS_STRING(rv);
706 for( ; len > 0 ; len--, bin_data++ ) {
707 /* Shift into our buffer, and output any 6bits ready */
708 leftchar = (leftchar << 8) | *bin_data;
709 leftbits += 8;
710 while ( leftbits >= 6 ) {
711 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
712 leftbits -= 6;
713 *ascii_data++ = table_b2a_hqx[this_ch];
716 /* Output a possible runt byte */
717 if ( leftbits ) {
718 leftchar <<= (6-leftbits);
719 *ascii_data++ = table_b2a_hqx[leftchar & 0x3f];
721 if (_PyString_Resize(&rv,
722 (ascii_data -
723 (unsigned char *)PyString_AS_STRING(rv))) < 0) {
724 Py_DECREF(rv);
725 rv = NULL;
727 PyBuffer_Release(&pbin);
728 return rv;
731 PyDoc_STRVAR(doc_rledecode_hqx, "Decode hexbin RLE-coded string");
733 static PyObject *
734 binascii_rledecode_hqx(PyObject *self, PyObject *args)
736 Py_buffer pin;
737 unsigned char *in_data, *out_data;
738 unsigned char in_byte, in_repeat;
739 PyObject *rv;
740 Py_ssize_t in_len, out_len, out_len_left;
742 if ( !PyArg_ParseTuple(args, "s*:rledecode_hqx", &pin) )
743 return NULL;
744 in_data = pin.buf;
745 in_len = pin.len;
747 assert(in_len >= 0);
749 /* Empty string is a special case */
750 if ( in_len == 0 ) {
751 PyBuffer_Release(&pin);
752 return PyString_FromStringAndSize("", 0);
754 else if (in_len > PY_SSIZE_T_MAX / 2) {
755 PyBuffer_Release(&pin);
756 return PyErr_NoMemory();
759 /* Allocate a buffer of reasonable size. Resized when needed */
760 out_len = in_len*2;
761 if ( (rv=PyString_FromStringAndSize(NULL, out_len)) == NULL ) {
762 PyBuffer_Release(&pin);
763 return NULL;
765 out_len_left = out_len;
766 out_data = (unsigned char *)PyString_AS_STRING(rv);
769 ** We need two macros here to get/put bytes and handle
770 ** end-of-buffer for input and output strings.
772 #define INBYTE(b) \
773 do { \
774 if ( --in_len < 0 ) { \
775 PyErr_SetString(Incomplete, ""); \
776 Py_DECREF(rv); \
777 PyBuffer_Release(&pin); \
778 return NULL; \
780 b = *in_data++; \
781 } while(0)
783 #define OUTBYTE(b) \
784 do { \
785 if ( --out_len_left < 0 ) { \
786 if ( out_len > PY_SSIZE_T_MAX / 2) return PyErr_NoMemory(); \
787 if (_PyString_Resize(&rv, 2*out_len) < 0) \
788 { Py_DECREF(rv); PyBuffer_Release(&pin); return NULL; } \
789 out_data = (unsigned char *)PyString_AS_STRING(rv) \
790 + out_len; \
791 out_len_left = out_len-1; \
792 out_len = out_len * 2; \
794 *out_data++ = b; \
795 } while(0)
798 ** Handle first byte separately (since we have to get angry
799 ** in case of an orphaned RLE code).
801 INBYTE(in_byte);
803 if (in_byte == RUNCHAR) {
804 INBYTE(in_repeat);
805 if (in_repeat != 0) {
806 /* Note Error, not Incomplete (which is at the end
807 ** of the string only). This is a programmer error.
809 PyErr_SetString(Error, "Orphaned RLE code at start");
810 PyBuffer_Release(&pin);
811 Py_DECREF(rv);
812 return NULL;
814 OUTBYTE(RUNCHAR);
815 } else {
816 OUTBYTE(in_byte);
819 while( in_len > 0 ) {
820 INBYTE(in_byte);
822 if (in_byte == RUNCHAR) {
823 INBYTE(in_repeat);
824 if ( in_repeat == 0 ) {
825 /* Just an escaped RUNCHAR value */
826 OUTBYTE(RUNCHAR);
827 } else {
828 /* Pick up value and output a sequence of it */
829 in_byte = out_data[-1];
830 while ( --in_repeat > 0 )
831 OUTBYTE(in_byte);
833 } else {
834 /* Normal byte */
835 OUTBYTE(in_byte);
838 if (_PyString_Resize(&rv,
839 (out_data -
840 (unsigned char *)PyString_AS_STRING(rv))) < 0) {
841 Py_DECREF(rv);
842 rv = NULL;
844 PyBuffer_Release(&pin);
845 return rv;
848 PyDoc_STRVAR(doc_crc_hqx,
849 "(data, oldcrc) -> newcrc. Compute hqx CRC incrementally");
851 static PyObject *
852 binascii_crc_hqx(PyObject *self, PyObject *args)
854 Py_buffer pin;
855 unsigned char *bin_data;
856 unsigned int crc;
857 Py_ssize_t len;
859 if ( !PyArg_ParseTuple(args, "s*i:crc_hqx", &pin, &crc) )
860 return NULL;
861 bin_data = pin.buf;
862 len = pin.len;
864 while(len-- > 0) {
865 crc=((crc<<8)&0xff00)^crctab_hqx[((crc>>8)&0xff)^*bin_data++];
868 PyBuffer_Release(&pin);
869 return Py_BuildValue("i", crc);
872 PyDoc_STRVAR(doc_crc32,
873 "(data, oldcrc = 0) -> newcrc. Compute CRC-32 incrementally");
875 #ifdef USE_ZLIB_CRC32
876 /* This was taken from zlibmodule.c PyZlib_crc32 (but is PY_SSIZE_T_CLEAN) */
877 static PyObject *
878 binascii_crc32(PyObject *self, PyObject *args)
880 unsigned int crc32val = 0; /* crc32(0L, Z_NULL, 0) */
881 Py_buffer pbuf;
882 Byte *buf;
883 Py_ssize_t len;
884 int signed_val;
886 if (!PyArg_ParseTuple(args, "s*|I:crc32", &pbuf, &crc32val))
887 return NULL;
888 /* In Python 2.x we return a signed integer regardless of native platform
889 * long size (the 32bit unsigned long is treated as 32-bit signed and sign
890 * extended into a 64-bit long inside the integer object). 3.0 does the
891 * right thing and returns unsigned. http://bugs.python.org/issue1202 */
892 buf = (Byte*)pbuf.buf;
893 len = pbuf.len;
894 signed_val = crc32(crc32val, buf, len);
895 PyBuffer_Release(&pbuf);
896 return PyInt_FromLong(signed_val);
898 #else /* USE_ZLIB_CRC32 */
899 /* Crc - 32 BIT ANSI X3.66 CRC checksum files
900 Also known as: ISO 3307
901 **********************************************************************|
902 * *|
903 * Demonstration program to compute the 32-bit CRC used as the frame *|
904 * check sequence in ADCCP (ANSI X3.66, also known as FIPS PUB 71 *|
905 * and FED-STD-1003, the U.S. versions of CCITT's X.25 link-level *|
906 * protocol). The 32-bit FCS was added via the Federal Register, *|
907 * 1 June 1982, p.23798. I presume but don't know for certain that *|
908 * this polynomial is or will be included in CCITT V.41, which *|
909 * defines the 16-bit CRC (often called CRC-CCITT) polynomial. FIPS *|
910 * PUB 78 says that the 32-bit FCS reduces otherwise undetected *|
911 * errors by a factor of 10^-5 over 16-bit FCS. *|
912 * *|
913 **********************************************************************|
915 Copyright (C) 1986 Gary S. Brown. You may use this program, or
916 code or tables extracted from it, as desired without restriction.
918 First, the polynomial itself and its table of feedback terms. The
919 polynomial is
920 X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
921 Note that we take it "backwards" and put the highest-order term in
922 the lowest-order bit. The X^32 term is "implied"; the LSB is the
923 X^31 term, etc. The X^0 term (usually shown as "+1") results in
924 the MSB being 1.
926 Note that the usual hardware shift register implementation, which
927 is what we're using (we're merely optimizing it by doing eight-bit
928 chunks at a time) shifts bits into the lowest-order term. In our
929 implementation, that means shifting towards the right. Why do we
930 do it this way? Because the calculated CRC must be transmitted in
931 order from highest-order term to lowest-order term. UARTs transmit
932 characters in order from LSB to MSB. By storing the CRC this way,
933 we hand it to the UART in the order low-byte to high-byte; the UART
934 sends each low-bit to high-bit; and the result is transmission bit
935 by bit from highest- to lowest-order term without requiring any bit
936 shuffling on our part. Reception works similarly.
938 The feedback terms table consists of 256, 32-bit entries. Notes:
940 1. The table can be generated at runtime if desired; code to do so
941 is shown later. It might not be obvious, but the feedback
942 terms simply represent the results of eight shift/xor opera-
943 tions for all combinations of data and CRC register values.
945 2. The CRC accumulation logic is the same for all CRC polynomials,
946 be they sixteen or thirty-two bits wide. You simply choose the
947 appropriate table. Alternatively, because the table can be
948 generated at runtime, you can start by generating the table for
949 the polynomial in question and use exactly the same "updcrc",
950 if your application needn't simultaneously handle two CRC
951 polynomials. (Note, however, that XMODEM is strange.)
953 3. For 16-bit CRCs, the table entries need be only 16 bits wide;
954 of course, 32-bit entries work OK if the high 16 bits are zero.
956 4. The values must be right-shifted by eight bits by the "updcrc"
957 logic; the shift must be unsigned (bring in zeroes). On some
958 hardware you could probably optimize the shift in assembler by
959 using byte-swap instructions.
960 ********************************************************************/
/*
** CRC-32 feedback terms, one per byte value, indexed by the low byte of
** the running CRC xor'ed with the next data byte.  Read-only, hence const.
*/
static const unsigned int crc_32_tab[256] = {
    0x00000000U, 0x77073096U, 0xee0e612cU, 0x990951baU, 0x076dc419U,
    0x706af48fU, 0xe963a535U, 0x9e6495a3U, 0x0edb8832U, 0x79dcb8a4U,
    0xe0d5e91eU, 0x97d2d988U, 0x09b64c2bU, 0x7eb17cbdU, 0xe7b82d07U,
    0x90bf1d91U, 0x1db71064U, 0x6ab020f2U, 0xf3b97148U, 0x84be41deU,
    0x1adad47dU, 0x6ddde4ebU, 0xf4d4b551U, 0x83d385c7U, 0x136c9856U,
    0x646ba8c0U, 0xfd62f97aU, 0x8a65c9ecU, 0x14015c4fU, 0x63066cd9U,
    0xfa0f3d63U, 0x8d080df5U, 0x3b6e20c8U, 0x4c69105eU, 0xd56041e4U,
    0xa2677172U, 0x3c03e4d1U, 0x4b04d447U, 0xd20d85fdU, 0xa50ab56bU,
    0x35b5a8faU, 0x42b2986cU, 0xdbbbc9d6U, 0xacbcf940U, 0x32d86ce3U,
    0x45df5c75U, 0xdcd60dcfU, 0xabd13d59U, 0x26d930acU, 0x51de003aU,
    0xc8d75180U, 0xbfd06116U, 0x21b4f4b5U, 0x56b3c423U, 0xcfba9599U,
    0xb8bda50fU, 0x2802b89eU, 0x5f058808U, 0xc60cd9b2U, 0xb10be924U,
    0x2f6f7c87U, 0x58684c11U, 0xc1611dabU, 0xb6662d3dU, 0x76dc4190U,
    0x01db7106U, 0x98d220bcU, 0xefd5102aU, 0x71b18589U, 0x06b6b51fU,
    0x9fbfe4a5U, 0xe8b8d433U, 0x7807c9a2U, 0x0f00f934U, 0x9609a88eU,
    0xe10e9818U, 0x7f6a0dbbU, 0x086d3d2dU, 0x91646c97U, 0xe6635c01U,
    0x6b6b51f4U, 0x1c6c6162U, 0x856530d8U, 0xf262004eU, 0x6c0695edU,
    0x1b01a57bU, 0x8208f4c1U, 0xf50fc457U, 0x65b0d9c6U, 0x12b7e950U,
    0x8bbeb8eaU, 0xfcb9887cU, 0x62dd1ddfU, 0x15da2d49U, 0x8cd37cf3U,
    0xfbd44c65U, 0x4db26158U, 0x3ab551ceU, 0xa3bc0074U, 0xd4bb30e2U,
    0x4adfa541U, 0x3dd895d7U, 0xa4d1c46dU, 0xd3d6f4fbU, 0x4369e96aU,
    0x346ed9fcU, 0xad678846U, 0xda60b8d0U, 0x44042d73U, 0x33031de5U,
    0xaa0a4c5fU, 0xdd0d7cc9U, 0x5005713cU, 0x270241aaU, 0xbe0b1010U,
    0xc90c2086U, 0x5768b525U, 0x206f85b3U, 0xb966d409U, 0xce61e49fU,
    0x5edef90eU, 0x29d9c998U, 0xb0d09822U, 0xc7d7a8b4U, 0x59b33d17U,
    0x2eb40d81U, 0xb7bd5c3bU, 0xc0ba6cadU, 0xedb88320U, 0x9abfb3b6U,
    0x03b6e20cU, 0x74b1d29aU, 0xead54739U, 0x9dd277afU, 0x04db2615U,
    0x73dc1683U, 0xe3630b12U, 0x94643b84U, 0x0d6d6a3eU, 0x7a6a5aa8U,
    0xe40ecf0bU, 0x9309ff9dU, 0x0a00ae27U, 0x7d079eb1U, 0xf00f9344U,
    0x8708a3d2U, 0x1e01f268U, 0x6906c2feU, 0xf762575dU, 0x806567cbU,
    0x196c3671U, 0x6e6b06e7U, 0xfed41b76U, 0x89d32be0U, 0x10da7a5aU,
    0x67dd4accU, 0xf9b9df6fU, 0x8ebeeff9U, 0x17b7be43U, 0x60b08ed5U,
    0xd6d6a3e8U, 0xa1d1937eU, 0x38d8c2c4U, 0x4fdff252U, 0xd1bb67f1U,
    0xa6bc5767U, 0x3fb506ddU, 0x48b2364bU, 0xd80d2bdaU, 0xaf0a1b4cU,
    0x36034af6U, 0x41047a60U, 0xdf60efc3U, 0xa867df55U, 0x316e8eefU,
    0x4669be79U, 0xcb61b38cU, 0xbc66831aU, 0x256fd2a0U, 0x5268e236U,
    0xcc0c7795U, 0xbb0b4703U, 0x220216b9U, 0x5505262fU, 0xc5ba3bbeU,
    0xb2bd0b28U, 0x2bb45a92U, 0x5cb36a04U, 0xc2d7ffa7U, 0xb5d0cf31U,
    0x2cd99e8bU, 0x5bdeae1dU, 0x9b64c2b0U, 0xec63f226U, 0x756aa39cU,
    0x026d930aU, 0x9c0906a9U, 0xeb0e363fU, 0x72076785U, 0x05005713U,
    0x95bf4a82U, 0xe2b87a14U, 0x7bb12baeU, 0x0cb61b38U, 0x92d28e9bU,
    0xe5d5be0dU, 0x7cdcefb7U, 0x0bdbdf21U, 0x86d3d2d4U, 0xf1d4e242U,
    0x68ddb3f8U, 0x1fda836eU, 0x81be16cdU, 0xf6b9265bU, 0x6fb077e1U,
    0x18b74777U, 0x88085ae6U, 0xff0f6a70U, 0x66063bcaU, 0x11010b5cU,
    0x8f659effU, 0xf862ae69U, 0x616bffd3U, 0x166ccf45U, 0xa00ae278U,
    0xd70dd2eeU, 0x4e048354U, 0x3903b3c2U, 0xa7672661U, 0xd06016f7U,
    0x4969474dU, 0x3e6e77dbU, 0xaed16a4aU, 0xd9d65adcU, 0x40df0b66U,
    0x37d83bf0U, 0xa9bcae53U, 0xdebb9ec5U, 0x47b2cf7fU, 0x30b5ffe9U,
    0xbdbdf21cU, 0xcabac28aU, 0x53b39330U, 0x24b4a3a6U, 0xbad03605U,
    0xcdd70693U, 0x54de5729U, 0x23d967bfU, 0xb3667a2eU, 0xc4614ab8U,
    0x5d681b02U, 0x2a6f2b94U, 0xb40bbe37U, 0xc30c8ea1U, 0x5a05df1bU,
    0x2d02ef8dU
};
1017 static PyObject *
1018 binascii_crc32(PyObject *self, PyObject *args)
1019 { /* By Jim Ahlstrom; All rights transferred to CNRI */
1020 Py_buffer pbin;
1021 unsigned char *bin_data;
1022 unsigned int crc = 0U; /* initial value of CRC */
1023 Py_ssize_t len;
1024 int result;
1026 if ( !PyArg_ParseTuple(args, "s*|I:crc32", &pbin, &crc) )
1027 return NULL;
1028 bin_data = pbin.buf;
1029 len = pbin.len;
1031 crc = ~ crc;
1032 while (len-- > 0)
1033 crc = crc_32_tab[(crc ^ *bin_data++) & 0xffU] ^ (crc >> 8);
1034 /* Note: (crc >> 8) MUST zero fill on left */
1036 result = (int)(crc ^ 0xFFFFFFFFU);
1037 PyBuffer_Release(&pbin);
1038 return PyInt_FromLong(result);
1040 #endif /* USE_ZLIB_CRC32 */
1043 static PyObject *
1044 binascii_hexlify(PyObject *self, PyObject *args)
1046 Py_buffer parg;
1047 char* argbuf;
1048 Py_ssize_t arglen;
1049 PyObject *retval;
1050 char* retbuf;
1051 Py_ssize_t i, j;
1053 if (!PyArg_ParseTuple(args, "s*:b2a_hex", &parg))
1054 return NULL;
1055 argbuf = parg.buf;
1056 arglen = parg.len;
1058 assert(arglen >= 0);
1059 if (arglen > PY_SSIZE_T_MAX / 2) {
1060 PyBuffer_Release(&parg);
1061 return PyErr_NoMemory();
1064 retval = PyString_FromStringAndSize(NULL, arglen*2);
1065 if (!retval) {
1066 PyBuffer_Release(&parg);
1067 return NULL;
1069 retbuf = PyString_AS_STRING(retval);
1071 /* make hex version of string, taken from shamodule.c */
1072 for (i=j=0; i < arglen; i++) {
1073 char c;
1074 c = (argbuf[i] >> 4) & 0xf;
1075 c = (c>9) ? c+'a'-10 : c + '0';
1076 retbuf[j++] = c;
1077 c = argbuf[i] & 0xf;
1078 c = (c>9) ? c+'a'-10 : c + '0';
1079 retbuf[j++] = c;
1081 PyBuffer_Release(&parg);
1082 return retval;
/* Docstring attached to binascii_hexlify(); the module method table
 * registers that function under both "b2a_hex" and "hexlify". */
1085 PyDoc_STRVAR(doc_hexlify,
1086 "b2a_hex(data) -> s; Hexadecimal representation of binary data.\n\
1088 This function is also available as \"hexlify()\".");
/* Convert one hexadecimal digit character to its numeric value.
 *
 * c: character code to convert.  Any int is accepted.
 *
 * Returns 0..15 for '0'-'9', 'a'-'f' and 'A'-'F', and -1 for everything
 * else.
 *
 * The classification is done with plain ASCII range comparisons rather than
 * <ctype.h>: isdigit()/isupper()/tolower() consult the current locale and
 * have undefined behaviour when handed a value that is neither EOF nor
 * representable as unsigned char.
 */
static int
to_int(int c)
{
    if (c >= '0' && c <= '9')
        return c - '0';
    if (c >= 'a' && c <= 'f')
        return c - 'a' + 10;
    if (c >= 'A' && c <= 'F')
        return c - 'A' + 10;
    return -1;
}
1106 static PyObject *
1107 binascii_unhexlify(PyObject *self, PyObject *args)
1109 Py_buffer parg;
1110 char* argbuf;
1111 Py_ssize_t arglen;
1112 PyObject *retval;
1113 char* retbuf;
1114 Py_ssize_t i, j;
1116 if (!PyArg_ParseTuple(args, "s*:a2b_hex", &parg))
1117 return NULL;
1118 argbuf = parg.buf;
1119 arglen = parg.len;
1121 assert(arglen >= 0);
1123 /* XXX What should we do about strings with an odd length? Should
1124 * we add an implicit leading zero, or a trailing zero? For now,
1125 * raise an exception.
1127 if (arglen % 2) {
1128 PyBuffer_Release(&parg);
1129 PyErr_SetString(PyExc_TypeError, "Odd-length string");
1130 return NULL;
1133 retval = PyString_FromStringAndSize(NULL, (arglen/2));
1134 if (!retval) {
1135 PyBuffer_Release(&parg);
1136 return NULL;
1138 retbuf = PyString_AS_STRING(retval);
1140 for (i=j=0; i < arglen; i += 2) {
1141 int top = to_int(Py_CHARMASK(argbuf[i]));
1142 int bot = to_int(Py_CHARMASK(argbuf[i+1]));
1143 if (top == -1 || bot == -1) {
1144 PyErr_SetString(PyExc_TypeError,
1145 "Non-hexadecimal digit found");
1146 goto finally;
1148 retbuf[j++] = (top << 4) + bot;
1150 PyBuffer_Release(&parg);
1151 return retval;
1153 finally:
1154 PyBuffer_Release(&parg);
1155 Py_DECREF(retval);
1156 return NULL;
/* Docstring attached to binascii_unhexlify(); the module method table
 * registers that function under both "a2b_hex" and "unhexlify". */
1159 PyDoc_STRVAR(doc_unhexlify,
1160 "a2b_hex(hexstr) -> s; Binary data of hexadecimal representation.\n\
1162 hexstr must contain an even number of hex digits (upper or lower case).\n\
1163 This function is also available as \"unhexlify()\"");
/* Value of each byte interpreted as a hexadecimal digit, or -1 when the byte
 * is not one of 0-9, A-F, a-f.  The table spans all 256 byte values so that
 * a lookup can never run off the end, whatever byte the input contains. */
static const int table_hex[256] = {
    /* 0x00 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* 0x10 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* 0x20 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* 0x30 */  0, 1, 2, 3,  4, 5, 6, 7,  8, 9,-1,-1, -1,-1,-1,-1,
    /* 0x40 */ -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* 0x50 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* 0x60 */ -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* 0x70 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* 0x80 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* 0x90 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* 0xa0 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* 0xb0 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* 0xc0 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* 0xd0 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* 0xe0 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* 0xf0 */ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
};

/* Look up the hex-digit value of c.  The argument is reduced to a single
 * byte first, so a sign-extended (negative) char still indexes inside the
 * table; the expansion is parenthesized so it composes safely with
 * surrounding operators. */
#define hexval(c) (table_hex[(unsigned char)(c)])

/* Maximum length of an encoded quoted-printable output line. */
#define MAXLINESIZE 76
1180 PyDoc_STRVAR(doc_a2b_qp, "Decode a string of qp-encoded data");
1182 static PyObject*
1183 binascii_a2b_qp(PyObject *self, PyObject *args, PyObject *kwargs)
1185 Py_ssize_t in, out;
1186 char ch;
1187 Py_buffer pdata;
1188 unsigned char *data, *odata;
1189 Py_ssize_t datalen = 0;
1190 PyObject *rv;
1191 static char *kwlist[] = {"data", "header", NULL};
1192 int header = 0;
1194 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|i", kwlist, &pdata,
1195 &header))
1196 return NULL;
1197 data = pdata.buf;
1198 datalen = pdata.len;
1200 /* We allocate the output same size as input, this is overkill.
1201 * The previous implementation used calloc() so we'll zero out the
1202 * memory here too, since PyMem_Malloc() does not guarantee that.
1204 odata = (unsigned char *) PyMem_Malloc(datalen);
1205 if (odata == NULL) {
1206 PyBuffer_Release(&pdata);
1207 PyErr_NoMemory();
1208 return NULL;
1210 memset(odata, 0, datalen);
1212 in = out = 0;
1213 while (in < datalen) {
1214 if (data[in] == '=') {
1215 in++;
1216 if (in >= datalen) break;
1217 /* Soft line breaks */
1218 if ((data[in] == '\n') || (data[in] == '\r')) {
1219 if (data[in] != '\n') {
1220 while (in < datalen && data[in] != '\n') in++;
1222 if (in < datalen) in++;
1224 else if (data[in] == '=') {
1225 /* broken case from broken python qp */
1226 odata[out++] = '=';
1227 in++;
1229 else if (((data[in] >= 'A' && data[in] <= 'F') ||
1230 (data[in] >= 'a' && data[in] <= 'f') ||
1231 (data[in] >= '0' && data[in] <= '9')) &&
1232 ((data[in+1] >= 'A' && data[in+1] <= 'F') ||
1233 (data[in+1] >= 'a' && data[in+1] <= 'f') ||
1234 (data[in+1] >= '0' && data[in+1] <= '9'))) {
1235 /* hexval */
1236 ch = hexval(data[in]) << 4;
1237 in++;
1238 ch |= hexval(data[in]);
1239 in++;
1240 odata[out++] = ch;
1242 else {
1243 odata[out++] = '=';
1246 else if (header && data[in] == '_') {
1247 odata[out++] = ' ';
1248 in++;
1250 else {
1251 odata[out] = data[in];
1252 in++;
1253 out++;
1256 if ((rv = PyString_FromStringAndSize((char *)odata, out)) == NULL) {
1257 PyBuffer_Release(&pdata);
1258 PyMem_Free(odata);
1259 return NULL;
1261 PyBuffer_Release(&pdata);
1262 PyMem_Free(odata);
1263 return rv;
/* Write the two uppercase hexadecimal digits of ch to s[0] (high nibble)
 * and s[1] (low nibble).  No terminator is written.  Always returns 0. */
static int
to_hex (unsigned char ch, unsigned char *s)
{
    static const char digits[] = "0123456789ABCDEF";

    s[0] = digits[(ch >> 4) & 0x0f];
    s[1] = digits[ch & 0x0f];
    return 0;
}
/* Docstring attached to binascii_b2a_qp(), which the module method table
 * registers as "b2a_qp". */
1277 PyDoc_STRVAR(doc_b2a_qp,
1278 "b2a_qp(data, quotetabs=0, istext=1, header=0) -> s; \n\
1279 Encode a string using quoted-printable encoding. \n\
1281 On encoding, when istext is set, newlines are not encoded, and white \n\
1282 space at end of lines is. When istext is not set, \\r and \\n (CR/LF) are \n\
1283 both encoded. When quotetabs is set, space and tabs are encoded.");
1285 /* XXX: This is ridiculously complicated to be backward compatible
1286 * (mostly) with the quopri module. It doesn't re-create the quopri
1287 * module bug where text ending in CRLF has the CR encoded */
1288 static PyObject*
1289 binascii_b2a_qp (PyObject *self, PyObject *args, PyObject *kwargs)
1291 Py_ssize_t in, out;
1292 Py_buffer pdata;
1293 unsigned char *data, *odata;
1294 Py_ssize_t datalen = 0, odatalen = 0;
1295 PyObject *rv;
1296 unsigned int linelen = 0;
1297 static char *kwlist[] = {"data", "quotetabs", "istext",
1298 "header", NULL};
1299 int istext = 1;
1300 int quotetabs = 0;
1301 int header = 0;
1302 unsigned char ch;
1303 int crlf = 0;
1304 unsigned char *p;
1306 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|iii", kwlist, &pdata,
1307 &quotetabs, &istext, &header))
1308 return NULL;
1309 data = pdata.buf;
1310 datalen = pdata.len;
1312 /* See if this string is using CRLF line ends */
1313 /* XXX: this function has the side effect of converting all of
1314 * the end of lines to be the same depending on this detection
1315 * here */
1316 p = (unsigned char *) memchr(data, '\n', datalen);
1317 if ((p != NULL) && (p > data) && (*(p-1) == '\r'))
1318 crlf = 1;
1320 /* First, scan to see how many characters need to be encoded */
1321 in = 0;
1322 while (in < datalen) {
1323 if ((data[in] > 126) ||
1324 (data[in] == '=') ||
1325 (header && data[in] == '_') ||
1326 ((data[in] == '.') && (linelen == 0) &&
1327 (data[in+1] == '\n' || data[in+1] == '\r' || data[in+1] == 0)) ||
1328 (!istext && ((data[in] == '\r') || (data[in] == '\n'))) ||
1329 ((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) ||
1330 ((data[in] < 33) &&
1331 (data[in] != '\r') && (data[in] != '\n') &&
1332 (quotetabs ||
1333 (!quotetabs && ((data[in] != '\t') && (data[in] != ' '))))))
1335 if ((linelen + 3) >= MAXLINESIZE) {
1336 linelen = 0;
1337 if (crlf)
1338 odatalen += 3;
1339 else
1340 odatalen += 2;
1342 linelen += 3;
1343 odatalen += 3;
1344 in++;
1346 else {
1347 if (istext &&
1348 ((data[in] == '\n') ||
1349 ((in+1 < datalen) && (data[in] == '\r') &&
1350 (data[in+1] == '\n'))))
1352 linelen = 0;
1353 /* Protect against whitespace on end of line */
1354 if (in && ((data[in-1] == ' ') || (data[in-1] == '\t')))
1355 odatalen += 2;
1356 if (crlf)
1357 odatalen += 2;
1358 else
1359 odatalen += 1;
1360 if (data[in] == '\r')
1361 in += 2;
1362 else
1363 in++;
1365 else {
1366 if ((in + 1 != datalen) &&
1367 (data[in+1] != '\n') &&
1368 (linelen + 1) >= MAXLINESIZE) {
1369 linelen = 0;
1370 if (crlf)
1371 odatalen += 3;
1372 else
1373 odatalen += 2;
1375 linelen++;
1376 odatalen++;
1377 in++;
1382 /* We allocate the output same size as input, this is overkill.
1383 * The previous implementation used calloc() so we'll zero out the
1384 * memory here too, since PyMem_Malloc() does not guarantee that.
1386 odata = (unsigned char *) PyMem_Malloc(odatalen);
1387 if (odata == NULL) {
1388 PyBuffer_Release(&pdata);
1389 PyErr_NoMemory();
1390 return NULL;
1392 memset(odata, 0, odatalen);
1394 in = out = linelen = 0;
1395 while (in < datalen) {
1396 if ((data[in] > 126) ||
1397 (data[in] == '=') ||
1398 (header && data[in] == '_') ||
1399 ((data[in] == '.') && (linelen == 0) &&
1400 (data[in+1] == '\n' || data[in+1] == '\r' || data[in+1] == 0)) ||
1401 (!istext && ((data[in] == '\r') || (data[in] == '\n'))) ||
1402 ((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) ||
1403 ((data[in] < 33) &&
1404 (data[in] != '\r') && (data[in] != '\n') &&
1405 (quotetabs ||
1406 (!quotetabs && ((data[in] != '\t') && (data[in] != ' '))))))
1408 if ((linelen + 3 )>= MAXLINESIZE) {
1409 odata[out++] = '=';
1410 if (crlf) odata[out++] = '\r';
1411 odata[out++] = '\n';
1412 linelen = 0;
1414 odata[out++] = '=';
1415 to_hex(data[in], &odata[out]);
1416 out += 2;
1417 in++;
1418 linelen += 3;
1420 else {
1421 if (istext &&
1422 ((data[in] == '\n') ||
1423 ((in+1 < datalen) && (data[in] == '\r') &&
1424 (data[in+1] == '\n'))))
1426 linelen = 0;
1427 /* Protect against whitespace on end of line */
1428 if (out && ((odata[out-1] == ' ') || (odata[out-1] == '\t'))) {
1429 ch = odata[out-1];
1430 odata[out-1] = '=';
1431 to_hex(ch, &odata[out]);
1432 out += 2;
1435 if (crlf) odata[out++] = '\r';
1436 odata[out++] = '\n';
1437 if (data[in] == '\r')
1438 in += 2;
1439 else
1440 in++;
1442 else {
1443 if ((in + 1 != datalen) &&
1444 (data[in+1] != '\n') &&
1445 (linelen + 1) >= MAXLINESIZE) {
1446 odata[out++] = '=';
1447 if (crlf) odata[out++] = '\r';
1448 odata[out++] = '\n';
1449 linelen = 0;
1451 linelen++;
1452 if (header && data[in] == ' ') {
1453 odata[out++] = '_';
1454 in++;
1456 else {
1457 odata[out++] = data[in++];
1462 if ((rv = PyString_FromStringAndSize((char *)odata, out)) == NULL) {
1463 PyBuffer_Release(&pdata);
1464 PyMem_Free(odata);
1465 return NULL;
1467 PyBuffer_Release(&pdata);
1468 PyMem_Free(odata);
1469 return rv;
1472 /* List of functions defined in the module */
1474 static struct PyMethodDef binascii_module_methods[] = {
1475 {"a2b_uu", binascii_a2b_uu, METH_VARARGS, doc_a2b_uu},
1476 {"b2a_uu", binascii_b2a_uu, METH_VARARGS, doc_b2a_uu},
1477 {"a2b_base64", binascii_a2b_base64, METH_VARARGS, doc_a2b_base64},
1478 {"b2a_base64", binascii_b2a_base64, METH_VARARGS, doc_b2a_base64},
1479 {"a2b_hqx", binascii_a2b_hqx, METH_VARARGS, doc_a2b_hqx},
1480 {"b2a_hqx", binascii_b2a_hqx, METH_VARARGS, doc_b2a_hqx},
1481 {"b2a_hex", binascii_hexlify, METH_VARARGS, doc_hexlify},
1482 {"a2b_hex", binascii_unhexlify, METH_VARARGS, doc_unhexlify},
1483 {"hexlify", binascii_hexlify, METH_VARARGS, doc_hexlify},
1484 {"unhexlify", binascii_unhexlify, METH_VARARGS, doc_unhexlify},
1485 {"rlecode_hqx", binascii_rlecode_hqx, METH_VARARGS, doc_rlecode_hqx},
1486 {"rledecode_hqx", binascii_rledecode_hqx, METH_VARARGS,
1487 doc_rledecode_hqx},
1488 {"crc_hqx", binascii_crc_hqx, METH_VARARGS, doc_crc_hqx},
1489 {"crc32", binascii_crc32, METH_VARARGS, doc_crc32},
1490 {"a2b_qp", (PyCFunction)binascii_a2b_qp, METH_VARARGS | METH_KEYWORDS,
1491 doc_a2b_qp},
1492 {"b2a_qp", (PyCFunction)binascii_b2a_qp, METH_VARARGS | METH_KEYWORDS,
1493 doc_b2a_qp},
1494 {NULL, NULL} /* sentinel */
1498 /* Initialization function for the module (*must* be called initbinascii) */
1499 PyDoc_STRVAR(doc_binascii, "Conversion between binary data and ASCII");
1501 PyMODINIT_FUNC
1502 initbinascii(void)
1504 PyObject *m, *d, *x;
1506 /* Create the module and add the functions */
1507 m = Py_InitModule("binascii", binascii_module_methods);
1508 if (m == NULL)
1509 return;
1511 d = PyModule_GetDict(m);
1512 x = PyString_FromString(doc_binascii);
1513 PyDict_SetItemString(d, "__doc__", x);
1514 Py_XDECREF(x);
1516 Error = PyErr_NewException("binascii.Error", NULL, NULL);
1517 PyDict_SetItemString(d, "Error", Error);
1518 Incomplete = PyErr_NewException("binascii.Incomplete", NULL, NULL);
1519 PyDict_SetItemString(d, "Incomplete", Incomplete);