Imported Upstream version 2008.1+svn1553
[opeanno-debian-packaging.git] / game / ext / simplejson / _speedups.c
blob b05c7ddd262a3aeefc7385b035daf42e4958b1f4
1 #include "Python.h"
2 #if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN)
3 typedef int Py_ssize_t;
4 #define PY_SSIZE_T_MAX INT_MAX
5 #define PY_SSIZE_T_MIN INT_MIN
6 #endif
8 #ifdef __GNUC__
9 #define UNUSED __attribute__((__unused__))
10 #else
11 #define UNUSED
12 #endif
14 #define DEFAULT_ENCODING "utf-8"
16 static Py_ssize_t
17 ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars);
18 static PyObject *
19 ascii_escape_unicode(PyObject *pystr);
20 static PyObject *
21 ascii_escape_str(PyObject *pystr);
22 static PyObject *
23 py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
24 void init_speedups(void);
/*
 * True when `c` can be copied to the JSON output unescaped: printable ASCII
 * (' ' through '~') other than backslash and double quote.
 *
 * The argument is parenthesized so that operator expressions are classified
 * correctly. It is still evaluated several times, so do not pass an
 * expression with side effects.
 */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '"')

/* Number of output bytes in one \uXXXX escape. */
#define MIN_EXPANSION 6
/*
 * Worst-case output bytes for a single input code unit. On wide-unicode
 * builds a code point >= 0x10000 is written as a surrogate pair, i.e. two
 * \uXXXX escapes.
 */
#ifdef Py_UNICODE_WIDE
#define MAX_EXPANSION (2 * MIN_EXPANSION)
#else
#define MAX_EXPANSION MIN_EXPANSION
#endif
35 static Py_ssize_t
36 ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars)
38 Py_UNICODE x;
39 output[chars++] = '\\';
40 switch (c) {
41 case '\\': output[chars++] = (char)c; break;
42 case '"': output[chars++] = (char)c; break;
43 case '\b': output[chars++] = 'b'; break;
44 case '\f': output[chars++] = 'f'; break;
45 case '\n': output[chars++] = 'n'; break;
46 case '\r': output[chars++] = 'r'; break;
47 case '\t': output[chars++] = 't'; break;
48 default:
49 #ifdef Py_UNICODE_WIDE
50 if (c >= 0x10000) {
51 /* UTF-16 surrogate pair */
52 Py_UNICODE v = c - 0x10000;
53 c = 0xd800 | ((v >> 10) & 0x3ff);
54 output[chars++] = 'u';
55 x = (c & 0xf000) >> 12;
56 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
57 x = (c & 0x0f00) >> 8;
58 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
59 x = (c & 0x00f0) >> 4;
60 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
61 x = (c & 0x000f);
62 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
63 c = 0xdc00 | (v & 0x3ff);
64 output[chars++] = '\\';
66 #endif
67 output[chars++] = 'u';
68 x = (c & 0xf000) >> 12;
69 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
70 x = (c & 0x0f00) >> 8;
71 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
72 x = (c & 0x00f0) >> 4;
73 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
74 x = (c & 0x000f);
75 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
77 return chars;
80 static PyObject *
81 ascii_escape_unicode(PyObject *pystr)
83 Py_ssize_t i;
84 Py_ssize_t input_chars;
85 Py_ssize_t output_size;
86 Py_ssize_t chars;
87 PyObject *rval;
88 char *output;
89 Py_UNICODE *input_unicode;
91 input_chars = PyUnicode_GET_SIZE(pystr);
92 input_unicode = PyUnicode_AS_UNICODE(pystr);
93 /* One char input can be up to 6 chars output, estimate 4 of these */
94 output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
95 rval = PyString_FromStringAndSize(NULL, output_size);
96 if (rval == NULL) {
97 return NULL;
99 output = PyString_AS_STRING(rval);
100 chars = 0;
101 output[chars++] = '"';
102 for (i = 0; i < input_chars; i++) {
103 Py_UNICODE c = input_unicode[i];
104 if (S_CHAR(c)) {
105 output[chars++] = (char)c;
107 else {
108 chars = ascii_escape_char(c, output, chars);
110 if (output_size - chars < (1 + MAX_EXPANSION)) {
111 /* There's more than four, so let's resize by a lot */
112 output_size *= 2;
113 /* This is an upper bound */
114 if (output_size > 2 + (input_chars * MAX_EXPANSION)) {
115 output_size = 2 + (input_chars * MAX_EXPANSION);
117 if (_PyString_Resize(&rval, output_size) == -1) {
118 return NULL;
120 output = PyString_AS_STRING(rval);
123 output[chars++] = '"';
124 if (_PyString_Resize(&rval, chars) == -1) {
125 return NULL;
127 return rval;
130 static PyObject *
131 ascii_escape_str(PyObject *pystr)
133 Py_ssize_t i;
134 Py_ssize_t input_chars;
135 Py_ssize_t output_size;
136 Py_ssize_t chars;
137 PyObject *rval;
138 char *output;
139 char *input_str;
141 input_chars = PyString_GET_SIZE(pystr);
142 input_str = PyString_AS_STRING(pystr);
143 /* One char input can be up to 6 chars output, estimate 4 of these */
144 output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
145 rval = PyString_FromStringAndSize(NULL, output_size);
146 if (rval == NULL) {
147 return NULL;
149 output = PyString_AS_STRING(rval);
150 chars = 0;
151 output[chars++] = '"';
152 for (i = 0; i < input_chars; i++) {
153 Py_UNICODE c = (Py_UNICODE)input_str[i];
154 if (S_CHAR(c)) {
155 output[chars++] = (char)c;
157 else if (c > 0x7F) {
158 /* We hit a non-ASCII character, bail to unicode mode */
159 PyObject *uni;
160 Py_DECREF(rval);
161 uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict");
162 if (uni == NULL) {
163 return NULL;
165 rval = ascii_escape_unicode(uni);
166 Py_DECREF(uni);
167 return rval;
169 else {
170 chars = ascii_escape_char(c, output, chars);
172 /* An ASCII char can't possibly expand to a surrogate! */
173 if (output_size - chars < (1 + MIN_EXPANSION)) {
174 /* There's more than four, so let's resize by a lot */
175 output_size *= 2;
176 if (output_size > 2 + (input_chars * MIN_EXPANSION)) {
177 output_size = 2 + (input_chars * MIN_EXPANSION);
179 if (_PyString_Resize(&rval, output_size) == -1) {
180 return NULL;
182 output = PyString_AS_STRING(rval);
185 output[chars++] = '"';
186 if (_PyString_Resize(&rval, chars) == -1) {
187 return NULL;
189 return rval;
192 void
193 raise_errmsg(char *msg, PyObject *s, Py_ssize_t end)
195 static PyObject *errmsg_fn = NULL;
196 PyObject *pymsg;
197 if (errmsg_fn == NULL) {
198 PyObject *decoder = PyImport_ImportModule("simplejson.decoder");
199 if (decoder == NULL) return;
200 errmsg_fn = PyObject_GetAttrString(decoder, "errmsg");
201 if (errmsg_fn == NULL) return;
202 Py_XDECREF(decoder);
204 #if PY_VERSION_HEX < 0x02050000
205 pymsg = PyObject_CallFunction(errmsg_fn, "(zOi)", msg, s, end);
206 #else
207 pymsg = PyObject_CallFunction(errmsg_fn, "(zOn)", msg, s, end);
208 #endif
209 PyErr_SetObject(PyExc_ValueError, pymsg);
210 Py_XDECREF(pymsg);
213 def linecol(doc, pos):
214 lineno = doc.count('\n', 0, pos) + 1
215 if lineno == 1:
216 colno = pos
217 else:
218 colno = pos - doc.rindex('\n', 0, pos)
219 return lineno, colno
221 def errmsg(msg, doc, pos, end=None):
222 lineno, colno = linecol(doc, pos)
223 if end is None:
224 return '%s: line %d column %d (char %d)' % (msg, lineno, colno, pos)
225 endlineno, endcolno = linecol(doc, end)
226 return '%s: line %d column %d - line %d column %d (char %d - %d)' % (
227 msg, lineno, colno, endlineno, endcolno, pos, end)
232 static PyObject *
233 join_list_unicode(PyObject *lst)
235 static PyObject *ustr = NULL;
236 static PyObject *joinstr = NULL;
237 if (ustr == NULL) {
238 Py_UNICODE c = 0;
239 ustr = PyUnicode_FromUnicode(&c, 0);
241 if (joinstr == NULL) {
242 joinstr = PyString_FromString("join");
244 if (joinstr == NULL || ustr == NULL) {
245 return NULL;
247 return PyObject_CallMethodObjArgs(ustr, joinstr, lst, NULL);
250 static PyObject *
251 scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict)
253 PyObject *rval;
254 Py_ssize_t len = PyString_GET_SIZE(pystr);
255 Py_ssize_t begin = end - 1;
256 Py_ssize_t next = begin;
257 char *buf = PyString_AS_STRING(pystr);
258 PyObject *chunks = PyList_New(0);
259 if (chunks == NULL) {
260 goto bail;
262 while (1) {
263 /* Find the end of the string or the next escape */
264 Py_UNICODE c = 0;
265 PyObject *chunk = NULL;
266 for (next = end; next < len; next++) {
267 c = buf[next];
268 if (c == '"' || c == '\\') {
269 break;
271 else if (strict && c <= 0x1f) {
272 raise_errmsg("Invalid control character at", pystr, begin);
273 goto bail;
276 if (!(c == '"' || c == '\\')) {
277 raise_errmsg("Unterminated string starting at", pystr, begin);
278 goto bail;
280 /* Pick up this chunk if it's not zero length */
281 if (next != end) {
282 PyObject *strchunk = PyBuffer_FromMemory(&buf[end], next - end);
283 if (strchunk == NULL) {
284 goto bail;
286 chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL);
287 Py_XDECREF(strchunk);
288 if (chunk == NULL) {
289 goto bail;
291 if (PyList_Append(chunks, chunk)) {
292 goto bail;
294 Py_DECREF(chunk);
296 next++;
297 if (c == '"') {
298 end = next;
299 break;
301 if (next == len) {
302 raise_errmsg("Unterminated string starting at", pystr, begin);
303 goto bail;
305 c = buf[next];
306 if (c != 'u') {
307 /* Non-unicode backslash escapes */
308 end = next + 1;
309 switch (c) {
310 case '"': break;
311 case '\\': break;
312 case '/': break;
313 case 'b': c = '\b'; break;
314 case 'f': c = '\f'; break;
315 case 'n': c = '\n'; break;
316 case 'r': c = '\r'; break;
317 case 't': c = '\t'; break;
318 default: c = 0;
320 if (c == 0) {
321 raise_errmsg("Invalid \\escape", pystr, end - 2);
322 goto bail;
325 else {
326 c = 0;
327 next++;
328 end = next + 4;
329 if (end >= len) {
330 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
331 goto bail;
333 /* Decode 4 hex digits */
334 for (; next < end; next++) {
335 Py_ssize_t shl = (end - next - 1) << 2;
336 Py_UNICODE digit = buf[next];
337 switch (digit) {
338 case '0': case '1': case '2': case '3': case '4':
339 case '5': case '6': case '7': case '8': case '9':
340 c |= (digit - '0') << shl; break;
341 case 'a': case 'b': case 'c': case 'd': case 'e':
342 case 'f':
343 c |= (digit - 'a' + 10) << shl; break;
344 case 'A': case 'B': case 'C': case 'D': case 'E':
345 case 'F':
346 c |= (digit - 'A' + 10) << shl; break;
347 default:
348 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
349 goto bail;
352 #ifdef Py_UNICODE_WIDE
353 /* Surrogate pair */
354 if (c >= 0xd800 && c <= 0xdbff) {
355 Py_UNICODE c2 = 0;
356 if (end + 6 >= len) {
357 raise_errmsg("Invalid \\uXXXX\\uXXXX surrogate pair", pystr,
358 end - 5);
360 if (buf[next++] != '\\' || buf[next++] != 'u') {
361 raise_errmsg("Invalid \\uXXXX\\uXXXX surrogate pair", pystr,
362 end - 5);
364 end += 6;
365 /* Decode 4 hex digits */
366 for (; next < end; next++) {
367 Py_ssize_t shl = (end - next - 1) << 2;
368 Py_UNICODE digit = buf[next];
369 switch (digit) {
370 case '0': case '1': case '2': case '3': case '4':
371 case '5': case '6': case '7': case '8': case '9':
372 c2 |= (digit - '0') << shl; break;
373 case 'a': case 'b': case 'c': case 'd': case 'e':
374 case 'f':
375 c2 |= (digit - 'a' + 10) << shl; break;
376 case 'A': case 'B': case 'C': case 'D': case 'E':
377 case 'F':
378 c2 |= (digit - 'A' + 10) << shl; break;
379 default:
380 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
381 goto bail;
384 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
386 #endif
388 chunk = PyUnicode_FromUnicode(&c, 1);
389 if (chunk == NULL) {
390 goto bail;
392 if (PyList_Append(chunks, chunk)) {
393 goto bail;
395 Py_DECREF(chunk);
398 rval = join_list_unicode(chunks);
399 if (rval == NULL) {
400 goto bail;
402 Py_DECREF(chunks);
403 chunks = NULL;
404 #if PY_VERSION_HEX < 0x02050000
405 return Py_BuildValue("(Ni)", rval, end);
406 #else
407 return Py_BuildValue("(Nn)", rval, end);
408 #endif
409 bail:
410 Py_XDECREF(chunks);
411 return NULL;
415 static PyObject *
416 scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict)
418 PyObject *rval;
419 Py_ssize_t len = PyUnicode_GET_SIZE(pystr);
420 Py_ssize_t begin = end - 1;
421 Py_ssize_t next = begin;
422 const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr);
423 PyObject *chunks = PyList_New(0);
424 if (chunks == NULL) {
425 goto bail;
427 while (1) {
428 /* Find the end of the string or the next escape */
429 Py_UNICODE c = 0;
430 PyObject *chunk = NULL;
431 for (next = end; next < len; next++) {
432 c = buf[next];
433 if (c == '"' || c == '\\') {
434 break;
436 else if (strict && c <= 0x1f) {
437 raise_errmsg("Invalid control character at", pystr, begin);
438 goto bail;
441 if (!(c == '"' || c == '\\')) {
442 raise_errmsg("Unterminated string starting at", pystr, begin);
443 goto bail;
445 /* Pick up this chunk if it's not zero length */
446 if (next != end) {
447 chunk = PyUnicode_FromUnicode(&buf[end], next - end);
448 if (chunk == NULL) {
449 goto bail;
451 if (PyList_Append(chunks, chunk)) {
452 goto bail;
454 Py_DECREF(chunk);
456 next++;
457 if (c == '"') {
458 end = next;
459 break;
461 if (next == len) {
462 raise_errmsg("Unterminated string starting at", pystr, begin);
463 goto bail;
465 c = buf[next];
466 if (c != 'u') {
467 /* Non-unicode backslash escapes */
468 end = next + 1;
469 switch (c) {
470 case '"': break;
471 case '\\': break;
472 case '/': break;
473 case 'b': c = '\b'; break;
474 case 'f': c = '\f'; break;
475 case 'n': c = '\n'; break;
476 case 'r': c = '\r'; break;
477 case 't': c = '\t'; break;
478 default: c = 0;
480 if (c == 0) {
481 raise_errmsg("Invalid \\escape", pystr, end - 2);
482 goto bail;
485 else {
486 c = 0;
487 next++;
488 end = next + 4;
489 if (end >= len) {
490 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
491 goto bail;
493 /* Decode 4 hex digits */
494 for (; next < end; next++) {
495 Py_ssize_t shl = (end - next - 1) << 2;
496 Py_UNICODE digit = buf[next];
497 switch (digit) {
498 case '0': case '1': case '2': case '3': case '4':
499 case '5': case '6': case '7': case '8': case '9':
500 c |= (digit - '0') << shl; break;
501 case 'a': case 'b': case 'c': case 'd': case 'e':
502 case 'f':
503 c |= (digit - 'a' + 10) << shl; break;
504 case 'A': case 'B': case 'C': case 'D': case 'E':
505 case 'F':
506 c |= (digit - 'A' + 10) << shl; break;
507 default:
508 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
509 goto bail;
512 #ifdef Py_UNICODE_WIDE
513 /* Surrogate pair */
514 if (c >= 0xd800 && c <= 0xdbff) {
515 Py_UNICODE c2 = 0;
516 if (end + 6 >= len) {
517 raise_errmsg("Invalid \\uXXXX\\uXXXX surrogate pair", pystr,
518 end - 5);
520 if (buf[next++] != '\\' || buf[next++] != 'u') {
521 raise_errmsg("Invalid \\uXXXX\\uXXXX surrogate pair", pystr,
522 end - 5);
524 end += 6;
525 /* Decode 4 hex digits */
526 for (; next < end; next++) {
527 Py_ssize_t shl = (end - next - 1) << 2;
528 Py_UNICODE digit = buf[next];
529 switch (digit) {
530 case '0': case '1': case '2': case '3': case '4':
531 case '5': case '6': case '7': case '8': case '9':
532 c2 |= (digit - '0') << shl; break;
533 case 'a': case 'b': case 'c': case 'd': case 'e':
534 case 'f':
535 c2 |= (digit - 'a' + 10) << shl; break;
536 case 'A': case 'B': case 'C': case 'D': case 'E':
537 case 'F':
538 c2 |= (digit - 'A' + 10) << shl; break;
539 default:
540 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
541 goto bail;
544 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
546 #endif
548 chunk = PyUnicode_FromUnicode(&c, 1);
549 if (chunk == NULL) {
550 goto bail;
552 if (PyList_Append(chunks, chunk)) {
553 goto bail;
555 Py_DECREF(chunk);
558 rval = join_list_unicode(chunks);
559 if (rval == NULL) {
560 goto bail;
562 Py_DECREF(chunks);
563 chunks = NULL;
564 #if PY_VERSION_HEX < 0x02050000
565 return Py_BuildValue("(Ni)", rval, end);
566 #else
567 return Py_BuildValue("(Nn)", rval, end);
568 #endif
569 bail:
570 Py_XDECREF(chunks);
571 return NULL;
574 PyDoc_STRVAR(pydoc_scanstring,
575 "scanstring(basestring, end, encoding) -> (str, end)\n"
576 "\n"
577 "..."
580 static PyObject *
581 py_scanstring(PyObject* self UNUSED, PyObject *args)
583 PyObject *pystr;
584 Py_ssize_t end;
585 char *encoding = NULL;
586 int strict = 0;
587 #if PY_VERSION_HEX < 0x02050000
588 if (!PyArg_ParseTuple(args, "Oi|zi:scanstring", &pystr, &end, &encoding, &strict)) {
589 #else
590 if (!PyArg_ParseTuple(args, "On|zi:scanstring", &pystr, &end, &encoding, &strict)) {
591 #endif
592 return NULL;
594 if (encoding == NULL) {
595 encoding = DEFAULT_ENCODING;
597 if (PyString_Check(pystr)) {
598 return scanstring_str(pystr, end, encoding, strict);
600 else if (PyUnicode_Check(pystr)) {
601 return scanstring_unicode(pystr, end, strict);
603 PyErr_SetString(PyExc_TypeError, "first argument must be a string");
604 return NULL;
607 PyDoc_STRVAR(pydoc_encode_basestring_ascii,
608 "encode_basestring_ascii(basestring) -> str\n"
609 "\n"
610 "..."
613 static PyObject *
614 py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr)
616 /* METH_O */
617 if (PyString_Check(pystr)) {
618 return ascii_escape_str(pystr);
620 else if (PyUnicode_Check(pystr)) {
621 return ascii_escape_unicode(pystr);
623 PyErr_SetString(PyExc_TypeError, "first argument must be a string");
624 return NULL;
627 static PyMethodDef speedups_methods[] = {
628 {"encode_basestring_ascii",
629 (PyCFunction)py_encode_basestring_ascii,
630 METH_O,
631 pydoc_encode_basestring_ascii},
632 {"scanstring",
633 (PyCFunction)py_scanstring,
634 METH_VARARGS,
635 pydoc_scanstring},
636 {NULL, NULL, 0, NULL}
639 void
640 init_speedups(void)
642 PyObject *m;
643 m = Py_InitModule4("_speedups", speedups_methods, NULL, NULL, PYTHON_API_VERSION);