Issue #7042: Use a better mechanism for testing timers in test_signal.
[python.git] / Modules / _json.c
blob7c8abea8f0f278e09c28667e1a459f5581a85037
1 #include "Python.h"
2 #include "structmember.h"
3 #if PY_VERSION_HEX < 0x02060000 && !defined(Py_TYPE)
4 #define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
5 #endif
6 #if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN)
7 typedef int Py_ssize_t;
8 #define PY_SSIZE_T_MAX INT_MAX
9 #define PY_SSIZE_T_MIN INT_MIN
10 #define PyInt_FromSsize_t PyInt_FromLong
11 #define PyInt_AsSsize_t PyInt_AsLong
12 #endif
13 #ifndef Py_IS_FINITE
14 #define Py_IS_FINITE(X) (!Py_IS_INFINITY(X) && !Py_IS_NAN(X))
15 #endif
17 #ifdef __GNUC__
18 #define UNUSED __attribute__((__unused__))
19 #else
20 #define UNUSED
21 #endif
23 #define DEFAULT_ENCODING "utf-8"
25 #define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType)
26 #define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType)
27 #define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType)
28 #define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType)
30 static PyTypeObject PyScannerType;
31 static PyTypeObject PyEncoderType;
33 typedef struct _PyScannerObject {
34 PyObject_HEAD
35 PyObject *encoding;
36 PyObject *strict;
37 PyObject *object_hook;
38 PyObject *pairs_hook;
39 PyObject *parse_float;
40 PyObject *parse_int;
41 PyObject *parse_constant;
42 } PyScannerObject;
44 static PyMemberDef scanner_members[] = {
45 {"encoding", T_OBJECT, offsetof(PyScannerObject, encoding), READONLY, "encoding"},
46 {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"},
47 {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
48 {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, pairs_hook), READONLY, "object_pairs_hook"},
49 {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
50 {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
51 {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
52 {NULL}
55 typedef struct _PyEncoderObject {
56 PyObject_HEAD
57 PyObject *markers;
58 PyObject *defaultfn;
59 PyObject *encoder;
60 PyObject *indent;
61 PyObject *key_separator;
62 PyObject *item_separator;
63 PyObject *sort_keys;
64 PyObject *skipkeys;
65 int fast_encode;
66 int allow_nan;
67 } PyEncoderObject;
69 static PyMemberDef encoder_members[] = {
70 {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
71 {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
72 {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
73 {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
74 {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
75 {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
76 {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
77 {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
78 {NULL}
81 static Py_ssize_t
82 ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars);
83 static PyObject *
84 ascii_escape_unicode(PyObject *pystr);
85 static PyObject *
86 ascii_escape_str(PyObject *pystr);
87 static PyObject *
88 py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
89 void init_json(void);
90 static PyObject *
91 scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
92 static PyObject *
93 scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
94 static PyObject *
95 _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
96 static PyObject *
97 scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
98 static int
99 scanner_init(PyObject *self, PyObject *args, PyObject *kwds);
100 static void
101 scanner_dealloc(PyObject *self);
102 static int
103 scanner_clear(PyObject *self);
104 static PyObject *
105 encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
106 static int
107 encoder_init(PyObject *self, PyObject *args, PyObject *kwds);
108 static void
109 encoder_dealloc(PyObject *self);
110 static int
111 encoder_clear(PyObject *self);
112 static int
113 encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level);
114 static int
115 encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level);
116 static int
117 encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level);
118 static PyObject *
119 _encoded_const(PyObject *obj);
120 static void
121 raise_errmsg(char *msg, PyObject *s, Py_ssize_t end);
122 static PyObject *
123 encoder_encode_string(PyEncoderObject *s, PyObject *obj);
124 static int
125 _convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr);
126 static PyObject *
127 _convertPyInt_FromSsize_t(Py_ssize_t *size_ptr);
128 static PyObject *
129 encoder_encode_float(PyEncoderObject *s, PyObject *obj);
/* S_CHAR(c): true when c is printable ASCII that may be copied verbatim into
   a JSON string, i.e. ' '..'~' except backslash and double quote.
   Each use of the argument is parenthesized so that expression arguments
   (for example a conditional expression) group as intended.  The argument is
   still evaluated several times, so it must not have side effects. */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '"')

/* IS_WHITESPACE(c): true for the four JSON whitespace characters. */
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))

/* Longest escape for one UTF-16 code unit is "\uXXXX" (6 bytes).  On wide
   Py_UNICODE builds one code point can need a surrogate pair, i.e. twice that. */
#define MIN_EXPANSION 6
#ifdef Py_UNICODE_WIDE
#define MAX_EXPANSION (2 * MIN_EXPANSION)
#else
#define MAX_EXPANSION MIN_EXPANSION
#endif
141 static int
142 _convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr)
144 /* PyObject to Py_ssize_t converter */
145 *size_ptr = PyInt_AsSsize_t(o);
146 if (*size_ptr == -1 && PyErr_Occurred())
147 return 0;
148 return 1;
151 static PyObject *
152 _convertPyInt_FromSsize_t(Py_ssize_t *size_ptr)
154 /* Py_ssize_t to PyObject converter */
155 return PyInt_FromSsize_t(*size_ptr);
158 static Py_ssize_t
159 ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars)
161 /* Escape unicode code point c to ASCII escape sequences
162 in char *output. output must have at least 12 bytes unused to
163 accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
164 output[chars++] = '\\';
165 switch (c) {
166 case '\\': output[chars++] = (char)c; break;
167 case '"': output[chars++] = (char)c; break;
168 case '\b': output[chars++] = 'b'; break;
169 case '\f': output[chars++] = 'f'; break;
170 case '\n': output[chars++] = 'n'; break;
171 case '\r': output[chars++] = 'r'; break;
172 case '\t': output[chars++] = 't'; break;
173 default:
174 #ifdef Py_UNICODE_WIDE
175 if (c >= 0x10000) {
176 /* UTF-16 surrogate pair */
177 Py_UNICODE v = c - 0x10000;
178 c = 0xd800 | ((v >> 10) & 0x3ff);
179 output[chars++] = 'u';
180 output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
181 output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];
182 output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];
183 output[chars++] = "0123456789abcdef"[(c ) & 0xf];
184 c = 0xdc00 | (v & 0x3ff);
185 output[chars++] = '\\';
187 #endif
188 output[chars++] = 'u';
189 output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
190 output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];
191 output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];
192 output[chars++] = "0123456789abcdef"[(c ) & 0xf];
194 return chars;
197 static PyObject *
198 ascii_escape_unicode(PyObject *pystr)
200 /* Take a PyUnicode pystr and return a new ASCII-only escaped PyString */
201 Py_ssize_t i;
202 Py_ssize_t input_chars;
203 Py_ssize_t output_size;
204 Py_ssize_t max_output_size;
205 Py_ssize_t chars;
206 PyObject *rval;
207 char *output;
208 Py_UNICODE *input_unicode;
210 input_chars = PyUnicode_GET_SIZE(pystr);
211 input_unicode = PyUnicode_AS_UNICODE(pystr);
213 /* One char input can be up to 6 chars output, estimate 4 of these */
214 output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
215 max_output_size = 2 + (input_chars * MAX_EXPANSION);
216 rval = PyString_FromStringAndSize(NULL, output_size);
217 if (rval == NULL) {
218 return NULL;
220 output = PyString_AS_STRING(rval);
221 chars = 0;
222 output[chars++] = '"';
223 for (i = 0; i < input_chars; i++) {
224 Py_UNICODE c = input_unicode[i];
225 if (S_CHAR(c)) {
226 output[chars++] = (char)c;
228 else {
229 chars = ascii_escape_char(c, output, chars);
231 if (output_size - chars < (1 + MAX_EXPANSION)) {
232 /* There's more than four, so let's resize by a lot */
233 Py_ssize_t new_output_size = output_size * 2;
234 /* This is an upper bound */
235 if (new_output_size > max_output_size) {
236 new_output_size = max_output_size;
238 /* Make sure that the output size changed before resizing */
239 if (new_output_size != output_size) {
240 output_size = new_output_size;
241 if (_PyString_Resize(&rval, output_size) == -1) {
242 return NULL;
244 output = PyString_AS_STRING(rval);
248 output[chars++] = '"';
249 if (_PyString_Resize(&rval, chars) == -1) {
250 return NULL;
252 return rval;
255 static PyObject *
256 ascii_escape_str(PyObject *pystr)
258 /* Take a PyString pystr and return a new ASCII-only escaped PyString */
259 Py_ssize_t i;
260 Py_ssize_t input_chars;
261 Py_ssize_t output_size;
262 Py_ssize_t chars;
263 PyObject *rval;
264 char *output;
265 char *input_str;
267 input_chars = PyString_GET_SIZE(pystr);
268 input_str = PyString_AS_STRING(pystr);
270 /* Fast path for a string that's already ASCII */
271 for (i = 0; i < input_chars; i++) {
272 Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i];
273 if (!S_CHAR(c)) {
274 /* If we have to escape something, scan the string for unicode */
275 Py_ssize_t j;
276 for (j = i; j < input_chars; j++) {
277 c = (Py_UNICODE)(unsigned char)input_str[j];
278 if (c > 0x7f) {
279 /* We hit a non-ASCII character, bail to unicode mode */
280 PyObject *uni;
281 uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict");
282 if (uni == NULL) {
283 return NULL;
285 rval = ascii_escape_unicode(uni);
286 Py_DECREF(uni);
287 return rval;
290 break;
294 if (i == input_chars) {
295 /* Input is already ASCII */
296 output_size = 2 + input_chars;
298 else {
299 /* One char input can be up to 6 chars output, estimate 4 of these */
300 output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
302 rval = PyString_FromStringAndSize(NULL, output_size);
303 if (rval == NULL) {
304 return NULL;
306 output = PyString_AS_STRING(rval);
307 output[0] = '"';
309 /* We know that everything up to i is ASCII already */
310 chars = i + 1;
311 memcpy(&output[1], input_str, i);
313 for (; i < input_chars; i++) {
314 Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i];
315 if (S_CHAR(c)) {
316 output[chars++] = (char)c;
318 else {
319 chars = ascii_escape_char(c, output, chars);
321 /* An ASCII char can't possibly expand to a surrogate! */
322 if (output_size - chars < (1 + MIN_EXPANSION)) {
323 /* There's more than four, so let's resize by a lot */
324 output_size *= 2;
325 if (output_size > 2 + (input_chars * MIN_EXPANSION)) {
326 output_size = 2 + (input_chars * MIN_EXPANSION);
328 if (_PyString_Resize(&rval, output_size) == -1) {
329 return NULL;
331 output = PyString_AS_STRING(rval);
334 output[chars++] = '"';
335 if (_PyString_Resize(&rval, chars) == -1) {
336 return NULL;
338 return rval;
341 static void
342 raise_errmsg(char *msg, PyObject *s, Py_ssize_t end)
344 /* Use the Python function json.decoder.errmsg to raise a nice
345 looking ValueError exception */
346 static PyObject *errmsg_fn = NULL;
347 PyObject *pymsg;
348 if (errmsg_fn == NULL) {
349 PyObject *decoder = PyImport_ImportModule("json.decoder");
350 if (decoder == NULL)
351 return;
352 errmsg_fn = PyObject_GetAttrString(decoder, "errmsg");
353 Py_DECREF(decoder);
354 if (errmsg_fn == NULL)
355 return;
357 pymsg = PyObject_CallFunction(errmsg_fn, "(zOO&)", msg, s, _convertPyInt_FromSsize_t, &end);
358 if (pymsg) {
359 PyErr_SetObject(PyExc_ValueError, pymsg);
360 Py_DECREF(pymsg);
364 static PyObject *
365 join_list_unicode(PyObject *lst)
367 /* return u''.join(lst) */
368 static PyObject *joinfn = NULL;
369 if (joinfn == NULL) {
370 PyObject *ustr = PyUnicode_FromUnicode(NULL, 0);
371 if (ustr == NULL)
372 return NULL;
374 joinfn = PyObject_GetAttrString(ustr, "join");
375 Py_DECREF(ustr);
376 if (joinfn == NULL)
377 return NULL;
379 return PyObject_CallFunctionObjArgs(joinfn, lst, NULL);
382 static PyObject *
383 join_list_string(PyObject *lst)
385 /* return ''.join(lst) */
386 static PyObject *joinfn = NULL;
387 if (joinfn == NULL) {
388 PyObject *ustr = PyString_FromStringAndSize(NULL, 0);
389 if (ustr == NULL)
390 return NULL;
392 joinfn = PyObject_GetAttrString(ustr, "join");
393 Py_DECREF(ustr);
394 if (joinfn == NULL)
395 return NULL;
397 return PyObject_CallFunctionObjArgs(joinfn, lst, NULL);
400 static PyObject *
401 _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
402 /* return (rval, idx) tuple, stealing reference to rval */
403 PyObject *tpl;
404 PyObject *pyidx;
406 steal a reference to rval, returns (rval, idx)
408 if (rval == NULL) {
409 return NULL;
411 pyidx = PyInt_FromSsize_t(idx);
412 if (pyidx == NULL) {
413 Py_DECREF(rval);
414 return NULL;
416 tpl = PyTuple_New(2);
417 if (tpl == NULL) {
418 Py_DECREF(pyidx);
419 Py_DECREF(rval);
420 return NULL;
422 PyTuple_SET_ITEM(tpl, 0, rval);
423 PyTuple_SET_ITEM(tpl, 1, pyidx);
424 return tpl;
427 static PyObject *
428 scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr)
430 /* Read the JSON string from PyString pystr.
431 end is the index of the first character after the quote.
432 encoding is the encoding of pystr (must be an ASCII superset)
433 if strict is zero then literal control characters are allowed
434 *next_end_ptr is a return-by-reference index of the character
435 after the end quote
437 Return value is a new PyString (if ASCII-only) or PyUnicode
439 PyObject *rval;
440 Py_ssize_t len = PyString_GET_SIZE(pystr);
441 Py_ssize_t begin = end - 1;
442 Py_ssize_t next = begin;
443 int has_unicode = 0;
444 char *buf = PyString_AS_STRING(pystr);
445 PyObject *chunks = PyList_New(0);
446 if (chunks == NULL) {
447 goto bail;
449 if (end < 0 || len <= end) {
450 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
451 goto bail;
453 while (1) {
454 /* Find the end of the string or the next escape */
455 Py_UNICODE c = 0;
456 PyObject *chunk = NULL;
457 for (next = end; next < len; next++) {
458 c = (unsigned char)buf[next];
459 if (c == '"' || c == '\\') {
460 break;
462 else if (strict && c <= 0x1f) {
463 raise_errmsg("Invalid control character at", pystr, next);
464 goto bail;
466 else if (c > 0x7f) {
467 has_unicode = 1;
470 if (!(c == '"' || c == '\\')) {
471 raise_errmsg("Unterminated string starting at", pystr, begin);
472 goto bail;
474 /* Pick up this chunk if it's not zero length */
475 if (next != end) {
476 PyObject *strchunk = PyString_FromStringAndSize(&buf[end], next - end);
477 if (strchunk == NULL) {
478 goto bail;
480 if (has_unicode) {
481 chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL);
482 Py_DECREF(strchunk);
483 if (chunk == NULL) {
484 goto bail;
487 else {
488 chunk = strchunk;
490 if (PyList_Append(chunks, chunk)) {
491 Py_DECREF(chunk);
492 goto bail;
494 Py_DECREF(chunk);
496 next++;
497 if (c == '"') {
498 end = next;
499 break;
501 if (next == len) {
502 raise_errmsg("Unterminated string starting at", pystr, begin);
503 goto bail;
505 c = buf[next];
506 if (c != 'u') {
507 /* Non-unicode backslash escapes */
508 end = next + 1;
509 switch (c) {
510 case '"': break;
511 case '\\': break;
512 case '/': break;
513 case 'b': c = '\b'; break;
514 case 'f': c = '\f'; break;
515 case 'n': c = '\n'; break;
516 case 'r': c = '\r'; break;
517 case 't': c = '\t'; break;
518 default: c = 0;
520 if (c == 0) {
521 raise_errmsg("Invalid \\escape", pystr, end - 2);
522 goto bail;
525 else {
526 c = 0;
527 next++;
528 end = next + 4;
529 if (end >= len) {
530 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
531 goto bail;
533 /* Decode 4 hex digits */
534 for (; next < end; next++) {
535 Py_UNICODE digit = buf[next];
536 c <<= 4;
537 switch (digit) {
538 case '0': case '1': case '2': case '3': case '4':
539 case '5': case '6': case '7': case '8': case '9':
540 c |= (digit - '0'); break;
541 case 'a': case 'b': case 'c': case 'd': case 'e':
542 case 'f':
543 c |= (digit - 'a' + 10); break;
544 case 'A': case 'B': case 'C': case 'D': case 'E':
545 case 'F':
546 c |= (digit - 'A' + 10); break;
547 default:
548 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
549 goto bail;
552 #ifdef Py_UNICODE_WIDE
553 /* Surrogate pair */
554 if ((c & 0xfc00) == 0xd800) {
555 Py_UNICODE c2 = 0;
556 if (end + 6 >= len) {
557 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
558 goto bail;
560 if (buf[next++] != '\\' || buf[next++] != 'u') {
561 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
562 goto bail;
564 end += 6;
565 /* Decode 4 hex digits */
566 for (; next < end; next++) {
567 c2 <<= 4;
568 Py_UNICODE digit = buf[next];
569 switch (digit) {
570 case '0': case '1': case '2': case '3': case '4':
571 case '5': case '6': case '7': case '8': case '9':
572 c2 |= (digit - '0'); break;
573 case 'a': case 'b': case 'c': case 'd': case 'e':
574 case 'f':
575 c2 |= (digit - 'a' + 10); break;
576 case 'A': case 'B': case 'C': case 'D': case 'E':
577 case 'F':
578 c2 |= (digit - 'A' + 10); break;
579 default:
580 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
581 goto bail;
584 if ((c2 & 0xfc00) != 0xdc00) {
585 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
586 goto bail;
588 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
590 else if ((c & 0xfc00) == 0xdc00) {
591 raise_errmsg("Unpaired low surrogate", pystr, end - 5);
592 goto bail;
594 #endif
596 if (c > 0x7f) {
597 has_unicode = 1;
599 if (has_unicode) {
600 chunk = PyUnicode_FromUnicode(&c, 1);
601 if (chunk == NULL) {
602 goto bail;
605 else {
606 char c_char = Py_CHARMASK(c);
607 chunk = PyString_FromStringAndSize(&c_char, 1);
608 if (chunk == NULL) {
609 goto bail;
612 if (PyList_Append(chunks, chunk)) {
613 Py_DECREF(chunk);
614 goto bail;
616 Py_DECREF(chunk);
619 rval = join_list_string(chunks);
620 if (rval == NULL) {
621 goto bail;
623 Py_CLEAR(chunks);
624 *next_end_ptr = end;
625 return rval;
626 bail:
627 *next_end_ptr = -1;
628 Py_XDECREF(chunks);
629 return NULL;
633 static PyObject *
634 scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
636 /* Read the JSON string from PyUnicode pystr.
637 end is the index of the first character after the quote.
638 if strict is zero then literal control characters are allowed
639 *next_end_ptr is a return-by-reference index of the character
640 after the end quote
642 Return value is a new PyUnicode
644 PyObject *rval;
645 Py_ssize_t len = PyUnicode_GET_SIZE(pystr);
646 Py_ssize_t begin = end - 1;
647 Py_ssize_t next = begin;
648 const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr);
649 PyObject *chunks = PyList_New(0);
650 if (chunks == NULL) {
651 goto bail;
653 if (end < 0 || len <= end) {
654 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
655 goto bail;
657 while (1) {
658 /* Find the end of the string or the next escape */
659 Py_UNICODE c = 0;
660 PyObject *chunk = NULL;
661 for (next = end; next < len; next++) {
662 c = buf[next];
663 if (c == '"' || c == '\\') {
664 break;
666 else if (strict && c <= 0x1f) {
667 raise_errmsg("Invalid control character at", pystr, next);
668 goto bail;
671 if (!(c == '"' || c == '\\')) {
672 raise_errmsg("Unterminated string starting at", pystr, begin);
673 goto bail;
675 /* Pick up this chunk if it's not zero length */
676 if (next != end) {
677 chunk = PyUnicode_FromUnicode(&buf[end], next - end);
678 if (chunk == NULL) {
679 goto bail;
681 if (PyList_Append(chunks, chunk)) {
682 Py_DECREF(chunk);
683 goto bail;
685 Py_DECREF(chunk);
687 next++;
688 if (c == '"') {
689 end = next;
690 break;
692 if (next == len) {
693 raise_errmsg("Unterminated string starting at", pystr, begin);
694 goto bail;
696 c = buf[next];
697 if (c != 'u') {
698 /* Non-unicode backslash escapes */
699 end = next + 1;
700 switch (c) {
701 case '"': break;
702 case '\\': break;
703 case '/': break;
704 case 'b': c = '\b'; break;
705 case 'f': c = '\f'; break;
706 case 'n': c = '\n'; break;
707 case 'r': c = '\r'; break;
708 case 't': c = '\t'; break;
709 default: c = 0;
711 if (c == 0) {
712 raise_errmsg("Invalid \\escape", pystr, end - 2);
713 goto bail;
716 else {
717 c = 0;
718 next++;
719 end = next + 4;
720 if (end >= len) {
721 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
722 goto bail;
724 /* Decode 4 hex digits */
725 for (; next < end; next++) {
726 Py_UNICODE digit = buf[next];
727 c <<= 4;
728 switch (digit) {
729 case '0': case '1': case '2': case '3': case '4':
730 case '5': case '6': case '7': case '8': case '9':
731 c |= (digit - '0'); break;
732 case 'a': case 'b': case 'c': case 'd': case 'e':
733 case 'f':
734 c |= (digit - 'a' + 10); break;
735 case 'A': case 'B': case 'C': case 'D': case 'E':
736 case 'F':
737 c |= (digit - 'A' + 10); break;
738 default:
739 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
740 goto bail;
743 #ifdef Py_UNICODE_WIDE
744 /* Surrogate pair */
745 if ((c & 0xfc00) == 0xd800) {
746 Py_UNICODE c2 = 0;
747 if (end + 6 >= len) {
748 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
749 goto bail;
751 if (buf[next++] != '\\' || buf[next++] != 'u') {
752 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
753 goto bail;
755 end += 6;
756 /* Decode 4 hex digits */
757 for (; next < end; next++) {
758 c2 <<= 4;
759 Py_UNICODE digit = buf[next];
760 switch (digit) {
761 case '0': case '1': case '2': case '3': case '4':
762 case '5': case '6': case '7': case '8': case '9':
763 c2 |= (digit - '0'); break;
764 case 'a': case 'b': case 'c': case 'd': case 'e':
765 case 'f':
766 c2 |= (digit - 'a' + 10); break;
767 case 'A': case 'B': case 'C': case 'D': case 'E':
768 case 'F':
769 c2 |= (digit - 'A' + 10); break;
770 default:
771 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
772 goto bail;
775 if ((c2 & 0xfc00) != 0xdc00) {
776 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
777 goto bail;
779 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
781 else if ((c & 0xfc00) == 0xdc00) {
782 raise_errmsg("Unpaired low surrogate", pystr, end - 5);
783 goto bail;
785 #endif
787 chunk = PyUnicode_FromUnicode(&c, 1);
788 if (chunk == NULL) {
789 goto bail;
791 if (PyList_Append(chunks, chunk)) {
792 Py_DECREF(chunk);
793 goto bail;
795 Py_DECREF(chunk);
798 rval = join_list_unicode(chunks);
799 if (rval == NULL) {
800 goto bail;
802 Py_DECREF(chunks);
803 *next_end_ptr = end;
804 return rval;
805 bail:
806 *next_end_ptr = -1;
807 Py_XDECREF(chunks);
808 return NULL;
811 PyDoc_STRVAR(pydoc_scanstring,
812 "scanstring(basestring, end, encoding, strict=True) -> (str, end)\n"
813 "\n"
814 "Scan the string s for a JSON string. End is the index of the\n"
815 "character in s after the quote that started the JSON string.\n"
816 "Unescapes all valid JSON string escape sequences and raises ValueError\n"
817 "on attempt to decode an invalid string. If strict is False then literal\n"
818 "control characters are allowed in the string.\n"
819 "\n"
820 "Returns a tuple of the decoded string and the index of the character in s\n"
821 "after the end quote."
824 static PyObject *
825 py_scanstring(PyObject* self UNUSED, PyObject *args)
827 PyObject *pystr;
828 PyObject *rval;
829 Py_ssize_t end;
830 Py_ssize_t next_end = -1;
831 char *encoding = NULL;
832 int strict = 1;
833 if (!PyArg_ParseTuple(args, "OO&|zi:scanstring", &pystr, _convertPyInt_AsSsize_t, &end, &encoding, &strict)) {
834 return NULL;
836 if (encoding == NULL) {
837 encoding = DEFAULT_ENCODING;
839 if (PyString_Check(pystr)) {
840 rval = scanstring_str(pystr, end, encoding, strict, &next_end);
842 else if (PyUnicode_Check(pystr)) {
843 rval = scanstring_unicode(pystr, end, strict, &next_end);
845 else {
846 PyErr_Format(PyExc_TypeError,
847 "first argument must be a string, not %.80s",
848 Py_TYPE(pystr)->tp_name);
849 return NULL;
851 return _build_rval_index_tuple(rval, next_end);
854 PyDoc_STRVAR(pydoc_encode_basestring_ascii,
855 "encode_basestring_ascii(basestring) -> str\n"
856 "\n"
857 "Return an ASCII-only JSON representation of a Python string"
860 static PyObject *
861 py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr)
863 /* Return an ASCII-only JSON representation of a Python string */
864 /* METH_O */
865 if (PyString_Check(pystr)) {
866 return ascii_escape_str(pystr);
868 else if (PyUnicode_Check(pystr)) {
869 return ascii_escape_unicode(pystr);
871 else {
872 PyErr_Format(PyExc_TypeError,
873 "first argument must be a string, not %.80s",
874 Py_TYPE(pystr)->tp_name);
875 return NULL;
879 static void
880 scanner_dealloc(PyObject *self)
882 /* Deallocate scanner object */
883 scanner_clear(self);
884 Py_TYPE(self)->tp_free(self);
887 static int
888 scanner_traverse(PyObject *self, visitproc visit, void *arg)
890 PyScannerObject *s;
891 assert(PyScanner_Check(self));
892 s = (PyScannerObject *)self;
893 Py_VISIT(s->encoding);
894 Py_VISIT(s->strict);
895 Py_VISIT(s->object_hook);
896 Py_VISIT(s->pairs_hook);
897 Py_VISIT(s->parse_float);
898 Py_VISIT(s->parse_int);
899 Py_VISIT(s->parse_constant);
900 return 0;
903 static int
904 scanner_clear(PyObject *self)
906 PyScannerObject *s;
907 assert(PyScanner_Check(self));
908 s = (PyScannerObject *)self;
909 Py_CLEAR(s->encoding);
910 Py_CLEAR(s->strict);
911 Py_CLEAR(s->object_hook);
912 Py_CLEAR(s->pairs_hook);
913 Py_CLEAR(s->parse_float);
914 Py_CLEAR(s->parse_int);
915 Py_CLEAR(s->parse_constant);
916 return 0;
919 static PyObject *
920 _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
921 /* Read a JSON object from PyString pystr.
922 idx is the index of the first character after the opening curly brace.
923 *next_idx_ptr is a return-by-reference index to the first character after
924 the closing curly brace.
926 Returns a new PyObject (usually a dict, but object_hook can change that)
928 char *str = PyString_AS_STRING(pystr);
929 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
930 PyObject *rval;
931 PyObject *pairs;
932 PyObject *item;
933 PyObject *key = NULL;
934 PyObject *val = NULL;
935 char *encoding = PyString_AS_STRING(s->encoding);
936 int strict = PyObject_IsTrue(s->strict);
937 Py_ssize_t next_idx;
939 pairs = PyList_New(0);
940 if (pairs == NULL)
941 return NULL;
943 /* skip whitespace after { */
944 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
946 /* only loop if the object is non-empty */
947 if (idx <= end_idx && str[idx] != '}') {
948 while (idx <= end_idx) {
949 /* read key */
950 if (str[idx] != '"') {
951 raise_errmsg("Expecting property name", pystr, idx);
952 goto bail;
954 key = scanstring_str(pystr, idx + 1, encoding, strict, &next_idx);
955 if (key == NULL)
956 goto bail;
957 idx = next_idx;
959 /* skip whitespace between key and : delimiter, read :, skip whitespace */
960 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
961 if (idx > end_idx || str[idx] != ':') {
962 raise_errmsg("Expecting : delimiter", pystr, idx);
963 goto bail;
965 idx++;
966 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
968 /* read any JSON data type */
969 val = scan_once_str(s, pystr, idx, &next_idx);
970 if (val == NULL)
971 goto bail;
973 item = PyTuple_Pack(2, key, val);
974 if (item == NULL)
975 goto bail;
976 Py_CLEAR(key);
977 Py_CLEAR(val);
978 if (PyList_Append(pairs, item) == -1) {
979 Py_DECREF(item);
980 goto bail;
982 Py_DECREF(item);
983 idx = next_idx;
985 /* skip whitespace before } or , */
986 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
988 /* bail if the object is closed or we didn't get the , delimiter */
989 if (idx > end_idx) break;
990 if (str[idx] == '}') {
991 break;
993 else if (str[idx] != ',') {
994 raise_errmsg("Expecting , delimiter", pystr, idx);
995 goto bail;
997 idx++;
999 /* skip whitespace after , delimiter */
1000 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1003 /* verify that idx < end_idx, str[idx] should be '}' */
1004 if (idx > end_idx || str[idx] != '}') {
1005 raise_errmsg("Expecting object", pystr, end_idx);
1006 goto bail;
1009 /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
1010 if (s->pairs_hook != Py_None) {
1011 val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL);
1012 if (val == NULL)
1013 goto bail;
1014 Py_DECREF(pairs);
1015 *next_idx_ptr = idx + 1;
1016 return val;
1019 rval = PyObject_CallFunctionObjArgs((PyObject *)(&PyDict_Type),
1020 pairs, NULL);
1021 if (rval == NULL)
1022 goto bail;
1023 Py_CLEAR(pairs);
1025 /* if object_hook is not None: rval = object_hook(rval) */
1026 if (s->object_hook != Py_None) {
1027 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
1028 if (val == NULL)
1029 goto bail;
1030 Py_DECREF(rval);
1031 rval = val;
1032 val = NULL;
1034 *next_idx_ptr = idx + 1;
1035 return rval;
1036 bail:
1037 Py_XDECREF(key);
1038 Py_XDECREF(val);
1039 Py_XDECREF(pairs);
1040 return NULL;
1043 static PyObject *
1044 _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1045 /* Read a JSON object from PyUnicode pystr.
1046 idx is the index of the first character after the opening curly brace.
1047 *next_idx_ptr is a return-by-reference index to the first character after
1048 the closing curly brace.
1050 Returns a new PyObject (usually a dict, but object_hook can change that)
1052 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1053 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1054 PyObject *rval;
1055 PyObject *pairs;
1056 PyObject *item;
1057 PyObject *key = NULL;
1058 PyObject *val = NULL;
1059 int strict = PyObject_IsTrue(s->strict);
1060 Py_ssize_t next_idx;
1062 pairs = PyList_New(0);
1063 if (pairs == NULL)
1064 return NULL;
1066 /* skip whitespace after { */
1067 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1069 /* only loop if the object is non-empty */
1070 if (idx <= end_idx && str[idx] != '}') {
1071 while (idx <= end_idx) {
1072 /* read key */
1073 if (str[idx] != '"') {
1074 raise_errmsg("Expecting property name", pystr, idx);
1075 goto bail;
1077 key = scanstring_unicode(pystr, idx + 1, strict, &next_idx);
1078 if (key == NULL)
1079 goto bail;
1080 idx = next_idx;
1082 /* skip whitespace between key and : delimiter, read :, skip whitespace */
1083 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1084 if (idx > end_idx || str[idx] != ':') {
1085 raise_errmsg("Expecting : delimiter", pystr, idx);
1086 goto bail;
1088 idx++;
1089 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1091 /* read any JSON term */
1092 val = scan_once_unicode(s, pystr, idx, &next_idx);
1093 if (val == NULL)
1094 goto bail;
1096 item = PyTuple_Pack(2, key, val);
1097 if (item == NULL)
1098 goto bail;
1099 Py_CLEAR(key);
1100 Py_CLEAR(val);
1101 if (PyList_Append(pairs, item) == -1) {
1102 Py_DECREF(item);
1103 goto bail;
1105 Py_DECREF(item);
1106 idx = next_idx;
1108 /* skip whitespace before } or , */
1109 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1111 /* bail if the object is closed or we didn't get the , delimiter */
1112 if (idx > end_idx) break;
1113 if (str[idx] == '}') {
1114 break;
1116 else if (str[idx] != ',') {
1117 raise_errmsg("Expecting , delimiter", pystr, idx);
1118 goto bail;
1120 idx++;
1122 /* skip whitespace after , delimiter */
1123 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1127 /* verify that idx < end_idx, str[idx] should be '}' */
1128 if (idx > end_idx || str[idx] != '}') {
1129 raise_errmsg("Expecting object", pystr, end_idx);
1130 goto bail;
1133 /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
1134 if (s->pairs_hook != Py_None) {
1135 val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL);
1136 if (val == NULL)
1137 goto bail;
1138 Py_DECREF(pairs);
1139 *next_idx_ptr = idx + 1;
1140 return val;
1143 rval = PyObject_CallFunctionObjArgs((PyObject *)(&PyDict_Type),
1144 pairs, NULL);
1145 if (rval == NULL)
1146 goto bail;
1147 Py_CLEAR(pairs);
1149 /* if object_hook is not None: rval = object_hook(rval) */
1150 if (s->object_hook != Py_None) {
1151 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
1152 if (val == NULL)
1153 goto bail;
1154 Py_DECREF(rval);
1155 rval = val;
1156 val = NULL;
1158 *next_idx_ptr = idx + 1;
1159 return rval;
1160 bail:
1161 Py_XDECREF(key);
1162 Py_XDECREF(val);
1163 Py_XDECREF(pairs);
1164 return NULL;
1167 static PyObject *
1168 _parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1169 /* Read a JSON array from PyString pystr.
1170 idx is the index of the first character after the opening brace.
1171 *next_idx_ptr is a return-by-reference index to the first character after
1172 the closing brace.
1174 Returns a new PyList
1176 char *str = PyString_AS_STRING(pystr);
1177 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
1178 PyObject *val = NULL;
1179 PyObject *rval = PyList_New(0);
1180 Py_ssize_t next_idx;
1181 if (rval == NULL)
1182 return NULL;
1184 /* skip whitespace after [ */
1185 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1187 /* only loop if the array is non-empty */
1188 if (idx <= end_idx && str[idx] != ']') {
1189 while (idx <= end_idx) {
1191 /* read any JSON term and de-tuplefy the (rval, idx) */
1192 val = scan_once_str(s, pystr, idx, &next_idx);
1193 if (val == NULL)
1194 goto bail;
1196 if (PyList_Append(rval, val) == -1)
1197 goto bail;
1199 Py_CLEAR(val);
1200 idx = next_idx;
1202 /* skip whitespace between term and , */
1203 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1205 /* bail if the array is closed or we didn't get the , delimiter */
1206 if (idx > end_idx) break;
1207 if (str[idx] == ']') {
1208 break;
1210 else if (str[idx] != ',') {
1211 raise_errmsg("Expecting , delimiter", pystr, idx);
1212 goto bail;
1214 idx++;
1216 /* skip whitespace after , */
1217 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1221 /* verify that idx < end_idx, str[idx] should be ']' */
1222 if (idx > end_idx || str[idx] != ']') {
1223 raise_errmsg("Expecting object", pystr, end_idx);
1224 goto bail;
1226 *next_idx_ptr = idx + 1;
1227 return rval;
1228 bail:
1229 Py_XDECREF(val);
1230 Py_DECREF(rval);
1231 return NULL;
1234 static PyObject *
1235 _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1236 /* Read a JSON array from PyString pystr.
1237 idx is the index of the first character after the opening brace.
1238 *next_idx_ptr is a return-by-reference index to the first character after
1239 the closing brace.
1241 Returns a new PyList
1243 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1244 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1245 PyObject *val = NULL;
1246 PyObject *rval = PyList_New(0);
1247 Py_ssize_t next_idx;
1248 if (rval == NULL)
1249 return NULL;
1251 /* skip whitespace after [ */
1252 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1254 /* only loop if the array is non-empty */
1255 if (idx <= end_idx && str[idx] != ']') {
1256 while (idx <= end_idx) {
1258 /* read any JSON term */
1259 val = scan_once_unicode(s, pystr, idx, &next_idx);
1260 if (val == NULL)
1261 goto bail;
1263 if (PyList_Append(rval, val) == -1)
1264 goto bail;
1266 Py_CLEAR(val);
1267 idx = next_idx;
1269 /* skip whitespace between term and , */
1270 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1272 /* bail if the array is closed or we didn't get the , delimiter */
1273 if (idx > end_idx) break;
1274 if (str[idx] == ']') {
1275 break;
1277 else if (str[idx] != ',') {
1278 raise_errmsg("Expecting , delimiter", pystr, idx);
1279 goto bail;
1281 idx++;
1283 /* skip whitespace after , */
1284 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1288 /* verify that idx < end_idx, str[idx] should be ']' */
1289 if (idx > end_idx || str[idx] != ']') {
1290 raise_errmsg("Expecting object", pystr, end_idx);
1291 goto bail;
1293 *next_idx_ptr = idx + 1;
1294 return rval;
1295 bail:
1296 Py_XDECREF(val);
1297 Py_DECREF(rval);
1298 return NULL;
1301 static PyObject *
1302 _parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1303 /* Read a JSON constant from PyString pystr.
1304 constant is the constant string that was found
1305 ("NaN", "Infinity", "-Infinity").
1306 idx is the index of the first character of the constant
1307 *next_idx_ptr is a return-by-reference index to the first character after
1308 the constant.
1310 Returns the result of parse_constant
1312 PyObject *cstr;
1313 PyObject *rval;
1314 /* constant is "NaN", "Infinity", or "-Infinity" */
1315 cstr = PyString_InternFromString(constant);
1316 if (cstr == NULL)
1317 return NULL;
1319 /* rval = parse_constant(constant) */
1320 rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL);
1321 idx += PyString_GET_SIZE(cstr);
1322 Py_DECREF(cstr);
1323 *next_idx_ptr = idx;
1324 return rval;
1327 static PyObject *
1328 _match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
1329 /* Read a JSON number from PyString pystr.
1330 idx is the index of the first character of the number
1331 *next_idx_ptr is a return-by-reference index to the first character after
1332 the number.
1334 Returns a new PyObject representation of that number:
1335 PyInt, PyLong, or PyFloat.
1336 May return other types if parse_int or parse_float are set
1338 char *str = PyString_AS_STRING(pystr);
1339 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
1340 Py_ssize_t idx = start;
1341 int is_float = 0;
1342 PyObject *rval;
1343 PyObject *numstr;
1345 /* read a sign if it's there, make sure it's not the end of the string */
1346 if (str[idx] == '-') {
1347 idx++;
1348 if (idx > end_idx) {
1349 PyErr_SetNone(PyExc_StopIteration);
1350 return NULL;
1354 /* read as many integer digits as we find as long as it doesn't start with 0 */
1355 if (str[idx] >= '1' && str[idx] <= '9') {
1356 idx++;
1357 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1359 /* if it starts with 0 we only expect one integer digit */
1360 else if (str[idx] == '0') {
1361 idx++;
1363 /* no integer digits, error */
1364 else {
1365 PyErr_SetNone(PyExc_StopIteration);
1366 return NULL;
1369 /* if the next char is '.' followed by a digit then read all float digits */
1370 if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
1371 is_float = 1;
1372 idx += 2;
1373 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1376 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
1377 if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
1379 /* save the index of the 'e' or 'E' just in case we need to backtrack */
1380 Py_ssize_t e_start = idx;
1381 idx++;
1383 /* read an exponent sign if present */
1384 if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
1386 /* read all digits */
1387 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1389 /* if we got a digit, then parse as float. if not, backtrack */
1390 if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
1391 is_float = 1;
1393 else {
1394 idx = e_start;
1398 /* copy the section we determined to be a number */
1399 numstr = PyString_FromStringAndSize(&str[start], idx - start);
1400 if (numstr == NULL)
1401 return NULL;
1402 if (is_float) {
1403 /* parse as a float using a fast path if available, otherwise call user defined method */
1404 if (s->parse_float != (PyObject *)&PyFloat_Type) {
1405 rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
1407 else {
1408 double d = PyOS_string_to_double(PyString_AS_STRING(numstr),
1409 NULL, NULL);
1410 if (d == -1.0 && PyErr_Occurred())
1411 return NULL;
1412 rval = PyFloat_FromDouble(d);
1415 else {
1416 /* parse as an int using a fast path if available, otherwise call user defined method */
1417 if (s->parse_int != (PyObject *)&PyInt_Type) {
1418 rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
1420 else {
1421 rval = PyInt_FromString(PyString_AS_STRING(numstr), NULL, 10);
1424 Py_DECREF(numstr);
1425 *next_idx_ptr = idx;
1426 return rval;
1429 static PyObject *
1430 _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
1431 /* Read a JSON number from PyUnicode pystr.
1432 idx is the index of the first character of the number
1433 *next_idx_ptr is a return-by-reference index to the first character after
1434 the number.
1436 Returns a new PyObject representation of that number:
1437 PyInt, PyLong, or PyFloat.
1438 May return other types if parse_int or parse_float are set
1440 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1441 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1442 Py_ssize_t idx = start;
1443 int is_float = 0;
1444 PyObject *rval;
1445 PyObject *numstr;
1447 /* read a sign if it's there, make sure it's not the end of the string */
1448 if (str[idx] == '-') {
1449 idx++;
1450 if (idx > end_idx) {
1451 PyErr_SetNone(PyExc_StopIteration);
1452 return NULL;
1456 /* read as many integer digits as we find as long as it doesn't start with 0 */
1457 if (str[idx] >= '1' && str[idx] <= '9') {
1458 idx++;
1459 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1461 /* if it starts with 0 we only expect one integer digit */
1462 else if (str[idx] == '0') {
1463 idx++;
1465 /* no integer digits, error */
1466 else {
1467 PyErr_SetNone(PyExc_StopIteration);
1468 return NULL;
1471 /* if the next char is '.' followed by a digit then read all float digits */
1472 if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
1473 is_float = 1;
1474 idx += 2;
1475 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1478 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
1479 if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
1480 Py_ssize_t e_start = idx;
1481 idx++;
1483 /* read an exponent sign if present */
1484 if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
1486 /* read all digits */
1487 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1489 /* if we got a digit, then parse as float. if not, backtrack */
1490 if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
1491 is_float = 1;
1493 else {
1494 idx = e_start;
1498 /* copy the section we determined to be a number */
1499 numstr = PyUnicode_FromUnicode(&str[start], idx - start);
1500 if (numstr == NULL)
1501 return NULL;
1502 if (is_float) {
1503 /* parse as a float using a fast path if available, otherwise call user defined method */
1504 if (s->parse_float != (PyObject *)&PyFloat_Type) {
1505 rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
1507 else {
1508 rval = PyFloat_FromString(numstr, NULL);
1511 else {
1512 /* no fast path for unicode -> int, just call */
1513 rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
1515 Py_DECREF(numstr);
1516 *next_idx_ptr = idx;
1517 return rval;
1520 static PyObject *
1521 scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1523 /* Read one JSON term (of any kind) from PyString pystr.
1524 idx is the index of the first character of the term
1525 *next_idx_ptr is a return-by-reference index to the first character after
1526 the number.
1528 Returns a new PyObject representation of the term.
1530 char *str = PyString_AS_STRING(pystr);
1531 Py_ssize_t length = PyString_GET_SIZE(pystr);
1532 if (idx >= length) {
1533 PyErr_SetNone(PyExc_StopIteration);
1534 return NULL;
1536 switch (str[idx]) {
1537 case '"':
1538 /* string */
1539 return scanstring_str(pystr, idx + 1,
1540 PyString_AS_STRING(s->encoding),
1541 PyObject_IsTrue(s->strict),
1542 next_idx_ptr);
1543 case '{':
1544 /* object */
1545 return _parse_object_str(s, pystr, idx + 1, next_idx_ptr);
1546 case '[':
1547 /* array */
1548 return _parse_array_str(s, pystr, idx + 1, next_idx_ptr);
1549 case 'n':
1550 /* null */
1551 if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
1552 Py_INCREF(Py_None);
1553 *next_idx_ptr = idx + 4;
1554 return Py_None;
1556 break;
1557 case 't':
1558 /* true */
1559 if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
1560 Py_INCREF(Py_True);
1561 *next_idx_ptr = idx + 4;
1562 return Py_True;
1564 break;
1565 case 'f':
1566 /* false */
1567 if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
1568 Py_INCREF(Py_False);
1569 *next_idx_ptr = idx + 5;
1570 return Py_False;
1572 break;
1573 case 'N':
1574 /* NaN */
1575 if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
1576 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1578 break;
1579 case 'I':
1580 /* Infinity */
1581 if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
1582 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1584 break;
1585 case '-':
1586 /* -Infinity */
1587 if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
1588 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1590 break;
1592 /* Didn't find a string, object, array, or named constant. Look for a number. */
1593 return _match_number_str(s, pystr, idx, next_idx_ptr);
1596 static PyObject *
1597 scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1599 /* Read one JSON term (of any kind) from PyUnicode pystr.
1600 idx is the index of the first character of the term
1601 *next_idx_ptr is a return-by-reference index to the first character after
1602 the number.
1604 Returns a new PyObject representation of the term.
1606 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1607 Py_ssize_t length = PyUnicode_GET_SIZE(pystr);
1608 if (idx >= length) {
1609 PyErr_SetNone(PyExc_StopIteration);
1610 return NULL;
1612 switch (str[idx]) {
1613 case '"':
1614 /* string */
1615 return scanstring_unicode(pystr, idx + 1,
1616 PyObject_IsTrue(s->strict),
1617 next_idx_ptr);
1618 case '{':
1619 /* object */
1620 return _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
1621 case '[':
1622 /* array */
1623 return _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
1624 case 'n':
1625 /* null */
1626 if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
1627 Py_INCREF(Py_None);
1628 *next_idx_ptr = idx + 4;
1629 return Py_None;
1631 break;
1632 case 't':
1633 /* true */
1634 if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
1635 Py_INCREF(Py_True);
1636 *next_idx_ptr = idx + 4;
1637 return Py_True;
1639 break;
1640 case 'f':
1641 /* false */
1642 if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
1643 Py_INCREF(Py_False);
1644 *next_idx_ptr = idx + 5;
1645 return Py_False;
1647 break;
1648 case 'N':
1649 /* NaN */
1650 if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
1651 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1653 break;
1654 case 'I':
1655 /* Infinity */
1656 if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
1657 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1659 break;
1660 case '-':
1661 /* -Infinity */
1662 if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
1663 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1665 break;
1667 /* Didn't find a string, object, array, or named constant. Look for a number. */
1668 return _match_number_unicode(s, pystr, idx, next_idx_ptr);
1671 static PyObject *
1672 scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
1674 /* Python callable interface to scan_once_{str,unicode} */
1675 PyObject *pystr;
1676 PyObject *rval;
1677 Py_ssize_t idx;
1678 Py_ssize_t next_idx = -1;
1679 static char *kwlist[] = {"string", "idx", NULL};
1680 PyScannerObject *s;
1681 assert(PyScanner_Check(self));
1682 s = (PyScannerObject *)self;
1683 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx))
1684 return NULL;
1686 if (PyString_Check(pystr)) {
1687 rval = scan_once_str(s, pystr, idx, &next_idx);
1689 else if (PyUnicode_Check(pystr)) {
1690 rval = scan_once_unicode(s, pystr, idx, &next_idx);
1692 else {
1693 PyErr_Format(PyExc_TypeError,
1694 "first argument must be a string, not %.80s",
1695 Py_TYPE(pystr)->tp_name);
1696 return NULL;
1698 return _build_rval_index_tuple(rval, next_idx);
1701 static PyObject *
1702 scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1704 PyScannerObject *s;
1705 s = (PyScannerObject *)type->tp_alloc(type, 0);
1706 if (s != NULL) {
1707 s->encoding = NULL;
1708 s->strict = NULL;
1709 s->object_hook = NULL;
1710 s->pairs_hook = NULL;
1711 s->parse_float = NULL;
1712 s->parse_int = NULL;
1713 s->parse_constant = NULL;
1715 return (PyObject *)s;
1718 static int
1719 scanner_init(PyObject *self, PyObject *args, PyObject *kwds)
1721 /* Initialize Scanner object */
1722 PyObject *ctx;
1723 static char *kwlist[] = {"context", NULL};
1724 PyScannerObject *s;
1726 assert(PyScanner_Check(self));
1727 s = (PyScannerObject *)self;
1729 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
1730 return -1;
1732 /* PyString_AS_STRING is used on encoding */
1733 s->encoding = PyObject_GetAttrString(ctx, "encoding");
1734 if (s->encoding == Py_None) {
1735 Py_DECREF(Py_None);
1736 s->encoding = PyString_InternFromString(DEFAULT_ENCODING);
1738 else if (PyUnicode_Check(s->encoding)) {
1739 PyObject *tmp = PyUnicode_AsEncodedString(s->encoding, NULL, NULL);
1740 Py_DECREF(s->encoding);
1741 s->encoding = tmp;
1743 if (s->encoding == NULL || !PyString_Check(s->encoding))
1744 goto bail;
1746 /* All of these will fail "gracefully" so we don't need to verify them */
1747 s->strict = PyObject_GetAttrString(ctx, "strict");
1748 if (s->strict == NULL)
1749 goto bail;
1750 s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
1751 if (s->object_hook == NULL)
1752 goto bail;
1753 s->pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
1754 if (s->object_hook == NULL)
1755 goto bail;
1756 s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
1757 if (s->parse_float == NULL)
1758 goto bail;
1759 s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
1760 if (s->parse_int == NULL)
1761 goto bail;
1762 s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
1763 if (s->parse_constant == NULL)
1764 goto bail;
1766 return 0;
1768 bail:
1769 Py_CLEAR(s->encoding);
1770 Py_CLEAR(s->strict);
1771 Py_CLEAR(s->object_hook);
1772 Py_CLEAR(s->pairs_hook);
1773 Py_CLEAR(s->parse_float);
1774 Py_CLEAR(s->parse_int);
1775 Py_CLEAR(s->parse_constant);
1776 return -1;
1779 PyDoc_STRVAR(scanner_doc, "JSON scanner object");
1781 static
1782 PyTypeObject PyScannerType = {
1783 PyObject_HEAD_INIT(NULL)
1784 0, /* tp_internal */
1785 "_json.Scanner", /* tp_name */
1786 sizeof(PyScannerObject), /* tp_basicsize */
1787 0, /* tp_itemsize */
1788 scanner_dealloc, /* tp_dealloc */
1789 0, /* tp_print */
1790 0, /* tp_getattr */
1791 0, /* tp_setattr */
1792 0, /* tp_compare */
1793 0, /* tp_repr */
1794 0, /* tp_as_number */
1795 0, /* tp_as_sequence */
1796 0, /* tp_as_mapping */
1797 0, /* tp_hash */
1798 scanner_call, /* tp_call */
1799 0, /* tp_str */
1800 0,/* PyObject_GenericGetAttr, */ /* tp_getattro */
1801 0,/* PyObject_GenericSetAttr, */ /* tp_setattro */
1802 0, /* tp_as_buffer */
1803 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
1804 scanner_doc, /* tp_doc */
1805 scanner_traverse, /* tp_traverse */
1806 scanner_clear, /* tp_clear */
1807 0, /* tp_richcompare */
1808 0, /* tp_weaklistoffset */
1809 0, /* tp_iter */
1810 0, /* tp_iternext */
1811 0, /* tp_methods */
1812 scanner_members, /* tp_members */
1813 0, /* tp_getset */
1814 0, /* tp_base */
1815 0, /* tp_dict */
1816 0, /* tp_descr_get */
1817 0, /* tp_descr_set */
1818 0, /* tp_dictoffset */
1819 scanner_init, /* tp_init */
1820 0,/* PyType_GenericAlloc, */ /* tp_alloc */
1821 scanner_new, /* tp_new */
1822 0,/* PyObject_GC_Del, */ /* tp_free */
1825 static PyObject *
1826 encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1828 PyEncoderObject *s;
1829 s = (PyEncoderObject *)type->tp_alloc(type, 0);
1830 if (s != NULL) {
1831 s->markers = NULL;
1832 s->defaultfn = NULL;
1833 s->encoder = NULL;
1834 s->indent = NULL;
1835 s->key_separator = NULL;
1836 s->item_separator = NULL;
1837 s->sort_keys = NULL;
1838 s->skipkeys = NULL;
1840 return (PyObject *)s;
1843 static int
1844 encoder_init(PyObject *self, PyObject *args, PyObject *kwds)
1846 /* initialize Encoder object */
1847 static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
1849 PyEncoderObject *s;
1850 PyObject *allow_nan;
1852 assert(PyEncoder_Check(self));
1853 s = (PyEncoderObject *)self;
1855 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOO:make_encoder", kwlist,
1856 &s->markers, &s->defaultfn, &s->encoder, &s->indent, &s->key_separator, &s->item_separator, &s->sort_keys, &s->skipkeys, &allow_nan))
1857 return -1;
1859 Py_INCREF(s->markers);
1860 Py_INCREF(s->defaultfn);
1861 Py_INCREF(s->encoder);
1862 Py_INCREF(s->indent);
1863 Py_INCREF(s->key_separator);
1864 Py_INCREF(s->item_separator);
1865 Py_INCREF(s->sort_keys);
1866 Py_INCREF(s->skipkeys);
1867 s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii);
1868 s->allow_nan = PyObject_IsTrue(allow_nan);
1869 return 0;
1872 static PyObject *
1873 encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
1875 /* Python callable interface to encode_listencode_obj */
1876 static char *kwlist[] = {"obj", "_current_indent_level", NULL};
1877 PyObject *obj;
1878 PyObject *rval;
1879 Py_ssize_t indent_level;
1880 PyEncoderObject *s;
1881 assert(PyEncoder_Check(self));
1882 s = (PyEncoderObject *)self;
1883 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist,
1884 &obj, _convertPyInt_AsSsize_t, &indent_level))
1885 return NULL;
1886 rval = PyList_New(0);
1887 if (rval == NULL)
1888 return NULL;
1889 if (encoder_listencode_obj(s, rval, obj, indent_level)) {
1890 Py_DECREF(rval);
1891 return NULL;
1893 return rval;
1896 static PyObject *
1897 _encoded_const(PyObject *obj)
1899 /* Return the JSON string representation of None, True, False */
1900 if (obj == Py_None) {
1901 static PyObject *s_null = NULL;
1902 if (s_null == NULL) {
1903 s_null = PyString_InternFromString("null");
1905 Py_INCREF(s_null);
1906 return s_null;
1908 else if (obj == Py_True) {
1909 static PyObject *s_true = NULL;
1910 if (s_true == NULL) {
1911 s_true = PyString_InternFromString("true");
1913 Py_INCREF(s_true);
1914 return s_true;
1916 else if (obj == Py_False) {
1917 static PyObject *s_false = NULL;
1918 if (s_false == NULL) {
1919 s_false = PyString_InternFromString("false");
1921 Py_INCREF(s_false);
1922 return s_false;
1924 else {
1925 PyErr_SetString(PyExc_ValueError, "not a const");
1926 return NULL;
1930 static PyObject *
1931 encoder_encode_float(PyEncoderObject *s, PyObject *obj)
1933 /* Return the JSON representation of a PyFloat */
1934 double i = PyFloat_AS_DOUBLE(obj);
1935 if (!Py_IS_FINITE(i)) {
1936 if (!s->allow_nan) {
1937 PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant");
1938 return NULL;
1940 if (i > 0) {
1941 return PyString_FromString("Infinity");
1943 else if (i < 0) {
1944 return PyString_FromString("-Infinity");
1946 else {
1947 return PyString_FromString("NaN");
1950 /* Use a better float format here? */
1951 return PyObject_Repr(obj);
1954 static PyObject *
1955 encoder_encode_string(PyEncoderObject *s, PyObject *obj)
1957 /* Return the JSON representation of a string */
1958 if (s->fast_encode)
1959 return py_encode_basestring_ascii(NULL, obj);
1960 else
1961 return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL);
1964 static int
1965 _steal_list_append(PyObject *lst, PyObject *stolen)
1967 /* Append stolen and then decrement its reference count */
1968 int rval = PyList_Append(lst, stolen);
1969 Py_DECREF(stolen);
1970 return rval;
1973 static int
1974 encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level)
1976 /* Encode Python object obj to a JSON term, rval is a PyList */
1977 PyObject *newobj;
1978 int rv;
1980 if (obj == Py_None || obj == Py_True || obj == Py_False) {
1981 PyObject *cstr = _encoded_const(obj);
1982 if (cstr == NULL)
1983 return -1;
1984 return _steal_list_append(rval, cstr);
1986 else if (PyString_Check(obj) || PyUnicode_Check(obj))
1988 PyObject *encoded = encoder_encode_string(s, obj);
1989 if (encoded == NULL)
1990 return -1;
1991 return _steal_list_append(rval, encoded);
1993 else if (PyInt_Check(obj) || PyLong_Check(obj)) {
1994 PyObject *encoded = PyObject_Str(obj);
1995 if (encoded == NULL)
1996 return -1;
1997 return _steal_list_append(rval, encoded);
1999 else if (PyFloat_Check(obj)) {
2000 PyObject *encoded = encoder_encode_float(s, obj);
2001 if (encoded == NULL)
2002 return -1;
2003 return _steal_list_append(rval, encoded);
2005 else if (PyList_Check(obj) || PyTuple_Check(obj)) {
2006 return encoder_listencode_list(s, rval, obj, indent_level);
2008 else if (PyDict_Check(obj)) {
2009 return encoder_listencode_dict(s, rval, obj, indent_level);
2011 else {
2012 PyObject *ident = NULL;
2013 if (s->markers != Py_None) {
2014 int has_key;
2015 ident = PyLong_FromVoidPtr(obj);
2016 if (ident == NULL)
2017 return -1;
2018 has_key = PyDict_Contains(s->markers, ident);
2019 if (has_key) {
2020 if (has_key != -1)
2021 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2022 Py_DECREF(ident);
2023 return -1;
2025 if (PyDict_SetItem(s->markers, ident, obj)) {
2026 Py_DECREF(ident);
2027 return -1;
2030 newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL);
2031 if (newobj == NULL) {
2032 Py_XDECREF(ident);
2033 return -1;
2035 rv = encoder_listencode_obj(s, rval, newobj, indent_level);
2036 Py_DECREF(newobj);
2037 if (rv) {
2038 Py_XDECREF(ident);
2039 return -1;
2041 if (ident != NULL) {
2042 if (PyDict_DelItem(s->markers, ident)) {
2043 Py_XDECREF(ident);
2044 return -1;
2046 Py_XDECREF(ident);
2048 return rv;
2052 static int
2053 encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level)
2055 /* Encode Python dict dct a JSON term, rval is a PyList */
2056 static PyObject *open_dict = NULL;
2057 static PyObject *close_dict = NULL;
2058 static PyObject *empty_dict = NULL;
2059 PyObject *kstr = NULL;
2060 PyObject *ident = NULL;
2061 PyObject *key, *value;
2062 Py_ssize_t pos;
2063 int skipkeys;
2064 Py_ssize_t idx;
2066 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) {
2067 open_dict = PyString_InternFromString("{");
2068 close_dict = PyString_InternFromString("}");
2069 empty_dict = PyString_InternFromString("{}");
2070 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL)
2071 return -1;
2073 if (PyDict_Size(dct) == 0)
2074 return PyList_Append(rval, empty_dict);
2076 if (s->markers != Py_None) {
2077 int has_key;
2078 ident = PyLong_FromVoidPtr(dct);
2079 if (ident == NULL)
2080 goto bail;
2081 has_key = PyDict_Contains(s->markers, ident);
2082 if (has_key) {
2083 if (has_key != -1)
2084 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2085 goto bail;
2087 if (PyDict_SetItem(s->markers, ident, dct)) {
2088 goto bail;
2092 if (PyList_Append(rval, open_dict))
2093 goto bail;
2095 if (s->indent != Py_None) {
2096 /* TODO: DOES NOT RUN */
2097 indent_level += 1;
2099 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
2100 separator = _item_separator + newline_indent
2101 buf += newline_indent
2105 /* TODO: C speedup not implemented for sort_keys */
2107 pos = 0;
2108 skipkeys = PyObject_IsTrue(s->skipkeys);
2109 idx = 0;
2110 while (PyDict_Next(dct, &pos, &key, &value)) {
2111 PyObject *encoded;
2113 if (PyString_Check(key) || PyUnicode_Check(key)) {
2114 Py_INCREF(key);
2115 kstr = key;
2117 else if (PyFloat_Check(key)) {
2118 kstr = encoder_encode_float(s, key);
2119 if (kstr == NULL)
2120 goto bail;
2122 else if (PyInt_Check(key) || PyLong_Check(key)) {
2123 kstr = PyObject_Str(key);
2124 if (kstr == NULL)
2125 goto bail;
2127 else if (key == Py_True || key == Py_False || key == Py_None) {
2128 kstr = _encoded_const(key);
2129 if (kstr == NULL)
2130 goto bail;
2132 else if (skipkeys) {
2133 continue;
2135 else {
2136 /* TODO: include repr of key */
2137 PyErr_SetString(PyExc_ValueError, "keys must be a string");
2138 goto bail;
2141 if (idx) {
2142 if (PyList_Append(rval, s->item_separator))
2143 goto bail;
2146 encoded = encoder_encode_string(s, kstr);
2147 Py_CLEAR(kstr);
2148 if (encoded == NULL)
2149 goto bail;
2150 if (PyList_Append(rval, encoded)) {
2151 Py_DECREF(encoded);
2152 goto bail;
2154 Py_DECREF(encoded);
2155 if (PyList_Append(rval, s->key_separator))
2156 goto bail;
2157 if (encoder_listencode_obj(s, rval, value, indent_level))
2158 goto bail;
2159 idx += 1;
2161 if (ident != NULL) {
2162 if (PyDict_DelItem(s->markers, ident))
2163 goto bail;
2164 Py_CLEAR(ident);
2166 if (s->indent != Py_None) {
2167 /* TODO: DOES NOT RUN */
2168 indent_level -= 1;
2170 yield '\n' + (' ' * (_indent * _current_indent_level))
2173 if (PyList_Append(rval, close_dict))
2174 goto bail;
2175 return 0;
2177 bail:
2178 Py_XDECREF(kstr);
2179 Py_XDECREF(ident);
2180 return -1;
2184 static int
2185 encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level)
2187 /* Encode Python list seq to a JSON term, rval is a PyList */
2188 static PyObject *open_array = NULL;
2189 static PyObject *close_array = NULL;
2190 static PyObject *empty_array = NULL;
2191 PyObject *ident = NULL;
2192 PyObject *s_fast = NULL;
2193 Py_ssize_t num_items;
2194 PyObject **seq_items;
2195 Py_ssize_t i;
2197 if (open_array == NULL || close_array == NULL || empty_array == NULL) {
2198 open_array = PyString_InternFromString("[");
2199 close_array = PyString_InternFromString("]");
2200 empty_array = PyString_InternFromString("[]");
2201 if (open_array == NULL || close_array == NULL || empty_array == NULL)
2202 return -1;
2204 ident = NULL;
2205 s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
2206 if (s_fast == NULL)
2207 return -1;
2208 num_items = PySequence_Fast_GET_SIZE(s_fast);
2209 if (num_items == 0) {
2210 Py_DECREF(s_fast);
2211 return PyList_Append(rval, empty_array);
2214 if (s->markers != Py_None) {
2215 int has_key;
2216 ident = PyLong_FromVoidPtr(seq);
2217 if (ident == NULL)
2218 goto bail;
2219 has_key = PyDict_Contains(s->markers, ident);
2220 if (has_key) {
2221 if (has_key != -1)
2222 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2223 goto bail;
2225 if (PyDict_SetItem(s->markers, ident, seq)) {
2226 goto bail;
2230 seq_items = PySequence_Fast_ITEMS(s_fast);
2231 if (PyList_Append(rval, open_array))
2232 goto bail;
2233 if (s->indent != Py_None) {
2234 /* TODO: DOES NOT RUN */
2235 indent_level += 1;
2237 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
2238 separator = _item_separator + newline_indent
2239 buf += newline_indent
2242 for (i = 0; i < num_items; i++) {
2243 PyObject *obj = seq_items[i];
2244 if (i) {
2245 if (PyList_Append(rval, s->item_separator))
2246 goto bail;
2248 if (encoder_listencode_obj(s, rval, obj, indent_level))
2249 goto bail;
2251 if (ident != NULL) {
2252 if (PyDict_DelItem(s->markers, ident))
2253 goto bail;
2254 Py_CLEAR(ident);
2256 if (s->indent != Py_None) {
2257 /* TODO: DOES NOT RUN */
2258 indent_level -= 1;
2260 yield '\n' + (' ' * (_indent * _current_indent_level))
2263 if (PyList_Append(rval, close_array))
2264 goto bail;
2265 Py_DECREF(s_fast);
2266 return 0;
2268 bail:
2269 Py_XDECREF(ident);
2270 Py_DECREF(s_fast);
2271 return -1;
2274 static void
2275 encoder_dealloc(PyObject *self)
2277 /* Deallocate Encoder */
2278 encoder_clear(self);
2279 Py_TYPE(self)->tp_free(self);
2282 static int
2283 encoder_traverse(PyObject *self, visitproc visit, void *arg)
2285 PyEncoderObject *s;
2286 assert(PyEncoder_Check(self));
2287 s = (PyEncoderObject *)self;
2288 Py_VISIT(s->markers);
2289 Py_VISIT(s->defaultfn);
2290 Py_VISIT(s->encoder);
2291 Py_VISIT(s->indent);
2292 Py_VISIT(s->key_separator);
2293 Py_VISIT(s->item_separator);
2294 Py_VISIT(s->sort_keys);
2295 Py_VISIT(s->skipkeys);
2296 return 0;
2299 static int
2300 encoder_clear(PyObject *self)
2302 /* Deallocate Encoder */
2303 PyEncoderObject *s;
2304 assert(PyEncoder_Check(self));
2305 s = (PyEncoderObject *)self;
2306 Py_CLEAR(s->markers);
2307 Py_CLEAR(s->defaultfn);
2308 Py_CLEAR(s->encoder);
2309 Py_CLEAR(s->indent);
2310 Py_CLEAR(s->key_separator);
2311 Py_CLEAR(s->item_separator);
2312 Py_CLEAR(s->sort_keys);
2313 Py_CLEAR(s->skipkeys);
2314 return 0;
2317 PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
2319 static
2320 PyTypeObject PyEncoderType = {
2321 PyObject_HEAD_INIT(NULL)
2322 0, /* tp_internal */
2323 "_json.Encoder", /* tp_name */
2324 sizeof(PyEncoderObject), /* tp_basicsize */
2325 0, /* tp_itemsize */
2326 encoder_dealloc, /* tp_dealloc */
2327 0, /* tp_print */
2328 0, /* tp_getattr */
2329 0, /* tp_setattr */
2330 0, /* tp_compare */
2331 0, /* tp_repr */
2332 0, /* tp_as_number */
2333 0, /* tp_as_sequence */
2334 0, /* tp_as_mapping */
2335 0, /* tp_hash */
2336 encoder_call, /* tp_call */
2337 0, /* tp_str */
2338 0, /* tp_getattro */
2339 0, /* tp_setattro */
2340 0, /* tp_as_buffer */
2341 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2342 encoder_doc, /* tp_doc */
2343 encoder_traverse, /* tp_traverse */
2344 encoder_clear, /* tp_clear */
2345 0, /* tp_richcompare */
2346 0, /* tp_weaklistoffset */
2347 0, /* tp_iter */
2348 0, /* tp_iternext */
2349 0, /* tp_methods */
2350 encoder_members, /* tp_members */
2351 0, /* tp_getset */
2352 0, /* tp_base */
2353 0, /* tp_dict */
2354 0, /* tp_descr_get */
2355 0, /* tp_descr_set */
2356 0, /* tp_dictoffset */
2357 encoder_init, /* tp_init */
2358 0, /* tp_alloc */
2359 encoder_new, /* tp_new */
2360 0, /* tp_free */
2363 static PyMethodDef speedups_methods[] = {
2364 {"encode_basestring_ascii",
2365 (PyCFunction)py_encode_basestring_ascii,
2366 METH_O,
2367 pydoc_encode_basestring_ascii},
2368 {"scanstring",
2369 (PyCFunction)py_scanstring,
2370 METH_VARARGS,
2371 pydoc_scanstring},
2372 {NULL, NULL, 0, NULL}
2375 PyDoc_STRVAR(module_doc,
2376 "json speedups\n");
2378 void
2379 init_json(void)
2381 PyObject *m;
2382 PyScannerType.tp_new = PyType_GenericNew;
2383 if (PyType_Ready(&PyScannerType) < 0)
2384 return;
2385 PyEncoderType.tp_new = PyType_GenericNew;
2386 if (PyType_Ready(&PyEncoderType) < 0)
2387 return;
2388 m = Py_InitModule3("_json", speedups_methods, module_doc);
2389 Py_INCREF((PyObject*)&PyScannerType);
2390 PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType);
2391 Py_INCREF((PyObject*)&PyEncoderType);
2392 PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType);