/* Source: python/dscho.git, Modules/_json.c
   (blob e55543906f29b43421c40d3e91aa04082eea5d3e).
   Commit note: "Merged revisions 77838 via svnmerge from" */
1 #include "Python.h"
2 #include "structmember.h"
3 #if PY_VERSION_HEX < 0x02060000 && !defined(Py_TYPE)
4 #define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
5 #endif
6 #if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN)
7 typedef int Py_ssize_t;
8 #define PY_SSIZE_T_MAX INT_MAX
9 #define PY_SSIZE_T_MIN INT_MIN
10 #define PyInt_FromSsize_t PyInt_FromLong
11 #define PyInt_AsSsize_t PyInt_AsLong
12 #endif
13 #ifndef Py_IS_FINITE
14 #define Py_IS_FINITE(X) (!Py_IS_INFINITY(X) && !Py_IS_NAN(X))
15 #endif
17 #ifdef __GNUC__
18 #define UNUSED __attribute__((__unused__))
19 #else
20 #define UNUSED
21 #endif
23 #define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType)
24 #define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType)
25 #define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType)
26 #define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType)
28 static PyTypeObject PyScannerType;
29 static PyTypeObject PyEncoderType;
31 typedef struct _PyScannerObject {
32 PyObject_HEAD
33 PyObject *strict;
34 PyObject *object_hook;
35 PyObject *object_pairs_hook;
36 PyObject *parse_float;
37 PyObject *parse_int;
38 PyObject *parse_constant;
39 } PyScannerObject;
41 static PyMemberDef scanner_members[] = {
42 {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"},
43 {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
44 {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, object_pairs_hook), READONLY},
45 {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
46 {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
47 {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
48 {NULL}
51 typedef struct _PyEncoderObject {
52 PyObject_HEAD
53 PyObject *markers;
54 PyObject *defaultfn;
55 PyObject *encoder;
56 PyObject *indent;
57 PyObject *key_separator;
58 PyObject *item_separator;
59 PyObject *sort_keys;
60 PyObject *skipkeys;
61 int fast_encode;
62 int allow_nan;
63 } PyEncoderObject;
65 static PyMemberDef encoder_members[] = {
66 {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
67 {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
68 {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
69 {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
70 {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
71 {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
72 {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
73 {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
74 {NULL}
77 static PyObject *
78 ascii_escape_unicode(PyObject *pystr);
79 static PyObject *
80 py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
81 void init_json(void);
82 static PyObject *
83 scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
84 static PyObject *
85 _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
86 static PyObject *
87 scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
88 static int
89 scanner_init(PyObject *self, PyObject *args, PyObject *kwds);
90 static void
91 scanner_dealloc(PyObject *self);
92 static int
93 scanner_clear(PyObject *self);
94 static PyObject *
95 encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
96 static int
97 encoder_init(PyObject *self, PyObject *args, PyObject *kwds);
98 static void
99 encoder_dealloc(PyObject *self);
100 static int
101 encoder_clear(PyObject *self);
102 static int
103 encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level);
104 static int
105 encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level);
106 static int
107 encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level);
108 static PyObject *
109 _encoded_const(PyObject *obj);
110 static void
111 raise_errmsg(char *msg, PyObject *s, Py_ssize_t end);
112 static PyObject *
113 encoder_encode_string(PyEncoderObject *s, PyObject *obj);
114 static int
115 _convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr);
116 static PyObject *
117 _convertPyInt_FromSsize_t(Py_ssize_t *size_ptr);
118 static PyObject *
119 encoder_encode_float(PyEncoderObject *s, PyObject *obj);
/* True when c is a printable ASCII character that may appear in a JSON
   string without escaping (everything from ' ' to '~' except backslash
   and double quote).  The argument is parenthesized so that expression
   arguments (e.g. a conditional) are evaluated as a unit. */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '"')
/* True for the four whitespace characters skipped between JSON tokens. */
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))

/* Output units needed to escape one input unit as "\uXXXX". */
#define MIN_EXPANSION 6
/* Worst case per input unit: wide builds may emit a surrogate pair,
   i.e. two "\uXXXX" sequences. */
#ifdef Py_UNICODE_WIDE
#define MAX_EXPANSION (2 * MIN_EXPANSION)
#else
#define MAX_EXPANSION MIN_EXPANSION
#endif
131 static int
132 _convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr)
134 /* PyObject to Py_ssize_t converter */
135 *size_ptr = PyLong_AsSsize_t(o);
136 if (*size_ptr == -1 && PyErr_Occurred())
137 return 0;
138 return 1;
141 static PyObject *
142 _convertPyInt_FromSsize_t(Py_ssize_t *size_ptr)
144 /* Py_ssize_t to PyObject converter */
145 return PyLong_FromSsize_t(*size_ptr);
148 static Py_ssize_t
149 ascii_escape_unichar(Py_UNICODE c, Py_UNICODE *output, Py_ssize_t chars)
151 /* Escape unicode code point c to ASCII escape sequences
152 in char *output. output must have at least 12 bytes unused to
153 accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
154 output[chars++] = '\\';
155 switch (c) {
156 case '\\': output[chars++] = c; break;
157 case '"': output[chars++] = c; break;
158 case '\b': output[chars++] = 'b'; break;
159 case '\f': output[chars++] = 'f'; break;
160 case '\n': output[chars++] = 'n'; break;
161 case '\r': output[chars++] = 'r'; break;
162 case '\t': output[chars++] = 't'; break;
163 default:
164 #ifdef Py_UNICODE_WIDE
165 if (c >= 0x10000) {
166 /* UTF-16 surrogate pair */
167 Py_UNICODE v = c - 0x10000;
168 c = 0xd800 | ((v >> 10) & 0x3ff);
169 output[chars++] = 'u';
170 output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
171 output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];
172 output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];
173 output[chars++] = "0123456789abcdef"[(c ) & 0xf];
174 c = 0xdc00 | (v & 0x3ff);
175 output[chars++] = '\\';
177 #endif
178 output[chars++] = 'u';
179 output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
180 output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];
181 output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];
182 output[chars++] = "0123456789abcdef"[(c ) & 0xf];
184 return chars;
187 static PyObject *
188 ascii_escape_unicode(PyObject *pystr)
190 /* Take a PyUnicode pystr and return a new ASCII-only escaped PyUnicode */
191 Py_ssize_t i;
192 Py_ssize_t input_chars;
193 Py_ssize_t output_size;
194 Py_ssize_t max_output_size;
195 Py_ssize_t chars;
196 PyObject *rval;
197 Py_UNICODE *output;
198 Py_UNICODE *input_unicode;
200 input_chars = PyUnicode_GET_SIZE(pystr);
201 input_unicode = PyUnicode_AS_UNICODE(pystr);
203 /* One char input can be up to 6 chars output, estimate 4 of these */
204 output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
205 max_output_size = 2 + (input_chars * MAX_EXPANSION);
206 rval = PyUnicode_FromStringAndSize(NULL, output_size);
207 if (rval == NULL) {
208 return NULL;
210 output = PyUnicode_AS_UNICODE(rval);
211 chars = 0;
212 output[chars++] = '"';
213 for (i = 0; i < input_chars; i++) {
214 Py_UNICODE c = input_unicode[i];
215 if (S_CHAR(c)) {
216 output[chars++] = c;
218 else {
219 chars = ascii_escape_unichar(c, output, chars);
221 if (output_size - chars < (1 + MAX_EXPANSION)) {
222 /* There's more than four, so let's resize by a lot */
223 Py_ssize_t new_output_size = output_size * 2;
224 /* This is an upper bound */
225 if (new_output_size > max_output_size) {
226 new_output_size = max_output_size;
228 /* Make sure that the output size changed before resizing */
229 if (new_output_size != output_size) {
230 output_size = new_output_size;
231 if (PyUnicode_Resize(&rval, output_size) == -1) {
232 return NULL;
234 output = PyUnicode_AS_UNICODE(rval);
238 output[chars++] = '"';
239 if (PyUnicode_Resize(&rval, chars) == -1) {
240 return NULL;
242 return rval;
245 static void
246 raise_errmsg(char *msg, PyObject *s, Py_ssize_t end)
248 /* Use the Python function json.decoder.errmsg to raise a nice
249 looking ValueError exception */
250 static PyObject *errmsg_fn = NULL;
251 PyObject *pymsg;
252 if (errmsg_fn == NULL) {
253 PyObject *decoder = PyImport_ImportModule("json.decoder");
254 if (decoder == NULL)
255 return;
256 errmsg_fn = PyObject_GetAttrString(decoder, "errmsg");
257 Py_DECREF(decoder);
258 if (errmsg_fn == NULL)
259 return;
261 pymsg = PyObject_CallFunction(errmsg_fn, "(zOO&)", msg, s, _convertPyInt_FromSsize_t, &end);
262 if (pymsg) {
263 PyErr_SetObject(PyExc_ValueError, pymsg);
264 Py_DECREF(pymsg);
268 static PyObject *
269 join_list_unicode(PyObject *lst)
271 /* return u''.join(lst) */
272 static PyObject *sep = NULL;
273 if (sep == NULL) {
274 sep = PyUnicode_FromStringAndSize("", 0);
275 if (sep == NULL)
276 return NULL;
278 return PyUnicode_Join(sep, lst);
281 static PyObject *
282 _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
283 /* return (rval, idx) tuple, stealing reference to rval */
284 PyObject *tpl;
285 PyObject *pyidx;
287 steal a reference to rval, returns (rval, idx)
289 if (rval == NULL) {
290 return NULL;
292 pyidx = PyLong_FromSsize_t(idx);
293 if (pyidx == NULL) {
294 Py_DECREF(rval);
295 return NULL;
297 tpl = PyTuple_New(2);
298 if (tpl == NULL) {
299 Py_DECREF(pyidx);
300 Py_DECREF(rval);
301 return NULL;
303 PyTuple_SET_ITEM(tpl, 0, rval);
304 PyTuple_SET_ITEM(tpl, 1, pyidx);
305 return tpl;
308 static PyObject *
309 scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
311 /* Read the JSON string from PyUnicode pystr.
312 end is the index of the first character after the quote.
313 if strict is zero then literal control characters are allowed
314 *next_end_ptr is a return-by-reference index of the character
315 after the end quote
317 Return value is a new PyUnicode
319 PyObject *rval;
320 Py_ssize_t len = PyUnicode_GET_SIZE(pystr);
321 Py_ssize_t begin = end - 1;
322 Py_ssize_t next = begin;
323 const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr);
324 PyObject *chunks = PyList_New(0);
325 if (chunks == NULL) {
326 goto bail;
328 if (end < 0 || len <= end) {
329 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
330 goto bail;
332 while (1) {
333 /* Find the end of the string or the next escape */
334 Py_UNICODE c = 0;
335 PyObject *chunk = NULL;
336 for (next = end; next < len; next++) {
337 c = buf[next];
338 if (c == '"' || c == '\\') {
339 break;
341 else if (strict && c <= 0x1f) {
342 raise_errmsg("Invalid control character at", pystr, next);
343 goto bail;
346 if (!(c == '"' || c == '\\')) {
347 raise_errmsg("Unterminated string starting at", pystr, begin);
348 goto bail;
350 /* Pick up this chunk if it's not zero length */
351 if (next != end) {
352 chunk = PyUnicode_FromUnicode(&buf[end], next - end);
353 if (chunk == NULL) {
354 goto bail;
356 if (PyList_Append(chunks, chunk)) {
357 Py_DECREF(chunk);
358 goto bail;
360 Py_DECREF(chunk);
362 next++;
363 if (c == '"') {
364 end = next;
365 break;
367 if (next == len) {
368 raise_errmsg("Unterminated string starting at", pystr, begin);
369 goto bail;
371 c = buf[next];
372 if (c != 'u') {
373 /* Non-unicode backslash escapes */
374 end = next + 1;
375 switch (c) {
376 case '"': break;
377 case '\\': break;
378 case '/': break;
379 case 'b': c = '\b'; break;
380 case 'f': c = '\f'; break;
381 case 'n': c = '\n'; break;
382 case 'r': c = '\r'; break;
383 case 't': c = '\t'; break;
384 default: c = 0;
386 if (c == 0) {
387 raise_errmsg("Invalid \\escape", pystr, end - 2);
388 goto bail;
391 else {
392 c = 0;
393 next++;
394 end = next + 4;
395 if (end >= len) {
396 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
397 goto bail;
399 /* Decode 4 hex digits */
400 for (; next < end; next++) {
401 Py_UNICODE digit = buf[next];
402 c <<= 4;
403 switch (digit) {
404 case '0': case '1': case '2': case '3': case '4':
405 case '5': case '6': case '7': case '8': case '9':
406 c |= (digit - '0'); break;
407 case 'a': case 'b': case 'c': case 'd': case 'e':
408 case 'f':
409 c |= (digit - 'a' + 10); break;
410 case 'A': case 'B': case 'C': case 'D': case 'E':
411 case 'F':
412 c |= (digit - 'A' + 10); break;
413 default:
414 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
415 goto bail;
418 #ifdef Py_UNICODE_WIDE
419 /* Surrogate pair */
420 if ((c & 0xfc00) == 0xd800) {
421 Py_UNICODE c2 = 0;
422 if (end + 6 >= len) {
423 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
424 goto bail;
426 if (buf[next++] != '\\' || buf[next++] != 'u') {
427 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
428 goto bail;
430 end += 6;
431 /* Decode 4 hex digits */
432 for (; next < end; next++) {
433 c2 <<= 4;
434 Py_UNICODE digit = buf[next];
435 switch (digit) {
436 case '0': case '1': case '2': case '3': case '4':
437 case '5': case '6': case '7': case '8': case '9':
438 c2 |= (digit - '0'); break;
439 case 'a': case 'b': case 'c': case 'd': case 'e':
440 case 'f':
441 c2 |= (digit - 'a' + 10); break;
442 case 'A': case 'B': case 'C': case 'D': case 'E':
443 case 'F':
444 c2 |= (digit - 'A' + 10); break;
445 default:
446 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
447 goto bail;
450 if ((c2 & 0xfc00) != 0xdc00) {
451 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
452 goto bail;
454 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
456 else if ((c & 0xfc00) == 0xdc00) {
457 raise_errmsg("Unpaired low surrogate", pystr, end - 5);
458 goto bail;
460 #endif
462 chunk = PyUnicode_FromUnicode(&c, 1);
463 if (chunk == NULL) {
464 goto bail;
466 if (PyList_Append(chunks, chunk)) {
467 Py_DECREF(chunk);
468 goto bail;
470 Py_DECREF(chunk);
473 rval = join_list_unicode(chunks);
474 if (rval == NULL) {
475 goto bail;
477 Py_DECREF(chunks);
478 *next_end_ptr = end;
479 return rval;
480 bail:
481 *next_end_ptr = -1;
482 Py_XDECREF(chunks);
483 return NULL;
486 PyDoc_STRVAR(pydoc_scanstring,
487 "scanstring(basestring, end, strict=True) -> (bytes, end)\n"
488 "\n"
489 "Scan the string s for a JSON string. End is the index of the\n"
490 "character in s after the quote that started the JSON string.\n"
491 "Unescapes all valid JSON string escape sequences and raises ValueError\n"
492 "on attempt to decode an invalid string. If strict is False then literal\n"
493 "control characters are allowed in the string.\n"
494 "\n"
495 "Returns a tuple of the decoded string and the index of the character in s\n"
496 "after the end quote."
499 static PyObject *
500 py_scanstring(PyObject* self UNUSED, PyObject *args)
502 PyObject *pystr;
503 PyObject *rval;
504 Py_ssize_t end;
505 Py_ssize_t next_end = -1;
506 int strict = 1;
507 if (!PyArg_ParseTuple(args, "OO&|i:scanstring", &pystr, _convertPyInt_AsSsize_t, &end, &strict)) {
508 return NULL;
510 if (PyUnicode_Check(pystr)) {
511 rval = scanstring_unicode(pystr, end, strict, &next_end);
513 else {
514 PyErr_Format(PyExc_TypeError,
515 "first argument must be a string or bytes, not %.80s",
516 Py_TYPE(pystr)->tp_name);
517 return NULL;
519 return _build_rval_index_tuple(rval, next_end);
522 PyDoc_STRVAR(pydoc_encode_basestring_ascii,
523 "encode_basestring_ascii(basestring) -> bytes\n"
524 "\n"
525 "Return an ASCII-only JSON representation of a Python string"
528 static PyObject *
529 py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr)
531 PyObject *rval;
532 /* Return an ASCII-only JSON representation of a Python string */
533 /* METH_O */
534 if (PyUnicode_Check(pystr)) {
535 rval = ascii_escape_unicode(pystr);
537 else {
538 PyErr_Format(PyExc_TypeError,
539 "first argument must be a string, not %.80s",
540 Py_TYPE(pystr)->tp_name);
541 return NULL;
543 return rval;
546 static void
547 scanner_dealloc(PyObject *self)
549 /* Deallocate scanner object */
550 scanner_clear(self);
551 Py_TYPE(self)->tp_free(self);
554 static int
555 scanner_traverse(PyObject *self, visitproc visit, void *arg)
557 PyScannerObject *s;
558 assert(PyScanner_Check(self));
559 s = (PyScannerObject *)self;
560 Py_VISIT(s->strict);
561 Py_VISIT(s->object_hook);
562 Py_VISIT(s->object_pairs_hook);
563 Py_VISIT(s->parse_float);
564 Py_VISIT(s->parse_int);
565 Py_VISIT(s->parse_constant);
566 return 0;
569 static int
570 scanner_clear(PyObject *self)
572 PyScannerObject *s;
573 assert(PyScanner_Check(self));
574 s = (PyScannerObject *)self;
575 Py_CLEAR(s->strict);
576 Py_CLEAR(s->object_hook);
577 Py_CLEAR(s->object_pairs_hook);
578 Py_CLEAR(s->parse_float);
579 Py_CLEAR(s->parse_int);
580 Py_CLEAR(s->parse_constant);
581 return 0;
584 static PyObject *
585 _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
586 /* Read a JSON object from PyUnicode pystr.
587 idx is the index of the first character after the opening curly brace.
588 *next_idx_ptr is a return-by-reference index to the first character after
589 the closing curly brace.
591 Returns a new PyObject (usually a dict, but object_hook can change that)
593 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
594 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
595 PyObject *val = NULL;
596 PyObject *rval = PyList_New(0);
597 PyObject *key = NULL;
598 int strict = PyObject_IsTrue(s->strict);
599 Py_ssize_t next_idx;
600 if (rval == NULL)
601 return NULL;
603 /* skip whitespace after { */
604 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
606 /* only loop if the object is non-empty */
607 if (idx <= end_idx && str[idx] != '}') {
608 while (idx <= end_idx) {
609 /* read key */
610 if (str[idx] != '"') {
611 raise_errmsg("Expecting property name", pystr, idx);
612 goto bail;
614 key = scanstring_unicode(pystr, idx + 1, strict, &next_idx);
615 if (key == NULL)
616 goto bail;
617 idx = next_idx;
619 /* skip whitespace between key and : delimiter, read :, skip whitespace */
620 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
621 if (idx > end_idx || str[idx] != ':') {
622 raise_errmsg("Expecting : delimiter", pystr, idx);
623 goto bail;
625 idx++;
626 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
628 /* read any JSON term */
629 val = scan_once_unicode(s, pystr, idx, &next_idx);
630 if (val == NULL)
631 goto bail;
634 PyObject *tuple = PyTuple_Pack(2, key, val);
635 if (tuple == NULL)
636 goto bail;
637 if (PyList_Append(rval, tuple) == -1) {
638 Py_DECREF(tuple);
639 goto bail;
641 Py_DECREF(tuple);
644 Py_CLEAR(key);
645 Py_CLEAR(val);
646 idx = next_idx;
648 /* skip whitespace before } or , */
649 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
651 /* bail if the object is closed or we didn't get the , delimiter */
652 if (idx > end_idx) break;
653 if (str[idx] == '}') {
654 break;
656 else if (str[idx] != ',') {
657 raise_errmsg("Expecting , delimiter", pystr, idx);
658 goto bail;
660 idx++;
662 /* skip whitespace after , delimiter */
663 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
667 /* verify that idx < end_idx, str[idx] should be '}' */
668 if (idx > end_idx || str[idx] != '}') {
669 raise_errmsg("Expecting object", pystr, end_idx);
670 goto bail;
673 *next_idx_ptr = idx + 1;
675 if (s->object_pairs_hook != Py_None) {
676 val = PyObject_CallFunctionObjArgs(s->object_pairs_hook, rval, NULL);
677 if (val == NULL)
678 goto bail;
679 Py_DECREF(rval);
680 return val;
683 val = PyDict_New();
684 if (val == NULL)
685 goto bail;
686 if (PyDict_MergeFromSeq2(val, rval, 1) == -1)
687 goto bail;
688 Py_DECREF(rval);
689 rval = val;
691 /* if object_hook is not None: rval = object_hook(rval) */
692 if (s->object_hook != Py_None) {
693 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
694 if (val == NULL)
695 goto bail;
696 Py_DECREF(rval);
697 rval = val;
698 val = NULL;
700 return rval;
701 bail:
702 Py_XDECREF(key);
703 Py_XDECREF(val);
704 Py_DECREF(rval);
705 return NULL;
708 static PyObject *
709 _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
710 /* Read a JSON array from PyString pystr.
711 idx is the index of the first character after the opening brace.
712 *next_idx_ptr is a return-by-reference index to the first character after
713 the closing brace.
715 Returns a new PyList
717 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
718 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
719 PyObject *val = NULL;
720 PyObject *rval = PyList_New(0);
721 Py_ssize_t next_idx;
722 if (rval == NULL)
723 return NULL;
725 /* skip whitespace after [ */
726 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
728 /* only loop if the array is non-empty */
729 if (idx <= end_idx && str[idx] != ']') {
730 while (idx <= end_idx) {
732 /* read any JSON term */
733 val = scan_once_unicode(s, pystr, idx, &next_idx);
734 if (val == NULL)
735 goto bail;
737 if (PyList_Append(rval, val) == -1)
738 goto bail;
740 Py_CLEAR(val);
741 idx = next_idx;
743 /* skip whitespace between term and , */
744 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
746 /* bail if the array is closed or we didn't get the , delimiter */
747 if (idx > end_idx) break;
748 if (str[idx] == ']') {
749 break;
751 else if (str[idx] != ',') {
752 raise_errmsg("Expecting , delimiter", pystr, idx);
753 goto bail;
755 idx++;
757 /* skip whitespace after , */
758 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
762 /* verify that idx < end_idx, str[idx] should be ']' */
763 if (idx > end_idx || str[idx] != ']') {
764 raise_errmsg("Expecting object", pystr, end_idx);
765 goto bail;
767 *next_idx_ptr = idx + 1;
768 return rval;
769 bail:
770 Py_XDECREF(val);
771 Py_DECREF(rval);
772 return NULL;
775 static PyObject *
776 _parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
777 /* Read a JSON constant from PyString pystr.
778 constant is the constant string that was found
779 ("NaN", "Infinity", "-Infinity").
780 idx is the index of the first character of the constant
781 *next_idx_ptr is a return-by-reference index to the first character after
782 the constant.
784 Returns the result of parse_constant
786 PyObject *cstr;
787 PyObject *rval;
788 /* constant is "NaN", "Infinity", or "-Infinity" */
789 cstr = PyUnicode_InternFromString(constant);
790 if (cstr == NULL)
791 return NULL;
793 /* rval = parse_constant(constant) */
794 rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL);
795 idx += PyUnicode_GET_SIZE(cstr);
796 Py_DECREF(cstr);
797 *next_idx_ptr = idx;
798 return rval;
801 static PyObject *
802 _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
803 /* Read a JSON number from PyUnicode pystr.
804 idx is the index of the first character of the number
805 *next_idx_ptr is a return-by-reference index to the first character after
806 the number.
808 Returns a new PyObject representation of that number:
809 PyInt, PyLong, or PyFloat.
810 May return other types if parse_int or parse_float are set
812 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
813 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
814 Py_ssize_t idx = start;
815 int is_float = 0;
816 PyObject *rval;
817 PyObject *numstr;
819 /* read a sign if it's there, make sure it's not the end of the string */
820 if (str[idx] == '-') {
821 idx++;
822 if (idx > end_idx) {
823 PyErr_SetNone(PyExc_StopIteration);
824 return NULL;
828 /* read as many integer digits as we find as long as it doesn't start with 0 */
829 if (str[idx] >= '1' && str[idx] <= '9') {
830 idx++;
831 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
833 /* if it starts with 0 we only expect one integer digit */
834 else if (str[idx] == '0') {
835 idx++;
837 /* no integer digits, error */
838 else {
839 PyErr_SetNone(PyExc_StopIteration);
840 return NULL;
843 /* if the next char is '.' followed by a digit then read all float digits */
844 if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
845 is_float = 1;
846 idx += 2;
847 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
850 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
851 if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
852 Py_ssize_t e_start = idx;
853 idx++;
855 /* read an exponent sign if present */
856 if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
858 /* read all digits */
859 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
861 /* if we got a digit, then parse as float. if not, backtrack */
862 if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
863 is_float = 1;
865 else {
866 idx = e_start;
870 /* copy the section we determined to be a number */
871 numstr = PyUnicode_FromUnicode(&str[start], idx - start);
872 if (numstr == NULL)
873 return NULL;
874 if (is_float) {
875 /* parse as a float using a fast path if available, otherwise call user defined method */
876 if (s->parse_float != (PyObject *)&PyFloat_Type) {
877 rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
879 else {
880 rval = PyFloat_FromString(numstr);
883 else {
884 /* no fast path for unicode -> int, just call */
885 rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
887 Py_DECREF(numstr);
888 *next_idx_ptr = idx;
889 return rval;
892 static PyObject *
893 scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
895 /* Read one JSON term (of any kind) from PyUnicode pystr.
896 idx is the index of the first character of the term
897 *next_idx_ptr is a return-by-reference index to the first character after
898 the number.
900 Returns a new PyObject representation of the term.
902 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
903 Py_ssize_t length = PyUnicode_GET_SIZE(pystr);
904 if (idx >= length) {
905 PyErr_SetNone(PyExc_StopIteration);
906 return NULL;
908 switch (str[idx]) {
909 case '"':
910 /* string */
911 return scanstring_unicode(pystr, idx + 1,
912 PyObject_IsTrue(s->strict),
913 next_idx_ptr);
914 case '{':
915 /* object */
916 return _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
917 case '[':
918 /* array */
919 return _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
920 case 'n':
921 /* null */
922 if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
923 Py_INCREF(Py_None);
924 *next_idx_ptr = idx + 4;
925 return Py_None;
927 break;
928 case 't':
929 /* true */
930 if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
931 Py_INCREF(Py_True);
932 *next_idx_ptr = idx + 4;
933 return Py_True;
935 break;
936 case 'f':
937 /* false */
938 if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
939 Py_INCREF(Py_False);
940 *next_idx_ptr = idx + 5;
941 return Py_False;
943 break;
944 case 'N':
945 /* NaN */
946 if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
947 return _parse_constant(s, "NaN", idx, next_idx_ptr);
949 break;
950 case 'I':
951 /* Infinity */
952 if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
953 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
955 break;
956 case '-':
957 /* -Infinity */
958 if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
959 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
961 break;
963 /* Didn't find a string, object, array, or named constant. Look for a number. */
964 return _match_number_unicode(s, pystr, idx, next_idx_ptr);
967 static PyObject *
968 scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
970 /* Python callable interface to scan_once_{str,unicode} */
971 PyObject *pystr;
972 PyObject *rval;
973 Py_ssize_t idx;
974 Py_ssize_t next_idx = -1;
975 static char *kwlist[] = {"string", "idx", NULL};
976 PyScannerObject *s;
977 assert(PyScanner_Check(self));
978 s = (PyScannerObject *)self;
979 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx))
980 return NULL;
982 if (PyUnicode_Check(pystr)) {
983 rval = scan_once_unicode(s, pystr, idx, &next_idx);
985 else {
986 PyErr_Format(PyExc_TypeError,
987 "first argument must be a string, not %.80s",
988 Py_TYPE(pystr)->tp_name);
989 return NULL;
991 return _build_rval_index_tuple(rval, next_idx);
994 static PyObject *
995 scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
997 PyScannerObject *s;
998 s = (PyScannerObject *)type->tp_alloc(type, 0);
999 if (s != NULL) {
1000 s->strict = NULL;
1001 s->object_hook = NULL;
1002 s->object_pairs_hook = NULL;
1003 s->parse_float = NULL;
1004 s->parse_int = NULL;
1005 s->parse_constant = NULL;
1007 return (PyObject *)s;
1010 static int
1011 scanner_init(PyObject *self, PyObject *args, PyObject *kwds)
1013 /* Initialize Scanner object */
1014 PyObject *ctx;
1015 static char *kwlist[] = {"context", NULL};
1016 PyScannerObject *s;
1018 assert(PyScanner_Check(self));
1019 s = (PyScannerObject *)self;
1021 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
1022 return -1;
1024 /* All of these will fail "gracefully" so we don't need to verify them */
1025 s->strict = PyObject_GetAttrString(ctx, "strict");
1026 if (s->strict == NULL)
1027 goto bail;
1028 s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
1029 if (s->object_hook == NULL)
1030 goto bail;
1031 s->object_pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
1032 if (s->object_pairs_hook == NULL)
1033 goto bail;
1034 s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
1035 if (s->parse_float == NULL)
1036 goto bail;
1037 s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
1038 if (s->parse_int == NULL)
1039 goto bail;
1040 s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
1041 if (s->parse_constant == NULL)
1042 goto bail;
1044 return 0;
1046 bail:
1047 Py_CLEAR(s->strict);
1048 Py_CLEAR(s->object_hook);
1049 Py_CLEAR(s->object_pairs_hook);
1050 Py_CLEAR(s->parse_float);
1051 Py_CLEAR(s->parse_int);
1052 Py_CLEAR(s->parse_constant);
1053 return -1;
1056 PyDoc_STRVAR(scanner_doc, "JSON scanner object");
1058 static
1059 PyTypeObject PyScannerType = {
1060 PyVarObject_HEAD_INIT(NULL, 0)
1061 "_json.Scanner", /* tp_name */
1062 sizeof(PyScannerObject), /* tp_basicsize */
1063 0, /* tp_itemsize */
1064 scanner_dealloc, /* tp_dealloc */
1065 0, /* tp_print */
1066 0, /* tp_getattr */
1067 0, /* tp_setattr */
1068 0, /* tp_compare */
1069 0, /* tp_repr */
1070 0, /* tp_as_number */
1071 0, /* tp_as_sequence */
1072 0, /* tp_as_mapping */
1073 0, /* tp_hash */
1074 scanner_call, /* tp_call */
1075 0, /* tp_str */
1076 0,/* PyObject_GenericGetAttr, */ /* tp_getattro */
1077 0,/* PyObject_GenericSetAttr, */ /* tp_setattro */
1078 0, /* tp_as_buffer */
1079 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
1080 scanner_doc, /* tp_doc */
1081 scanner_traverse, /* tp_traverse */
1082 scanner_clear, /* tp_clear */
1083 0, /* tp_richcompare */
1084 0, /* tp_weaklistoffset */
1085 0, /* tp_iter */
1086 0, /* tp_iternext */
1087 0, /* tp_methods */
1088 scanner_members, /* tp_members */
1089 0, /* tp_getset */
1090 0, /* tp_base */
1091 0, /* tp_dict */
1092 0, /* tp_descr_get */
1093 0, /* tp_descr_set */
1094 0, /* tp_dictoffset */
1095 scanner_init, /* tp_init */
1096 0,/* PyType_GenericAlloc, */ /* tp_alloc */
1097 scanner_new, /* tp_new */
1098 0,/* PyObject_GC_Del, */ /* tp_free */
1101 static PyObject *
1102 encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1104 PyEncoderObject *s;
1105 s = (PyEncoderObject *)type->tp_alloc(type, 0);
1106 if (s != NULL) {
1107 s->markers = NULL;
1108 s->defaultfn = NULL;
1109 s->encoder = NULL;
1110 s->indent = NULL;
1111 s->key_separator = NULL;
1112 s->item_separator = NULL;
1113 s->sort_keys = NULL;
1114 s->skipkeys = NULL;
1116 return (PyObject *)s;
1119 static int
1120 encoder_init(PyObject *self, PyObject *args, PyObject *kwds)
1122 /* initialize Encoder object */
1123 static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
1125 PyEncoderObject *s;
1126 PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
1127 PyObject *item_separator, *sort_keys, *skipkeys, *allow_nan;
1129 assert(PyEncoder_Check(self));
1130 s = (PyEncoderObject *)self;
1132 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOO:make_encoder", kwlist,
1133 &markers, &defaultfn, &encoder, &indent, &key_separator, &item_separator,
1134 &sort_keys, &skipkeys, &allow_nan))
1135 return -1;
1137 s->markers = markers;
1138 s->defaultfn = defaultfn;
1139 s->encoder = encoder;
1140 s->indent = indent;
1141 s->key_separator = key_separator;
1142 s->item_separator = item_separator;
1143 s->sort_keys = sort_keys;
1144 s->skipkeys = skipkeys;
1145 s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii);
1146 s->allow_nan = PyObject_IsTrue(allow_nan);
1148 Py_INCREF(s->markers);
1149 Py_INCREF(s->defaultfn);
1150 Py_INCREF(s->encoder);
1151 Py_INCREF(s->indent);
1152 Py_INCREF(s->key_separator);
1153 Py_INCREF(s->item_separator);
1154 Py_INCREF(s->sort_keys);
1155 Py_INCREF(s->skipkeys);
1156 return 0;
1159 static PyObject *
1160 encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
1162 /* Python callable interface to encode_listencode_obj */
1163 static char *kwlist[] = {"obj", "_current_indent_level", NULL};
1164 PyObject *obj;
1165 PyObject *rval;
1166 Py_ssize_t indent_level;
1167 PyEncoderObject *s;
1168 assert(PyEncoder_Check(self));
1169 s = (PyEncoderObject *)self;
1170 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist,
1171 &obj, _convertPyInt_AsSsize_t, &indent_level))
1172 return NULL;
1173 rval = PyList_New(0);
1174 if (rval == NULL)
1175 return NULL;
1176 if (encoder_listencode_obj(s, rval, obj, indent_level)) {
1177 Py_DECREF(rval);
1178 return NULL;
1180 return rval;
1183 static PyObject *
1184 _encoded_const(PyObject *obj)
1186 /* Return the JSON string representation of None, True, False */
1187 if (obj == Py_None) {
1188 static PyObject *s_null = NULL;
1189 if (s_null == NULL) {
1190 s_null = PyUnicode_InternFromString("null");
1192 Py_INCREF(s_null);
1193 return s_null;
1195 else if (obj == Py_True) {
1196 static PyObject *s_true = NULL;
1197 if (s_true == NULL) {
1198 s_true = PyUnicode_InternFromString("true");
1200 Py_INCREF(s_true);
1201 return s_true;
1203 else if (obj == Py_False) {
1204 static PyObject *s_false = NULL;
1205 if (s_false == NULL) {
1206 s_false = PyUnicode_InternFromString("false");
1208 Py_INCREF(s_false);
1209 return s_false;
1211 else {
1212 PyErr_SetString(PyExc_ValueError, "not a const");
1213 return NULL;
1217 static PyObject *
1218 encoder_encode_float(PyEncoderObject *s, PyObject *obj)
1220 /* Return the JSON representation of a PyFloat */
1221 double i = PyFloat_AS_DOUBLE(obj);
1222 if (!Py_IS_FINITE(i)) {
1223 if (!s->allow_nan) {
1224 PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant");
1225 return NULL;
1227 if (i > 0) {
1228 return PyUnicode_FromString("Infinity");
1230 else if (i < 0) {
1231 return PyUnicode_FromString("-Infinity");
1233 else {
1234 return PyUnicode_FromString("NaN");
1237 /* Use a better float format here? */
1238 return PyObject_Repr(obj);
1241 static PyObject *
1242 encoder_encode_string(PyEncoderObject *s, PyObject *obj)
1244 /* Return the JSON representation of a string */
1245 if (s->fast_encode)
1246 return py_encode_basestring_ascii(NULL, obj);
1247 else
1248 return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL);
1251 static int
1252 _steal_list_append(PyObject *lst, PyObject *stolen)
1254 /* Append stolen and then decrement its reference count */
1255 int rval = PyList_Append(lst, stolen);
1256 Py_DECREF(stolen);
1257 return rval;
1260 static int
1261 encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level)
1263 /* Encode Python object obj to a JSON term, rval is a PyList */
1264 PyObject *newobj;
1265 int rv;
1267 if (obj == Py_None || obj == Py_True || obj == Py_False) {
1268 PyObject *cstr = _encoded_const(obj);
1269 if (cstr == NULL)
1270 return -1;
1271 return _steal_list_append(rval, cstr);
1273 else if (PyUnicode_Check(obj))
1275 PyObject *encoded = encoder_encode_string(s, obj);
1276 if (encoded == NULL)
1277 return -1;
1278 return _steal_list_append(rval, encoded);
1280 else if (PyLong_Check(obj)) {
1281 PyObject *encoded = PyObject_Str(obj);
1282 if (encoded == NULL)
1283 return -1;
1284 return _steal_list_append(rval, encoded);
1286 else if (PyFloat_Check(obj)) {
1287 PyObject *encoded = encoder_encode_float(s, obj);
1288 if (encoded == NULL)
1289 return -1;
1290 return _steal_list_append(rval, encoded);
1292 else if (PyList_Check(obj) || PyTuple_Check(obj)) {
1293 return encoder_listencode_list(s, rval, obj, indent_level);
1295 else if (PyDict_Check(obj)) {
1296 return encoder_listencode_dict(s, rval, obj, indent_level);
1298 else {
1299 PyObject *ident = NULL;
1300 if (s->markers != Py_None) {
1301 int has_key;
1302 ident = PyLong_FromVoidPtr(obj);
1303 if (ident == NULL)
1304 return -1;
1305 has_key = PyDict_Contains(s->markers, ident);
1306 if (has_key) {
1307 if (has_key != -1)
1308 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1309 Py_DECREF(ident);
1310 return -1;
1312 if (PyDict_SetItem(s->markers, ident, obj)) {
1313 Py_DECREF(ident);
1314 return -1;
1317 newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL);
1318 if (newobj == NULL) {
1319 Py_XDECREF(ident);
1320 return -1;
1322 rv = encoder_listencode_obj(s, rval, newobj, indent_level);
1323 Py_DECREF(newobj);
1324 if (rv) {
1325 Py_XDECREF(ident);
1326 return -1;
1328 if (ident != NULL) {
1329 if (PyDict_DelItem(s->markers, ident)) {
1330 Py_XDECREF(ident);
1331 return -1;
1333 Py_XDECREF(ident);
1335 return rv;
1339 static int
1340 encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level)
1342 /* Encode Python dict dct a JSON term, rval is a PyList */
1343 static PyObject *open_dict = NULL;
1344 static PyObject *close_dict = NULL;
1345 static PyObject *empty_dict = NULL;
1346 PyObject *kstr = NULL;
1347 PyObject *ident = NULL;
1348 PyObject *it = NULL;
1349 PyObject *items;
1350 PyObject *item = NULL;
1351 int skipkeys;
1352 Py_ssize_t idx;
1353 PyObject *mapping;
1354 static PyObject *code = NULL;
1356 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) {
1357 open_dict = PyUnicode_InternFromString("{");
1358 close_dict = PyUnicode_InternFromString("}");
1359 empty_dict = PyUnicode_InternFromString("{}");
1360 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL)
1361 return -1;
1363 if (Py_SIZE(dct) == 0)
1364 return PyList_Append(rval, empty_dict);
1366 if (s->markers != Py_None) {
1367 int has_key;
1368 ident = PyLong_FromVoidPtr(dct);
1369 if (ident == NULL)
1370 goto bail;
1371 has_key = PyDict_Contains(s->markers, ident);
1372 if (has_key) {
1373 if (has_key != -1)
1374 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1375 goto bail;
1377 if (PyDict_SetItem(s->markers, ident, dct)) {
1378 goto bail;
1382 if (PyList_Append(rval, open_dict))
1383 goto bail;
1385 if (s->indent != Py_None) {
1386 /* TODO: DOES NOT RUN */
1387 indent_level += 1;
1389 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1390 separator = _item_separator + newline_indent
1391 buf += newline_indent
1395 if (PyObject_IsTrue(s->sort_keys)) {
1396 if (code == NULL) {
1397 code = Py_CompileString("sorted(d.items(), key=lambda kv: kv[0])",
1398 "_json.c", Py_eval_input);
1399 if (code == NULL)
1400 goto bail;
1403 mapping = PyDict_New();
1404 if (mapping == NULL)
1405 goto bail;
1406 if (PyDict_SetItemString(mapping, "d", dct) == -1) {
1407 Py_DECREF(mapping);
1408 goto bail;
1410 items = PyEval_EvalCode((PyCodeObject *)code, PyEval_GetGlobals(), mapping);
1411 Py_DECREF(mapping);
1412 } else {
1413 items = PyMapping_Items(dct);
1415 if (items == NULL)
1416 goto bail;
1417 it = PyObject_GetIter(items);
1418 Py_DECREF(items);
1419 if (it == NULL)
1420 goto bail;
1421 skipkeys = PyObject_IsTrue(s->skipkeys);
1422 idx = 0;
1423 while ((item = PyIter_Next(it)) != NULL) {
1424 PyObject *encoded, *key, *value;
1425 if (!PyTuple_Check(item) || Py_SIZE(item) != 2) {
1426 PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
1427 goto bail;
1429 key = PyTuple_GET_ITEM(item, 0);
1430 if (PyUnicode_Check(key)) {
1431 Py_INCREF(key);
1432 kstr = key;
1434 else if (PyFloat_Check(key)) {
1435 kstr = encoder_encode_float(s, key);
1436 if (kstr == NULL)
1437 goto bail;
1439 else if (key == Py_True || key == Py_False || key == Py_None) {
1440 /* This must come before the PyLong_Check because
1441 True and False are also 1 and 0.*/
1442 kstr = _encoded_const(key);
1443 if (kstr == NULL)
1444 goto bail;
1446 else if (PyLong_Check(key)) {
1447 kstr = PyObject_Str(key);
1448 if (kstr == NULL)
1449 goto bail;
1451 else if (skipkeys) {
1452 Py_DECREF(item);
1453 continue;
1455 else {
1456 /* TODO: include repr of key */
1457 PyErr_SetString(PyExc_ValueError, "keys must be a string");
1458 goto bail;
1461 if (idx) {
1462 if (PyList_Append(rval, s->item_separator))
1463 goto bail;
1466 encoded = encoder_encode_string(s, kstr);
1467 Py_CLEAR(kstr);
1468 if (encoded == NULL)
1469 goto bail;
1470 if (PyList_Append(rval, encoded)) {
1471 Py_DECREF(encoded);
1472 goto bail;
1474 Py_DECREF(encoded);
1475 if (PyList_Append(rval, s->key_separator))
1476 goto bail;
1478 value = PyTuple_GET_ITEM(item, 1);
1479 if (encoder_listencode_obj(s, rval, value, indent_level))
1480 goto bail;
1481 idx += 1;
1482 Py_DECREF(item);
1484 if (PyErr_Occurred())
1485 goto bail;
1486 Py_CLEAR(it);
1488 if (ident != NULL) {
1489 if (PyDict_DelItem(s->markers, ident))
1490 goto bail;
1491 Py_CLEAR(ident);
1493 if (s->indent != Py_None) {
1494 /* TODO: DOES NOT RUN */
1495 indent_level -= 1;
1497 yield '\n' + (' ' * (_indent * _current_indent_level))
1500 if (PyList_Append(rval, close_dict))
1501 goto bail;
1502 return 0;
1504 bail:
1505 Py_XDECREF(it);
1506 Py_XDECREF(item);
1507 Py_XDECREF(kstr);
1508 Py_XDECREF(ident);
1509 return -1;
1513 static int
1514 encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level)
1516 /* Encode Python list seq to a JSON term, rval is a PyList */
1517 static PyObject *open_array = NULL;
1518 static PyObject *close_array = NULL;
1519 static PyObject *empty_array = NULL;
1520 PyObject *ident = NULL;
1521 PyObject *s_fast = NULL;
1522 Py_ssize_t num_items;
1523 PyObject **seq_items;
1524 Py_ssize_t i;
1526 if (open_array == NULL || close_array == NULL || empty_array == NULL) {
1527 open_array = PyUnicode_InternFromString("[");
1528 close_array = PyUnicode_InternFromString("]");
1529 empty_array = PyUnicode_InternFromString("[]");
1530 if (open_array == NULL || close_array == NULL || empty_array == NULL)
1531 return -1;
1533 ident = NULL;
1534 s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
1535 if (s_fast == NULL)
1536 return -1;
1537 num_items = PySequence_Fast_GET_SIZE(s_fast);
1538 if (num_items == 0) {
1539 Py_DECREF(s_fast);
1540 return PyList_Append(rval, empty_array);
1543 if (s->markers != Py_None) {
1544 int has_key;
1545 ident = PyLong_FromVoidPtr(seq);
1546 if (ident == NULL)
1547 goto bail;
1548 has_key = PyDict_Contains(s->markers, ident);
1549 if (has_key) {
1550 if (has_key != -1)
1551 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1552 goto bail;
1554 if (PyDict_SetItem(s->markers, ident, seq)) {
1555 goto bail;
1559 seq_items = PySequence_Fast_ITEMS(s_fast);
1560 if (PyList_Append(rval, open_array))
1561 goto bail;
1562 if (s->indent != Py_None) {
1563 /* TODO: DOES NOT RUN */
1564 indent_level += 1;
1566 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1567 separator = _item_separator + newline_indent
1568 buf += newline_indent
1571 for (i = 0; i < num_items; i++) {
1572 PyObject *obj = seq_items[i];
1573 if (i) {
1574 if (PyList_Append(rval, s->item_separator))
1575 goto bail;
1577 if (encoder_listencode_obj(s, rval, obj, indent_level))
1578 goto bail;
1580 if (ident != NULL) {
1581 if (PyDict_DelItem(s->markers, ident))
1582 goto bail;
1583 Py_CLEAR(ident);
1585 if (s->indent != Py_None) {
1586 /* TODO: DOES NOT RUN */
1587 indent_level -= 1;
1589 yield '\n' + (' ' * (_indent * _current_indent_level))
1592 if (PyList_Append(rval, close_array))
1593 goto bail;
1594 Py_DECREF(s_fast);
1595 return 0;
1597 bail:
1598 Py_XDECREF(ident);
1599 Py_DECREF(s_fast);
1600 return -1;
1603 static void
1604 encoder_dealloc(PyObject *self)
1606 /* Deallocate Encoder */
1607 encoder_clear(self);
1608 Py_TYPE(self)->tp_free(self);
1611 static int
1612 encoder_traverse(PyObject *self, visitproc visit, void *arg)
1614 PyEncoderObject *s;
1615 assert(PyEncoder_Check(self));
1616 s = (PyEncoderObject *)self;
1617 Py_VISIT(s->markers);
1618 Py_VISIT(s->defaultfn);
1619 Py_VISIT(s->encoder);
1620 Py_VISIT(s->indent);
1621 Py_VISIT(s->key_separator);
1622 Py_VISIT(s->item_separator);
1623 Py_VISIT(s->sort_keys);
1624 Py_VISIT(s->skipkeys);
1625 return 0;
1628 static int
1629 encoder_clear(PyObject *self)
1631 /* Deallocate Encoder */
1632 PyEncoderObject *s;
1633 assert(PyEncoder_Check(self));
1634 s = (PyEncoderObject *)self;
1635 Py_CLEAR(s->markers);
1636 Py_CLEAR(s->defaultfn);
1637 Py_CLEAR(s->encoder);
1638 Py_CLEAR(s->indent);
1639 Py_CLEAR(s->key_separator);
1640 Py_CLEAR(s->item_separator);
1641 Py_CLEAR(s->sort_keys);
1642 Py_CLEAR(s->skipkeys);
1643 return 0;
1646 PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
1648 static
1649 PyTypeObject PyEncoderType = {
1650 PyVarObject_HEAD_INIT(NULL, 0)
1651 "_json.Encoder", /* tp_name */
1652 sizeof(PyEncoderObject), /* tp_basicsize */
1653 0, /* tp_itemsize */
1654 encoder_dealloc, /* tp_dealloc */
1655 0, /* tp_print */
1656 0, /* tp_getattr */
1657 0, /* tp_setattr */
1658 0, /* tp_compare */
1659 0, /* tp_repr */
1660 0, /* tp_as_number */
1661 0, /* tp_as_sequence */
1662 0, /* tp_as_mapping */
1663 0, /* tp_hash */
1664 encoder_call, /* tp_call */
1665 0, /* tp_str */
1666 0, /* tp_getattro */
1667 0, /* tp_setattro */
1668 0, /* tp_as_buffer */
1669 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
1670 encoder_doc, /* tp_doc */
1671 encoder_traverse, /* tp_traverse */
1672 encoder_clear, /* tp_clear */
1673 0, /* tp_richcompare */
1674 0, /* tp_weaklistoffset */
1675 0, /* tp_iter */
1676 0, /* tp_iternext */
1677 0, /* tp_methods */
1678 encoder_members, /* tp_members */
1679 0, /* tp_getset */
1680 0, /* tp_base */
1681 0, /* tp_dict */
1682 0, /* tp_descr_get */
1683 0, /* tp_descr_set */
1684 0, /* tp_dictoffset */
1685 encoder_init, /* tp_init */
1686 0, /* tp_alloc */
1687 encoder_new, /* tp_new */
1688 0, /* tp_free */
1691 static PyMethodDef speedups_methods[] = {
1692 {"encode_basestring_ascii",
1693 (PyCFunction)py_encode_basestring_ascii,
1694 METH_O,
1695 pydoc_encode_basestring_ascii},
1696 {"scanstring",
1697 (PyCFunction)py_scanstring,
1698 METH_VARARGS,
1699 pydoc_scanstring},
1700 {NULL, NULL, 0, NULL}
1703 PyDoc_STRVAR(module_doc,
1704 "json speedups\n");
1706 static struct PyModuleDef jsonmodule = {
1707 PyModuleDef_HEAD_INIT,
1708 "_json",
1709 module_doc,
1711 speedups_methods,
1712 NULL,
1713 NULL,
1714 NULL,
1715 NULL
1718 PyObject*
1719 PyInit__json(void)
1721 PyObject *m = PyModule_Create(&jsonmodule);
1722 if (!m)
1723 return NULL;
1724 PyScannerType.tp_new = PyType_GenericNew;
1725 if (PyType_Ready(&PyScannerType) < 0)
1726 goto fail;
1727 PyEncoderType.tp_new = PyType_GenericNew;
1728 if (PyType_Ready(&PyEncoderType) < 0)
1729 goto fail;
1730 Py_INCREF((PyObject*)&PyScannerType);
1731 if (PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType) < 0) {
1732 Py_DECREF((PyObject*)&PyScannerType);
1733 goto fail;
1735 Py_INCREF((PyObject*)&PyEncoderType);
1736 if (PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType) < 0) {
1737 Py_DECREF((PyObject*)&PyEncoderType);
1738 goto fail;
1740 return m;
1741 fail:
1742 Py_DECREF(m);
1743 return NULL;