2 #include "structmember.h"
3 #if PY_VERSION_HEX < 0x02060000 && !defined(Py_TYPE)
4 #define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
6 #if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN)
7 typedef int Py_ssize_t
;
8 #define PY_SSIZE_T_MAX INT_MAX
9 #define PY_SSIZE_T_MIN INT_MIN
10 #define PyInt_FromSsize_t PyInt_FromLong
11 #define PyInt_AsSsize_t PyInt_AsLong
14 #define Py_IS_FINITE(X) (!Py_IS_INFINITY(X) && !Py_IS_NAN(X))
18 #define UNUSED __attribute__((__unused__))
23 #define DEFAULT_ENCODING "utf-8"
25 #define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType)
26 #define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType)
27 #define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType)
28 #define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType)
30 static PyTypeObject PyScannerType
;
31 static PyTypeObject PyEncoderType
;
33 typedef struct _PyScannerObject
{
37 PyObject
*object_hook
;
39 PyObject
*parse_float
;
41 PyObject
*parse_constant
;
44 static PyMemberDef scanner_members
[] = {
45 {"encoding", T_OBJECT
, offsetof(PyScannerObject
, encoding
), READONLY
, "encoding"},
46 {"strict", T_OBJECT
, offsetof(PyScannerObject
, strict
), READONLY
, "strict"},
47 {"object_hook", T_OBJECT
, offsetof(PyScannerObject
, object_hook
), READONLY
, "object_hook"},
48 {"object_pairs_hook", T_OBJECT
, offsetof(PyScannerObject
, pairs_hook
), READONLY
, "object_pairs_hook"},
49 {"parse_float", T_OBJECT
, offsetof(PyScannerObject
, parse_float
), READONLY
, "parse_float"},
50 {"parse_int", T_OBJECT
, offsetof(PyScannerObject
, parse_int
), READONLY
, "parse_int"},
51 {"parse_constant", T_OBJECT
, offsetof(PyScannerObject
, parse_constant
), READONLY
, "parse_constant"},
55 typedef struct _PyEncoderObject
{
61 PyObject
*key_separator
;
62 PyObject
*item_separator
;
69 static PyMemberDef encoder_members
[] = {
70 {"markers", T_OBJECT
, offsetof(PyEncoderObject
, markers
), READONLY
, "markers"},
71 {"default", T_OBJECT
, offsetof(PyEncoderObject
, defaultfn
), READONLY
, "default"},
72 {"encoder", T_OBJECT
, offsetof(PyEncoderObject
, encoder
), READONLY
, "encoder"},
73 {"indent", T_OBJECT
, offsetof(PyEncoderObject
, indent
), READONLY
, "indent"},
74 {"key_separator", T_OBJECT
, offsetof(PyEncoderObject
, key_separator
), READONLY
, "key_separator"},
75 {"item_separator", T_OBJECT
, offsetof(PyEncoderObject
, item_separator
), READONLY
, "item_separator"},
76 {"sort_keys", T_OBJECT
, offsetof(PyEncoderObject
, sort_keys
), READONLY
, "sort_keys"},
77 {"skipkeys", T_OBJECT
, offsetof(PyEncoderObject
, skipkeys
), READONLY
, "skipkeys"},
82 ascii_escape_char(Py_UNICODE c
, char *output
, Py_ssize_t chars
);
84 ascii_escape_unicode(PyObject
*pystr
);
86 ascii_escape_str(PyObject
*pystr
);
88 py_encode_basestring_ascii(PyObject
* self UNUSED
, PyObject
*pystr
);
91 scan_once_str(PyScannerObject
*s
, PyObject
*pystr
, Py_ssize_t idx
, Py_ssize_t
*next_idx_ptr
);
93 scan_once_unicode(PyScannerObject
*s
, PyObject
*pystr
, Py_ssize_t idx
, Py_ssize_t
*next_idx_ptr
);
95 _build_rval_index_tuple(PyObject
*rval
, Py_ssize_t idx
);
97 scanner_new(PyTypeObject
*type
, PyObject
*args
, PyObject
*kwds
);
99 scanner_init(PyObject
*self
, PyObject
*args
, PyObject
*kwds
);
101 scanner_dealloc(PyObject
*self
);
103 scanner_clear(PyObject
*self
);
105 encoder_new(PyTypeObject
*type
, PyObject
*args
, PyObject
*kwds
);
107 encoder_init(PyObject
*self
, PyObject
*args
, PyObject
*kwds
);
109 encoder_dealloc(PyObject
*self
);
111 encoder_clear(PyObject
*self
);
113 encoder_listencode_list(PyEncoderObject
*s
, PyObject
*rval
, PyObject
*seq
, Py_ssize_t indent_level
);
115 encoder_listencode_obj(PyEncoderObject
*s
, PyObject
*rval
, PyObject
*obj
, Py_ssize_t indent_level
);
117 encoder_listencode_dict(PyEncoderObject
*s
, PyObject
*rval
, PyObject
*dct
, Py_ssize_t indent_level
);
119 _encoded_const(PyObject
*obj
);
121 raise_errmsg(char *msg
, PyObject
*s
, Py_ssize_t end
);
123 encoder_encode_string(PyEncoderObject
*s
, PyObject
*obj
);
125 _convertPyInt_AsSsize_t(PyObject
*o
, Py_ssize_t
*size_ptr
);
127 _convertPyInt_FromSsize_t(Py_ssize_t
*size_ptr
);
129 encoder_encode_float(PyEncoderObject
*s
, PyObject
*obj
);
131 #define S_CHAR(c) (c >= ' ' && c <= '~' && c != '\\' && c != '"')
132 #define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))
134 #define MIN_EXPANSION 6
135 #ifdef Py_UNICODE_WIDE
136 #define MAX_EXPANSION (2 * MIN_EXPANSION)
138 #define MAX_EXPANSION MIN_EXPANSION
142 _convertPyInt_AsSsize_t(PyObject
*o
, Py_ssize_t
*size_ptr
)
144 /* PyObject to Py_ssize_t converter */
145 *size_ptr
= PyInt_AsSsize_t(o
);
146 if (*size_ptr
== -1 && PyErr_Occurred())
152 _convertPyInt_FromSsize_t(Py_ssize_t
*size_ptr
)
154 /* Py_ssize_t to PyObject converter */
155 return PyInt_FromSsize_t(*size_ptr
);
159 ascii_escape_char(Py_UNICODE c
, char *output
, Py_ssize_t chars
)
161 /* Escape unicode code point c to ASCII escape sequences
162 in char *output. output must have at least 12 bytes unused to
163 accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
164 output
[chars
++] = '\\';
166 case '\\': output
[chars
++] = (char)c
; break;
167 case '"': output
[chars
++] = (char)c
; break;
168 case '\b': output
[chars
++] = 'b'; break;
169 case '\f': output
[chars
++] = 'f'; break;
170 case '\n': output
[chars
++] = 'n'; break;
171 case '\r': output
[chars
++] = 'r'; break;
172 case '\t': output
[chars
++] = 't'; break;
174 #ifdef Py_UNICODE_WIDE
176 /* UTF-16 surrogate pair */
177 Py_UNICODE v
= c
- 0x10000;
178 c
= 0xd800 | ((v
>> 10) & 0x3ff);
179 output
[chars
++] = 'u';
180 output
[chars
++] = "0123456789abcdef"[(c
>> 12) & 0xf];
181 output
[chars
++] = "0123456789abcdef"[(c
>> 8) & 0xf];
182 output
[chars
++] = "0123456789abcdef"[(c
>> 4) & 0xf];
183 output
[chars
++] = "0123456789abcdef"[(c
) & 0xf];
184 c
= 0xdc00 | (v
& 0x3ff);
185 output
[chars
++] = '\\';
188 output
[chars
++] = 'u';
189 output
[chars
++] = "0123456789abcdef"[(c
>> 12) & 0xf];
190 output
[chars
++] = "0123456789abcdef"[(c
>> 8) & 0xf];
191 output
[chars
++] = "0123456789abcdef"[(c
>> 4) & 0xf];
192 output
[chars
++] = "0123456789abcdef"[(c
) & 0xf];
198 ascii_escape_unicode(PyObject
*pystr
)
200 /* Take a PyUnicode pystr and return a new ASCII-only escaped PyString */
202 Py_ssize_t input_chars
;
203 Py_ssize_t output_size
;
204 Py_ssize_t max_output_size
;
208 Py_UNICODE
*input_unicode
;
210 input_chars
= PyUnicode_GET_SIZE(pystr
);
211 input_unicode
= PyUnicode_AS_UNICODE(pystr
);
213 /* One char input can be up to 6 chars output, estimate 4 of these */
214 output_size
= 2 + (MIN_EXPANSION
* 4) + input_chars
;
215 max_output_size
= 2 + (input_chars
* MAX_EXPANSION
);
216 rval
= PyString_FromStringAndSize(NULL
, output_size
);
220 output
= PyString_AS_STRING(rval
);
222 output
[chars
++] = '"';
223 for (i
= 0; i
< input_chars
; i
++) {
224 Py_UNICODE c
= input_unicode
[i
];
226 output
[chars
++] = (char)c
;
229 chars
= ascii_escape_char(c
, output
, chars
);
231 if (output_size
- chars
< (1 + MAX_EXPANSION
)) {
232 /* There's more than four, so let's resize by a lot */
233 Py_ssize_t new_output_size
= output_size
* 2;
234 /* This is an upper bound */
235 if (new_output_size
> max_output_size
) {
236 new_output_size
= max_output_size
;
238 /* Make sure that the output size changed before resizing */
239 if (new_output_size
!= output_size
) {
240 output_size
= new_output_size
;
241 if (_PyString_Resize(&rval
, output_size
) == -1) {
244 output
= PyString_AS_STRING(rval
);
248 output
[chars
++] = '"';
249 if (_PyString_Resize(&rval
, chars
) == -1) {
256 ascii_escape_str(PyObject
*pystr
)
258 /* Take a PyString pystr and return a new ASCII-only escaped PyString */
260 Py_ssize_t input_chars
;
261 Py_ssize_t output_size
;
267 input_chars
= PyString_GET_SIZE(pystr
);
268 input_str
= PyString_AS_STRING(pystr
);
270 /* Fast path for a string that's already ASCII */
271 for (i
= 0; i
< input_chars
; i
++) {
272 Py_UNICODE c
= (Py_UNICODE
)(unsigned char)input_str
[i
];
274 /* If we have to escape something, scan the string for unicode */
276 for (j
= i
; j
< input_chars
; j
++) {
277 c
= (Py_UNICODE
)(unsigned char)input_str
[j
];
279 /* We hit a non-ASCII character, bail to unicode mode */
281 uni
= PyUnicode_DecodeUTF8(input_str
, input_chars
, "strict");
285 rval
= ascii_escape_unicode(uni
);
294 if (i
== input_chars
) {
295 /* Input is already ASCII */
296 output_size
= 2 + input_chars
;
299 /* One char input can be up to 6 chars output, estimate 4 of these */
300 output_size
= 2 + (MIN_EXPANSION
* 4) + input_chars
;
302 rval
= PyString_FromStringAndSize(NULL
, output_size
);
306 output
= PyString_AS_STRING(rval
);
309 /* We know that everything up to i is ASCII already */
311 memcpy(&output
[1], input_str
, i
);
313 for (; i
< input_chars
; i
++) {
314 Py_UNICODE c
= (Py_UNICODE
)(unsigned char)input_str
[i
];
316 output
[chars
++] = (char)c
;
319 chars
= ascii_escape_char(c
, output
, chars
);
321 /* An ASCII char can't possibly expand to a surrogate! */
322 if (output_size
- chars
< (1 + MIN_EXPANSION
)) {
323 /* There's more than four, so let's resize by a lot */
325 if (output_size
> 2 + (input_chars
* MIN_EXPANSION
)) {
326 output_size
= 2 + (input_chars
* MIN_EXPANSION
);
328 if (_PyString_Resize(&rval
, output_size
) == -1) {
331 output
= PyString_AS_STRING(rval
);
334 output
[chars
++] = '"';
335 if (_PyString_Resize(&rval
, chars
) == -1) {
342 raise_errmsg(char *msg
, PyObject
*s
, Py_ssize_t end
)
344 /* Use the Python function json.decoder.errmsg to raise a nice
345 looking ValueError exception */
346 static PyObject
*errmsg_fn
= NULL
;
348 if (errmsg_fn
== NULL
) {
349 PyObject
*decoder
= PyImport_ImportModule("json.decoder");
352 errmsg_fn
= PyObject_GetAttrString(decoder
, "errmsg");
354 if (errmsg_fn
== NULL
)
357 pymsg
= PyObject_CallFunction(errmsg_fn
, "(zOO&)", msg
, s
, _convertPyInt_FromSsize_t
, &end
);
359 PyErr_SetObject(PyExc_ValueError
, pymsg
);
365 join_list_unicode(PyObject
*lst
)
367 /* return u''.join(lst) */
368 static PyObject
*joinfn
= NULL
;
369 if (joinfn
== NULL
) {
370 PyObject
*ustr
= PyUnicode_FromUnicode(NULL
, 0);
374 joinfn
= PyObject_GetAttrString(ustr
, "join");
379 return PyObject_CallFunctionObjArgs(joinfn
, lst
, NULL
);
383 join_list_string(PyObject
*lst
)
385 /* return ''.join(lst) */
386 static PyObject
*joinfn
= NULL
;
387 if (joinfn
== NULL
) {
388 PyObject
*ustr
= PyString_FromStringAndSize(NULL
, 0);
392 joinfn
= PyObject_GetAttrString(ustr
, "join");
397 return PyObject_CallFunctionObjArgs(joinfn
, lst
, NULL
);
401 _build_rval_index_tuple(PyObject
*rval
, Py_ssize_t idx
) {
402 /* return (rval, idx) tuple, stealing reference to rval */
406 steal a reference to rval, returns (rval, idx)
411 pyidx
= PyInt_FromSsize_t(idx
);
416 tpl
= PyTuple_New(2);
422 PyTuple_SET_ITEM(tpl
, 0, rval
);
423 PyTuple_SET_ITEM(tpl
, 1, pyidx
);
428 scanstring_str(PyObject
*pystr
, Py_ssize_t end
, char *encoding
, int strict
, Py_ssize_t
*next_end_ptr
)
430 /* Read the JSON string from PyString pystr.
431 end is the index of the first character after the quote.
432 encoding is the encoding of pystr (must be an ASCII superset)
433 if strict is zero then literal control characters are allowed
434 *next_end_ptr is a return-by-reference index of the character
437 Return value is a new PyString (if ASCII-only) or PyUnicode
440 Py_ssize_t len
= PyString_GET_SIZE(pystr
);
441 Py_ssize_t begin
= end
- 1;
442 Py_ssize_t next
= begin
;
444 char *buf
= PyString_AS_STRING(pystr
);
445 PyObject
*chunks
= PyList_New(0);
446 if (chunks
== NULL
) {
449 if (end
< 0 || len
<= end
) {
450 PyErr_SetString(PyExc_ValueError
, "end is out of bounds");
454 /* Find the end of the string or the next escape */
456 PyObject
*chunk
= NULL
;
457 for (next
= end
; next
< len
; next
++) {
458 c
= (unsigned char)buf
[next
];
459 if (c
== '"' || c
== '\\') {
462 else if (strict
&& c
<= 0x1f) {
463 raise_errmsg("Invalid control character at", pystr
, next
);
470 if (!(c
== '"' || c
== '\\')) {
471 raise_errmsg("Unterminated string starting at", pystr
, begin
);
474 /* Pick up this chunk if it's not zero length */
476 PyObject
*strchunk
= PyString_FromStringAndSize(&buf
[end
], next
- end
);
477 if (strchunk
== NULL
) {
481 chunk
= PyUnicode_FromEncodedObject(strchunk
, encoding
, NULL
);
490 if (PyList_Append(chunks
, chunk
)) {
502 raise_errmsg("Unterminated string starting at", pystr
, begin
);
507 /* Non-unicode backslash escapes */
513 case 'b': c
= '\b'; break;
514 case 'f': c
= '\f'; break;
515 case 'n': c
= '\n'; break;
516 case 'r': c
= '\r'; break;
517 case 't': c
= '\t'; break;
521 raise_errmsg("Invalid \\escape", pystr
, end
- 2);
530 raise_errmsg("Invalid \\uXXXX escape", pystr
, next
- 1);
533 /* Decode 4 hex digits */
534 for (; next
< end
; next
++) {
535 Py_UNICODE digit
= buf
[next
];
538 case '0': case '1': case '2': case '3': case '4':
539 case '5': case '6': case '7': case '8': case '9':
540 c
|= (digit
- '0'); break;
541 case 'a': case 'b': case 'c': case 'd': case 'e':
543 c
|= (digit
- 'a' + 10); break;
544 case 'A': case 'B': case 'C': case 'D': case 'E':
546 c
|= (digit
- 'A' + 10); break;
548 raise_errmsg("Invalid \\uXXXX escape", pystr
, end
- 5);
552 #ifdef Py_UNICODE_WIDE
554 if ((c
& 0xfc00) == 0xd800) {
556 if (end
+ 6 >= len
) {
557 raise_errmsg("Unpaired high surrogate", pystr
, end
- 5);
560 if (buf
[next
++] != '\\' || buf
[next
++] != 'u') {
561 raise_errmsg("Unpaired high surrogate", pystr
, end
- 5);
565 /* Decode 4 hex digits */
566 for (; next
< end
; next
++) {
568 Py_UNICODE digit
= buf
[next
];
570 case '0': case '1': case '2': case '3': case '4':
571 case '5': case '6': case '7': case '8': case '9':
572 c2
|= (digit
- '0'); break;
573 case 'a': case 'b': case 'c': case 'd': case 'e':
575 c2
|= (digit
- 'a' + 10); break;
576 case 'A': case 'B': case 'C': case 'D': case 'E':
578 c2
|= (digit
- 'A' + 10); break;
580 raise_errmsg("Invalid \\uXXXX escape", pystr
, end
- 5);
584 if ((c2
& 0xfc00) != 0xdc00) {
585 raise_errmsg("Unpaired high surrogate", pystr
, end
- 5);
588 c
= 0x10000 + (((c
- 0xd800) << 10) | (c2
- 0xdc00));
590 else if ((c
& 0xfc00) == 0xdc00) {
591 raise_errmsg("Unpaired low surrogate", pystr
, end
- 5);
600 chunk
= PyUnicode_FromUnicode(&c
, 1);
606 char c_char
= Py_CHARMASK(c
);
607 chunk
= PyString_FromStringAndSize(&c_char
, 1);
612 if (PyList_Append(chunks
, chunk
)) {
619 rval
= join_list_string(chunks
);
634 scanstring_unicode(PyObject
*pystr
, Py_ssize_t end
, int strict
, Py_ssize_t
*next_end_ptr
)
636 /* Read the JSON string from PyUnicode pystr.
637 end is the index of the first character after the quote.
638 if strict is zero then literal control characters are allowed
639 *next_end_ptr is a return-by-reference index of the character
642 Return value is a new PyUnicode
645 Py_ssize_t len
= PyUnicode_GET_SIZE(pystr
);
646 Py_ssize_t begin
= end
- 1;
647 Py_ssize_t next
= begin
;
648 const Py_UNICODE
*buf
= PyUnicode_AS_UNICODE(pystr
);
649 PyObject
*chunks
= PyList_New(0);
650 if (chunks
== NULL
) {
653 if (end
< 0 || len
<= end
) {
654 PyErr_SetString(PyExc_ValueError
, "end is out of bounds");
658 /* Find the end of the string or the next escape */
660 PyObject
*chunk
= NULL
;
661 for (next
= end
; next
< len
; next
++) {
663 if (c
== '"' || c
== '\\') {
666 else if (strict
&& c
<= 0x1f) {
667 raise_errmsg("Invalid control character at", pystr
, next
);
671 if (!(c
== '"' || c
== '\\')) {
672 raise_errmsg("Unterminated string starting at", pystr
, begin
);
675 /* Pick up this chunk if it's not zero length */
677 chunk
= PyUnicode_FromUnicode(&buf
[end
], next
- end
);
681 if (PyList_Append(chunks
, chunk
)) {
693 raise_errmsg("Unterminated string starting at", pystr
, begin
);
698 /* Non-unicode backslash escapes */
704 case 'b': c
= '\b'; break;
705 case 'f': c
= '\f'; break;
706 case 'n': c
= '\n'; break;
707 case 'r': c
= '\r'; break;
708 case 't': c
= '\t'; break;
712 raise_errmsg("Invalid \\escape", pystr
, end
- 2);
721 raise_errmsg("Invalid \\uXXXX escape", pystr
, next
- 1);
724 /* Decode 4 hex digits */
725 for (; next
< end
; next
++) {
726 Py_UNICODE digit
= buf
[next
];
729 case '0': case '1': case '2': case '3': case '4':
730 case '5': case '6': case '7': case '8': case '9':
731 c
|= (digit
- '0'); break;
732 case 'a': case 'b': case 'c': case 'd': case 'e':
734 c
|= (digit
- 'a' + 10); break;
735 case 'A': case 'B': case 'C': case 'D': case 'E':
737 c
|= (digit
- 'A' + 10); break;
739 raise_errmsg("Invalid \\uXXXX escape", pystr
, end
- 5);
743 #ifdef Py_UNICODE_WIDE
745 if ((c
& 0xfc00) == 0xd800) {
747 if (end
+ 6 >= len
) {
748 raise_errmsg("Unpaired high surrogate", pystr
, end
- 5);
751 if (buf
[next
++] != '\\' || buf
[next
++] != 'u') {
752 raise_errmsg("Unpaired high surrogate", pystr
, end
- 5);
756 /* Decode 4 hex digits */
757 for (; next
< end
; next
++) {
759 Py_UNICODE digit
= buf
[next
];
761 case '0': case '1': case '2': case '3': case '4':
762 case '5': case '6': case '7': case '8': case '9':
763 c2
|= (digit
- '0'); break;
764 case 'a': case 'b': case 'c': case 'd': case 'e':
766 c2
|= (digit
- 'a' + 10); break;
767 case 'A': case 'B': case 'C': case 'D': case 'E':
769 c2
|= (digit
- 'A' + 10); break;
771 raise_errmsg("Invalid \\uXXXX escape", pystr
, end
- 5);
775 if ((c2
& 0xfc00) != 0xdc00) {
776 raise_errmsg("Unpaired high surrogate", pystr
, end
- 5);
779 c
= 0x10000 + (((c
- 0xd800) << 10) | (c2
- 0xdc00));
781 else if ((c
& 0xfc00) == 0xdc00) {
782 raise_errmsg("Unpaired low surrogate", pystr
, end
- 5);
787 chunk
= PyUnicode_FromUnicode(&c
, 1);
791 if (PyList_Append(chunks
, chunk
)) {
798 rval
= join_list_unicode(chunks
);
811 PyDoc_STRVAR(pydoc_scanstring
,
812 "scanstring(basestring, end, encoding, strict=True) -> (str, end)\n"
814 "Scan the string s for a JSON string. End is the index of the\n"
815 "character in s after the quote that started the JSON string.\n"
816 "Unescapes all valid JSON string escape sequences and raises ValueError\n"
817 "on attempt to decode an invalid string. If strict is False then literal\n"
818 "control characters are allowed in the string.\n"
820 "Returns a tuple of the decoded string and the index of the character in s\n"
821 "after the end quote."
825 py_scanstring(PyObject
* self UNUSED
, PyObject
*args
)
830 Py_ssize_t next_end
= -1;
831 char *encoding
= NULL
;
833 if (!PyArg_ParseTuple(args
, "OO&|zi:scanstring", &pystr
, _convertPyInt_AsSsize_t
, &end
, &encoding
, &strict
)) {
836 if (encoding
== NULL
) {
837 encoding
= DEFAULT_ENCODING
;
839 if (PyString_Check(pystr
)) {
840 rval
= scanstring_str(pystr
, end
, encoding
, strict
, &next_end
);
842 else if (PyUnicode_Check(pystr
)) {
843 rval
= scanstring_unicode(pystr
, end
, strict
, &next_end
);
846 PyErr_Format(PyExc_TypeError
,
847 "first argument must be a string, not %.80s",
848 Py_TYPE(pystr
)->tp_name
);
851 return _build_rval_index_tuple(rval
, next_end
);
854 PyDoc_STRVAR(pydoc_encode_basestring_ascii
,
855 "encode_basestring_ascii(basestring) -> str\n"
857 "Return an ASCII-only JSON representation of a Python string"
861 py_encode_basestring_ascii(PyObject
* self UNUSED
, PyObject
*pystr
)
863 /* Return an ASCII-only JSON representation of a Python string */
865 if (PyString_Check(pystr
)) {
866 return ascii_escape_str(pystr
);
868 else if (PyUnicode_Check(pystr
)) {
869 return ascii_escape_unicode(pystr
);
872 PyErr_Format(PyExc_TypeError
,
873 "first argument must be a string, not %.80s",
874 Py_TYPE(pystr
)->tp_name
);
880 scanner_dealloc(PyObject
*self
)
882 /* Deallocate scanner object */
884 Py_TYPE(self
)->tp_free(self
);
888 scanner_traverse(PyObject
*self
, visitproc visit
, void *arg
)
891 assert(PyScanner_Check(self
));
892 s
= (PyScannerObject
*)self
;
893 Py_VISIT(s
->encoding
);
895 Py_VISIT(s
->object_hook
);
896 Py_VISIT(s
->pairs_hook
);
897 Py_VISIT(s
->parse_float
);
898 Py_VISIT(s
->parse_int
);
899 Py_VISIT(s
->parse_constant
);
904 scanner_clear(PyObject
*self
)
907 assert(PyScanner_Check(self
));
908 s
= (PyScannerObject
*)self
;
909 Py_CLEAR(s
->encoding
);
911 Py_CLEAR(s
->object_hook
);
912 Py_CLEAR(s
->pairs_hook
);
913 Py_CLEAR(s
->parse_float
);
914 Py_CLEAR(s
->parse_int
);
915 Py_CLEAR(s
->parse_constant
);
920 _parse_object_str(PyScannerObject
*s
, PyObject
*pystr
, Py_ssize_t idx
, Py_ssize_t
*next_idx_ptr
) {
921 /* Read a JSON object from PyString pystr.
922 idx is the index of the first character after the opening curly brace.
923 *next_idx_ptr is a return-by-reference index to the first character after
924 the closing curly brace.
926 Returns a new PyObject (usually a dict, but object_hook can change that)
928 char *str
= PyString_AS_STRING(pystr
);
929 Py_ssize_t end_idx
= PyString_GET_SIZE(pystr
) - 1;
933 PyObject
*key
= NULL
;
934 PyObject
*val
= NULL
;
935 char *encoding
= PyString_AS_STRING(s
->encoding
);
936 int strict
= PyObject_IsTrue(s
->strict
);
939 pairs
= PyList_New(0);
943 /* skip whitespace after { */
944 while (idx
<= end_idx
&& IS_WHITESPACE(str
[idx
])) idx
++;
946 /* only loop if the object is non-empty */
947 if (idx
<= end_idx
&& str
[idx
] != '}') {
948 while (idx
<= end_idx
) {
950 if (str
[idx
] != '"') {
951 raise_errmsg("Expecting property name", pystr
, idx
);
954 key
= scanstring_str(pystr
, idx
+ 1, encoding
, strict
, &next_idx
);
959 /* skip whitespace between key and : delimiter, read :, skip whitespace */
960 while (idx
<= end_idx
&& IS_WHITESPACE(str
[idx
])) idx
++;
961 if (idx
> end_idx
|| str
[idx
] != ':') {
962 raise_errmsg("Expecting : delimiter", pystr
, idx
);
966 while (idx
<= end_idx
&& IS_WHITESPACE(str
[idx
])) idx
++;
968 /* read any JSON data type */
969 val
= scan_once_str(s
, pystr
, idx
, &next_idx
);
973 item
= PyTuple_Pack(2, key
, val
);
978 if (PyList_Append(pairs
, item
) == -1) {
985 /* skip whitespace before } or , */
986 while (idx
<= end_idx
&& IS_WHITESPACE(str
[idx
])) idx
++;
988 /* bail if the object is closed or we didn't get the , delimiter */
989 if (idx
> end_idx
) break;
990 if (str
[idx
] == '}') {
993 else if (str
[idx
] != ',') {
994 raise_errmsg("Expecting , delimiter", pystr
, idx
);
999 /* skip whitespace after , delimiter */
1000 while (idx
<= end_idx
&& IS_WHITESPACE(str
[idx
])) idx
++;
1003 /* verify that idx < end_idx, str[idx] should be '}' */
1004 if (idx
> end_idx
|| str
[idx
] != '}') {
1005 raise_errmsg("Expecting object", pystr
, end_idx
);
1009 /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
1010 if (s
->pairs_hook
!= Py_None
) {
1011 val
= PyObject_CallFunctionObjArgs(s
->pairs_hook
, pairs
, NULL
);
1015 *next_idx_ptr
= idx
+ 1;
1019 rval
= PyObject_CallFunctionObjArgs((PyObject
*)(&PyDict_Type
),
1025 /* if object_hook is not None: rval = object_hook(rval) */
1026 if (s
->object_hook
!= Py_None
) {
1027 val
= PyObject_CallFunctionObjArgs(s
->object_hook
, rval
, NULL
);
1034 *next_idx_ptr
= idx
+ 1;
1044 _parse_object_unicode(PyScannerObject
*s
, PyObject
*pystr
, Py_ssize_t idx
, Py_ssize_t
*next_idx_ptr
) {
1045 /* Read a JSON object from PyUnicode pystr.
1046 idx is the index of the first character after the opening curly brace.
1047 *next_idx_ptr is a return-by-reference index to the first character after
1048 the closing curly brace.
1050 Returns a new PyObject (usually a dict, but object_hook can change that)
1052 Py_UNICODE
*str
= PyUnicode_AS_UNICODE(pystr
);
1053 Py_ssize_t end_idx
= PyUnicode_GET_SIZE(pystr
) - 1;
1057 PyObject
*key
= NULL
;
1058 PyObject
*val
= NULL
;
1059 int strict
= PyObject_IsTrue(s
->strict
);
1060 Py_ssize_t next_idx
;
1062 pairs
= PyList_New(0);
1066 /* skip whitespace after { */
1067 while (idx
<= end_idx
&& IS_WHITESPACE(str
[idx
])) idx
++;
1069 /* only loop if the object is non-empty */
1070 if (idx
<= end_idx
&& str
[idx
] != '}') {
1071 while (idx
<= end_idx
) {
1073 if (str
[idx
] != '"') {
1074 raise_errmsg("Expecting property name", pystr
, idx
);
1077 key
= scanstring_unicode(pystr
, idx
+ 1, strict
, &next_idx
);
1082 /* skip whitespace between key and : delimiter, read :, skip whitespace */
1083 while (idx
<= end_idx
&& IS_WHITESPACE(str
[idx
])) idx
++;
1084 if (idx
> end_idx
|| str
[idx
] != ':') {
1085 raise_errmsg("Expecting : delimiter", pystr
, idx
);
1089 while (idx
<= end_idx
&& IS_WHITESPACE(str
[idx
])) idx
++;
1091 /* read any JSON term */
1092 val
= scan_once_unicode(s
, pystr
, idx
, &next_idx
);
1096 item
= PyTuple_Pack(2, key
, val
);
1101 if (PyList_Append(pairs
, item
) == -1) {
1108 /* skip whitespace before } or , */
1109 while (idx
<= end_idx
&& IS_WHITESPACE(str
[idx
])) idx
++;
1111 /* bail if the object is closed or we didn't get the , delimiter */
1112 if (idx
> end_idx
) break;
1113 if (str
[idx
] == '}') {
1116 else if (str
[idx
] != ',') {
1117 raise_errmsg("Expecting , delimiter", pystr
, idx
);
1122 /* skip whitespace after , delimiter */
1123 while (idx
<= end_idx
&& IS_WHITESPACE(str
[idx
])) idx
++;
1127 /* verify that idx < end_idx, str[idx] should be '}' */
1128 if (idx
> end_idx
|| str
[idx
] != '}') {
1129 raise_errmsg("Expecting object", pystr
, end_idx
);
1133 /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
1134 if (s
->pairs_hook
!= Py_None
) {
1135 val
= PyObject_CallFunctionObjArgs(s
->pairs_hook
, pairs
, NULL
);
1139 *next_idx_ptr
= idx
+ 1;
1143 rval
= PyObject_CallFunctionObjArgs((PyObject
*)(&PyDict_Type
),
1149 /* if object_hook is not None: rval = object_hook(rval) */
1150 if (s
->object_hook
!= Py_None
) {
1151 val
= PyObject_CallFunctionObjArgs(s
->object_hook
, rval
, NULL
);
1158 *next_idx_ptr
= idx
+ 1;
1168 _parse_array_str(PyScannerObject
*s
, PyObject
*pystr
, Py_ssize_t idx
, Py_ssize_t
*next_idx_ptr
) {
1169 /* Read a JSON array from PyString pystr.
1170 idx is the index of the first character after the opening brace.
1171 *next_idx_ptr is a return-by-reference index to the first character after
1174 Returns a new PyList
1176 char *str
= PyString_AS_STRING(pystr
);
1177 Py_ssize_t end_idx
= PyString_GET_SIZE(pystr
) - 1;
1178 PyObject
*val
= NULL
;
1179 PyObject
*rval
= PyList_New(0);
1180 Py_ssize_t next_idx
;
1184 /* skip whitespace after [ */
1185 while (idx
<= end_idx
&& IS_WHITESPACE(str
[idx
])) idx
++;
1187 /* only loop if the array is non-empty */
1188 if (idx
<= end_idx
&& str
[idx
] != ']') {
1189 while (idx
<= end_idx
) {
1191 /* read any JSON term and de-tuplefy the (rval, idx) */
1192 val
= scan_once_str(s
, pystr
, idx
, &next_idx
);
1196 if (PyList_Append(rval
, val
) == -1)
1202 /* skip whitespace between term and , */
1203 while (idx
<= end_idx
&& IS_WHITESPACE(str
[idx
])) idx
++;
1205 /* bail if the array is closed or we didn't get the , delimiter */
1206 if (idx
> end_idx
) break;
1207 if (str
[idx
] == ']') {
1210 else if (str
[idx
] != ',') {
1211 raise_errmsg("Expecting , delimiter", pystr
, idx
);
1216 /* skip whitespace after , */
1217 while (idx
<= end_idx
&& IS_WHITESPACE(str
[idx
])) idx
++;
1221 /* verify that idx < end_idx, str[idx] should be ']' */
1222 if (idx
> end_idx
|| str
[idx
] != ']') {
1223 raise_errmsg("Expecting object", pystr
, end_idx
);
1226 *next_idx_ptr
= idx
+ 1;
1235 _parse_array_unicode(PyScannerObject
*s
, PyObject
*pystr
, Py_ssize_t idx
, Py_ssize_t
*next_idx_ptr
) {
1236 /* Read a JSON array from PyString pystr.
1237 idx is the index of the first character after the opening brace.
1238 *next_idx_ptr is a return-by-reference index to the first character after
1241 Returns a new PyList
1243 Py_UNICODE
*str
= PyUnicode_AS_UNICODE(pystr
);
1244 Py_ssize_t end_idx
= PyUnicode_GET_SIZE(pystr
) - 1;
1245 PyObject
*val
= NULL
;
1246 PyObject
*rval
= PyList_New(0);
1247 Py_ssize_t next_idx
;
1251 /* skip whitespace after [ */
1252 while (idx
<= end_idx
&& IS_WHITESPACE(str
[idx
])) idx
++;
1254 /* only loop if the array is non-empty */
1255 if (idx
<= end_idx
&& str
[idx
] != ']') {
1256 while (idx
<= end_idx
) {
1258 /* read any JSON term */
1259 val
= scan_once_unicode(s
, pystr
, idx
, &next_idx
);
1263 if (PyList_Append(rval
, val
) == -1)
1269 /* skip whitespace between term and , */
1270 while (idx
<= end_idx
&& IS_WHITESPACE(str
[idx
])) idx
++;
1272 /* bail if the array is closed or we didn't get the , delimiter */
1273 if (idx
> end_idx
) break;
1274 if (str
[idx
] == ']') {
1277 else if (str
[idx
] != ',') {
1278 raise_errmsg("Expecting , delimiter", pystr
, idx
);
1283 /* skip whitespace after , */
1284 while (idx
<= end_idx
&& IS_WHITESPACE(str
[idx
])) idx
++;
1288 /* verify that idx < end_idx, str[idx] should be ']' */
1289 if (idx
> end_idx
|| str
[idx
] != ']') {
1290 raise_errmsg("Expecting object", pystr
, end_idx
);
1293 *next_idx_ptr
= idx
+ 1;
1302 _parse_constant(PyScannerObject
*s
, char *constant
, Py_ssize_t idx
, Py_ssize_t
*next_idx_ptr
) {
1303 /* Read a JSON constant from PyString pystr.
1304 constant is the constant string that was found
1305 ("NaN", "Infinity", "-Infinity").
1306 idx is the index of the first character of the constant
1307 *next_idx_ptr is a return-by-reference index to the first character after
1310 Returns the result of parse_constant
1314 /* constant is "NaN", "Infinity", or "-Infinity" */
1315 cstr
= PyString_InternFromString(constant
);
1319 /* rval = parse_constant(constant) */
1320 rval
= PyObject_CallFunctionObjArgs(s
->parse_constant
, cstr
, NULL
);
1321 idx
+= PyString_GET_SIZE(cstr
);
1323 *next_idx_ptr
= idx
;
1328 _match_number_str(PyScannerObject
*s
, PyObject
*pystr
, Py_ssize_t start
, Py_ssize_t
*next_idx_ptr
) {
1329 /* Read a JSON number from PyString pystr.
1330 idx is the index of the first character of the number
1331 *next_idx_ptr is a return-by-reference index to the first character after
1334 Returns a new PyObject representation of that number:
1335 PyInt, PyLong, or PyFloat.
1336 May return other types if parse_int or parse_float are set
1338 char *str
= PyString_AS_STRING(pystr
);
1339 Py_ssize_t end_idx
= PyString_GET_SIZE(pystr
) - 1;
1340 Py_ssize_t idx
= start
;
1345 /* read a sign if it's there, make sure it's not the end of the string */
1346 if (str
[idx
] == '-') {
1348 if (idx
> end_idx
) {
1349 PyErr_SetNone(PyExc_StopIteration
);
1354 /* read as many integer digits as we find as long as it doesn't start with 0 */
1355 if (str
[idx
] >= '1' && str
[idx
] <= '9') {
1357 while (idx
<= end_idx
&& str
[idx
] >= '0' && str
[idx
] <= '9') idx
++;
1359 /* if it starts with 0 we only expect one integer digit */
1360 else if (str
[idx
] == '0') {
1363 /* no integer digits, error */
1365 PyErr_SetNone(PyExc_StopIteration
);
1369 /* if the next char is '.' followed by a digit then read all float digits */
1370 if (idx
< end_idx
&& str
[idx
] == '.' && str
[idx
+ 1] >= '0' && str
[idx
+ 1] <= '9') {
1373 while (idx
<= end_idx
&& str
[idx
] >= '0' && str
[idx
] <= '9') idx
++;
1376 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
1377 if (idx
< end_idx
&& (str
[idx
] == 'e' || str
[idx
] == 'E')) {
1379 /* save the index of the 'e' or 'E' just in case we need to backtrack */
1380 Py_ssize_t e_start
= idx
;
1383 /* read an exponent sign if present */
1384 if (idx
< end_idx
&& (str
[idx
] == '-' || str
[idx
] == '+')) idx
++;
1386 /* read all digits */
1387 while (idx
<= end_idx
&& str
[idx
] >= '0' && str
[idx
] <= '9') idx
++;
1389 /* if we got a digit, then parse as float. if not, backtrack */
1390 if (str
[idx
- 1] >= '0' && str
[idx
- 1] <= '9') {
1398 /* copy the section we determined to be a number */
1399 numstr
= PyString_FromStringAndSize(&str
[start
], idx
- start
);
1403 /* parse as a float using a fast path if available, otherwise call user defined method */
1404 if (s
->parse_float
!= (PyObject
*)&PyFloat_Type
) {
1405 rval
= PyObject_CallFunctionObjArgs(s
->parse_float
, numstr
, NULL
);
1408 double d
= PyOS_string_to_double(PyString_AS_STRING(numstr
),
1410 if (d
== -1.0 && PyErr_Occurred())
1412 rval
= PyFloat_FromDouble(d
);
1416 /* parse as an int using a fast path if available, otherwise call user defined method */
1417 if (s
->parse_int
!= (PyObject
*)&PyInt_Type
) {
1418 rval
= PyObject_CallFunctionObjArgs(s
->parse_int
, numstr
, NULL
);
1421 rval
= PyInt_FromString(PyString_AS_STRING(numstr
), NULL
, 10);
1425 *next_idx_ptr
= idx
;
1430 _match_number_unicode(PyScannerObject
*s
, PyObject
*pystr
, Py_ssize_t start
, Py_ssize_t
*next_idx_ptr
) {
1431 /* Read a JSON number from PyUnicode pystr.
1432 idx is the index of the first character of the number
1433 *next_idx_ptr is a return-by-reference index to the first character after
1436 Returns a new PyObject representation of that number:
1437 PyInt, PyLong, or PyFloat.
1438 May return other types if parse_int or parse_float are set
1440 Py_UNICODE
*str
= PyUnicode_AS_UNICODE(pystr
);
1441 Py_ssize_t end_idx
= PyUnicode_GET_SIZE(pystr
) - 1;
1442 Py_ssize_t idx
= start
;
1447 /* read a sign if it's there, make sure it's not the end of the string */
1448 if (str
[idx
] == '-') {
1450 if (idx
> end_idx
) {
1451 PyErr_SetNone(PyExc_StopIteration
);
1456 /* read as many integer digits as we find as long as it doesn't start with 0 */
1457 if (str
[idx
] >= '1' && str
[idx
] <= '9') {
1459 while (idx
<= end_idx
&& str
[idx
] >= '0' && str
[idx
] <= '9') idx
++;
1461 /* if it starts with 0 we only expect one integer digit */
1462 else if (str
[idx
] == '0') {
1465 /* no integer digits, error */
1467 PyErr_SetNone(PyExc_StopIteration
);
1471 /* if the next char is '.' followed by a digit then read all float digits */
1472 if (idx
< end_idx
&& str
[idx
] == '.' && str
[idx
+ 1] >= '0' && str
[idx
+ 1] <= '9') {
1475 while (idx
<= end_idx
&& str
[idx
] >= '0' && str
[idx
] <= '9') idx
++;
1478 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
1479 if (idx
< end_idx
&& (str
[idx
] == 'e' || str
[idx
] == 'E')) {
1480 Py_ssize_t e_start
= idx
;
1483 /* read an exponent sign if present */
1484 if (idx
< end_idx
&& (str
[idx
] == '-' || str
[idx
] == '+')) idx
++;
1486 /* read all digits */
1487 while (idx
<= end_idx
&& str
[idx
] >= '0' && str
[idx
] <= '9') idx
++;
1489 /* if we got a digit, then parse as float. if not, backtrack */
1490 if (str
[idx
- 1] >= '0' && str
[idx
- 1] <= '9') {
1498 /* copy the section we determined to be a number */
1499 numstr
= PyUnicode_FromUnicode(&str
[start
], idx
- start
);
1503 /* parse as a float using a fast path if available, otherwise call user defined method */
1504 if (s
->parse_float
!= (PyObject
*)&PyFloat_Type
) {
1505 rval
= PyObject_CallFunctionObjArgs(s
->parse_float
, numstr
, NULL
);
1508 rval
= PyFloat_FromString(numstr
, NULL
);
1512 /* no fast path for unicode -> int, just call */
1513 rval
= PyObject_CallFunctionObjArgs(s
->parse_int
, numstr
, NULL
);
1516 *next_idx_ptr
= idx
;
1521 scan_once_str(PyScannerObject
*s
, PyObject
*pystr
, Py_ssize_t idx
, Py_ssize_t
*next_idx_ptr
)
1523 /* Read one JSON term (of any kind) from PyString pystr.
1524 idx is the index of the first character of the term
1525 *next_idx_ptr is a return-by-reference index to the first character after
1528 Returns a new PyObject representation of the term.
1530 char *str
= PyString_AS_STRING(pystr
);
1531 Py_ssize_t length
= PyString_GET_SIZE(pystr
);
1532 if (idx
>= length
) {
1533 PyErr_SetNone(PyExc_StopIteration
);
1539 return scanstring_str(pystr
, idx
+ 1,
1540 PyString_AS_STRING(s
->encoding
),
1541 PyObject_IsTrue(s
->strict
),
1545 return _parse_object_str(s
, pystr
, idx
+ 1, next_idx_ptr
);
1548 return _parse_array_str(s
, pystr
, idx
+ 1, next_idx_ptr
);
1551 if ((idx
+ 3 < length
) && str
[idx
+ 1] == 'u' && str
[idx
+ 2] == 'l' && str
[idx
+ 3] == 'l') {
1553 *next_idx_ptr
= idx
+ 4;
1559 if ((idx
+ 3 < length
) && str
[idx
+ 1] == 'r' && str
[idx
+ 2] == 'u' && str
[idx
+ 3] == 'e') {
1561 *next_idx_ptr
= idx
+ 4;
1567 if ((idx
+ 4 < length
) && str
[idx
+ 1] == 'a' && str
[idx
+ 2] == 'l' && str
[idx
+ 3] == 's' && str
[idx
+ 4] == 'e') {
1568 Py_INCREF(Py_False
);
1569 *next_idx_ptr
= idx
+ 5;
1575 if ((idx
+ 2 < length
) && str
[idx
+ 1] == 'a' && str
[idx
+ 2] == 'N') {
1576 return _parse_constant(s
, "NaN", idx
, next_idx_ptr
);
1581 if ((idx
+ 7 < length
) && str
[idx
+ 1] == 'n' && str
[idx
+ 2] == 'f' && str
[idx
+ 3] == 'i' && str
[idx
+ 4] == 'n' && str
[idx
+ 5] == 'i' && str
[idx
+ 6] == 't' && str
[idx
+ 7] == 'y') {
1582 return _parse_constant(s
, "Infinity", idx
, next_idx_ptr
);
1587 if ((idx
+ 8 < length
) && str
[idx
+ 1] == 'I' && str
[idx
+ 2] == 'n' && str
[idx
+ 3] == 'f' && str
[idx
+ 4] == 'i' && str
[idx
+ 5] == 'n' && str
[idx
+ 6] == 'i' && str
[idx
+ 7] == 't' && str
[idx
+ 8] == 'y') {
1588 return _parse_constant(s
, "-Infinity", idx
, next_idx_ptr
);
1592 /* Didn't find a string, object, array, or named constant. Look for a number. */
1593 return _match_number_str(s
, pystr
, idx
, next_idx_ptr
);
1597 scan_once_unicode(PyScannerObject
*s
, PyObject
*pystr
, Py_ssize_t idx
, Py_ssize_t
*next_idx_ptr
)
1599 /* Read one JSON term (of any kind) from PyUnicode pystr.
1600 idx is the index of the first character of the term
1601 *next_idx_ptr is a return-by-reference index to the first character after
1604 Returns a new PyObject representation of the term.
1606 Py_UNICODE
*str
= PyUnicode_AS_UNICODE(pystr
);
1607 Py_ssize_t length
= PyUnicode_GET_SIZE(pystr
);
1608 if (idx
>= length
) {
1609 PyErr_SetNone(PyExc_StopIteration
);
1615 return scanstring_unicode(pystr
, idx
+ 1,
1616 PyObject_IsTrue(s
->strict
),
1620 return _parse_object_unicode(s
, pystr
, idx
+ 1, next_idx_ptr
);
1623 return _parse_array_unicode(s
, pystr
, idx
+ 1, next_idx_ptr
);
1626 if ((idx
+ 3 < length
) && str
[idx
+ 1] == 'u' && str
[idx
+ 2] == 'l' && str
[idx
+ 3] == 'l') {
1628 *next_idx_ptr
= idx
+ 4;
1634 if ((idx
+ 3 < length
) && str
[idx
+ 1] == 'r' && str
[idx
+ 2] == 'u' && str
[idx
+ 3] == 'e') {
1636 *next_idx_ptr
= idx
+ 4;
1642 if ((idx
+ 4 < length
) && str
[idx
+ 1] == 'a' && str
[idx
+ 2] == 'l' && str
[idx
+ 3] == 's' && str
[idx
+ 4] == 'e') {
1643 Py_INCREF(Py_False
);
1644 *next_idx_ptr
= idx
+ 5;
1650 if ((idx
+ 2 < length
) && str
[idx
+ 1] == 'a' && str
[idx
+ 2] == 'N') {
1651 return _parse_constant(s
, "NaN", idx
, next_idx_ptr
);
1656 if ((idx
+ 7 < length
) && str
[idx
+ 1] == 'n' && str
[idx
+ 2] == 'f' && str
[idx
+ 3] == 'i' && str
[idx
+ 4] == 'n' && str
[idx
+ 5] == 'i' && str
[idx
+ 6] == 't' && str
[idx
+ 7] == 'y') {
1657 return _parse_constant(s
, "Infinity", idx
, next_idx_ptr
);
1662 if ((idx
+ 8 < length
) && str
[idx
+ 1] == 'I' && str
[idx
+ 2] == 'n' && str
[idx
+ 3] == 'f' && str
[idx
+ 4] == 'i' && str
[idx
+ 5] == 'n' && str
[idx
+ 6] == 'i' && str
[idx
+ 7] == 't' && str
[idx
+ 8] == 'y') {
1663 return _parse_constant(s
, "-Infinity", idx
, next_idx_ptr
);
1667 /* Didn't find a string, object, array, or named constant. Look for a number. */
1668 return _match_number_unicode(s
, pystr
, idx
, next_idx_ptr
);
1672 scanner_call(PyObject
*self
, PyObject
*args
, PyObject
*kwds
)
1674 /* Python callable interface to scan_once_{str,unicode} */
1678 Py_ssize_t next_idx
= -1;
1679 static char *kwlist
[] = {"string", "idx", NULL
};
1681 assert(PyScanner_Check(self
));
1682 s
= (PyScannerObject
*)self
;
1683 if (!PyArg_ParseTupleAndKeywords(args
, kwds
, "OO&:scan_once", kwlist
, &pystr
, _convertPyInt_AsSsize_t
, &idx
))
1686 if (PyString_Check(pystr
)) {
1687 rval
= scan_once_str(s
, pystr
, idx
, &next_idx
);
1689 else if (PyUnicode_Check(pystr
)) {
1690 rval
= scan_once_unicode(s
, pystr
, idx
, &next_idx
);
1693 PyErr_Format(PyExc_TypeError
,
1694 "first argument must be a string, not %.80s",
1695 Py_TYPE(pystr
)->tp_name
);
1698 return _build_rval_index_tuple(rval
, next_idx
);
1702 scanner_new(PyTypeObject
*type
, PyObject
*args
, PyObject
*kwds
)
1705 s
= (PyScannerObject
*)type
->tp_alloc(type
, 0);
1709 s
->object_hook
= NULL
;
1710 s
->pairs_hook
= NULL
;
1711 s
->parse_float
= NULL
;
1712 s
->parse_int
= NULL
;
1713 s
->parse_constant
= NULL
;
1715 return (PyObject
*)s
;
1719 scanner_init(PyObject
*self
, PyObject
*args
, PyObject
*kwds
)
1721 /* Initialize Scanner object */
1723 static char *kwlist
[] = {"context", NULL
};
1726 assert(PyScanner_Check(self
));
1727 s
= (PyScannerObject
*)self
;
1729 if (!PyArg_ParseTupleAndKeywords(args
, kwds
, "O:make_scanner", kwlist
, &ctx
))
1732 /* PyString_AS_STRING is used on encoding */
1733 s
->encoding
= PyObject_GetAttrString(ctx
, "encoding");
1734 if (s
->encoding
== NULL
)
1736 if (s
->encoding
== Py_None
) {
1738 s
->encoding
= PyString_InternFromString(DEFAULT_ENCODING
);
1740 else if (PyUnicode_Check(s
->encoding
)) {
1741 PyObject
*tmp
= PyUnicode_AsEncodedString(s
->encoding
, NULL
, NULL
);
1742 Py_DECREF(s
->encoding
);
1745 if (s
->encoding
== NULL
|| !PyString_Check(s
->encoding
))
1748 /* All of these will fail "gracefully" so we don't need to verify them */
1749 s
->strict
= PyObject_GetAttrString(ctx
, "strict");
1750 if (s
->strict
== NULL
)
1752 s
->object_hook
= PyObject_GetAttrString(ctx
, "object_hook");
1753 if (s
->object_hook
== NULL
)
1755 s
->pairs_hook
= PyObject_GetAttrString(ctx
, "object_pairs_hook");
1756 if (s
->object_hook
== NULL
)
1758 s
->parse_float
= PyObject_GetAttrString(ctx
, "parse_float");
1759 if (s
->parse_float
== NULL
)
1761 s
->parse_int
= PyObject_GetAttrString(ctx
, "parse_int");
1762 if (s
->parse_int
== NULL
)
1764 s
->parse_constant
= PyObject_GetAttrString(ctx
, "parse_constant");
1765 if (s
->parse_constant
== NULL
)
1771 Py_CLEAR(s
->encoding
);
1772 Py_CLEAR(s
->strict
);
1773 Py_CLEAR(s
->object_hook
);
1774 Py_CLEAR(s
->pairs_hook
);
1775 Py_CLEAR(s
->parse_float
);
1776 Py_CLEAR(s
->parse_int
);
1777 Py_CLEAR(s
->parse_constant
);
1781 PyDoc_STRVAR(scanner_doc
, "JSON scanner object");
1784 PyTypeObject PyScannerType
= {
1785 PyObject_HEAD_INIT(NULL
)
1786 0, /* tp_internal */
1787 "_json.Scanner", /* tp_name */
1788 sizeof(PyScannerObject
), /* tp_basicsize */
1789 0, /* tp_itemsize */
1790 scanner_dealloc
, /* tp_dealloc */
1796 0, /* tp_as_number */
1797 0, /* tp_as_sequence */
1798 0, /* tp_as_mapping */
1800 scanner_call
, /* tp_call */
1802 0,/* PyObject_GenericGetAttr, */ /* tp_getattro */
1803 0,/* PyObject_GenericSetAttr, */ /* tp_setattro */
1804 0, /* tp_as_buffer */
1805 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_HAVE_GC
, /* tp_flags */
1806 scanner_doc
, /* tp_doc */
1807 scanner_traverse
, /* tp_traverse */
1808 scanner_clear
, /* tp_clear */
1809 0, /* tp_richcompare */
1810 0, /* tp_weaklistoffset */
1812 0, /* tp_iternext */
1814 scanner_members
, /* tp_members */
1818 0, /* tp_descr_get */
1819 0, /* tp_descr_set */
1820 0, /* tp_dictoffset */
1821 scanner_init
, /* tp_init */
1822 0,/* PyType_GenericAlloc, */ /* tp_alloc */
1823 scanner_new
, /* tp_new */
1824 0,/* PyObject_GC_Del, */ /* tp_free */
1828 encoder_new(PyTypeObject
*type
, PyObject
*args
, PyObject
*kwds
)
1831 s
= (PyEncoderObject
*)type
->tp_alloc(type
, 0);
1834 s
->defaultfn
= NULL
;
1837 s
->key_separator
= NULL
;
1838 s
->item_separator
= NULL
;
1839 s
->sort_keys
= NULL
;
1842 return (PyObject
*)s
;
1846 encoder_init(PyObject
*self
, PyObject
*args
, PyObject
*kwds
)
1848 /* initialize Encoder object */
1849 static char *kwlist
[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL
};
1852 PyObject
*markers
, *defaultfn
, *encoder
, *indent
, *key_separator
;
1853 PyObject
*item_separator
, *sort_keys
, *skipkeys
, *allow_nan
;
1855 assert(PyEncoder_Check(self
));
1856 s
= (PyEncoderObject
*)self
;
1858 if (!PyArg_ParseTupleAndKeywords(args
, kwds
, "OOOOOOOOO:make_encoder", kwlist
,
1859 &markers
, &defaultfn
, &encoder
, &indent
, &key_separator
, &item_separator
,
1860 &sort_keys
, &skipkeys
, &allow_nan
))
1863 s
->markers
= markers
;
1864 s
->defaultfn
= defaultfn
;
1865 s
->encoder
= encoder
;
1867 s
->key_separator
= key_separator
;
1868 s
->item_separator
= item_separator
;
1869 s
->sort_keys
= sort_keys
;
1870 s
->skipkeys
= skipkeys
;
1871 s
->fast_encode
= (PyCFunction_Check(s
->encoder
) && PyCFunction_GetFunction(s
->encoder
) == (PyCFunction
)py_encode_basestring_ascii
);
1872 s
->allow_nan
= PyObject_IsTrue(allow_nan
);
1874 Py_INCREF(s
->markers
);
1875 Py_INCREF(s
->defaultfn
);
1876 Py_INCREF(s
->encoder
);
1877 Py_INCREF(s
->indent
);
1878 Py_INCREF(s
->key_separator
);
1879 Py_INCREF(s
->item_separator
);
1880 Py_INCREF(s
->sort_keys
);
1881 Py_INCREF(s
->skipkeys
);
1886 encoder_call(PyObject
*self
, PyObject
*args
, PyObject
*kwds
)
1888 /* Python callable interface to encode_listencode_obj */
1889 static char *kwlist
[] = {"obj", "_current_indent_level", NULL
};
1892 Py_ssize_t indent_level
;
1894 assert(PyEncoder_Check(self
));
1895 s
= (PyEncoderObject
*)self
;
1896 if (!PyArg_ParseTupleAndKeywords(args
, kwds
, "OO&:_iterencode", kwlist
,
1897 &obj
, _convertPyInt_AsSsize_t
, &indent_level
))
1899 rval
= PyList_New(0);
1902 if (encoder_listencode_obj(s
, rval
, obj
, indent_level
)) {
1910 _encoded_const(PyObject
*obj
)
1912 /* Return the JSON string representation of None, True, False */
1913 if (obj
== Py_None
) {
1914 static PyObject
*s_null
= NULL
;
1915 if (s_null
== NULL
) {
1916 s_null
= PyString_InternFromString("null");
1921 else if (obj
== Py_True
) {
1922 static PyObject
*s_true
= NULL
;
1923 if (s_true
== NULL
) {
1924 s_true
= PyString_InternFromString("true");
1929 else if (obj
== Py_False
) {
1930 static PyObject
*s_false
= NULL
;
1931 if (s_false
== NULL
) {
1932 s_false
= PyString_InternFromString("false");
1938 PyErr_SetString(PyExc_ValueError
, "not a const");
1944 encoder_encode_float(PyEncoderObject
*s
, PyObject
*obj
)
1946 /* Return the JSON representation of a PyFloat */
1947 double i
= PyFloat_AS_DOUBLE(obj
);
1948 if (!Py_IS_FINITE(i
)) {
1949 if (!s
->allow_nan
) {
1950 PyErr_SetString(PyExc_ValueError
, "Out of range float values are not JSON compliant");
1954 return PyString_FromString("Infinity");
1957 return PyString_FromString("-Infinity");
1960 return PyString_FromString("NaN");
1963 /* Use a better float format here? */
1964 return PyObject_Repr(obj
);
1968 encoder_encode_string(PyEncoderObject
*s
, PyObject
*obj
)
1970 /* Return the JSON representation of a string */
1972 return py_encode_basestring_ascii(NULL
, obj
);
1974 return PyObject_CallFunctionObjArgs(s
->encoder
, obj
, NULL
);
1978 _steal_list_append(PyObject
*lst
, PyObject
*stolen
)
1980 /* Append stolen and then decrement its reference count */
1981 int rval
= PyList_Append(lst
, stolen
);
1987 encoder_listencode_obj(PyEncoderObject
*s
, PyObject
*rval
, PyObject
*obj
, Py_ssize_t indent_level
)
1989 /* Encode Python object obj to a JSON term, rval is a PyList */
1993 if (obj
== Py_None
|| obj
== Py_True
|| obj
== Py_False
) {
1994 PyObject
*cstr
= _encoded_const(obj
);
1997 return _steal_list_append(rval
, cstr
);
1999 else if (PyString_Check(obj
) || PyUnicode_Check(obj
))
2001 PyObject
*encoded
= encoder_encode_string(s
, obj
);
2002 if (encoded
== NULL
)
2004 return _steal_list_append(rval
, encoded
);
2006 else if (PyInt_Check(obj
) || PyLong_Check(obj
)) {
2007 PyObject
*encoded
= PyObject_Str(obj
);
2008 if (encoded
== NULL
)
2010 return _steal_list_append(rval
, encoded
);
2012 else if (PyFloat_Check(obj
)) {
2013 PyObject
*encoded
= encoder_encode_float(s
, obj
);
2014 if (encoded
== NULL
)
2016 return _steal_list_append(rval
, encoded
);
2018 else if (PyList_Check(obj
) || PyTuple_Check(obj
)) {
2019 return encoder_listencode_list(s
, rval
, obj
, indent_level
);
2021 else if (PyDict_Check(obj
)) {
2022 return encoder_listencode_dict(s
, rval
, obj
, indent_level
);
2025 PyObject
*ident
= NULL
;
2026 if (s
->markers
!= Py_None
) {
2028 ident
= PyLong_FromVoidPtr(obj
);
2031 has_key
= PyDict_Contains(s
->markers
, ident
);
2034 PyErr_SetString(PyExc_ValueError
, "Circular reference detected");
2038 if (PyDict_SetItem(s
->markers
, ident
, obj
)) {
2043 newobj
= PyObject_CallFunctionObjArgs(s
->defaultfn
, obj
, NULL
);
2044 if (newobj
== NULL
) {
2048 rv
= encoder_listencode_obj(s
, rval
, newobj
, indent_level
);
2054 if (ident
!= NULL
) {
2055 if (PyDict_DelItem(s
->markers
, ident
)) {
2066 encoder_listencode_dict(PyEncoderObject
*s
, PyObject
*rval
, PyObject
*dct
, Py_ssize_t indent_level
)
2068 /* Encode Python dict dct a JSON term, rval is a PyList */
2069 static PyObject
*open_dict
= NULL
;
2070 static PyObject
*close_dict
= NULL
;
2071 static PyObject
*empty_dict
= NULL
;
2072 PyObject
*kstr
= NULL
;
2073 PyObject
*ident
= NULL
;
2074 PyObject
*key
, *value
;
2079 if (open_dict
== NULL
|| close_dict
== NULL
|| empty_dict
== NULL
) {
2080 open_dict
= PyString_InternFromString("{");
2081 close_dict
= PyString_InternFromString("}");
2082 empty_dict
= PyString_InternFromString("{}");
2083 if (open_dict
== NULL
|| close_dict
== NULL
|| empty_dict
== NULL
)
2086 if (PyDict_Size(dct
) == 0)
2087 return PyList_Append(rval
, empty_dict
);
2089 if (s
->markers
!= Py_None
) {
2091 ident
= PyLong_FromVoidPtr(dct
);
2094 has_key
= PyDict_Contains(s
->markers
, ident
);
2097 PyErr_SetString(PyExc_ValueError
, "Circular reference detected");
2100 if (PyDict_SetItem(s
->markers
, ident
, dct
)) {
2105 if (PyList_Append(rval
, open_dict
))
2108 if (s
->indent
!= Py_None
) {
2109 /* TODO: DOES NOT RUN */
2112 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
2113 separator = _item_separator + newline_indent
2114 buf += newline_indent
2118 /* TODO: C speedup not implemented for sort_keys */
2121 skipkeys
= PyObject_IsTrue(s
->skipkeys
);
2123 while (PyDict_Next(dct
, &pos
, &key
, &value
)) {
2126 if (PyString_Check(key
) || PyUnicode_Check(key
)) {
2130 else if (PyFloat_Check(key
)) {
2131 kstr
= encoder_encode_float(s
, key
);
2135 else if (PyInt_Check(key
) || PyLong_Check(key
)) {
2136 kstr
= PyObject_Str(key
);
2140 else if (key
== Py_True
|| key
== Py_False
|| key
== Py_None
) {
2141 kstr
= _encoded_const(key
);
2145 else if (skipkeys
) {
2149 /* TODO: include repr of key */
2150 PyErr_SetString(PyExc_ValueError
, "keys must be a string");
2155 if (PyList_Append(rval
, s
->item_separator
))
2159 encoded
= encoder_encode_string(s
, kstr
);
2161 if (encoded
== NULL
)
2163 if (PyList_Append(rval
, encoded
)) {
2168 if (PyList_Append(rval
, s
->key_separator
))
2170 if (encoder_listencode_obj(s
, rval
, value
, indent_level
))
2174 if (ident
!= NULL
) {
2175 if (PyDict_DelItem(s
->markers
, ident
))
2179 if (s
->indent
!= Py_None
) {
2180 /* TODO: DOES NOT RUN */
2183 yield '\n' + (' ' * (_indent * _current_indent_level))
2186 if (PyList_Append(rval
, close_dict
))
2198 encoder_listencode_list(PyEncoderObject
*s
, PyObject
*rval
, PyObject
*seq
, Py_ssize_t indent_level
)
2200 /* Encode Python list seq to a JSON term, rval is a PyList */
2201 static PyObject
*open_array
= NULL
;
2202 static PyObject
*close_array
= NULL
;
2203 static PyObject
*empty_array
= NULL
;
2204 PyObject
*ident
= NULL
;
2205 PyObject
*s_fast
= NULL
;
2206 Py_ssize_t num_items
;
2207 PyObject
**seq_items
;
2210 if (open_array
== NULL
|| close_array
== NULL
|| empty_array
== NULL
) {
2211 open_array
= PyString_InternFromString("[");
2212 close_array
= PyString_InternFromString("]");
2213 empty_array
= PyString_InternFromString("[]");
2214 if (open_array
== NULL
|| close_array
== NULL
|| empty_array
== NULL
)
2218 s_fast
= PySequence_Fast(seq
, "_iterencode_list needs a sequence");
2221 num_items
= PySequence_Fast_GET_SIZE(s_fast
);
2222 if (num_items
== 0) {
2224 return PyList_Append(rval
, empty_array
);
2227 if (s
->markers
!= Py_None
) {
2229 ident
= PyLong_FromVoidPtr(seq
);
2232 has_key
= PyDict_Contains(s
->markers
, ident
);
2235 PyErr_SetString(PyExc_ValueError
, "Circular reference detected");
2238 if (PyDict_SetItem(s
->markers
, ident
, seq
)) {
2243 seq_items
= PySequence_Fast_ITEMS(s_fast
);
2244 if (PyList_Append(rval
, open_array
))
2246 if (s
->indent
!= Py_None
) {
2247 /* TODO: DOES NOT RUN */
2250 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
2251 separator = _item_separator + newline_indent
2252 buf += newline_indent
2255 for (i
= 0; i
< num_items
; i
++) {
2256 PyObject
*obj
= seq_items
[i
];
2258 if (PyList_Append(rval
, s
->item_separator
))
2261 if (encoder_listencode_obj(s
, rval
, obj
, indent_level
))
2264 if (ident
!= NULL
) {
2265 if (PyDict_DelItem(s
->markers
, ident
))
2269 if (s
->indent
!= Py_None
) {
2270 /* TODO: DOES NOT RUN */
2273 yield '\n' + (' ' * (_indent * _current_indent_level))
2276 if (PyList_Append(rval
, close_array
))
2288 encoder_dealloc(PyObject
*self
)
2290 /* Deallocate Encoder */
2291 encoder_clear(self
);
2292 Py_TYPE(self
)->tp_free(self
);
2296 encoder_traverse(PyObject
*self
, visitproc visit
, void *arg
)
2299 assert(PyEncoder_Check(self
));
2300 s
= (PyEncoderObject
*)self
;
2301 Py_VISIT(s
->markers
);
2302 Py_VISIT(s
->defaultfn
);
2303 Py_VISIT(s
->encoder
);
2304 Py_VISIT(s
->indent
);
2305 Py_VISIT(s
->key_separator
);
2306 Py_VISIT(s
->item_separator
);
2307 Py_VISIT(s
->sort_keys
);
2308 Py_VISIT(s
->skipkeys
);
2313 encoder_clear(PyObject
*self
)
2315 /* Deallocate Encoder */
2317 assert(PyEncoder_Check(self
));
2318 s
= (PyEncoderObject
*)self
;
2319 Py_CLEAR(s
->markers
);
2320 Py_CLEAR(s
->defaultfn
);
2321 Py_CLEAR(s
->encoder
);
2322 Py_CLEAR(s
->indent
);
2323 Py_CLEAR(s
->key_separator
);
2324 Py_CLEAR(s
->item_separator
);
2325 Py_CLEAR(s
->sort_keys
);
2326 Py_CLEAR(s
->skipkeys
);
2330 PyDoc_STRVAR(encoder_doc
, "_iterencode(obj, _current_indent_level) -> iterable");
2333 PyTypeObject PyEncoderType
= {
2334 PyObject_HEAD_INIT(NULL
)
2335 0, /* tp_internal */
2336 "_json.Encoder", /* tp_name */
2337 sizeof(PyEncoderObject
), /* tp_basicsize */
2338 0, /* tp_itemsize */
2339 encoder_dealloc
, /* tp_dealloc */
2345 0, /* tp_as_number */
2346 0, /* tp_as_sequence */
2347 0, /* tp_as_mapping */
2349 encoder_call
, /* tp_call */
2351 0, /* tp_getattro */
2352 0, /* tp_setattro */
2353 0, /* tp_as_buffer */
2354 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_HAVE_GC
, /* tp_flags */
2355 encoder_doc
, /* tp_doc */
2356 encoder_traverse
, /* tp_traverse */
2357 encoder_clear
, /* tp_clear */
2358 0, /* tp_richcompare */
2359 0, /* tp_weaklistoffset */
2361 0, /* tp_iternext */
2363 encoder_members
, /* tp_members */
2367 0, /* tp_descr_get */
2368 0, /* tp_descr_set */
2369 0, /* tp_dictoffset */
2370 encoder_init
, /* tp_init */
2372 encoder_new
, /* tp_new */
2376 static PyMethodDef speedups_methods
[] = {
2377 {"encode_basestring_ascii",
2378 (PyCFunction
)py_encode_basestring_ascii
,
2380 pydoc_encode_basestring_ascii
},
2382 (PyCFunction
)py_scanstring
,
2385 {NULL
, NULL
, 0, NULL
}
2388 PyDoc_STRVAR(module_doc
,
2395 PyScannerType
.tp_new
= PyType_GenericNew
;
2396 if (PyType_Ready(&PyScannerType
) < 0)
2398 PyEncoderType
.tp_new
= PyType_GenericNew
;
2399 if (PyType_Ready(&PyEncoderType
) < 0)
2401 m
= Py_InitModule3("_json", speedups_methods
, module_doc
);
2402 Py_INCREF((PyObject
*)&PyScannerType
);
2403 PyModule_AddObject(m
, "make_scanner", (PyObject
*)&PyScannerType
);
2404 Py_INCREF((PyObject
*)&PyEncoderType
);
2405 PyModule_AddObject(m
, "make_encoder", (PyObject
*)&PyEncoderType
);