1 """Implementation of JSONDecoder
7 from simplejson
.scanner
import make_scanner
9 from simplejson
._speedups
import scanstring
as c_scanstring
13 __all__
= ['JSONDecoder']
15 FLAGS
= re
.VERBOSE | re
.MULTILINE | re
.DOTALL
17 def _floatconstants():
18 _BYTES
= '7FF80000000000007FF0000000000000'.decode('hex')
19 # The struct module in Python 2.4 would get frexp() out of range here
20 # when an endian is specified in the format string. Fixed in Python 2.5+
21 if sys
.byteorder
!= 'big':
22 _BYTES
= _BYTES
[:8][::-1] + _BYTES
[8:][::-1]
23 nan
, inf
= struct
.unpack('dd', _BYTES
)
26 NaN
, PosInf
, NegInf
= _floatconstants()
class JSONDecodeError(ValueError):
    """Subclass of ValueError with the following additional properties:

    msg: The unformatted error message
    doc: The JSON document being parsed
    pos: The start index of doc where parsing failed
    end: The end index of doc where parsing failed (may be None)
    lineno: The line corresponding to pos
    colno: The column corresponding to pos
    endlineno: The line corresponding to end (may be None)
    endcolno: The column corresponding to end (may be None)

    """
    def __init__(self, msg, doc, pos, end=None):
        # The formatted message (with line/column info) becomes str(exc).
        ValueError.__init__(self, errmsg(msg, doc, pos, end=end))
        self.msg = msg
        self.doc = doc
        self.pos = pos
        self.end = end
        self.lineno, self.colno = linecol(doc, pos)
        if end is not None:
            # Derive the end coordinates from ``end``; computing them from
            # ``pos`` would merely duplicate lineno/colno.
            self.endlineno, self.endcolno = linecol(doc, end)
        else:
            self.endlineno, self.endcolno = None, None


def linecol(doc, pos):
    """Translate the character offset ``pos`` in ``doc`` to ``(lineno, colno)``.

    ``lineno`` is 1-based. On the first line ``colno`` is simply ``pos``;
    on later lines it is the distance from the preceding newline.
    """
    lineno = doc.count('\n', 0, pos) + 1
    if lineno == 1:
        # No newline precedes pos, so rindex() below would raise ValueError.
        colno = pos
    else:
        colno = pos - doc.rindex('\n', 0, pos)
    return lineno, colno


def errmsg(msg, doc, pos, end=None):
    """Format ``msg`` with the line/column location of ``pos`` in ``doc``.

    When ``end`` is given, the message describes the range ``pos`` - ``end``
    instead of a single position. Returns the formatted string.
    """
    # Note that this function is called from _speedups
    lineno, colno = linecol(doc, pos)
    if end is None:
        fmt = '%s: line %d column %d (char %d)'
        return fmt % (msg, lineno, colno, pos)
    endlineno, endcolno = linecol(doc, end)
    fmt = '%s: line %d column %d - line %d column %d (char %d - %d)'
    return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end)
85 STRINGCHUNK
= re
.compile(r
'(.*?)(["\\\x00-\x1f])', FLAGS
)
87 '"': u
'"', '\\': u
'\\', '/': u
'/',
88 'b': u
'\b', 'f': u
'\f', 'n': u
'\n', 'r': u
'\r', 't': u
'\t',
91 DEFAULT_ENCODING
= "utf-8"
93 def py_scanstring(s
, end
, encoding
=None, strict
=True,
94 _b
=BACKSLASH
, _m
=STRINGCHUNK
.match
):
95 """Scan the string s for a JSON string. End is the index of the
96 character in s after the quote that started the JSON string.
97 Unescapes all valid JSON string escape sequences and raises ValueError
98 on attempt to decode an invalid string. If strict is False then literal
99 control characters are allowed in the string.
101 Returns a tuple of the decoded string and the index of the character in s
102 after the end quote."""
104 encoding
= DEFAULT_ENCODING
106 _append
= chunks
.append
111 raise JSONDecodeError(
112 "Unterminated string starting at", s
, begin
)
114 content
, terminator
= chunk
.groups()
115 # Content contains zero or more unescaped string characters
117 if not isinstance(content
, unicode):
118 content
= unicode(content
, encoding
)
120 # Terminator is the end of string, a literal control character,
121 # or a backslash denoting that an escape sequence follows
122 if terminator
== '"':
124 elif terminator
!= '\\':
126 msg
= "Invalid control character %r at" % (terminator
,)
127 #msg = "Invalid control character {0!r} at".format(terminator)
128 raise JSONDecodeError(msg
, s
, end
)
135 raise JSONDecodeError(
136 "Unterminated string starting at", s
, begin
)
137 # If not a unicode escape sequence, must be in the lookup table
142 msg
= "Invalid \\escape: " + repr(esc
)
143 raise JSONDecodeError(msg
, s
, end
)
146 # Unicode escape sequence
147 esc
= s
[end
+ 1:end
+ 5]
150 msg
= "Invalid \\uXXXX escape"
151 raise JSONDecodeError(msg
, s
, end
)
153 # Check for surrogate pair on UCS-4 systems
154 if 0xd800 <= uni
<= 0xdbff and sys
.maxunicode
> 65535:
155 msg
= "Invalid \\uXXXX\\uXXXX surrogate pair"
156 if not s
[end
+ 5:end
+ 7] == '\\u':
157 raise JSONDecodeError(msg
, s
, end
)
158 esc2
= s
[end
+ 7:end
+ 11]
160 raise JSONDecodeError(msg
, s
, end
)
162 uni
= 0x10000 + (((uni
- 0xd800) << 10) |
(uni2
- 0xdc00))
166 # Append the unescaped character
168 return u
''.join(chunks
), end
171 # Use speedup if available
172 scanstring
= c_scanstring
or py_scanstring
174 WHITESPACE
= re
.compile(r
'[ \t\n\r]*', FLAGS
)
175 WHITESPACE_STR
= ' \t\n\r'
177 def JSONObject((s
, end
), encoding
, strict
, scan_once
, object_hook
,
178 object_pairs_hook
, _w
=WHITESPACE
.match
, _ws
=WHITESPACE_STR
):
180 # Use a slice to prevent IndexError from being raised, the following
181 # check will raise a more specific ValueError if the string is empty
182 nextchar
= s
[end
:end
+ 1]
183 # Normally we expect nextchar == '"'
186 end
= _w(s
, end
).end()
187 nextchar
= s
[end
:end
+ 1]
188 # Trivial empty object
190 return pairs
, end
+ 1
191 elif nextchar
!= '"':
192 raise JSONDecodeError("Expecting property name", s
, end
)
195 key
, end
= scanstring(s
, end
, encoding
, strict
)
197 # To skip some function call overhead we optimize the fast paths where
198 # the JSON key separator is ": " or just ":".
199 if s
[end
:end
+ 1] != ':':
200 end
= _w(s
, end
).end()
201 if s
[end
:end
+ 1] != ':':
202 raise JSONDecodeError("Expecting : delimiter", s
, end
)
210 end
= _w(s
, end
+ 1).end()
215 value
, end
= scan_once(s
, end
)
216 except StopIteration:
217 raise JSONDecodeError("Expecting object", s
, end
)
218 pairs
.append((key
, value
))
223 end
= _w(s
, end
+ 1).end()
231 elif nextchar
!= ',':
232 raise JSONDecodeError("Expecting , delimiter", s
, end
- 1)
240 end
= _w(s
, end
+ 1).end()
247 raise JSONDecodeError("Expecting property name", s
, end
- 1)
249 if object_pairs_hook
is not None:
250 result
= object_pairs_hook(pairs
)
253 if object_hook
is not None:
254 pairs
= object_hook(pairs
)
257 def JSONArray((s
, end
), scan_once
, _w
=WHITESPACE
.match
, _ws
=WHITESPACE_STR
):
259 nextchar
= s
[end
:end
+ 1]
261 end
= _w(s
, end
+ 1).end()
262 nextchar
= s
[end
:end
+ 1]
263 # Look-ahead for trivial empty array
265 return values
, end
+ 1
266 _append
= values
.append
269 value
, end
= scan_once(s
, end
)
270 except StopIteration:
271 raise JSONDecodeError("Expecting object", s
, end
)
273 nextchar
= s
[end
:end
+ 1]
275 end
= _w(s
, end
+ 1).end()
276 nextchar
= s
[end
:end
+ 1]
280 elif nextchar
!= ',':
281 raise JSONDecodeError("Expecting , delimiter", s
, end
)
287 end
= _w(s
, end
+ 1).end()
293 class JSONDecoder(object):
294 """Simple JSON <http://json.org> decoder
296 Performs the following translations in decoding by default:
298 +---------------+-------------------+
300 +===============+===================+
302 +---------------+-------------------+
304 +---------------+-------------------+
306 +---------------+-------------------+
307 | number (int) | int, long |
308 +---------------+-------------------+
309 | number (real) | float |
310 +---------------+-------------------+
312 +---------------+-------------------+
314 +---------------+-------------------+
316 +---------------+-------------------+
318 It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as
319 their corresponding ``float`` values, which is outside the JSON spec.
323 def __init__(self
, encoding
=None, object_hook
=None, parse_float
=None,
324 parse_int
=None, parse_constant
=None, strict
=True,
325 object_pairs_hook
=None):
327 *encoding* determines the encoding used to interpret any
328 :class:`str` objects decoded by this instance (``'utf-8'`` by
329 default). It has no effect when decoding :class:`unicode` objects.
331 Note that currently only encodings that are a superset of ASCII work,
332 strings of other encodings should be passed in as :class:`unicode`.
334 *object_hook*, if specified, will be called with the result of every
335 JSON object decoded and its return value will be used in place of the
336 given :class:`dict`. This can be used to provide custom
337 deserializations (e.g. to support JSON-RPC class hinting).
339 *object_pairs_hook* is an optional function that will be called with
340 the result of any object literal decode with an ordered list of pairs.
341 The return value of *object_pairs_hook* will be used instead of the
342 :class:`dict`. This feature can be used to implement custom decoders
343 that rely on the order that the key and value pairs are decoded (for
344 example, :func:`collections.OrderedDict` will remember the order of
345 insertion). If *object_hook* is also defined, the *object_pairs_hook*
348 *parse_float*, if specified, will be called with the string of every
349 JSON float to be decoded. By default, this is equivalent to
350 ``float(num_str)``. This can be used to use another datatype or parser
351 for JSON floats (e.g. :class:`decimal.Decimal`).
353 *parse_int*, if specified, will be called with the string of every
354 JSON int to be decoded. By default, this is equivalent to
355 ``int(num_str)``. This can be used to use another datatype or parser
356 for JSON integers (e.g. :class:`float`).
358 *parse_constant*, if specified, will be called with one of the
359 following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This
360 can be used to raise an exception if invalid JSON numbers are
363 *strict* controls the parser's behavior when it encounters an
364 invalid control character in a string. The default setting of
365 ``True`` means that unescaped control characters are parse errors, if
366 ``False`` then control characters will be allowed in strings.
369 self
.encoding
= encoding
370 self
.object_hook
= object_hook
371 self
.object_pairs_hook
= object_pairs_hook
372 self
.parse_float
= parse_float
or float
373 self
.parse_int
= parse_int
or int
374 self
.parse_constant
= parse_constant
or _CONSTANTS
.__getitem
__
376 self
.parse_object
= JSONObject
377 self
.parse_array
= JSONArray
378 self
.parse_string
= scanstring
379 self
.scan_once
= make_scanner(self
)
381 def decode(self
, s
, _w
=WHITESPACE
.match
):
382 """Return the Python representation of ``s`` (a ``str`` or ``unicode``
383 instance containing a JSON document)
386 obj
, end
= self
.raw_decode(s
, idx
=_w(s
, 0).end())
387 end
= _w(s
, end
).end()
389 raise JSONDecodeError("Extra data", s
, end
, len(s
))
392 def raw_decode(self
, s
, idx
=0):
393 """Decode a JSON document from ``s`` (a ``str`` or ``unicode``
394 beginning with a JSON document) and return a 2-tuple of the Python
395 representation and the index in ``s`` where the document ended.
397 This can be used to decode a JSON document from a string that may
398 have extraneous data at the end.
402 obj
, end
= self
.scan_once(s
, idx
)
403 except StopIteration:
404 raise JSONDecodeError("No JSON object could be decoded", s
, idx
)