1 """Implementation of JSONDecoder
7 from simplejson
.scanner
import make_scanner
9 from simplejson
._speedups
import scanstring
as c_scanstring
13 __all__
= ['JSONDecoder']
15 FLAGS
= re
.VERBOSE | re
.MULTILINE | re
.DOTALL
17 def _floatconstants():
18 _BYTES
= '7FF80000000000007FF0000000000000'.decode('hex')
19 # The struct module in Python 2.4 would get frexp() out of range here
20 # when an endian is specified in the format string. Fixed in Python 2.5+
21 if sys
.byteorder
!= 'big':
22 _BYTES
= _BYTES
[:8][::-1] + _BYTES
[8:][::-1]
23 nan
, inf
= struct
.unpack('dd', _BYTES
)
26 NaN
, PosInf
, NegInf
= _floatconstants()
class JSONDecodeError(ValueError):
    """Subclass of ValueError with the following additional properties:

    msg: The unformatted error message
    doc: The JSON document being parsed
    pos: The start index of doc where parsing failed
    end: The end index of doc where parsing failed (may be None)
    lineno: The line corresponding to pos
    colno: The column corresponding to pos
    endlineno: The line corresponding to end (may be None)
    endcolno: The column corresponding to end (may be None)

    """
    def __init__(self, msg, doc, pos, end=None):
        """Build the error and record where in *doc* parsing failed.

        The exception message is the location-annotated text produced by
        ``errmsg``; the raw pieces are kept as attributes.
        """
        ValueError.__init__(self, errmsg(msg, doc, pos, end=end))
        self.msg = msg
        self.doc = doc
        self.pos = pos
        self.end = end
        self.lineno, self.colno = linecol(doc, pos)
        if end is not None:
            # The end location is derived from ``end`` itself, so it can
            # differ from the (lineno, colno) pair computed from ``pos``.
            self.endlineno, self.endcolno = linecol(doc, end)
        else:
            self.endlineno, self.endcolno = None, None
def linecol(doc, pos):
    """Return the ``(lineno, colno)`` of index *pos* within *doc*.

    Both numbers are 1-based: the first character of every line, including
    the first line of the document, is column 1.

    Args:
        doc: The document text.
        pos: Character index into *doc*.

    Returns:
        A 2-tuple ``(lineno, colno)``.
    """
    lineno = doc.count('\n', 0, pos) + 1
    # rfind returns -1 when no newline precedes pos, which makes the first
    # line come out as pos + 1 with no special case; rindex would raise
    # ValueError there instead.
    colno = pos - doc.rfind('\n', 0, pos)
    return lineno, colno
def errmsg(msg, doc, pos, end=None):
    """Format *msg* together with the location of a parse failure.

    Args:
        msg: The unformatted error message.
        doc: The document being parsed.
        pos: Start index in *doc* where parsing failed.
        end: Optional end index of the failing span.

    Returns:
        ``'<msg>: line L column C (char P)'`` when *end* is None, otherwise
        ``'<msg>: line L column C - line L2 column C2 (char P - E)'``.
    """
    # Note that this function is called from _speedups
    lineno, colno = linecol(doc, pos)
    if end is None:
        fmt = '%s: line %d column %d (char %d)'
        return fmt % (msg, lineno, colno, pos)
    # A span was given: report both its start and its end location.
    endlineno, endcolno = linecol(doc, end)
    fmt = '%s: line %d column %d - line %d column %d (char %d - %d)'
    return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end)
85 STRINGCHUNK
= re
.compile(r
'(.*?)(["\\\x00-\x1f])', FLAGS
)
87 '"': u
'"', '\\': u
'\\', '/': u
'/',
88 'b': u
'\b', 'f': u
'\f', 'n': u
'\n', 'r': u
'\r', 't': u
'\t',
91 DEFAULT_ENCODING
= "utf-8"
93 def py_scanstring(s
, end
, encoding
=None, strict
=True,
94 _b
=BACKSLASH
, _m
=STRINGCHUNK
.match
):
95 """Scan the string s for a JSON string. End is the index of the
96 character in s after the quote that started the JSON string.
97 Unescapes all valid JSON string escape sequences and raises ValueError
98 on attempt to decode an invalid string. If strict is False then literal
99 control characters are allowed in the string.
101 Returns a tuple of the decoded string and the index of the character in s
102 after the end quote."""
104 encoding
= DEFAULT_ENCODING
106 _append
= chunks
.append
111 raise JSONDecodeError(
112 "Unterminated string starting at", s
, begin
)
114 content
, terminator
= chunk
.groups()
115 # Content contains zero or more unescaped string characters
117 if not isinstance(content
, unicode):
118 content
= unicode(content
, encoding
)
120 # Terminator is the end of string, a literal control character,
121 # or a backslash denoting that an escape sequence follows
122 if terminator
== '"':
124 elif terminator
!= '\\':
126 msg
= "Invalid control character %r at" % (terminator
,)
127 #msg = "Invalid control character {0!r} at".format(terminator)
128 raise JSONDecodeError(msg
, s
, end
)
135 raise JSONDecodeError(
136 "Unterminated string starting at", s
, begin
)
137 # If not a unicode escape sequence, must be in the lookup table
142 msg
= "Invalid \\escape: " + repr(esc
)
143 raise JSONDecodeError(msg
, s
, end
)
146 # Unicode escape sequence
147 esc
= s
[end
+ 1:end
+ 5]
150 msg
= "Invalid \\uXXXX escape"
151 raise JSONDecodeError(msg
, s
, end
)
153 # Check for surrogate pair on UCS-4 systems
154 if 0xd800 <= uni
<= 0xdbff and sys
.maxunicode
> 65535:
155 msg
= "Invalid \\uXXXX\\uXXXX surrogate pair"
156 if not s
[end
+ 5:end
+ 7] == '\\u':
157 raise JSONDecodeError(msg
, s
, end
)
158 esc2
= s
[end
+ 7:end
+ 11]
160 raise JSONDecodeError(msg
, s
, end
)
162 uni
= 0x10000 + (((uni
- 0xd800) << 10) |
(uni2
- 0xdc00))
166 # Append the unescaped character
168 return u
''.join(chunks
), end
171 # Use speedup if available
172 scanstring
= c_scanstring
or py_scanstring
174 WHITESPACE
= re
.compile(r
'[ \t\n\r]*', FLAGS
)
175 WHITESPACE_STR
= ' \t\n\r'
177 def JSONObject((s
, end
), encoding
, strict
, scan_once
, object_hook
,
178 object_pairs_hook
, _w
=WHITESPACE
.match
, _ws
=WHITESPACE_STR
):
180 # Use a slice to prevent IndexError from being raised, the following
181 # check will raise a more specific ValueError if the string is empty
182 nextchar
= s
[end
:end
+ 1]
183 # Normally we expect nextchar == '"'
186 end
= _w(s
, end
).end()
187 nextchar
= s
[end
:end
+ 1]
188 # Trivial empty object
190 if object_pairs_hook
is not None:
191 result
= object_pairs_hook(pairs
)
194 if object_hook
is not None:
195 pairs
= object_hook(pairs
)
196 return pairs
, end
+ 1
197 elif nextchar
!= '"':
198 raise JSONDecodeError("Expecting property name", s
, end
)
201 key
, end
= scanstring(s
, end
, encoding
, strict
)
203 # To skip some function call overhead we optimize the fast paths where
204 # the JSON key separator is ": " or just ":".
205 if s
[end
:end
+ 1] != ':':
206 end
= _w(s
, end
).end()
207 if s
[end
:end
+ 1] != ':':
208 raise JSONDecodeError("Expecting : delimiter", s
, end
)
216 end
= _w(s
, end
+ 1).end()
221 value
, end
= scan_once(s
, end
)
222 except StopIteration:
223 raise JSONDecodeError("Expecting object", s
, end
)
224 pairs
.append((key
, value
))
229 end
= _w(s
, end
+ 1).end()
237 elif nextchar
!= ',':
238 raise JSONDecodeError("Expecting , delimiter", s
, end
- 1)
246 end
= _w(s
, end
+ 1).end()
253 raise JSONDecodeError("Expecting property name", s
, end
- 1)
255 if object_pairs_hook
is not None:
256 result
= object_pairs_hook(pairs
)
259 if object_hook
is not None:
260 pairs
= object_hook(pairs
)
263 def JSONArray((s
, end
), scan_once
, _w
=WHITESPACE
.match
, _ws
=WHITESPACE_STR
):
265 nextchar
= s
[end
:end
+ 1]
267 end
= _w(s
, end
+ 1).end()
268 nextchar
= s
[end
:end
+ 1]
269 # Look-ahead for trivial empty array
271 return values
, end
+ 1
272 _append
= values
.append
275 value
, end
= scan_once(s
, end
)
276 except StopIteration:
277 raise JSONDecodeError("Expecting object", s
, end
)
279 nextchar
= s
[end
:end
+ 1]
281 end
= _w(s
, end
+ 1).end()
282 nextchar
= s
[end
:end
+ 1]
286 elif nextchar
!= ',':
287 raise JSONDecodeError("Expecting , delimiter", s
, end
)
293 end
= _w(s
, end
+ 1).end()
299 class JSONDecoder(object):
300 """Simple JSON <http://json.org> decoder
302 Performs the following translations in decoding by default:
304 +---------------+-------------------+
306 +===============+===================+
308 +---------------+-------------------+
310 +---------------+-------------------+
312 +---------------+-------------------+
313 | number (int) | int, long |
314 +---------------+-------------------+
315 | number (real) | float |
316 +---------------+-------------------+
318 +---------------+-------------------+
320 +---------------+-------------------+
322 +---------------+-------------------+
324 It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as
325 their corresponding ``float`` values, which is outside the JSON spec.
329 def __init__(self
, encoding
=None, object_hook
=None, parse_float
=None,
330 parse_int
=None, parse_constant
=None, strict
=True,
331 object_pairs_hook
=None):
333 *encoding* determines the encoding used to interpret any
334 :class:`str` objects decoded by this instance (``'utf-8'`` by
335 default). It has no effect when decoding :class:`unicode` objects.
337 Note that currently only encodings that are a superset of ASCII work,
338 strings of other encodings should be passed in as :class:`unicode`.
340 *object_hook*, if specified, will be called with the result of every
341 JSON object decoded and its return value will be used in place of the
342 given :class:`dict`. This can be used to provide custom
343 deserializations (e.g. to support JSON-RPC class hinting).
345 *object_pairs_hook* is an optional function that will be called with
346 the result of any object literal decode with an ordered list of pairs.
347 The return value of *object_pairs_hook* will be used instead of the
348 :class:`dict`. This feature can be used to implement custom decoders
349 that rely on the order that the key and value pairs are decoded (for
350 example, :func:`collections.OrderedDict` will remember the order of
351 insertion). If *object_hook* is also defined, the *object_pairs_hook* takes priority.
354 *parse_float*, if specified, will be called with the string of every
355 JSON float to be decoded. By default, this is equivalent to
356 ``float(num_str)``. This can be used to use another datatype or parser
357 for JSON floats (e.g. :class:`decimal.Decimal`).
359 *parse_int*, if specified, will be called with the string of every
360 JSON int to be decoded. By default, this is equivalent to
361 ``int(num_str)``. This can be used to use another datatype or parser
362 for JSON integers (e.g. :class:`float`).
364 *parse_constant*, if specified, will be called with one of the
365 following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This
366 can be used to raise an exception if invalid JSON numbers are encountered.
369 *strict* controls the parser's behavior when it encounters an
370 invalid control character in a string. The default setting of
371 ``True`` means that unescaped control characters are parse errors, if
372 ``False`` then control characters will be allowed in strings.
375 self
.encoding
= encoding
376 self
.object_hook
= object_hook
377 self
.object_pairs_hook
= object_pairs_hook
378 self
.parse_float
= parse_float
or float
379 self
.parse_int
= parse_int
or int
380 self
.parse_constant
= parse_constant
or _CONSTANTS
.__getitem
__
382 self
.parse_object
= JSONObject
383 self
.parse_array
= JSONArray
384 self
.parse_string
= scanstring
385 self
.scan_once
= make_scanner(self
)
387 def decode(self
, s
, _w
=WHITESPACE
.match
):
388 """Return the Python representation of ``s`` (a ``str`` or ``unicode``
389 instance containing a JSON document)
392 obj
, end
= self
.raw_decode(s
, idx
=_w(s
, 0).end())
393 end
= _w(s
, end
).end()
395 raise JSONDecodeError("Extra data", s
, end
, len(s
))
398 def raw_decode(self
, s
, idx
=0):
399 """Decode a JSON document from ``s`` (a ``str`` or ``unicode``
400 beginning with a JSON document) and return a 2-tuple of the Python
401 representation and the index in ``s`` where the document ended.
403 This can be used to decode a JSON document from a string that may
404 have extraneous data at the end.
408 obj
, end
= self
.scan_once(s
, idx
)
409 except StopIteration:
410 raise JSONDecodeError("No JSON object could be decoded", s
, idx
)