1 """Implementation of JSONDecoder
7 from json
.scanner
import make_scanner
9 from _json
import scanstring
as c_scanstring
13 __all__
= ['JSONDecoder']
# Regex flags passed to the patterns compiled in this module: VERBOSE ignores
# unescaped whitespace in the pattern text, MULTILINE makes ^/$ match at line
# boundaries, and DOTALL lets '.' match newlines too.
FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL
def _floatconstants():
    """Return the ``(nan, inf, -inf)`` triple of special float values.

    The values are produced by ``float()`` parsing rather than by unpacking a
    hex-encoded IEEE 754 byte string, so this does not depend on the ``hex``
    string codec, on ``struct``, or on the host byte order.
    """
    nan = float('nan')
    inf = float('inf')
    return nan, inf, -inf


# Module-level float constants: not-a-number, +infinity and -infinity.
NaN, PosInf, NegInf = _floatconstants()
def linecol(doc, pos):
    """Translate a character offset into a ``(lineno, colno)`` pair.

    Both numbers are 1-based. ``lineno`` is one more than the number of
    newlines found before ``pos``; ``colno`` is the distance from the last
    newline before ``pos``, or from the start of ``doc`` when there is none.
    """
    lineno = doc.count('\n', 0, pos) + 1
    # rfind() yields -1 when no newline precedes pos, so the first line needs
    # no special case and never hits the ValueError that rindex() would raise.
    colno = pos - doc.rfind('\n', 0, pos)
    return lineno, colno
def errmsg(msg, doc, pos, end=None):
    """Format ``msg`` together with the location of ``pos`` inside ``doc``.

    When ``end`` is omitted the message names a single position; otherwise it
    names the span from ``pos`` to ``end``. Character offsets are converted to
    line/column numbers with ``linecol``.
    """
    # Note that this function is called from _json
    lineno, colno = linecol(doc, pos)
    if end is None:
        fmt = '{0}: line {1} column {2} (char {3})'
        return fmt.format(msg, lineno, colno, pos)
    endlineno, endcolno = linecol(doc, end)
    fmt = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})'
    return fmt.format(msg, lineno, colno, endlineno, endcolno, pos, end)
# Matches a run of ordinary string characters (group 1, non-greedy) followed
# by the character that ends the run (group 2): a double quote, a backslash,
# or a control character in the range \x00-\x1f.
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
# Single-character escapes: the character that follows a backslash, mapped to
# the text it stands for.
BACKSLASH = {
    '"': u'"', '\\': u'\\', '/': u'/',
    'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t',
}

# Encoding name that py_scanstring assigns to its ``encoding`` argument.
DEFAULT_ENCODING = "utf-8"
65 def py_scanstring(s
, end
, encoding
=None, strict
=True,
66 _b
=BACKSLASH
, _m
=STRINGCHUNK
.match
):
67 """Scan the string s for a JSON string. End is the index of the
68 character in s after the quote that started the JSON string.
69 Unescapes all valid JSON string escape sequences and raises ValueError
70 on attempt to decode an invalid string. If strict is False then literal
71 control characters are allowed in the string.
73 Returns a tuple of the decoded string and the index of the character in s
74 after the end quote."""
76 encoding
= DEFAULT_ENCODING
78 _append
= chunks
.append
84 errmsg("Unterminated string starting at", s
, begin
))
86 content
, terminator
= chunk
.groups()
87 # Content contains zero or more unescaped string characters
89 if not isinstance(content
, unicode):
90 content
= unicode(content
, encoding
)
92 # Terminator is the end of string, a literal control character,
93 # or a backslash denoting that an escape sequence follows
96 elif terminator
!= '\\':
98 #msg = "Invalid control character %r at" % (terminator,)
99 msg
= "Invalid control character {0!r} at".format(terminator
)
100 raise ValueError(errmsg(msg
, s
, end
))
108 errmsg("Unterminated string starting at", s
, begin
))
109 # If not a unicode escape sequence, must be in the lookup table
114 msg
= "Invalid \\escape: " + repr(esc
)
115 raise ValueError(errmsg(msg
, s
, end
))
118 # Unicode escape sequence
119 esc
= s
[end
+ 1:end
+ 5]
122 msg
= "Invalid \\uXXXX escape"
123 raise ValueError(errmsg(msg
, s
, end
))
125 # Check for surrogate pair on UCS-4 systems
126 if 0xd800 <= uni
<= 0xdbff and sys
.maxunicode
> 65535:
127 msg
= "Invalid \\uXXXX\\uXXXX surrogate pair"
128 if not s
[end
+ 5:end
+ 7] == '\\u':
129 raise ValueError(errmsg(msg
, s
, end
))
130 esc2
= s
[end
+ 7:end
+ 11]
132 raise ValueError(errmsg(msg
, s
, end
))
134 uni
= 0x10000 + (((uni
- 0xd800) << 10) |
(uni2
- 0xdc00))
138 # Append the unescaped character
140 return u
''.join(chunks
), end
# Use speedup if available
scanstring = c_scanstring or py_scanstring

# Matches a (possibly empty) run of space, tab, newline and carriage return.
WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
# The same four characters as a plain string.
WHITESPACE_STR = ' \t\n\r'
149 def JSONObject(s_and_end
, encoding
, strict
, scan_once
, object_hook
,
150 object_pairs_hook
, _w
=WHITESPACE
.match
, _ws
=WHITESPACE_STR
):
153 pairs_append
= pairs
.append
154 # Use a slice to prevent IndexError from being raised, the following
155 # check will raise a more specific ValueError if the string is empty
156 nextchar
= s
[end
:end
+ 1]
157 # Normally we expect nextchar == '"'
160 end
= _w(s
, end
).end()
161 nextchar
= s
[end
:end
+ 1]
162 # Trivial empty object
164 return pairs
, end
+ 1
165 elif nextchar
!= '"':
166 raise ValueError(errmsg("Expecting property name", s
, end
))
169 key
, end
= scanstring(s
, end
, encoding
, strict
)
171 # To skip some function call overhead we optimize the fast paths where
172 # the JSON key separator is ": " or just ":".
173 if s
[end
:end
+ 1] != ':':
174 end
= _w(s
, end
).end()
175 if s
[end
:end
+ 1] != ':':
176 raise ValueError(errmsg("Expecting : delimiter", s
, end
))
184 end
= _w(s
, end
+ 1).end()
189 value
, end
= scan_once(s
, end
)
190 except StopIteration:
191 raise ValueError(errmsg("Expecting object", s
, end
))
192 pairs_append((key
, value
))
197 end
= _w(s
, end
+ 1).end()
205 elif nextchar
!= ',':
206 raise ValueError(errmsg("Expecting , delimiter", s
, end
- 1))
214 end
= _w(s
, end
+ 1).end()
221 raise ValueError(errmsg("Expecting property name", s
, end
- 1))
223 if object_pairs_hook
is not None:
224 result
= object_pairs_hook(pairs
)
227 if object_hook
is not None:
228 pairs
= object_hook(pairs
)
231 def JSONArray(s_and_end
, scan_once
, _w
=WHITESPACE
.match
, _ws
=WHITESPACE_STR
):
234 nextchar
= s
[end
:end
+ 1]
236 end
= _w(s
, end
+ 1).end()
237 nextchar
= s
[end
:end
+ 1]
238 # Look-ahead for trivial empty array
240 return values
, end
+ 1
241 _append
= values
.append
244 value
, end
= scan_once(s
, end
)
245 except StopIteration:
246 raise ValueError(errmsg("Expecting object", s
, end
))
248 nextchar
= s
[end
:end
+ 1]
250 end
= _w(s
, end
+ 1).end()
251 nextchar
= s
[end
:end
+ 1]
255 elif nextchar
!= ',':
256 raise ValueError(errmsg("Expecting , delimiter", s
, end
))
262 end
= _w(s
, end
+ 1).end()
268 class JSONDecoder(object):
269 """Simple JSON <http://json.org> decoder
271 Performs the following translations in decoding by default:
273 +---------------+-------------------+
275 +===============+===================+
277 +---------------+-------------------+
279 +---------------+-------------------+
281 +---------------+-------------------+
282 | number (int) | int, long |
283 +---------------+-------------------+
284 | number (real) | float |
285 +---------------+-------------------+
287 +---------------+-------------------+
289 +---------------+-------------------+
291 +---------------+-------------------+
293 It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as
294 their corresponding ``float`` values, which is outside the JSON spec.
298 def __init__(self
, encoding
=None, object_hook
=None, parse_float
=None,
299 parse_int
=None, parse_constant
=None, strict
=True,
300 object_pairs_hook
=None):
301 """``encoding`` determines the encoding used to interpret any ``str``
302 objects decoded by this instance (utf-8 by default). It has no
303 effect when decoding ``unicode`` objects.
305 Note that currently only encodings that are a superset of ASCII work,
306 strings of other encodings should be passed in as ``unicode``.
308 ``object_hook``, if specified, will be called with the result
309 of every JSON object decoded and its return value will be used in
310 place of the given ``dict``. This can be used to provide custom
311 deserializations (e.g. to support JSON-RPC class hinting).
313 ``parse_float``, if specified, will be called with the string
314 of every JSON float to be decoded. By default this is equivalent to
315 float(num_str). This can be used to use another datatype or parser
316 for JSON floats (e.g. decimal.Decimal).
318 ``parse_int``, if specified, will be called with the string
319 of every JSON int to be decoded. By default this is equivalent to
320 int(num_str). This can be used to use another datatype or parser
321 for JSON integers (e.g. float).
323 ``parse_constant``, if specified, will be called with one of the
324 following strings: -Infinity, Infinity, NaN.
325 This can be used to raise an exception if invalid JSON numbers
329 self
.encoding
= encoding
330 self
.object_hook
= object_hook
331 self
.object_pairs_hook
= object_pairs_hook
332 self
.parse_float
= parse_float
or float
333 self
.parse_int
= parse_int
or int
334 self
.parse_constant
= parse_constant
or _CONSTANTS
.__getitem
__
336 self
.parse_object
= JSONObject
337 self
.parse_array
= JSONArray
338 self
.parse_string
= scanstring
339 self
.scan_once
= make_scanner(self
)
341 def decode(self
, s
, _w
=WHITESPACE
.match
):
342 """Return the Python representation of ``s`` (a ``str`` or ``unicode``
343 instance containing a JSON document)
346 obj
, end
= self
.raw_decode(s
, idx
=_w(s
, 0).end())
347 end
= _w(s
, end
).end()
349 raise ValueError(errmsg("Extra data", s
, end
, len(s
)))
352 def raw_decode(self
, s
, idx
=0):
353 """Decode a JSON document from ``s`` (a ``str`` or ``unicode``
354 beginning with a JSON document) and return a 2-tuple of the Python
355 representation and the index in ``s`` where the document ended.
357 This can be used to decode a JSON document from a string that may
358 have extraneous data at the end.
362 obj
, end
= self
.scan_once(s
, idx
)
363 except StopIteration:
364 raise ValueError("No JSON object could be decoded")