Issue #7117, continued: Change round implementation to use the correctly-rounded
[python.git] / Lib / json / decoder.py
blob44635a00e74f61f9f49f24d1027522e58ae5285e
1 """Implementation of JSONDecoder
2 """
3 import re
4 import sys
5 import struct
7 from json.scanner import make_scanner
8 try:
9 from _json import scanstring as c_scanstring
10 except ImportError:
11 c_scanstring = None
13 __all__ = ['JSONDecoder']
15 FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL
17 def _floatconstants():
18 _BYTES = '7FF80000000000007FF0000000000000'.decode('hex')
19 if sys.byteorder != 'big':
20 _BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1]
21 nan, inf = struct.unpack('dd', _BYTES)
22 return nan, inf, -inf
24 NaN, PosInf, NegInf = _floatconstants()
def linecol(doc, pos):
    """Return the ``(lineno, colno)`` of character offset *pos* in *doc*.

    Both numbers are 1-based: offset 0 is line 1, column 1, and the first
    character after a newline is column 1 of the next line.
    """
    lineno = doc.count('\n', 0, pos) + 1
    # rfind() returns -1 when there is no preceding newline, which makes the
    # first line come out 1-based exactly like every following line.
    colno = pos - doc.rfind('\n', 0, pos)
    return lineno, colno
def errmsg(msg, doc, pos, end=None):
    """Return *msg* followed by the location of *pos* within *doc*.

    The location is the line and column computed by ``linecol`` plus the
    raw character offset.  When *end* is given, the location of *end* is
    included as well so that the message describes a range.
    """
    # Note that this function is called from _json
    lineno, colno = linecol(doc, pos)
    if end is not None:
        endlineno, endcolno = linecol(doc, end)
        return (
            '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})'
        ).format(msg, lineno, colno, endlineno, endcolno, pos, end)
    return '{0}: line {1} column {2} (char {3})'.format(
        msg, lineno, colno, pos)
# Maps the non-standard JSON constant names to their float values.
_CONSTANTS = {
    '-Infinity': NegInf,
    'Infinity': PosInf,
    'NaN': NaN,
}
57 STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
# Maps the character following a backslash in a JSON string to the character
# it stands for.  ``\u`` escapes are not listed here.
BACKSLASH = {
    '"': u'"', '\\': u'\\', '/': u'/',
    'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t',
}
63 DEFAULT_ENCODING = "utf-8"
_HEX4_DIGITS = frozenset('0123456789abcdefABCDEF')


def _decode_hex4(s, start, msg, errpos):
    """Return the integer value of the four hex digits at ``s[start:start + 4]``.

    Raises ValueError, formatted from *msg* and *errpos* with ``errmsg``,
    unless exactly four hexadecimal digits are present.  The digits are
    checked explicitly because ``int(text, 16)`` also accepts a sign,
    surrounding whitespace and a ``0x`` prefix, none of which may appear in
    a JSON ``\\uXXXX`` escape.
    """
    digits = s[start:start + 4]
    if len(digits) != 4 or not all(c in _HEX4_DIGITS for c in digits):
        raise ValueError(errmsg(msg, s, errpos))
    return int(digits, 16)


def py_scanstring(s, end, encoding=None, strict=True,
        _b=BACKSLASH, _m=STRINGCHUNK.match):
    """Scan the string s for a JSON string. End is the index of the
    character in s after the quote that started the JSON string.
    Unescapes all valid JSON string escape sequences and raises ValueError
    on attempt to decode an invalid string. If strict is False then literal
    control characters are allowed in the string.

    ``encoding`` is used to decode chunks of s that are not already
    ``unicode``; it defaults to DEFAULT_ENCODING.

    Returns a tuple of the decoded string and the index of the character in s
    after the end quote."""
    if encoding is None:
        encoding = DEFAULT_ENCODING
    chunks = []
    _append = chunks.append
    # Index of the opening quote, used in "unterminated string" errors
    begin = end - 1
    while 1:
        chunk = _m(s, end)
        if chunk is None:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        end = chunk.end()
        content, terminator = chunk.groups()
        # Content contains zero or more unescaped string characters
        if content:
            if not isinstance(content, unicode):
                content = unicode(content, encoding)
            _append(content)
        # Terminator is the end of string, a literal control character,
        # or a backslash denoting that an escape sequence follows
        if terminator == '"':
            break
        elif terminator != '\\':
            if strict:
                msg = "Invalid control character {0!r} at".format(terminator)
                raise ValueError(errmsg(msg, s, end))
            else:
                _append(terminator)
                continue
        try:
            esc = s[end]
        except IndexError:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        # If not a unicode escape sequence, must be in the lookup table
        if esc != 'u':
            try:
                char = _b[esc]
            except KeyError:
                msg = "Invalid \\escape: " + repr(esc)
                raise ValueError(errmsg(msg, s, end))
            end += 1
        else:
            # Unicode escape sequence: 'u' is at s[end], digits follow it
            uni = _decode_hex4(s, end + 1, "Invalid \\uXXXX escape", end)
            next_end = end + 5
            # Check for surrogate pair on UCS-4 systems
            if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:
                msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
                if s[end + 5:end + 7] != '\\u':
                    raise ValueError(errmsg(msg, s, end))
                uni2 = _decode_hex4(s, end + 7, msg, end)
                # Only a low surrogate may complete the pair; anything else
                # would combine into an unrelated code point.
                if not 0xdc00 <= uni2 <= 0xdfff:
                    raise ValueError(errmsg(msg, s, end))
                uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
                next_end += 6
            char = unichr(uni)
            end = next_end
        # Append the unescaped character
        _append(char)
    return u''.join(chunks), end
143 # Use speedup if available
144 scanstring = c_scanstring or py_scanstring
146 WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
147 WHITESPACE_STR = ' \t\n\r'
def _build_object(pairs, object_hook, object_pairs_hook):
    """Turn the list of ``(key, value)`` *pairs* into the decoded object.

    ``object_pairs_hook`` takes priority and receives the list itself;
    otherwise a dict is built and passed through ``object_hook`` if given.
    """
    if object_pairs_hook is not None:
        return object_pairs_hook(pairs)
    obj = dict(pairs)
    if object_hook is not None:
        obj = object_hook(obj)
    return obj


def JSONObject(s_and_end, encoding, strict, scan_once, object_hook,
        object_pairs_hook, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse a JSON object and return ``(object, end)``.

    ``s_and_end`` is the 2-tuple ``(s, end)`` where ``end`` is the index in
    ``s`` just after the opening ``{``.  ``encoding`` and ``strict`` are
    passed to ``scanstring`` for the keys, and ``scan_once(s, idx)`` parses
    each value.  The returned ``end`` is the index just after the closing
    ``}``.

    An empty object goes through ``object_pairs_hook`` / ``object_hook``
    like any other, so it decodes to ``{}`` unless a hook says otherwise.

    Raises ValueError when a property name, ``:`` delimiter, value or ``,``
    delimiter is missing.
    """
    s, end = s_and_end
    pairs = []
    pairs_append = pairs.append
    # Use a slice to prevent IndexError from being raised, the following
    # check will raise a more specific ValueError if the string is empty
    nextchar = s[end:end + 1]
    # Normally we expect nextchar == '"'
    if nextchar != '"':
        if nextchar in _ws:
            end = _w(s, end).end()
            nextchar = s[end:end + 1]
        # Trivial empty object
        if nextchar == '}':
            return (_build_object(pairs, object_hook, object_pairs_hook),
                    end + 1)
        elif nextchar != '"':
            raise ValueError(errmsg("Expecting property name", s, end))
    end += 1
    while True:
        key, end = scanstring(s, end, encoding, strict)

        # To skip some function call overhead we optimize the fast paths where
        # the JSON key separator is ": " or just ":".
        if s[end:end + 1] != ':':
            end = _w(s, end).end()
            if s[end:end + 1] != ':':
                raise ValueError(errmsg("Expecting : delimiter", s, end))

        end += 1

        # Skip whitespace before the value; running off the end of s is
        # left for scan_once to report.
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            pass

        try:
            value, end = scan_once(s, end)
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        pairs_append((key, value))

        try:
            nextchar = s[end]
            if nextchar in _ws:
                end = _w(s, end + 1).end()
                nextchar = s[end]
        except IndexError:
            nextchar = ''
        end += 1

        if nextchar == '}':
            break
        elif nextchar != ',':
            raise ValueError(errmsg("Expecting , delimiter", s, end - 1))

        try:
            nextchar = s[end]
            if nextchar in _ws:
                end += 1
                nextchar = s[end]
                if nextchar in _ws:
                    end = _w(s, end + 1).end()
                    nextchar = s[end]
        except IndexError:
            nextchar = ''

        end += 1
        if nextchar != '"':
            raise ValueError(errmsg("Expecting property name", s, end - 1))

    return _build_object(pairs, object_hook, object_pairs_hook), end
def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse a JSON array and return ``(list, end)``.

    ``s_and_end`` is the 2-tuple ``(s, end)`` where ``end`` is the index in
    ``s`` just after the opening ``[``.  ``scan_once(s, idx)`` parses each
    element.  The returned ``end`` is the index just after the closing
    ``]``.

    Raises ValueError when an element or a ``,`` delimiter is missing; the
    reported position is that of the offending character.
    """
    s, end = s_and_end
    values = []
    nextchar = s[end:end + 1]
    if nextchar in _ws:
        end = _w(s, end + 1).end()
        nextchar = s[end:end + 1]
    # Look-ahead for trivial empty array
    if nextchar == ']':
        return values, end + 1
    _append = values.append
    while True:
        try:
            value, end = scan_once(s, end)
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        _append(value)
        nextchar = s[end:end + 1]
        if nextchar in _ws:
            end = _w(s, end + 1).end()
            nextchar = s[end:end + 1]
        end += 1
        if nextchar == ']':
            break
        elif nextchar != ',':
            # end was already advanced past nextchar, so step back to it
            raise ValueError(errmsg("Expecting , delimiter", s, end - 1))

        # Skip whitespace before the next element; running off the end of
        # s is left for scan_once to report.
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            pass

    return values, end
class JSONDecoder(object):
    """Simple JSON <http://json.org> decoder

    Performs the following translations in decoding by default:

    +---------------+-------------------+
    | JSON          | Python            |
    +===============+===================+
    | object        | dict              |
    +---------------+-------------------+
    | array         | list              |
    +---------------+-------------------+
    | string        | unicode           |
    +---------------+-------------------+
    | number (int)  | int, long         |
    +---------------+-------------------+
    | number (real) | float             |
    +---------------+-------------------+
    | true          | True              |
    +---------------+-------------------+
    | false         | False             |
    +---------------+-------------------+
    | null          | None              |
    +---------------+-------------------+

    It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as
    their corresponding ``float`` values, which is outside the JSON spec.

    """

    def __init__(self, encoding=None, object_hook=None, parse_float=None,
            parse_int=None, parse_constant=None, strict=True,
            object_pairs_hook=None):
        """``encoding`` determines the encoding used to interpret any ``str``
        objects decoded by this instance (utf-8 by default).  It has no
        effect when decoding ``unicode`` objects.

        Note that currently only encodings that are a superset of ASCII work,
        strings of other encodings should be passed in as ``unicode``.

        ``object_hook``, if specified, will be called with the result
        of every JSON object decoded and its return value will be used in
        place of the given ``dict``.  This can be used to provide custom
        deserializations (e.g. to support JSON-RPC class hinting).

        ``object_pairs_hook``, if specified, will be called with the list of
        ``(key, value)`` pairs of a decoded JSON object and its return value
        will be used in place of the ``dict``.  When both hooks are given,
        ``object_pairs_hook`` takes priority.

        ``parse_float``, if specified, will be called with the string
        of every JSON float to be decoded. By default this is equivalent to
        float(num_str). This can be used to use another datatype or parser
        for JSON floats (e.g. decimal.Decimal).

        ``parse_int``, if specified, will be called with the string
        of every JSON int to be decoded. By default this is equivalent to
        int(num_str). This can be used to use another datatype or parser
        for JSON integers (e.g. float).

        ``parse_constant``, if specified, will be called with one of the
        following strings: -Infinity, Infinity, NaN.
        This can be used to raise an exception if invalid JSON numbers
        are encountered.

        ``strict`` is handed to the string scanner; when false, literal
        control characters are allowed inside strings.

        """
        # Caller-supplied configuration, stored as given
        self.encoding = encoding
        self.object_hook = object_hook
        self.object_pairs_hook = object_pairs_hook
        # Number/constant parsers fall back to the defaults when not supplied
        self.parse_float = parse_float or float
        self.parse_int = parse_int or int
        self.parse_constant = parse_constant or _CONSTANTS.__getitem__
        self.strict = strict
        # Parsers for the structured JSON types
        self.parse_object = JSONObject
        self.parse_array = JSONArray
        self.parse_string = scanstring
        # Built last: the scanner factory is handed this configured instance
        self.scan_once = make_scanner(self)

    def decode(self, s, _w=WHITESPACE.match):
        """Return the Python representation of ``s`` (a ``str`` or ``unicode``
        instance containing a JSON document)

        Leading and trailing whitespace is ignored; anything else after the
        document raises ValueError.

        """
        start = _w(s, 0).end()
        obj, end = self.raw_decode(s, idx=start)
        # Only whitespace may follow the document
        end = _w(s, end).end()
        length = len(s)
        if end != length:
            raise ValueError(errmsg("Extra data", s, end, length))
        return obj

    def raw_decode(self, s, idx=0):
        """Decode a JSON document from ``s`` (a ``str`` or ``unicode``
        beginning with a JSON document) and return a 2-tuple of the Python
        representation and the index in ``s`` where the document ended.

        This can be used to decode a JSON document from a string that may
        have extraneous data at the end.

        ``idx`` is the index in ``s`` at which scanning starts.  Raises
        ValueError when no JSON value can be scanned there.

        """
        try:
            obj, end = self.scan_once(s, idx)
        except StopIteration:
            raise ValueError("No JSON object could be decoded")
        else:
            return obj, end