models: Add a BrowserModel for use by the repobrowser
[git-cola.git] / simplejson / decoder.py
blobdf572f453b307e15936cdc8c5d80dc8217c9fe7b
1 """Implementation of JSONDecoder
2 """
3 import re
4 import sys
5 import struct
7 from simplejson.scanner import make_scanner
8 try:
9 from simplejson._speedups import scanstring as c_scanstring
10 except ImportError:
11 c_scanstring = None
# Names exported by ``from simplejson.decoder import *``.
__all__ = ['JSONDecoder']

# Flag set shared by the regular expressions compiled in this module.
FLAGS = re.DOTALL | re.MULTILINE | re.VERBOSE
def _floatconstants():
    """Build the float values behind ``NaN``, ``Infinity`` and ``-Infinity``.

    Returns:
        A ``(nan, inf, -inf)`` tuple of floats.
    """
    # Imported locally: binascii.unhexlify yields the same bytes as
    # ``str.decode('hex')`` but does not depend on the Python 2 hex codec.
    import binascii
    # Two 8-byte doubles back to back, written most-significant byte first:
    # 7FF8000000000000 (NaN) followed by 7FF0000000000000 (+Infinity).
    _BYTES = binascii.unhexlify('7FF80000000000007FF0000000000000')
    # The struct module in Python 2.4 would get frexp() out of range here
    # when an endian is specified in the format string. Fixed in Python 2.5+
    # Hence the manual byte swap plus a native-order 'dd' unpack below.
    if sys.byteorder != 'big':
        _BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1]
    nan, inf = struct.unpack('dd', _BYTES)
    return nan, inf, -inf


NaN, PosInf, NegInf = _floatconstants()
class JSONDecodeError(ValueError):
    """Subclass of ValueError with the following additional properties:

    msg: The unformatted error message
    doc: The JSON document being parsed
    pos: The start index of doc where parsing failed
    end: The end index of doc where parsing failed (may be None)
    lineno: The line corresponding to pos
    colno: The column corresponding to pos
    endlineno: The line corresponding to end (may be None)
    endcolno: The column corresponding to end (may be None)

    The exception's string form is the message produced by ``errmsg``.
    """
    def __init__(self, msg, doc, pos, end=None):
        ValueError.__init__(self, errmsg(msg, doc, pos, end=end))
        self.msg = msg
        self.doc = doc
        self.pos = pos
        self.end = end
        self.lineno, self.colno = linecol(doc, pos)
        if end is not None:
            # The end line/column must be derived from ``end``; deriving
            # them from ``pos`` would just duplicate lineno/colno.
            self.endlineno, self.endcolno = linecol(doc, end)
        else:
            self.endlineno, self.endcolno = None, None


def linecol(doc, pos):
    """Translate a character index into a ``(lineno, colno)`` pair.

    ``lineno`` is 1-based. On the first line ``colno`` equals ``pos``; on
    later lines it is the distance from the preceding newline, so the first
    character after a newline is column 1.
    """
    # NOTE(review): the first line is therefore counted from 0 while later
    # lines are counted from 1. Left as-is because the numbers are part of
    # the error messages callers may already match on.
    lineno = doc.count('\n', 0, pos) + 1
    if lineno == 1:
        colno = pos
    else:
        colno = pos - doc.rindex('\n', 0, pos)
    return lineno, colno


def errmsg(msg, doc, pos, end=None):
    """Format ``msg`` together with the location ``pos`` (and ``end``) in doc.

    With ``end`` omitted the result names a single position; otherwise it
    names the range from ``pos`` to ``end``.
    """
    # Note that this function is called from _speedups
    lineno, colno = linecol(doc, pos)
    if end is None:
        fmt = '%s: line %d column %d (char %d)'
        return fmt % (msg, lineno, colno, pos)
    endlineno, endcolno = linecol(doc, end)
    fmt = '%s: line %d column %d - line %d column %d (char %d - %d)'
    return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end)
# Non-standard literals the decoder accepts, mapped to their float values.
_CONSTANTS = {
    '-Infinity': NegInf,
    'Infinity': PosInf,
    'NaN': NaN,
}
# One chunk of a JSON string body: group 1 is a (possibly empty) lazy run of
# characters, group 2 the character that stops the run -- a double quote, a
# backslash, or a control character in the range 0x00-0x1f.
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', flags=FLAGS)
# Single-character escapes: the character that follows a backslash, mapped
# to the character it stands for.
BACKSLASH = {
    '"': u'"', '\\': u'\\', '/': u'/',
    'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t',
}

# Encoding used by py_scanstring when it is called with encoding=None.
DEFAULT_ENCODING = "utf-8"
def py_scanstring(s, end, encoding=None, strict=True,
        _b=BACKSLASH, _m=STRINGCHUNK.match):
    """Scan the string s for a JSON string. End is the index of the
    character in s after the quote that started the JSON string.
    Unescapes all valid JSON string escape sequences and raises
    JSONDecodeError (a ValueError subclass) on attempt to decode an invalid
    string. If strict is False then literal control characters are allowed
    in the string.

    ``encoding`` is used to decode chunks of s that are not already unicode;
    None selects DEFAULT_ENCODING. ``_b`` and ``_m`` are the escape table and
    the chunk matcher, pre-bound as defaults.

    Returns a tuple of the decoded string and the index of the character in s
    after the end quote."""
    if encoding is None:
        encoding = DEFAULT_ENCODING
    chunks = []
    _append = chunks.append
    # Index of the opening quote, used in "Unterminated string" errors.
    begin = end - 1
    while 1:
        chunk = _m(s, end)
        if chunk is None:
            raise JSONDecodeError(
                "Unterminated string starting at", s, begin)
        end = chunk.end()
        content, terminator = chunk.groups()
        # Content contains zero or more unescaped string characters
        if content:
            if not isinstance(content, unicode):
                content = unicode(content, encoding)
            _append(content)
        # Terminator is the end of string, a literal control character,
        # or a backslash denoting that an escape sequence follows
        if terminator == '"':
            break
        elif terminator != '\\':
            if strict:
                msg = "Invalid control character %r at" % (terminator,)
                raise JSONDecodeError(msg, s, end)
            else:
                _append(terminator)
                continue
        try:
            esc = s[end]
        except IndexError:
            raise JSONDecodeError(
                "Unterminated string starting at", s, begin)
        # If not a unicode escape sequence, must be in the lookup table
        if esc != 'u':
            try:
                char = _b[esc]
            except KeyError:
                msg = "Invalid \\escape: " + repr(esc)
                raise JSONDecodeError(msg, s, end)
            end += 1
        else:
            # Unicode escape sequence
            esc = s[end + 1:end + 5]
            next_end = end + 5
            msg = "Invalid \\uXXXX escape"
            if len(esc) != 4:
                raise JSONDecodeError(msg, s, end)
            try:
                uni = int(esc, 16)
            except ValueError:
                # Four characters that are not hex digits: report it with
                # the document position instead of int()'s bare ValueError.
                raise JSONDecodeError(msg, s, end)
            # Check for surrogate pair on UCS-4 systems
            if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:
                msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
                if not s[end + 5:end + 7] == '\\u':
                    raise JSONDecodeError(msg, s, end)
                esc2 = s[end + 7:end + 11]
                if len(esc2) != 4:
                    raise JSONDecodeError(msg, s, end)
                try:
                    uni2 = int(esc2, 16)
                except ValueError:
                    raise JSONDecodeError(msg, s, end)
                # Only a low surrogate completes the pair; any other value
                # would combine into an unrelated or out-of-range code point.
                if not 0xdc00 <= uni2 <= 0xdfff:
                    raise JSONDecodeError(msg, s, end)
                uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
                next_end += 6
            char = unichr(uni)
            end = next_end
        # Append the unescaped character
        _append(char)
    return u''.join(chunks), end
# Use the C implementation when it was importable (c_scanstring is truthy),
# otherwise fall back to the pure-Python scanner.
if c_scanstring:
    scanstring = c_scanstring
else:
    scanstring = py_scanstring

# Whitespace characters skipped between JSON tokens: as a plain string for
# membership tests, and as a compiled pattern for skipping a whole run.
WHITESPACE_STR = ' \t\n\r'
WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
def JSONObject(s_and_end, encoding, strict, scan_once, object_hook,
        object_pairs_hook, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse a JSON object whose opening ``{`` has already been consumed.

    ``s_and_end`` is a ``(s, end)`` tuple: the document and the index just
    past the ``{``. Keys are read with ``scanstring`` (using ``encoding`` and
    ``strict``) and values with ``scan_once``.

    Returns a ``(obj, end)`` tuple where ``end`` is the index just past the
    closing ``}``. ``obj`` is ``object_pairs_hook(pairs)`` when that hook is
    given, otherwise a dict, passed through ``object_hook`` when that is
    given. The same rule applies to an empty object.

    Raises JSONDecodeError when a property name, ``:``, value or ``,`` is
    missing.
    """
    # Unpacked in the body rather than in the signature so the definition
    # does not rely on tuple-parameter syntax; callers still pass one tuple.
    s, end = s_and_end
    pairs = []
    # Use a slice to prevent IndexError from being raised, the following
    # check will raise a more specific ValueError if the string is empty
    nextchar = s[end:end + 1]
    # Normally we expect nextchar == '"'
    if nextchar != '"':
        if nextchar in _ws:
            end = _w(s, end).end()
            nextchar = s[end:end + 1]
        # Trivial empty object: it still has to go through the hooks and
        # come back as a dict, not as the raw (empty) list of pairs.
        if nextchar == '}':
            result = _finish_object(pairs, object_hook, object_pairs_hook)
            return result, end + 1
        elif nextchar != '"':
            raise JSONDecodeError("Expecting property name", s, end)
    end += 1
    while True:
        key, end = scanstring(s, end, encoding, strict)

        # To skip some function call overhead we optimize the fast paths where
        # the JSON key separator is ": " or just ":".
        if s[end:end + 1] != ':':
            end = _w(s, end).end()
            if s[end:end + 1] != ':':
                raise JSONDecodeError("Expecting : delimiter", s, end)

        end += 1

        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            # Ran off the end while skipping whitespace; the scan_once call
            # below is left to report the missing value.
            pass

        try:
            value, end = scan_once(s, end)
        except StopIteration:
            raise JSONDecodeError("Expecting object", s, end)
        pairs.append((key, value))

        try:
            nextchar = s[end]
            if nextchar in _ws:
                end = _w(s, end + 1).end()
                nextchar = s[end]
        except IndexError:
            nextchar = ''
        end += 1

        if nextchar == '}':
            break
        elif nextchar != ',':
            raise JSONDecodeError("Expecting , delimiter", s, end - 1)

        try:
            nextchar = s[end]
            if nextchar in _ws:
                end += 1
                nextchar = s[end]
                if nextchar in _ws:
                    end = _w(s, end + 1).end()
                    nextchar = s[end]
        except IndexError:
            nextchar = ''

        end += 1
        if nextchar != '"':
            raise JSONDecodeError("Expecting property name", s, end - 1)

    return _finish_object(pairs, object_hook, object_pairs_hook), end


def _finish_object(pairs, object_hook, object_pairs_hook):
    """Turn the collected ``(key, value)`` pairs into the decoded object."""
    # object_pairs_hook takes priority and receives the ordered pair list.
    if object_pairs_hook is not None:
        return object_pairs_hook(pairs)
    result = dict(pairs)
    if object_hook is not None:
        result = object_hook(result)
    return result
def JSONArray(s_and_end, scan_once,
        _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse a JSON array whose opening ``[`` has already been consumed.

    ``s_and_end`` is a ``(s, end)`` tuple: the document and the index just
    past the ``[``. Each element is read with ``scan_once``.

    Returns a ``(values, end)`` tuple: the list of decoded elements and the
    index just past the closing ``]``.

    Raises JSONDecodeError when an element or a ``,`` is missing.
    """
    # Unpacked in the body rather than in the signature so the definition
    # does not rely on tuple-parameter syntax; callers still pass one tuple.
    s, end = s_and_end
    values = []
    nextchar = s[end:end + 1]
    if nextchar in _ws:
        end = _w(s, end + 1).end()
        nextchar = s[end:end + 1]
    # Look-ahead for trivial empty array
    if nextchar == ']':
        return values, end + 1
    _append = values.append
    while True:
        try:
            value, end = scan_once(s, end)
        except StopIteration:
            raise JSONDecodeError("Expecting object", s, end)
        _append(value)
        nextchar = s[end:end + 1]
        if nextchar in _ws:
            end = _w(s, end + 1).end()
            nextchar = s[end:end + 1]
        end += 1
        if nextchar == ']':
            break
        elif nextchar != ',':
            # end was already advanced past the offending character, so
            # step back one to point the error at that character.
            raise JSONDecodeError("Expecting , delimiter", s, end - 1)

        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            # Ran off the end while skipping whitespace; the next scan_once
            # call is left to report the missing element.
            pass

    return values, end
class JSONDecoder(object):
    """Decoder that turns a JSON <http://json.org> document into Python data.

    Unless hooks say otherwise, JSON values are translated as follows:

    +---------------+-------------------+
    | JSON          | Python            |
    +===============+===================+
    | object        | dict              |
    +---------------+-------------------+
    | array         | list              |
    +---------------+-------------------+
    | string        | unicode           |
    +---------------+-------------------+
    | number (int)  | int, long         |
    +---------------+-------------------+
    | number (real) | float             |
    +---------------+-------------------+
    | true          | True              |
    +---------------+-------------------+
    | false         | False             |
    +---------------+-------------------+
    | null          | None              |
    +---------------+-------------------+

    Going beyond the JSON spec, ``NaN``, ``Infinity`` and ``-Infinity`` are
    also accepted and decoded to the matching ``float`` values.

    """

    def __init__(self, encoding=None, object_hook=None, parse_float=None,
            parse_int=None, parse_constant=None, strict=True,
            object_pairs_hook=None):
        """Configure the decoder.

        *encoding* names the encoding used for :class:`str` input
        (``'utf-8'`` when left unset); :class:`unicode` input is not
        affected by it. Only encodings that are a superset of ASCII
        currently work -- text in any other encoding should be handed in
        as :class:`unicode`.

        *object_hook*, when given, is called with the :class:`dict` built
        for each decoded JSON object, and whatever it returns is used in
        place of that dict. This allows custom deserialization, such as
        JSON-RPC class hinting.

        *object_pairs_hook*, when given, is called with the ordered list of
        ``(key, value)`` pairs of each decoded object literal, and its
        return value replaces the :class:`dict`. Decoders that care about
        key order can use it (:func:`collections.OrderedDict`, for example,
        remembers insertion order). It wins over *object_hook* when both
        are supplied.

        *parse_float*, when given, is called with the text of every JSON
        float; the default behaves like ``float(num_str)``. Another type or
        parser can be plugged in here (e.g. :class:`decimal.Decimal`).

        *parse_int*, when given, is called with the text of every JSON
        integer; the default behaves like ``int(num_str)``. Another type or
        parser can be plugged in here (e.g. :class:`float`).

        *parse_constant*, when given, is called with one of the strings
        ``'-Infinity'``, ``'Infinity'`` or ``'NaN'``. It can be used to
        raise an exception when such invalid JSON numbers show up.

        *strict* decides what happens when a string contains an unescaped
        control character: with ``True`` (the default) that is a parse
        error, with ``False`` the character is accepted.

        """
        # Options stored exactly as passed in.
        self.encoding = encoding
        self.strict = strict
        self.object_hook = object_hook
        self.object_pairs_hook = object_pairs_hook

        # Converters: a falsy argument selects the built-in default.
        if not parse_float:
            parse_float = float
        if not parse_int:
            parse_int = int
        if not parse_constant:
            parse_constant = _CONSTANTS.__getitem__
        self.parse_float = parse_float
        self.parse_int = parse_int
        self.parse_constant = parse_constant

        # Parsers for the composite and string values.
        self.parse_object = JSONObject
        self.parse_array = JSONArray
        self.parse_string = scanstring

        # Created last: make_scanner is handed the decoder itself and
        # presumably reads the attributes assigned above.
        self.scan_once = make_scanner(self)

    def decode(self, s, _w=WHITESPACE.match):
        """Decode ``s``, a ``str`` or ``unicode`` holding one JSON document,
        and return its Python representation.

        Whitespace before and after the document is skipped; anything else
        after the document raises JSONDecodeError("Extra data").

        """
        start = _w(s, 0).end()
        obj, stop = self.raw_decode(s, idx=start)
        # Only whitespace may follow the document.
        stop = _w(s, stop).end()
        if stop == len(s):
            return obj
        raise JSONDecodeError("Extra data", s, stop, len(s))

    def raw_decode(self, s, idx=0):
        """Decode the JSON document that starts at index ``idx`` of ``s``
        (a ``str`` or ``unicode``) and return a 2-tuple: the Python
        representation and the index in ``s`` where the document ended.

        Unlike ``decode``, trailing data after the document is not an
        error, so this can read a document out of a longer string.

        """
        try:
            obj, end = self.scan_once(s, idx)
        except StopIteration:
            raise JSONDecodeError("No JSON object could be decoded", s, idx)
        else:
            return obj, end