views.dag: memoize() expensive graph drawing methods
[git-cola.git] / thirdparty / simplejson / decoder.py
blob386db691329eaa512146021c4b2e6c3310018f24
1 """Implementation of JSONDecoder
2 """
3 import re
4 import sys
5 import struct
7 from simplejson.scanner import make_scanner
8 try:
9 from simplejson._speedups import scanstring as c_scanstring
10 except ImportError:
11 c_scanstring = None
# Names exported by ``from ... import *``.
__all__ = ['JSONDecoder']

# Flag set shared by every regular expression compiled in this module.
FLAGS = re.DOTALL | re.MULTILINE | re.VERBOSE
def _floatconstants():
    """Decode the IEEE 754 doubles for NaN, +Infinity and -Infinity.

    Returns a ``(nan, inf, -inf)`` tuple built from the raw big-endian bit
    patterns of a quiet NaN and of positive infinity.
    """
    # binascii.unhexlify produces the same bytes as the Python-2-only
    # ``str.decode('hex')`` codec, but is available on every Python version.
    from binascii import unhexlify
    raw = unhexlify('7FF80000000000007FF0000000000000')
    # The struct module in Python 2.4 would get frexp() out of range here
    # when an endian is specified in the format string. Fixed in Python 2.5+
    # Hence the bytes are swapped by hand and unpacked in native order.
    if sys.byteorder != 'big':
        raw = raw[:8][::-1] + raw[8:][::-1]
    nan, inf = struct.unpack('dd', raw)
    return nan, inf, -inf


NaN, PosInf, NegInf = _floatconstants()
class JSONDecodeError(ValueError):
    """Subclass of ValueError with the following additional properties:

    msg: The unformatted error message
    doc: The JSON document being parsed
    pos: The start index of doc where parsing failed
    end: The end index of doc where parsing failed (may be None)
    lineno: The line corresponding to pos
    colno: The column corresponding to pos
    endlineno: The line corresponding to end (may be None)
    endcolno: The column corresponding to end (may be None)

    """
    def __init__(self, msg, doc, pos, end=None):
        # The formatted text (message plus location) becomes the regular
        # ValueError message; the raw pieces stay available as attributes.
        ValueError.__init__(self, errmsg(msg, doc, pos, end=end))
        self.msg = msg
        self.doc = doc
        self.pos = pos
        self.end = end
        self.lineno, self.colno = linecol(doc, pos)
        if end is not None:
            # The end location must be derived from ``end``; using ``pos``
            # here would merely duplicate lineno/colno.
            self.endlineno, self.endcolno = linecol(doc, end)
        else:
            self.endlineno, self.endcolno = None, None
def linecol(doc, pos):
    """Translate the character offset *pos* in *doc* to ``(lineno, colno)``.

    Both numbers are 1-based: the first character of the document is at
    line 1, column 1, and the character right after a newline is at
    column 1 of the following line.
    """
    lineno = doc.count('\n', 0, pos) + 1
    # rfind() yields -1 when no newline precedes pos, so the first line gets
    # ``pos + 1`` and is numbered exactly like every later line (previously
    # the first line alone was reported with a 0-based column).
    colno = pos - doc.rfind('\n', 0, pos)
    return lineno, colno
def errmsg(msg, doc, pos, end=None):
    """Return *msg* followed by the location of *pos* (and *end*) in *doc*.

    With only *pos* the result names a single line/column/char position;
    when *end* is given it names the span from *pos* to *end*.
    """
    # Note that this function is called from _speedups
    start_line, start_col = linecol(doc, pos)
    if end is not None:
        stop_line, stop_col = linecol(doc, end)
        template = '%s: line %d column %d - line %d column %d (char %d - %d)'
        return template % (
            msg, start_line, start_col, stop_line, stop_col, pos, end)
    template = '%s: line %d column %d (char %d)'
    return template % (msg, start_line, start_col, pos)
# Literal names (outside the JSON spec) and the float each one decodes to.
_CONSTANTS = {
    'NaN': NaN,
    'Infinity': PosInf,
    '-Infinity': NegInf,
}
# Group 1: the shortest run of characters up to the next special character.
# Group 2: that special character (a quote, a backslash or a control char).
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)

# Character following a backslash -> the character it stands for.
BACKSLASH = {
    '"': u'"',
    '\\': u'\\',
    '/': u'/',
    'b': u'\b',
    'f': u'\f',
    'n': u'\n',
    'r': u'\r',
    't': u'\t',
}

# Encoding assumed for byte strings when the caller does not name one.
DEFAULT_ENCODING = "utf-8"
def _scan_hex4(s, start, msg, errpos):
    """Return the value of the four hex digits found at s[start:start + 4].

    Raises JSONDecodeError(msg, s, errpos) when fewer than four characters
    are available or when any of them is not a hexadecimal digit.
    """
    digits = s[start:start + 4]
    if len(digits) != 4:
        raise JSONDecodeError(msg, s, errpos)
    # int(x, 16) on its own is too lenient: it also accepts forms such as
    # '0x1f', '+1f' or ' 1f', and raises a bare ValueError on garbage.
    for digit in digits:
        if digit not in '0123456789abcdefABCDEF':
            raise JSONDecodeError(msg, s, errpos)
    return int(digits, 16)


def py_scanstring(s, end, encoding=None, strict=True,
        _b=BACKSLASH, _m=STRINGCHUNK.match):
    """Scan the string s for a JSON string. End is the index of the
    character in s after the quote that started the JSON string.
    Unescapes all valid JSON string escape sequences and raises
    JSONDecodeError (a ValueError subclass) on attempt to decode an invalid
    string. If strict is False then literal control characters are allowed
    in the string.

    encoding is used to decode byte-string content and defaults to
    DEFAULT_ENCODING.

    Returns a tuple of the decoded string and the index of the character in s
    after the end quote."""
    if encoding is None:
        encoding = DEFAULT_ENCODING
    chunks = []
    _append = chunks.append
    # Index of the opening quote, used when reporting unterminated strings
    begin = end - 1
    while 1:
        chunk = _m(s, end)
        if chunk is None:
            raise JSONDecodeError(
                "Unterminated string starting at", s, begin)
        end = chunk.end()
        content, terminator = chunk.groups()
        # Content contains zero or more unescaped string characters
        if content:
            if not isinstance(content, unicode):
                content = unicode(content, encoding)
            _append(content)
        # Terminator is the end of string, a literal control character,
        # or a backslash denoting that an escape sequence follows
        if terminator == '"':
            break
        elif terminator != '\\':
            if strict:
                msg = "Invalid control character %r at" % (terminator,)
                raise JSONDecodeError(msg, s, end)
            else:
                _append(terminator)
                continue
        try:
            esc = s[end]
        except IndexError:
            raise JSONDecodeError(
                "Unterminated string starting at", s, begin)
        # If not a unicode escape sequence, must be in the lookup table
        if esc != 'u':
            try:
                char = _b[esc]
            except KeyError:
                msg = "Invalid \\escape: " + repr(esc)
                raise JSONDecodeError(msg, s, end)
            end += 1
        else:
            # Unicode escape sequence: 'u' followed by exactly 4 hex digits
            uni = _scan_hex4(s, end + 1, "Invalid \\uXXXX escape", end)
            next_end = end + 5
            # Check for surrogate pair on UCS-4 systems
            if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:
                msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
                if not s[end + 5:end + 7] == '\\u':
                    raise JSONDecodeError(msg, s, end)
                uni2 = _scan_hex4(s, end + 7, msg, end)
                # Only a low surrogate can complete the pair; anything else
                # would silently combine into an unrelated code point.
                if not 0xdc00 <= uni2 <= 0xdfff:
                    raise JSONDecodeError(msg, s, end)
                uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
                next_end += 6
            char = unichr(uni)
            end = next_end
        # Append the unescaped character
        _append(char)
    return u''.join(chunks), end
# Prefer the C implementation and fall back to the pure-Python one
scanstring = c_scanstring
if not scanstring:
    scanstring = py_scanstring

# The four JSON whitespace characters, as a regex and as a plain string
WHITESPACE_STR = ' \t\n\r'
WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
def JSONObject(s_and_end, encoding, strict, scan_once, object_hook,
        object_pairs_hook, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse a JSON object whose opening brace has already been consumed.

    s_and_end is a ``(s, end)`` tuple: the document and the index of the
    character following the ``{``. encoding and strict are passed on to
    scanstring for the keys; scan_once(s, idx) parses one value and raises
    StopIteration when there is none.

    Returns ``(result, end)`` where end is the index just past the closing
    ``}``. result is ``object_pairs_hook(pairs)`` when that hook is given
    (pairs being the list of ``(key, value)`` tuples in document order),
    otherwise a dict, passed through object_hook when that is given.

    Raises JSONDecodeError on malformed input.
    """
    # Explicit unpacking instead of a tuple parameter; callers still pass
    # the (s, end) tuple as the first positional argument.
    s, end = s_and_end
    pairs = []
    # Use a slice to prevent IndexError from being raised, the following
    # check will raise a more specific ValueError if the string is empty
    nextchar = s[end:end + 1]
    # Normally we expect nextchar == '"'
    if nextchar != '"':
        if nextchar in _ws:
            end = _w(s, end).end()
            nextchar = s[end:end + 1]
        # Trivial empty object
        if nextchar == '}':
            if object_pairs_hook is not None:
                result = object_pairs_hook(pairs)
                # Step past the '}' exactly like the dict path below does
                return result, end + 1
            pairs = {}
            if object_hook is not None:
                pairs = object_hook(pairs)
            return pairs, end + 1
        elif nextchar != '"':
            raise JSONDecodeError("Expecting property name", s, end)
    end += 1
    while True:
        key, end = scanstring(s, end, encoding, strict)

        # To skip some function call overhead we optimize the fast paths where
        # the JSON key separator is ": " or just ":".
        if s[end:end + 1] != ':':
            end = _w(s, end).end()
            if s[end:end + 1] != ':':
                raise JSONDecodeError("Expecting : delimiter", s, end)

        end += 1

        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            # Ran off the end; scan_once below reports the missing value
            pass

        try:
            value, end = scan_once(s, end)
        except StopIteration:
            raise JSONDecodeError("Expecting object", s, end)
        pairs.append((key, value))

        try:
            nextchar = s[end]
            if nextchar in _ws:
                end = _w(s, end + 1).end()
                nextchar = s[end]
        except IndexError:
            nextchar = ''
        end += 1

        if nextchar == '}':
            break
        elif nextchar != ',':
            raise JSONDecodeError("Expecting , delimiter", s, end - 1)

        try:
            nextchar = s[end]
            if nextchar in _ws:
                end += 1
                nextchar = s[end]
                if nextchar in _ws:
                    end = _w(s, end + 1).end()
                    nextchar = s[end]
        except IndexError:
            nextchar = ''

        end += 1
        if nextchar != '"':
            raise JSONDecodeError("Expecting property name", s, end - 1)

    if object_pairs_hook is not None:
        result = object_pairs_hook(pairs)
        return result, end
    pairs = dict(pairs)
    if object_hook is not None:
        pairs = object_hook(pairs)
    return pairs, end
def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse a JSON array whose opening bracket has already been consumed.

    s_and_end is a ``(s, end)`` tuple: the document and the index of the
    character following the ``[``. scan_once(s, idx) parses one value and
    raises StopIteration when there is none.

    Returns ``(values, end)`` where values is the list of decoded elements
    and end is the index just past the closing ``]``.

    Raises JSONDecodeError on malformed input.
    """
    # Explicit unpacking instead of a tuple parameter; callers still pass
    # the (s, end) tuple as the first positional argument.
    s, end = s_and_end
    values = []
    nextchar = s[end:end + 1]
    if nextchar in _ws:
        end = _w(s, end + 1).end()
        nextchar = s[end:end + 1]
    # Look-ahead for trivial empty array
    if nextchar == ']':
        return values, end + 1
    _append = values.append
    while True:
        try:
            value, end = scan_once(s, end)
        except StopIteration:
            raise JSONDecodeError("Expecting object", s, end)
        _append(value)
        nextchar = s[end:end + 1]
        if nextchar in _ws:
            end = _w(s, end + 1).end()
            nextchar = s[end:end + 1]
        end += 1
        if nextchar == ']':
            break
        elif nextchar != ',':
            # end was already advanced past the offending character, so
            # step back one to report its position (as JSONObject does)
            raise JSONDecodeError("Expecting , delimiter", s, end - 1)

        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            # Ran off the end; scan_once above reports the missing value
            pass

    return values, end
class JSONDecoder(object):
    """Simple JSON <http://json.org> decoder

    By default, decoding performs these translations:

    +---------------+-------------------+
    | JSON          | Python            |
    +===============+===================+
    | object        | dict              |
    +---------------+-------------------+
    | array         | list              |
    +---------------+-------------------+
    | string        | unicode           |
    +---------------+-------------------+
    | number (int)  | int, long         |
    +---------------+-------------------+
    | number (real) | float             |
    +---------------+-------------------+
    | true          | True              |
    +---------------+-------------------+
    | false         | False             |
    +---------------+-------------------+
    | null          | None              |
    +---------------+-------------------+

    ``NaN``, ``Infinity`` and ``-Infinity`` are also understood and decoded
    to their corresponding ``float`` values, which is outside the JSON spec.

    """

    def __init__(self, encoding=None, object_hook=None, parse_float=None,
            parse_int=None, parse_constant=None, strict=True,
            object_pairs_hook=None):
        """
        *encoding* determines the encoding used to interpret any
        :class:`str` objects decoded by this instance (``'utf-8'`` by
        default).  It has no effect when decoding :class:`unicode` objects.

        Note that currently only encodings that are a superset of ASCII work,
        strings of other encodings should be passed in as :class:`unicode`.

        *object_hook*, if specified, will be called with the result of every
        JSON object decoded and its return value will be used in place of the
        given :class:`dict`.  This can be used to provide custom
        deserializations (e.g. to support JSON-RPC class hinting).

        *object_pairs_hook* is an optional function that will be called with
        the result of any object literal decode with an ordered list of pairs.
        The return value of *object_pairs_hook* will be used instead of the
        :class:`dict`.  This feature can be used to implement custom decoders
        that rely on the order that the key and value pairs are decoded (for
        example, :func:`collections.OrderedDict` will remember the order of
        insertion). If *object_hook* is also defined, the *object_pairs_hook*
        takes priority.

        *parse_float*, if specified, will be called with the string of every
        JSON float to be decoded.  By default, this is equivalent to
        ``float(num_str)``. This can be used to use another datatype or parser
        for JSON floats (e.g. :class:`decimal.Decimal`).

        *parse_int*, if specified, will be called with the string of every
        JSON int to be decoded.  By default, this is equivalent to
        ``int(num_str)``.  This can be used to use another datatype or parser
        for JSON integers (e.g. :class:`float`).

        *parse_constant*, if specified, will be called with one of the
        following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``.  This
        can be used to raise an exception if invalid JSON numbers are
        encountered.

        *strict* controls the parser's behavior when it encounters an
        invalid control character in a string. The default setting of
        ``True`` means that unescaped control characters are parse errors, if
        ``False`` then control characters will be allowed in strings.

        """
        # Plain settings, stored as given
        self.encoding = encoding
        self.strict = strict
        self.object_hook = object_hook
        self.object_pairs_hook = object_pairs_hook

        # Number/constant parsers, falling back to the built-in behaviour
        self.parse_float = parse_float or float
        self.parse_int = parse_int or int
        self.parse_constant = parse_constant or _CONSTANTS.__getitem__

        # Parsers for the composite and string types
        self.parse_object = JSONObject
        self.parse_array = JSONArray
        self.parse_string = scanstring

        # Built last: the scanner is handed this fully configured instance
        self.scan_once = make_scanner(self)

    def decode(self, s, _w=WHITESPACE.match):
        """Return the Python representation of ``s`` (a ``str`` or ``unicode``
        instance containing a JSON document)

        Whitespace around the document is skipped; anything else after the
        document raises JSONDecodeError("Extra data", ...).

        """
        start = _w(s, 0).end()
        obj, end = self.raw_decode(s, idx=start)
        end = _w(s, end).end()
        if end == len(s):
            return obj
        raise JSONDecodeError("Extra data", s, end, len(s))

    def raw_decode(self, s, idx=0):
        """Decode a JSON document from ``s`` (a ``str`` or ``unicode``
        beginning with a JSON document) and return a 2-tuple of the Python
        representation and the index in ``s`` where the document ended.

        This can be used to decode a JSON document from a string that may
        have extraneous data at the end.

        """
        try:
            value, stop = self.scan_once(s, idx)
        except StopIteration:
            raise JSONDecodeError("No JSON object could be decoded", s, idx)
        else:
            return value, stop