gitcmds: Add log_helper() and parse_rev_list()
[git-cola.git] / simplejson / encoder.py
blob6d75910cd01189086a942b0b3150cf8a55876c5c
1 """Implementation of JSONEncoder
2 """
3 import re
5 try:
6 from simplejson._speedups import encode_basestring_ascii as \
7 c_encode_basestring_ascii
8 except ImportError:
9 c_encode_basestring_ascii = None
10 try:
11 from simplejson._speedups import make_encoder as c_make_encoder
12 except ImportError:
13 c_make_encoder = None
15 from simplejson.decoder import PosInf
# Characters that must be escaped inside a JSON string when non-ASCII output
# is allowed: the control characters 0x00-0x1f, backslash and double quote.
ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
# Characters that must be escaped for ASCII-only output: backslash, double
# quote, and anything outside the range from space to tilde.
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
# Matches any character in the range 0x80-0xff.
HAS_UTF8 = re.compile(r'[\x80-\xff]')
# Maps a character to its escaped JSON text. The short two-character escapes
# are listed explicitly; the loop below fills in the rest.
ESCAPE_DCT = {
    '\\': '\\\\',
    '"': '\\"',
    '\b': '\\b',
    '\f': '\\f',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
}
# Every remaining control character gets a \uXXXX escape; setdefault keeps
# the short forms registered above.
for i in range(0x20):
    ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))

# Callable used to turn a float into its textual form.
FLOAT_REPR = repr
def encode_basestring(s):
    """Return ``s`` as a double-quoted JSON string literal.

    A byte string containing characters in the 0x80-0xff range is decoded
    as UTF-8 first. Every character matched by ``ESCAPE`` is replaced by
    its entry in ``ESCAPE_DCT``; everything else is passed through as is.

    """
    needs_decoding = isinstance(s, str) and HAS_UTF8.search(s) is not None
    if needs_decoding:
        s = s.decode('utf-8')
    escaped = ESCAPE.sub(lambda m: ESCAPE_DCT[m.group(0)], s)
    return u'"' + escaped + u'"'
def py_encode_basestring_ascii(s):
    """Return ``s`` as a double-quoted, ASCII-only JSON string literal.

    A byte string containing characters in the 0x80-0xff range is decoded
    as UTF-8 first. Characters matched by ``ESCAPE_ASCII`` are replaced by
    their ``ESCAPE_DCT`` entry when one exists, otherwise by ``\\uXXXX``
    escapes (a surrogate pair for code points of 0x10000 and above).

    """
    if isinstance(s, str) and HAS_UTF8.search(s) is not None:
        s = s.decode('utf-8')

    def escape_char(match):
        char = match.group(0)
        if char in ESCAPE_DCT:
            return ESCAPE_DCT[char]
        codepoint = ord(char)
        if codepoint >= 0x10000:
            # Outside the BMP: split into a high/low surrogate pair.
            offset = codepoint - 0x10000
            high = 0xd800 | ((offset >> 10) & 0x3ff)
            low = 0xdc00 | (offset & 0x3ff)
            return '\\u%04x\\u%04x' % (high, low)
        return '\\u%04x' % (codepoint,)

    return '"' + str(ESCAPE_ASCII.sub(escape_char, s)) + '"'
# Use the C implementation when it is available (the name is None when the
# speedups import failed); otherwise fall back to the pure-Python version.
if c_encode_basestring_ascii:
    encode_basestring_ascii = c_encode_basestring_ascii
else:
    encode_basestring_ascii = py_encode_basestring_ascii
class JSONEncoder(object):
    """Extensible JSON <http://json.org> encoder for Python data structures.

    Supports the following objects and types by default:

    +-------------------+---------------+
    | Python            | JSON          |
    +===================+===============+
    | dict              | object        |
    +-------------------+---------------+
    | list, tuple       | array         |
    +-------------------+---------------+
    | str, unicode      | string        |
    +-------------------+---------------+
    | int, long, float  | number        |
    +-------------------+---------------+
    | True              | true          |
    +-------------------+---------------+
    | False             | false         |
    +-------------------+---------------+
    | None              | null          |
    +-------------------+---------------+

    To extend this to recognize other objects, subclass and implement a
    ``.default()`` method that returns a serializable object for ``o`` if
    possible, otherwise it should call the superclass implementation (to
    raise ``TypeError``).

    """
    # Class-level defaults; __init__ overrides them per instance when a
    # ``separators`` tuple is supplied.
    item_separator = ', '
    key_separator = ': '

    def __init__(self, skipkeys=False, ensure_ascii=True,
            check_circular=True, allow_nan=True, sort_keys=False,
            indent=None, separators=None, encoding='utf-8', default=None):
        """Constructor for JSONEncoder, with sensible defaults.

        If skipkeys is false, then it is a TypeError to attempt
        encoding of keys that are not str, int, long, float or None.  If
        skipkeys is True, such items are simply skipped.

        If ensure_ascii is true, the output is guaranteed to be str
        objects with all incoming unicode characters escaped.  If
        ensure_ascii is false, the output will be unicode object.

        If check_circular is true, then lists, dicts, and custom encoded
        objects will be checked for circular references during encoding to
        prevent an infinite recursion (which would cause an OverflowError).
        Otherwise, no such check takes place.

        If allow_nan is true, then NaN, Infinity, and -Infinity will be
        encoded as such.  This behavior is not JSON specification compliant,
        but is consistent with most JavaScript based encoders and decoders.
        Otherwise, it will be a ValueError to encode such floats.

        If sort_keys is true, then the output of dictionaries will be
        sorted by key; this is useful for regression tests to ensure
        that JSON serializations can be compared on a day-to-day basis.

        If indent is a non-negative integer, then JSON array
        elements and object members will be pretty-printed with that
        indent level.  An indent level of 0 will only insert newlines.
        None is the most compact representation.

        If specified, separators should be a (item_separator, key_separator)
        tuple.  The default is (', ', ': ').  To get the most compact JSON
        representation you should specify (',', ':') to eliminate whitespace.

        If specified, default is a function that gets called for objects
        that can't otherwise be serialized.  It should return a JSON encodable
        version of the object or raise a ``TypeError``.

        If encoding is not None, then all input strings will be
        transformed into unicode using that encoding prior to JSON-encoding.
        The default is UTF-8.  With encoding=None no such transcoding step
        is added by this class.

        """
        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.sort_keys = sort_keys
        self.indent = indent
        if separators is not None:
            self.item_separator, self.key_separator = separators
        if default is not None:
            # Shadows the ``default`` method on this instance only.
            self.default = default
        self.encoding = encoding

    def default(self, o):
        """Implement this method in a subclass such that it returns
        a serializable object for ``o``, or calls the base implementation
        (to raise a ``TypeError``).

        For example, to support arbitrary iterators, you could
        implement default like this::

            def default(self, o):
                try:
                    iterable = iter(o)
                except TypeError:
                    pass
                else:
                    return list(iterable)
                return JSONEncoder.default(self, o)

        """
        raise TypeError(repr(o) + " is not JSON serializable")

    def encode(self, o):
        """Return a JSON string representation of a Python data structure.

        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
        '{"foo": ["bar", "baz"]}'

        """
        # This is for extremely simple cases and benchmarks.
        if isinstance(o, basestring):
            if isinstance(o, str):
                _encoding = self.encoding
                if (_encoding is not None
                        and not (_encoding == 'utf-8')):
                    o = o.decode(_encoding)
            if self.ensure_ascii:
                return encode_basestring_ascii(o)
            else:
                return encode_basestring(o)
        # This doesn't pass the iterator directly to ''.join() because the
        # exceptions aren't as detailed.  The list call should be roughly
        # equivalent to the PySequence_Fast that ''.join() would do.
        chunks = self.iterencode(o, _one_shot=True)
        if not isinstance(chunks, (list, tuple)):
            chunks = list(chunks)
        if self.ensure_ascii:
            return ''.join(chunks)
        else:
            return u''.join(chunks)

    def iterencode(self, o, _one_shot=False):
        """Encode the given object and yield each string
        representation as available.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)

        ``_one_shot`` is set by ``encode()``; it allows the C encoder to be
        used when it is available and neither indentation nor key sorting
        was requested.

        """
        if self.check_circular:
            markers = {}
        else:
            markers = None
        if self.ensure_ascii:
            _encoder = encode_basestring_ascii
        else:
            _encoder = encode_basestring
        # Only add a transcoding wrapper for a real, non-UTF-8 encoding.
        # encoding=None means "no transcoding" (encode() applies the same
        # guard); without the None check, byte strings would hit
        # o.decode(None) and raise TypeError.
        if self.encoding is not None and self.encoding != 'utf-8':
            def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding):
                if isinstance(o, str):
                    o = o.decode(_encoding)
                return _orig_encoder(o)

        def floatstr(o, allow_nan=self.allow_nan,
                _repr=FLOAT_REPR, _inf=PosInf, _neginf=-PosInf):
            # Check for specials.  Note that this type of test is processor
            # and/or platform-specific, so do tests which don't depend on
            # the internals.

            if o != o:
                text = 'NaN'
            elif o == _inf:
                text = 'Infinity'
            elif o == _neginf:
                text = '-Infinity'
            else:
                return _repr(o)

            if not allow_nan:
                raise ValueError(
                    "Out of range float values are not JSON compliant: " +
                    repr(o))

            return text

        # indent=0 is a real request for newline-separated output, so the
        # check must be ``is None``; testing truthiness would route indent=0
        # to the C path, which is reserved for the no-indent case.
        if (_one_shot and c_make_encoder is not None
                and self.indent is None and not self.sort_keys):
            _iterencode = c_make_encoder(
                markers, self.default, _encoder, self.indent,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, self.allow_nan)
        else:
            _iterencode = _make_iterencode(
                markers, self.default, _encoder, self.indent, floatstr,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, _one_shot)
        return _iterencode(o, 0)
272 def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
273 _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
274 ## HACK: hand-optimized bytecode; turn globals into locals
275 False=False,
276 True=True,
277 ValueError=ValueError,
278 basestring=basestring,
279 dict=dict,
280 float=float,
281 id=id,
282 int=int,
283 isinstance=isinstance,
284 list=list,
285 long=long,
286 str=str,
287 tuple=tuple,
290 def _iterencode_list(lst, _current_indent_level):
291 if not lst:
292 yield '[]'
293 return
294 if markers is not None:
295 markerid = id(lst)
296 if markerid in markers:
297 raise ValueError("Circular reference detected")
298 markers[markerid] = lst
299 buf = '['
300 if _indent is not None:
301 _current_indent_level += 1
302 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
303 separator = _item_separator + newline_indent
304 buf += newline_indent
305 else:
306 newline_indent = None
307 separator = _item_separator
308 first = True
309 for value in lst:
310 if first:
311 first = False
312 else:
313 buf = separator
314 if isinstance(value, basestring):
315 yield buf + _encoder(value)
316 elif value is None:
317 yield buf + 'null'
318 elif value is True:
319 yield buf + 'true'
320 elif value is False:
321 yield buf + 'false'
322 elif isinstance(value, (int, long)):
323 yield buf + str(value)
324 elif isinstance(value, float):
325 yield buf + _floatstr(value)
326 else:
327 yield buf
328 if isinstance(value, (list, tuple)):
329 chunks = _iterencode_list(value, _current_indent_level)
330 elif isinstance(value, dict):
331 chunks = _iterencode_dict(value, _current_indent_level)
332 else:
333 chunks = _iterencode(value, _current_indent_level)
334 for chunk in chunks:
335 yield chunk
336 if newline_indent is not None:
337 _current_indent_level -= 1
338 yield '\n' + (' ' * (_indent * _current_indent_level))
339 yield ']'
340 if markers is not None:
341 del markers[markerid]
343 def _iterencode_dict(dct, _current_indent_level):
344 if not dct:
345 yield '{}'
346 return
347 if markers is not None:
348 markerid = id(dct)
349 if markerid in markers:
350 raise ValueError("Circular reference detected")
351 markers[markerid] = dct
352 yield '{'
353 if _indent is not None:
354 _current_indent_level += 1
355 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
356 item_separator = _item_separator + newline_indent
357 yield newline_indent
358 else:
359 newline_indent = None
360 item_separator = _item_separator
361 first = True
362 if _sort_keys:
363 items = dct.items()
364 items.sort(key=lambda kv: kv[0])
365 else:
366 items = dct.iteritems()
367 for key, value in items:
368 if isinstance(key, basestring):
369 pass
370 # JavaScript is weakly typed for these, so it makes sense to
371 # also allow them. Many encoders seem to do something like this.
372 elif isinstance(key, float):
373 key = _floatstr(key)
374 elif key is True:
375 key = 'true'
376 elif key is False:
377 key = 'false'
378 elif key is None:
379 key = 'null'
380 elif isinstance(key, (int, long)):
381 key = str(key)
382 elif _skipkeys:
383 continue
384 else:
385 raise TypeError("key " + repr(key) + " is not a string")
386 if first:
387 first = False
388 else:
389 yield item_separator
390 yield _encoder(key)
391 yield _key_separator
392 if isinstance(value, basestring):
393 yield _encoder(value)
394 elif value is None:
395 yield 'null'
396 elif value is True:
397 yield 'true'
398 elif value is False:
399 yield 'false'
400 elif isinstance(value, (int, long)):
401 yield str(value)
402 elif isinstance(value, float):
403 yield _floatstr(value)
404 else:
405 if isinstance(value, (list, tuple)):
406 chunks = _iterencode_list(value, _current_indent_level)
407 elif isinstance(value, dict):
408 chunks = _iterencode_dict(value, _current_indent_level)
409 else:
410 chunks = _iterencode(value, _current_indent_level)
411 for chunk in chunks:
412 yield chunk
413 if newline_indent is not None:
414 _current_indent_level -= 1
415 yield '\n' + (' ' * (_indent * _current_indent_level))
416 yield '}'
417 if markers is not None:
418 del markers[markerid]
420 def _iterencode(o, _current_indent_level):
421 if isinstance(o, basestring):
422 yield _encoder(o)
423 elif o is None:
424 yield 'null'
425 elif o is True:
426 yield 'true'
427 elif o is False:
428 yield 'false'
429 elif isinstance(o, (int, long)):
430 yield str(o)
431 elif isinstance(o, float):
432 yield _floatstr(o)
433 elif isinstance(o, (list, tuple)):
434 for chunk in _iterencode_list(o, _current_indent_level):
435 yield chunk
436 elif isinstance(o, dict):
437 for chunk in _iterencode_dict(o, _current_indent_level):
438 yield chunk
439 else:
440 if markers is not None:
441 markerid = id(o)
442 if markerid in markers:
443 raise ValueError("Circular reference detected")
444 markers[markerid] = o
445 o = _default(o)
446 for chunk in _iterencode(o, _current_indent_level):
447 yield chunk
448 if markers is not None:
449 del markers[markerid]
451 return _iterencode