commands: Don't expandvars() the command since 'sh -c' does it for us
[git-cola.git] / thirdparty / simplejson / encoder.py
blob31c3023d5325b3d08a725a7ca26f212ab874736f
1 """Implementation of JSONEncoder
2 """
3 import re
5 try:
6 from simplejson._speedups import encode_basestring_ascii as \
7 c_encode_basestring_ascii
8 except ImportError:
9 c_encode_basestring_ascii = None
10 try:
11 from simplejson._speedups import make_encoder as c_make_encoder
12 except ImportError:
13 c_make_encoder = None
15 from simplejson.decoder import PosInf
# Matches every character that must be escaped inside a JSON string:
# control characters (0x00-0x1f), the backslash and the double quote.
ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
# Matches the backslash, the double quote, and anything outside the
# printable ASCII range (space through tilde).
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
# Matches any character in the 0x80-0xff range.
HAS_UTF8 = re.compile(r'[\x80-\xff]')
# Maps a character to its escaped JSON spelling.  The short escapes are
# listed explicitly; the loop below fills in \uXXXX escapes for the
# remaining control characters.
ESCAPE_DCT = {
    '\\': '\\\\',
    '"': '\\"',
    '\b': '\\b',
    '\f': '\\f',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
}
for i in range(0x20):
    # setdefault() keeps the short escapes defined above.
    ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))

# Function used to render floats that are not NaN or infinite.
FLOAT_REPR = repr
def encode_basestring(s):
    """Return a JSON representation of a Python string

    A ``str`` in which ``HAS_UTF8`` finds a match is decoded as UTF-8
    first.  Every character matched by ``ESCAPE`` is then replaced with
    its entry in ``ESCAPE_DCT`` and the result is wrapped in double
    quotes.

    """
    needs_decoding = isinstance(s, str) and HAS_UTF8.search(s) is not None
    if needs_decoding:
        s = s.decode('utf-8')
    escaped = ESCAPE.sub(lambda match: ESCAPE_DCT[match.group(0)], s)
    return u'"' + escaped + u'"'
def py_encode_basestring_ascii(s):
    """Return an ASCII-only JSON representation of a Python string

    A ``str`` in which ``HAS_UTF8`` finds a match is decoded as UTF-8
    first.  Characters matched by ``ESCAPE_ASCII`` are replaced with
    their ``ESCAPE_DCT`` entry when one exists, otherwise with a
    ``\\uXXXX`` escape (two of them, as a surrogate pair, for code points
    of 0x10000 and above).

    """
    if isinstance(s, str) and HAS_UTF8.search(s) is not None:
        s = s.decode('utf-8')

    def escape_char(match):
        char = match.group(0)
        known = ESCAPE_DCT.get(char)
        if known is not None:
            return known
        codepoint = ord(char)
        if codepoint < 0x10000:
            return '\\u%04x' % (codepoint,)
        # surrogate pair
        offset = codepoint - 0x10000
        high = 0xd800 | ((offset >> 10) & 0x3ff)
        low = 0xdc00 | (offset & 0x3ff)
        return '\\u%04x\\u%04x' % (high, low)

    return '"' + str(ESCAPE_ASCII.sub(escape_char, s)) + '"'
# Use the C implementation when it is available (truthy), otherwise fall
# back to the pure-Python one.
if c_encode_basestring_ascii:
    encode_basestring_ascii = c_encode_basestring_ascii
else:
    encode_basestring_ascii = py_encode_basestring_ascii
class JSONEncoder(object):
    """Extensible JSON <http://json.org> encoder for Python data structures.

    Supports the following objects and types by default:

    +-------------------+---------------+
    | Python            | JSON          |
    +===================+===============+
    | dict              | object        |
    +-------------------+---------------+
    | list, tuple       | array         |
    +-------------------+---------------+
    | str, unicode      | string        |
    +-------------------+---------------+
    | int, long, float  | number        |
    +-------------------+---------------+
    | True              | true          |
    +-------------------+---------------+
    | False             | false         |
    +-------------------+---------------+
    | None              | null          |
    +-------------------+---------------+

    To extend this to recognize other objects, subclass and implement a
    ``.default()`` method with another method that returns a serializable
    object for ``o`` if possible, otherwise it should call the superclass
    implementation (to raise ``TypeError``).

    """
    # Class-level defaults; __init__ overrides them per instance when
    # ``separators`` is given.
    item_separator = ', '
    key_separator = ': '

    def __init__(self, skipkeys=False, ensure_ascii=True,
            check_circular=True, allow_nan=True, sort_keys=False,
            indent=None, separators=None, encoding='utf-8', default=None):
        """Constructor for JSONEncoder, with sensible defaults.

        If skipkeys is false, then it is a TypeError to attempt
        encoding of keys that are not str, int, long, float or None.  If
        skipkeys is True, such items are simply skipped.

        If ensure_ascii is true, the output is guaranteed to be str
        objects with all incoming unicode characters escaped.  If
        ensure_ascii is false, the output will be unicode object.

        If check_circular is true, then lists, dicts, and custom encoded
        objects will be checked for circular references during encoding to
        prevent an infinite recursion (which would cause an OverflowError).
        Otherwise, no such check takes place.

        If allow_nan is true, then NaN, Infinity, and -Infinity will be
        encoded as such.  This behavior is not JSON specification compliant,
        but is consistent with most JavaScript based encoders and decoders.
        Otherwise, it will be a ValueError to encode such floats.

        If sort_keys is true, then the output of dictionaries will be
        sorted by key; this is useful for regression tests to ensure
        that JSON serializations can be compared on a day-to-day basis.

        If indent is a string, then JSON array elements and object members
        will be pretty-printed with a newline followed by that string repeated
        for each level of nesting. ``None`` (the default) selects the most compact
        representation without any newlines. For backwards compatibility with
        versions of simplejson earlier than 2.1.0, an integer is also accepted
        and is converted to a string with that many spaces.

        If specified, separators should be a (item_separator, key_separator)
        tuple.  The default is (', ', ': ').  To get the most compact JSON
        representation you should specify (',', ':') to eliminate whitespace.

        If specified, default is a function that gets called for objects
        that can't otherwise be serialized.  It should return a JSON encodable
        version of the object or raise a ``TypeError``.

        If encoding is not None, then all input strings will be
        transformed into unicode using that encoding prior to JSON-encoding.
        The default is UTF-8.

        """

        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.sort_keys = sort_keys
        # Integer indents are converted to that many spaces.
        if isinstance(indent, (int, long)):
            indent = ' ' * indent
        self.indent = indent
        if separators is not None:
            self.item_separator, self.key_separator = separators
        if default is not None:
            # Shadows the ``default`` method on this instance only.
            self.default = default
        self.encoding = encoding

    def default(self, o):
        """Implement this method in a subclass such that it returns
        a serializable object for ``o``, or calls the base implementation
        (to raise a ``TypeError``).

        For example, to support arbitrary iterators, you could
        implement default like this::

            def default(self, o):
                try:
                    iterable = iter(o)
                except TypeError:
                    pass
                else:
                    return list(iterable)
                return JSONEncoder.default(self, o)

        """
        raise TypeError(repr(o) + " is not JSON serializable")

    def encode(self, o):
        """Return a JSON string representation of a Python data structure.

        >>> from simplejson import JSONEncoder
        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
        '{"foo": ["bar", "baz"]}'

        """
        # This is for extremely simple cases and benchmarks.
        if isinstance(o, basestring):
            if isinstance(o, str):
                _encoding = self.encoding
                if (_encoding is not None
                        and not (_encoding == 'utf-8')):
                    o = o.decode(_encoding)
            if self.ensure_ascii:
                return encode_basestring_ascii(o)
            else:
                return encode_basestring(o)
        # This doesn't pass the iterator directly to ''.join() because the
        # exceptions aren't as detailed.  The list call should be roughly
        # equivalent to the PySequence_Fast that ''.join() would do.
        chunks = self.iterencode(o, _one_shot=True)
        if not isinstance(chunks, (list, tuple)):
            chunks = list(chunks)
        if self.ensure_ascii:
            return ''.join(chunks)
        else:
            return u''.join(chunks)

    def iterencode(self, o, _one_shot=False):
        """Encode the given object and yield each string
        representation as available.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)

        """
        if self.check_circular:
            markers = {}
        else:
            markers = None
        if self.ensure_ascii:
            _encoder = encode_basestring_ascii
        else:
            _encoder = encode_basestring
        # Only wrap the string encoder when an explicit non-UTF-8 encoding
        # was requested.  ``encoding=None`` must not reach ``str.decode``
        # (it would raise TypeError); this mirrors the check in ``encode``.
        if self.encoding is not None and self.encoding != 'utf-8':
            def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding):
                if isinstance(o, str):
                    o = o.decode(_encoding)
                return _orig_encoder(o)

        def floatstr(o, allow_nan=self.allow_nan,
                _repr=FLOAT_REPR, _inf=PosInf, _neginf=-PosInf):
            # Check for specials.  Note that this type of test is processor
            # and/or platform-specific, so do tests which don't depend on
            # the internals.

            if o != o:
                text = 'NaN'
            elif o == _inf:
                text = 'Infinity'
            elif o == _neginf:
                text = '-Infinity'
            else:
                return _repr(o)

            if not allow_nan:
                raise ValueError(
                    "Out of range float values are not JSON compliant: " +
                    repr(o))

            return text

        # The C encoder is only used for one-shot encoding without
        # indentation or key sorting; everything else goes through the
        # pure-Python generator.
        if (_one_shot and c_make_encoder is not None
                and not self.indent and not self.sort_keys):
            _iterencode = c_make_encoder(
                markers, self.default, _encoder, self.indent,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, self.allow_nan)
        else:
            _iterencode = _make_iterencode(
                markers, self.default, _encoder, self.indent, floatstr,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, _one_shot)
        return _iterencode(o, 0)
def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
        _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
        ## HACK: hand-optimized bytecode; turn globals into locals
        False=False,
        True=True,
        ValueError=ValueError,
        basestring=basestring,
        dict=dict,
        float=float,
        id=id,
        int=int,
        isinstance=isinstance,
        list=list,
        long=long,
        str=str,
        tuple=tuple,
    ):
    """Build and return the pure-Python ``_iterencode`` generator function.

    The returned function takes ``(o, _current_indent_level)`` and yields
    the JSON text for ``o`` as a sequence of string chunks.

    ``markers`` is either ``None`` (no circular-reference check) or a dict
    keyed by ``id()`` of the containers/objects currently being encoded.
    ``_default`` is called on objects that match none of the built-in
    branches and its result is encoded in their place.  ``_encoder``
    renders strings (values and dict keys) and ``_floatstr`` renders
    floats.  ``_indent`` is ``None`` or the string repeated per nesting
    level; ``_key_separator`` and ``_item_separator`` are emitted between
    key/value and between items.  ``_sort_keys`` sorts dict items by key;
    ``_skipkeys`` silently drops keys of unsupported types instead of
    raising ``TypeError``.  ``_one_shot`` is not referenced in this
    function body.

    Raises ``ValueError`` when a circular reference is detected.
    """

    def _iterencode_list(lst, _current_indent_level):
        # Yield the chunks of a JSON array for a list or tuple.
        if not lst:
            yield '[]'
            return
        if markers is not None:
            markerid = id(lst)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = lst
        # ``buf`` holds the text that must precede the next item: the
        # opening bracket (plus indentation) first, the separator afterwards.
        buf = '['
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + (_indent * _current_indent_level)
            separator = _item_separator + newline_indent
            buf += newline_indent
        else:
            newline_indent = None
            separator = _item_separator
        first = True
        for value in lst:
            if first:
                first = False
            else:
                buf = separator
            # Scalars are emitted together with ``buf`` in a single chunk.
            # True/False are tested by identity before the int branch.
            if isinstance(value, basestring):
                yield buf + _encoder(value)
            elif value is None:
                yield buf + 'null'
            elif value is True:
                yield buf + 'true'
            elif value is False:
                yield buf + 'false'
            elif isinstance(value, (int, long)):
                yield buf + str(value)
            elif isinstance(value, float):
                yield buf + _floatstr(value)
            else:
                # Containers and other objects: emit ``buf`` on its own,
                # then delegate to the matching generator.
                yield buf
                if isinstance(value, (list, tuple)):
                    chunks = _iterencode_list(value, _current_indent_level)
                elif isinstance(value, dict):
                    chunks = _iterencode_dict(value, _current_indent_level)
                else:
                    chunks = _iterencode(value, _current_indent_level)
                for chunk in chunks:
                    yield chunk
        if newline_indent is not None:
            _current_indent_level -= 1
            yield '\n' + (_indent * _current_indent_level)
        yield ']'
        if markers is not None:
            del markers[markerid]

    def _iterencode_dict(dct, _current_indent_level):
        # Yield the chunks of a JSON object for a dict.
        if not dct:
            yield '{}'
            return
        if markers is not None:
            markerid = id(dct)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = dct
        yield '{'
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + (_indent * _current_indent_level)
            item_separator = _item_separator + newline_indent
            yield newline_indent
        else:
            newline_indent = None
            item_separator = _item_separator
        first = True
        if _sort_keys:
            items = dct.items()
            items.sort(key=lambda kv: kv[0])
        else:
            items = dct.iteritems()
        for key, value in items:
            # Normalize the key to a string; unsupported key types are
            # skipped or rejected depending on ``_skipkeys``.
            if isinstance(key, basestring):
                pass
            # JavaScript is weakly typed for these, so it makes sense to
            # also allow them.  Many encoders seem to do something like this.
            elif isinstance(key, float):
                key = _floatstr(key)
            elif key is True:
                key = 'true'
            elif key is False:
                key = 'false'
            elif key is None:
                key = 'null'
            elif isinstance(key, (int, long)):
                key = str(key)
            elif _skipkeys:
                continue
            else:
                raise TypeError("key " + repr(key) + " is not a string")
            # The separator is only emitted before items that are actually
            # written, so skipped keys leave no stray separators.
            if first:
                first = False
            else:
                yield item_separator
            yield _encoder(key)
            yield _key_separator
            if isinstance(value, basestring):
                yield _encoder(value)
            elif value is None:
                yield 'null'
            elif value is True:
                yield 'true'
            elif value is False:
                yield 'false'
            elif isinstance(value, (int, long)):
                yield str(value)
            elif isinstance(value, float):
                yield _floatstr(value)
            else:
                if isinstance(value, (list, tuple)):
                    chunks = _iterencode_list(value, _current_indent_level)
                elif isinstance(value, dict):
                    chunks = _iterencode_dict(value, _current_indent_level)
                else:
                    chunks = _iterencode(value, _current_indent_level)
                for chunk in chunks:
                    yield chunk
        if newline_indent is not None:
            _current_indent_level -= 1
            yield '\n' + (_indent * _current_indent_level)
        yield '}'
        if markers is not None:
            del markers[markerid]

    def _iterencode(o, _current_indent_level):
        # Entry point: dispatch on the type of ``o``.
        if isinstance(o, basestring):
            yield _encoder(o)
        elif o is None:
            yield 'null'
        elif o is True:
            yield 'true'
        elif o is False:
            yield 'false'
        elif isinstance(o, (int, long)):
            yield str(o)
        elif isinstance(o, float):
            yield _floatstr(o)
        elif isinstance(o, (list, tuple)):
            for chunk in _iterencode_list(o, _current_indent_level):
                yield chunk
        elif isinstance(o, dict):
            for chunk in _iterencode_dict(o, _current_indent_level):
                yield chunk
        else:
            # Unknown type: register it for the circular check, convert it
            # with ``_default`` and encode whatever that returns.
            if markers is not None:
                markerid = id(o)
                if markerid in markers:
                    raise ValueError("Circular reference detected")
                markers[markerid] = o
            o = _default(o)
            for chunk in _iterencode(o, _current_indent_level):
                yield chunk
            if markers is not None:
                del markers[markerid]

    return _iterencode