issue5063: Fixes for building RPM on CentOS plus misc .spec file enhancements.
[python.git] / Lib / json / encoder.py
blob027a1a6b3cd08458e95e5f401d6a8c4a31f3b49c
1 """Implementation of JSONEncoder
2 """
3 import re
5 try:
6 from _json import encode_basestring_ascii as c_encode_basestring_ascii
7 except ImportError:
8 c_encode_basestring_ascii = None
9 try:
10 from _json import make_encoder as c_make_encoder
11 except ImportError:
12 c_make_encoder = None
# Characters that must be escaped inside any JSON string: the control
# characters U+0000-U+001F, the backslash and the double quote.
ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
# Same as above plus everything outside printable ASCII (space through '~');
# used when the output has to be pure ASCII.
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
# Detects bytes >= 0x80 in a byte string.
HAS_UTF8 = re.compile(r'[\x80-\xff]')
# Maps a character to its JSON escape sequence.  The short forms are listed
# explicitly; the loop below fills in \uXXXX for the remaining control
# characters without overriding the short forms.
ESCAPE_DCT = {
    '\\': '\\\\',
    '"': '\\"',
    '\b': '\\b',
    '\f': '\\f',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
}
for i in range(0x20):
    ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i))

# float('inf') is parsed by Python itself, so this does not rely on the
# platform turning an out-of-range literal such as '1e66666' into infinity.
INFINITY = float('inf')
FLOAT_REPR = repr
def encode_basestring(s):
    """Return *s* wrapped in double quotes as a JSON string literal.

    Every character matched by ``ESCAPE`` (backslash, double quote and the
    control characters U+0000-U+001F) is replaced by its entry in
    ``ESCAPE_DCT``; all other characters are copied through unchanged.

    """
    escaped = ESCAPE.sub(lambda hit: ESCAPE_DCT[hit.group(0)], s)
    return ''.join(('"', escaped, '"'))
def py_encode_basestring_ascii(s):
    """Return *s* as a double-quoted JSON string made of ASCII only.

    A byte string containing bytes >= 0x80 is first decoded as UTF-8.
    Characters matched by ``ESCAPE_ASCII`` are then replaced: by their
    ``ESCAPE_DCT`` entry when one exists, otherwise by a ``\\uXXXX``
    escape (two of them, a surrogate pair, for code points >= 0x10000).

    """
    if isinstance(s, str) and HAS_UTF8.search(s) is not None:
        s = s.decode('utf-8')

    def escape_char(hit):
        char = hit.group(0)
        if char in ESCAPE_DCT:
            return ESCAPE_DCT[char]
        codepoint = ord(char)
        if codepoint >= 0x10000:
            # Outside the BMP: split into a UTF-16 surrogate pair.
            offset = codepoint - 0x10000
            high = 0xd800 | ((offset >> 10) & 0x3ff)
            low = 0xdc00 | (offset & 0x3ff)
            return '\\u{0:04x}\\u{1:04x}'.format(high, low)
        return '\\u{0:04x}'.format(codepoint)

    return '"' + str(ESCAPE_ASCII.sub(escape_char, s)) + '"'
# Use the C implementation when the _json import above provided one,
# otherwise fall back to the pure-Python version.
encode_basestring_ascii = (
    c_encode_basestring_ascii
    if c_encode_basestring_ascii
    else py_encode_basestring_ascii
)
class JSONEncoder(object):
    """Extensible JSON <http://json.org> encoder for Python data structures.

    Supports the following objects and types by default:

    +-------------------+---------------+
    | Python            | JSON          |
    +===================+===============+
    | dict              | object        |
    +-------------------+---------------+
    | list, tuple       | array         |
    +-------------------+---------------+
    | str, unicode      | string        |
    +-------------------+---------------+
    | int, long, float  | number        |
    +-------------------+---------------+
    | True              | true          |
    +-------------------+---------------+
    | False             | false         |
    +-------------------+---------------+
    | None              | null          |
    +-------------------+---------------+

    To extend this to recognize other objects, subclass and implement a
    ``.default()`` method with another method that returns a serializable
    object for ``o`` if possible, otherwise it should call the superclass
    implementation (to raise ``TypeError``).

    """
    # Class-level defaults; __init__ overrides them per instance when the
    # ``separators`` argument is given.
    item_separator = ', '
    key_separator = ': '

    def __init__(self, skipkeys=False, ensure_ascii=True,
            check_circular=True, allow_nan=True, sort_keys=False,
            indent=None, separators=None, encoding='utf-8', default=None):
        """Constructor for JSONEncoder, with sensible defaults.

        If skipkeys is false, then it is a TypeError to attempt
        encoding of keys that are not str, int, long, float or None.  If
        skipkeys is True, such items are simply skipped.

        If ensure_ascii is true, the output is guaranteed to be str
        objects with all incoming unicode characters escaped.  If
        ensure_ascii is false, the output will be unicode object.

        If check_circular is true, then lists, dicts, and custom encoded
        objects will be checked for circular references during encoding to
        prevent an infinite recursion (which would cause an OverflowError).
        Otherwise, no such check takes place.

        If allow_nan is true, then NaN, Infinity, and -Infinity will be
        encoded as such.  This behavior is not JSON specification compliant,
        but is consistent with most JavaScript based encoders and decoders.
        Otherwise, it will be a ValueError to encode such floats.

        If sort_keys is true, then the output of dictionaries will be
        sorted by key; this is useful for regression tests to ensure
        that JSON serializations can be compared on a day-to-day basis.

        If indent is a non-negative integer, then JSON array
        elements and object members will be pretty-printed with that
        indent level.  An indent level of 0 will only insert newlines.
        None is the most compact representation.

        If specified, separators should be a (item_separator, key_separator)
        tuple.  The default is (', ', ': ').  To get the most compact JSON
        representation you should specify (',', ':') to eliminate whitespace.

        If specified, default is a function that gets called for objects
        that can't otherwise be serialized.  It should return a JSON encodable
        version of the object or raise a ``TypeError``.

        If encoding is not None, then all input strings will be
        transformed into unicode using that encoding prior to JSON-encoding.
        The default is UTF-8.

        """
        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.sort_keys = sort_keys
        self.indent = indent
        if separators is not None:
            self.item_separator, self.key_separator = separators
        if default is not None:
            # Shadows the ``default`` method on this instance only.
            self.default = default
        self.encoding = encoding

    def default(self, o):
        """Implement this method in a subclass such that it returns
        a serializable object for ``o``, or calls the base implementation
        (to raise a ``TypeError``).

        For example, to support arbitrary iterators, you could
        implement default like this::

            def default(self, o):
                try:
                    iterable = iter(o)
                except TypeError:
                    pass
                else:
                    return list(iterable)
                return JSONEncoder.default(self, o)

        """
        raise TypeError(repr(o) + " is not JSON serializable")

    def encode(self, o):
        """Return a JSON string representation of a Python data structure.

        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
        '{"foo": ["bar", "baz"]}'

        """
        # This is for extremely simple cases and benchmarks.
        if isinstance(o, basestring):
            if isinstance(o, str):
                _encoding = self.encoding
                if (_encoding is not None
                        and not (_encoding == 'utf-8')):
                    o = o.decode(_encoding)
            if self.ensure_ascii:
                return encode_basestring_ascii(o)
            else:
                return encode_basestring(o)
        # This doesn't pass the iterator directly to ''.join() because the
        # exceptions aren't as detailed.  The list call should be roughly
        # equivalent to the PySequence_Fast that ''.join() would do.
        chunks = self.iterencode(o, _one_shot=True)
        if not isinstance(chunks, (list, tuple)):
            chunks = list(chunks)
        return ''.join(chunks)

    def iterencode(self, o, _one_shot=False):
        """Encode the given object and yield each string
        representation as available.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)

        ``_one_shot`` is a private flag passed by ``encode()``; it permits
        the use of ``c_make_encoder`` when that is available.

        """
        if self.check_circular:
            markers = {}
        else:
            markers = None
        if self.ensure_ascii:
            _encoder = encode_basestring_ascii
        else:
            _encoder = encode_basestring
        # Only wrap the string encoder when there is an actual, non-UTF-8
        # encoding to apply.  encoding=None must not reach str.decode();
        # this mirrors the guard used by the fast path in encode().
        if self.encoding is not None and self.encoding != 'utf-8':
            def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding):
                if isinstance(o, str):
                    o = o.decode(_encoding)
                return _orig_encoder(o)

        def floatstr(o, allow_nan=self.allow_nan,
                _repr=FLOAT_REPR, _inf=INFINITY, _neginf=-INFINITY):
            # Check for specials.  Note that this type of test is processor
            # and/or platform-specific, so do tests which don't depend on the
            # internals.

            if o != o:
                text = 'NaN'
            elif o == _inf:
                text = 'Infinity'
            elif o == _neginf:
                text = '-Infinity'
            else:
                return _repr(o)

            if not allow_nan:
                raise ValueError(
                    "Out of range float values are not JSON compliant: " +
                    repr(o))

            return text

        # Route to the C encoder only for the compact, unsorted case.
        # ``indent is None`` (rather than ``not indent``) keeps indent=0 on
        # the _make_iterencode path, which emits the newlines promised for
        # an indent level of 0.
        if (_one_shot and c_make_encoder is not None
                and self.indent is None and not self.sort_keys):
            _iterencode = c_make_encoder(
                markers, self.default, _encoder, self.indent,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, self.allow_nan)
        else:
            _iterencode = _make_iterencode(
                markers, self.default, _encoder, self.indent, floatstr,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, _one_shot)
        return _iterencode(o, 0)
266 def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
267 _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
268 ## HACK: hand-optimized bytecode; turn globals into locals
269 False=False,
270 True=True,
271 ValueError=ValueError,
272 basestring=basestring,
273 dict=dict,
274 float=float,
275 id=id,
276 int=int,
277 isinstance=isinstance,
278 list=list,
279 long=long,
280 str=str,
281 tuple=tuple,
284 def _iterencode_list(lst, _current_indent_level):
285 if not lst:
286 yield '[]'
287 return
288 if markers is not None:
289 markerid = id(lst)
290 if markerid in markers:
291 raise ValueError("Circular reference detected")
292 markers[markerid] = lst
293 buf = '['
294 if _indent is not None:
295 _current_indent_level += 1
296 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
297 separator = _item_separator + newline_indent
298 buf += newline_indent
299 else:
300 newline_indent = None
301 separator = _item_separator
302 first = True
303 for value in lst:
304 if first:
305 first = False
306 else:
307 buf = separator
308 if isinstance(value, basestring):
309 yield buf + _encoder(value)
310 elif value is None:
311 yield buf + 'null'
312 elif value is True:
313 yield buf + 'true'
314 elif value is False:
315 yield buf + 'false'
316 elif isinstance(value, (int, long)):
317 yield buf + str(value)
318 elif isinstance(value, float):
319 yield buf + _floatstr(value)
320 else:
321 yield buf
322 if isinstance(value, (list, tuple)):
323 chunks = _iterencode_list(value, _current_indent_level)
324 elif isinstance(value, dict):
325 chunks = _iterencode_dict(value, _current_indent_level)
326 else:
327 chunks = _iterencode(value, _current_indent_level)
328 for chunk in chunks:
329 yield chunk
330 if newline_indent is not None:
331 _current_indent_level -= 1
332 yield '\n' + (' ' * (_indent * _current_indent_level))
333 yield ']'
334 if markers is not None:
335 del markers[markerid]
337 def _iterencode_dict(dct, _current_indent_level):
338 if not dct:
339 yield '{}'
340 return
341 if markers is not None:
342 markerid = id(dct)
343 if markerid in markers:
344 raise ValueError("Circular reference detected")
345 markers[markerid] = dct
346 yield '{'
347 if _indent is not None:
348 _current_indent_level += 1
349 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
350 item_separator = _item_separator + newline_indent
351 yield newline_indent
352 else:
353 newline_indent = None
354 item_separator = _item_separator
355 first = True
356 if _sort_keys:
357 items = dct.items()
358 items.sort(key=lambda kv: kv[0])
359 else:
360 items = dct.iteritems()
361 for key, value in items:
362 if isinstance(key, basestring):
363 pass
364 # JavaScript is weakly typed for these, so it makes sense to
365 # also allow them. Many encoders seem to do something like this.
366 elif isinstance(key, float):
367 key = _floatstr(key)
368 elif key is True:
369 key = 'true'
370 elif key is False:
371 key = 'false'
372 elif key is None:
373 key = 'null'
374 elif isinstance(key, (int, long)):
375 key = str(key)
376 elif _skipkeys:
377 continue
378 else:
379 raise TypeError("key " + repr(key) + " is not a string")
380 if first:
381 first = False
382 else:
383 yield item_separator
384 yield _encoder(key)
385 yield _key_separator
386 if isinstance(value, basestring):
387 yield _encoder(value)
388 elif value is None:
389 yield 'null'
390 elif value is True:
391 yield 'true'
392 elif value is False:
393 yield 'false'
394 elif isinstance(value, (int, long)):
395 yield str(value)
396 elif isinstance(value, float):
397 yield _floatstr(value)
398 else:
399 if isinstance(value, (list, tuple)):
400 chunks = _iterencode_list(value, _current_indent_level)
401 elif isinstance(value, dict):
402 chunks = _iterencode_dict(value, _current_indent_level)
403 else:
404 chunks = _iterencode(value, _current_indent_level)
405 for chunk in chunks:
406 yield chunk
407 if newline_indent is not None:
408 _current_indent_level -= 1
409 yield '\n' + (' ' * (_indent * _current_indent_level))
410 yield '}'
411 if markers is not None:
412 del markers[markerid]
414 def _iterencode(o, _current_indent_level):
415 if isinstance(o, basestring):
416 yield _encoder(o)
417 elif o is None:
418 yield 'null'
419 elif o is True:
420 yield 'true'
421 elif o is False:
422 yield 'false'
423 elif isinstance(o, (int, long)):
424 yield str(o)
425 elif isinstance(o, float):
426 yield _floatstr(o)
427 elif isinstance(o, (list, tuple)):
428 for chunk in _iterencode_list(o, _current_indent_level):
429 yield chunk
430 elif isinstance(o, dict):
431 for chunk in _iterencode_dict(o, _current_indent_level):
432 yield chunk
433 else:
434 if markers is not None:
435 markerid = id(o)
436 if markerid in markers:
437 raise ValueError("Circular reference detected")
438 markers[markerid] = o
439 o = _default(o)
440 for chunk in _iterencode(o, _current_indent_level):
441 yield chunk
442 if markers is not None:
443 del markers[markerid]
445 return _iterencode