"""Implementation of JSONEncoder
"""
import re

# The C accelerators in ``_json`` are optional.  Each name falls back to
# ``None`` so the rest of the module can test for availability instead of
# failing at import time.
try:
    from _json import encode_basestring_ascii as c_encode_basestring_ascii
except ImportError:
    c_encode_basestring_ascii = None
try:
    from _json import make_encoder as c_make_encoder
except ImportError:
    c_make_encoder = None

# Characters that must always be escaped inside a JSON string: the control
# characters, the backslash and the double quote.
ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
# Same as ESCAPE, plus everything outside printable ASCII (space .. tilde).
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
# Matches any character in the \x80-\xff range (i.e. non-ASCII bytes in a
# byte string).
HAS_UTF8 = re.compile(r'[\x80-\xff]')
# Replacement text for each character that has a short JSON escape.
ESCAPE_DCT = {
    '\\': '\\\\',
    '"': '\\"',
    '\b': '\\b',
    '\f': '\\f',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
}
# Remaining control characters have no short form; use \uXXXX.  setdefault
# keeps the short escapes registered above.
for i in range(0x20):
    ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i))

# float('inf') is spelled out explicitly rather than relying on the overflow
# of an out-of-range literal such as '1e66666'.
INFINITY = float('inf')
# Formatter used for ordinary (finite) floats.
FLOAT_REPR = repr
def encode_basestring(s):
    """Return a JSON representation of a Python string

    Every character matched by ``ESCAPE`` is replaced by its entry in
    ``ESCAPE_DCT``; the result is wrapped in double quotes.  Characters
    that ``ESCAPE`` does not match are passed through unchanged.

    """
    def replace(match):
        # ESCAPE only matches single characters, so group(0) is the
        # character to look up.
        return ESCAPE_DCT[match.group(0)]

    return '"' + ESCAPE.sub(replace, s) + '"'
def py_encode_basestring_ascii(s):
    """Return an ASCII-only JSON representation of a Python string

    Characters with an entry in ``ESCAPE_DCT`` use that escape.  Any other
    character matched by ``ESCAPE_ASCII`` is written as ``\\uXXXX``; code
    points of 0x10000 and above are written as a UTF-16 surrogate pair.

    """
    # A byte string containing bytes in the \x80-\xff range is decoded as
    # UTF-8 so that the escaping below works on characters.
    if isinstance(s, str) and HAS_UTF8.search(s) is not None:
        s = s.decode('utf-8')

    def replace(match):
        char = match.group(0)
        try:
            return ESCAPE_DCT[char]
        except KeyError:
            n = ord(char)
            if n < 0x10000:
                return '\\u{0:04x}'.format(n)
            # Above the BMP: split into a high/low surrogate pair.
            n -= 0x10000
            s1 = 0xd800 | ((n >> 10) & 0x3ff)
            s2 = 0xdc00 | (n & 0x3ff)
            return '\\u{0:04x}\\u{1:04x}'.format(s1, s2)

    return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'
# Use the C implementation when it was importable (it is None otherwise),
# and the pure-Python one as the fallback.
encode_basestring_ascii = (
    c_encode_basestring_ascii or py_encode_basestring_ascii)
class JSONEncoder(object):
    """Extensible JSON <http://json.org> encoder for Python data structures.

    Supports the following objects and types by default:

    +-------------------+---------------+
    | Python            | JSON          |
    +===================+===============+
    | dict              | object        |
    +-------------------+---------------+
    | list, tuple       | array         |
    +-------------------+---------------+
    | str, unicode      | string        |
    +-------------------+---------------+
    | int, long, float  | number        |
    +-------------------+---------------+
    | True              | true          |
    +-------------------+---------------+
    | False             | false         |
    +-------------------+---------------+
    | None              | null          |
    +-------------------+---------------+

    To extend this to recognize other objects, subclass and override the
    ``.default()`` method so that it returns a serializable object for
    ``o`` if possible; otherwise it should call the superclass
    implementation (to raise ``TypeError``).

    """
    item_separator = ', '
    key_separator = ': '

    def __init__(self, skipkeys=False, ensure_ascii=True,
                 check_circular=True, allow_nan=True, sort_keys=False,
                 indent=None, separators=None, encoding='utf-8',
                 default=None):
        """Constructor for JSONEncoder, with sensible defaults.

        If skipkeys is false, then it is a TypeError to attempt
        encoding of keys that are not str, int, long, float or None.  If
        skipkeys is True, such items are simply skipped.

        If ensure_ascii is true, the output is guaranteed to be str
        objects with all incoming unicode characters escaped.  If
        ensure_ascii is false, the output will be a unicode object.

        If check_circular is true, then lists, dicts, and custom encoded
        objects will be checked for circular references during encoding to
        prevent an infinite recursion (which would cause an OverflowError).
        Otherwise, no such check takes place.

        If allow_nan is true, then NaN, Infinity, and -Infinity will be
        encoded as such.  This behavior is not JSON specification compliant,
        but is consistent with most JavaScript based encoders and decoders.
        Otherwise, it will be a ValueError to encode such floats.

        If sort_keys is true, then the output of dictionaries will be
        sorted by key; this is useful for regression tests to ensure
        that JSON serializations can be compared on a day-to-day basis.

        If indent is a non-negative integer, then JSON array
        elements and object members will be pretty-printed with that
        indent level.  An indent level of 0 will only insert newlines.
        None is the most compact representation.

        If specified, separators should be a (item_separator, key_separator)
        tuple.  The default is (', ', ': ').  To get the most compact JSON
        representation you should specify (',', ':') to eliminate whitespace.

        If specified, default is a function that gets called for objects
        that can't otherwise be serialized.  It should return a JSON encodable
        version of the object or raise a ``TypeError``.

        If encoding is not None, then all input strings will be
        transformed into unicode using that encoding prior to JSON-encoding.
        The default is UTF-8.

        """
        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.sort_keys = sort_keys
        self.indent = indent
        # Only override the class-level separators when the caller gave some.
        if separators is not None:
            self.item_separator, self.key_separator = separators
        # A supplied callable shadows the ``default`` method on this instance.
        if default is not None:
            self.default = default
        self.encoding = encoding

    def default(self, o):
        """Implement this method in a subclass such that it returns
        a serializable object for ``o``, or calls the base implementation
        (to raise a ``TypeError``).

        For example, to support arbitrary iterators, you could
        implement default like this::

            def default(self, o):
                try:
                    iterable = iter(o)
                except TypeError:
                    pass
                else:
                    return list(iterable)
                return JSONEncoder.default(self, o)

        """
        raise TypeError(repr(o) + " is not JSON serializable")

    def encode(self, o):
        """Return a JSON string representation of a Python data structure.

        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
        '{"foo": ["bar", "baz"]}'

        """
        # This is for extremely simple cases and benchmarks.
        if isinstance(o, basestring):
            if isinstance(o, str):
                _encoding = self.encoding
                if _encoding is not None and _encoding != 'utf-8':
                    o = o.decode(_encoding)
            if self.ensure_ascii:
                return encode_basestring_ascii(o)
            return encode_basestring(o)
        # This doesn't pass the iterator directly to ''.join() because the
        # exceptions aren't as detailed.  The list call should be roughly
        # equivalent to the PySequence_Fast that ''.join() would do.
        chunks = self.iterencode(o, _one_shot=True)
        if not isinstance(chunks, (list, tuple)):
            chunks = list(chunks)
        return ''.join(chunks)

    def iterencode(self, o, _one_shot=False):
        """Encode the given object and yield each string
        representation as available.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)

        """
        # ``markers`` is the table used for circular-reference detection;
        # None disables the check.
        if self.check_circular:
            markers = {}
        else:
            markers = None

        if self.ensure_ascii:
            _encoder = encode_basestring_ascii
        else:
            _encoder = encode_basestring

        # Byte strings in a non-UTF-8 encoding are decoded before escaping.
        # ``None`` means "do not decode", matching the guard in encode();
        # without the None test this would call o.decode(None).
        encoding = self.encoding
        if encoding is not None and encoding != 'utf-8':
            def _encoder(o, _orig_encoder=_encoder, _encoding=encoding):
                if isinstance(o, str):
                    o = o.decode(_encoding)
                return _orig_encoder(o)

        def floatstr(o, allow_nan=self.allow_nan,
                     _repr=FLOAT_REPR, _inf=INFINITY, _neginf=-INFINITY):
            # Check for specials.  Note that this type of test is processor
            # and/or platform-specific, so do tests which don't depend on the
            # internals.
            if o != o:
                text = 'NaN'
            elif o == _inf:
                text = 'Infinity'
            elif o == _neginf:
                text = '-Infinity'
            else:
                return _repr(o)

            if not allow_nan:
                raise ValueError(
                    "Out of range float values are not JSON compliant: " +
                    repr(o))

            return text

        # The C encoder is only chosen for compact, unsorted, one-shot
        # output.  Test ``indent is None`` rather than its truthiness so
        # that indent=0 (newlines only) still takes the Python path.
        if (_one_shot and c_make_encoder is not None
                and self.indent is None and not self.sort_keys):
            _iterencode = c_make_encoder(
                markers, self.default, _encoder, self.indent,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, self.allow_nan)
        else:
            _iterencode = _make_iterencode(
                markers, self.default, _encoder, self.indent, floatstr,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, _one_shot)
        return _iterencode(o, 0)
266 def _make_iterencode(markers
, _default
, _encoder
, _indent
, _floatstr
,
267 _key_separator
, _item_separator
, _sort_keys
, _skipkeys
, _one_shot
,
268 ## HACK: hand-optimized bytecode; turn globals into locals
271 ValueError=ValueError,
272 basestring
=basestring
,
277 isinstance=isinstance,
284 def _iterencode_list(lst
, _current_indent_level
):
288 if markers
is not None:
290 if markerid
in markers
:
291 raise ValueError("Circular reference detected")
292 markers
[markerid
] = lst
294 if _indent
is not None:
295 _current_indent_level
+= 1
296 newline_indent
= '\n' + (' ' * (_indent
* _current_indent_level
))
297 separator
= _item_separator
+ newline_indent
298 buf
+= newline_indent
300 newline_indent
= None
301 separator
= _item_separator
308 if isinstance(value
, basestring
):
309 yield buf
+ _encoder(value
)
316 elif isinstance(value
, (int, long)):
317 yield buf
+ str(value
)
318 elif isinstance(value
, float):
319 yield buf
+ _floatstr(value
)
322 if isinstance(value
, (list, tuple)):
323 chunks
= _iterencode_list(value
, _current_indent_level
)
324 elif isinstance(value
, dict):
325 chunks
= _iterencode_dict(value
, _current_indent_level
)
327 chunks
= _iterencode(value
, _current_indent_level
)
330 if newline_indent
is not None:
331 _current_indent_level
-= 1
332 yield '\n' + (' ' * (_indent
* _current_indent_level
))
334 if markers
is not None:
335 del markers
[markerid
]
337 def _iterencode_dict(dct
, _current_indent_level
):
341 if markers
is not None:
343 if markerid
in markers
:
344 raise ValueError("Circular reference detected")
345 markers
[markerid
] = dct
347 if _indent
is not None:
348 _current_indent_level
+= 1
349 newline_indent
= '\n' + (' ' * (_indent
* _current_indent_level
))
350 item_separator
= _item_separator
+ newline_indent
353 newline_indent
= None
354 item_separator
= _item_separator
358 items
.sort(key
=lambda kv
: kv
[0])
360 items
= dct
.iteritems()
361 for key
, value
in items
:
362 if isinstance(key
, basestring
):
364 # JavaScript is weakly typed for these, so it makes sense to
365 # also allow them. Many encoders seem to do something like this.
366 elif isinstance(key
, float):
374 elif isinstance(key
, (int, long)):
379 raise TypeError("key " + repr(key
) + " is not a string")
386 if isinstance(value
, basestring
):
387 yield _encoder(value
)
394 elif isinstance(value
, (int, long)):
396 elif isinstance(value
, float):
397 yield _floatstr(value
)
399 if isinstance(value
, (list, tuple)):
400 chunks
= _iterencode_list(value
, _current_indent_level
)
401 elif isinstance(value
, dict):
402 chunks
= _iterencode_dict(value
, _current_indent_level
)
404 chunks
= _iterencode(value
, _current_indent_level
)
407 if newline_indent
is not None:
408 _current_indent_level
-= 1
409 yield '\n' + (' ' * (_indent
* _current_indent_level
))
411 if markers
is not None:
412 del markers
[markerid
]
414 def _iterencode(o
, _current_indent_level
):
415 if isinstance(o
, basestring
):
423 elif isinstance(o
, (int, long)):
425 elif isinstance(o
, float):
427 elif isinstance(o
, (list, tuple)):
428 for chunk
in _iterencode_list(o
, _current_indent_level
):
430 elif isinstance(o
, dict):
431 for chunk
in _iterencode_dict(o
, _current_indent_level
):
434 if markers
is not None:
436 if markerid
in markers
:
437 raise ValueError("Circular reference detected")
438 markers
[markerid
] = o
440 for chunk
in _iterencode(o
, _current_indent_level
):
442 if markers
is not None:
443 del markers
[markerid
]