1 """Implementation of JSONEncoder
6 from simplejson
._speedups
import encode_basestring_ascii
as \
7 c_encode_basestring_ascii
9 c_encode_basestring_ascii
= None
11 from simplejson
._speedups
import make_encoder
as c_make_encoder
15 from simplejson
.decoder
import PosInf
17 ESCAPE
= re
.compile(r
'[\x00-\x1f\\"\b\f\n\r\t]')
18 ESCAPE_ASCII
= re
.compile(r
'([\\"]|[^\ -~])')
19 HAS_UTF8
= re
.compile(r
'[\x80-\xff]')
30 #ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i))
31 ESCAPE_DCT
.setdefault(chr(i
), '\\u%04x' % (i
,))
35 def encode_basestring(s
):
36 """Return a JSON representation of a Python string
39 if isinstance(s
, str) and HAS_UTF8
.search(s
) is not None:
42 return ESCAPE_DCT
[match
.group(0)]
43 return u
'"' + ESCAPE
.sub(replace
, s
) + u
'"'
46 def py_encode_basestring_ascii(s
):
47 """Return an ASCII-only JSON representation of a Python string
50 if isinstance(s
, str) and HAS_UTF8
.search(s
) is not None:
59 #return '\\u{0:04x}'.format(n)
60 return '\\u%04x' % (n
,)
64 s1
= 0xd800 |
((n
>> 10) & 0x3ff)
65 s2
= 0xdc00 |
(n
& 0x3ff)
66 #return '\\u{0:04x}\\u{1:04x}'.format(s1, s2)
67 return '\\u%04x\\u%04x' % (s1
, s2
)
68 return '"' + str(ESCAPE_ASCII
.sub(replace
, s
)) + '"'
71 encode_basestring_ascii
= (
72 c_encode_basestring_ascii
or py_encode_basestring_ascii
)
74 class JSONEncoder(object):
75 """Extensible JSON <http://json.org> encoder for Python data structures.
77 Supports the following objects and types by default:
79 +-------------------+---------------+
81 +===================+===============+
83 +-------------------+---------------+
84 | list, tuple | array |
85 +-------------------+---------------+
86 | str, unicode | string |
87 +-------------------+---------------+
88 | int, long, float | number |
89 +-------------------+---------------+
91 +-------------------+---------------+
93 +-------------------+---------------+
95 +-------------------+---------------+
97 To extend this to recognize other objects, subclass and implement a
98 ``.default()`` method with another method that returns a serializable
99 object for ``o`` if possible, otherwise it should call the superclass
100 implementation (to raise ``TypeError``).
103 item_separator
= ', '
105 def __init__(self
, skipkeys
=False, ensure_ascii
=True,
106 check_circular
=True, allow_nan
=True, sort_keys
=False,
107 indent
=None, separators
=None, encoding
='utf-8', default
=None):
108 """Constructor for JSONEncoder, with sensible defaults.
110 If skipkeys is false, then it is a TypeError to attempt
111 encoding of keys that are not str, int, long, float or None. If
112 skipkeys is True, such items are simply skipped.
114 If ensure_ascii is true, the output is guaranteed to be str
115 objects with all incoming unicode characters escaped. If
116 ensure_ascii is false, the output will be unicode object.
118 If check_circular is true, then lists, dicts, and custom encoded
119 objects will be checked for circular references during encoding to
120 prevent an infinite recursion (which would cause an OverflowError).
121 Otherwise, no such check takes place.
123 If allow_nan is true, then NaN, Infinity, and -Infinity will be
124 encoded as such. This behavior is not JSON specification compliant,
125 but is consistent with most JavaScript based encoders and decoders.
126 Otherwise, it will be a ValueError to encode such floats.
128 If sort_keys is true, then the output of dictionaries will be
129 sorted by key; this is useful for regression tests to ensure
130 that JSON serializations can be compared on a day-to-day basis.
132 If indent is a non-negative integer, then JSON array
133 elements and object members will be pretty-printed with that
134 indent level. An indent level of 0 will only insert newlines.
135 None is the most compact representation.
137 If specified, separators should be a (item_separator, key_separator)
138 tuple. The default is (', ', ': '). To get the most compact JSON
139 representation you should specify (',', ':') to eliminate whitespace.
141 If specified, default is a function that gets called for objects
142 that can't otherwise be serialized. It should return a JSON encodable
143 version of the object or raise a ``TypeError``.
145 If encoding is not None, then all input strings will be
146 transformed into unicode using that encoding prior to JSON-encoding.
147 The default is UTF-8.
151 self
.skipkeys
= skipkeys
152 self
.ensure_ascii
= ensure_ascii
153 self
.check_circular
= check_circular
154 self
.allow_nan
= allow_nan
155 self
.sort_keys
= sort_keys
157 if separators
is not None:
158 self
.item_separator
, self
.key_separator
= separators
159 if default
is not None:
160 self
.default
= default
161 self
.encoding
= encoding
163 def default(self
, o
):
164 """Implement this method in a subclass such that it returns
165 a serializable object for ``o``, or calls the base implementation
166 (to raise a ``TypeError``).
168 For example, to support arbitrary iterators, you could
169 implement default like this::
171 def default(self, o):
177 return list(iterable)
178 return JSONEncoder.default(self, o)
181 raise TypeError(repr(o
) + " is not JSON serializable")
184 """Return a JSON string representation of a Python data structure.
186 >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
187 '{"foo": ["bar", "baz"]}'
190 # This is for extremely simple cases and benchmarks.
191 if isinstance(o
, basestring
):
192 if isinstance(o
, str):
193 _encoding
= self
.encoding
194 if (_encoding
is not None
195 and not (_encoding
== 'utf-8')):
196 o
= o
.decode(_encoding
)
197 if self
.ensure_ascii
:
198 return encode_basestring_ascii(o
)
200 return encode_basestring(o
)
201 # This doesn't pass the iterator directly to ''.join() because the
202 # exceptions aren't as detailed. The list call should be roughly
203 # equivalent to the PySequence_Fast that ''.join() would do.
204 chunks
= self
.iterencode(o
, _one_shot
=True)
205 if not isinstance(chunks
, (list, tuple)):
206 chunks
= list(chunks
)
207 if self
.ensure_ascii
:
208 return ''.join(chunks
)
210 return u
''.join(chunks
)
212 def iterencode(self
, o
, _one_shot
=False):
213 """Encode the given object and yield each string
214 representation as available.
218 for chunk in JSONEncoder().iterencode(bigobject):
219 mysocket.write(chunk)
222 if self
.check_circular
:
226 if self
.ensure_ascii
:
227 _encoder
= encode_basestring_ascii
229 _encoder
= encode_basestring
230 if self
.encoding
!= 'utf-8':
231 def _encoder(o
, _orig_encoder
=_encoder
, _encoding
=self
.encoding
):
232 if isinstance(o
, str):
233 o
= o
.decode(_encoding
)
234 return _orig_encoder(o
)
236 def floatstr(o
, allow_nan
=self
.allow_nan
,
237 _repr
=FLOAT_REPR
, _inf
=PosInf
, _neginf
=-PosInf
):
238 # Check for specials. Note that this type of test is processor
239 # and/or platform-specific, so do tests which don't depend on
253 "Out of range float values are not JSON compliant: " +
259 if (_one_shot
and c_make_encoder
is not None
260 and not self
.indent
and not self
.sort_keys
):
261 _iterencode
= c_make_encoder(
262 markers
, self
.default
, _encoder
, self
.indent
,
263 self
.key_separator
, self
.item_separator
, self
.sort_keys
,
264 self
.skipkeys
, self
.allow_nan
)
266 _iterencode
= _make_iterencode(
267 markers
, self
.default
, _encoder
, self
.indent
, floatstr
,
268 self
.key_separator
, self
.item_separator
, self
.sort_keys
,
269 self
.skipkeys
, _one_shot
)
270 return _iterencode(o
, 0)
272 def _make_iterencode(markers
, _default
, _encoder
, _indent
, _floatstr
,
273 _key_separator
, _item_separator
, _sort_keys
, _skipkeys
, _one_shot
,
274 ## HACK: hand-optimized bytecode; turn globals into locals
277 ValueError=ValueError,
278 basestring
=basestring
,
283 isinstance=isinstance,
290 def _iterencode_list(lst
, _current_indent_level
):
294 if markers
is not None:
296 if markerid
in markers
:
297 raise ValueError("Circular reference detected")
298 markers
[markerid
] = lst
300 if _indent
is not None:
301 _current_indent_level
+= 1
302 newline_indent
= '\n' + (' ' * (_indent
* _current_indent_level
))
303 separator
= _item_separator
+ newline_indent
304 buf
+= newline_indent
306 newline_indent
= None
307 separator
= _item_separator
314 if isinstance(value
, basestring
):
315 yield buf
+ _encoder(value
)
322 elif isinstance(value
, (int, long)):
323 yield buf
+ str(value
)
324 elif isinstance(value
, float):
325 yield buf
+ _floatstr(value
)
328 if isinstance(value
, (list, tuple)):
329 chunks
= _iterencode_list(value
, _current_indent_level
)
330 elif isinstance(value
, dict):
331 chunks
= _iterencode_dict(value
, _current_indent_level
)
333 chunks
= _iterencode(value
, _current_indent_level
)
336 if newline_indent
is not None:
337 _current_indent_level
-= 1
338 yield '\n' + (' ' * (_indent
* _current_indent_level
))
340 if markers
is not None:
341 del markers
[markerid
]
343 def _iterencode_dict(dct
, _current_indent_level
):
347 if markers
is not None:
349 if markerid
in markers
:
350 raise ValueError("Circular reference detected")
351 markers
[markerid
] = dct
353 if _indent
is not None:
354 _current_indent_level
+= 1
355 newline_indent
= '\n' + (' ' * (_indent
* _current_indent_level
))
356 item_separator
= _item_separator
+ newline_indent
359 newline_indent
= None
360 item_separator
= _item_separator
364 items
.sort(key
=lambda kv
: kv
[0])
366 items
= dct
.iteritems()
367 for key
, value
in items
:
368 if isinstance(key
, basestring
):
370 # JavaScript is weakly typed for these, so it makes sense to
371 # also allow them. Many encoders seem to do something like this.
372 elif isinstance(key
, float):
380 elif isinstance(key
, (int, long)):
385 raise TypeError("key " + repr(key
) + " is not a string")
392 if isinstance(value
, basestring
):
393 yield _encoder(value
)
400 elif isinstance(value
, (int, long)):
402 elif isinstance(value
, float):
403 yield _floatstr(value
)
405 if isinstance(value
, (list, tuple)):
406 chunks
= _iterencode_list(value
, _current_indent_level
)
407 elif isinstance(value
, dict):
408 chunks
= _iterencode_dict(value
, _current_indent_level
)
410 chunks
= _iterencode(value
, _current_indent_level
)
413 if newline_indent
is not None:
414 _current_indent_level
-= 1
415 yield '\n' + (' ' * (_indent
* _current_indent_level
))
417 if markers
is not None:
418 del markers
[markerid
]
420 def _iterencode(o
, _current_indent_level
):
421 if isinstance(o
, basestring
):
429 elif isinstance(o
, (int, long)):
431 elif isinstance(o
, float):
433 elif isinstance(o
, (list, tuple)):
434 for chunk
in _iterencode_list(o
, _current_indent_level
):
436 elif isinstance(o
, dict):
437 for chunk
in _iterencode_dict(o
, _current_indent_level
):
440 if markers
is not None:
442 if markerid
in markers
:
443 raise ValueError("Circular reference detected")
444 markers
[markerid
] = o
446 for chunk
in _iterencode(o
, _current_indent_level
):
448 if markers
is not None:
449 del markers
[markerid
]