"""Implementation of JSONEncoder."""
import re

# The C accelerators are optional: fall back to ``None`` so that callers can
# test for their presence and use the pure-Python implementations instead.
try:
    from simplejson._speedups import encode_basestring_ascii as \
        c_encode_basestring_ascii
except ImportError:
    c_encode_basestring_ascii = None
try:
    from simplejson._speedups import make_encoder as c_make_encoder
except ImportError:
    c_make_encoder = None

from simplejson.decoder import PosInf
# Characters that must be escaped inside a JSON string when non-ASCII
# output is allowed: control characters, backslash and double quote.
ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
# Characters that must be escaped for ASCII-only output: backslash, double
# quote, and anything outside the printable ASCII range (space through ~).
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
# Detects high bytes (0x80-0xff) in a byte string.
HAS_UTF8 = re.compile(r'[\x80-\xff]')

# Maps a single character to its JSON escape sequence.  The short escapes
# come first; every remaining control character below 0x20 gets a generic
# \uXXXX escape (``setdefault`` keeps the short forms already present).
ESCAPE_DCT = {
    '\\': '\\\\',
    '"': '\\"',
    '\b': '\\b',
    '\f': '\\f',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
}
for i in range(0x20):
    #ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i))
    ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))

# NOTE(review): FLOAT_REPR is used as the default float formatter by
# JSONEncoder.iterencode but its definition is not visible in this view;
# presumably it is the builtin ``repr`` -- confirm against the original file.
FLOAT_REPR = repr
def encode_basestring(s):
    """Return a JSON representation of a Python string.

    The result is a unicode string wrapped in double quotes in which every
    character matched by ``ESCAPE`` (control characters, backslash, double
    quote) is replaced by its entry in ``ESCAPE_DCT``.  Non-ASCII characters
    are passed through unescaped.

    ``s`` may be a ``str`` or ``unicode`` object (Python 2).
    """
    if isinstance(s, str) and HAS_UTF8.search(s) is not None:
        # NOTE(review): the body of this branch is missing from the
        # extracted source; decoding high-byte ``str`` input as UTF-8 is
        # the presumed original behaviour -- confirm.
        s = s.decode('utf-8')

    def replace(match):
        # Every character ESCAPE can match has an entry in ESCAPE_DCT.
        return ESCAPE_DCT[match.group(0)]

    return u'"' + ESCAPE.sub(replace, s) + u'"'
def py_encode_basestring_ascii(s):
    """Return an ASCII-only JSON representation of a Python string.

    Pure-Python fallback used when the C accelerator is unavailable.  Every
    character matched by ``ESCAPE_ASCII`` (backslash, double quote, and
    anything outside printable ASCII) is replaced by an escape sequence:
    the short form from ``ESCAPE_DCT`` when one exists, otherwise ``\\uXXXX``
    (or a UTF-16 surrogate pair for code points above U+FFFF).

    ``s`` may be a ``str`` or ``unicode`` object (Python 2); the result is a
    ``str`` wrapped in double quotes.
    """
    if isinstance(s, str) and HAS_UTF8.search(s) is not None:
        # NOTE(review): the body of this branch is missing from the
        # extracted source; decoding high-byte ``str`` input as UTF-8 is
        # the presumed original behaviour -- confirm.
        s = s.decode('utf-8')

    def replace(match):
        char = match.group(0)
        try:
            return ESCAPE_DCT[char]
        except KeyError:
            n = ord(char)
            if n < 0x10000:
                #return '\\u{0:04x}'.format(n)
                return '\\u%04x' % (n,)
            else:
                # Outside the BMP: emit a UTF-16 surrogate pair.
                n -= 0x10000
                s1 = 0xd800 | ((n >> 10) & 0x3ff)
                s2 = 0xdc00 | (n & 0x3ff)
                #return '\\u{0:04x}\\u{1:04x}'.format(s1, s2)
                return '\\u%04x\\u%04x' % (s1, s2)

    return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'
# Use the C implementation when it was imported successfully, otherwise the
# pure-Python fallback.
encode_basestring_ascii = (
    c_encode_basestring_ascii or py_encode_basestring_ascii)
class JSONEncoder(object):
    """Extensible JSON <http://json.org> encoder for Python data structures.

    Supports the following objects and types by default:

    +-------------------+---------------+
    | Python            | JSON          |
    +===================+===============+
    | dict              | object        |
    +-------------------+---------------+
    | list, tuple       | array         |
    +-------------------+---------------+
    | str, unicode      | string        |
    +-------------------+---------------+
    | int, long, float  | number        |
    +-------------------+---------------+
    | True              | true          |
    +-------------------+---------------+
    | False             | false         |
    +-------------------+---------------+
    | None              | null          |
    +-------------------+---------------+

    To extend this to recognize other objects, subclass and override the
    ``.default()`` method with one that returns a serializable object for
    ``o`` if possible; otherwise it should call the superclass
    implementation (to raise ``TypeError``).

    """
    # Class-level defaults; __init__ overrides them per instance when
    # ``separators`` is given.
    item_separator = ', '
    key_separator = ': '

    def __init__(self, skipkeys=False, ensure_ascii=True,
            check_circular=True, allow_nan=True, sort_keys=False,
            indent=None, separators=None, encoding='utf-8', default=None):
        """Constructor for JSONEncoder, with sensible defaults.

        If skipkeys is false, then it is a TypeError to attempt
        encoding of keys that are not str, int, long, float or None.  If
        skipkeys is True, such items are simply skipped.

        If ensure_ascii is true, the output is guaranteed to be str
        objects with all incoming unicode characters escaped.  If
        ensure_ascii is false, the output will be a unicode object.

        If check_circular is true, then lists, dicts, and custom encoded
        objects will be checked for circular references during encoding to
        prevent an infinite recursion (which would cause an OverflowError).
        Otherwise, no such check takes place.

        If allow_nan is true, then NaN, Infinity, and -Infinity will be
        encoded as such.  This behavior is not JSON specification compliant,
        but is consistent with most JavaScript based encoders and decoders.
        Otherwise, it will be a ValueError to encode such floats.

        If sort_keys is true, then the output of dictionaries will be
        sorted by key; this is useful for regression tests to ensure
        that JSON serializations can be compared on a day-to-day basis.

        If indent is a string, then JSON array elements and object members
        will be pretty-printed with a newline followed by that string repeated
        for each level of nesting.  ``None`` (the default) selects the most
        compact representation without any newlines.  For backwards
        compatibility with versions of simplejson earlier than 2.1.0, an
        integer is also accepted and is converted to a string with that many
        spaces.

        If specified, separators should be a (item_separator, key_separator)
        tuple.  The default is (', ', ': ').  To get the most compact JSON
        representation you should specify (',', ':') to eliminate whitespace.

        If specified, default is a function that gets called for objects
        that can't otherwise be serialized.  It should return a JSON encodable
        version of the object or raise a ``TypeError``.

        If encoding is not None, then all input strings will be
        transformed into unicode using that encoding prior to JSON-encoding.
        The default is UTF-8.

        """
        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.sort_keys = sort_keys
        # Legacy integer indent: convert to that many spaces.
        if isinstance(indent, (int, long)):
            indent = ' ' * indent
        self.indent = indent
        if separators is not None:
            self.item_separator, self.key_separator = separators
        if default is not None:
            # Shadow the ``default`` method with the user-supplied callable.
            self.default = default
        self.encoding = encoding

    def default(self, o):
        """Implement this method in a subclass such that it returns
        a serializable object for ``o``, or calls the base implementation
        (to raise a ``TypeError``).

        For example, to support arbitrary iterators, you could
        implement default like this::

            def default(self, o):
                try:
                    iterable = iter(o)
                except TypeError:
                    pass
                else:
                    return list(iterable)
                return JSONEncoder.default(self, o)

        """
        raise TypeError(repr(o) + " is not JSON serializable")

    def encode(self, o):
        """Return a JSON string representation of a Python data structure.

        >>> from simplejson import JSONEncoder
        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
        '{"foo": ["bar", "baz"]}'

        """
        # This is for extremely simple cases and benchmarks.
        if isinstance(o, basestring):
            if isinstance(o, str):
                _encoding = self.encoding
                # UTF-8 byte strings are handled by the string encoders
                # themselves; any other encoding is decoded here first.
                if (_encoding is not None
                        and not (_encoding == 'utf-8')):
                    o = o.decode(_encoding)
            if self.ensure_ascii:
                return encode_basestring_ascii(o)
            else:
                return encode_basestring(o)
        # This doesn't pass the iterator directly to ''.join() because the
        # exceptions aren't as detailed.  The list call should be roughly
        # equivalent to the PySequence_Fast that ''.join() would do.
        chunks = self.iterencode(o, _one_shot=True)
        if not isinstance(chunks, (list, tuple)):
            chunks = list(chunks)
        if self.ensure_ascii:
            return ''.join(chunks)
        else:
            return u''.join(chunks)

    def iterencode(self, o, _one_shot=False):
        """Encode the given object and yield each string
        representation as available.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)

        ``_one_shot`` is an internal flag set by ``encode()``; it allows the
        C encoder to be used when no indentation or key sorting is requested.
        """
        # ``markers`` tracks the containers currently being encoded so that
        # circular references can be detected; ``None`` disables the check.
        if self.check_circular:
            markers = {}
        else:
            markers = None
        if self.ensure_ascii:
            _encoder = encode_basestring_ascii
        else:
            _encoder = encode_basestring
        if self.encoding != 'utf-8':
            # Wrap the string encoder so byte strings are first decoded
            # with the configured (non-UTF-8) encoding.
            def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding):
                if isinstance(o, str):
                    o = o.decode(_encoding)
                return _orig_encoder(o)

        def floatstr(o, allow_nan=self.allow_nan,
                _repr=FLOAT_REPR, _inf=PosInf, _neginf=-PosInf):
            # Check for specials. Note that this type of test is processor
            # and/or platform-specific, so do tests which don't depend on
            # the internals.
            # NOTE(review): the comparisons below are missing from the
            # extracted source and were restored from the documented
            # allow_nan contract -- confirm against the original file.
            if o != o:
                text = 'NaN'
            elif o == _inf:
                text = 'Infinity'
            elif o == _neginf:
                text = '-Infinity'
            else:
                return _repr(o)

            if not allow_nan:
                raise ValueError(
                    "Out of range float values are not JSON compliant: " +
                    repr(o))

            return text

        if (_one_shot and c_make_encoder is not None
                and not self.indent and not self.sort_keys):
            _iterencode = c_make_encoder(
                markers, self.default, _encoder, self.indent,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, self.allow_nan)
        else:
            _iterencode = _make_iterencode(
                markers, self.default, _encoder, self.indent, floatstr,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, _one_shot)
        return _iterencode(o, 0)
277 def _make_iterencode(markers
, _default
, _encoder
, _indent
, _floatstr
,
278 _key_separator
, _item_separator
, _sort_keys
, _skipkeys
, _one_shot
,
279 ## HACK: hand-optimized bytecode; turn globals into locals
282 ValueError=ValueError,
283 basestring
=basestring
,
288 isinstance=isinstance,
295 def _iterencode_list(lst
, _current_indent_level
):
299 if markers
is not None:
301 if markerid
in markers
:
302 raise ValueError("Circular reference detected")
303 markers
[markerid
] = lst
305 if _indent
is not None:
306 _current_indent_level
+= 1
307 newline_indent
= '\n' + (_indent
* _current_indent_level
)
308 separator
= _item_separator
+ newline_indent
309 buf
+= newline_indent
311 newline_indent
= None
312 separator
= _item_separator
319 if isinstance(value
, basestring
):
320 yield buf
+ _encoder(value
)
327 elif isinstance(value
, (int, long)):
328 yield buf
+ str(value
)
329 elif isinstance(value
, float):
330 yield buf
+ _floatstr(value
)
333 if isinstance(value
, (list, tuple)):
334 chunks
= _iterencode_list(value
, _current_indent_level
)
335 elif isinstance(value
, dict):
336 chunks
= _iterencode_dict(value
, _current_indent_level
)
338 chunks
= _iterencode(value
, _current_indent_level
)
341 if newline_indent
is not None:
342 _current_indent_level
-= 1
343 yield '\n' + (_indent
* _current_indent_level
)
345 if markers
is not None:
346 del markers
[markerid
]
348 def _iterencode_dict(dct
, _current_indent_level
):
352 if markers
is not None:
354 if markerid
in markers
:
355 raise ValueError("Circular reference detected")
356 markers
[markerid
] = dct
358 if _indent
is not None:
359 _current_indent_level
+= 1
360 newline_indent
= '\n' + (_indent
* _current_indent_level
)
361 item_separator
= _item_separator
+ newline_indent
364 newline_indent
= None
365 item_separator
= _item_separator
369 items
.sort(key
=lambda kv
: kv
[0])
371 items
= dct
.iteritems()
372 for key
, value
in items
:
373 if isinstance(key
, basestring
):
375 # JavaScript is weakly typed for these, so it makes sense to
376 # also allow them. Many encoders seem to do something like this.
377 elif isinstance(key
, float):
385 elif isinstance(key
, (int, long)):
390 raise TypeError("key " + repr(key
) + " is not a string")
397 if isinstance(value
, basestring
):
398 yield _encoder(value
)
405 elif isinstance(value
, (int, long)):
407 elif isinstance(value
, float):
408 yield _floatstr(value
)
410 if isinstance(value
, (list, tuple)):
411 chunks
= _iterencode_list(value
, _current_indent_level
)
412 elif isinstance(value
, dict):
413 chunks
= _iterencode_dict(value
, _current_indent_level
)
415 chunks
= _iterencode(value
, _current_indent_level
)
418 if newline_indent
is not None:
419 _current_indent_level
-= 1
420 yield '\n' + (_indent
* _current_indent_level
)
422 if markers
is not None:
423 del markers
[markerid
]
425 def _iterencode(o
, _current_indent_level
):
426 if isinstance(o
, basestring
):
434 elif isinstance(o
, (int, long)):
436 elif isinstance(o
, float):
438 elif isinstance(o
, (list, tuple)):
439 for chunk
in _iterencode_list(o
, _current_indent_level
):
441 elif isinstance(o
, dict):
442 for chunk
in _iterencode_dict(o
, _current_indent_level
):
445 if markers
is not None:
447 if markerid
in markers
:
448 raise ValueError("Circular reference detected")
449 markers
[markerid
] = o
451 for chunk
in _iterencode(o
, _current_indent_level
):
453 if markers
is not None:
454 del markers
[markerid
]