thirdparty/simplejson/encoder.py

   1 """Implementation of JSONEncoder
   2 """
   3 import re
   4
   5 try:
   6     from simplejson._speedups import encode_basestring_ascii as \
   7         c_encode_basestring_ascii
   8 except ImportError:
   9     c_encode_basestring_ascii = None
  10 try:
  11     from simplejson._speedups import make_encoder as c_make_encoder
  12 except ImportError:
  13     c_make_encoder = None
  14
  15 from simplejson.decoder import PosInf
  16
  17 ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
  18 ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
  19 HAS_UTF8 = re.compile(r'[\x80-\xff]')
  20 ESCAPE_DCT = {
  21     '\\': '\\\\',
  22     '"': '\\"',
  23     '\b': '\\b',
  24     '\f': '\\f',
  25     '\n': '\\n',
  26     '\r': '\\r',
  27     '\t': '\\t',
  28 }
  29 for i in range(0x20):
  30     #ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i))
  31     ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))
  32
  33 FLOAT_REPR = repr
  34
  35 def encode_basestring(s):
  36     """Return a JSON representation of a Python string
  37
  38     """
  39     if isinstance(s, str) and HAS_UTF8.search(s) is not None:
  40         s = s.decode('utf-8')
  41     def replace(match):
  42         return ESCAPE_DCT[match.group(0)]
  43     return u'"' + ESCAPE.sub(replace, s) + u'"'
  44
  45
  46 def py_encode_basestring_ascii(s):
  47     """Return an ASCII-only JSON representation of a Python string
  48
  49     """
  50     if isinstance(s, str) and HAS_UTF8.search(s) is not None:
  51         s = s.decode('utf-8')
  52     def replace(match):
  53         s = match.group(0)
  54         try:
  55             return ESCAPE_DCT[s]
  56         except KeyError:
  57             n = ord(s)
  58             if n < 0x10000:
  59                 #return '\\u{0:04x}'.format(n)
  60                 return '\\u%04x' % (n,)
  61             else:
  62                 # surrogate pair
  63                 n -= 0x10000
  64                 s1 = 0xd800 | ((n >> 10) & 0x3ff)
  65                 s2 = 0xdc00 | (n & 0x3ff)
  66                 #return '\\u{0:04x}\\u{1:04x}'.format(s1, s2)
  67                 return '\\u%04x\\u%04x' % (s1, s2)
  68     return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'
  69
  70
  71 encode_basestring_ascii = (
  72     c_encode_basestring_ascii or py_encode_basestring_ascii)
  73
  74 class JSONEncoder(object):
  75     """Extensible JSON <http://json.org> encoder for Python data structures.
  76
  77     Supports the following objects and types by default:
  78
  79     +-------------------+---------------+
  80     | Python            | JSON          |
  81     +===================+===============+
  82     | dict              | object        |
  83     +-------------------+---------------+
  84     | list, tuple       | array         |
  85     +-------------------+---------------+
  86     | str, unicode      | string        |
  87     +-------------------+---------------+
  88     | int, long, float  | number        |
  89     +-------------------+---------------+
  90     | True              | true          |
  91     +-------------------+---------------+
  92     | False             | false         |
  93     +-------------------+---------------+
  94     | None              | null          |
  95     +-------------------+---------------+
  96
  97     To extend this to recognize other objects, subclass and implement a
  98     ``.default()`` method with another method that returns a serializable
  99     object for ``o`` if possible, otherwise it should call the superclass
 100     implementation (to raise ``TypeError``).
 101
 102     """
 103     item_separator = ', '
 104     key_separator = ': '
 105     def __init__(self, skipkeys=False, ensure_ascii=True,
 106             check_circular=True, allow_nan=True, sort_keys=False,
 107             indent=None, separators=None, encoding='utf-8', default=None):
 108         """Constructor for JSONEncoder, with sensible defaults.
 109
 110         If skipkeys is false, then it is a TypeError to attempt
 111         encoding of keys that are not str, int, long, float or None.  If
 112         skipkeys is True, such items are simply skipped.
 113
 114         If ensure_ascii is true, the output is guaranteed to be str
 115         objects with all incoming unicode characters escaped.  If
 116         ensure_ascii is false, the output will be unicode object.
 117
 118         If check_circular is true, then lists, dicts, and custom encoded
 119         objects will be checked for circular references during encoding to
 120         prevent an infinite recursion (which would cause an OverflowError).
 121         Otherwise, no such check takes place.
 122
 123         If allow_nan is true, then NaN, Infinity, and -Infinity will be
 124         encoded as such.  This behavior is not JSON specification compliant,
 125         but is consistent with most JavaScript based encoders and decoders.
 126         Otherwise, it will be a ValueError to encode such floats.
 127
 128         If sort_keys is true, then the output of dictionaries will be
 129         sorted by key; this is useful for regression tests to ensure
 130         that JSON serializations can be compared on a day-to-day basis.
 131
 132         If indent is a string, then JSON array elements and object members
 133         will be pretty-printed with a newline followed by that string repeated
 134         for each level of nesting. ``None`` (the default) selects the most compact
 135         representation without any newlines. For backwards compatibility with
 136         versions of simplejson earlier than 2.1.0, an integer is also accepted
 137         and is converted to a string with that many spaces.
 138
 139         If specified, separators should be a (item_separator, key_separator)
 140         tuple.  The default is (', ', ': ').  To get the most compact JSON
 141         representation you should specify (',', ':') to eliminate whitespace.
 142
 143         If specified, default is a function that gets called for objects
 144         that can't otherwise be serialized.  It should return a JSON encodable
 145         version of the object or raise a ``TypeError``.
 146
 147         If encoding is not None, then all input strings will be
 148         transformed into unicode using that encoding prior to JSON-encoding.
 149         The default is UTF-8.
 150
 151         """
 152
 153         self.skipkeys = skipkeys
 154         self.ensure_ascii = ensure_ascii
 155         self.check_circular = check_circular
 156         self.allow_nan = allow_nan
 157         self.sort_keys = sort_keys
 158         if isinstance(indent, (int, long)):
 159             indent = ' ' * indent
 160         self.indent = indent
 161         if separators is not None:
 162             self.item_separator, self.key_separator = separators
 163         if default is not None:
 164             self.default = default
 165         self.encoding = encoding
 166
 167     def default(self, o):
 168         """Implement this method in a subclass such that it returns
 169         a serializable object for ``o``, or calls the base implementation
 170         (to raise a ``TypeError``).
 171
 172         For example, to support arbitrary iterators, you could
 173         implement default like this::
 174
 175             def default(self, o):
 176                 try:
 177                     iterable = iter(o)
 178                 except TypeError:
 179                     pass
 180                 else:
 181                     return list(iterable)
 182                 return JSONEncoder.default(self, o)
 183
 184         """
 185         raise TypeError(repr(o) + " is not JSON serializable")
 186
 187     def encode(self, o):
 188         """Return a JSON string representation of a Python data structure.
 189
 190         >>> from simplejson import JSONEncoder
 191         >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
 192         '{"foo": ["bar", "baz"]}'
 193
 194         """
 195         # This is for extremely simple cases and benchmarks.
 196         if isinstance(o, basestring):
 197             if isinstance(o, str):
 198                 _encoding = self.encoding
 199                 if (_encoding is not None
 200                         and not (_encoding == 'utf-8')):
 201                     o = o.decode(_encoding)
 202             if self.ensure_ascii:
 203                 return encode_basestring_ascii(o)
 204             else:
 205                 return encode_basestring(o)
 206         # This doesn't pass the iterator directly to ''.join() because the
 207         # exceptions aren't as detailed.  The list call should be roughly
 208         # equivalent to the PySequence_Fast that ''.join() would do.
 209         chunks = self.iterencode(o, _one_shot=True)
 210         if not isinstance(chunks, (list, tuple)):
 211             chunks = list(chunks)
 212         if self.ensure_ascii:
 213             return ''.join(chunks)
 214         else:
 215             return u''.join(chunks)
 216
 217     def iterencode(self, o, _one_shot=False):
 218         """Encode the given object and yield each string
 219         representation as available.
 220
 221         For example::
 222
 223             for chunk in JSONEncoder().iterencode(bigobject):
 224                 mysocket.write(chunk)
 225
 226         """
 227         if self.check_circular:
 228             markers = {}
 229         else:
 230             markers = None
 231         if self.ensure_ascii:
 232             _encoder = encode_basestring_ascii
 233         else:
 234             _encoder = encode_basestring
 235         if self.encoding != 'utf-8':
 236             def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding):
 237                 if isinstance(o, str):
 238                     o = o.decode(_encoding)
 239                 return _orig_encoder(o)
 240
 241         def floatstr(o, allow_nan=self.allow_nan,
 242                 _repr=FLOAT_REPR, _inf=PosInf, _neginf=-PosInf):
 243             # Check for specials. Note that this type of test is processor
 244             # and/or platform-specific, so do tests which don't depend on
 245             # the internals.
 246
 247             if o != o:
 248                 text = 'NaN'
 249             elif o == _inf:
 250                 text = 'Infinity'
 251             elif o == _neginf:
 252                 text = '-Infinity'
 253             else:
 254                 return _repr(o)
 255
 256             if not allow_nan:
 257                 raise ValueError(
 258                     "Out of range float values are not JSON compliant: " +
 259                     repr(o))
 260
 261             return text
 262
 263
 264         if (_one_shot and c_make_encoder is not None
 265                 and not self.indent and not self.sort_keys):
 266             _iterencode = c_make_encoder(
 267                 markers, self.default, _encoder, self.indent,
 268                 self.key_separator, self.item_separator, self.sort_keys,
 269                 self.skipkeys, self.allow_nan)
 270         else:
 271             _iterencode = _make_iterencode(
 272                 markers, self.default, _encoder, self.indent, floatstr,
 273                 self.key_separator, self.item_separator, self.sort_keys,
 274                 self.skipkeys, _one_shot)
 275         return _iterencode(o, 0)
 276
def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
        _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
        ## HACK: hand-optimized bytecode; turn globals into locals
        False=False,
        True=True,
        ValueError=ValueError,
        basestring=basestring,
        dict=dict,
        float=float,
        id=id,
        int=int,
        isinstance=isinstance,
        list=list,
        long=long,
        str=str,
        tuple=tuple,
    ):
    """Build and return the pure-Python ``_iterencode`` generator function.

    The returned function takes ``(o, _current_indent_level)`` and yields
    the JSON text for ``o`` as a sequence of string chunks.

    ``markers`` is either ``None`` (no circular-reference checking) or a
    dict keyed by ``id()`` of the containers currently being encoded.
    ``_default`` is called for objects of unsupported types, ``_encoder``
    turns strings into JSON string literals and ``_floatstr`` renders
    floats.  ``_indent`` is the per-level indentation string or ``None``.
    ``_one_shot`` is accepted but not used in this function body.

    The keyword arguments after the HACK comment are not meant to be passed
    by callers; they only bind builtins as locals of the closure.

    """

    def _iterencode_list(lst, _current_indent_level):
        """Yield the JSON array chunks for the list or tuple ``lst``.

        Raises ValueError if ``lst`` is already being encoded higher up
        (only when ``markers`` is not None).
        """
        if not lst:
            yield '[]'
            return
        if markers is not None:
            markerid = id(lst)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = lst
        # ``buf`` holds the text that must precede the next element: the
        # opening bracket for the first element, the separator afterwards.
        buf = '['
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + (_indent * _current_indent_level)
            separator = _item_separator + newline_indent
            buf += newline_indent
        else:
            newline_indent = None
            separator = _item_separator
        first = True
        for value in lst:
            if first:
                first = False
            else:
                buf = separator
            if isinstance(value, basestring):
                yield buf + _encoder(value)
            elif value is None:
                yield buf + 'null'
            # The identity checks for True/False come before the int check
            # because bool is a subclass of int.
            elif value is True:
                yield buf + 'true'
            elif value is False:
                yield buf + 'false'
            elif isinstance(value, (int, long)):
                yield buf + str(value)
            elif isinstance(value, float):
                yield buf + _floatstr(value)
            else:
                # Containers and custom objects: emit the prefix on its own,
                # then delegate to the matching generator.
                yield buf
                if isinstance(value, (list, tuple)):
                    chunks = _iterencode_list(value, _current_indent_level)
                elif isinstance(value, dict):
                    chunks = _iterencode_dict(value, _current_indent_level)
                else:
                    chunks = _iterencode(value, _current_indent_level)
                for chunk in chunks:
                    yield chunk
        if newline_indent is not None:
            # Closing bracket goes on its own line at the outer indent level.
            _current_indent_level -= 1
            yield '\n' + (_indent * _current_indent_level)
        yield ']'
        if markers is not None:
            # Done with this container; it may legitimately appear again
            # elsewhere in the structure.
            del markers[markerid]

    def _iterencode_dict(dct, _current_indent_level):
        """Yield the JSON object chunks for the dict ``dct``.

        Raises ValueError on a circular reference (when ``markers`` is not
        None) and TypeError for a key of unsupported type unless
        ``_skipkeys`` is true, in which case that item is skipped.
        """
        if not dct:
            yield '{}'
            return
        if markers is not None:
            markerid = id(dct)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = dct
        yield '{'
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + (_indent * _current_indent_level)
            item_separator = _item_separator + newline_indent
            yield newline_indent
        else:
            newline_indent = None
            item_separator = _item_separator
        first = True
        if _sort_keys:
            # Sort on the original key objects, before any conversion to
            # string below.
            items = dct.items()
            items.sort(key=lambda kv: kv[0])
        else:
            items = dct.iteritems()
        for key, value in items:
            if isinstance(key, basestring):
                pass
            # JavaScript is weakly typed for these, so it makes sense to
            # also allow them.  Many encoders seem to do something like this.
            elif isinstance(key, float):
                key = _floatstr(key)
            elif key is True:
                key = 'true'
            elif key is False:
                key = 'false'
            elif key is None:
                key = 'null'
            elif isinstance(key, (int, long)):
                key = str(key)
            elif _skipkeys:
                continue
            else:
                raise TypeError("key " + repr(key) + " is not a string")
            if first:
                first = False
            else:
                yield item_separator
            # Converted keys are still passed through the string encoder so
            # they come out as quoted JSON strings.
            yield _encoder(key)
            yield _key_separator
            if isinstance(value, basestring):
                yield _encoder(value)
            elif value is None:
                yield 'null'
            elif value is True:
                yield 'true'
            elif value is False:
                yield 'false'
            elif isinstance(value, (int, long)):
                yield str(value)
            elif isinstance(value, float):
                yield _floatstr(value)
            else:
                if isinstance(value, (list, tuple)):
                    chunks = _iterencode_list(value, _current_indent_level)
                elif isinstance(value, dict):
                    chunks = _iterencode_dict(value, _current_indent_level)
                else:
                    chunks = _iterencode(value, _current_indent_level)
                for chunk in chunks:
                    yield chunk
        if newline_indent is not None:
            _current_indent_level -= 1
            yield '\n' + (_indent * _current_indent_level)
        yield '}'
        if markers is not None:
            del markers[markerid]

    def _iterencode(o, _current_indent_level):
        """Yield the JSON chunks for an arbitrary object ``o``.

        Objects of unsupported types are converted with ``_default`` and the
        result is encoded recursively.
        """
        if isinstance(o, basestring):
            yield _encoder(o)
        elif o is None:
            yield 'null'
        elif o is True:
            yield 'true'
        elif o is False:
            yield 'false'
        elif isinstance(o, (int, long)):
            yield str(o)
        elif isinstance(o, float):
            yield _floatstr(o)
        elif isinstance(o, (list, tuple)):
            for chunk in _iterencode_list(o, _current_indent_level):
                yield chunk
        elif isinstance(o, dict):
            for chunk in _iterencode_dict(o, _current_indent_level):
                yield chunk
        else:
            # Track the original object so that a ``_default`` result that
            # leads back to it is reported as a circular reference.
            if markers is not None:
                markerid = id(o)
                if markerid in markers:
                    raise ValueError("Circular reference detected")
                markers[markerid] = o
            o = _default(o)
            for chunk in _iterencode(o, _current_indent_level):
                yield chunk
            if markers is not None:
                del markers[markerid]

    return _iterencode