Rename to slixmpp
[slixmpp.git] / slixmpp / xmlstream / tostring.py
blobc1e4911a33848a5c15ebfa9088053b199aff0fc8
1 # -*- coding: utf-8 -*-
2 """
3 slixmpp.xmlstream.tostring
4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
6 This module converts XML objects into Unicode strings and
7 intelligently includes namespaces only when necessary to
8 keep the output readable.
10 Part of Slixmpp: The Slick XMPP Library
12 :copyright: (c) 2011 Nathanael C. Fritz
13 :license: MIT, see LICENSE for more details
14 """
16 from __future__ import unicode_literals
18 import sys
20 if sys.version_info < (3, 0):
21 import types
24 XML_NS = 'http://www.w3.org/XML/1998/namespace'
def tostring(xml=None, xmlns='', stream=None, outbuffer='',
             top_level=False, open_only=False, namespaces=None):
    """Serialize an XML object to a Unicode string.

    If an outer xmlns is provided using ``xmlns``, then the current element's
    namespace will not be included if it matches the outer namespace. An
    exception is made for elements that have an attached stream, and appear
    at the stream root.

    Attribute values are always escaped with entity references, even when
    the stream prefers CDATA for text, because a CDATA section inside an
    attribute value is not well-formed XML. Attributes in a namespace that
    is neither the XML namespace nor mapped to a non-empty prefix in
    ``stream.namespace_map`` are not emitted.

    :param XML xml: The XML object to serialize. Required in practice: the
                    ``None`` default is not handled and fails on ``xml.tag``.
    :param string xmlns: Optional namespace of an element wrapping the XML
                         object.
    :param stream: The XML stream that generated the XML object. When given,
                   its ``default_ns``, ``stream_ns``, ``use_cdata`` and
                   ``namespace_map`` attributes are consulted.
    :param string outbuffer: Optional buffer for storing serializations
                             during recursive calls. It is prepended to the
                             returned string.
    :param bool top_level: Indicates that the element is the outermost
                           element.
    :param bool open_only: Only output the opening tag, regardless of
                           content.
    :param set namespaces: Track which namespaces are in active use so
                           that new ones can be declared when needed. The
                           set is shared with recursive calls; entries
                           added for this element are removed again before
                           a full (non ``open_only``) serialization returns.

    :type xml: :py:class:`~xml.etree.ElementTree.Element`
    :type stream: :class:`~slixmpp.xmlstream.xmlstream.XMLStream`

    :rtype: Unicode string
    """
    # Add previous results to the start of the output.
    output = [outbuffer]

    # Split a '{namespace}name' tag into its namespace and local name.
    if '}' in xml.tag:
        braced_ns, tag_name = xml.tag.split('}', 1)
        tag_xmlns = braced_ns[1:]
    else:
        tag_name = xml.tag
        tag_xmlns = ''

    default_ns = ''
    stream_ns = ''
    use_cdata = False

    if stream:
        default_ns = stream.default_ns
        stream_ns = stream.stream_ns
        use_cdata = stream.use_cdata

    # Output the tag name and derived namespace of the element.
    namespace = ''
    if tag_xmlns:
        if top_level:
            # At the stream root the stream's own namespaces are implied.
            declare_ns = tag_xmlns not in [default_ns, xmlns, stream_ns]
        else:
            declare_ns = tag_xmlns != xmlns
        if declare_ns:
            namespace = ' xmlns="%s"' % tag_xmlns
    if stream and tag_xmlns in stream.namespace_map:
        mapped_namespace = stream.namespace_map[tag_xmlns]
        if mapped_namespace:
            tag_name = "%s:%s" % (mapped_namespace, tag_name)
    output.append("<%s" % tag_name)
    output.append(namespace)

    # Output escaped attribute values.
    new_namespaces = set()
    for attrib, value in xml.attrib.items():
        # CDATA sections are not allowed inside attribute values, so use
        # entity escaping here even if the stream prefers CDATA for text.
        value = escape(value, use_cdata=False)
        if '}' not in attrib:
            output.append(' %s="%s"' % (attrib, value))
            continue

        braced_ns, attrib = attrib.split('}', 1)
        attrib_ns = braced_ns[1:]
        if attrib_ns == XML_NS:
            output.append(' xml:%s="%s"' % (attrib, value))
        elif stream and attrib_ns in stream.namespace_map:
            mapped_ns = stream.namespace_map[attrib_ns]
            if mapped_ns:
                if namespaces is None:
                    namespaces = set()
                if attrib_ns not in namespaces:
                    # First use of this prefix in the current context:
                    # declare it and remember to undo that on the way out.
                    namespaces.add(attrib_ns)
                    new_namespaces.add(attrib_ns)
                    output.append(' xmlns:%s="%s"' % (
                        mapped_ns, attrib_ns))
                output.append(' %s:%s="%s"' % (
                    mapped_ns, attrib, value))

    if open_only:
        # Only output the opening tag, regardless of content.
        output.append(">")
        return ''.join(output)

    if len(xml) or xml.text:
        # The element has text and/or child elements to serialize.
        output.append(">")
        if xml.text:
            output.append(escape(xml.text, use_cdata))
        for child in xml:
            output.append(tostring(child, tag_xmlns, stream,
                                   namespaces=namespaces))
        output.append("</%s>" % tag_name)
    else:
        # Empty element.
        output.append(" />")

    if xml.tail:
        # If there is additional text after the element.
        output.append(escape(xml.tail, use_cdata))

    for ns in new_namespaces:
        # Remove namespaces introduced in this context. This is necessary
        # because the namespaces object continues to be shared with other
        # contexts.
        namespaces.remove(ns)
    return ''.join(output)
def escape(text, use_cdata=False):
    """Make text safe for inclusion in serialized XML.

    By default each of ``&``, ``<``, ``>``, ``'`` and ``"`` is replaced by
    its predefined entity reference. With ``use_cdata`` set, text containing
    any of those characters is instead wrapped in CDATA sections, and text
    containing none of them is returned unchanged.

    :param string text: The XML text to convert.
    :param bool use_cdata: Wrap the text in CDATA sections instead of
                           substituting entity references.
    :rtype: Unicode string
    """
    if sys.version_info < (3, 0):
        # Python 2 only: decode byte strings as UTF-8, ignoring bad bytes.
        if type(text) != types.UnicodeType:
            text = unicode(text, 'utf-8', 'ignore')

    entity_for = {'&': '&amp;',
                  '<': '&lt;',
                  '>': '&gt;',
                  "'": '&apos;',
                  '"': '&quot;'}

    if use_cdata:
        if not any(char in entity_for for char in text):
            # Nothing special in the text, so no wrapping is required.
            return text
        # A literal "]]>" would end a CDATA section early, so the text is
        # split on it and the pieces are rejoined with the terminator
        # spread over two separate CDATA sections ("]" and "]>").
        sections = ["<![CDATA[%s]]>" % piece for piece in text.split("]]>")]
        return "<![CDATA[]]]><![CDATA[]>]]>".join(sections)

    return ''.join(entity_for.get(char, char) for char in text)