# -*- coding: utf-8 -*-
"""
    slixmpp.xmlstream.tostring
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    This module converts XML objects into Unicode strings and
    intelligently includes namespaces only when necessary to
    keep the output readable.

    Part of Slixmpp: The Slick XMPP Library

    :copyright: (c) 2011 Nathanael C. Fritz
    :license: MIT, see LICENSE for more details
"""

from __future__ import unicode_literals

import sys

if sys.version_info < (3, 0):
    import types


# Namespace URI bound to the reserved ``xml:`` prefix (xml:lang, xml:space, ...).
# Attributes in this namespace are serialized with the literal ``xml:`` prefix
# and never need an xmlns declaration.
XML_NS = 'http://www.w3.org/XML/1998/namespace'


def tostring(xml=None, xmlns='', stream=None, outbuffer='',
             top_level=False, open_only=False, namespaces=None):
    """Serialize an XML object to a Unicode string.

    If an outer xmlns is provided using ``xmlns``, then the current element's
    namespace will not be included if it matches the outer namespace. An
    exception is made for top-level elements that have an attached stream:
    for those, the stream's default and stream namespaces are also treated
    as already declared.

    Attributes without a namespace are always written. Attributes in the
    ``xml:`` namespace are written with the ``xml:`` prefix. Any other
    namespaced attribute is written only if ``stream.namespace_map`` maps
    its namespace to a non-empty prefix; otherwise it is omitted.

    Attribute values are always entity-escaped, even when the stream asks
    for CDATA, because a CDATA section is not allowed inside an attribute
    value.

    :param XML xml: The XML object to serialize. Despite the ``None``
                    default, an element is required.
    :param string xmlns: Optional namespace of an element wrapping the XML
                         object.
    :param stream: The XML stream that generated the XML object.
    :param string outbuffer: Optional buffer for storing serializations
                             during recursive calls.
    :param bool top_level: Indicates that the element is the outermost
                           element.
    :param bool open_only: Only emit the opening tag (with attributes),
                           ignoring text, children and tail.
    :param set namespaces: Track which namespaces are in active use so
                           that new ones can be declared when needed.
                           Mutated during the call, but restored to its
                           incoming content before returning normally.

    :type xml: :py:class:`~xml.etree.ElementTree.Element`
    :type stream: :class:`~slixmpp.xmlstream.xmlstream.XMLStream`

    :rtype: Unicode string
    """
    # Add previous results to the start of the output.
    output = [outbuffer]

    # ElementTree stores qualified names as "{namespace}name"; a tag without
    # '}' has no namespace at all.
    if '}' in xml.tag:
        head, _, tag_name = xml.tag.partition('}')
        tag_xmlns = head[1:]
    else:
        tag_name = xml.tag
        tag_xmlns = ''

    default_ns = ''
    stream_ns = ''
    use_cdata = False
    if stream:
        default_ns = stream.default_ns
        stream_ns = stream.stream_ns
        use_cdata = stream.use_cdata

    # Output the tag name and derived namespace of the element. The xmlns
    # declaration is skipped when the enclosing context already implies it.
    namespace = ''
    if tag_xmlns:
        if top_level:
            declare = tag_xmlns not in (default_ns, xmlns, stream_ns)
        else:
            declare = tag_xmlns != xmlns
        if declare:
            namespace = ' xmlns="%s"' % tag_xmlns
    if stream and tag_xmlns in stream.namespace_map:
        mapped_namespace = stream.namespace_map[tag_xmlns]
        if mapped_namespace:
            tag_name = "%s:%s" % (mapped_namespace, tag_name)
    output.append("<%s" % tag_name)
    output.append(namespace)

    # Output escaped attribute values.
    new_namespaces = set()
    for attrib, value in xml.attrib.items():
        # Never use CDATA here: "<" may not appear in an attribute value.
        value = escape(value)
        if '}' not in attrib:
            output.append(' %s="%s"' % (attrib, value))
            continue

        head, _, attrib = attrib.partition('}')
        attrib_ns = head[1:]
        if attrib_ns == XML_NS:
            output.append(' xml:%s="%s"' % (attrib, value))
        elif stream and attrib_ns in stream.namespace_map:
            mapped_ns = stream.namespace_map[attrib_ns]
            if mapped_ns:
                if namespaces is None:
                    namespaces = set()
                if attrib_ns not in namespaces:
                    # First use in this subtree: declare the prefix here and
                    # remember to retract it once this element is closed.
                    namespaces.add(attrib_ns)
                    new_namespaces.add(attrib_ns)
                    output.append(' xmlns:%s="%s"' % (
                        mapped_ns, attrib_ns))
                output.append(' %s:%s="%s"' % (
                    mapped_ns, attrib, value))

    if open_only:
        # Only output the opening tag, regardless of content.
        output.append(">")
        return ''.join(output)

    if len(xml) or xml.text:
        # The element has text and/or child elements to serialize.
        output.append(">")
        if xml.text:
            output.append(escape(xml.text, use_cdata))
        for child in xml:
            output.append(tostring(child, tag_xmlns, stream,
                                   namespaces=namespaces))
        output.append("</%s>" % tag_name)
    else:
        # Empty element.
        output.append(" />")

    if xml.tail:
        # If there is additional text after the element.
        output.append(escape(xml.tail, use_cdata))

    if new_namespaces:
        # Remove namespaces introduced in this context. This is necessary
        # because the namespaces object continues to be shared with other
        # contexts (siblings and ancestors of this element).
        namespaces.difference_update(new_namespaces)
    return ''.join(output)


def escape(text, use_cdata=False):
    """Convert special characters in XML to escape sequences.

    With ``use_cdata`` false, each of ``& < > ' "`` is replaced by its
    predefined XML entity. With ``use_cdata`` true, text containing none of
    those characters is returned unchanged; otherwise the text is wrapped in
    CDATA sections, splitting around any literal ``]]>`` so that the
    terminator never appears inside a section.

    :param string text: The XML text to convert.
    :param bool use_cdata: Wrap the text in CDATA sections instead of
                           replacing characters with entities.
    :rtype: Unicode string
    """
    if sys.version_info < (3, 0):
        # Python 2 only: byte strings are decoded as UTF-8 (undecodable
        # bytes dropped) so the rest of the function works on Unicode.
        if not isinstance(text, types.UnicodeType):
            text = unicode(text, 'utf-8', 'ignore')

    escapes = {'&': '&amp;',
               '<': '&lt;',
               '>': '&gt;',
               "'": '&apos;',
               '"': '&quot;'}

    if not use_cdata:
        return ''.join(escapes.get(c, c) for c in text)

    if not any(c in escapes for c in text):
        # Nothing special in the text, so no CDATA wrapper is needed.
        return text

    # "]]>" cannot occur inside a CDATA section. Split on it and re-emit it
    # between the pieces as two tiny sections: "]" and ">".
    sections = ["<![CDATA[%s]]>" % part for part in text.split("]]>")]
    return "<![CDATA[]]]><![CDATA[]>]]>".join(sections)