slixmpp/xmlstream/tostring.py

   1 # -*- coding: utf-8 -*-
   2 """
   3     slixmpp.xmlstream.tostring
   4     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   5
   6     This module converts XML objects into Unicode strings and
   7     intelligently includes namespaces only when necessary to
   8     keep the output readable.
   9
  10     Part of Slixmpp: The Slick XMPP Library
  11
  12     :copyright: (c) 2011 Nathanael C. Fritz
  13     :license: MIT, see LICENSE for more details
  14 """
  15
  16 from __future__ import unicode_literals
  17
  18 import sys
  19
  20 if sys.version_info < (3, 0):
  21     import types
  22
  23
# Namespace URI that tostring() compares attribute namespaces against:
# attributes qualified with it are serialized using the ``xml:`` prefix.
XML_NS = 'http://www.w3.org/XML/1998/namespace'
  25
  26
  27 def tostring(xml=None, xmlns='', stream=None, outbuffer='',
  28              top_level=False, open_only=False, namespaces=None):
  29     """Serialize an XML object to a Unicode string.
  30
  31     If an outer xmlns is provided using ``xmlns``, then the current element's
  32     namespace will not be included if it matches the outer namespace. An
  33     exception is made for elements that have an attached stream, and appear
  34     at the stream root.
  35
  36     :param XML xml: The XML object to serialize.
  37     :param string xmlns: Optional namespace of an element wrapping the XML
  38                          object.
  39     :param stream: The XML stream that generated the XML object.
  40     :param string outbuffer: Optional buffer for storing serializations
  41                              during recursive calls.
  42     :param bool top_level: Indicates that the element is the outermost
  43                            element.
  44     :param set namespaces: Track which namespaces are in active use so
  45                            that new ones can be declared when needed.
  46
  47     :type xml: :py:class:`~xml.etree.ElementTree.Element`
  48     :type stream: :class:`~slixmpp.xmlstream.xmlstream.XMLStream`
  49
  50     :rtype: Unicode string
  51     """
  52     # Add previous results to the start of the output.
  53     output = [outbuffer]
  54
  55     # Extract the element's tag name.
  56     tag_name = xml.tag.split('}', 1)[-1]
  57
  58     # Extract the element's namespace if it is defined.
  59     if '}' in xml.tag:
  60         tag_xmlns = xml.tag.split('}', 1)[0][1:]
  61     else:
  62         tag_xmlns = ''
  63
  64     default_ns = ''
  65     stream_ns = ''
  66     use_cdata = False
  67
  68     if stream:
  69         default_ns = stream.default_ns
  70         stream_ns = stream.stream_ns
  71         use_cdata = stream.use_cdata
  72
  73     # Output the tag name and derived namespace of the element.
  74     namespace = ''
  75     if tag_xmlns:
  76         if top_level and tag_xmlns not in [default_ns, xmlns, stream_ns] \
  77           or not top_level and tag_xmlns != xmlns:
  78             namespace = ' xmlns="%s"' % tag_xmlns
  79     if stream and tag_xmlns in stream.namespace_map:
  80         mapped_namespace = stream.namespace_map[tag_xmlns]
  81         if mapped_namespace:
  82             tag_name = "%s:%s" % (mapped_namespace, tag_name)
  83     output.append("<%s" % tag_name)
  84     output.append(namespace)
  85
  86     # Output escaped attribute values.
  87     new_namespaces = set()
  88     for attrib, value in xml.attrib.items():
  89         value = escape(value, use_cdata)
  90         if '}' not in attrib:
  91             output.append(' %s="%s"' % (attrib, value))
  92         else:
  93             attrib_ns = attrib.split('}')[0][1:]
  94             attrib = attrib.split('}')[1]
  95             if attrib_ns == XML_NS:
  96                 output.append(' xml:%s="%s"' % (attrib, value))
  97             elif stream and attrib_ns in stream.namespace_map:
  98                 mapped_ns = stream.namespace_map[attrib_ns]
  99                 if mapped_ns:
 100                     if namespaces is None:
 101                         namespaces = set()
 102                     if attrib_ns not in namespaces:
 103                         namespaces.add(attrib_ns)
 104                         new_namespaces.add(attrib_ns)
 105                         output.append(' xmlns:%s="%s"' % (
 106                             mapped_ns, attrib_ns))
 107                     output.append(' %s:%s="%s"' % (
 108                         mapped_ns, attrib, value))
 109
 110     if open_only:
 111         # Only output the opening tag, regardless of content.
 112         output.append(">")
 113         return ''.join(output)
 114
 115     if len(xml) or xml.text:
 116         # If there are additional child elements to serialize.
 117         output.append(">")
 118         if xml.text:
 119             output.append(escape(xml.text, use_cdata))
 120         if len(xml):
 121             for child in xml:
 122                 output.append(tostring(child, tag_xmlns, stream,
 123                     namespaces=namespaces))
 124         output.append("</%s>" % tag_name)
 125     elif xml.text:
 126         # If we only have text content.
 127         output.append(">%s</%s>" % (escape(xml.text, use_cdata), tag_name))
 128     else:
 129         # Empty element.
 130         output.append(" />")
 131     if xml.tail:
 132         # If there is additional text after the element.
 133         output.append(escape(xml.tail, use_cdata))
 134     for ns in new_namespaces:
 135         # Remove namespaces introduced in this context. This is necessary
 136         # because the namespaces object continues to be shared with other
 137         # contexts.
 138         namespaces.remove(ns)
 139     return ''.join(output)
 140
 141
 142 def escape(text, use_cdata=False):
 143     """Convert special characters in XML to escape sequences.
 144
 145     :param string text: The XML text to convert.
 146     :rtype: Unicode string
 147     """
 148     if sys.version_info < (3, 0):
 149         if type(text) != types.UnicodeType:
 150             text = unicode(text, 'utf-8', 'ignore')
 151
 152     escapes = {'&': '&amp;',
 153                '<': '&lt;',
 154                '>': '&gt;',
 155                "'": '&apos;',
 156                '"': '&quot;'}
 157
 158     if not use_cdata:
 159         text = list(text)
 160         for i, c in enumerate(text):
 161             text[i] = escapes.get(c, c)
 162         return ''.join(text)
 163     else:
 164         escape_needed = False
 165         for c in text:
 166             if c in escapes:
 167                 escape_needed = True
 168                 break
 169         if escape_needed:
 170             escaped = map(lambda x : "<![CDATA[%s]]>" % x, text.split("]]>"))
 171             return "<![CDATA[]]]><![CDATA[]>]]>".join(escaped)
 172         return text