1 # markdown is released under the BSD license
2 # Copyright 2007, 2008 The Python Markdown Project (v. 1.7 and later)
3 # Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
4 # Copyright 2004 Manfred Stienstra (the original version)
8 # Redistribution and use in source and binary forms, with or without
9 # modification, are permitted provided that the following conditions are met:
11 # * Redistributions of source code must retain the above copyright
12 # notice, this list of conditions and the following disclaimer.
13 # * Redistributions in binary form must reproduce the above copyright
14 # notice, this list of conditions and the following disclaimer in the
15 # documentation and/or other materials provided with the distribution.
16 # * Neither the name of the <organization> nor the
17 # names of its contributors may be used to endorse or promote products
18 # derived from this software without specific prior written permission.
20 # THIS SOFTWARE IS PROVIDED BY THE PYTHON MARKDOWN PROJECT ''AS IS'' AND ANY
21 # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
22 # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 # DISCLAIMED. IN NO EVENT SHALL ANY CONTRIBUTORS TO THE PYTHON MARKDOWN PROJECT
24 # BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 # POSSIBILITY OF SUCH DAMAGE.
# markdown/serializers.py
# Add x/html serialization to ElementTree
36 # Taken from ElementTree 1.3 preview with slight modifications
38 # Copyright (c) 1999-2007 by Fredrik Lundh. All rights reserved.
40 # fredrik@pythonware.com
41 # http://www.pythonware.com
43 # --------------------------------------------------------------------
44 # The ElementTree toolkit is
46 # Copyright (c) 1999-2007 by Fredrik Lundh
48 # By obtaining, using, and/or copying this software and/or its
49 # associated documentation, you agree that you have read, understood,
50 # and will comply with the following terms and conditions:
52 # Permission to use, copy, modify, and distribute this software and
53 # its associated documentation for any purpose and without fee is
54 # hereby granted, provided that the above copyright notice appears in
55 # all copies, and that both that copyright notice and this permission
56 # notice appear in supporting documentation, and that the name of
57 # Secret Labs AB or the author not be used in advertising or publicity
58 # pertaining to distribution of the software without specific, written
61 # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
62 # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
63 # ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
64 # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
65 # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
66 # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
67 # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
69 # --------------------------------------------------------------------
from __future__ import absolute_import
from __future__ import unicode_literals

from . import util
# Local aliases for the ElementTree implementation exposed by ``util.etree``.
ElementTree = util.etree.ElementTree
QName = util.etree.QName
# Use the ``test_comment`` factory when the etree module provides one;
# otherwise fall back to the regular ``Comment`` factory.
if hasattr(util.etree, 'test_comment'):
    Comment = util.etree.test_comment
else:
    Comment = util.etree.Comment
ProcessingInstruction = util.etree.ProcessingInstruction
# Short alias; ``_namespaces`` refers to the processing-instruction factory
# by this name.
PI = ProcessingInstruction

__all__ = ['to_html_string', 'to_xhtml_string']
# Tag names the serializer treats as empty elements: they are written as
# ``<tag />`` in xhtml mode and never receive a closing tag.  Kept as a set
# because the serializer only performs membership tests on it.
HTML_EMPTY = set((
    "area", "base", "basefont", "br", "col", "frame", "hr",
    "img", "input", "isindex", "link", "meta", "param",
))
95 # "well-known" namespace prefixes
96 "http://www.w3.org/XML/1998/namespace": "xml",
97 "http://www.w3.org/1999/xhtml": "html",
98 "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
99 "http://schemas.xmlsoap.org/wsdl/": "wsdl",
101 "http://www.w3.org/2001/XMLSchema": "xs",
102 "http://www.w3.org/2001/XMLSchema-instance": "xsi",
104 "http://purl.org/dc/elements/1.1/": "dc",
108 def _raise_serialization_error(text
):
110 "cannot serialize %r (type %s)" % (text
, type(text
).__name
__)
113 def _encode(text
, encoding
):
115 return text
.encode(encoding
, "xmlcharrefreplace")
116 except (TypeError, AttributeError):
117 _raise_serialization_error(text
)
119 def _escape_cdata(text
):
120 # escape character data
122 # it's worth avoiding do-nothing calls for strings that are
123 # shorter than 500 character, or so. assume that's, by far,
124 # the most common case in most applications.
126 text
= text
.replace("&", "&")
128 text
= text
.replace("<", "<")
130 text
= text
.replace(">", ">")
132 except (TypeError, AttributeError):
133 _raise_serialization_error(text
)
136 def _escape_attrib(text
):
137 # escape attribute value
140 text
= text
.replace("&", "&")
142 text
= text
.replace("<", "<")
144 text
= text
.replace(">", ">")
146 text
= text
.replace("\"", """)
148 text
= text
.replace("\n", " ")
150 except (TypeError, AttributeError):
151 _raise_serialization_error(text
)
153 def _escape_attrib_html(text
):
154 # escape attribute value
157 text
= text
.replace("&", "&")
159 text
= text
.replace("<", "<")
161 text
= text
.replace(">", ">")
163 text
= text
.replace("\"", """)
165 except (TypeError, AttributeError):
166 _raise_serialization_error(text
)
def _serialize_html(write, elem, qnames, namespaces, format):
    """Serialize *elem* and its subtree as (x)html through *write*.

    write      -- callable receiving each output fragment as a string.
    elem       -- element to serialize; children are handled recursively.
    qnames     -- mapping from tag/attribute names to their serialized form.
    namespaces -- mapping of namespace uri to prefix to declare on this
                  element, or None (always None for child elements).
    format     -- "html" or "xhtml"; selects boolean-attribute
                  minimization and the ``<tag />`` empty-element form.
    """
    tag = elem.tag
    text = elem.text
    if tag is Comment:
        write("<!--%s-->" % _escape_cdata(text))
    elif tag is ProcessingInstruction:
        write("<?%s?>" % _escape_cdata(text))
    else:
        tag = qnames[tag]
        if tag is None:
            # An element without a serialized name contributes only its
            # text and its children.
            if text:
                write(_escape_cdata(text))
            for e in elem:
                _serialize_html(write, e, qnames, None, format)
        else:
            write("<" + tag)
            items = elem.items()
            if items or namespaces:
                # sorted() rather than .sort(): items() may be a view
                # object, which has no sort() method.
                for k, v in sorted(items):  # lexical order
                    if isinstance(k, QName):
                        k = k.text
                    if isinstance(v, QName):
                        v = qnames[v.text]
                    else:
                        v = _escape_attrib_html(v)
                    if qnames[k] == v and format == 'html':
                        # handle boolean attributes
                        write(" %s" % v)
                    else:
                        write(" %s=\"%s\"" % (qnames[k], v))
                if namespaces:
                    ns_items = sorted(namespaces.items(),
                                      key=lambda x: x[1])  # sort on prefix
                    for v, k in ns_items:
                        if k:
                            k = ":" + k
                        write(" xmlns%s=\"%s\"" % (k, _escape_attrib(v)))
            if format == "xhtml" and tag in HTML_EMPTY:
                write(" />")
            else:
                write(">")
                tag = tag.lower()
                if text:
                    if tag == "script" or tag == "style":
                        # script/style content is written without escaping
                        write(text)
                    else:
                        write(_escape_cdata(text))
                for e in elem:
                    _serialize_html(write, e, qnames, None, format)
                if tag not in HTML_EMPTY:
                    write("</" + tag + ">")
    if elem.tail:
        write(_escape_cdata(elem.tail))
def _write_html(root,
                encoding=None,
                default_namespace=None,
                format="html"):
    """Serialize the tree rooted at *root* and return the result.

    root              -- root element; must not be None.
    encoding          -- when None the result is returned as a text
                         string; otherwise it is encoded with ``_encode``.
    default_namespace -- passed through to ``_namespaces``.
    format            -- "html" or "xhtml", passed to ``_serialize_html``.
    """
    assert root is not None
    data = []
    write = data.append
    qnames, namespaces = _namespaces(root, default_namespace)
    _serialize_html(write, root, qnames, namespaces, format)
    if encoding is None:
        return "".join(data)
    # _encode takes the target encoding as its second argument.
    return _encode("".join(data), encoding)
239 # --------------------------------------------------------------------
240 # serialization support
def _namespaces(elem, default_namespace=None):
    """Identify the qualified names and namespaces used in a tree.

    Walks *elem* and all of its descendants and returns a tuple
    ``(qnames, namespaces)`` where ``qnames`` maps each tag/attribute name
    to its serialized ``prefix:local`` form and ``namespaces`` maps
    namespace uris to prefixes.

    Raises ValueError when *default_namespace* is given and the tree
    contains a non-qualified name.
    """
    # maps qnames to *encoded* prefix:local names
    qnames = {None: None}

    # maps uri:s to prefixes
    namespaces = {}
    if default_namespace:
        namespaces[default_namespace] = ""

    def add_qname(qname):
        # calculate serialized qname representation
        try:
            if qname[:1] == "{":
                uri, tag = qname[1:].split("}", 1)
                prefix = namespaces.get(uri)
                if prefix is None:
                    prefix = _namespace_map.get(uri)
                    if prefix is None:
                        prefix = "ns%d" % len(namespaces)
                    if prefix != "xml":
                        namespaces[uri] = prefix
                if prefix:
                    qnames[qname] = "%s:%s" % (prefix, tag)
                else:
                    qnames[qname] = tag  # default element
            else:
                if default_namespace:
                    raise ValueError(
                        "cannot use non-qualified names with "
                        "default_namespace option"
                    )
                qnames[qname] = qname
        except TypeError:
            _raise_serialization_error(qname)

    # populate qname and namespaces table
    try:
        iterate = elem.iter
    except AttributeError:
        iterate = elem.getiterator  # cET compatibility
    for node in iterate():
        tag = node.tag
        if isinstance(tag, QName) and tag.text not in qnames:
            add_qname(tag.text)
        elif isinstance(tag, util.string_type):
            if tag not in qnames:
                add_qname(tag)
        elif (tag is not None and tag is not Comment
                and tag is not ProcessingInstruction):
            # Anything else is not a tag this serializer can handle.
            _raise_serialization_error(tag)
        for key, value in node.items():
            if isinstance(key, QName):
                key = key.text
            if key not in qnames:
                add_qname(key)
            if isinstance(value, QName) and value.text not in qnames:
                add_qname(value.text)
        text = node.text
        if isinstance(text, QName) and text.text not in qnames:
            add_qname(text.text)
    return qnames, namespaces
def to_html_string(element):
    """Return *element* and its subtree serialized in "html" format."""
    root = ElementTree(element).getroot()
    return _write_html(root, format="html")
def to_xhtml_string(element):
    """Return *element* and its subtree serialized in "xhtml" format."""
    root = ElementTree(element).getroot()
    return _write_html(root, format="xhtml")