Add NEWS entry as per RDM's suggestion (the bug was actually present
[python.git] / Lib / xml / sax / saxutils.py
blob46818f3d818d6b9b216b39ee640bc1c3c9f3301b
1 """\
2 A library of useful helper classes to the SAX classes, for the
3 convenience of application and driver writers.
4 """
6 import os, urlparse, urllib, types
7 import handler
8 import xmlreader
# String types recognised as system identifiers.  The unicode type is only
# included when this interpreter's ``types`` module actually provides it.
_StringTypes = [types.StringType]
if hasattr(types, "UnicodeType"):
    _StringTypes.append(types.UnicodeType)
# Pick the error handler used when encoding output: use
# "xmlcharrefreplace" when the codecs module provides it, and fall back
# to "strict" otherwise.
try:
    from codecs import xmlcharrefreplace_errors
except ImportError:
    _error_handling = "strict"
else:
    # The import was only a capability probe; do not keep the name around.
    del xmlcharrefreplace_errors
    _error_handling = "xmlcharrefreplace"
def __dict_replace(s, d):
    """Return *s* with each key of *d* replaced by its mapped value.

    The substitutions are applied one after another in the dictionary's
    iteration order, so a later substitution also sees the text produced
    by an earlier one.
    """
    result = s
    for old in d:
        result = result.replace(old, d[old])
    return result
def escape(data, entities={}):
    """Return *data* with &, < and > replaced by XML entity references.

    Further replacements can be requested through the optional *entities*
    dictionary: every key found in the data is replaced by its value.
    Keys and values must all be strings.
    """
    # "&" has to be handled first; otherwise the ampersands introduced by
    # "&gt;" and "&lt;" would themselves be escaped.
    for raw, reference in (("&", "&amp;"), (">", "&gt;"), ("<", "&lt;")):
        data = data.replace(raw, reference)
    if entities:
        data = __dict_replace(data, entities)
    return data
def unescape(data, entities={}):
    """Return *data* with &amp;, &lt; and &gt; turned back into &, < and >.

    Further replacements can be requested through the optional *entities*
    dictionary: every key found in the data is replaced by its value.
    Keys and values must all be strings.
    """
    for reference, raw in (("&lt;", "<"), ("&gt;", ">")):
        data = data.replace(reference, raw)
    if entities:
        data = __dict_replace(data, entities)
    # "&amp;" has to be handled last so that text such as "&amp;lt;" ends
    # up as the literal "&lt;" instead of being unescaped twice.
    data = data.replace("&amp;", "&")
    return data
def quoteattr(data, entities={}):
    """Escape *data* and wrap it in quotes for use as an attribute value.

    &, < and > are escaped, as are newline, carriage return and tab (as
    numeric character references).  The result is wrapped in double
    quotes unless the data itself contains a double quote; in that case
    single quotes are used, or, when both kinds of quote occur, double
    quotes with every \" written as &quot;.

    Further replacements can be requested through the optional *entities*
    dictionary: every key found in the data is replaced by its value.
    Keys and values must all be strings.
    """
    # Work on a copy so the caller's dictionary is left untouched.
    replacements = entities.copy()
    replacements['\n'] = '&#10;'
    replacements['\r'] = '&#13;'
    replacements['\t'] = '&#9;'
    data = escape(data, replacements)

    if '"' not in data:
        return '"%s"' % data
    if "'" not in data:
        return "'%s'" % data
    return '"%s"' % data.replace('"', "&quot;")
class XMLGenerator(handler.ContentHandler):
    """ContentHandler that writes the SAX events it receives out as XML.

    Output goes to *out* (``sys.stdout`` when omitted).  Text that is not
    a plain ``str`` is encoded with *encoding* before it is written.
    """

    def __init__(self, out=None, encoding="iso-8859-1"):
        if out is None:
            import sys
            out = sys.stdout
        handler.ContentHandler.__init__(self)
        self._out = out
        self._ns_contexts = [{}] # contains uri -> prefix dicts
        self._current_context = self._ns_contexts[-1]
        # (prefix, uri) pairs announced by startPrefixMapping() that still
        # have to be emitted as xmlns attributes on the next element.
        self._undeclared_ns_maps = []
        self._encoding = encoding

    def _write(self, text):
        """Write *text* to the output, encoding it first unless it is a str."""
        if isinstance(text, str):
            self._out.write(text)
        else:
            self._out.write(text.encode(self._encoding, _error_handling))

    def _qname(self, name):
        """Builds a qualified name from a (ns_url, localname) pair"""
        if name[0]:
            # The 'xml' prefix is bound by definition to this namespace.
            # It is never declared through startPrefixMapping(), so it will
            # not usually be present in the current context.
            if name[0] == 'http://www.w3.org/XML/1998/namespace':
                return 'xml:' + name[1]
            # The name is in a non-empty namespace
            prefix = self._current_context[name[0]]
            if prefix:
                # If it is not the default namespace, prepend the prefix
                return prefix + ":" + name[1]
        # Return the unqualified name
        return name[1]

    # ContentHandler methods

    def startDocument(self):
        """Write the XML declaration, naming the configured encoding."""
        self._write('<?xml version="1.0" encoding="%s"?>\n' %
                    self._encoding)

    def startPrefixMapping(self, prefix, uri):
        """Record a new uri -> prefix binding and queue its declaration."""
        self._ns_contexts.append(self._current_context.copy())
        self._current_context[uri] = prefix
        self._undeclared_ns_maps.append((prefix, uri))

    def endPrefixMapping(self, prefix):
        """Restore the namespace context saved by startPrefixMapping()."""
        self._current_context = self._ns_contexts[-1]
        del self._ns_contexts[-1]

    def startElement(self, name, attrs):
        """Write a start tag with its (escaped and quoted) attributes."""
        self._write('<' + name)
        for (attr_name, value) in attrs.items():
            self._write(' %s=%s' % (attr_name, quoteattr(value)))
        self._write('>')

    def endElement(self, name):
        """Write the end tag for *name*."""
        self._write('</%s>' % name)

    def startElementNS(self, name, qname, attrs):
        """Write a namespace-aware start tag.

        Pending namespace declarations are emitted as xmlns attributes
        before the element's own attributes.
        """
        self._write('<' + self._qname(name))

        # Go through _write() like all other output so that the
        # declarations are encoded consistently with the rest of the
        # document.
        for prefix, uri in self._undeclared_ns_maps:
            if prefix:
                self._write(' xmlns:%s="%s"' % (prefix, uri))
            else:
                self._write(' xmlns="%s"' % uri)
        self._undeclared_ns_maps = []

        for (attr_name, value) in attrs.items():
            self._write(' %s=%s' % (self._qname(attr_name), quoteattr(value)))
        self._write('>')

    def endElementNS(self, name, qname):
        """Write the end tag for the (ns_url, localname) pair *name*."""
        self._write('</%s>' % self._qname(name))

    def characters(self, content):
        """Write character data with &, < and > escaped."""
        self._write(escape(content))

    def ignorableWhitespace(self, content):
        """Write ignorable whitespace unchanged."""
        self._write(content)

    def processingInstruction(self, target, data):
        """Write a processing instruction."""
        self._write('<?%s %s?>' % (target, data))
class XMLFilterBase(xmlreader.XMLReader):
    """Pass-through filter that sits between an XMLReader and the
    application's event handlers.

    Every event is forwarded unchanged to the registered handler and
    every configuration request is forwarded unchanged to the parent
    reader.  Subclasses override individual methods to alter the event
    stream or the configuration requests on their way through.
    """

    def __init__(self, parent=None):
        xmlreader.XMLReader.__init__(self)
        self._parent = parent

    # ErrorHandler methods

    def error(self, exception):
        """Forward a recoverable error to the error handler."""
        self._err_handler.error(exception)

    def fatalError(self, exception):
        """Forward a non-recoverable error to the error handler."""
        self._err_handler.fatalError(exception)

    def warning(self, exception):
        """Forward a warning to the error handler."""
        self._err_handler.warning(exception)

    # ContentHandler methods

    def setDocumentLocator(self, locator):
        """Forward the document locator to the content handler."""
        self._cont_handler.setDocumentLocator(locator)

    def startDocument(self):
        """Forward the start-of-document event."""
        self._cont_handler.startDocument()

    def endDocument(self):
        """Forward the end-of-document event."""
        self._cont_handler.endDocument()

    def startPrefixMapping(self, prefix, uri):
        """Forward the start of a prefix mapping."""
        self._cont_handler.startPrefixMapping(prefix, uri)

    def endPrefixMapping(self, prefix):
        """Forward the end of a prefix mapping."""
        self._cont_handler.endPrefixMapping(prefix)

    def startElement(self, name, attrs):
        """Forward a start-of-element event."""
        self._cont_handler.startElement(name, attrs)

    def endElement(self, name):
        """Forward an end-of-element event."""
        self._cont_handler.endElement(name)

    def startElementNS(self, name, qname, attrs):
        """Forward a namespace-aware start-of-element event."""
        self._cont_handler.startElementNS(name, qname, attrs)

    def endElementNS(self, name, qname):
        """Forward a namespace-aware end-of-element event."""
        self._cont_handler.endElementNS(name, qname)

    def characters(self, content):
        """Forward character data."""
        self._cont_handler.characters(content)

    def ignorableWhitespace(self, chars):
        """Forward ignorable whitespace."""
        self._cont_handler.ignorableWhitespace(chars)

    def processingInstruction(self, target, data):
        """Forward a processing instruction."""
        self._cont_handler.processingInstruction(target, data)

    def skippedEntity(self, name):
        """Forward a skipped-entity notification."""
        self._cont_handler.skippedEntity(name)

    # DTDHandler methods

    def notationDecl(self, name, publicId, systemId):
        """Forward a notation declaration to the DTD handler."""
        self._dtd_handler.notationDecl(name, publicId, systemId)

    def unparsedEntityDecl(self, name, publicId, systemId, ndata):
        """Forward an unparsed entity declaration to the DTD handler."""
        self._dtd_handler.unparsedEntityDecl(name, publicId, systemId, ndata)

    # EntityResolver methods

    def resolveEntity(self, publicId, systemId):
        """Return whatever the entity resolver returns for this entity."""
        resolved = self._ent_handler.resolveEntity(publicId, systemId)
        return resolved

    # XMLReader methods

    def parse(self, source):
        """Register this filter as all four handlers of the parent reader,
        then have the parent parse *source*."""
        reader = self._parent
        reader.setContentHandler(self)
        reader.setErrorHandler(self)
        reader.setEntityResolver(self)
        reader.setDTDHandler(self)
        reader.parse(source)

    def setLocale(self, locale):
        """Forward the locale setting to the parent reader."""
        self._parent.setLocale(locale)

    def getFeature(self, name):
        """Return the parent reader's state for feature *name*."""
        return self._parent.getFeature(name)

    def setFeature(self, name, state):
        """Set feature *name* on the parent reader."""
        self._parent.setFeature(name, state)

    def getProperty(self, name):
        """Return the parent reader's value for property *name*."""
        return self._parent.getProperty(name)

    def setProperty(self, name, value):
        """Set property *name* on the parent reader."""
        self._parent.setProperty(name, value)

    # XMLFilter methods

    def getParent(self):
        """Return the parent reader."""
        return self._parent

    def setParent(self, parent):
        """Replace the parent reader."""
        self._parent = parent
274 # --- Utility functions
def prepare_input_source(source, base = ""):
    """This function takes an InputSource and an optional base URL and
    returns a fully resolved InputSource object ready for reading.

    *source* may be given as a system identifier string, as an object
    with a ``read`` attribute (used as the byte stream), or as an
    InputSource.  If the resulting InputSource has no byte stream, its
    system identifier is resolved against *base*: a matching local file
    is opened in binary mode, anything else is joined with *base* as a
    URL and opened with urllib.  The opened stream is stored on the
    returned InputSource and is left open.
    """

    # isinstance() rather than an exact type() comparison, so subclasses
    # of the string types are recognised as system identifiers too.
    if isinstance(source, tuple(_StringTypes)):
        source = xmlreader.InputSource(source)
    elif hasattr(source, "read"):
        stream = source
        source = xmlreader.InputSource()
        source.setByteStream(stream)
        if hasattr(stream, "name"):
            source.setSystemId(stream.name)

    if source.getByteStream() is None:
        sysid = source.getSystemId()
        basehead = os.path.dirname(os.path.normpath(base))
        sysidfilename = os.path.join(basehead, sysid)
        if os.path.isfile(sysidfilename):
            source.setSystemId(sysidfilename)
            f = open(sysidfilename, "rb")
        else:
            source.setSystemId(urlparse.urljoin(base, sysid))
            f = urllib.urlopen(source.getSystemId())

        source.setByteStream(f)

    return source