"""\
A library of useful helper classes to the SAX classes, for the
convenience of application and driver writers.
"""
6 import os
, urlparse
, urllib
, types
# Types that prepare_input_source() treats as "a system identifier given
# as a plain string".  The legacy aliases in the `types` module are used
# when they exist; the AttributeError fallback covers interpreters whose
# `types` module lacks UnicodeType (or lacks both aliases, in which case
# the built-in `str` is used).
try:
    _StringTypes = [types.StringType, types.UnicodeType]
except AttributeError:
    _StringTypes = [getattr(types, "StringType", str)]
# See whether the xmlcharrefreplace error handler is supported by the
# codecs module.  _error_handling is the `errors` argument that
# XMLGenerator._write() passes to encode(): "xmlcharrefreplace" when the
# handler can be imported, "strict" otherwise.
try:
    from codecs import xmlcharrefreplace_errors
    _error_handling = "xmlcharrefreplace"
    # The import was only a feature probe; do not leave the name behind.
    del xmlcharrefreplace_errors
except ImportError:
    _error_handling = "strict"
def __dict_replace(s, d):
    """Replace substrings of a string using a dictionary.

    Each key of *d* that occurs in *s* is replaced by its value.  The
    replacements are applied one after another, in the dictionary's
    iteration order, so a later replacement sees the output of the
    earlier ones.  Returns the resulting string.
    """
    for key, value in d.items():
        s = s.replace(key, value)
    return s
def escape(data, entities={}):
    """Escape &, <, and > in a string of data.

    You can escape other strings of data by passing a dictionary as
    the optional entities parameter.  The keys and values must all be
    strings; each key will be replaced with its corresponding value.

    Returns the escaped string.  The default entities dictionary is only
    read, never modified.
    """
    # must do ampersand first, otherwise the "&" of the entities produced
    # below would itself be escaped
    data = data.replace("&", "&amp;")
    data = data.replace(">", "&gt;")
    data = data.replace("<", "&lt;")
    if entities:
        data = __dict_replace(data, entities)
    return data
def unescape(data, entities={}):
    """Unescape &amp;, &lt;, and &gt; in a string of data.

    You can unescape other strings of data by passing a dictionary as
    the optional entities parameter.  The keys and values must all be
    strings; each key will be replaced with its corresponding value.

    Returns the unescaped string.  The default entities dictionary is
    only read, never modified.
    """
    data = data.replace("&lt;", "<")
    data = data.replace("&gt;", ">")
    if entities:
        data = __dict_replace(data, entities)
    # must do ampersand last, so that "&amp;lt;" becomes "&lt;" and is not
    # unescaped a second time into "<"
    return data.replace("&amp;", "&")
def quoteattr(data, entities={}):
    """Escape and quote an attribute value.

    Escape &, <, and > in a string of data, then quote it for use as
    an attribute value.  The \" character will be escaped as well, if
    necessary.

    You can escape other strings of data by passing a dictionary as
    the optional entities parameter.  The keys and values must all be
    strings; each key will be replaced with its corresponding value.

    Returns the value including its surrounding quote characters.
    """
    # Work on a copy so the caller's dictionary (and the shared default)
    # is never modified.
    entities = entities.copy()
    # Newline, carriage return and tab are written as character
    # references instead of literal whitespace.
    entities.update({'\n': '&#10;', '\r': '&#13;', '\t': '&#9;'})
    data = escape(data, entities)
    if '"' in data:
        if "'" in data:
            # Both quote characters occur: use double quotes and escape
            # the embedded double quotes.
            data = '"%s"' % data.replace('"', "&quot;")
        else:
            # Only double quotes occur: single-quote the value as is.
            data = "'%s'" % data
    else:
        data = '"%s"' % data
    return data
class XMLGenerator(handler.ContentHandler):
    """ContentHandler that writes the events it receives as XML text.

    Output goes to the file-like object given as *out* (sys.stdout when
    omitted).  *encoding* is the name announced in the XML declaration
    and used by _write() to encode text that is not a `str` instance.
    """

    def __init__(self, out=None, encoding="iso-8859-1"):
        if out is None:
            import sys
            out = sys.stdout
        handler.ContentHandler.__init__(self)
        self._out = out
        self._ns_contexts = [{}] # contains uri -> prefix dicts
        self._current_context = self._ns_contexts[-1]
        # (prefix, uri) pairs announced by startPrefixMapping() that have
        # not yet been written as xmlns attributes.
        self._undeclared_ns_maps = []
        self._encoding = encoding

    def _write(self, text):
        """Write text to the output object.

        `str` instances are written unchanged; anything else is first
        encoded with the configured encoding, using the module-level
        _error_handling policy for unencodable characters.
        """
        if isinstance(text, str):
            self._out.write(text)
        else:
            self._out.write(text.encode(self._encoding, _error_handling))

    def _qname(self, name):
        """Builds a qualified name from a (ns_url, localname) pair"""
        if name[0]:
            # The name is in a non-empty namespace
            prefix = self._current_context[name[0]]
            if prefix:
                # If it is not the default namespace, prepend the prefix
                return prefix + ":" + name[1]
        # Return the unqualified name
        return name[1]

    # ContentHandler methods

    def startDocument(self):
        """Write the XML declaration, naming the configured encoding."""
        self._write('<?xml version="1.0" encoding="%s"?>\n' %
                    self._encoding)

    def startPrefixMapping(self, prefix, uri):
        """Record a uri -> prefix mapping for the elements that follow."""
        # Save a snapshot of the mappings in force before this one;
        # endPrefixMapping() restores it.
        self._ns_contexts.append(self._current_context.copy())
        self._current_context[uri] = prefix
        self._undeclared_ns_maps.append((prefix, uri))

    def endPrefixMapping(self, prefix):
        """Restore the mappings saved by the latest startPrefixMapping()."""
        self._current_context = self._ns_contexts[-1]
        del self._ns_contexts[-1]

    def startElement(self, name, attrs):
        """Write a start tag with its attributes (non-namespace mode)."""
        self._write('<' + name)
        for (attr_name, value) in attrs.items():
            self._write(' %s=%s' % (attr_name, quoteattr(value)))
        self._write('>')

    def endElement(self, name):
        """Write an end tag (non-namespace mode)."""
        self._write('</%s>' % name)

    def startElementNS(self, name, qname, attrs):
        """Write a start tag for a (uri, localname) element name.

        Pending prefix mappings are written as xmlns attributes first,
        then the element's own attributes.  The qname argument is not
        used; the written name is derived from the current mappings.
        """
        self._write('<' + self._qname(name))

        for prefix, uri in self._undeclared_ns_maps:
            # Go through _write() like all other output so that the
            # declarations are encoded consistently.
            if prefix:
                self._write(' xmlns:%s="%s"' % (prefix, uri))
            else:
                self._write(' xmlns="%s"' % uri)
        self._undeclared_ns_maps = []

        for (attr_name, value) in attrs.items():
            self._write(' %s=%s' % (self._qname(attr_name), quoteattr(value)))
        self._write('>')

    def endElementNS(self, name, qname):
        """Write an end tag for a (uri, localname) element name."""
        self._write('</%s>' % self._qname(name))

    def characters(self, content):
        """Write character data with &, < and > escaped."""
        self._write(escape(content))

    def ignorableWhitespace(self, content):
        """Write ignorable whitespace unchanged."""
        self._write(content)

    def processingInstruction(self, target, data):
        """Write a processing instruction."""
        self._write('<?%s %s?>' % (target, data))
class XMLFilterBase(xmlreader.XMLReader):
    """This class is designed to sit between an XMLReader and the
    client application's event handlers.  By default, it does nothing
    but pass requests up to the reader and events on to the handlers
    unmodified, but subclasses can override specific methods to modify
    the event stream or the configuration requests as they pass
    through."""

    def __init__(self, parent = None):
        """Create a filter; parent is the reader that parse() drives."""
        xmlreader.XMLReader.__init__(self)
        self._parent = parent

    # ErrorHandler methods (forwarded to this filter's error handler)

    def error(self, exception):
        self._err_handler.error(exception)

    def fatalError(self, exception):
        self._err_handler.fatalError(exception)

    def warning(self, exception):
        self._err_handler.warning(exception)

    # ContentHandler methods (forwarded to this filter's content handler)

    def setDocumentLocator(self, locator):
        self._cont_handler.setDocumentLocator(locator)

    def startDocument(self):
        self._cont_handler.startDocument()

    def endDocument(self):
        self._cont_handler.endDocument()

    def startPrefixMapping(self, prefix, uri):
        self._cont_handler.startPrefixMapping(prefix, uri)

    def endPrefixMapping(self, prefix):
        self._cont_handler.endPrefixMapping(prefix)

    def startElement(self, name, attrs):
        self._cont_handler.startElement(name, attrs)

    def endElement(self, name):
        self._cont_handler.endElement(name)

    def startElementNS(self, name, qname, attrs):
        self._cont_handler.startElementNS(name, qname, attrs)

    def endElementNS(self, name, qname):
        self._cont_handler.endElementNS(name, qname)

    def characters(self, content):
        self._cont_handler.characters(content)

    def ignorableWhitespace(self, chars):
        self._cont_handler.ignorableWhitespace(chars)

    def processingInstruction(self, target, data):
        self._cont_handler.processingInstruction(target, data)

    def skippedEntity(self, name):
        self._cont_handler.skippedEntity(name)

    # DTDHandler methods (forwarded to this filter's DTD handler)

    def notationDecl(self, name, publicId, systemId):
        self._dtd_handler.notationDecl(name, publicId, systemId)

    def unparsedEntityDecl(self, name, publicId, systemId, ndata):
        self._dtd_handler.unparsedEntityDecl(name, publicId, systemId, ndata)

    # EntityResolver methods

    def resolveEntity(self, publicId, systemId):
        return self._ent_handler.resolveEntity(publicId, systemId)

    # XMLReader methods (forwarded to the parent reader)

    def parse(self, source):
        """Parse source with the parent reader, routing events via self."""
        # Register this filter as every kind of handler on the parent so
        # that all events pass through the forwarding methods above.
        self._parent.setContentHandler(self)
        self._parent.setErrorHandler(self)
        self._parent.setEntityResolver(self)
        self._parent.setDTDHandler(self)
        self._parent.parse(source)

    def setLocale(self, locale):
        self._parent.setLocale(locale)

    def getFeature(self, name):
        return self._parent.getFeature(name)

    def setFeature(self, name, state):
        self._parent.setFeature(name, state)

    def getProperty(self, name):
        return self._parent.getProperty(name)

    def setProperty(self, name, value):
        self._parent.setProperty(name, value)

    # XMLFilter methods

    def getParent(self):
        """Return the parent reader."""
        return self._parent

    def setParent(self, parent):
        """Replace the parent reader."""
        self._parent = parent
# --- Utility functions
def prepare_input_source(source, base = ""):
    """This function takes an InputSource and an optional base URL and
    returns a fully resolved InputSource object ready for reading.

    source may also be a string, which is used as the system identifier
    of a new InputSource, or any object with a read attribute, which is
    installed as the byte stream of a new InputSource (its name
    attribute, when present, becomes the system identifier).
    """

    if isinstance(source, tuple(_StringTypes)):
        source = xmlreader.InputSource(source)
    elif hasattr(source, "read"):
        f = source
        source = xmlreader.InputSource()
        source.setByteStream(f)
        if hasattr(f, "name"):
            source.setSystemId(f.name)

    if source.getByteStream() is None:
        # No stream yet: open one from the system identifier.  A file
        # that exists relative to the directory of base wins; anything
        # else is joined with base as a URL and opened through urllib.
        sysid = source.getSystemId()
        basehead = os.path.dirname(os.path.normpath(base))
        sysidfilename = os.path.join(basehead, sysid)
        if os.path.isfile(sysidfilename):
            source.setSystemId(sysidfilename)
            f = open(sysidfilename, "rb")
        else:
            source.setSystemId(urlparse.urljoin(base, sysid))
            f = urllib.urlopen(source.getSystemId())

        source.setByteStream(f)

    return source