"""
SAX driver for the pyexpat C module.  This driver works with
pyexpat.__version__ == '2.22'.
"""
from xml.sax._exceptions import *
from xml.sax.handler import feature_validation, feature_namespaces
from xml.sax.handler import feature_namespace_prefixes
from xml.sax.handler import feature_external_ges, feature_external_pes
from xml.sax.handler import feature_string_interning
from xml.sax.handler import property_xml_string, property_interning_dict

# xml.parsers.expat does not raise ImportError in Jython, so the platform
# has to be checked explicitly before attempting the import below.
import sys
if sys.platform[:4] == "java":
    raise SAXReaderNotAvailable("expat not available in Java", None)

# Report a missing or unusable expat as "reader not available" so callers
# only have to deal with the SAX exception type.
try:
    from xml.parsers import expat
except ImportError:
    raise SAXReaderNotAvailable("expat not supported", None)
else:
    if not hasattr(expat, "ParserCreate"):
        raise SAXReaderNotAvailable("expat not supported", None)
from xml.sax import xmlreader, saxutils, handler

# Short module-level aliases used by the element event handlers.
AttributesImpl = xmlreader.AttributesImpl
AttributesNSImpl = xmlreader.AttributesNSImpl

# If we're using a sufficiently recent version of Python, we can use
# weak references to avoid cycles between the parser and content
# handler, otherwise we'll just have to pretend.
try:
    import weakref
except ImportError:
    def _mkproxy(o):
        # Fallback: no weak references available, hand back the object.
        return o
else:
    _mkproxy = weakref.proxy
class ExpatLocator(xmlreader.Locator):
    """Locator for use with the ExpatParser class.

    This uses a weak reference to the parser object to avoid creating
    a circular reference between the parser and the content handler.
    """

    def __init__(self, parser):
        """Remember *parser* (the SAX reader) through ``_mkproxy``."""
        self._ref = _mkproxy(parser)

    def getColumnNumber(self):
        """Return expat's current column, or None if no parser exists."""
        parser = self._ref
        # parser._parser is the underlying expat object; the reader sets it
        # to None when it is not attached to a live expat parser.
        if parser._parser is None:
            return None
        return parser._parser.ErrorColumnNumber

    def getLineNumber(self):
        """Return expat's current line, or 1 if no parser exists."""
        parser = self._ref
        if parser._parser is None:
            return 1
        return parser._parser.ErrorLineNumber

    def getPublicId(self):
        """Return the public identifier of the reader's current source."""
        parser = self._ref
        # NOTE(review): a proxy object is never None, so this guard can only
        # trigger when _mkproxy returns its argument unchanged - confirm.
        if parser is None:
            return None
        return parser._source.getPublicId()

    def getSystemId(self):
        """Return the system identifier of the reader's current source."""
        parser = self._ref
        if parser is None:
            return None
        return parser._source.getSystemId()
class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
    """SAX driver for the pyexpat C module."""

    def __init__(self, namespaceHandling=0, bufsize=2**16-20):
        """Create an idle reader.

        namespaceHandling -- true to report namespace-aware events
                             (startElementNS/endElementNS).
        bufsize           -- passed through to IncrementalParser.
        """
        xmlreader.IncrementalParser.__init__(self, bufsize)
        self._source = xmlreader.InputSource()
        self._parser = None
        self._namespaces = namespaceHandling
        self._lex_handler_prop = None
        self._parsing = 0
        # (parser, source) pairs saved while an external entity is parsed.
        self._entity_stack = []
        self._external_ges = 1
        self._interning = None

    # XMLReader methods

    def parse(self, source):
        "Parse an XML document from a URL or an InputSource."
        source = saxutils.prepare_input_source(source)

        self._source = source
        # A fresh expat parser must exist before the locator is handed out.
        self.reset()
        self._cont_handler.setDocumentLocator(ExpatLocator(self))
        xmlreader.IncrementalParser.parse(self, source)

    def prepareParser(self, source):
        """Tell expat the base URI when the source has a system id."""
        if source.getSystemId() is not None:
            self._parser.SetBase(source.getSystemId())

    # Redefined setContentHandler to allow changing handlers during parsing

    def setContentHandler(self, handler):
        """Install *handler*; rewire expat's callbacks if already parsing."""
        xmlreader.IncrementalParser.setContentHandler(self, handler)
        if self._parsing:
            self._reset_cont_handler()

    def getFeature(self, name):
        """Return the state of feature *name*.

        Raises SAXNotRecognizedException for unknown feature names.
        """
        if name == feature_namespaces:
            return self._namespaces
        elif name == feature_string_interning:
            return self._interning is not None
        elif name in (feature_validation, feature_external_pes,
                      feature_namespace_prefixes):
            # These can never be switched on (see setFeature).
            return 0
        elif name == feature_external_ges:
            return self._external_ges
        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)

    def setFeature(self, name, state):
        """Set feature *name* to *state*.

        Raises SAXNotSupportedException while parsing, or when asked to
        enable something expat cannot do, and SAXNotRecognizedException
        for unknown feature names.
        """
        if self._parsing:
            raise SAXNotSupportedException("Cannot set features while parsing")

        if name == feature_namespaces:
            self._namespaces = state
        elif name == feature_external_ges:
            self._external_ges = state
        elif name == feature_string_interning:
            if state:
                # Keep an existing interning dict; only create one if needed.
                if self._interning is None:
                    self._interning = {}
            else:
                self._interning = None
        elif name == feature_validation:
            if state:
                raise SAXNotSupportedException(
                    "expat does not support validation")
        elif name == feature_external_pes:
            if state:
                raise SAXNotSupportedException(
                    "expat does not read external parameter entities")
        elif name == feature_namespace_prefixes:
            if state:
                raise SAXNotSupportedException(
                    "expat does not report namespace prefixes")
        else:
            raise SAXNotRecognizedException(
                "Feature '%s' not recognized" % name)

    def getProperty(self, name):
        """Return the value of property *name*.

        The XML string property is only available while a parser exists.
        Raises SAXNotRecognizedException for unknown property names.
        """
        if name == handler.property_lexical_handler:
            return self._lex_handler_prop
        elif name == property_interning_dict:
            return self._interning
        elif name == property_xml_string:
            if self._parser:
                if hasattr(self._parser, "GetInputContext"):
                    return self._parser.GetInputContext()
                else:
                    raise SAXNotRecognizedException(
                        "This version of expat does not support getting"
                        " the XML string")
            else:
                raise SAXNotSupportedException(
                    "XML string cannot be returned when not parsing")
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)

    def setProperty(self, name, value):
        """Set property *name* to *value*.

        Raises SAXNotSupportedException for the read-only XML string
        property and SAXNotRecognizedException for unknown names.
        """
        if name == handler.property_lexical_handler:
            self._lex_handler_prop = value
            if self._parsing:
                self._reset_lex_handler_prop()
        elif name == property_interning_dict:
            self._interning = value
        elif name == property_xml_string:
            raise SAXNotSupportedException("Property '%s' cannot be set" %
                                           name)
        else:
            raise SAXNotRecognizedException("Property '%s' not recognized" %
                                            name)

    # IncrementalParser methods

    def feed(self, data, isFinal = 0):
        """Pass *data* to expat, starting a new document if necessary.

        expat errors are wrapped in SAXParseException and handed to the
        error handler's fatalError().
        """
        if not self._parsing:
            self.reset()
            self._parsing = 1
            self._cont_handler.startDocument()

        try:
            # The isFinal parameter is internal to the expat reader.
            # If it is set to true, expat will check validity of the entire
            # document. When feeding chunks, they are not normally final -
            # except when invoked from close.
            self._parser.Parse(data, isFinal)
        except expat.error as e:
            exc = SAXParseException(expat.ErrorString(e.code), e, self)
            # FIXME: when to invoke error()?
            self._err_handler.fatalError(exc)

    def close(self):
        """Finish the document: final parse, endDocument(), drop the parser."""
        if self._entity_stack:
            # If we are completing an external entity, do nothing here
            return
        try:
            self.feed("", isFinal = 1)
            self._cont_handler.endDocument()
        finally:
            # Leave the "parsing" state even when the final parse fails, so
            # the reader can be reconfigured and reused afterwards.
            self._parsing = 0
        # break cycle created by expat handlers pointing to our methods
        self._parser = None

    def _reset_cont_handler(self):
        """Point expat's PI and character-data callbacks at the handler."""
        self._parser.ProcessingInstructionHandler = \
                                    self._cont_handler.processingInstruction
        self._parser.CharacterDataHandler = self._cont_handler.characters

    def _reset_lex_handler_prop(self):
        """Wire (or unwire) expat's lexical callbacks to the lexical handler."""
        lex = self._lex_handler_prop
        parser = self._parser
        if lex is None:
            parser.CommentHandler = None
            parser.StartCdataSectionHandler = None
            parser.EndCdataSectionHandler = None
            parser.StartDoctypeDeclHandler = None
            parser.EndDoctypeDeclHandler = None
        else:
            parser.CommentHandler = lex.comment
            parser.StartCdataSectionHandler = lex.startCDATA
            parser.EndCdataSectionHandler = lex.endCDATA
            # Routed through start_doctype_decl, which reorders the ids.
            parser.StartDoctypeDeclHandler = self.start_doctype_decl
            parser.EndDoctypeDeclHandler = lex.endDTD

    def reset(self):
        """Create a fresh expat parser and attach all callbacks to it."""
        if self._namespaces:
            # " " is the separator expat puts between the parts of a name.
            self._parser = expat.ParserCreate(self._source.getEncoding(), " ",
                                              intern=self._interning)
            self._parser.namespace_prefixes = 1
            self._parser.StartElementHandler = self.start_element_ns
            self._parser.EndElementHandler = self.end_element_ns
        else:
            self._parser = expat.ParserCreate(self._source.getEncoding(),
                                              intern = self._interning)
            self._parser.StartElementHandler = self.start_element
            self._parser.EndElementHandler = self.end_element

        self._reset_cont_handler()
        self._parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl
        self._parser.NotationDeclHandler = self.notation_decl
        self._parser.StartNamespaceDeclHandler = self.start_namespace_decl
        self._parser.EndNamespaceDeclHandler = self.end_namespace_decl

        self._decl_handler_prop = None
        if self._lex_handler_prop:
            self._reset_lex_handler_prop()
#         self._parser.DefaultHandler =
#         self._parser.DefaultHandlerExpand =
#         self._parser.NotStandaloneHandler =
        self._parser.ExternalEntityRefHandler = self.external_entity_ref
        try:
            self._parser.SkippedEntityHandler = self.skipped_entity_handler
        except AttributeError:
            # This pyexpat does not support SkippedEntity
            pass
        self._parser.SetParamEntityParsing(
            expat.XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)

        self._parsing = 0
        self._entity_stack = []

    # Locator methods

    def getColumnNumber(self):
        """Return expat's current column, or None without a parser."""
        if self._parser is None:
            return None
        return self._parser.ErrorColumnNumber

    def getLineNumber(self):
        """Return expat's current line, or 1 without a parser."""
        if self._parser is None:
            return 1
        return self._parser.ErrorLineNumber

    def getPublicId(self):
        """Return the public identifier of the current input source."""
        return self._source.getPublicId()

    def getSystemId(self):
        """Return the system identifier of the current input source."""
        return self._source.getSystemId()

    # event handlers
    def start_element(self, name, attrs):
        """expat callback: forward a start tag (non-namespace mode)."""
        self._cont_handler.startElement(name, AttributesImpl(attrs))

    def end_element(self, name):
        """expat callback: forward an end tag (non-namespace mode)."""
        self._cont_handler.endElement(name)

    def start_element_ns(self, name, attrs):
        """expat callback: forward a start tag in namespace mode.

        Names arrive as space-separated strings: one part means no
        namespace, three parts carry a prefix as the last part, and
        anything else is used as a tuple unchanged.
        """
        pair = name.split()
        if len(pair) == 1:
            # no namespace
            pair = (None, name)
        elif len(pair) == 3:
            pair = pair[0], pair[1]
        else:
            # default namespace
            pair = tuple(pair)

        newattrs = {}
        qnames = {}
        for (aname, value) in attrs.items():
            parts = aname.split()
            length = len(parts)
            if length == 1:
                # no namespace
                qname = aname
                apair = (None, aname)
            elif length == 3:
                qname = "%s:%s" % (parts[2], parts[1])
                apair = parts[0], parts[1]
            else:
                # default namespace
                qname = parts[1]
                apair = tuple(parts)

            newattrs[apair] = value
            qnames[apair] = qname

        self._cont_handler.startElementNS(pair, None,
                                          AttributesNSImpl(newattrs, qnames))

    def end_element_ns(self, name):
        """expat callback: forward an end tag in namespace mode."""
        pair = name.split()
        if len(pair) == 1:
            pair = (None, name)
        elif len(pair) == 3:
            pair = pair[0], pair[1]
        else:
            pair = tuple(pair)

        self._cont_handler.endElementNS(pair, None)

    # this is not used (call directly to ContentHandler)
    def processing_instruction(self, target, data):
        self._cont_handler.processingInstruction(target, data)

    # this is not used (call directly to ContentHandler)
    def character_data(self, data):
        self._cont_handler.characters(data)

    def start_namespace_decl(self, prefix, uri):
        """expat callback: forward the start of a prefix mapping."""
        self._cont_handler.startPrefixMapping(prefix, uri)

    def end_namespace_decl(self, prefix):
        """expat callback: forward the end of a prefix mapping."""
        self._cont_handler.endPrefixMapping(prefix)

    def start_doctype_decl(self, name, sysid, pubid, has_internal_subset):
        """expat callback: forward a DOCTYPE start to the lexical handler."""
        # expat supplies (sysid, pubid); startDTD is called with
        # (pubid, sysid).
        self._lex_handler_prop.startDTD(name, pubid, sysid)

    def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name):
        """expat callback: forward an unparsed entity declaration."""
        self._dtd_handler.unparsedEntityDecl(name, pubid, sysid, notation_name)

    def notation_decl(self, name, base, sysid, pubid):
        """expat callback: forward a notation declaration."""
        self._dtd_handler.notationDecl(name, pubid, sysid)

    def external_entity_ref(self, context, base, sysid, pubid):
        """expat callback: resolve and parse an external general entity.

        Returns 1 when the entity was skipped or parsed, 0 when parsing
        it failed.
        """
        if not self._external_ges:
            return 1

        source = self._ent_handler.resolveEntity(pubid, sysid)
        source = saxutils.prepare_input_source(source,
                                               self._source.getSystemId() or
                                               "")

        # Save the outer parser/source; close() is a no-op while the
        # stack is non-empty.
        self._entity_stack.append((self._parser, self._source))
        self._parser = self._parser.ExternalEntityParserCreate(context)
        self._source = source

        try:
            xmlreader.IncrementalParser.parse(self, source)
        except Exception:
            # Only ordinary errors are reported as a failed entity;
            # KeyboardInterrupt/SystemExit are allowed to propagate.
            return 0  # FIXME: save error info here?

        (self._parser, self._source) = self._entity_stack[-1]
        del self._entity_stack[-1]
        return 1

    def skipped_entity_handler(self, name, is_pe):
        """expat callback: report an entity that was not expanded."""
        if is_pe:
            # The SAX spec requires to report skipped PEs with a '%'
            name = '%'+name
        self._cont_handler.skippedEntity(name)
def create_parser(*args, **kwargs):
    """Return a new ExpatParser built from the given arguments.

    All positional and keyword arguments are forwarded unchanged to the
    ExpatParser constructor.
    """
    return ExpatParser(*args, **kwargs)
if __name__ == "__main__":
    # Manual smoke test: parse a document from a URL and write the SAX
    # events back out as XML through XMLGenerator.
    import xml.sax.saxutils
    p = create_parser()
    p.setContentHandler(xml.sax.saxutils.XMLGenerator())
    p.setErrorHandler(xml.sax.ErrorHandler())
    p.parse("http://www.ibiblio.org/xml/examples/shakespeare/hamlet.xml")