2 import xmlcore
.sax
.handler
6 _StringTypes
= [types
.StringType
, types
.UnicodeType
]
8 _StringTypes
= [types
.StringType
]
# Event-type tokens.  Each one is used as the first item of the tuple stored
# in an event cell, and each token's value is simply its own name.
START_ELEMENT = "START_ELEMENT"
END_ELEMENT = "END_ELEMENT"
# NOTE(review): COMMENT is referenced by the handler code further down, but
# no definition survives in this span (the listing's numbering skips one line
# exactly here).  The value follows the pattern of the neighbouring tokens --
# confirm against the pristine file.
COMMENT = "COMMENT"
START_DOCUMENT = "START_DOCUMENT"
END_DOCUMENT = "END_DOCUMENT"
PROCESSING_INSTRUCTION = "PROCESSING_INSTRUCTION"
IGNORABLE_WHITESPACE = "IGNORABLE_WHITESPACE"
CHARACTERS = "CHARACTERS"
19 class PullDOM(xmlcore
.sax
.ContentHandler
):
23 def __init__(self
, documentFactory
=None):
24 from xmlcore
.dom
import XML_NAMESPACE
25 self
.documentFactory
= documentFactory
26 self
.firstEvent
= [None, None]
27 self
.lastEvent
= self
.firstEvent
28 self
.elementStack
= []
29 self
.push
= self
.elementStack
.append
31 self
.pop
= self
.elementStack
.pop
32 except AttributeError:
33 # use class' pop instead
35 self
._ns
_contexts
= [{XML_NAMESPACE
:'xml'}] # contains uri -> prefix dicts
36 self
._current
_context
= self
._ns
_contexts
[-1]
37 self
.pending_events
= []
40 result
= self
.elementStack
[-1]
41 del self
.elementStack
[-1]
def setDocumentLocator(self, locator):
    """Store the supplied locator object on the handler.

    The object is kept, unmodified, as ``self._locator``; nothing else is
    done with it here.
    """
    self._locator = locator
def startPrefixMapping(self, prefix, uri):
    """Record a namespace declaration and open a new prefix context.

    The declaration is queued on ``self._xmlns_attrs`` (created on first
    use) as a ``(prefix, uri)`` pair, with a false prefix stored as
    ``'xmlns'``.  A snapshot of the uri -> prefix mapping as it stood
    before this declaration is pushed on ``self._ns_contexts``; the live
    mapping is then updated in place so that ``uri`` maps to ``prefix``
    (or ``None`` for a false prefix).
    """
    if not hasattr(self, '_xmlns_attrs'):
        self._xmlns_attrs = []
    self._xmlns_attrs.append((prefix or 'xmlns', uri))
    # Save the pre-declaration state; endPrefixMapping() pops it back.
    self._ns_contexts.append(self._current_context.copy())
    self._current_context[uri] = prefix or None
def endPrefixMapping(self, prefix):
    """Close the most recently opened prefix context.

    The last mapping saved on ``self._ns_contexts`` is popped and becomes
    ``self._current_context``.  ``prefix`` is accepted but not consulted.
    """
    self._current_context = self._ns_contexts.pop()
57 def startElementNS(self
, name
, tagName
, attrs
):
58 # Retrieve xml namespace declaration attributes.
59 xmlns_uri
= 'http://www.w3.org/2000/xmlns/'
60 xmlns_attrs
= getattr(self
, '_xmlns_attrs', None)
61 if xmlns_attrs
is not None:
62 for aname
, value
in xmlns_attrs
:
63 attrs
._attrs
[(xmlns_uri
, aname
)] = value
64 self
._xmlns
_attrs
= []
67 # When using namespaces, the reader may or may not
68 # provide us with the original name. If not, create
69 # *a* valid tagName from the current context.
71 prefix
= self
._current
_context
[uri
]
73 tagName
= prefix
+ ":" + localname
77 node
= self
.document
.createElementNS(uri
, tagName
)
79 node
= self
.buildDocument(uri
, tagName
)
81 # When the tagname is not prefixed, it just appears as
84 node
= self
.document
.createElement(localname
)
86 node
= self
.buildDocument(None, localname
)
88 for aname
,value
in attrs
.items():
89 a_uri
, a_localname
= aname
90 if a_uri
== xmlns_uri
:
91 if a_localname
== 'xmlns':
94 qname
= 'xmlns:' + a_localname
95 attr
= self
.document
.createAttributeNS(a_uri
, qname
)
96 node
.setAttributeNodeNS(attr
)
98 prefix
= self
._current
_context
[a_uri
]
100 qname
= prefix
+ ":" + a_localname
103 attr
= self
.document
.createAttributeNS(a_uri
, qname
)
104 node
.setAttributeNodeNS(attr
)
106 attr
= self
.document
.createAttribute(a_localname
)
107 node
.setAttributeNode(attr
)
110 self
.lastEvent
[1] = [(START_ELEMENT
, node
), None]
111 self
.lastEvent
= self
.lastEvent
[1]
def endElementNS(self, name, tagName):
    """Queue an END_ELEMENT event for the element being closed.

    The node is obtained from ``self.pop()``.  Events form a singly linked
    list of two-item cells ``[payload, next]``: the new cell is hooked
    onto the current tail and then becomes the tail itself.  ``name`` and
    ``tagName`` are accepted but not consulted.
    """
    self.lastEvent[1] = [(END_ELEMENT, self.pop()), None]
    self.lastEvent = self.lastEvent[1]
118 def startElement(self
, name
, attrs
):
120 node
= self
.document
.createElement(name
)
122 node
= self
.buildDocument(None, name
)
124 for aname
,value
in attrs
.items():
125 attr
= self
.document
.createAttribute(aname
)
127 node
.setAttributeNode(attr
)
129 self
.lastEvent
[1] = [(START_ELEMENT
, node
), None]
130 self
.lastEvent
= self
.lastEvent
[1]
def endElement(self, name):
    """Queue an END_ELEMENT event for the element being closed.

    The node is obtained from ``self.pop()``.  The new ``[payload, next]``
    cell is hooked onto the current tail of the event list and then
    becomes the tail itself.  ``name`` is accepted but not consulted.
    """
    self.lastEvent[1] = [(END_ELEMENT, self.pop()), None]
    self.lastEvent = self.lastEvent[1]
137 def comment(self
, s
):
139 node
= self
.document
.createComment(s
)
140 self
.lastEvent
[1] = [(COMMENT
, node
), None]
141 self
.lastEvent
= self
.lastEvent
[1]
143 event
= [(COMMENT
, s
), None]
144 self
.pending_events
.append(event
)
146 def processingInstruction(self
, target
, data
):
148 node
= self
.document
.createProcessingInstruction(target
, data
)
149 self
.lastEvent
[1] = [(PROCESSING_INSTRUCTION
, node
), None]
150 self
.lastEvent
= self
.lastEvent
[1]
152 event
= [(PROCESSING_INSTRUCTION
, target
, data
), None]
153 self
.pending_events
.append(event
)
def ignorableWhitespace(self, chars):
    """Queue an IGNORABLE_WHITESPACE event carrying ``chars``.

    A text node is created through ``self.document.createTextNode`` and
    appended to the event list as a new tail cell.
    """
    node = self.document.createTextNode(chars)
    self.lastEvent[1] = [(IGNORABLE_WHITESPACE, node), None]
    self.lastEvent = self.lastEvent[1]
def characters(self, chars):
    """Queue a CHARACTERS event carrying ``chars``.

    A text node is created through ``self.document.createTextNode`` and
    appended to the event list as a new tail cell.
    """
    node = self.document.createTextNode(chars)
    self.lastEvent[1] = [(CHARACTERS, node), None]
    self.lastEvent = self.lastEvent[1]
def startDocument(self):
    """Install the default document factory if none was supplied.

    When ``self.documentFactory`` is ``None`` it is set to
    ``xmlcore.dom.minidom.Document.implementation``; a factory that is
    already present is left alone.  The import is done here, on demand,
    so minidom is only loaded when the default is actually needed.
    """
    if self.documentFactory is None:
        import xmlcore.dom.minidom
        self.documentFactory = xmlcore.dom.minidom.Document.implementation
170 def buildDocument(self
, uri
, tagname
):
171 # Can't do that in startDocument, since we need the tagname
172 # XXX: obtain DocumentType
173 node
= self
.documentFactory
.createDocument(uri
, tagname
, None)
175 self
.lastEvent
[1] = [(START_DOCUMENT
, node
), None]
176 self
.lastEvent
= self
.lastEvent
[1]
178 # Put everything we have seen so far into the document
179 for e
in self
.pending_events
:
180 if e
[0][0] == PROCESSING_INSTRUCTION
:
182 n
= self
.document
.createProcessingInstruction(target
, data
)
183 e
[0] = (PROCESSING_INSTRUCTION
, n
)
184 elif e
[0][0] == COMMENT
:
185 n
= self
.document
.createComment(e
[0][1])
188 raise AssertionError("Unknown pending event ",e
[0][0])
189 self
.lastEvent
[1] = e
191 self
.pending_events
= None
192 return node
.firstChild
194 def endDocument(self
):
195 self
.lastEvent
[1] = [(END_DOCUMENT
, self
.document
), None]
199 "clear(): Explicitly release parsing structures"
203 def warning(self
, exception
):
205 def error(self
, exception
):
207 def fatalError(self
, exception
):
210 class DOMEventStream
:
211 def __init__(self
, stream
, parser
, bufsize
):
214 self
.bufsize
= bufsize
215 if not hasattr(self
.parser
, 'feed'):
216 self
.getEvent
= self
._slurp
220 self
.pulldom
= PullDOM()
221 # This content handler relies on namespace support
222 self
.parser
.setFeature(xmlcore
.sax
.handler
.feature_namespaces
, 1)
223 self
.parser
.setContentHandler(self
.pulldom
)
225 def __getitem__(self
, pos
):
240 def expandNode(self
, node
):
241 event
= self
.getEvent()
244 token
, cur_node
= event
247 if token
!= END_ELEMENT
:
248 parents
[-1].appendChild(cur_node
)
249 if token
== START_ELEMENT
:
250 parents
.append(cur_node
)
251 elif token
== END_ELEMENT
:
253 event
= self
.getEvent()
256 # use IncrementalParser interface, so we get the desired
258 if not self
.pulldom
.firstEvent
[1]:
259 self
.pulldom
.lastEvent
= self
.pulldom
.firstEvent
260 while not self
.pulldom
.firstEvent
[1]:
261 buf
= self
.stream
.read(self
.bufsize
)
265 self
.parser
.feed(buf
)
266 rc
= self
.pulldom
.firstEvent
[1][0]
267 self
.pulldom
.firstEvent
[1] = self
.pulldom
.firstEvent
[1][1]
271 """ Fallback replacement for getEvent() using the
272 standard SAX2 interface, which means we slurp the
273 SAX events into memory (no performance gain, but
274 we are compatible to all SAX parsers).
276 self
.parser
.parse(self
.stream
)
277 self
.getEvent
= self
._emit
281 """ Fallback replacement for getEvent() that emits
282 the events that _slurp() read previously.
284 rc
= self
.pulldom
.firstEvent
[1][0]
285 self
.pulldom
.firstEvent
[1] = self
.pulldom
.firstEvent
[1][1]
289 """clear(): Explicitly release parsing objects"""
class SAX2DOM(PullDOM):
    """PullDOM variant that also links every new node to its parent.

    Each override first delegates to the PullDOM implementation and then
    calls ``appendChild`` on a node taken from ``self.elementStack``.
    """

    def startElementNS(self, name, tagName, attrs):
        """Handle the element via PullDOM, then append it to its parent.

        After the base call the new element is read from the top of
        ``self.elementStack`` and its parent from the entry just below, so
        the stack is assumed to hold at least two entries at that point.
        """
        PullDOM.startElementNS(self, name, tagName, attrs)
        curNode = self.elementStack[-1]
        parentNode = self.elementStack[-2]
        parentNode.appendChild(curNode)

    def startElement(self, name, attrs):
        """Handle the element via PullDOM, then append it to its parent.

        After the base call the new element is read from the top of
        ``self.elementStack`` and its parent from the entry just below, so
        the stack is assumed to hold at least two entries at that point.
        """
        PullDOM.startElement(self, name, attrs)
        curNode = self.elementStack[-1]
        parentNode = self.elementStack[-2]
        parentNode.appendChild(curNode)

    def processingInstruction(self, target, data):
        """Handle the instruction via PullDOM, then attach its node.

        The node is read from the payload of the tail event cell
        (``self.lastEvent[0][1]``) and appended to the element on top of
        ``self.elementStack``.
        """
        PullDOM.processingInstruction(self, target, data)
        node = self.lastEvent[0][1]
        parentNode = self.elementStack[-1]
        parentNode.appendChild(node)

    def ignorableWhitespace(self, chars):
        """Handle the whitespace via PullDOM, then attach its text node.

        The node is read from the payload of the tail event cell
        (``self.lastEvent[0][1]``) and appended to the element on top of
        ``self.elementStack``.
        """
        PullDOM.ignorableWhitespace(self, chars)
        node = self.lastEvent[0][1]
        parentNode = self.elementStack[-1]
        parentNode.appendChild(node)

    def characters(self, chars):
        """Handle the character data via PullDOM, then attach its text node.

        The node is read from the payload of the tail event cell
        (``self.lastEvent[0][1]``) and appended to the element on top of
        ``self.elementStack``.
        """
        PullDOM.characters(self, chars)
        node = self.lastEvent[0][1]
        parentNode = self.elementStack[-1]
        parentNode.appendChild(node)
# Buffer size used when a caller does not supply one: 16 KiB less 20.
# NOTE(review): the reason for the 20 subtracted is not stated in this file.
default_bufsize = (2 ** 14) - 20
330 def parse(stream_or_string
, parser
=None, bufsize
=None):
332 bufsize
= default_bufsize
333 if type(stream_or_string
) in _StringTypes
:
334 stream
= open(stream_or_string
)
336 stream
= stream_or_string
338 parser
= xmlcore
.sax
.make_parser()
339 return DOMEventStream(stream
, parser
, bufsize
)
341 def parseString(string
, parser
=None):
343 from cStringIO
import StringIO
345 from StringIO
import StringIO
347 bufsize
= len(string
)
348 buf
= StringIO(string
)
350 parser
= xmlcore
.sax
.make_parser()
351 return DOMEventStream(buf
, parser
, bufsize
)