Set bug/patch count. Take a bow, everyone!
[pytest.git] / Lib / xmlcore / dom / pulldom.py
blobdad3718fea36f267fae77d674ad0bb4dfc2bae3d
1 import xmlcore.sax
2 import xmlcore.sax.handler
3 import types
5 try:
6 _StringTypes = [types.StringType, types.UnicodeType]
7 except AttributeError:
8 _StringTypes = [types.StringType]
# Event-type tokens.  Each event produced by PullDOM is a tuple whose first
# item is one of these strings.
START_ELEMENT = 'START_ELEMENT'
END_ELEMENT = 'END_ELEMENT'
COMMENT = 'COMMENT'
START_DOCUMENT = 'START_DOCUMENT'
END_DOCUMENT = 'END_DOCUMENT'
PROCESSING_INSTRUCTION = 'PROCESSING_INSTRUCTION'
IGNORABLE_WHITESPACE = 'IGNORABLE_WHITESPACE'
CHARACTERS = 'CHARACTERS'
class PullDOM(xmlcore.sax.ContentHandler):
    """SAX content handler that records parser callbacks as DOM events.

    Each callback creates the matching DOM node and appends an
    ``(event_type, node)`` tuple to a singly linked list made of two-item
    cells ``[event, next_cell]``.  ``firstEvent`` is the sentinel head cell
    of that list and ``lastEvent`` is its current tail.  Open nodes are
    tracked on ``elementStack``.
    """

    _locator = None
    document = None

    def __init__(self, documentFactory=None):
        """Set up an empty event list, node stack and namespace context.

        documentFactory -- object whose ``createDocument`` is used by
        buildDocument(); when None, startDocument() installs the minidom
        implementation.
        """
        from xmlcore.dom import XML_NAMESPACE
        self.documentFactory = documentFactory
        # Head cell of the linked event list; it carries no event itself.
        head = [None, None]
        self.firstEvent = head
        self.lastEvent = head
        self.elementStack = []
        self.push = self.elementStack.append
        try:
            self.pop = self.elementStack.pop
        except AttributeError:
            # use class' pop instead
            pass
        self._ns_contexts = [{XML_NAMESPACE: 'xml'}]  # contains uri -> prefix dicts
        self._current_context = self._ns_contexts[-1]
        # Comments / processing instructions seen before the document exists.
        self.pending_events = []

    def pop(self):
        """Remove and return the top of ``elementStack``."""
        top = self.elementStack[-1]
        del self.elementStack[-1]
        return top

    def setDocumentLocator(self, locator):
        """Remember the locator handed over by the parser."""
        self._locator = locator

    def startPrefixMapping(self, prefix, uri):
        """Record a namespace declaration and update the uri -> prefix map."""
        try:
            declared = self._xmlns_attrs
        except AttributeError:
            declared = self._xmlns_attrs = []
        # A default-namespace declaration (empty prefix) is stored as 'xmlns'.
        declared.append((prefix or 'xmlns', uri))
        # Save a snapshot that endPrefixMapping() will restore.
        self._ns_contexts.append(self._current_context.copy())
        self._current_context[uri] = prefix or None

    def endPrefixMapping(self, prefix):
        """Restore the namespace context saved by startPrefixMapping()."""
        self._current_context = self._ns_contexts.pop()

    def startElementNS(self, name, tagName, attrs):
        """Create a namespace-aware element node and queue START_ELEMENT.

        name    -- ``(uri, localname)`` pair
        tagName -- qualified name, or None if the reader did not supply it
        attrs   -- attribute collection keyed by ``(uri, localname)``
        """
        # Retrieve xml namespace declaration attributes.
        xmlns_uri = 'http://www.w3.org/2000/xmlns/'
        declared = getattr(self, '_xmlns_attrs', None)
        if declared is not None:
            for decl_name, decl_value in declared:
                attrs._attrs[(xmlns_uri, decl_name)] = decl_value
            self._xmlns_attrs = []

        uri, localname = name
        if uri:
            # When using namespaces, the reader may or may not
            # provide us with the original name. If not, create
            # *a* valid tagName from the current context.
            if tagName is None:
                prefix = self._current_context[uri]
                tagName = localname
                if prefix:
                    tagName = prefix + ":" + localname
            if not self.document:
                node = self.buildDocument(uri, tagName)
            else:
                node = self.document.createElementNS(uri, tagName)
        elif not self.document:
            # When the tagname is not prefixed, it just appears as
            # localname
            node = self.buildDocument(None, localname)
        else:
            node = self.document.createElement(localname)

        for attr_key, value in attrs.items():
            attr_uri, attr_local = attr_key
            if not attr_uri:
                attr = self.document.createAttribute(attr_local)
                node.setAttributeNode(attr)
            else:
                if attr_uri == xmlns_uri:
                    # Namespace declaration: 'xmlns' or 'xmlns:<prefix>'.
                    qname = attr_local
                    if attr_local != 'xmlns':
                        qname = 'xmlns:' + attr_local
                else:
                    prefix = self._current_context[attr_uri]
                    qname = attr_local
                    if prefix:
                        qname = prefix + ":" + attr_local
                attr = self.document.createAttributeNS(attr_uri, qname)
                node.setAttributeNodeNS(attr)
            attr.value = value

        cell = [(START_ELEMENT, node), None]
        self.lastEvent[1] = cell
        self.lastEvent = cell
        self.push(node)

    def endElementNS(self, name, tagName):
        """Pop the open element and queue END_ELEMENT for it."""
        cell = [(END_ELEMENT, self.pop()), None]
        self.lastEvent[1] = cell
        self.lastEvent = cell

    def startElement(self, name, attrs):
        """Create a plain (non-namespace) element and queue START_ELEMENT."""
        if not self.document:
            node = self.buildDocument(None, name)
        else:
            node = self.document.createElement(name)

        for attr_name, value in attrs.items():
            attr = self.document.createAttribute(attr_name)
            attr.value = value
            node.setAttributeNode(attr)

        cell = [(START_ELEMENT, node), None]
        self.lastEvent[1] = cell
        self.lastEvent = cell
        self.push(node)

    def endElement(self, name):
        """Pop the open element and queue END_ELEMENT for it."""
        cell = [(END_ELEMENT, self.pop()), None]
        self.lastEvent[1] = cell
        self.lastEvent = cell

    def comment(self, s):
        """Queue a COMMENT event, deferring it if no document exists yet."""
        if not self.document:
            # No document to create the node with; keep the raw text until
            # buildDocument() runs.
            self.pending_events.append([(COMMENT, s), None])
            return
        cell = [(COMMENT, self.document.createComment(s)), None]
        self.lastEvent[1] = cell
        self.lastEvent = cell

    def processingInstruction(self, target, data):
        """Queue a PROCESSING_INSTRUCTION event, deferring it if needed."""
        if not self.document:
            # Stored as a 3-tuple; buildDocument() turns it into a node.
            self.pending_events.append(
                [(PROCESSING_INSTRUCTION, target, data), None])
            return
        node = self.document.createProcessingInstruction(target, data)
        cell = [(PROCESSING_INSTRUCTION, node), None]
        self.lastEvent[1] = cell
        self.lastEvent = cell

    def ignorableWhitespace(self, chars):
        """Queue an IGNORABLE_WHITESPACE event holding a new text node."""
        cell = [(IGNORABLE_WHITESPACE, self.document.createTextNode(chars)),
                None]
        self.lastEvent[1] = cell
        self.lastEvent = cell

    def characters(self, chars):
        """Queue a CHARACTERS event holding a new text node."""
        cell = [(CHARACTERS, self.document.createTextNode(chars)), None]
        self.lastEvent[1] = cell
        self.lastEvent = cell

    def startDocument(self):
        """Install the minidom implementation if no factory was supplied."""
        if self.documentFactory is not None:
            return
        import xmlcore.dom.minidom
        self.documentFactory = xmlcore.dom.minidom.Document.implementation

    def buildDocument(self, uri, tagname):
        """Create the document, queue START_DOCUMENT and flush pending events.

        Returns the first child of the newly created document.
        """
        # Can't do that in startDocument, since we need the tagname
        # XXX: obtain DocumentType
        doc = self.documentFactory.createDocument(uri, tagname, None)
        self.document = doc
        cell = [(START_DOCUMENT, doc), None]
        self.lastEvent[1] = cell
        self.lastEvent = cell
        self.push(doc)
        # Put everything we have seen so far into the document
        for pending in self.pending_events:
            kind = pending[0][0]
            if kind == PROCESSING_INSTRUCTION:
                _, target, data = pending[0]
                created = self.document.createProcessingInstruction(target,
                                                                    data)
                pending[0] = (PROCESSING_INSTRUCTION, created)
            elif kind == COMMENT:
                created = self.document.createComment(pending[0][1])
                pending[0] = (COMMENT, created)
            else:
                raise AssertionError("Unknown pending event ", pending[0][0])
            # The pending entry already is a list cell; link it in as-is.
            self.lastEvent[1] = pending
            self.lastEvent = pending
        self.pending_events = None
        return doc.firstChild

    def endDocument(self):
        """Queue END_DOCUMENT and pop the document off the stack."""
        # lastEvent is not advanced here; the new cell is linked but the
        # tail pointer keeps referring to the previous cell.
        self.lastEvent[1] = [(END_DOCUMENT, self.document), None]
        self.pop()

    def clear(self):
        "clear(): Explicitly release parsing structures"
        self.document = None
class ErrorHandler:
    """Error handler that prints warnings and raises everything else."""

    def warning(self, exception):
        """Print the warning and carry on."""
        print(exception)

    def error(self, exception):
        """Re-raise the exception passed in."""
        raise exception

    def fatalError(self, exception):
        """Re-raise the exception passed in."""
        raise exception
class DOMEventStream:
    """Iterator over the ``(event_type, node)`` events of an XML stream.

    Events are pulled from a PullDOM handler.  When the parser offers a
    ``feed`` method the stream is read ``bufsize`` bytes at a time and only
    as far as needed; otherwise the whole document is parsed on the first
    request and the stored events are handed out afterwards.
    """

    def __init__(self, stream, parser, bufsize):
        """Bind *stream* and *parser* and install a fresh PullDOM handler.

        stream  -- object with a ``read(size)`` method (or whatever
                   ``parser.parse`` accepts when the parser has no ``feed``)
        parser  -- SAX parser
        bufsize -- number of bytes requested per ``stream.read`` call
        """
        self.stream = stream
        self.parser = parser
        self.bufsize = bufsize
        if not hasattr(self.parser, 'feed'):
            # No incremental interface: fall back to parse-everything mode.
            self.getEvent = self._slurp
        self.reset()

    def reset(self):
        """Attach a new PullDOM handler to the parser."""
        self.pulldom = PullDOM()
        # This content handler relies on namespace support
        self.parser.setFeature(xmlcore.sax.handler.feature_namespaces, 1)
        self.parser.setContentHandler(self.pulldom)

    def __getitem__(self, pos):
        """Return the next event; *pos* is ignored.

        Raises IndexError when no events are left.
        """
        rc = self.getEvent()
        if rc:
            return rc
        raise IndexError

    def next(self):
        """Return the next event, raising StopIteration when exhausted."""
        rc = self.getEvent()
        if rc:
            return rc
        raise StopIteration

    def __iter__(self):
        return self

    def expandNode(self, node):
        """Consume events until *node* ends, building its subtree.

        Every node delivered before the END_ELEMENT of *node* is appended
        to its parent, so *node* ends up with all of its descendants.
        """
        event = self.getEvent()
        parents = [node]
        while event:
            token, cur_node = event
            if cur_node is node:
                # The only event that can carry *node* again is its own end.
                return
            if token != END_ELEMENT:
                parents[-1].appendChild(cur_node)
            if token == START_ELEMENT:
                parents.append(cur_node)
            elif token == END_ELEMENT:
                del parents[-1]
            event = self.getEvent()

    def getEvent(self):
        """Return the next event, or None once the stream is exhausted."""
        # use IncrementalParser interface, so we get the desired
        # pull effect
        pulldom = self.pulldom
        if not pulldom.firstEvent[1]:
            # Queue is empty: new events must be linked from the head again.
            pulldom.lastEvent = pulldom.firstEvent
        while not pulldom.firstEvent[1]:
            buf = self.stream.read(self.bufsize)
            if not buf:
                self.parser.close()
                return None
            self.parser.feed(buf)
        rc = pulldom.firstEvent[1][0]
        pulldom.firstEvent[1] = pulldom.firstEvent[1][1]
        return rc

    def _slurp(self):
        """ Fallback replacement for getEvent() using the
            standard SAX2 interface, which means we slurp the
            SAX events into memory (no performance gain, but
            we are compatible to all SAX parsers).
        """
        self.parser.parse(self.stream)
        self.getEvent = self._emit
        return self._emit()

    def _emit(self):
        """ Fallback replacement for getEvent() that emits
            the events that _slurp() read previously.

            Returns None once all stored events have been handed out.
        """
        head = self.pulldom.firstEvent
        cell = head[1]
        if not cell:
            # Nothing left.  Without this check the subscript below fails
            # with TypeError on None instead of letting next() and
            # __getitem__ signal the end of the stream.
            return None
        head[1] = cell[1]
        return cell[0]

    def clear(self):
        """clear(): Explicitly release parsing objects"""
        self.pulldom.clear()
        del self.pulldom
        self.parser = None
        self.stream = None
class SAX2DOM(PullDOM):
    """PullDOM variant that also links each created node into the tree.

    After the base class has created a node and queued its event, the node
    is appended to the node currently open on ``elementStack``.
    """

    def startElementNS(self, name, tagName , attrs):
        """Create the element as PullDOM does, then attach it to its parent."""
        PullDOM.startElementNS(self, name, tagName, attrs)
        curNode = self.elementStack[-1]
        parentNode = self.elementStack[-2]
        parentNode.appendChild(curNode)

    def startElement(self, name, attrs):
        """Create the element as PullDOM does, then attach it to its parent."""
        PullDOM.startElement(self, name, attrs)
        curNode = self.elementStack[-1]
        parentNode = self.elementStack[-2]
        parentNode.appendChild(curNode)

    def processingInstruction(self, target, data):
        """Queue the processing instruction and attach its node to the tree.

        Before the document exists the base class only stores the
        instruction in ``pending_events`` and leaves ``lastEvent`` alone, so
        there is no node to attach yet; buildDocument() attaches it later.
        """
        if not self.document:
            # Reading self.lastEvent[0][1] here would subscript None (or
            # pick up an unrelated event) and elementStack would be empty.
            PullDOM.processingInstruction(self, target, data)
            return
        PullDOM.processingInstruction(self, target, data)
        node = self.lastEvent[0][1]
        parentNode = self.elementStack[-1]
        parentNode.appendChild(node)

    def buildDocument(self, uri, tagname):
        """Build the document, then attach queued processing instructions.

        Returns what PullDOM.buildDocument() returns (the document's first
        child at the time it was created).
        """
        # Keep a reference: the base class resets pending_events to None
        # after converting each queued entry into a real node in place.
        queued = self.pending_events
        root = PullDOM.buildDocument(self, uri, tagname)
        for event in queued:
            if event[0][0] == PROCESSING_INSTRUCTION:
                # These were seen before the root element, so they go in
                # front of it.  Comments are not attached, matching the
                # rest of this class, which has no comment() override.
                self.document.insertBefore(event[0][1], root)
        return root

    def ignorableWhitespace(self, chars):
        """Queue the whitespace text node and attach it to the open node."""
        PullDOM.ignorableWhitespace(self, chars)
        node = self.lastEvent[0][1]
        parentNode = self.elementStack[-1]
        parentNode.appendChild(node)

    def characters(self, chars):
        """Queue the text node and attach it to the open node."""
        PullDOM.characters(self, chars)
        node = self.lastEvent[0][1]
        parentNode = self.elementStack[-1]
        parentNode.appendChild(node)
# Read size used by parse() when the caller does not pass a bufsize.
default_bufsize = 2 ** 14 - 20
def parse(stream_or_string, parser=None, bufsize=None):
    """Return a DOMEventStream over a file name or an open stream.

    stream_or_string -- a string (taken as a file name and opened) or an
                        object that is used as the stream directly
    parser           -- SAX parser; a default one is created when omitted
    bufsize          -- read size; defaults to ``default_bufsize``
    """
    if bufsize is None:
        bufsize = default_bufsize
    # isinstance() rather than an exact type match, so that subclasses of
    # the string types are opened as file names too instead of being
    # passed through as if they were streams.
    if isinstance(stream_or_string, tuple(_StringTypes)):
        stream = open(stream_or_string)
    else:
        stream = stream_or_string
    if not parser:
        parser = xmlcore.sax.make_parser()
    return DOMEventStream(stream, parser, bufsize)
def parseString(string, parser=None):
    """Return a DOMEventStream over the XML text in *string*.

    The buffer size is the length of the string, so a single read
    returns the whole document.  A default SAX parser is created when
    *parser* is omitted.
    """
    try:
        from cStringIO import StringIO
    except ImportError:
        from StringIO import StringIO

    size = len(string)
    source = StringIO(string)
    if not parser:
        parser = xmlcore.sax.make_parser()
    return DOMEventStream(source, parser, size)