1 """A quick DOM implementation.
3 Python's xml.dom is very slow. The xml.sax module is also slow (as it imports urllib2).
4 This is our light-weight version.
7 # Copyright (C) 2009, Thomas Leonard
8 # See the README file for details, or visit http://0install.net.
10 from xml
.parsers
import expat
class Element(object):
    """An XML element.

    @ivar uri: the element's namespace (None when the element has none)
    @type uri: str
    @ivar name: the element's localName
    @type name: str
    @ivar attrs: the element's attributes (key is in the form [namespace " "] localName)
    @type attrs: {str: str}
    @ivar childNodes: children
    @type childNodes: [L{Element}]
    @ivar content: the text content
    @type content: str
    """

    __slots__ = ['uri', 'name', 'attrs', 'childNodes', 'content']

    def __init__(self, uri, name, attrs):
        """Create an element with no children and no text content.

        @param uri: the element's namespace, or None
        @param name: the element's localName
        @param attrs: the element's attributes; a copy is stored, so later
            changes to the caller's mapping do not affect this element
        """
        # Every slot is assigned here: a class using __slots__ has no
        # instance __dict__, so reading a slot that was never set raises
        # AttributeError.
        self.uri = uri
        self.name = name
        self.attrs = attrs.copy()
        self.childNodes = []
        self.content = None

    def __str__(self):
        """Return a rough textual rendering of the element, for debugging.

        The output is XML-like but not well-formed XML: the namespace is
        shown in braces and attribute values are neither quoted nor escaped.
        Children take precedence over text content; an element with neither
        is rendered as a self-closing tag.
        """
        attrs = [key + '=' + value for key, value in self.attrs.items()]
        start = '<{%s}%s %s' % (self.uri, self.name, ' '.join(attrs))
        if self.childNodes:
            return start + '>' + '\n'.join(map(str, self.childNodes)) + ('</%s>' % (self.name))
        if self.content:
            return start + '>' + self.content + ('</%s>' % (self.name))
        # Neither children nor text: content may be None here, so it must
        # not be concatenated.
        return start + '/>'

    def getAttribute(self, name):
        """Return the value of an attribute, or None if it is not present.

        @param name: the attribute key, in the same form as the keys of attrs
        """
        return self.attrs.get(name, None)
45 """SAXHandler that builds a tree of L{Element}s"""
def startElementNS(self, fullname, attrs):
    """Push a new L{Element} onto the stack for an opening tag.

    @param fullname: the element name: either "namespace localName"
        (separated by a single space) or just the localName when the
        element has no namespace
    @param attrs: the element's attributes
    """
    # Split on the first space only; `sep` is empty when there is no
    # namespace part, in which case the whole name is the localName.
    uri, sep, localname = fullname.partition(' ')
    if sep:
        element = Element(uri, localname, attrs)
    else:
        element = Element(None, fullname, attrs)
    # Exactly one element is pushed per opening tag.
    self.stack.append(element)
    # Start collecting text afresh so that text seen before this tag is
    # not attributed to the new element.
    self.contents = ''
def characters(self, data):
    """Append a piece of character data to the text collected so far.

    The collected text is read from self.contents when the element ends.

    @param data: the text to add
    """
    # NOTE(review): assumes self.contents has already been set to a string
    # before any character data arrives - confirm where it is initialised.
    self.contents += data
def endElementNS(self, name):
    """Finish the element on top of the stack and attach it to its parent.

    The text collected for the element is stripped of surrounding
    whitespace and stored as its content.

    @param name: the name of the closing element (not used: the element
        being closed is always the one on top of the stack)
    """
    finished = self.stack.pop()
    finished.content = self.contents.strip()
    # Clear the collected text so it is not counted again as part of the
    # enclosing element's content.
    self.contents = ''
    if self.stack:
        self.stack[-1].childNodes.append(finished)
    else:
        # The outermost element has no parent to be appended to; keep it
        # so the finished tree can be retrieved.
        # NOTE(review): the attribute name `doc` is an assumption - confirm
        # it against the code that reads the parsed root.
        self.doc = finished
71 """Parse an XML stream into a tree of L{Element}s.
72 @param source: data to parse
76 handler
= QSAXhandler()
77 parser
= expat
.ParserCreate(namespace_separator
= ' ')
79 parser
.StartElementHandler
= handler
.startElementNS
80 parser
.EndElementHandler
= handler
.endElementNS
81 parser
.CharacterDataHandler
= handler
.characters
83 parser
.ParseFile(source
)