Simpler/cleaner.
[pyTivo/wgw.git] / xmpp / simplexml.py
blobeeb723e4f9ed0c17a2cc62d1b189c1926bc78f22
1 ## simplexml.py based on Matthew Allum's xmlstream.py
2 ##
3 ## Copyright (C) 2003-2005 Alexey "Snake" Nezhdanov
4 ##
5 ## This program is free software; you can redistribute it and/or modify
6 ## it under the terms of the GNU General Public License as published by
7 ## the Free Software Foundation; either version 2, or (at your option)
8 ## any later version.
9 ##
10 ## This program is distributed in the hope that it will be useful,
11 ## but WITHOUT ANY WARRANTY; without even the implied warranty of
12 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 ## GNU General Public License for more details.
15 # $Id: simplexml.py,v 1.33 2007/09/11 12:46:16 normanr Exp $
17 """Simplexml module provides xmpppy library with all needed tools to handle XML nodes and XML streams.
18 I'm personally using it in many other separate projects. It is designed to be as standalone as possible."""
20 import xml.parsers.expat
def XMLescape(txt):
    """Return "txt" with the symbols & < > " replaced by their XML entities."""
    escaped = txt
    # The ampersand must go first, otherwise the "&" of the entities
    # produced by the later replacements would be escaped again.
    for symbol, entity in (("&", "&amp;"), ("<", "&lt;"), (">", "&gt;"), ('"', "&quot;")):
        escaped = escaped.replace(symbol, entity)
    return escaped
ENCODING = 'utf-8'


def ustr(what):
    """Convert object "what" to a unicode string.

    A unicode string (or an instance of a subclass of it) is returned
    untouched. Any other object is converted with its own __str__ method,
    falling back to str() if it has none; if that yields a byte string it is
    decoded using ENCODING.
    """
    # type(u'') is the unicode text type of whichever interpreter runs this.
    text_type = type(u'')
    if isinstance(what, text_type):
        return what
    try:
        r = what.__str__()
    except AttributeError:
        r = str(what)
    if not isinstance(r, text_type):
        return text_type(r, ENCODING)
    return r
class Node:
    """ Node class describes the syntax of a separate XML node. Its constructor permits node creation
        from a "namespace name" tag, attributes and a payload of text strings and other nodes.
        It does not natively support building a node from a text string and uses the NodeBuilder class
        for that purpose. After creation the node can be changed through its methods.
        A node can be serialised into a string in one of two modes: default (where the textual
        representation describes the node exactly) and "fancy" - with whitespace added for indentation
        to make the result more readable by a human.

        Internally the content is kept in two lists: "data" holds the text chunks and "kids" holds the
        child nodes (None marks a position without a child). Chunk number i is serialised before
        child number i.

        The class attribute FORCE_NODE_RECREATION defaults to false, enabling fast node replication
        from another node. The drawback of the fast way is that the new node shares its child nodes
        and attribute values with the original, so changing one may influence the other.
        """
    FORCE_NODE_RECREATION = 0

    def __init__(self, tag=None, attrs={}, payload=[], parent=None, node=None):
        """ Takes "tag" argument as the name of node (prepended by namespace, if needed, and separated
            from it by a space), "attrs" dictionary as the set of attributes, "payload" list as the set
            of text strings and child nodes that this node carries within itself and "parent" argument
            that is another node that this one will be the child of. "node" may be either a text string
            containing exactly one node or another Node instance to begin with. If both "node" and
            other arguments are provided then the node is initially created as a replica of "node"
            and then modified to be compliant with the other arguments.
            The default "attrs" and "payload" objects are only read, never modified. """
        if node:
            if self.FORCE_NODE_RECREATION and isinstance(node, Node):
                node = str(node)
            if not isinstance(node, Node):
                # NodeBuilder is handed "self" as its initial node and fills it in while parsing.
                node = NodeBuilder(node, self)
            else:
                self.name, self.namespace, self.parent = node.name, node.namespace, node.parent
                # Shallow copies: the containers are new, their items are shared with "node".
                self.attrs = dict(node.attrs)
                self.data = list(node.data)
                self.kids = list(node.kids)
        else:
            self.name, self.namespace, self.attrs, self.data, self.kids, self.parent = 'tag', '', {}, [], [], None

        if tag:
            # "namespace name" -> both parts; bare "name" -> the current namespace is kept.
            self.namespace, self.name = ([self.namespace] + tag.split())[-2:]
        if parent:
            self.parent = parent
        if self.parent and not self.namespace:
            self.namespace = self.parent.namespace
        self.attrs.update(attrs)
        if isinstance(payload, (type(''), type(u''))):
            payload = [payload]
        for item in payload:
            if isinstance(item, Node):
                self.addChild(node=item)
            else:
                self.addData(item)

    def _escaped_data(self, index, fancy):
        """ Returns the XML-escaped text chunk number "index" (whitespace-stripped in fancy mode)
            or an empty string if there is no such chunk. """
        if index >= len(self.data):
            return ''
        if fancy:
            return XMLescape(self.data[index].strip())
        return XMLescape(self.data[index])

    def __str__(self, fancy=0):
        """ Method used to dump node into textual representation.
            If "fancy" argument is set to true produces indented output for readability;
            its value is the nesting level, starting from 1. """
        indent = (fancy - 1) * 2 * ' '
        s = indent + "<" + self.name
        if self.namespace:
            # The namespace is declared only where it differs from the parent's one.
            if not self.parent or self.parent.namespace != self.namespace:
                s = s + ' xmlns="%s"' % self.namespace
        for key in self.attrs.keys():
            val = ustr(self.attrs[key])
            s = s + ' %s="%s"' % (key, XMLescape(val))
        s = s + ">"
        cnt = 0
        if self.kids:
            if fancy:
                s = s + "\n"
            for a in self.kids:
                s = s + self._escaped_data(cnt, fancy)
                if a:
                    s = s + a.__str__(fancy and fancy + 1)
                cnt = cnt + 1
        # One more text chunk may follow the last child.
        s = s + self._escaped_data(cnt, fancy)
        if not self.kids and s[-1:] == '>':
            # Nothing was written after the opening tag: collapse it into an empty-element tag.
            s = s[:-1] + ' />'
            if fancy:
                s = s + "\n"
        else:
            if fancy and not self.data:
                s = s + indent
            s = s + "</" + self.name + ">"
            if fancy:
                s = s + "\n"
        return s

    def getCDATA(self):
        """ Serialise node, dropping all tags and leaving CDATA intact.
            That effectively kills all formatting, leaving only the text that was contained in XML.
            Positions that have no text chunk (e.g. after clearData or setData) contribute nothing. """
        s = ""
        cnt = 0
        if self.kids:
            for a in self.kids:
                if cnt < len(self.data):
                    s = s + self.data[cnt]
                if a:
                    s = s + a.getCDATA()
                cnt = cnt + 1
        if cnt < len(self.data):
            s = s + self.data[cnt]
        return s

    def addChild(self, name=None, attrs={}, payload=[], namespace=None, node=None):
        """ If "node" argument is provided, adds it as child node (and makes this node its parent).
            Else creates new node from the other arguments' values and adds it as well.
            Returns the added node. Raises AttributeError if "attrs" contains an 'xmlns' key. """
        if 'xmlns' in attrs:
            raise AttributeError("Use namespace=x instead of attrs={'xmlns':x}")
        if namespace:
            name = namespace + ' ' + name
        if node:
            newnode = node
            node.parent = self
        else:
            newnode = Node(tag=name, parent=self, attrs=attrs, payload=payload)
        self.kids.append(newnode)
        # Empty text chunk keeps the "data" and "kids" lists the same length.
        self.data.append(u'')
        return newnode

    def addData(self, data):
        """ Adds some CDATA to node. """
        self.data.append(ustr(data))
        # None placeholder keeps the "data" and "kids" lists the same length.
        self.kids.append(None)

    def clearData(self):
        """ Removes all CDATA from the node. The list of child nodes is left as it is. """
        self.data = []

    def delAttr(self, key):
        """ Deletes an attribute "key". Raises KeyError if there is no such attribute. """
        del self.attrs[key]

    def delChild(self, node, attrs={}):
        """ Deletes the "node" from the node's childs list, if "node" is an instance.
            Else deletes the first node that have specified name and (optionally) attributes.
            The child's slot in the list is replaced by None. Returns the deleted node. """
        if not isinstance(node, Node):
            # NOTE: getTag returns None when nothing matches; the lookup below then works on None.
            node = self.getTag(node, attrs)
        self.kids[self.kids.index(node)] = None
        return node

    def getAttrs(self):
        """ Returns all node's attributes as dictionary (the node's own dictionary, not a copy). """
        return self.attrs

    def getAttr(self, key):
        """ Returns value of specified attribute or None if the node has no such attribute. """
        return self.attrs.get(key)

    def getChildren(self):
        """ Returns all node's child nodes as list (the node's own list; it may contain None entries). """
        return self.kids

    def getData(self):
        """ Returns all node CDATA as string (concatenated). """
        return ''.join(self.data)

    def getName(self):
        """ Returns the name of node. """
        return self.name

    def getNamespace(self):
        """ Returns the namespace of node. """
        return self.namespace

    def getParent(self):
        """ Returns the parent of node (if present). """
        return self.parent

    def getPayload(self):
        """ Return the payload of node i.e. list of child nodes and CDATA entries.
            F.e. for "<node>text1<nodea/><nodeb/> text2</node>" will be returned list:
            ['text1', <nodea instance>, <nodeb instance>, ' text2'].
            Empty text chunks and empty child slots are skipped. """
        ret = []
        for i in range(max(len(self.data), len(self.kids))):
            if i < len(self.data) and self.data[i]:
                ret.append(self.data[i])
            if i < len(self.kids) and self.kids[i]:
                ret.append(self.kids[i])
        return ret

    def getTag(self, name, attrs={}, namespace=None):
        """ Filters all child nodes using specified arguments as filter.
            Returns the first found or None if not found. """
        return self.getTags(name, attrs, namespace, one=1)

    def getTagAttr(self, tag, attr):
        """ Returns attribute value of the child with specified name
            (or None if there is no such child or no such attribute). """
        child = self.getTag(tag)
        if child is None:
            return None
        return child.attrs.get(attr)

    def getTagData(self, tag):
        """ Returns concatenated CDATA of the child with specified name (or None if no such child). """
        child = self.getTag(tag)
        if child is None:
            return None
        return child.getData()

    def getTags(self, name, attrs={}, namespace=None, one=0):
        """ Filters all child nodes using specified arguments as filter.
            Returns the list of nodes found. If "one" is true returns just the
            first node found (or None if nothing matches) instead of a list. """
        nodes = []
        for node in self.kids:
            if not node:
                continue
            if namespace and namespace != node.getNamespace():
                continue
            if node.getName() != name:
                continue
            for key in attrs.keys():
                if key not in node.attrs or node.attrs[key] != attrs[key]:
                    break
            else:
                # Every requested attribute is present with the requested value.
                if one:
                    return node
                nodes.append(node)
        if one:
            return None
        return nodes

    def setAttr(self, key, val):
        """ Sets attribute "key" with the value "val". """
        self.attrs[key] = val

    def setData(self, data):
        """ Sets node's CDATA to provided string. Resets all previous CDATA! """
        self.data = [ustr(data)]

    def setName(self, val):
        """ Changes the node name. """
        self.name = val

    def setNamespace(self, namespace):
        """ Changes the node namespace. """
        self.namespace = namespace

    def setParent(self, node):
        """ Sets node's parent to "node". WARNING: does not check if the parent is already present
            and does not remove the node from the list of childs of previous parent. """
        self.parent = node

    def setPayload(self, payload, add=0):
        """ Sets node payload according to the list specified. WARNING: completely replaces all node's
            previous content unless "add" is true, in which case the payload is appended.
            If you wish just to add child or CDATA - use addData or addChild methods.
            Node instances become child nodes (their parent is left untouched), None entries become
            empty slots and anything else is stored as CDATA. """
        if isinstance(payload, (type(''), type(u''))):
            payload = [payload]
        if not add:
            self.kids = []
            self.data = []
        for item in payload:
            if item is None or isinstance(item, Node):
                self.kids.append(item)
                self.data.append(u'')
            else:
                # Text must live in "data": serialisation and tag lookup expect only
                # Node instances (or None) in "kids".
                self.addData(item)

    def setTag(self, name, attrs={}, namespace=None):
        """ Same as getTag but if the node with specified namespace/attributes not found, creates such
            node and returns it. """
        node = self.getTags(name, attrs, namespace=namespace, one=1)
        if node:
            return node
        return self.addChild(name, attrs, namespace=namespace)

    def setTagAttr(self, tag, attr, val):
        """ Creates new node (if not already present) with name "tag"
            and sets its attribute "attr" to value "val". """
        child = self.getTag(tag)
        if child is None:
            self.addChild(tag, attrs={attr: val})
        else:
            child.attrs[attr] = val

    def setTagData(self, tag, val, attrs={}):
        """ Creates new node (if not already present) with name "tag" and (optionally) attributes "attrs"
            and sets its CDATA to string "val". """
        child = self.getTag(tag, attrs)
        if child is None:
            self.addChild(tag, attrs, payload=[ustr(val)])
        else:
            child.setData(ustr(val))

    def has_attr(self, key):
        """ Checks if node have attribute "key". """
        return key in self.attrs

    def __getitem__(self, item):
        """ Returns node's attribute "item" value. """
        return self.getAttr(item)

    def __setitem__(self, item, val):
        """ Sets node's attribute "item" value. """
        return self.setAttr(item, val)

    def __delitem__(self, item):
        """ Deletes node's attribute "item". """
        return self.delAttr(item)

    def __getattr__(self, attr):
        """ Reduce memory usage caused by T/NT classes - use memory only when needed.
            The helper is created on first access and stored on the instance, so this
            method is not called for it again. """
        if attr == 'T':
            self.T = T(self)
            return self.T
        if attr == 'NT':
            self.NT = NT(self)
            return self.NT
        raise AttributeError(attr)
class T:
    """ Auxiliary class used for quick access to node's child nodes by attribute syntax. """

    def __init__(self, node):
        # Stored through __dict__ because plain assignment would go to
        # our own __setattr__ and be treated as a child tag named "node".
        self.__dict__['node'] = node

    def __getattr__(self, attr):
        """ Returns the wrapped node's child tag named "attr". """
        return self.node.getTag(attr)

    def __setattr__(self, attr, val):
        """ Replaces child "attr" with a replica of "val" if it is a Node,
            otherwise sets the child's CDATA to "val". """
        if not isinstance(val, Node):
            return self.node.setTagData(attr, val)
        Node.__init__(self.node.setTag(attr), node=val)

    def __delattr__(self, attr):
        """ Deletes the wrapped node's child tag named "attr". """
        return self.node.delChild(attr)
class NT(T):
    """ Auxiliary class used to quickly create node's child nodes by attribute syntax. """

    def __getattr__(self, attr):
        """ Adds a new empty child named "attr" to the wrapped node and returns it. """
        return self.node.addChild(attr)

    def __setattr__(self, attr, val):
        """ Adds "val" itself as a child if it is a Node, otherwise adds
            a new child named "attr" carrying "val" as its payload. """
        if not isinstance(val, Node):
            return self.node.addChild(attr, payload=[val])
        self.node.addChild(attr, node=val)
DBG_NODEBUILDER = 'nodebuilder'


class NodeBuilder:
    """ Builds a Node class minidom from data parsed to it. This class is used for two purposes:
        1. Creation of an XML Node from a textual representation. F.e. reading a config file.
           See the XML2Node function.
        2. Handling an incoming XML stream. This is done by changing
           the _dispatch_depth attribute and redefining the dispatch method.
        You do not need to use this class directly if you are not designing your own XML handler."""

    def __init__(self, data=None, initial_node=None):
        """ Takes two optional parameters: "data" and "initial_node".
            By default the builder starts without any node and creates one when parsing begins.
            Though, if "initial_node" is provided it is used as the "starting point".
            You can think about it as of "node upgrade".
            "data" (if provided) is fed to the parser immediately after instance init,
            as a complete document. """
        self.DEBUG(DBG_NODEBUILDER, "Preparing to handle incoming XML stream.", 'start')
        # With a namespace separator expat reports names as "namespace-uri name".
        self._parser = xml.parsers.expat.ParserCreate(namespace_separator=' ')
        self._parser.StartElementHandler = self.starttag
        self._parser.EndElementHandler = self.endtag
        self._parser.CharacterDataHandler = self.handle_data
        self._parser.StartNamespaceDeclHandler = self.handle_namespace_start
        self.Parse = self._parser.Parse

        self.__depth = 0
        self._dispatch_depth = 1
        self._document_attrs = None
        self._mini_dom = initial_node
        self.last_is_data = 1
        self._ptr = None
        # Maps namespace URI -> "prefix:" used to rebuild qualified attribute names.
        self.namespaces = {"http://www.w3.org/XML/1998/namespace": 'xml:'}
        self.xmlns = "http://www.w3.org/XML/1998/namespace"

        if data:
            self._parser.Parse(data, 1)

    def destroy(self):
        """ Method used to allow class instance to be garbage-collected.
            Drops the parser's references to this instance's bound methods. """
        self._parser.StartElementHandler = None
        self._parser.EndElementHandler = None
        self._parser.CharacterDataHandler = None
        self._parser.StartNamespaceDeclHandler = None

    def starttag(self, tag, attrs):
        """XML Parser callback. Used internally"""
        # FIXME: Crude hack. And it also slows down the whole library considerably.
        # Attribute names reported as "namespace-uri name" are renamed to "prefix:name".
        # The keys are snapshotted first because the dictionary is modified inside the loop.
        for attr in list(attrs.keys()):
            sp = attr.rfind(" ")
            if sp == -1:
                continue
            ns = attr[:sp]
            attrs[self.namespaces[ns] + attr[sp + 1:]] = attrs[attr]
            del attrs[attr]
        self.__depth += 1
        self.DEBUG(DBG_NODEBUILDER, "DEPTH -> %i , tag -> %s, attrs -> %s" % (self.__depth, tag, repr(attrs)), 'down')
        if self.__depth == self._dispatch_depth:
            # Start of a node that will be dispatched: create it or re-initialise the supplied one.
            if not self._mini_dom:
                self._mini_dom = Node(tag=tag, attrs=attrs)
            else:
                Node.__init__(self._mini_dom, tag=tag, attrs=attrs)
            self._ptr = self._mini_dom
        elif self.__depth > self._dispatch_depth:
            self._ptr.kids.append(Node(tag=tag, parent=self._ptr, attrs=attrs))
            self._ptr = self._ptr.kids[-1]
        if self.__depth == 1:
            self._document_attrs = attrs
            ns, name = ([''] + tag.split())[-2:]
            self.stream_header_received(ns, name, attrs)
        # No text preceded this tag: record an empty chunk in the parent for it.
        if not self.last_is_data and self._ptr.parent:
            self._ptr.parent.data.append('')
        self.last_is_data = 0

    def endtag(self, tag):
        """XML Parser callback. Used internally"""
        self.DEBUG(DBG_NODEBUILDER, "DEPTH -> %i , tag -> %s" % (self.__depth, tag), 'up')
        if self.__depth == self._dispatch_depth:
            self.dispatch(self._mini_dom)
        elif self.__depth > self._dispatch_depth:
            self._ptr = self._ptr.parent
        else:
            self.DEBUG(DBG_NODEBUILDER, "Got higher than dispatch level. Stream terminated?", 'stop')
        self.__depth -= 1
        self.last_is_data = 0
        if self.__depth == 0:
            self.stream_footer_received()

    def handle_data(self, data):
        """XML Parser callback. Used internally"""
        self.DEBUG(DBG_NODEBUILDER, data, 'data')
        if not self._ptr:
            return
        if self.last_is_data:
            # Continuation of the text delivered by the previous callback.
            self._ptr.data[-1] += data
        else:
            self._ptr.data.append(data)
            self.last_is_data = 1

    def handle_namespace_start(self, prefix, uri):
        """XML Parser callback. Used internally"""
        if prefix:
            self.namespaces[uri] = prefix + ':'
        else:
            self.xmlns = uri

    def DEBUG(self, level, text, comment=None):
        """ Gets all NodeBuilder walking events. Can be used for debugging if redefined."""

    def getDom(self):
        """ Returns just built Node. """
        return self._mini_dom

    def dispatch(self, stanza):
        """ Gets called when the NodeBuilder reaches some level of depth on its way up with the built
            node as argument. Can be redefined to convert incoming XML stanzas to program events. """

    def stream_header_received(self, ns, tag, attrs):
        """ Method called when stream just opened. """

    def stream_footer_received(self):
        """ Method called when stream just closed. """
def XML2Node(xml):
    """ Converts supplied textual string into XML node. Handy f.e. for reading configuration file.
        Raises xml.parsers.expat.ExpatError if provided string is not well-formed XML. """
    builder = NodeBuilder(xml)
    return builder.getDom()
def BadXML2Node(xml):
    """ Converts supplied textual string into XML node. Survives if xml data is cut half way through.
        I.e. "<html>some text <br>some more text". Will raise xml.parsers.expat.ExpatError on misplaced
        tags though. F.e. "<b>some text <br>some more text</b>" will not work."""
    builder = NodeBuilder()
    # Feed the text as a non-final chunk: handing it to the NodeBuilder constructor would
    # parse it as a complete document and raise on elements that are still open.
    builder.Parse(xml, 0)
    return builder.getDom()