drain the read fd when refusing authentication
[pandav-og.git] / xmldict.py
blob307fee511396eb815af57592b8c238c482ab3caa
1 # A sane XML-to-objects parser
2 # TODO: error & better malformed xml handling
3 # (c) 2005. Ivan Voras
4 import sys
5 import re
class Tag:
    """One XML element: name, namespace, attributes, text data and children.

    Children are kept in the dictionary ``self.d``, keyed by child tag name.
    A name that occurs once maps to a single Tag; a name that occurs several
    times maps to a list of Tags in document order.  The class forwards the
    basic mapping protocol (len, [], del, iteration, ``in``) to ``self.d``.

    Attributes set by the constructor:
      name    -- tag name, with the namespace prefix removed when the prefix
                 is known to the parser
      rawname -- tag name exactly as passed in
      ns      -- namespace URI for a known prefix, otherwise ''
      attrs   -- dictionary of attributes
      data    -- text content
    """

    # Both text types on Python 2 (str, unicode); just str on Python 3.
    _STRING_TYPES = (str, type(u''))

    def __init__(self, name, attrs, data='', parser=None):
        """Create a tag.

        name   -- tag name, optionally 'prefix:local'
        attrs  -- attribute dictionary, or a raw attribute string which is
                  parsed with splitattrs()
        data   -- initial text content
        parser -- object with a ``namespaces`` dictionary (prefix -> URI);
                  when None, no namespace registration or resolution is done
        """
        self.d = {}
        self.name = name
        self.rawname = name
        # Always defined, so callers can read .ns without an AttributeError.
        self.ns = ''
        self.attrs = attrs
        if isinstance(self.attrs, self._STRING_TYPES):
            self.attrs = splitattrs(self.attrs)

        if parser is not None:
            # Register namespace declarations: 'xmlns' (stored under the
            # empty prefix) and 'xmlns:prefix'.
            for a in self.attrs:
                if a == 'xmlns' or a.startswith('xmlns:'):
                    parser.namespaces[a[6:]] = self.attrs[a]

            # Resolve 'prefix:local' against the namespaces seen so far.
            p = name.find(':')
            if p > 0:
                nsname = name[:p]
                if nsname in parser.namespaces:
                    self.ns = parser.namespaces[nsname]
                    self.name = name[p + 1:]

        self.data = data

    # Emulate dictionary d
    def __len__(self):
        return len(self.d)

    def __getitem__(self, key):
        return self.d[key]

    def __setitem__(self, key, value):
        self.d[key] = value

    def __delitem__(self, key):
        del self.d[key]

    def __iter__(self):
        # iter() over the dict yields keys on both Python 2 and Python 3.
        return iter(self.d)

    def __contains__(self, key):
        return key in self.d

    def _children(self, key):
        """Return the children stored under key, always as a list."""
        child = self.d[key]
        if isinstance(child, list):
            return child
        return [child]

    def prettyPrint(self, indent=0):
        """Return an indented multi-line dump of this tag and its children.

        Each nesting level is indented by one more space.  Same-named
        siblings are printed one after another under the same key.
        """
        s = " " * indent
        if self.attrs:
            s += u'<%s %s> %s ' % (self.name, self.attrs, self.data)
        else:
            s += u'<%s> %s ' % (self.name, self.data)

        s += "\n"
        for k in self.d:
            for child in self._children(k):
                s += " " * indent
                s += u'%s:\n' % k
                s += child.prettyPrint(indent + 1)
        return s

    def __str__(self):
        """Returns unicode semi human-readable representation of the structure"""
        if self.attrs:
            s = u'<%s %s> %s ' % (self.name, self.attrs, self.data)
        else:
            s = u'<%s> %s ' % (self.name, self.data)

        for k in self.d:
            if isinstance(self.d[k], Tag):
                s += u'|%s: %s|' % (k, str(self.d[k]))
            else:
                s += u'|' + u','.join([str(x) for x in self.d[k]]) + u'|'
        return s

    def addChild(self, tag):
        """Adds a child to self. tag must be instance of Tag.

        Returns the tag that was added.
        """
        if tag.name in self.d:
            # If there are multiple sibling tags with the same name, form a
            # list: the first duplicate turns the single Tag into a list.
            if isinstance(self.d[tag.name], Tag):
                self.d[tag.name] = [self.d[tag.name]]
            self.d[tag.name].append(tag)
        else:
            self.d[tag.name] = tag
        return tag

    def toUnicode(self, fromencoding, recurse=True):
        """Converts data & attribute data to unicode from specified encoding.

        Only byte strings are decoded (undecodable bytes are replaced);
        values that are already text, or are not strings, are left alone.
        With recurse true, all children are converted too, including those
        stored in lists of same-named siblings.
        """
        # 'bytes' is the byte-string type on Python 3 and an alias of str on
        # Python 2.6+, so this check works on both.
        if isinstance(self.data, bytes):
            self.data = self.data.decode(fromencoding, 'replace')
        for a in self.attrs:
            if isinstance(self.attrs[a], bytes):
                self.attrs[a] = self.attrs[a].decode(fromencoding, 'replace')
        if recurse:
            for k in self.d:
                for child in self._children(k):
                    child.toUnicode(fromencoding, recurse)
class XMLDict_Parser:
    """Minimal, non-validating XML scanner that builds a tree of Tag objects.

    The parser walks the string held in ``self.xml`` from offset ``self.p``.
    ``self.namespaces`` maps namespace prefixes to URIs and is filled in by
    the Tag constructor as declarations are encountered.  ``self.encoding``
    starts as the interpreter default and is replaced by the encoding named
    in an ``<?xml ... ?>`` declaration, if one is seen.
    """

    def __init__(self, xml):
        self.xml = xml
        self.p = 0
        self.encoding = sys.getdefaultencoding()
        self.namespaces = {}

    def getnexttag(self):
        """Scan forward to the next tag and return (tag, attrs, data).

        tag   -- tag text up to the first whitespace; closing tags keep their
                 leading '/', and empty-element tags always end with '/'
                 (also when the slash followed the attributes, as in
                 '<a b="1"/>' or '<br />')
        attrs -- the raw attribute string ('' when there is none)
        data  -- stripped text found between the previous position and
                 this tag

        When there is no further '<', returns (None, None, remaining_text)
        and does not advance the position.

        Raises ValueError if a '<' has no matching '>'.
        """
        ptag = self.xml.find('<', self.p)
        if ptag < 0:
            return None, None, self.xml[self.p:].strip()

        data = self.xml[self.p:ptag].strip()

        self.p = ptag
        self.tagbegin = ptag

        p2 = self.xml.find('>', self.p + 1)
        if p2 < 0:
            raise ValueError("Malformed XML - unclosed tag?")

        tag = self.xml[ptag + 1:p2]
        self.p = p2 + 1
        self.tagend = p2 + 1

        # Split the name from the attributes on any whitespace, so a tab or
        # newline after the name works as well as a space.
        parts = tag.split(None, 1)
        if len(parts) == 2:
            tag, attrs = parts
        elif parts:
            tag, attrs = parts[0], ''
        else:
            tag, attrs = '', ''

        # Callers recognise an empty-element tag by a trailing '/' on the tag
        # name, so move a slash that follows the attributes onto the name.
        if attrs.endswith('/'):
            attrs = attrs[:-1].rstrip()
            if not tag.endswith('/'):
                tag += '/'

        return tag, attrs, data

    def builddict(self):
        """Builds a nested-dictionary-like structure from the xml. This method
        picks up tags on the main level and calls processTag() for nested tags.

        Returns a synthetic root Tag named '<root>' whose children are the
        top-level tags; its ``encoding`` attribute holds the detected
        encoding.  Errors raised while processing a top-level tag are
        reported on stderr and parsing continues with the next tag.
        """
        d = Tag('<root>', '')
        while True:
            tag, attrs, data = self.getnexttag()
            if data != '':  # data is actually that between the last tag and this one
                sys.stderr.write("Warning: inline data between tags?!\n")
            if not tag:
                break
            if tag[-1] == '/':  # an 'empty' tag (e.g. <empty/>)
                d.addChild(Tag(tag[:-1], attrs, parser=self))
                continue
            elif tag[0] == '?':  # special tag
                t = d.addChild(Tag(tag, attrs, parser=self))
                if tag == '?xml' and 'encoding' in t.attrs:
                    self.encoding = t.attrs['encoding']
            else:
                try:
                    self.processTag(d.addChild(Tag(tag, attrs, parser=self)))
                except Exception:
                    # Best effort: report and carry on, but let
                    # KeyboardInterrupt / SystemExit propagate.
                    sys.stderr.write("Error processing tag %s\n" % tag)
        d.encoding = self.encoding
        return d

    def processTag(self, dtag):
        """Process single tag's data.

        Consumes input until the closing tag matching dtag.rawname, appending
        text to dtag.data and adding nested tags as children (recursively).
        A warning is written to stderr if the input ends before the closing
        tag is found.
        """
        until = '/' + dtag.rawname
        while True:
            tag, attrs, data = self.getnexttag()
            if data:
                dtag.data += data
            if tag is None:
                sys.stderr.write("Unterminated tag '" + dtag.rawname + "'?\n")
                break
            if tag == until:
                break
            if not tag:
                # A bare '<>' has no name to index into; skip it.
                continue
            if tag[-1] == '/':
                dtag.addChild(Tag(tag[:-1], attrs, parser=self))
                continue
            self.processTag(dtag.addChild(Tag(tag, attrs, parser=self)))
def splitattrs(att):
    """Extracts name="value" pairs from string; returns them as dictionary.

    Values may be enclosed in double or single quotes and may be empty.
    Attribute names may contain letters, digits, '_', ':', '.' and '-'.
    Text that does not form a name/value pair is ignored; when a name is
    repeated, the last value wins.
    """
    pattern = (r'([A-Za-z_][A-Za-z_:0-9.\-]*)'    # attribute name
               r'\s*=\s*'
               r'(?:"([^"]*)"' r"|'([^']*)')")    # "value" or 'value'
    d = {}
    for m in re.finditer(pattern, att):
        name, dquoted, squoted = m.groups()
        # Exactly one of the two value groups participates in a match.
        if dquoted is not None:
            d[name] = dquoted
        else:
            d[name] = squoted
    return d
def builddict(xml):
    """Parse the XML string in one call.

    Creates an XMLDict_Parser for xml and returns the result of its
    builddict() method.
    """
    return XMLDict_Parser(xml).builddict()
if __name__ == '__main__':  # functionality test
    # Single-argument print(...) calls behave the same on Python 2 (where the
    # parentheses are just grouping) and on Python 3.

    # A single tag with text content.
    p = XMLDict_Parser('<tag1>text</tag1>')
    d = p.builddict()
    print(d)
    print("Contents of tag1 is: '%s'" % d['tag1'].data)

    # Repeated sibling tags are collected into a list.
    p = XMLDict_Parser('<group><user>joe</user><user>nick</user><user>john</user></group>')
    d = p.builddict()
    print(d)
    print('users are:')
    for u in d['group']['user']:
        print(u)

    # Repeated empty tags.
    p = XMLDict_Parser('<group><user/><user/><user/></group>')
    d = p.builddict()
    print(d)

    # Distinct empty tags, accessed through the dictionary protocol.
    p = XMLDict_Parser('<users><joe/><nick/><john/></users>')
    d = p.builddict()
    print(d)
    if 'joe' in d['users']:
        print('have no fear, joe is near.')
    if 'george' in d['users']:
        print('george is evil')
    print('users are:')
    for u in d['users']:
        print(u)