A little bug fix
[jcd.git] / JID.py
blobd4fcf2c6383375a77188695aec74b48369f2e38c
1 #!/usr/bin/python
2 # -*- coding: utf-8 -*-
3 # vim: expandtab:shiftwidth=4:fileencoding=utf-8 :
5 # Copyright ® 2008 Fulvio Satta
7 # If you want to contact me, send an email to Yota_VGA@users.sf.net
9 # This file is part of jcd
11 # jcd is free software; you can redistribute it and/or modify
12 # it under the terms of the GNU General Public License as published by
13 # the Free Software Foundation; either version 2 of the License, or
14 # (at your option) any later version.
16 # jcd is distributed in the hope that it will be useful,
17 # but WITHOUT ANY WARRANTY; without even the implied warranty of
18 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 # GNU General Public License for more details.
21 # You should have received a copy of the GNU General Public License
22 # along with this program; if not, write to the Free Software
23 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
25 #TODO: Exception improvements (for an i18n and many other automatic tasks)
26 #TODO: Test, test, test
27 #TODO: Polishing
29 ##########################
30 ##### IMPORT SECTION #####
31 ##########################
33 import stringprep as _stringprep #Tables for stringprep
35 import encodings.idna as _idna #Stringprep for domains
37 import unicodedata as _unicodedata #For string normalizzation
39 #################################################
40 ##### INTERNAL STRINGPREP FUNCTIONS SECTION #####
41 #################################################
43 #Character mapping for nodes
44 def _mapNode(c):
45 if _stringprep.in_table_b1(c):
46 return ''
48 return _stringprep.map_table_b2(c)
50 #Character mapping for resources
51 def _mapResource(c):
52 if _stringprep.in_table_b1(c):
53 return ''
55 return c
57 #Raise an exception if a character is proibited in a node name
58 def _proibitionNode(c):
59 proibited_characters = [u'"', u'&', u"'", u'/', u':', u'<', u'>', u'@']
61 if _stringprep.in_table_c11(c) or c in proibited_characters:
62 raise UnicodeError('Invalid character (%c)' % c)
64 return _proibitionResource(c)
66 #Raise an exception if a character is proibited in a resource name
67 def _proibitionResource(c):
68 if _stringprep.in_table_c12(c) or \
69 _stringprep.in_table_c21(c) or \
70 _stringprep.in_table_c22(c) or \
71 _stringprep.in_table_c3(c) or \
72 _stringprep.in_table_c4(c) or \
73 _stringprep.in_table_c5(c) or \
74 _stringprep.in_table_c6(c) or \
75 _stringprep.in_table_c7(c) or \
76 _stringprep.in_table_c8(c) or \
77 _stringprep.in_table_c9(c):
78 raise UnicodeError('Invalid character (%c)' % c)
80 return c
82 #Unassigned characters (illegals)
83 def _unassigned(c):
84 if _stringprep.in_table_a1(c):
85 raise UnicodeError('Unassigned character (%c)' % c)
87 return c
89 #Check if the string is correct for that concern the bidirectionality
90 #and that not contain some illecal character combination
91 def _checkBidirectional(s):
92 NOBIDI = 0 #no bidirectional characters
93 RAND = 1 #Rand character
94 LCAT = 2 #LCat character
96 type = NOBIDI #One of the constant NOBIDI, RAND or LCAT
98 #Some state variables
99 first = True
100 firstRand = False
101 lastRand = False
103 #for each character
104 for c in s:
105 #Rand character
106 if _stringprep.in_table_d1(c):
107 if first:
108 firstRand = True
110 #Rand and LCat character togheter
111 if type == LCAT:
112 raise UnicodeError('Bidirectional characters from ' \
113 'both of the stringprep bidirectional tables')
115 #If the string have some Rand character the first character must
116 #be a Rand character
117 if not firstRand:
118 raise UnicodeError('Rand character whitout initial ' \
119 'rand character')
121 type = RAND
122 lastRand = True
124 #Non rand character
125 else:
126 lastRand = False
128 #LCat character
129 if _stringprep.in_table_d2(c):
130 #Rand and LCat character togheter
131 if type == RAND:
132 raise UnicodeError('Bidirectional characters ' \
133 'from both of the stringprep ' \
134 'bidirectional tables')
136 type = LCAT
138 first = False
140 #If the string have some Rand character the last character must be a Rand
141 #character
142 if type == RAND and not lastRand:
143 raise UnicodeError('Rand character whitout final ' \
144 'rand character')
146 #################################################
147 ##### EXTERNAL STRINGPREP FUNCTIONS SECTION #####
148 #################################################
def nodeprep(s):
    """Prepare the node string *s* and return the prepared unicode string.

    Steps, in order: map every character with _mapNode, normalize the result
    with NFKC, reject prohibited characters (_proibitionNode), reject
    unassigned code points (_unassigned) and check the bidirectional rules
    (_checkBidirectional).

    Raises UnicodeError if any check fails.
    """
    mapped = u''.join([_mapNode(c) for c in s])

    #stringprep (RFC 3454) is defined on Unicode 3.2, so normalize with that
    #version of the database, not with the interpreter's current one
    prepared = _unicodedata.ucd_3_2_0.normalize('NFKC', mapped)

    #Explicit loops: the checks are run only for their exceptions, and a
    #lazy map() would never execute them
    for c in prepared:
        _proibitionNode(c)

    for c in prepared:
        _unassigned(c)

    _checkBidirectional(prepared)

    #The checks never alter a character, so the normalized string is the
    #result
    return prepared
def resourceprep(s):
    """Prepare the resource string *s* and return the prepared unicode string.

    Steps, in order: map every character with _mapResource, normalize the
    result with NFKC, reject prohibited characters (_proibitionResource),
    reject unassigned code points (_unassigned) and check the bidirectional
    rules (_checkBidirectional).

    Raises UnicodeError if any check fails.
    """
    mapped = u''.join([_mapResource(c) for c in s])

    #stringprep (RFC 3454) is defined on Unicode 3.2, so normalize with that
    #version of the database, not with the interpreter's current one
    prepared = _unicodedata.ucd_3_2_0.normalize('NFKC', mapped)

    #Explicit loops: the checks are run only for their exceptions, and a
    #lazy map() would never execute them
    for c in prepared:
        _proibitionResource(c)

    for c in prepared:
        _unassigned(c)

    _checkBidirectional(prepared)

    #The checks never alter a character, so the normalized string is the
    #result
    return prepared
170 #############################
171 ##### JID CLASS SECTION #####
172 #############################
class JID:
    """A Jabber ID made of an optional node, a domain and an optional resource.

    The public attributes node, domain and resource hold the values exactly
    as they were given. Every assignment to one of them also stores a
    normalized copy (nodeprep for the node, IDNA ToASCII plus lower-casing
    for the domain, resourceprep for the resource) in a private attribute;
    comparison and hashing use only the normalized copies.
    """

    def __init__(self, jid=None, node=None, domain=None,
                 resource=None, isNormalized=False):
        """Build a JID.

        jid          -- another JID to copy, or a 'node@domain/resource'
                        string to split; when a string is given it overrides
                        domain and, if present in it, node and resource
        node, domain, resource
                     -- the separate parts; they are converted to unicode
        isNormalized -- if true the given parts are trusted to be already
                        normalized and are stored as-is as the normalized
                        values

        Raises ValueError if the domain is missing or a part is too long,
        UnicodeError if a part cannot be normalized.
        """
        if isinstance(jid, JID):
            #Later assignments to the parts must be normalized
            self.__isNormalized = False

            #Copy the raw values without running the normalization again...
            state = self.__dict__
            state['node'] = jid.node
            state['domain'] = jid.domain
            state['resource'] = jid.resource

            #...and take the normalized values from the source too; the raw
            #values are not guaranteed to be normalized
            self.__nnode = jid.__nnode
            self.__ndomain = jid.__ndomain
            self.__nresource = jid.__nresource
            return

        #All the strings are automatically converted to unicode
        if node is not None:
            node = unicode(node)
        if domain is not None:
            domain = unicode(domain)
        if resource is not None:
            resource = unicode(resource)

        #If the user passed the JID as a string, split it
        if jid:
            rest = unicode(jid)

            #The resource must be split off first: it may itself contain
            #'@' and '/', while node and domain can not contain '/'
            if u'/' in rest:
                rest, resource = rest.split(u'/', 1)

            if u'@' in rest:
                node, rest = rest.split(u'@', 1)

            domain = rest

        #Tell __setattr__ whether the values below are already normalized
        self.__isNormalized = isNormalized

        self.node = node
        self.domain = domain
        self.resource = resource

        #Later assignments to the parts must be normalized
        self.__isNormalized = False

    def __setattr__(self, item, value):
        """Store *value*; for node, domain and resource also store its
        normalized form.

        The new value is validated before anything is stored, so a failed
        assignment leaves the JID unchanged.

        Raises ValueError if the domain is empty or a part is longer than
        1023, UnicodeError if the value cannot be normalized.
        """
        #NOTE(review): RFC 3920 states the 1023 limit in bytes; node and
        #resource are checked here in characters, as the original code did

        if item == 'node':
            normalized = value
            if value and not self.__isNormalized:
                normalized = nodeprep(value)

            if normalized and len(normalized) > 1023:
                raise ValueError('Node too long')

            self.__dict__['node'] = value
            self.__nnode = normalized

        elif item == 'domain':
            if not value:
                raise ValueError('JID must have a domain')

            normalized = value
            if not self.__isNormalized:
                #ToASCII works on a single label, so use the idna codec,
                #which splits the domain in labels and converts each one
                normalized = value.encode('idna').decode('ascii').lower()

            if len(normalized) > 1023:
                raise ValueError('Domain too long')

            self.__dict__['domain'] = value
            self.__ndomain = normalized

        elif item == 'resource':
            normalized = value
            if value and not self.__isNormalized:
                normalized = resourceprep(value)

            if normalized and len(normalized) > 1023:
                raise ValueError('Resource too long')

            self.__dict__['resource'] = value
            self.__nresource = normalized

        #Any other attribute is stored as-is
        else:
            self.__dict__[item] = value

    def __compose(self, node, domain, resource):
        """Join the three parts as node@domain/resource, leaving out the
        parts that are None."""
        text = domain

        if node is not None:
            text = node + u'@' + text

        if resource is not None:
            text += u'/' + resource

        return text

    def __unicode__(self):
        """Return the JID as text, built from the raw parts."""
        return self.__compose(self.node, self.domain, self.resource)

    def __str__(self):
        """Return the same text as __unicode__."""
        return self.__unicode__()

    def __eq__(self, other):
        """Compare the normalized parts with those of *other*.

        *other* may be a JID or anything the constructor accepts; a value
        that cannot be turned into a valid JID compares unequal.
        """
        if not isinstance(other, JID):
            try:
                other = JID(other)
            #UnicodeError is a subclass of ValueError
            except ValueError:
                return False

        return self.__nnode == other.__nnode and \
               self.__ndomain == other.__ndomain and \
               self.__nresource == other.__nresource

    def __ne__(self, other):
        """Negation of __eq__."""
        return not (self == other)

    def __hash__(self):
        """Hash the text of the normalized JID, so that JIDs comparing equal
        have the same hash."""
        return hash(self.__compose(self.__nnode, self.__ndomain,
                                   self.__nresource))

    def normalized(self):
        """Return a new JID whose raw parts are the normalized parts of this
        one."""
        return JID(node=self.__nnode, domain=self.__ndomain,
                   resource=self.__nresource, isNormalized=True)

    def bareJID(self):
        """Return a new JID with the node and domain of this one and no
        resource."""
        #isNormalized skips the normalization of the raw parts; the real
        #normalized values are copied right below
        bare = JID(node=self.node, domain=self.domain, isNormalized=True)

        bare.__nnode = self.__nnode
        bare.__ndomain = self.__ndomain
        return bare
307 ##########################################
308 ##### XEP 106 (JID ESCAPING) SECTION #####
309 ##########################################
311 #Escape codes of all the possible escapes excpet that for the \ character
312 _escape_codes = [('20', ' '), \
313 ('22', '"'), \
314 ('26', '&'), \
315 ('27', "'"), \
316 ('2f', '/'), \
317 ('3a', ':'), \
318 ('3c', '<'), \
319 ('3e', '>'), \
320 ('40', '@')]
def escapeJIDNode(node, stripSpaces = True):
    """XEP 106: escape the node *node*.

    node        -- the unescaped node text
    stripSpaces -- if true, leading and trailing spaces are removed first

    Returns the escaped text wrapped in a JID (the text ends up as the
    domain of that JID).

    Raises ValueError if the node still starts or ends with a space; the JID
    constructor raises ValueError for an empty result.
    """
    #Strip the spaces around the node if required
    if stripSpaces:
        node = node.strip(' ')

    #startswith/endswith are also safe on an empty node
    if node.startswith(' ') or node.endswith(' '):
        raise ValueError('Spaces first or last in the JID')

    #A backslash followed by something that reads as an escape code must be
    #escaped itself, or unescaping would not give back the original text.
    #\5c goes first so that the \5c sequences added here are not escaped a
    #second time.
    s = node
    for code in ['5c'] + [code for code, character in _escape_codes]:
        s = s.replace('\\' + code, r'\5c' + code)

    #Now replace the characters that are not allowed in a node
    for code, character in _escape_codes:
        s = s.replace(character, '\\' + code)

    #NOTE(review): unescapeJIDNode returns a plain string while this returns
    #a JID; kept for compatibility with existing callers - confirm whether a
    #plain string was intended
    return JID(s)
def unescapeJIDNode(node, stripSpaces = True):
    """XEP 106: unescape the node *node* and return the unescaped text.

    node        -- the escaped node text
    stripSpaces -- if true, leading and trailing spaces are removed first

    A node that starts or ends with \\20 is returned without unescaping.

    Raises ValueError if the node still starts or ends with a space.
    """
    #Strip the spaces around the node if required
    if stripSpaces:
        node = node.strip(' ')

    #startswith/endswith are also safe on an empty node
    if node.startswith(' ') or node.endswith(' '):
        raise ValueError('Spaces first or last in the JID')

    #If the node starts or ends with \20 return it unaltered
    if node.startswith(r'\20') or node.endswith(r'\20'):
        return node

    #Replace every code except the one for \
    s = node
    for code, character in _escape_codes:
        s = s.replace('\\' + code, character)

    #The code for \ goes last, so that the backslashes it produces are not
    #read as the start of another escape code
    return s.replace(r'\5c', '\\')
363 ########################
364 ##### TEST SECTION #####
365 ########################
#Manual smoke test, run only when the module is executed as a script
if __name__ == '__main__':
    a = JID('a@b/c')
    b = JID(a)                       #exercises the copy constructor

    print(a == 'A@b/c')              #comparison with a string
    print(escapeJIDNode(' ba@'))     #XEP 106 escaping
    print(hash(a))
    print(a.bareJID())