JID.py

   1 #!/usr/bin/python
   2 # -*- coding: utf-8 -*-
   3 # vim: expandtab:shiftwidth=4:fileencoding=utf-8 :
   4
   5 # Copyright ® 2008 Fulvio Satta
   6 #
   7 # If you want contact me, send an email to Yota_VGA@users.sf.net
   8 #
   9 # This file is part of jcd
  10 #
  11 # jcd is free software; you can redistribute it and/or modify
  12 # it under the terms of the GNU General Public License as published by
  13 # the Free Software Foundation; either version 2 of the License, or
  14 # (at your option) any later version.
  15 #
  16 # jcd is distributed in the hope that it will be useful,
  17 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  18 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19 # GNU General Public License for more details.
  20 #
  21 # You should have received a copy of the GNU General Public License
  22 # along with this program; if not, write to the Free Software
  23 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  24
  25 #TODO: Exception improvements (for an i18n and many other automatic tasks)
  26 #TODO: Test, test, test
  27 #TODO: Polishing
  28
  29 ##########################
  30 ##### IMPORT SECTION #####
  31 ##########################
  32
  33 import stringprep     as _stringprep  #Tables for stringprep
  34
  35 import encodings.idna as _idna        #Stringprep for domains
  36
  37 import unicodedata    as _unicodedata #For string normalizzation
  38
  39 #################################################
  40 ##### INTERNAL STRINGPREP FUNCTIONS SECTION #####
  41 #################################################
  42
  43 #Character mapping for nodes
  44 def _mapNode(c):
  45     if _stringprep.in_table_b1(c):
  46         return ''
  47
  48     return _stringprep.map_table_b2(c)
  49
  50 #Character mapping for resources
  51 def _mapResource(c):
  52     if _stringprep.in_table_b1(c):
  53         return ''
  54
  55     return c
  56
  57 #Raise an exception if a character is prohibited in a node name
  58 def _proibitionNode(c):
  59     proibited_characters = [u'"', u'&', u"'", u'/', u':', u'<', u'>', u'@']
  60
  61     if _stringprep.in_table_c11(c) or c in proibited_characters:
  62             raise UnicodeError('Invalid character (%c)' % c)
  63
  64     return _proibitionResource(c)
  65
  66 #Raise an exception if a character is prohibited in a resource name
  67 def _proibitionResource(c):
  68     if  _stringprep.in_table_c12(c) or \
  69         _stringprep.in_table_c21(c) or \
  70         _stringprep.in_table_c22(c) or \
  71         _stringprep.in_table_c3(c)  or \
  72         _stringprep.in_table_c4(c)  or \
  73         _stringprep.in_table_c5(c)  or \
  74         _stringprep.in_table_c6(c)  or \
  75         _stringprep.in_table_c7(c)  or \
  76         _stringprep.in_table_c8(c)  or \
  77         _stringprep.in_table_c9(c):
  78             raise UnicodeError('Invalid character (%c)' % c)
  79
  80     return c
  81
  82 #Unassigned characters (illegal)
  83 def _unassigned(c):
  84     if _stringprep.in_table_a1(c):
  85         raise UnicodeError('Unassigned character (%c)' % c)
  86
  87     return c
  88
  89 #Check that the string is correct as far as bidirectionality is concerned
  90 #and that it does not contain any illegal character combination
  91 def _checkBidirectional(s):
  92     NOBIDI = 0 #no bidirectional characters
  93     RAND   = 1 #Rand character
  94     LCAT   = 2 #LCat character
  95
  96     type = NOBIDI #One of the constant NOBIDI, RAND or LCAT
  97
  98     #Some state variables
  99     first     = True
 100     firstRand = False
 101     lastRand  = False
 102
 103     #for each character
 104     for c in s:
 105         #Rand character
 106         if _stringprep.in_table_d1(c):
 107             if first:
 108                 firstRand = True
 109
 110             #Rand and LCat character togheter
 111             if type == LCAT:
 112                 raise UnicodeError('Bidirectional characters from ' \
 113                         'both of the stringprep bidirectional tables')
 114
 115             #If the string have some Rand character the first character must
 116             #be a Rand character
 117             if not firstRand:
 118                 raise UnicodeError('Rand character whitout initial ' \
 119                         'rand character')
 120
 121             type = RAND
 122             lastRand = True
 123
 124         #Non rand character
 125         else:
 126             lastRand = False
 127
 128             #LCat character
 129             if _stringprep.in_table_d2(c):
 130                 #Rand and LCat character togheter
 131                 if type == RAND:
 132                     raise UnicodeError('Bidirectional characters ' \
 133                             'from both of the stringprep ' \
 134                             'bidirectional tables')
 135
 136                 type = LCAT
 137
 138         first = False
 139
 140     #If the string have some Rand character the last character must be a Rand
 141     #character
 142     if type == RAND and not lastRand:
 143         raise UnicodeError('Rand character whitout final ' \
 144                 'rand character')
 145
 146 #################################################
 147 ##### EXTERNAL STRINGPREP FUNCTIONS SECTION #####
 148 #################################################
 149
 150 #Check a string with nodeprep
 151 def nodeprep(s):
 152     np = _unicodedata.normalize('NFKC', u''.join(map(_mapNode, s)))
 153     np = map(_proibitionNode, np)
 154
 155     map(_unassigned, np)
 156
 157     _checkBidirectional(np)
 158     return ''.join(np)
 159
 160 #Check a string with resourceprep
 161 def resourceprep(s):
 162     rp = _unicodedata.normalize('NFKC', u''.join(map(_mapResource, s)))
 163     rp = map(_proibitionResource, rp)
 164
 165     map(_unassigned, rp)
 166
 167     _checkBidirectional(rp)
 168     return ''.join(rp)
 169
 170 #############################
 171 ##### JID CLASS SECTION #####
 172 #############################
 173
 174 #JID class conforming to the RFCs
 175 class JID:
 176     def __init__(self, jid = None, node = None, domain = None, \
 177             resource = None, isNormalized = False):
 178         #If is an istance of JID skip the normal assign (for prestational
 179         #purposes)
 180         if isinstance(jid, JID):
 181             #The JID is normalized
 182             self.__isNormalized = True
 183
 184             #Copy the variables
 185             self.node     = jid.node
 186             self.domain   = jid.domain
 187             self.resource = jid.resource
 188
 189             #For the future the set will be normalized
 190             self.__isNormalized = False
 191             return
 192
 193         #All the strings are automatically casted to unicode
 194         if node     != None: node     = unicode(node)
 195         if domain   != None: domain   = unicode(domain)
 196         if resource != None: resource = unicode(resource)
 197
 198         #If the user send a jid like a string split that
 199         if jid:
 200             j = unicode(jid)
 201             if j.find('@') >= 0:
 202                 node, j = j.split('@', 1)
 203
 204             if j.find('/') >= 0:
 205                 j, resource = j.split('/', 1)
 206
 207             domain = j
 208
 209         #Set if the JID is normalized
 210         self.__isNormalized = isNormalized
 211
 212         #Set the JID variables
 213         self.node = node
 214         self.domain = domain
 215         self.resource = resource
 216
 217         #For the future the set will be normalized
 218         self.__isNormalized = False
 219
 220     #Reinterpreter for node, domain and resource attributes
 221     def __setattr__(self, item, value):
 222         #node attribute
 223         if item == 'node':
 224             self.__dict__['node'] = node = value
 225
 226             if not self.__isNormalized and node:
 227                 node = nodeprep(value)
 228             self.__nnode = node
 229
 230             if self.__nnode and len(self.__nnode) > 1023:
 231                 raise ValueError('Node too long')
 232
 233         #domain attribute
 234         elif item == 'domain':
 235             self.__dict__['domain'] = domain = value
 236
 237             if not self.__isNormalized and domain:
 238                 domain = _idna.ToASCII(_idna.nameprep(domain)).lower()
 239             self.__ndomain = domain
 240
 241             if not domain:
 242                 raise ValueError('JID must have a domain')
 243
 244             if len(self.__ndomain) > 1023:
 245                 raise ValueError('Domain too long')
 246
 247         #resource attribute
 248         elif item == 'resource':
 249             self.__dict__['resource'] = resource = value
 250
 251             if not self.__isNormalized and resource:
 252                 resource = resourceprep(resource)
 253             self.__nresource = resource
 254
 255             if self.__nresource and len(self.__nresource) > 1023:
 256                 raise ValueError('Resource too long')
 257
 258         #default action
 259         else:
 260             self.__dict__[item] = value
 261
 262     #Obtain the unicode rappresentation of a jid
 263     def __str__(self):
 264         #Only the domain in jid
 265         jid = self.domain
 266
 267         #Add the node part
 268         if self.node != None:
 269             jid = self.node + u'@' + jid
 270
 271         #Add the resource part
 272         if self.resource != None:
 273             jid += u'/' + self.resource
 274
 275         return jid
 276
 277     #Equivalence operator
 278     def __eq__(self, other):
 279         b = JID(other)
 280
 281         return self.__nnode     == b.__nnode     and \
 282                self.__ndomain   == b.__ndomain   and \
 283                self.__nresource == b.__nresource
 284
 285     #Unequivalence operator
 286     def __ne__(self, other):
 287         return not (self == other)
 288
 289     #Hash generation
 290     def __hash__(self):
 291         return hash(unicode(self.normalized()))
 292
 293     #Give the normalized corresponding JID
 294     def normalized(self):
 295         return JID(node = self.__nnode, domain = self.__ndomain, \
 296                 resource = self.__nresource, isNormalized = True)
 297
 298     #Get the bare JID
 299     def bareJID(self):
 300         jid = JID(node = self.node, domain = self.domain, \
 301                 isNormalized = True)
 302
 303         jid.__nnode = self.__nnode
 304         jid.__ndomain = self.__ndomain
 305         return jid
 306
 307 ##########################################
 308 ##### XEP 106 (JID ESCAPING) SECTION #####
 309 ##########################################
 310
 311 #Escape codes for every escapable character except the \ character
 312 _escape_codes = [('20', ' '), \
 313                  ('22', '"'), \
 314                  ('26', '&'), \
 315                  ('27', "'"), \
 316                  ('2f', '/'), \
 317                  ('3a', ':'), \
 318                  ('3c', '<'), \
 319                  ('3e', '>'), \
 320                  ('40', '@')]
 321
 322 #XEP 106: Escape a node
 323 def escapeJIDNode(node, stripSpaces = True):
 324     #Strip all the spaces in the jid if required
 325     if stripSpaces:
 326         node = node.strip(' ')
 327
 328     #If some space is at the start or the end of a JID raise an exception
 329     if node[0] == ' ' or node[-1] == ' ':
 330         raise ValueError('Spaces first or last in the JID')
 331
 332     s = node
 333     for code, character in _escape_codes:
 334         #Replace all the codes
 335         s = s.replace('\\' + code, r'\5c' + code)
 336         s = s.replace(character, '\\' + code)
 337
 338     return JID(s)
 339
 340 #XEP 106: Unescape a node
 341 def unescapeJIDNode(node, stripSpaces = True):
 342     #Strip all the spaces in the jid if required
 343     if stripSpaces:
 344         node = node.strip(' ')
 345
 346     #If some space is at the start or the end of a JID raise an exception
 347     if node[0] == ' ' or node[-1] == ' ':
 348         raise ValueError('Spaces first or last in the JID')
 349
 350     #If the node start or end with \20 return the unaltered node
 351     if s.startswith(r'\20') or s.endswith(r'\20'):
 352         return node
 353
 354     #Replace the codes eccept that \
 355     for code, character in _escape_codes:
 356         s = s.replace('\\' + code, character)
 357
 358     #Replace the code for \
 359     s = s.replace(r'\5c', '\\')
 360
 361     return s
 362
 363 ########################
 364 ##### TEST SECTION #####
 365 ########################
 366
 367 if __name__ == '__main__':
 368     a = JID('a@b/c')
 369     b = JID(a)
 370
 371     print a == 'A@b/c'
 372     print escapeJIDNode(' ba@')
 373     print hash(a)
 374     print a.bareJID()