Bump to 1.3.1
[slixmpp.git] / sleekxmpp / jid.py
blobac5ba30d959b693878fa03c3fa9d6d96377777fa
1 # -*- coding: utf-8 -*-
2 """
3 sleekxmpp.jid
4 ~~~~~~~~~~~~~~~~~~~~~~~
6 This module allows for working with Jabber IDs (JIDs).
8 Part of SleekXMPP: The Sleek XMPP Library
10 :copyright: (c) 2011 Nathanael C. Fritz
11 :license: MIT, see LICENSE for more details
12 """
14 from __future__ import unicode_literals
16 import re
17 import socket
18 import stringprep
19 import threading
20 import encodings.idna
22 from copy import deepcopy
24 from sleekxmpp.util import stringprep_profiles
25 from sleekxmpp.thirdparty import OrderedDict
#: These characters are not allowed to appear in a JID: the ASCII control
#: characters, the space, DEL, and ASCII punctuation other than the
#: hyphen. Within this module the set is checked against domain labels.
ILLEGAL_CHARS = (
    '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r'
    '\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19'
    '\x1a\x1b\x1c\x1d\x1e\x1f'
    ' !"#$%&\'()*+,./:;<=>?@[\\]^_`{|}~\x7f'
)
#: The basic regex pattern that a JID must match in order to determine
#: the local, domain, and resource parts. This regex does NOT do any
#: validation, which requires application of nodeprep, resourceprep, etc.
JID_PATTERN = re.compile(
    "^(?:([^\"&'/:<>@]{1,1023})@)?"  # group 1: optional local part
    "([^/@]{1,1023})"                # group 2: domain
    "(?:/(.{1,1023}))?$"             # group 3: optional resource
)
#: A mapping of unallowed characters to their escape sequences. An escape
#: sequence for '\' is also included since it must also be escaped in
#: certain situations.
JID_ESCAPE_TRANSFORMATIONS = dict([
    (' ', '\\20'),
    ('"', '\\22'),
    ('&', '\\26'),
    ("'", '\\27'),
    ('/', '\\2f'),
    (':', '\\3a'),
    ('<', '\\3c'),
    ('>', '\\3e'),
    ('@', '\\40'),
    ('\\', '\\5c'),
])

#: The set of escape sequences for the characters not allowed by nodeprep.
JID_ESCAPE_SEQUENCES = set(JID_ESCAPE_TRANSFORMATIONS.values())

#: The reverse mapping of escape sequences to their original forms.
JID_UNESCAPE_TRANSFORMATIONS = dict(
    (sequence, char)
    for char, sequence in JID_ESCAPE_TRANSFORMATIONS.items())
#: Upper bound on the number of cache entries before eviction is attempted.
JID_CACHE_MAX_SIZE = 1024
#: Lock held while entries are being evicted from the cache.
JID_CACHE_LOCK = threading.Lock()
#: Insertion-ordered cache mapping a JID string, or a
#: ``(local, domain, resource)`` tuple, to ``(parts, locked)``.
JID_CACHE = OrderedDict()
def _cache(key, parts, locked):
    """Store validated JID parts in the module cache, evicting if needed.

    The insertion is performed while holding ``JID_CACHE_LOCK`` so that it
    cannot mutate the cache while another thread is iterating over it
    during eviction.

    :param key: The cache key; callers in this module pass either the
                original JID string or a ``(local, domain, resource)``
                tuple of the unvalidated inputs.
    :param tuple parts: The validated ``(local, domain, resource)`` tuple.
    :param bool locked: If true, the entry is never chosen for eviction.
    """
    with JID_CACHE_LOCK:
        JID_CACHE[key] = (parts, locked)

        while len(JID_CACHE) > JID_CACHE_MAX_SIZE:
            # Evict the oldest entry (in insertion order) that is not locked.
            victim = None
            for cached_key, entry in JID_CACHE.items():
                if not entry[1]:
                    victim = cached_key
                    break
            if victim is None:
                # More than JID_CACHE_MAX_SIZE entries are locked; nothing
                # can be evicted, so let the cache stay oversized.
                break
            del JID_CACHE[victim]
# pylint: disable=c0103
#: The nodeprep profile of stringprep used to validate the local,
#: or username, portion of a JID.
nodeprep = stringprep_profiles.create(
    nfkc=True,
    bidi=True,
    mappings=[stringprep_profiles.b1_mapping, stringprep.map_table_b2],
    prohibited=[
        stringprep.in_table_c11, stringprep.in_table_c12,
        stringprep.in_table_c21, stringprep.in_table_c22,
        stringprep.in_table_c3, stringprep.in_table_c4,
        stringprep.in_table_c5, stringprep.in_table_c6,
        stringprep.in_table_c7, stringprep.in_table_c8,
        stringprep.in_table_c9,
        # Characters additionally prohibited in the local part.
        lambda char: char in ' \'"&/:<>@',
    ],
    unassigned=[stringprep.in_table_a1])
# pylint: disable=c0103
#: The resourceprep profile of stringprep, which is used to validate
#: the resource portion of a JID.
resourceprep = stringprep_profiles.create(
    nfkc=True,
    bidi=True,
    mappings=[stringprep_profiles.b1_mapping],
    prohibited=[
        stringprep.in_table_c12,
        stringprep.in_table_c21, stringprep.in_table_c22,
        stringprep.in_table_c3, stringprep.in_table_c4,
        stringprep.in_table_c5, stringprep.in_table_c6,
        stringprep.in_table_c7, stringprep.in_table_c8,
        stringprep.in_table_c9,
    ],
    unassigned=[stringprep.in_table_a1])
def _parse_jid(data):
    """
    Split a string into the local, domain, and resource components of
    a JID and validate each of them.

    :param string data: A string that is potentially a JID.

    :raises InvalidJID: If the string does not match ``JID_PATTERN`` or
                        one of its components fails validation.

    :returns: tuple of the validated local, domain, and resource strings
    """
    matched = JID_PATTERN.match(data)
    if matched is None:
        raise InvalidJID('JID could not be parsed')

    raw_node, raw_domain, raw_resource = matched.groups()

    # Validation order (local, domain, resource) decides which error is
    # reported when several components are invalid.
    return (_validate_node(raw_node),
            _validate_domain(raw_domain),
            _validate_resource(raw_resource))
158 def _validate_node(node):
159 """Validate the local, or username, portion of a JID.
161 :raises InvalidJID:
163 :returns: The local portion of a JID, as validated by nodeprep.
165 try:
166 if node is not None:
167 node = nodeprep(node)
169 if not node:
170 raise InvalidJID('Localpart must not be 0 bytes')
171 if len(node) > 1023:
172 raise InvalidJID('Localpart must be less than 1024 bytes')
173 return node
174 except stringprep_profiles.StringPrepError:
175 raise InvalidJID('Invalid local part')
178 def _validate_domain(domain):
179 """Validate the domain portion of a JID.
181 IP literal addresses are left as-is, if valid. Domain names
182 are stripped of any trailing label separators (`.`), and are
183 checked with the nameprep profile of stringprep. If the given
184 domain is actually a punyencoded version of a domain name, it
185 is converted back into its original Unicode form. Domains must
186 also not start or end with a dash (`-`).
188 :raises InvalidJID:
190 :returns: The validated domain name
192 ip_addr = False
194 # First, check if this is an IPv4 address
195 try:
196 socket.inet_aton(domain)
197 ip_addr = True
198 except socket.error:
199 pass
201 # Check if this is an IPv6 address
202 if not ip_addr and hasattr(socket, 'inet_pton'):
203 try:
204 socket.inet_pton(socket.AF_INET6, domain.strip('[]'))
205 domain = '[%s]' % domain.strip('[]')
206 ip_addr = True
207 except (socket.error, ValueError):
208 pass
210 if not ip_addr:
211 # This is a domain name, which must be checked further
213 if domain and domain[-1] == '.':
214 domain = domain[:-1]
216 domain_parts = []
217 for label in domain.split('.'):
218 try:
219 label = encodings.idna.nameprep(label)
220 encodings.idna.ToASCII(label)
221 pass_nameprep = True
222 except UnicodeError:
223 pass_nameprep = False
225 if not pass_nameprep:
226 raise InvalidJID('Could not encode domain as ASCII')
228 if label.startswith('xn--'):
229 label = encodings.idna.ToUnicode(label)
231 for char in label:
232 if char in ILLEGAL_CHARS:
233 raise InvalidJID('Domain contains illegal characters')
235 if '-' in (label[0], label[-1]):
236 raise InvalidJID('Domain started or ended with -')
238 domain_parts.append(label)
239 domain = '.'.join(domain_parts)
241 if not domain:
242 raise InvalidJID('Domain must not be 0 bytes')
243 if len(domain) > 1023:
244 raise InvalidJID('Domain must be less than 1024 bytes')
246 return domain
249 def _validate_resource(resource):
250 """Validate the resource portion of a JID.
252 :raises InvalidJID:
254 :returns: The local portion of a JID, as validated by resourceprep.
256 try:
257 if resource is not None:
258 resource = resourceprep(resource)
260 if not resource:
261 raise InvalidJID('Resource must not be 0 bytes')
262 if len(resource) > 1023:
263 raise InvalidJID('Resource must be less than 1024 bytes')
264 return resource
265 except stringprep_profiles.StringPrepError:
266 raise InvalidJID('Invalid resource')
def _escape_node(node):
    """Escape the local portion of a JID.

    Characters listed in ``JID_ESCAPE_TRANSFORMATIONS`` are replaced by
    their escape sequences. A backslash is only escaped when it begins
    something that already looks like an escape sequence; otherwise it
    is kept as-is.

    :raises InvalidJID: If the escaped value starts or ends with an
                        escaped space, or fails local-part validation.

    :returns: The escaped local portion.
    """
    pieces = []
    for index, char in enumerate(node):
        if char == '\\':
            if node[index:index + 3] in JID_ESCAPE_SEQUENCES:
                pieces.append('\\5c')
            else:
                pieces.append(char)
        else:
            pieces.append(JID_ESCAPE_TRANSFORMATIONS.get(char, char))

    escaped = ''.join(pieces)

    if escaped.startswith('\\20') or escaped.endswith('\\20'):
        raise InvalidJID('Escaped local part starts or ends with "\\20"')

    # NOTE(review): the value returned by _validate_node() is discarded,
    # so the result is validated but not replaced by that return value.
    # Confirm this is intended.
    _validate_node(escaped)

    return escaped
294 def _unescape_node(node):
295 """Unescape a local portion of a JID.
297 .. note::
298 The unescaped local portion is meant ONLY for presentation,
299 and should not be used for other purposes.
301 unescaped = []
302 seq = ''
303 for i, char in enumerate(node):
304 if char == '\\':
305 seq = node[i:i+3]
306 if seq not in JID_ESCAPE_SEQUENCES:
307 seq = ''
308 if seq:
309 if len(seq) == 3:
310 unescaped.append(JID_UNESCAPE_TRANSFORMATIONS.get(seq, char))
312 # Pop character off the escape sequence, and ignore it
313 seq = seq[1:]
314 else:
315 unescaped.append(char)
316 unescaped = ''.join(unescaped)
318 return unescaped
321 def _format_jid(local=None, domain=None, resource=None):
322 """Format the given JID components into a full or bare JID.
324 :param string local: Optional. The local portion of the JID.
325 :param string domain: Required. The domain name portion of the JID.
326 :param strin resource: Optional. The resource portion of the JID.
328 :return: A full or bare JID string.
330 result = []
331 if local:
332 result.append(local)
333 result.append('@')
334 if domain:
335 result.append(domain)
336 if resource:
337 result.append('/')
338 result.append(resource)
339 return ''.join(result)
class InvalidJID(ValueError):
    """
    Raised when attempting to create a JID that does not pass validation.

    It can also be raised if modifying an existing JID in such a way as
    to make it invalid, such as trying to remove the domain from an
    existing full JID while the local and resource portions still exist.
    """
# pylint: disable=R0903
class UnescapedJID(object):

    """
    A read-only holder for the ``(local, domain, resource)`` parts of a
    JID whose local part has been unescaped. Meant for presentation only.

    .. versionadded:: 1.1.10
    """

    def __init__(self, local, domain, resource):
        self._jid = (local, domain, resource)

    # pylint: disable=R0911
    def __getattr__(self, name):
        """Retrieve the given JID component.

        Only invoked when normal attribute lookup fails.

        :param name: one of: user, server, domain, resource,
                     full, or bare.

        :returns: The requested component, or ``None`` for any other
                  public name.

        :raises AttributeError: For ``_jid`` (only reachable when the
                                instance was created without running
                                ``__init__``) and for double-underscore
                                protocol names, so that ``hasattr()``
                                probes made by copy/pickle are answered
                                truthfully instead of with ``None``.
        """
        if name == '_jid' or (name.startswith('__') and name.endswith('__')):
            raise AttributeError(name)

        if name == 'resource':
            return self._jid[2] or ''
        elif name in ('user', 'username', 'local', 'node'):
            return self._jid[0] or ''
        elif name in ('server', 'domain', 'host'):
            return self._jid[1] or ''
        elif name in ('full', 'jid'):
            return _format_jid(*self._jid)
        elif name == 'bare':
            return _format_jid(self._jid[0], self._jid[1])
        else:
            return None

    def __str__(self):
        """Use the full JID as the string value."""
        return _format_jid(*self._jid)

    def __repr__(self):
        """Use the full JID as the representation."""
        return self.__str__()
class JID(object):

    """
    A representation of a Jabber ID, or JID.

    Each JID may have three components: a user, a domain, and an optional
    resource. For example: user@domain/resource

    When a resource is not used, the JID is called a bare JID.
    The JID is a full JID otherwise.

    **JID Properties:**
        :jid: Alias for ``full``.
        :full: The string value of the full JID.
        :bare: The string value of the bare JID.
        :user: The username portion of the JID.
        :username: Alias for ``user``.
        :local: Alias for ``user``.
        :node: Alias for ``user``.
        :domain: The domain name portion of the JID.
        :server: Alias for ``domain``.
        :host: Alias for ``domain``.
        :resource: The resource portion of the JID.

    :param string jid:
        A string of the form ``'[user@]domain[/resource]'``.
    :param string local:
        Optional. Specify the local, or username, portion
        of the JID. If provided, it will override the local
        value provided by the `jid` parameter. The given
        local value will also be escaped if necessary.
    :param string domain:
        Optional. Specify the domain of the JID. If
        provided, it will override the domain given by
        the `jid` parameter.
    :param string resource:
        Optional. Specify the resource value of the JID.
        If provided, it will override the resource given
        by the `jid` parameter.
    :param bool cache_lock:
        Optional. If true, the cache entry created for this
        JID is marked as locked and is never evicted.

    :raises InvalidJID:
    """

    # pylint: disable=W0212
    def __init__(self, jid=None, **kwargs):
        locked = kwargs.get('cache_lock', False)
        in_local = kwargs.get('local', None)
        in_domain = kwargs.get('domain', None)
        in_resource = kwargs.get('resource', None)
        parts = None
        # NOTE(review): this is a truthiness test, so keyword overrides
        # that are all empty/None count as "no parts given".
        if in_local or in_domain or in_resource:
            parts = (in_local, in_domain, in_resource)

        # only check cache if there is a jid string, or parts, not if there
        # are both
        self._jid = None
        key = None
        if (jid is not None) and (parts is None):
            if isinstance(jid, JID):
                # it's already good to go, and there are no additions
                self._jid = jid._jid
                return
            key = jid
            self._jid, locked = JID_CACHE.get(jid, (None, locked))
        elif jid is None and parts is not None:
            key = parts
            self._jid, locked = JID_CACHE.get(parts, (None, locked))

        if not self._jid:
            if not jid:
                parsed_jid = (None, None, None)
            elif not isinstance(jid, JID):
                parsed_jid = _parse_jid(jid)
            else:
                parsed_jid = jid._jid

            local, domain, resource = parsed_jid

            # Explicit keyword components override the parsed ones.
            if 'local' in kwargs:
                local = _escape_node(in_local)
            if 'domain' in kwargs:
                domain = _validate_domain(in_domain)
            if 'resource' in kwargs:
                resource = _validate_resource(in_resource)

            self._jid = (local, domain, resource)
            if key:
                _cache(key, self._jid, locked)

    def unescape(self):
        """Return an unescaped JID object.

        Using an unescaped JID is preferred for displaying JIDs
        to humans, and they should NOT be used for any other
        purposes than for presentation.

        :return: :class:`UnescapedJID`

        .. versionadded:: 1.1.10
        """
        return UnescapedJID(_unescape_node(self._jid[0]),
                            self._jid[1],
                            self._jid[2])

    def regenerate(self):
        """No-op

        .. deprecated:: 1.1.10
        """
        pass

    def reset(self, data):
        """Start fresh from a new JID string.

        :param string data: A string of the form ``'[user@]domain[/resource]'``.

        .. deprecated:: 1.1.10
        """
        self._jid = JID(data)._jid

    # -- resource ---------------------------------------------------------

    @property
    def resource(self):
        return self._jid[2] or ''

    @resource.setter
    def resource(self, value):
        self._jid = JID(self, resource=value)._jid

    # -- local part and its aliases ---------------------------------------

    @property
    def user(self):
        return self._jid[0] or ''

    @user.setter
    def user(self, value):
        self._jid = JID(self, local=value)._jid

    @property
    def local(self):
        return self._jid[0] or ''

    @local.setter
    def local(self, value):
        self._jid = JID(self, local=value)._jid

    @property
    def node(self):
        return self._jid[0] or ''

    @node.setter
    def node(self, value):
        self._jid = JID(self, local=value)._jid

    @property
    def username(self):
        return self._jid[0] or ''

    @username.setter
    def username(self, value):
        self._jid = JID(self, local=value)._jid

    # -- domain and its aliases -------------------------------------------

    @property
    def server(self):
        return self._jid[1] or ''

    @server.setter
    def server(self, value):
        self._jid = JID(self, domain=value)._jid

    @property
    def domain(self):
        return self._jid[1] or ''

    @domain.setter
    def domain(self, value):
        self._jid = JID(self, domain=value)._jid

    @property
    def host(self):
        return self._jid[1] or ''

    @host.setter
    def host(self, value):
        self._jid = JID(self, domain=value)._jid

    # -- whole-JID views --------------------------------------------------

    @property
    def full(self):
        return _format_jid(*self._jid)

    @full.setter
    def full(self, value):
        self._jid = JID(value)._jid

    @property
    def jid(self):
        return _format_jid(*self._jid)

    @jid.setter
    def jid(self, value):
        self._jid = JID(value)._jid

    @property
    def bare(self):
        return _format_jid(self._jid[0], self._jid[1])

    @bare.setter
    def bare(self, value):
        # Replace local and domain, but keep the current resource.
        parsed = JID(value)._jid
        self._jid = (parsed[0], parsed[1], self._jid[2])

    def __str__(self):
        """Use the full JID as the string value."""
        return _format_jid(*self._jid)

    def __repr__(self):
        """Use the full JID as the representation."""
        return self.__str__()

    # pylint: disable=W0212
    def __eq__(self, other):
        """Two JIDs are equal if they have the same full JID value.

        ``other`` may be a :class:`JID` or anything the constructor
        accepts. An :class:`UnescapedJID`, or a value that cannot be
        turned into a valid JID, never compares equal.
        """
        if isinstance(other, JID):
            return self._jid == other._jid
        if isinstance(other, UnescapedJID):
            return False

        try:
            other = JID(other)
        except (ValueError, TypeError):
            # InvalidJID is a ValueError; TypeError covers values that are
            # not strings at all. An equality test must not raise for them.
            return False
        return self._jid == other._jid

    # pylint: disable=W0212
    def __ne__(self, other):
        """Two JIDs are considered unequal if they are not equal."""
        return not self == other

    def __hash__(self):
        """Hash a JID based on the string version of its full JID."""
        return hash(self.__str__())

    def __copy__(self):
        """Generate a duplicate JID."""
        return JID(self)

    def __deepcopy__(self, memo):
        """Generate a duplicate JID."""
        return JID(deepcopy(str(self), memo))