Rename to slixmpp
[slixmpp.git] / slixmpp / util / stringprep_profiles.py
blob5fb0b4b738847f64b98cc6d823e20d60f98ed0cb
1 # -*- coding: utf-8 -*-
2 """
3 slixmpp.util.stringprep_profiles
4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
6 This module makes it easier to define profiles of stringprep,
7 such as nodeprep and resourceprep for JID validation, and
8 SASLprep for SASL.
10 Part of Slixmpp: The Slick XMPP Library
12 :copyright: (c) 2012 Nathanael C. Fritz, Lance J.T. Stout
13 :license: MIT, see LICENSE for more details
14 """
17 from __future__ import unicode_literals
19 import stringprep
20 from unicodedata import ucd_3_2_0 as unicodedata
22 from slixmpp.util import unicode
class StringPrepError(UnicodeError):
    """Raised when text does not satisfy a stringprep profile.

    Derives from ``UnicodeError`` so existing handlers for Unicode
    failures also catch stringprep violations.
    """
def b1_mapping(char):
    """Map characters that are commonly mapped to nothing.

    :param char: A single character to look up in stringprep table B.1.
    :return: An empty string when ``char`` is in table B.1, otherwise
             ``None`` to signal that this mapping does not apply.
    """
    if stringprep.in_table_b1(char):
        return ''
    return None
def c12_mapping(char):
    """Map non-ASCII whitespace to spaces.

    :param char: A single character to look up in stringprep table C.1.2.
    :return: A single ASCII space when ``char`` is in table C.1.2,
             otherwise ``None`` to signal that this mapping does not apply.
    """
    if stringprep.in_table_c12(char):
        return ' '
    return None
def map_input(data, tables=None):
    """
    Each character in the input stream MUST be checked against
    a mapping table.

    :param data: The text to map, processed one character at a time.
    :param list tables:
        Optional list of mapping functions. Each one receives a single
        character and returns its replacement string, or ``None`` when
        it has no mapping for that character. The first function that
        returns a non-``None`` value wins. ``None`` (the default) means
        no mapping tables, so every character is kept unchanged.
    :return: The mapped text, joined into a single string.
    """
    if tables is None:
        # The signature advertises ``None`` as the default, so treat it
        # as "no tables" instead of failing when iterating over it.
        tables = ()

    result = []
    for char in data:
        # Keep the character itself unless some table supplies a mapping.
        replacement = char
        for mapping in tables:
            mapped = mapping(char)
            if mapped is not None:
                replacement = mapped
                break
        result.append(replacement)
    return ''.join(result)
def normalize(data, nfkc=True):
    """
    Apply the Unicode normalization step of a stringprep profile.

    A profile can specify one of two options for Unicode normalization:
    - no normalization
    - Unicode normalization with form KC

    :param data: The text to normalize.
    :param bool nfkc:
        If true, return ``data`` normalized with form KC; otherwise
        return ``data`` untouched. Defaults to `True`.
    :return: The (possibly normalized) text.
    """
    if not nfkc:
        return data
    # ``unicodedata`` is bound at import time to the ``ucd_3_2_0``
    # database object, so normalization uses Unicode 3.2.0 data.
    return unicodedata.normalize('NFKC', data)
def prohibit_output(data, tables=None):
    """
    Before the text can be emitted, it MUST be checked for prohibited
    code points.

    :param data: The text to check, processed one character at a time.
    :param list tables:
        Optional list of predicate functions. Each one receives a single
        character and returns a true value when that character is
        prohibited. ``None`` (the default) means nothing is prohibited.
    :raises: StringPrepError on the first character flagged by any
             of the predicates.
    :return: ``None``; the function only validates.
    """
    if tables is None:
        # The signature advertises ``None`` as the default, so treat it
        # as "no tables" instead of failing when iterating over it.
        tables = ()

    for char in data:
        for check in tables:
            if check(char):
                raise StringPrepError("Prohibited code point: %s" % char)
def check_bidi(data):
    """
    Enforce the three bidirectional-text rules applied by this module:

    1) The characters in section 5.8 MUST be prohibited.

    2) If a string contains any RandALCat character, the string MUST NOT
       contain any LCat character.

    3) If a string contains any RandALCat character, a RandALCat
       character MUST be the first character of the string, and a
       RandALCat character MUST be the last character of the string.

    Section 5.8 characters are detected with stringprep table C.8,
    RandALCat characters with table D.1 and LCat characters with
    table D.2.

    :param data: The text to check.
    :raises: StringPrepError naming the violated rule.
    :return: ``data`` itself when it is empty; ``None`` otherwise.
             Callers should rely on the exception, not the return value.
    """
    if not data:
        return data

    has_lcat = False
    has_randal = False

    for c in data:
        # Rule 1 is checked per character, so it is reported before
        # rules 2 and 3, which need the whole string.
        if stringprep.in_table_c8(c):
            raise StringPrepError("BIDI violation: section 6 (1)")
        if stringprep.in_table_d1(c):
            has_randal = True
        elif stringprep.in_table_d2(c):
            has_lcat = True

    if has_randal and has_lcat:
        raise StringPrepError("BIDI violation: section 6 (2)")

    first_randal = stringprep.in_table_d1(data[0])
    last_randal = stringprep.in_table_d1(data[-1])
    if has_randal and not (first_randal and last_randal):
        raise StringPrepError("BIDI violation: section 6 (3)")
def create(nfkc=True, bidi=True, mappings=None,
           prohibited=None, unassigned=None):
    """Create a profile of stringprep.

    :param bool nfkc:
        If `True`, perform NFKC Unicode normalization. Defaults to `True`.
    :param bool bidi:
        If `True`, perform bidirectional text checks. Defaults to `True`.
    :param list mappings:
        Optional list of functions for mapping characters to
        suitable replacements.
    :param list prohibited:
        Optional list of functions which check for the presence of
        prohibited characters.
    :param list unassigned:
        Optional list of functions for detecting the use of unassigned
        code points.

    :return: A function ``profile(data, query=False)``. Calling it
             returns the Unicode string of the resulting text passing
             the profile's requirements, or raises StringPrepError.
    """
    # ``None`` means "no tables". Substitute empty lists here so the
    # mapping and prohibition steps always receive something iterable.
    if mappings is None:
        mappings = []
    if prohibited is None:
        prohibited = []

    def profile(data, query=False):
        """Apply this stringprep profile to ``data``.

        :param data: The value to prepare; it is first converted with
                     ``unicode``.
        :param bool query:
            If true and the profile was created with ``unassigned``
            tables, also run the unassigned code point check.
        :raises: StringPrepError
        :return: The prepared text.
        """
        try:
            # NOTE(review): ``unicode`` is imported from slixmpp.util;
            # presumably the text type -- confirm.
            data = unicode(data)
        except UnicodeError:
            raise StringPrepError

        data = map_input(data, mappings)
        data = normalize(data, nfkc)
        prohibit_output(data, prohibited)
        if bidi:
            check_bidi(data)
        if query and unassigned:
            # NOTE(review): check_unassigned is not defined in the part
            # of this module visible here -- confirm it exists.
            check_unassigned(data, unassigned)
        return data
    return profile