1 # -*- coding: utf-8 -*-
3 slixmpp.util.stringprep_profiles
4 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
6 This module makes it easier to define profiles of stringprep,
7 such as nodeprep and resourceprep for JID validation, and
10 Part of Slixmpp: The Slick XMPP Library
12 :copyright: (c) 2012 Nathanael C. Fritz, Lance J.T. Stout
13 :license: MIT, see LICENSE for more details
17 from __future__
import unicode_literals
20 from unicodedata
import ucd_3_2_0
as unicodedata
22 from slixmpp
.util
import unicode
25 class StringPrepError(UnicodeError):
30 """Map characters that are commonly mapped to nothing."""
31 return '' if stringprep
.in_table_b1(char
) else None
def c12_mapping(char):
    """Map non-ASCII whitespace to spaces.

    :param char: A single character to test against stringprep
                 table C.1.2.
    :return: ``' '`` when ``char`` is in table C.1.2, otherwise
             ``None`` to signal that this mapping does not apply.
    """
    return ' ' if stringprep.in_table_c12(char) else None
def map_input(data, tables=None):
    """Run every character of ``data`` through the mapping functions.

    Each character in the input stream MUST be checked against the
    mapping tables. A mapping function receives one character and
    returns its replacement string, or ``None`` when it has nothing
    to say about that character.

    :param data: The text to map.
    :param list tables: Optional list of mapping functions. ``None``
                        (the default) is treated as "no mappings", so
                        the text is returned unchanged instead of
                        failing when iterating over ``None``.
    :return: The mapped text as a single string.
    """
    mappings = tables or ()
    result = []
    for char in data:
        replacement = None
        for mapping in mappings:
            replacement = mapping(char)
            if replacement is not None:
                # NOTE(review): assumes the first mapping that yields a
                # replacement wins -- confirm against the upstream file.
                break
        if replacement is None:
            # No mapping applied; keep the character as it is.
            replacement = char
        result.append(replacement)
    return ''.join(result)
def normalize(data, nfkc=True):
    """Apply the profile's Unicode normalization step.

    A profile can specify one of two options for Unicode normalization:
    Unicode normalization with form KC, or leaving the text as it is.

    :param data: The text to normalize.
    :param bool nfkc: If `True`, perform NFKC Unicode normalization.
                      Defaults to `True`.
    :return: The normalized text, or ``data`` unchanged when ``nfkc``
             is false.
    """
    # NOTE(review): the guard and the return were rebuilt from the
    # signature and from how callers use the result -- confirm against
    # the upstream file.
    if nfkc:
        # ``unicodedata`` is whatever the module imported under that
        # name (the file binds it to ``ucd_3_2_0``).
        data = unicodedata.normalize('NFKC', data)
    return data
70 def prohibit_output(data
, tables
=None):
72 Before the text can be emitted, it MUST be checked for prohibited
78 raise StringPrepError("Prohibited code point: %s" % char
)
83 1) The characters in section 5.8 MUST be prohibited.
85 2) If a string contains any RandALCat character, the string MUST NOT
86 contain any LCat character.
88 3) If a string contains any RandALCat character, a RandALCat
89 character MUST be the first character of the string, and a
90 RandALCat character MUST be the last character of the string.
99 if stringprep
.in_table_c8(c
):
100 raise StringPrepError("BIDI violation: seciton 6 (1)")
101 if stringprep
.in_table_d1(c
):
103 elif stringprep
.in_table_d2(c
):
106 if has_randal
and has_lcat
:
107 raise StringPrepError("BIDI violation: section 6 (2)")
109 first_randal
= stringprep
.in_table_d1(data
[0])
110 last_randal
= stringprep
.in_table_d1(data
[-1])
111 if has_randal
and not (first_randal
and last_randal
):
112 raise StringPrepError("BIDI violation: section 6 (3)")
115 def create(nfkc
=True, bidi
=True, mappings
=None,
116 prohibited
=None, unassigned
=None):
117 """Create a profile of stringprep.
120 If `True`, perform NFKC Unicode normalization. Defaults to `True`.
122 If `True`, perform bidirectional text checks. Defaults to `True`.
123 :param list mappings:
124 Optional list of functions for mapping characters to
125 suitable replacements.
126 :param list prohibited:
127 Optional list of functions which check for the presence of
128 prohibited characters.
129 :param list unassigned:
130 Optional list of functions for detecting the use of unassigned
133 :raises: StringPrepError
134 :return: Unicode string of the resulting text passing the
135 profile's requirements.
137 def profile(data
, query
=False):
141 raise StringPrepError
143 data
= map_input(data
, mappings
)
144 data
= normalize(data
, nfkc
)
145 prohibit_output(data
, prohibited
)
148 if query
and unassigned
:
149 check_unassigned(data
, unassigned
)