slixmpp/util/stringprep_profiles.py

   1 # -*- coding: utf-8 -*-
   2 """
   3     slixmpp.util.stringprep_profiles
   4     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   5
   6     This module makes it easier to define profiles of stringprep,
   7     such as nodeprep and resourceprep for JID validation, and
   8     SASLprep for SASL.
   9
  10     Part of Slixmpp: The Slick XMPP Library
  11
  12     :copyright: (c) 2012 Nathanael C. Fritz, Lance J.T. Stout
  13     :license: MIT, see LICENSE for more details
  14 """
  15
  16
  17 from __future__ import unicode_literals
  18
  19 import stringprep
  20 from unicodedata import ucd_3_2_0 as unicodedata
  21
  22 from slixmpp.util import unicode
  23
  24
  25 class StringPrepError(UnicodeError):
  26     pass
  27
  28
  29 def b1_mapping(char):
  30     """Map characters that are commonly mapped to nothing."""
  31     return '' if stringprep.in_table_b1(char) else None
  32
  33
  34 def c12_mapping(char):
  35     """Map non-ASCII whitespace to spaces."""
  36     return ' ' if stringprep.in_table_c12(char) else None
  37
  38
  39 def map_input(data, tables=None):
  40     """
  41     Each character in the input stream MUST be checked against
  42     a mapping table.
  43     """
  44     result = []
  45     for char in data:
  46         replacement = None
  47
  48         for mapping in tables:
  49             replacement = mapping(char)
  50             if replacement is not None:
  51                 break
  52
  53         if replacement is None:
  54             replacement = char
  55         result.append(replacement)
  56     return ''.join(result)
  57
  58
  59 def normalize(data, nfkc=True):
  60     """
  61     A profile can specify one of two options for Unicode normalization:
  62         - no normalization
  63         - Unicode normalization with form KC
  64     """
  65     if nfkc:
  66         data = unicodedata.normalize('NFKC', data)
  67     return data
  68
  69
  70 def prohibit_output(data, tables=None):
  71     """
  72     Before the text can be emitted, it MUST be checked for prohibited
  73     code points.
  74     """
  75     for char in data:
  76         for check in tables:
  77             if check(char):
  78                 raise StringPrepError("Prohibited code point: %s" % char)
  79
  80
  81 def check_bidi(data):
  82     """
  83     1) The characters in section 5.8 MUST be prohibited.
  84
  85     2) If a string contains any RandALCat character, the string MUST NOT
  86        contain any LCat character.
  87
  88     3) If a string contains any RandALCat character, a RandALCat
  89        character MUST be the first character of the string, and a
  90        RandALCat character MUST be the last character of the string.
  91     """
  92     if not data:
  93         return data
  94
  95     has_lcat = False
  96     has_randal = False
  97
  98     for c in data:
  99         if stringprep.in_table_c8(c):
 100             raise StringPrepError("BIDI violation: seciton 6 (1)")
 101         if stringprep.in_table_d1(c):
 102             has_randal = True
 103         elif stringprep.in_table_d2(c):
 104             has_lcat = True
 105
 106     if has_randal and has_lcat:
 107         raise StringPrepError("BIDI violation: section 6 (2)")
 108
 109     first_randal = stringprep.in_table_d1(data[0])
 110     last_randal = stringprep.in_table_d1(data[-1])
 111     if has_randal and not (first_randal and last_randal):
 112         raise StringPrepError("BIDI violation: section 6 (3)")
 113
 114
 115 def create(nfkc=True, bidi=True, mappings=None,
 116            prohibited=None, unassigned=None):
 117     """Create a profile of stringprep.
 118
 119     :param bool nfkc:
 120         If `True`, perform NFKC Unicode normalization. Defaults to `True`.
 121     :param bool bidi:
 122         If `True`, perform bidirectional text checks. Defaults to `True`.
 123     :param list mappings:
 124         Optional list of functions for mapping characters to
 125         suitable replacements.
 126     :param list prohibited:
 127         Optional list of functions which check for the presence of
 128         prohibited characters.
 129     :param list unassigned:
 130         Optional list of functions for detecting the use of unassigned
 131         code points.
 132
 133     :raises: StringPrepError
 134     :return: Unicode string of the resulting text passing the
 135              profile's requirements.
 136     """
 137     def profile(data, query=False):
 138         try:
 139             data = unicode(data)
 140         except UnicodeError:
 141             raise StringPrepError
 142
 143         data = map_input(data, mappings)
 144         data = normalize(data, nfkc)
 145         prohibit_output(data, prohibited)
 146         if bidi:
 147             check_bidi(data)
 148         if query and unassigned:
 149             check_unassigned(data, unassigned)
 150         return data
 151     return profile