Bump versions.
[libidn.git] / csharp / IDNA.cs
blob141d7f960de8f705987db7c3e7897360abe581a2
1 /// <summary> Copyright (C) 2004, 2005 Free Software Foundation, Inc.
2 /// *
3 /// Author: Alexander Gnauck AG-Software
4 /// *
5 /// This file is part of GNU Libidn.
6 /// *
7 /// This program is free software; you can redistribute it and/or
8 /// modify it under the terms of the GNU General Public License as
9 /// published by the Free Software Foundation; either version 2 of the
10 /// License, or (at your option) any later version.
11 /// *
12 /// This program is distributed in the hope that it will be useful,
13 /// but WITHOUT ANY WARRANTY; without even the implied warranty of
14 /// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 /// General Public License for more details.
16 /// *
17 /// You should have received a copy of the GNU General Public License
18 /// along with this program; if not, write to the Free Software
19 /// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
20 /// 02111-1307 USA.
21 /// </summary>
23 using System;
namespace gnu.inet.encoding
{
    /// <summary>
    /// ToASCII and ToUnicode operations for internationalized domain names,
    /// following the procedures of RFC 3490 section 4. Nameprep and Punycode
    /// processing are delegated to <c>Stringprep</c> and <c>Punycode</c>.
    /// </summary>
    public class IDNA
    {
        /// <summary>
        /// ACE prefix that marks a Punycode-encoded label. RFC 3490 requires it
        /// to be recognized case-insensitively.
        /// </summary>
        public const System.String ACE_PREFIX = "xn--";

        /// <summary>
        /// Converts a Unicode domain name to ASCII using the procedure in
        /// RFC 3490 section 4.1, label by label. Unassigned characters are not
        /// allowed and STD3 ASCII rules are enforced.
        /// </summary>
        /// <remarks>
        /// U+002E, U+3002, U+FF0E and U+FF61 are all recognized as label
        /// separators; every separator is written to the result as U+002E so
        /// that the result is pure ASCII. A single trailing separator (the
        /// empty root label of a fully qualified name) is preserved and not
        /// run through the per-label length check.
        /// </remarks>
        /// <param name="input">Unicode domain name, possibly containing dots.
        /// Must not be null.</param>
        /// <returns>The encoded domain name.</returns>
        /// <exception cref="IDNAException">A label cannot be converted.</exception>
        public static System.String toASCII(System.String input)
        {
            System.Text.StringBuilder result = new System.Text.StringBuilder();
            System.Text.StringBuilder label = new System.Text.StringBuilder();

            for (int i = 0; i < input.Length; i++)
            {
                char c = input[i];
                if (isLabelSeparator(c))
                {
                    result.Append(toASCII(label.ToString(), false, true));
                    // RFC 3490 section 4, step 5: all separators become U+002E.
                    result.Append('.');
                    label.Length = 0;
                }
                else
                {
                    label.Append(c);
                }
            }

            // An empty final label directly after a separator is the root
            // label of a fully qualified name ("example.com."); it is kept
            // as-is. In every other case (including completely empty input)
            // the remaining label is converted and validated.
            if (label.Length > 0 || result.Length == 0)
            {
                result.Append(toASCII(label.ToString(), false, true));
            }

            return result.ToString();
        }

        /// <summary>
        /// Converts a single Unicode label to ASCII using the procedure in
        /// RFC 3490 section 4.1.
        /// </summary>
        /// <param name="input">Unicode label. Must not be null.</param>
        /// <param name="allowUnassigned">Whether unassigned code points are
        /// accepted by the nameprep step.</param>
        /// <param name="useSTD3ASCIIRules">Whether to reject non-LDH ASCII
        /// characters and leading/trailing hyphens.</param>
        /// <returns>The encoded label.</returns>
        /// <exception cref="IDNAException">
        /// Nameprep or Punycode encoding failed, the label violates the STD3
        /// rules, a non-ASCII label already starts with the ACE prefix, or the
        /// result is not 1..63 characters long (reported as <c>TOO_LONG</c>
        /// in both directions).
        /// </exception>
        public static System.String toASCII(System.String input, bool allowUnassigned, bool useSTD3ASCIIRules)
        {
            // Steps 1 and 2: nameprep is only needed if the label contains
            // code points outside the ASCII range 0..0x7f.
            if (containsNonASCII(input))
            {
                try
                {
                    input = Stringprep.nameprep(input, allowUnassigned);
                }
                catch (StringprepException e)
                {
                    throw new IDNAException(e);
                }
            }

            // Step 3: - Verify the absence of non-LDH ASCII code points
            //           0..0x2c, 0x2e..0x2f, 0x3a..0x40, 0x5b..0x60, 0x7b..0x7f
            //         - Verify the absence of leading and trailing hyphen-minus
            if (useSTD3ASCIIRules)
            {
                for (int i = 0; i < input.Length; i++)
                {
                    int c = input[i];
                    if ((c <= 0x2c) || (c >= 0x2e && c <= 0x2f) || (c >= 0x3a && c <= 0x40) || (c >= 0x5b && c <= 0x60) || (c >= 0x7b && c <= 0x7f))
                    {
                        throw new IDNAException(IDNAException.CONTAINS_NON_LDH);
                    }
                }

                // Compare characters directly: the single-argument
                // StartsWith/EndsWith overloads are culture-sensitive.
                if (input.Length > 0 && (input[0] == '-' || input[input.Length - 1] == '-'))
                {
                    throw new IDNAException(IDNAException.CONTAINS_HYPHEN);
                }
            }

            System.String output = input;

            // Step 4: If all code points are inside 0..0x7f, skip to step 8.
            if (containsNonASCII(input))
            {
                // Step 5: Verify that the sequence does not begin with the
                // ACE prefix (recognized case-insensitively).
                if (hasAcePrefix(input))
                {
                    throw new IDNAException(IDNAException.CONTAINS_ACE_PREFIX);
                }

                // Step 6: Punycode
                try
                {
                    output = Punycode.encode(input);
                }
                catch (PunycodeException e)
                {
                    throw new IDNAException(e);
                }

                // Step 7: Prepend the ACE prefix.
                output = ACE_PREFIX + output;
            }

            // Step 8: Check that the length is inside 1..63.
            if (output.Length < 1 || output.Length > 63)
            {
                throw new IDNAException(IDNAException.TOO_LONG);
            }

            return output;
        }

        /// <summary>
        /// Converts an ASCII-encoded domain name to Unicode, label by label.
        /// Unassigned characters are not allowed and STD3 ASCII rules are
        /// enforced. Labels that cannot be decoded are passed through
        /// unchanged, and separators are copied to the result as they appear
        /// in the input.
        /// </summary>
        /// <param name="input">Domain name, possibly containing dots
        /// (U+002E, U+3002, U+FF0E or U+FF61). Must not be null.</param>
        /// <returns>The decoded domain name.</returns>
        public static System.String toUnicode(System.String input)
        {
            System.Text.StringBuilder result = new System.Text.StringBuilder();
            System.Text.StringBuilder label = new System.Text.StringBuilder();

            for (int i = 0; i < input.Length; i++)
            {
                char c = input[i];
                if (isLabelSeparator(c))
                {
                    result.Append(toUnicode(label.ToString(), false, true));
                    result.Append(c);
                    label.Length = 0;
                }
                else
                {
                    label.Append(c);
                }
            }

            result.Append(toUnicode(label.ToString(), false, true));
            return result.ToString();
        }

        /// <summary>
        /// Converts a single ASCII-encoded label to Unicode using the
        /// procedure in RFC 3490 section 4.2. If any step fails, the input
        /// is returned unchanged instead of reporting an error.
        /// </summary>
        /// <param name="input">Label to decode. Must not be null.</param>
        /// <param name="allowUnassigned">Allow unassigned Unicode characters.</param>
        /// <param name="useSTD3ASCIIRules">Check that the round-tripped label
        /// conforms to STD3.</param>
        /// <returns>The decoded label, or <paramref name="input"/> itself when
        /// it is not a valid ACE label.</returns>
        public static System.String toUnicode(System.String input, bool allowUnassigned, bool useSTD3ASCIIRules)
        {
            System.String original = input;

            // Steps 1 and 2: nameprep only if there are non-ASCII code points.
            if (containsNonASCII(input))
            {
                try
                {
                    input = Stringprep.nameprep(input, allowUnassigned);
                }
                catch (StringprepException)
                {
                    // ToUnicode never fails!
                    return original;
                }
            }

            // Step 3: Verify the sequence starts with the ACE prefix. The
            // prefix must be recognized in any capitalization ("XN--").
            if (!hasAcePrefix(input))
            {
                // ToUnicode never fails!
                return original;
            }

            System.String stored = input;

            // Step 4: Remove the ACE prefix.
            input = input.Substring(ACE_PREFIX.Length);

            // Step 5: Decode using punycode.
            System.String output;
            try
            {
                output = Punycode.decode(input);
            }
            catch (PunycodeException)
            {
                // ToUnicode never fails!
                return original;
            }

            // Step 6: Apply toASCII.
            System.String ascii;
            try
            {
                ascii = toASCII(output, allowUnassigned, useSTD3ASCIIRules);
            }
            catch (IDNAException)
            {
                // ToUnicode never fails!
                return original;
            }

            // Step 7: Compare case-insensitively. An ordinal comparison is
            // used so the outcome does not depend on the current culture
            // (e.g. Turkish casing of 'i').
            if (!System.String.Equals(ascii, stored, System.StringComparison.OrdinalIgnoreCase))
            {
                // ToUnicode never fails!
                return original;
            }

            // Step 8: Return the result.
            return output;
        }

        /// <summary>Tells whether a character is one of the four code points
        /// that RFC 3490 section 3.1 treats as a label separator.</summary>
        private static bool isLabelSeparator(char c)
        {
            return c == '.' || c == '\u3002' || c == '\uff0e' || c == '\uff61';
        }

        /// <summary>Tells whether the string contains any code unit above
        /// 0x7f.</summary>
        private static bool containsNonASCII(System.String s)
        {
            for (int i = 0; i < s.Length; i++)
            {
                if (s[i] > 0x7f)
                {
                    return true;
                }
            }
            return false;
        }

        /// <summary>Tells whether the label starts with the ACE prefix,
        /// ignoring case and independent of the current culture.</summary>
        private static bool hasAcePrefix(System.String label)
        {
            return label.StartsWith(ACE_PREFIX, System.StringComparison.OrdinalIgnoreCase);
        }
    }
}