// Bump versions.
// [libidn.git] / csharp / IDNA.cs
// blob 28aea89759f7a553d1bea1c5f55b9bdfb87def31
/// <summary>
/// Copyright (C) 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
///
/// Author: Alexander Gnauck AG-Software
///
/// This file is part of GNU Libidn.
///
/// This program is free software; you can redistribute it and/or
/// modify it under the terms of the GNU General Public License as
/// published by the Free Software Foundation; either version 2 of the
/// License, or (at your option) any later version.
///
/// This program is distributed in the hope that it will be useful,
/// but WITHOUT ANY WARRANTY; without even the implied warranty of
/// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
/// General Public License for more details.
///
/// You should have received a copy of the GNU General Public License
/// along with this program; if not, write to the Free Software
/// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
/// 02111-1307 USA.
/// </summary>
using System;
using System.Text;
namespace gnu.inet.encoding
{
    /// <summary>
    /// Conversions between the Unicode and the ASCII-compatible (ACE) form of
    /// internationalized domain names, following the ToASCII and ToUnicode
    /// procedures of RFC 3490. Each operation exists in a per-label form and
    /// in a convenience form that accepts a whole dotted domain name.
    /// </summary>
    public class IDNA
    {
        /// <summary>Prefix that marks a label as ACE encoded.</summary>
        public const string ACE_PREFIX = "xn--";

        /// <summary>Largest label length accepted by step 8 of ToASCII.</summary>
        private const int MAX_LABEL_LENGTH = 63;

        /// <summary>
        /// Converts a Unicode domain name to ASCII by applying the RFC 3490
        /// section 4.1 procedure to every label. Unassigned characters are not
        /// allowed and STD3 ASCII rules are enforced.
        /// </summary>
        /// <param name="input">Unicode string; may contain several labels
        /// separated by U+002E, U+3002, U+FF0E or U+FF61.</param>
        /// <returns>The encoded labels joined with U+002E, so that the result
        /// never contains one of the non-ASCII separators.</returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="input"/>
        /// is null.</exception>
        /// <exception cref="IDNAException">A label cannot be converted. This
        /// includes empty labels (for example the one after a trailing dot),
        /// because every label must be 1..63 characters long.</exception>
        public static string toASCII(string input)
        {
            if (input == null)
            {
                throw new ArgumentNullException("input");
            }

            StringBuilder encoded = new StringBuilder();
            int labelStart = 0;

            for (int i = 0; i < input.Length; i++)
            {
                if (isLabelSeparator(input[i]))
                {
                    encoded.Append(toASCII(input.Substring(labelStart, i - labelStart), false, true));
                    // Always emit the ASCII full stop: copying an ideographic
                    // or full-width separator would make the result non-ASCII.
                    encoded.Append('.');
                    labelStart = i + 1;
                }
            }

            // Whatever follows the last separator (or the whole input when
            // there is none) is the final label.
            encoded.Append(toASCII(input.Substring(labelStart), false, true));
            return encoded.ToString();
        }

        /// <summary>
        /// Converts a single Unicode label to ASCII using the procedure in
        /// RFC 3490 section 4.1.
        /// </summary>
        /// <param name="input">Unicode string (one label).</param>
        /// <param name="allowUnassigned">Unassigned characters, allowed or not?
        /// Passed through to the nameprep step.</param>
        /// <param name="useSTD3ASCIIRules">STD3 ASCII rules, enforced or not?</param>
        /// <returns>Encoded string.</returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="input"/>
        /// is null.</exception>
        /// <exception cref="IDNAException">Nameprep or Punycode encoding failed,
        /// the label violates the STD3 rules, a non-ASCII label already starts
        /// with the ACE prefix, or the result is not 1..63 characters long
        /// (reported as <c>TOO_LONG</c>, also for an empty result).</exception>
        public static string toASCII(string input, bool allowUnassigned, bool useSTD3ASCIIRules)
        {
            if (input == null)
            {
                throw new ArgumentNullException("input");
            }

            // Step 1: Check if the string contains code points outside
            //         the ASCII range 0..0x7f.
            // Step 2: Perform the nameprep operation.
            if (containsNonASCII(input))
            {
                try
                {
                    input = Stringprep.nameprep(input, allowUnassigned);
                }
                catch (StringprepException e)
                {
                    throw new IDNAException(e);
                }
            }

            // Step 3: - Verify the absence of non-LDH ASCII code points
            //           0..0x2c, 0x2e..0x2f, 0x3a..0x40, 0x5b..0x60,
            //           0x7b..0x7f
            //         - Verify the absence of leading and trailing
            //           hyphen-minus
            if (useSTD3ASCIIRules)
            {
                for (int i = 0; i < input.Length; i++)
                {
                    int c = input[i];
                    if ((c <= 0x2c) || (c >= 0x2e && c <= 0x2f) || (c >= 0x3a && c <= 0x40)
                        || (c >= 0x5b && c <= 0x60) || (c >= 0x7b && c <= 0x7f))
                    {
                        throw new IDNAException(IDNAException.CONTAINS_NON_LDH);
                    }
                }

                // Ordinal comparison: this is a code point test, not a
                // linguistic one, so the current culture must not matter.
                if (input.StartsWith("-", StringComparison.Ordinal)
                    || input.EndsWith("-", StringComparison.Ordinal))
                {
                    throw new IDNAException(IDNAException.CONTAINS_HYPHEN);
                }
            }

            // Step 4: If all code points are inside 0..0x7f, skip to step 8.
            // The scan is repeated because step 2 may have replaced the string.
            string output = input;

            if (containsNonASCII(input))
            {
                // Step 5: Verify that the sequence does not begin with the ACE prefix.
                if (hasACEPrefix(input))
                {
                    throw new IDNAException(IDNAException.CONTAINS_ACE_PREFIX);
                }

                // Step 6: Punycode
                try
                {
                    output = Punycode.encode(input);
                }
                catch (PunycodeException e)
                {
                    throw new IDNAException(e);
                }

                // Step 7: Prepend the ACE prefix.
                output = ACE_PREFIX + output;
            }

            // Step 8: Check that the length is inside 1..63.
            if (output.Length < 1 || output.Length > MAX_LABEL_LENGTH)
            {
                throw new IDNAException(IDNAException.TOO_LONG);
            }

            return output;
        }

        /// <summary>
        /// Converts an ASCII-encoded domain name to Unicode, label by label.
        /// Unassigned characters are not allowed and STD3 hostnames are
        /// enforced.
        /// </summary>
        /// <param name="input">ASCII input string; may contain several labels
        /// separated by U+002E, U+3002, U+FF0E or U+FF61.</param>
        /// <returns>Unicode string. Separators are copied through unchanged and
        /// labels that cannot be decoded are returned as they were given.</returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="input"/>
        /// is null.</exception>
        public static string toUnicode(string input)
        {
            if (input == null)
            {
                throw new ArgumentNullException("input");
            }

            StringBuilder decoded = new StringBuilder();
            int labelStart = 0;

            for (int i = 0; i < input.Length; i++)
            {
                char c = input[i];
                if (isLabelSeparator(c))
                {
                    decoded.Append(toUnicode(input.Substring(labelStart, i - labelStart), false, true));
                    decoded.Append(c);
                    labelStart = i + 1;
                }
            }

            decoded.Append(toUnicode(input.Substring(labelStart), false, true));
            return decoded.ToString();
        }

        /// <summary>
        /// Converts a single ASCII-encoded label to Unicode. Any failure along
        /// the way makes the method return the label exactly as it was passed
        /// in.
        /// </summary>
        /// <param name="input">ASCII input string (one label).</param>
        /// <param name="allowUnassigned">Allow unassigned Unicode characters.</param>
        /// <param name="useSTD3ASCIIRules">Check that the output conforms to STD3.</param>
        /// <returns>Unicode string, or <paramref name="input"/> unchanged when
        /// it is not a valid ACE label.</returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="input"/>
        /// is null.</exception>
        public static string toUnicode(string input, bool allowUnassigned, bool useSTD3ASCIIRules)
        {
            if (input == null)
            {
                throw new ArgumentNullException("input");
            }

            string original = input;

            // Step 1: If all code points are inside 0..0x7f, skip to step 3.
            // Step 2: Perform the Nameprep operation.
            if (containsNonASCII(input))
            {
                try
                {
                    input = Stringprep.nameprep(input, allowUnassigned);
                }
                catch (StringprepException)
                {
                    // ToUnicode never fails!
                    return original;
                }
            }

            // Step 3: Verify the sequence starts with the ACE prefix.
            if (!hasACEPrefix(input))
            {
                // ToUnicode never fails!
                return original;
            }

            // Kept for the round-trip comparison in step 7.
            string stored = input;

            // Step 4: Remove the ACE prefix.
            // Step 5: Decode using punycode.
            string output;
            try
            {
                output = Punycode.decode(input.Substring(ACE_PREFIX.Length));
            }
            catch (PunycodeException)
            {
                // ToUnicode never fails!
                return original;
            }

            // Step 6: Apply toASCII.
            string ascii;
            try
            {
                ascii = toASCII(output, allowUnassigned, useSTD3ASCIIRules);
            }
            catch (IDNAException)
            {
                // ToUnicode never fails!
                return original;
            }

            // Step 7: Compare case-insensitively. The comparison is ordinal so
            // that culture-specific casing rules (e.g. the Turkish dotted and
            // dotless i) cannot make two equal ASCII labels look different.
            if (!string.Equals(ascii, stored, StringComparison.OrdinalIgnoreCase))
            {
                // ToUnicode never fails!
                return original;
            }

            // Step 8: Return the result.
            return output;
        }

        /// <summary>Tells whether <paramref name="c"/> is one of the four
        /// characters treated as a label separator.</summary>
        private static bool isLabelSeparator(char c)
        {
            return c == '.' || c == '\u3002' || c == '\uff0e' || c == '\uff61';
        }

        /// <summary>Tells whether <paramref name="s"/> holds at least one
        /// code unit above 0x7f.</summary>
        private static bool containsNonASCII(string s)
        {
            for (int i = 0; i < s.Length; i++)
            {
                if (s[i] > 0x7f)
                {
                    return true;
                }
            }

            return false;
        }

        /// <summary>Tells whether <paramref name="s"/> starts with the ACE
        /// prefix in any capitalization.</summary>
        private static bool hasACEPrefix(string s)
        {
            return s.StartsWith(ACE_PREFIX, StringComparison.OrdinalIgnoreCase);
        }
    }
}