2 /// Copyright (C) 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
4 /// Author: Alexander Gnauck AG-Software
6 /// This file is part of GNU Libidn.
8 /// This program is free software; you can redistribute it and/or
9 /// modify it under the terms of the GNU General Public License as
10 /// published by the Free Software Foundation; either version 2 of the
11 /// License, or (at your option) any later version.
13 /// This program is distributed in the hope that it will be useful,
14 /// but WITHOUT ANY WARRANTY; without even the implied warranty of
15 /// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 /// General Public License for more details.
18 /// You should have received a copy of the GNU General Public License
19 /// along with this program; if not, write to the Free Software
20 /// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26 namespace gnu
.inet
.encoding
/// <summary>
/// Prefix ("xn--") that toASCII prepends to a Punycode-encoded label and
/// that toUnicode requires before it attempts to decode one.
/// </summary>
public const string ACE_PREFIX = "xn--";
/// <summary>
/// Converts a Unicode string to ASCII using the procedure in RFC 3490
/// section 4.1. Unassigned characters are not allowed and STD3 ASCII
/// rules are enforced. The input may be a domain name containing dots:
/// it is split at every label separator and each label is converted on
/// its own.
/// </summary>
/// <param name="input">Unicode string.</param>
/// <returns>Encoded string.</returns>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="input"/> is null.
/// </exception>
/// <exception cref="IDNAException">
/// A label is rejected by the three-argument overload.
/// </exception>
public static System.String toASCII(System.String input)
{
    if (input == null)
    {
        throw new System.ArgumentNullException("input");
    }

    System.Text.StringBuilder result = new System.Text.StringBuilder();
    System.Text.StringBuilder label = new System.Text.StringBuilder();

    for (int i = 0; i < input.Length; i++)
    {
        char c = input[i];

        // Full stop, ideographic full stop, fullwidth full stop and
        // halfwidth ideographic full stop all terminate a label.
        if (c == '.' || c == '\u3002' || c == '\uff0e' || c == '\uff61')
        {
            result.Append(toASCII(label.ToString(), false, true));
            // NOTE(review): the separator append was not visible in the
            // extracted source; an ASCII '.' is emitted so that the result
            // stays pure ASCII - confirm against the upstream file.
            result.Append('.');
            label.Length = 0; // start the next label, reusing the buffer
        }
        else
        {
            label.Append(c);
        }
    }

    // Whatever follows the last separator (or the whole input when there
    // is no separator) is the final label.
    result.Append(toASCII(label.ToString(), false, true));
    return result.ToString();
}
/// <summary>
/// Converts a Unicode string to ASCII using the procedure in RFC 3490
/// section 4.1. The input is treated as one unit; it is not split at dots.
/// </summary>
/// <param name="input">Unicode string.</param>
/// <param name="allowUnassigned">Unassigned characters, allowed or not?</param>
/// <param name="useSTD3ASCIIRules">STD3 ASCII rules, enforced or not?</param>
/// <returns>Encoded string.</returns>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="input"/> is null.
/// </exception>
/// <exception cref="IDNAException">
/// Nameprep or Punycode encoding fails, the STD3 checks reject the input,
/// the prepared input already starts with the ACE prefix, or the result
/// is not 1 to 63 characters long.
/// </exception>
public static System.String toASCII(System.String input, bool allowUnassigned, bool useSTD3ASCIIRules)
{
    if (input == null)
    {
        throw new System.ArgumentNullException("input");
    }

    // Step 1: Check if the string contains code points outside
    //         the ASCII range 0..0x7f.
    // Step 2: Perform the nameprep operation (only needed in that case).
    if (containsNonASCII(input))
    {
        try
        {
            input = Stringprep.nameprep(input, allowUnassigned);
        }
        catch (StringprepException e)
        {
            throw new IDNAException(e);
        }
    }

    // Step 3: - Verify the absence of non-LDH ASCII code points
    //           0..0x2c, 0x2e..0x2f, 0x3a..0x40, 0x5b..0x60, 0x7b..0x7f
    //         - Verify the absence of leading and trailing hyphen-minus
    if (useSTD3ASCIIRules)
    {
        for (int i = 0; i < input.Length; i++)
        {
            char c = input[i];
            if ((c <= 0x2c) || (c >= 0x2e && c <= 0x2f) || (c >= 0x3a && c <= 0x40) || (c >= 0x5b && c <= 0x60) || (c >= 0x7b && c <= 0x7f))
            {
                throw new IDNAException(IDNAException.CONTAINS_NON_LDH);
            }
        }

        // Plain character tests: the culture-sensitive StartsWith/EndsWith
        // overloads can skip characters the current culture treats as
        // ignorable, which is wrong for protocol data.
        if (input.Length > 0 && (input[0] == '-' || input[input.Length - 1] == '-'))
        {
            throw new IDNAException(IDNAException.CONTAINS_HYPHEN);
        }
    }

    // Step 4: If all code points are inside 0..0x7f, skip to step 8.
    // The scan is repeated because nameprep may have rewritten the input.
    System.String output = input;

    if (containsNonASCII(input))
    {
        // Step 5: Verify that the sequence does not begin with the ACE prefix.
        if (input.StartsWith(ACE_PREFIX, System.StringComparison.Ordinal))
        {
            throw new IDNAException(IDNAException.CONTAINS_ACE_PREFIX);
        }

        // Step 6: Encode with Punycode.
        try
        {
            output = Punycode.encode(input);
        }
        catch (PunycodeException e)
        {
            throw new IDNAException(e);
        }

        // Step 7: Prepend the ACE prefix.
        output = ACE_PREFIX + output;
    }

    // Step 8: Check that the length is inside 1..63.
    if (output.Length < 1 || output.Length > 63)
    {
        throw new IDNAException(IDNAException.TOO_LONG);
    }

    return output;
}

/// <summary>
/// Reports whether any UTF-16 code unit of the text lies above 0x7f.
/// </summary>
private static bool containsNonASCII(System.String text)
{
    for (int i = 0; i < text.Length; i++)
    {
        if (text[i] > 0x7f)
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Converts an ASCII-encoded string to Unicode. Unassigned characters are
/// not allowed and STD3 hostnames are enforced. The input may be a domain
/// name containing dots: it is split at every label separator and each
/// label is converted on its own.
/// </summary>
/// <param name="input">ASCII input string.</param>
/// <returns>Unicode string.</returns>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="input"/> is null.
/// </exception>
public static System.String toUnicode(System.String input)
{
    if (input == null)
    {
        throw new System.ArgumentNullException("input");
    }

    System.Text.StringBuilder result = new System.Text.StringBuilder();
    System.Text.StringBuilder label = new System.Text.StringBuilder();

    for (int i = 0; i < input.Length; i++)
    {
        char c = input[i];

        // Full stop, ideographic full stop, fullwidth full stop and
        // halfwidth ideographic full stop all terminate a label.
        if (c == '.' || c == '\u3002' || c == '\uff0e' || c == '\uff61')
        {
            result.Append(toUnicode(label.ToString(), false, true));
            // NOTE(review): the separator append was not visible in the
            // extracted source; the separator is copied through unchanged
            // here - confirm against the upstream file.
            result.Append(c);
            label.Length = 0; // start the next label, reusing the buffer
        }
        else
        {
            label.Append(c);
        }
    }

    // Whatever follows the last separator (or the whole input when there
    // is no separator) is the final label.
    result.Append(toUnicode(label.ToString(), false, true));
    return result.ToString();
}
/// <summary>
/// Converts an ASCII-encoded string to Unicode. Apart from rejecting a
/// null argument this conversion does not fail: whenever a step cannot be
/// completed the original input is returned unchanged.
/// </summary>
/// <param name="input">ASCII input string.</param>
/// <param name="allowUnassigned">Allow unassigned Unicode characters.</param>
/// <param name="useSTD3ASCIIRules">Check that the output conforms to STD3.</param>
/// <returns>
/// Unicode string, or the unmodified input when it cannot be decoded.
/// </returns>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="input"/> is null.
/// </exception>
public static System.String toUnicode(System.String input, bool allowUnassigned, bool useSTD3ASCIIRules)
{
    if (input == null)
    {
        throw new System.ArgumentNullException("input");
    }

    System.String original = input;

    // Step 1: If all code points are inside 0..0x7f, skip to step 3.
    bool nonASCII = false;
    for (int i = 0; i < input.Length; i++)
    {
        if (input[i] > 0x7f)
        {
            nonASCII = true;
            break;
        }
    }

    // Step 2: Perform the Nameprep operation.
    if (nonASCII)
    {
        try
        {
            input = Stringprep.nameprep(input, allowUnassigned);
        }
        catch (StringprepException)
        {
            // ToUnicode never fails!
            return original;
        }
    }

    // Step 3: Verify the sequence starts with the ACE prefix.
    // Ordinal comparison keeps the current culture out of a protocol check.
    // NOTE(review): the match stays case-sensitive as before; RFC 3490
    // appears to treat the ACE prefix case-insensitively - confirm whether
    // "XN--" input should be decoded as well.
    if (!input.StartsWith(ACE_PREFIX, System.StringComparison.Ordinal))
    {
        // ToUnicode never fails!
        return original;
    }

    // Kept for the round-trip check in step 7.
    System.String stored = input;

    // Step 4: Remove the ACE prefix.
    input = input.Substring(ACE_PREFIX.Length);

    // Step 5: Decode using punycode
    System.String output;
    try
    {
        output = Punycode.decode(input);
    }
    catch (PunycodeException)
    {
        // ToUnicode never fails!
        return original;
    }

    // Step 6: Apply toASCII
    System.String ascii;
    try
    {
        ascii = toASCII(output, allowUnassigned, useSTD3ASCIIRules);
    }
    catch (IDNAException)
    {
        // ToUnicode never fails!
        return original;
    }

    // Step 7: Compare case-insensitively.
    // OrdinalIgnoreCase instead of ToUpper(): culture-specific casing
    // (for example the Turkish dotted/dotless i) would make equal ASCII
    // labels compare as different.
    if (!System.String.Equals(ascii, stored, System.StringComparison.OrdinalIgnoreCase))
    {
        // ToUnicode never fails!
        return original;
    }

    // Step 8: Return the result.
    return output;
}