2 /// Copyright (C) 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
4 /// Author: Alexander Gnauck AG-Software
6 /// This file is part of GNU Libidn.
8 /// This program is free software; you can redistribute it and/or
9 /// modify it under the terms of the GNU General Public License as
10 /// published by the Free Software Foundation; either version 2 of the
11 /// License, or (at your option) any later version.
13 /// This program is distributed in the hope that it will be useful,
14 /// but WITHOUT ANY WARRANTY; without even the implied warranty of
15 /// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 /// General Public License for more details.
18 /// You should have received a copy of the GNU General Public License
19 /// along with this program; if not, write to the Free Software
20 /// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26 namespace gnu
.inet
.encoding
/// <summary>
/// Applies NFKC normalization to a string: compatibility decomposition,
/// canonical reordering of combining marks, then canonical composition.
/// </summary>
/// <param name="sbIn">The string to normalize.</param>
/// <returns>An NFKC normalized string.</returns>
public static System.String normalizeNFKC(System.String sbIn)
{
    // NOTE(review): this method was restored from a lossy extract; the
    // declarations of code/b/last_cc/last_start and the "no mapping" and
    // "composition succeeded" branches were inferred - verify against upstream.
    System.Text.StringBuilder sbOut = new System.Text.StringBuilder();

    // Pass 1: decompose every UTF-16 code unit.
    for (int i = 0; i < sbIn.Length; i++)
    {
        char code = sbIn[i];

        // In Unicode 3.0, Hangul was defined as the block from U+AC00
        // to U+D7A3, however, since Unicode 3.2 the block extends until
        // U+D7AF. decomposeHangul only decomposes up to U+D7A3 and hands
        // back the character itself for anything beyond that.
        if (code >= 0xAC00 && code <= 0xD7AF)
        {
            sbOut.Append(decomposeHangul(code));
        }
        else
        {
            int index = decomposeIndex(code);
            if (index == -1)
            {
                // No decomposition mapping: keep the character as is.
                sbOut.Append(code);
            }
            else
            {
                sbOut.Append(DecompositionMappings.m[index]);
            }
        }
    }

    // Pass 2: bring the buffer into canonical order.
    canonicalOrdering(sbOut);

    // Pass 3: do the canonical composition.
    int last_cc = 0;     // combining class of the previous character
    int last_start = 0;  // index of the most recent character with class 0

    for (int i = 0; i < sbOut.Length; i++)
    {
        int cc = combiningClass(sbOut[i]);

        if (i > 0 && (last_cc == 0 || last_cc != cc))
        {
            // Try to combine the current character into the last starter.
            char a = sbOut[last_start];
            char b = sbOut[i];

            int c = compose(a, b);

            if (c != -1)
            {
                sbOut[last_start] = (char) c;

                // The second character has been absorbed: drop it and step
                // back so the character that slides into slot i is examined.
                sbOut.Remove(i, 1);
                i--;

                if (i == last_start)
                {
                    last_cc = 0;
                }
                else
                {
                    last_cc = combiningClass(sbOut[i - 1]);
                }
                continue;
            }
        }

        if (cc == 0)
        {
            last_start = i;
        }

        last_cc = cc;
    }

    return sbOut.ToString();
}
/// <summary>
/// Returns the index inside the decomposition table, implemented
/// using a binary search.
/// </summary>
/// <param name="c">Character to look up.</param>
/// <returns>Index if found, -1 otherwise.</returns>
internal static int decomposeIndex(char c)
{
    // DecompositionKeys.k is read as consecutive pairs: the key character at
    // the even slot, the matching decomposition index at the odd slot.
    // NOTE(review): the search assumes the keys are sorted ascending - confirm
    // against the generated table.
    int start = 0;
    int end = DecompositionKeys.k.Length / 2;

    // Half-open interval [start, end); an empty table falls straight through.
    while (start < end)
    {
        int half = start + (end - start) / 2;
        int code = DecompositionKeys.k[half * 2];

        if (c == code)
        {
            return DecompositionKeys.k[half * 2 + 1];
        }

        if (c > code)
        {
            start = half + 1;
        }
        else
        {
            end = half;
        }
    }

    // Character not found
    return -1;
}
/// <summary>
/// Returns the combining class of a given character.
/// </summary>
/// <param name="c">The character.</param>
/// <returns>The combining class.</returns>
internal static int combiningClass(char c)
{
    // Two-level lookup: the high byte selects a page through
    // CombiningClass.i, the low byte selects the entry inside that page.
    int h = c >> 8;
    int l = c & 0xff;

    int i = CombiningClass.i[h];
    if (i < 0)
    {
        // NOTE(review): a negative page index is presumed to mean "no entry
        // for this page", reported as class 0 - confirm against the
        // generated CombiningClass table.
        return 0;
    }

    return CombiningClass.c[i][l];
}
/// <summary>
/// Rearranges characters in a string buffer in order to respect the
/// canonical ordering properties.
/// </summary>
/// <param name="sbIn">Buffer to rearrange; it is modified in place.</param>
internal static void canonicalOrdering(System.Text.StringBuilder sbIn)
{
    // NOTE(review): restored from a lossy extract; the outer repeat loop,
    // the swap temporary and the empty-buffer guard were inferred - verify
    // against upstream.
    bool isOrdered = false;

    // Repeat full passes until one pass completes without any swap.
    while (!isOrdered)
    {
        isOrdered = true;

        // Guard the first-character lookup so an empty buffer is a no-op.
        int lastCC = 0;
        if (sbIn.Length > 0)
        {
            lastCC = combiningClass(sbIn[0]);
        }

        for (int i = 0; i < sbIn.Length - 1; i++)
        {
            int nextCC = combiningClass(sbIn[i + 1]);
            if (nextCC != 0 && lastCC > nextCC)
            {
                // Move the character at i + 1 leftwards until its left
                // neighbour has a combining class that is not greater.
                // Class 0 always satisfies that test, so the walk stops there.
                for (int j = i + 1; j > 0; j--)
                {
                    if (combiningClass(sbIn[j - 1]) <= nextCC)
                    {
                        break;
                    }

                    char t = sbIn[j];
                    sbIn[j] = sbIn[j - 1];
                    sbIn[j - 1] = t;
                    isOrdered = false;
                }

                // Slot i + 1 now holds the character that used to sit at i.
                nextCC = lastCC;
            }

            lastCC = nextCC;
        }
    }
}
/// <summary>
/// Returns the index inside the composition table.
/// </summary>
/// <param name="a">Character to look up.</param>
/// <returns>Index if found, -1 otherwise.</returns>
internal static int composeIndex(char a)
{
    int page = a >> 8;

    // Characters whose high byte lies beyond the page table have no entry.
    if (page >= Composition.composePage.Length)
    {
        return -1;
    }

    int ap = Composition.composePage[page];
    if (ap == -1)
    {
        // NOTE(review): -1 is presumed to mark a page without composition
        // data - confirm against the generated Composition table.
        return -1;
    }

    return Composition.composeData[ap][a & 0xff];
}
/// <summary>
/// Tries to compose two characters canonically.
/// </summary>
/// <param name="a">First character.</param>
/// <param name="b">Second character.</param>
/// <returns>
/// The composed character or -1 if no composition could be found.
/// </returns>
internal static int compose(char a, char b)
{
    // NOTE(review): the "-1" return paths were restored from a lossy
    // extract - verify against upstream.

    // Hangul syllables are composed arithmetically, not through the tables.
    int h = composeHangul(a, b);
    if (h != -1)
    {
        return h;
    }

    int ai = composeIndex(a);

    // Index range [singleFirstStart, singleSecondStart): singleFirst holds a
    // { second character, result } row for this first character.
    if (ai >= Composition.singleFirstStart && ai < Composition.singleSecondStart)
    {
        int row = ai - Composition.singleFirstStart;
        if (b == Composition.singleFirst[row][0])
        {
            return Composition.singleFirst[row][1];
        }

        return -1;
    }

    int bi = composeIndex(b);

    // Index range [singleSecondStart, ...): singleSecond holds a
    // { first character, result } row for this second character.
    if (bi >= Composition.singleSecondStart)
    {
        int row = bi - Composition.singleSecondStart;
        if (a == Composition.singleSecond[row][0])
        {
            return Composition.singleSecond[row][1];
        }

        return -1;
    }

    // General case: multiFirst[ai] is indexed by the offset of the second
    // character from multiSecondStart.
    if (ai >= 0 && ai < Composition.multiSecondStart
        && bi >= Composition.multiSecondStart && bi < Composition.singleFirstStart)
    {
        char[] f = Composition.multiFirst[ai];
        int column = bi - Composition.multiSecondStart;

        if (column < f.Length)
        {
            char r = f[column];

            // NOTE(review): a zero entry is presumed to mean "this pair does
            // not compose" - confirm against the generated table.
            if (r == 0)
            {
                return -1;
            }

            return r;
        }
    }

    return -1;
}
// Entire Hangul code copied from:
// http://www.unicode.org/unicode/reports/tr15/
//
// Several Hangul specific constants.

/// <summary>Code point where the precomposed Hangul syllables start.</summary>
internal const int SBase = 0xAC00;

/// <summary>Base code point for the L (first) jamo of a syllable.</summary>
internal const int LBase = 0x1100;

/// <summary>Base code point for the V (second) jamo of a syllable.</summary>
internal const int VBase = 0x1161;

/// <summary>
/// Base code point for the T (optional third) jamo of a syllable. An offset
/// of 0 from this base corresponds to an LV syllable without a T part.
/// </summary>
internal const int TBase = 0x11A7;

/// <summary>Number of L jamo.</summary>
internal const int LCount = 19;

/// <summary>Number of V jamo.</summary>
internal const int VCount = 21;

/// <summary>Number of T slots, counting the "no T" slot at offset 0.</summary>
internal const int TCount = 28;

/// <summary>Number of syllables that share one L jamo (VCount * TCount).</summary>
internal const int NCount = VCount * TCount;

/// <summary>Total number of precomposed syllables (LCount * NCount).</summary>
internal const int SCount = LCount * NCount;
/// <summary>
/// Decomposes a Hangul character.
/// </summary>
/// <param name="s">A character to decompose.</param>
/// <returns>
/// A string containing the Hangul decomposition of the input character.
/// If no Hangul decomposition can be found, a string containing the
/// character itself is returned.
/// </returns>
internal static System.String decomposeHangul(char s)
{
    int SIndex = s - SBase;
    if (SIndex < 0 || SIndex >= SCount)
    {
        // Not a precomposed syllable: hand the character back unchanged.
        return s.ToString();
    }

    System.Text.StringBuilder result = new System.Text.StringBuilder();
    int L = LBase + SIndex / NCount;
    int V = VBase + (SIndex % NCount) / TCount;
    int T = TBase + SIndex % TCount;

    result.Append((char) L);
    result.Append((char) V);

    // NOTE(review): this guard was restored from a lossy extract. T equal to
    // TBase corresponds to SIndex % TCount == 0, the same condition
    // composeHangul uses to recognise an LV syllable, so no third jamo is
    // emitted for it - verify against upstream.
    if (T != TBase)
    {
        result.Append((char) T);
    }

    return result.ToString();
}
362 /// <summary> Composes two hangul characters.
365 /// <param name="a">First character.
367 /// <param name="b">Second character.
369 /// <returns> Returns the composed character or -1 if the two
370 /// characters cannot be composed.
// Arithmetic Hangul composition: first tries to read (a, b) as an L + V
// pair, then as an LV syllable followed by a T jamo.
// NOTE(review): this listing is a lossy extract (braces and the final
// return statements are not visible here); the code below is left untouched.
373 internal static int composeHangul(char a
, char b
)
375 // 1. check to see if two current characters are L and V
// LIndex / VIndex are the offsets of a and b from LBase / VBase; both must
// fall inside [0, LCount) and [0, VCount) respectively.
376 int LIndex
= a
- LBase
;
377 if (0 <= LIndex
&& LIndex
< LCount
)
379 int VIndex
= b
- VBase
;
380 if (0 <= VIndex
&& VIndex
< VCount
)
382 // make syllable of form LV
383 return SBase
+ (LIndex
* VCount
+ VIndex
) * TCount
;
387 // 2. check to see if two current characters are LV and T
// A syllable offset that is a multiple of TCount is taken to be an LV
// syllable, i.e. one that does not yet carry a T part.
388 int SIndex
= a
- SBase
;
389 if (0 <= SIndex
&& SIndex
< SCount
&& (SIndex
% TCount
) == 0)
391 int TIndex
= b
- TBase
;
// NOTE(review): possible off-by-one. The L and V checks above use an
// exclusive upper bound (< LCount, < VCount) while this one is inclusive
// (<= TCount), and it also accepts TIndex == 0. The current Unicode
// Hangul composition algorithm uses 0 < TIndex && TIndex < TCount - confirm
// and tighten the bounds when the full method is in view.
392 if (0 <= TIndex
&& TIndex
<= TCount
)
394 // make syllable of form LVT