1 /// <summary> Copyright (C) 2004, 2005 Free Software Foundation, Inc.
3 /// Author: Alexander Gnauck AG-Software
5 /// This file is part of GNU Libidn.
7 /// This program is free software; you can redistribute it and/or
8 /// modify it under the terms of the GNU General Public License as
9 /// published by the Free Software Foundation; either version 2 of the
10 /// License, or (at your option) any later version.
12 /// This program is distributed in the hope that it will be useful,
13 /// but WITHOUT ANY WARRANTY; without even the implied warranty of
14 /// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 /// General Public License for more details.
17 /// You should have received a copy of the GNU General Public License
18 /// along with this program; if not, write to the Free Software
19 /// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
25 namespace gnu
.inet
.encoding
30 /// <summary> Applies NFKC normalization to a string: every character is decomposed,
/// the result is put into canonical order and then canonically recomposed.</summary>
33 /// <param name="sbIn">The string to normalize.</param>
35 /// <returns> An NFKC normalized string.</returns>
38 public static System
.String
normalizeNFKC(System
.String sbIn
)
// Working buffer: filled by the decomposition pass, then reordered and
// recomposed in place before being converted back to a string.
40 System
.Text
.StringBuilder sbOut
= new System
.Text
.StringBuilder();
// Decomposition pass over the input, one char at a time.
// NOTE(review): the assignment of `code` is not visible in this excerpt;
// presumably it is sbIn[i] - confirm.
42 for (int i
= 0; i
< sbIn
.Length
; i
++)
46 // In Unicode 3.0, the Hangul syllables were defined as the block from U+AC00
47 // to U+D7A3; since Unicode 3.2 the block extends to U+D7AF. The test below
48 // covers the whole block up to U+D7AF, while decomposeHangul only decomposes
49 // up to U+D7A3. Should this be changed?
50 if (code
>= 0xAC00 && code
<= 0xD7AF)
// Hangul range: append the string produced by decomposeHangul.
52 sbOut
.Append(decomposeHangul(code
));
// Any other char: look it up in the decomposition table.
// NOTE(review): decomposeIndex is documented to return -1 when the char is
// not found; the handling of that case is not visible in this excerpt.
56 int index
= decomposeIndex(code
);
// Append the mapping stored for that table entry.
63 sbOut
.Append(DecompositionMappings
.m
[index
]);
68 // Bring the string buffer into canonical order.
69 canonicalOrdering(sbOut
);
71 // Do the canonical composition.
// NOTE(review): last_cc and last_start are declared outside the visible
// excerpt; their initial values cannot be confirmed from here.
75 for (int i
= 0; i
< sbOut
.Length
; i
++)
// Combining class of the current char.
77 int cc
= combiningClass(sbOut
[i
]);
// Composition is attempted only past the first char, and only when the
// previous combining class is 0 or differs from the current one.
79 if (i
> 0 && (last_cc
== 0 || last_cc
!= cc
))
81 // Try to combine the char at last_start with the current one.
82 char a
= sbOut
[last_start
];
// compose is documented to return the composed character, or -1.
// NOTE(review): `b` is not assigned in the visible excerpt; presumably
// sbOut[i] - confirm.
85 int c
= compose(a
, b
);
// Store the composed char over the one at last_start.
89 sbOut
[last_start
] = (char) c
;
// NOTE(review): the line below is commented-out code; the statement that
// removes the consumed char at i is not visible in this excerpt.
90 //sbOut.deleteCharAt(i);
// Re-read the combining class of the char that now precedes position i.
100 last_cc
= combiningClass(sbOut
[i
- 1]);
114 return sbOut
.ToString();
118 /// <summary> Returns the index inside the decomposition table, implemented
119 /// using a binary search.</summary>
122 /// <param name="c">Character to look up.</param>
124 /// <returns> Index if found, -1 otherwise.</returns>
127 internal static int decomposeIndex(char c
)
// DecompositionKeys.k is read below as flat pairs (slot 2n holds a key,
// slot 2n + 1 the value returned for it), so the upper search bound is half
// the array length.
// NOTE(review): `start` is declared outside the visible excerpt.
130 int end
= DecompositionKeys
.k
.Length
/ 2;
// Midpoint of the current search range, counted in pairs.
134 int half
= (start
+ end
) / 2;
// Key stored in the midpoint pair.
135 int code
= DecompositionKeys
.k
[half
* 2];
// Value stored next to the midpoint key; normalizeNFKC uses the result of
// this method as an index into DecompositionMappings.m.
// NOTE(review): the comparison guarding this return and the statements that
// narrow the range are not visible in this excerpt.
139 return DecompositionKeys
.k
[half
* 2 + 1];
143 // Character not found (the documented result for this case is -1).
157 /// <summary> Returns the combining class of a given character.</summary>
160 /// <param name="c">The character.</param>
162 /// <returns> The combining class.</returns>
165 internal static int combiningClass(char c
)
// Two-level table lookup: CombiningClass.i selects a row of CombiningClass.c,
// and the class is read from that row.
// NOTE(review): `h` and `l` are defined outside the visible excerpt;
// presumably the high and low byte of c - confirm.
170 int i
= CombiningClass
.i
[h
];
// NOTE(review): any check of `i` before it is used as a row index is not
// visible in this excerpt.
173 return CombiningClass
.c
[i
][l
];
181 /// <summary> Rearranges characters in a string buffer in order to respect the
182 /// canonical ordering properties.</summary>
185 /// <param name="sbIn">StringBuilder to rearrange; it is modified in place.</param>
188 internal static void canonicalOrdering(System
.Text
.StringBuilder sbIn
)
// Flag for "buffer is in canonical order"; starts out false.
// NOTE(review): the loop that reads and updates this flag is not visible in
// this excerpt.
190 bool isOrdered
= false;
// Seed the pairwise comparison with the combining class of the first char.
// NOTE(review): the declaration of lastCC and any empty-buffer guard are not
// visible here; sbIn[0] requires a non-empty buffer.
200 lastCC
= combiningClass(sbIn
[0]);
// Compare each char with its successor.
202 for (int i
= 0; i
< sbIn
.Length
- 1; i
++)
204 int nextCC
= combiningClass(sbIn
[i
+ 1]);
// Out of order: the successor has a non-zero combining class that is lower
// than the class tracked in lastCC.
205 if (nextCC
!= 0 && lastCC
> nextCC
)
// Walk backwards from the successor towards the start of the buffer.
207 for (int j
= i
+ 1; j
> 0; j
--)
// Test whether the char before position j already has a class <= nextCC.
// NOTE(review): the body of this test is not visible; presumably it ends
// the backward walk - confirm.
209 if (combiningClass(sbIn
[j
- 1]) <= nextCC
)
// Move the preceding char one position to the right.
// NOTE(review): the statement that stores the displaced char at j - 1 is
// not visible in this excerpt.
214 sbIn
[j
] = sbIn
[j
- 1];
/// <summary>
/// Returns the index of a character inside the composition table.
/// </summary>
/// <param name="a">Character to look up.</param>
/// <returns>
/// The value stored in <c>Composition.composeData</c> for <paramref name="a"/>,
/// or -1 when the character's page lies outside <c>Composition.composePage</c>
/// or the page entry cannot address a row of <c>composeData</c>.
/// </returns>
internal static int composeIndex(char a)
{
    // The table is paged by the high byte of the character.
    int page = a >> 8;
    if (page >= Composition.composePage.Length)
    {
        return -1;
    }

    int ap = Composition.composePage[page];
    if (ap < 0)
    {
        // A negative page entry can never be a valid row of composeData;
        // report "not found" instead of failing on the array access.
        return -1;
    }

    // The low byte selects the entry inside the page.
    return Composition.composeData[ap][a & 0xff];
}
247 /// <summary> Tries to compose two characters canonically.</summary>
250 /// <param name="a">First character.</param>
252 /// <param name="b">Second character.</param>
254 /// <returns> The composed character or -1 if no composition could be found.</returns>
258 internal static int compose(char a
, char b
)
// Hangul pairs are tried first; composeHangul is documented to return -1 when
// the two characters cannot be composed.
// NOTE(review): the handling of `h` is not visible in this excerpt.
260 int h
= composeHangul(a
, b
);
// Composition-table index of the first character.
266 int ai
= composeIndex(a
);
// Case 1: ai lies in [singleFirstStart, singleSecondStart). The singleFirst
// row for a is read as { second character, composed result }.
268 if (ai
>= Composition
.singleFirstStart
&& ai
< Composition
.singleSecondStart
)
270 if (b
== Composition
.singleFirst
[ai
- Composition
.singleFirstStart
][0])
272 return Composition
.singleFirst
[ai
- Composition
.singleFirstStart
][1];
// Composition-table index of the second character.
280 int bi
= composeIndex(b
);
// Case 2: bi is at or above singleSecondStart. The singleSecond row for b is
// read as { first character, composed result }.
282 if (bi
>= Composition
.singleSecondStart
)
284 if (a
== Composition
.singleSecond
[bi
- Composition
.singleSecondStart
][0])
286 return Composition
.singleSecond
[bi
- Composition
.singleSecondStart
][1];
// Case 3: ai in [0, multiSecondStart) selects a row of multiFirst and bi in
// [multiSecondStart, singleFirstStart) selects a column in that row.
294 if (ai
>= 0 && ai
< Composition
.multiSecondStart
&& bi
>= Composition
.multiSecondStart
&& bi
< Composition
.singleFirstStart
)
296 char[] f
= Composition
.multiFirst
[ai
];
// Only read the row when the column offset lies inside it.
298 if (bi
- Composition
.multiSecondStart
< f
.Length
)
// NOTE(review): how `r` is turned into the return value, and the final
// fall-through result, are not visible in this excerpt.
300 char r
= f
[bi
- Composition
.multiSecondStart
];
// The entire Hangul code below is copied from:
// http://www.unicode.org/unicode/reports/tr15/
//
// Several Hangul-specific constants follow.

/// <summary>Code point that syllable indices are measured from (SIndex = s - SBase).</summary>
internal const int SBase = 0xAC00;

/// <summary>Code point that L indices are measured from (LIndex = a - LBase).</summary>
internal const int LBase = 0x1100;

/// <summary>Code point that V indices are measured from (VIndex = b - VBase).</summary>
internal const int VBase = 0x1161;

/// <summary>Code point that T indices are measured from (TIndex = b - TBase).</summary>
internal const int TBase = 0x11A7;

/// <summary>Number of L values.</summary>
internal const int LCount = 19;

/// <summary>Number of V values.</summary>
internal const int VCount = 21;

/// <summary>Number of T values.</summary>
internal const int TCount = 28;

/// <summary>Number of syllables per L value: VCount * TCount.</summary>
internal static readonly int NCount = VCount * TCount;

/// <summary>Total number of syllables: LCount * NCount.</summary>
internal static readonly int SCount = LCount * NCount;
/// <summary>
/// Decomposes a Hangul syllable character into its L, V and (optional) T parts.
/// </summary>
/// <param name="s">A character to decompose.</param>
/// <returns>
/// A string containing the Hangul decomposition of the input character: the
/// L and V characters, followed by the T character when the syllable has one.
/// If no Hangul decomposition can be found (the character lies outside
/// SBase .. SBase + SCount - 1), a string containing the character itself is
/// returned.
/// </returns>
internal static System.String decomposeHangul(char s)
{
    int SIndex = s - SBase;
    if (SIndex < 0 || SIndex >= SCount)
    {
        // Not a decomposable syllable: hand the character back unchanged.
        return s.ToString();
    }

    char leading = (char) (LBase + SIndex / NCount);
    char vowel = (char) (VBase + (SIndex % NCount) / TCount);
    int trailingIndex = SIndex % TCount;

    if (trailingIndex == 0)
    {
        // Trailing index 0 means "no trailing part"; TBase itself must not be
        // emitted for such a syllable.
        return new System.String(new char[] { leading, vowel });
    }

    return new System.String(new char[] { leading, vowel, (char) (TBase + trailingIndex) });
}
361 /// <summary> Composes two hangul characters.
364 /// <param name="a">First character.
366 /// <param name="b">Second character.
368 /// <returns> Returns the composed character or -1 if the two
369 /// characters cannot be composed.
372 internal static int composeHangul(char a
, char b
)
374 // 1. check to see if two current characters are L and V
375 int LIndex
= a
- LBase
;
376 if (0 <= LIndex
&& LIndex
< LCount
)
378 int VIndex
= b
- VBase
;
379 if (0 <= VIndex
&& VIndex
< VCount
)
381 // make syllable of form LV
382 return SBase
+ (LIndex
* VCount
+ VIndex
) * TCount
;
386 // 2. check to see if two current characters are LV and T
387 int SIndex
= a
- SBase
;
388 if (0 <= SIndex
&& SIndex
< SCount
&& (SIndex
% TCount
) == 0)
390 int TIndex
= b
- TBase
;
391 if (0 <= TIndex
&& TIndex
<= TCount
)
393 // make syllable of form LVT