2 using System
.Globalization
;
4 using System
.Runtime
.CompilerServices
;
6 using NUtil
= Mono
.Globalization
.Unicode
.NormalizationTableUtil
;
8 namespace Mono
.Globalization
.Unicode
// Result of a per-character normalization quick check (see QuickCheck ()).
// The members referenced elsewhere in this file are Yes, No and Maybe.
10 internal enum NormalizationCheck
{
// Unicode normalization routines (Normalize, IsNormalized and helpers)
// working on lookup tables that are addressed through the NUtil alias.
16 internal unsafe class Normalization
// Bit flags that are tested against the value returned by PropValue ().
// The No*/Maybe* flags are consumed by QuickCheck (),
// FullCompositionExclusion by GetPrimaryCompositeMapIndex () and
// IsUnsafe by CanBePrimaryComposite ().
18 public const int NoNfd
= 1;
19 public const int NoNfkd
= 2;
20 public const int NoNfc
= 4;
21 public const int MaybeNfc
= 8;
22 public const int NoNfkc
= 16;
23 public const int MaybeNfkc
= 32;
24 public const int FullCompositionExclusion
= 64;
25 public const int IsUnsafe
= 128;
26 // public const int ExpandOnNfd = 256;
27 // public const int ExpandOnNfc = 512;
28 // public const int ExpandOnNfkd = 1024;
29 // public const int ExpandOnNfkc = 2048;
// Fetches the property value recorded for the code point cp.
// NUtil.PropIdx () translates cp into the slot of the props table; the
// result is a bit set that callers test against the flag constants
// (NoNfd, IsUnsafe, ...).
static uint PropValue (int cp)
{
	uint flags = props [NUtil.PropIdx (cp)];
	return flags;
}
// Returns the charMapIndex entry for the code point cp.
// NUtil.MapIdx () translates cp into the slot of the charMapIndex table;
// callers use the result as a position in mappedChars.
static int CharMapIdx (int cp)
{
	short entry = charMapIndex [NUtil.MapIdx (cp)];
	return entry;
}
// Looks up the mappedChars start position for ch through charMapIndex and
// scans mappedChars until a 0 entry is found.
// NOTE(review): presumably the result is the number of entries between
// that start position and the 0 terminator - confirm.
41 static int GetNormalizedStringLength (int ch
)
43 int start
= charMapIndex
[NUtil
.MapIdx (ch
)];
// A 0 entry in mappedChars marks the end of one mapping.
45 while (mappedChars
[i
] != 0)
// Returns the combining class stored for the character c.
// NUtil.Combining.ToIndex () translates c into the slot of the
// combiningClass table. Callers in this file treat 0 as "starter".
static byte GetCombiningClass (int c)
{
	byte cls = combiningClass [NUtil.Combining.ToIndex (c)];
	return cls;
}
// Returns the mapIdxToComposite entry that belongs to the mappedChars
// index src. NUtil.Composite.ToIndex () translates src into the table slot.
// Combine () treats a result of 0 as an internal error.
static int GetPrimaryCompositeFromMapIndex (int src)
{
	ushort composite = mapIdxToComposite [NUtil.Composite.ToIndex (src)];
	return composite;
}
// Returns the helperIndex entry for the code point cp.
// NUtil.Helper.ToIndex () translates cp into the table slot; callers use
// the result as a position in mappedChars and compare it against 0.
static int GetPrimaryCompositeHelperIndex (int cp)
{
	short helper = helperIndex [NUtil.Helper.ToIndex (cp)];
	return helper;
}
// Tries to match the characters of chars, beginning at index start,
// against the mappedChars entries that begin with the same character.
// chars is probed with "as" for both string and StringBuilder, so either
// type may be passed. Returns 0 when nothing matched.
// NOTE(review): presumably a non-zero result is the mappedChars index of
// the matching entry - confirm against Combine ().
65 static int GetPrimaryCompositeCharIndex (object chars
, int start
)
67 string s
= chars
as string;
68 StringBuilder sb
= chars
as StringBuilder
;
// Exactly one of s / sb is expected to be non-null; the character and the
// length are read from whichever one is.
69 char startCh
= s
!= null ? s
[start
] : sb
[start
];
70 int charsLength
= sb
!= null ? sb
.Length
: s
.Length
;
// First candidate position in mappedChars for entries starting with startCh.
72 int idx
= GetPrimaryCompositeHelperIndex ((int) startCh
);
// Visit every map entry whose first character equals startCh.
75 while (mappedChars
[idx
] == startCh
) {
// Note: this local shadows the static combiningClass table of the same name.
77 int combiningClass
= 0;
// i walks the map entry, j is advanced separately further below.
78 for (int i
= 1, j
= 1; ; i
++, j
++) {
79 prevCB
= combiningClass
;
// A 0 entry ends the current map entry string.
81 if (mappedChars
[idx
+ i
] == 0)
84 if (start
+ i
>= charsLength
)
85 return 0; // didn't match
87 // handle blocked characters here.
94 combiningClass
= GetCombiningClass (curCh
);
95 if (mappedChars
[idx
+ i
] == curCh
) {
99 if (combiningClass
< prevCB
) // blocked. Give up this map entry.
101 if (++j
+ start
>= charsLength
|| combiningClass
== 0)
106 continue; // check next character in the current map entry string.
107 if (prevCB
< combiningClass
) {
109 if (mappedChars
[idx
+ i
] == curCh
)
111 //if (mappedChars [idx + i] > curCh)
112 // return 0; // no match
114 // otherwise move idx to next item
// NOTE(review): this scan indexes mappedChars with i alone, whereas the
// comparisons above use idx + i - confirm that this is intended.
115 while (mappedChars
[i
] != 0)
121 // reached to end of entries
// Produces the composed form of source for the given checkType:
// Decompose () runs first, then a Combine () pass. Returns the builder
// contents when a StringBuilder exists at the end, otherwise source itself.
125 private static string Compose (string source
, int checkType
)
127 StringBuilder sb
= null;
// Decompose () receives sb by reference, so sb may be assigned by this call.
128 Decompose (source
, ref sb
, checkType
);
// NOTE(review): presumably only one of the two Combine () calls below runs,
// depending on whether sb was assigned above - confirm.
130 sb
= Combine (source
, 0, checkType
);
132 Combine (sb
, 0, checkType
);
134 return sb
!= null ? sb
.ToString () : source
;
// String overload of Combine (): scans source with QuickCheck () and, when
// needed, creates a StringBuilder and hands it to the StringBuilder
// overload of Combine ().
// NOTE(review): the scan below begins at index 0 rather than at the start
// parameter - confirm whether start is intentionally ignored.
137 private static StringBuilder
Combine (string source
, int start
, int checkType
)
139 for (int i
= 0; i
< source
.Length
; i
++) {
140 if (QuickCheck (source
[i
], checkType
) == NormalizationCheck
.Yes
)
// Capacity is the source length plus 10% headroom.
142 StringBuilder sb
= new StringBuilder (source
.Length
+ source
.Length
/ 10);
// Composition continues in the builder from position i.
144 Combine (sb
, i
, checkType
);
// Tells whether the code point i may take part in a primary composite.
// Code points in 0x3400..0x9FBB are answered from the helper index (a
// non-zero entry means yes); every other code point is answered from the
// IsUnsafe bit of its property value.
private static bool CanBePrimaryComposite (int i)
{
	bool inHelperRange = i >= 0x3400 && i <= 0x9FBB;
	return inHelperRange
		? GetPrimaryCompositeHelperIndex (i) != 0
		: (PropValue (i) & IsUnsafe) != 0;
}
// StringBuilder overload of Combine (): in-place composition pass over sb
// beginning at index start. Characters are first tested with QuickCheck ();
// for the others the code walks backwards looking for a character with
// combining class 0, searches a primary composite from there and inserts
// the composite character into sb.
// Throws SystemException when the composite lookup yields prim == 0 or
// len == 0.
158 private static void Combine (StringBuilder sb
, int start
, int checkType
)
160 for (int i
= start
; i
< sb
.Length
; i
++) {
161 if (QuickCheck (sb
[i
], checkType
) == NormalizationCheck
.Yes
)
165 // FIXME: It should check "blocked" too
166 for (;i
> 0; i
--) // this loop does not check sb[0], but regardless of the condition below it should not go under 0.
167 if (GetCombiningClass ((int) sb
[i
]) == 0)
170 int idx
= 0; // index to mappedChars
// Try each position up to cur as the first character of a composite.
171 for (; i
< cur
; i
++) {
172 idx
= GetPrimaryCompositeMapIndex (sb
, (int) sb
[i
], i
);
// prim is the composite for the matched map entry, len the length reported
// by GetNormalizedStringLength () for it.
181 int prim
= GetPrimaryCompositeFromMapIndex (idx
);
182 int len
= GetNormalizedStringLength (prim
);
183 if (prim
== 0 || len
== 0)
184 throw new SystemException ("Internal error: should not happen. Input: " + sb
);
// The composite is inserted at i and i is advanced past it.
186 sb
.Insert (i
++, (char) prim
); // always single character
188 // handle blocked characters here.
// Compare following characters with the map entry at idx.
189 while (removed
< len
) {
190 if (sb
[i
] == mappedChars
[idx
+ removed
]) {
193 // otherwise, skip it.
// Filters the character cur before delegating to
// GetPrimaryCompositeCharIndex () for the buffer o at position bufferPos.
// o is passed through unchanged. Returns 0 when cur has a non-zero
// combining class (i.e. it is not a starter).
202 static int GetPrimaryCompositeMapIndex (object o
, int cur
, int bufferPos
)
// NOTE(review): presumably characters flagged FullCompositionExclusion are
// rejected here as well - confirm.
204 if ((PropValue (cur
) & FullCompositionExclusion
) != 0)
206 if (GetCombiningClass (cur
) != 0)
207 return 0; // not a starter
208 return GetPrimaryCompositeCharIndex (o
, bufferPos
);
// Returns the decomposed form of source for the given checkType.
// The work is delegated to the ref-StringBuilder overload; when that
// overload leaves the builder unassigned, source is returned as is.
static string Decompose (string source, int checkType)
{
	StringBuilder buffer = null;
	Decompose (source, ref buffer, checkType);
	if (buffer == null)
		return source;
	return buffer.ToString ();
}
// Decomposition worker: every character of source whose QuickCheck ()
// result is No is passed to DecomposeChar (); afterwards the remaining part
// of source is appended to sb and ReorderCanonical () is applied.
// sb is passed by reference so that callers can see a builder created here.
218 static void Decompose (string source
,
219 ref StringBuilder sb
, int checkType
)
223 for (int i
= 0; i
< source
.Length
; i
++)
224 if (QuickCheck (source
[i
], checkType
) == NormalizationCheck
.No
)
225 DecomposeChar (ref sb
, ref buf
, source
,
// Copy the part of source from start to the end of the string.
228 sb
.Append (source
, start
, source
.Length
- start
);
// Reordering starts at index 1.
229 ReorderCanonical (source
, ref sb
, 1);
// Compares the combining class of each character with that of its
// predecessor. There are two scans: one over src (which copies src into a
// new StringBuilder and recurses when a predecessor has a larger combining
// class) and one over sb beginning at start.
// Both scans read index i - 1; the sb scan therefore needs start >= 1
// (Decompose () passes 1, the recursive call passes i, which is >= 1).
232 static void ReorderCanonical (string src
, ref StringBuilder sb
, int start
)
235 // check only with src.
236 for (int i
= 1; i
< src
.Length
; i
++) {
237 int level
= GetCombiningClass (src
[i
]);
// Predecessor has a larger combining class: switch to a builder.
240 if (GetCombiningClass (src
[i
- 1]) > level
) {
241 sb
= new StringBuilder (src
.Length
);
242 sb
.Append (src
, 0, src
.Length
);
243 ReorderCanonical (src
, ref sb
, i
);
249 // check only with sb
250 for (int i
= start
; i
< sb
.Length
; i
++) {
251 int level
= GetCombiningClass (sb
[i
]);
254 if (GetCombiningClass (sb
[i
- 1]) > level
) {
258 i
--; // apply recursively
// Appends the pending part of s (from start up to, but not including,
// index i) to sb, then appends the canonical mapping of s [i] as produced
// by GetCanonical () in buf.
// sb, buf and start are passed by reference.
263 static void DecomposeChar (ref StringBuilder sb
,
264 ref int [] buf
, string s
, int i
, ref int start
)
// Builder capacity: length of s plus 100 characters of headroom.
267 sb
= new StringBuilder (s
.Length
+ 100);
268 sb
.Append (s
, start
, i
- start
);
// The mapping is written to buf beginning at index 0.
271 GetCanonical (s
[i
], buf
, 0);
272 for (int x
= 0; ; x
++) {
// Values below char.MaxValue are appended as a single char.
275 if (buf
[x
] < char.MaxValue
)
276 sb
.Append ((char) buf
[x
]);
// NOTE(review): "+" binds tighter than ">>" in C#, so the next expression
// is parsed as buf [x] >> (10 + 0xD800) and 0xD800 is never added to the
// shifted value. If a UTF-16 high surrogate is intended this looks wrong -
// confirm.
278 sb
.Append ((char) (buf
[x
] >> 10 + 0xD800));
// NOTE(review): the mask 0x0FFF keeps 12 bits; a UTF-16 low surrogate
// carries 10 bits (0x03FF) - confirm.
279 sb
.Append ((char) ((buf
[x
] & 0x0FFF) + 0xDC00));
// Per-character quick check for the normalization form selected by type.
// Contains four groups of return statements; judging by the flags tested
// they serve, in order, the NFC, NFD, NFKC and NFKD cases.
// NOTE(review): presumably the groups are the arms of a switch on type -
// confirm which type value selects which group.
285 public static NormalizationCheck
QuickCheck (char c
, int type
)
// NoNfc set -> No; otherwise MaybeNfc set -> Maybe; otherwise Yes.
290 v
= PropValue ((int) c
);
291 return (v
& NoNfc
) == 0 ?
292 (v
& MaybeNfc
) == 0 ?
293 NormalizationCheck
.Yes
:
294 NormalizationCheck
.Maybe
:
295 NormalizationCheck
.No
;
// U+AC00..U+D7A3 (HangulSBase .. HangulSBase + HangulSCount - 1) is always
// No; any other character is No only when its NoNfd bit is set.
297 if ('\uAC00' <= c
&& c
<= '\uD7A3')
298 return NormalizationCheck
.No
;
299 return (PropValue ((int) c
) & NoNfd
) != 0 ?
300 NormalizationCheck
.No
: NormalizationCheck
.Yes
;
// NoNfkc set -> No; otherwise MaybeNfkc set -> Maybe; otherwise Yes.
302 v
= PropValue ((int) c
);
303 return (v
& NoNfkc
) != 0 ? NormalizationCheck
.No
:
304 (v
& MaybeNfkc
) != 0 ?
305 NormalizationCheck
.Maybe
:
306 NormalizationCheck
.Yes
;
// Same as the NoNfd group above, but testing the NoNfkd bit.
308 if ('\uAC00' <= c
&& c
<= '\uD7A3')
309 return NormalizationCheck
.No
;
310 return (PropValue ((int) c
) & NoNfkd
) != 0 ?
311 NormalizationCheck
.No
: NormalizationCheck
.Yes
;
315 /* for now we don't use FC_NFKC closure
316 public static bool IsMultiForm (char c)
318 return (PropValue ((int) c) & 0xF0000000) != 0;
321 public static char SingleForm (char c)
323 uint v = PropValue ((int) c);
324 int idx = (int) ((v & 0x7FFF0000) >> 16);
325 return (char) singleNorm [idx];
328 public static void MultiForm (char c, char [] buf, int index)
330 // FIXME: handle surrogate
331 uint v = PropValue ((int) c);
332 int midx = (int) ((v & 0x7FFF0000) >> 16);
333 buf [index] = (char) multiNorm [midx];
334 buf [index + 1] = (char) multiNorm [midx + 1];
335 buf [index + 2] = (char) multiNorm [midx + 2];
336 buf [index + 3] = (char) multiNorm [midx + 3];
337 if (buf [index + 3] != 0)
338 buf [index + 4] = (char) 0; // zero termination
// Constants used by GetCanonicalHangul () to split a code point in the
// range HangulSBase .. HangulSBase + HangulSCount - 1 arithmetically:
// *Base values are the first code point of each group, *Count values the
// number of code points per group. NCount and SCount are derived products.
342 const int HangulSBase
= 0xAC00, HangulLBase
= 0x1100,
343 HangulVBase
= 0x1161, HangulTBase
= 0x11A7,
344 HangulLCount
= 19, HangulVCount
= 21, HangulTCount
= 28,
345 HangulNCount
= HangulVCount
* HangulTCount
, // 588
346 HangulSCount
= HangulLCount
* HangulNCount
; // 11172
// Arithmetic decomposition for code points in the Hangul range described by
// the Hangul* constants. The result is written to buf beginning at bufIdx
// and terminated with a 0 entry.
// GetCanonical () falls back to the mappedChars table when this method
// returns false.
// NOTE(review): presumably false is returned for code points outside the
// range and true otherwise - confirm.
348 private static bool GetCanonicalHangul (int s
, int [] buf
, int bufIdx
)
// Offset of s inside the Hangul range.
350 int idx
= s
- HangulSBase
;
351 if (idx
< 0 || idx
>= HangulSCount
) {
// Split the offset into the three component code points.
355 int L
= HangulLBase
+ idx
/ HangulNCount
;
356 int V
= HangulVBase
+ (idx
% HangulNCount
) / HangulTCount
;
357 int T
= HangulTBase
+ idx
% HangulTCount
;
// T == HangulTBase means that idx % HangulTCount is 0.
361 if (T
!= HangulTBase
) {
// 0 terminates the sequence written to buf.
364 buf
[bufIdx
] = (char) 0;
// Writes the canonical mapping of the code point c into buf, beginning at
// bufIdx, followed by a 0 terminator.
// GetCanonicalHangul () gets the first chance; only when it returns false
// are the entries copied from mappedChars, starting at CharMapIdx (c) and
// stopping at the first 0 entry.
// No capacity check is made: buf must be large enough for the mapping plus
// the terminator.
public static void GetCanonical (int c, int [] buf, int bufIdx)
{
	if (GetCanonicalHangul (c, buf, bufIdx))
		return;

	int mapPos = CharMapIdx (c);
	while (mappedChars [mapPos] != 0) {
		buf [bufIdx] = mappedChars [mapPos];
		bufIdx++;
		mapPos++;
	}
	buf [bufIdx] = 0; // terminator
}
// Checks whether source is already in the normalization form selected by
// type. Each character is examined for combining class order and with
// QuickCheck (); in one path the answer is obtained by comparing source
// with Normalize (source, type).
377 public static bool IsNormalized (string source
, int type
)
380 for (int i
= 0; i
< source
.Length
; i
++) {
// A non-zero combining class smaller than the previous one is tested here.
381 int cc
= GetCombiningClass (source
[i
]);
382 if (cc
!= 0 && cc
< prevCC
)
385 switch (QuickCheck (source
[i
], type
)) {
386 case NormalizationCheck
.Yes
:
388 case NormalizationCheck
.No
:
390 case NormalizationCheck
.Maybe
:
391 // for those forms with composition, it cannot be checked here
// Full normalization followed by a string comparison.
395 return source
== Normalize (source
, type
);
399 // partly copied from Combine()
401 for (;i
> 0; i
--) // this loop does not check sb[0], but regardless of the condition below it should not go under 0.
402 if (GetCombiningClass ((int) source
[i
]) == 0)
405 // Now i is the "starter"
// A non-zero result means a composite could be formed at position i.
406 for (; i
< cur
; i
++) {
407 if (GetPrimaryCompositeCharIndex (source
, i
) != 0)
// Normalizes source according to type by delegating either to Compose ()
// or to Decompose ().
// NOTE(review): presumably the composed forms go to Compose () and the
// decomposed forms to Decompose () - confirm which type values select each.
416 public static string Normalize (string source
, int type
)
421 return Compose (source
, type
);
424 return Decompose (source
, type
);
// Raw pointers to the lookup tables; they are assigned in the static
// constructor.
// mappedChars holds 0-terminated mapping sequences.
429 static int* mappedChars
;
// Indexed through NUtil.MapIdx (); yields positions in mappedChars.
430 static short* charMapIndex
;
// Indexed through NUtil.Helper.ToIndex ().
431 static short* helperIndex
;
// Indexed through NUtil.Composite.ToIndex ().
432 static ushort* mapIdxToComposite
;
// Indexed through NUtil.Combining.ToIndex ().
433 static byte* combiningClass
;
// NOTE(review): an IsReady property is declared further down as well;
// presumably the two declarations belong to alternative builds - confirm.
437 public static readonly bool IsReady
= true; // always
// Static constructor that takes the table pointers from the *Arr arrays by
// means of fixed statements.
// NOTE(review): a fixed statement pins its array only for the duration of
// the statement, while the pointers are kept in static fields afterwards.
// This is safe only if the arrays cannot be moved by the GC - confirm.
// NOTE(review): a second static constructor appears further down;
// presumably the two belong to alternative builds - confirm.
439 static Normalization ()
441 fixed (byte* tmp
= propsArr
) {
444 fixed (int* tmp
= mappedCharsArr
) {
447 fixed (short* tmp
= charMapIndexArr
) {
450 fixed (short* tmp
= helperIndexArr
) {
453 fixed (ushort* tmp
= mapIdxToCompositeArr
) {
454 mapIdxToComposite
= tmp
;
456 fixed (byte* tmp
= combiningClassArr
) {
457 combiningClass
= tmp
;
// Lock object.
// NOTE(review): presumably used to guard the table loading - confirm.
462 static object forLock
= new object ();
// Backing field of the IsReady property below.
463 public static readonly bool isReady
;
// Exposes isReady.
465 public static bool IsReady
{
466 get { return isReady; }
// Internal call implemented by the runtime. It hands back one IntPtr per
// lookup table; the static constructor casts them to the typed table
// pointers.
469 [MethodImpl (MethodImplOptions
.InternalCall
)]
470 static extern void load_normalization_resource (
471 out IntPtr props
, out IntPtr mappedChars
,
472 out IntPtr charMapIndex
, out IntPtr helperIndex
,
473 out IntPtr mapIdxToComposite
, out IntPtr combiningClass
);
// Static constructor that obtains the table addresses from
// load_normalization_resource () and stores them in the typed pointer
// fields.
475 static Normalization ()
477 IntPtr p1
, p2
, p3
, p4
, p5
, p6
;
// Argument order: props, mappedChars, charMapIndex, helperIndex,
// mapIdxToComposite, combiningClass.
479 load_normalization_resource (out p1
, out p2
, out p3
, out p4
, out p5
, out p6
);
// Each cast uses the element type of the destination field.
481 mappedChars
= (int*) p2
;
482 charMapIndex
= (short*) p3
;
483 helperIndex
= (short*) p4
;
484 mapIdxToComposite
= (ushort*) p5
;
485 combiningClass
= (byte*) p6
;
495 // autogenerated code or icall to fill array runs here