2009-12-02 Jb Evain <jbevain@novell.com>
[mcs.git] / class / corlib / Mono.Globalization.Unicode / SimpleCollator.cs
blob6c187cd9d2ea33a604921c3943a3ef00c03c4107
1 //
2 // SimpleCollator.cs : the core collator implementation
3 //
4 // Author:
5 // Atsushi Enomoto <atsushi@ximian.com>
6 //
7 // Copyright (C) 2005 Novell, Inc (http://www.novell.com)
8 //
9 // Permission is hereby granted, free of charge, to any person obtaining
10 // a copy of this software and associated documentation files (the
11 // "Software"), to deal in the Software without restriction, including
12 // without limitation the rights to use, copy, modify, merge, publish,
13 // distribute, sublicense, and/or sell copies of the Software, and to
14 // permit persons to whom the Software is furnished to do so, subject to
15 // the following conditions:
16 //
17 // The above copyright notice and this permission notice shall be
18 // included in all copies or substantial portions of the Software.
19 //
20 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
24 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
30 // Here's a summary for supporting contractions, expansions and diacritical
31 // remappings.
33 // Diacritical mapping is a simple tailoring rule that "remaps" diacritical
34 // weight value from one to another. For now it applies to all range of
35 // characters, but at some stage we might need to limit the remapping ranges.
37 // A Contraction consists of a string (actually char[]) and a sortkey entry
38 // (i.e. byte[]). It indicates that a sequence of characters is interpreted
39 // as a single character that has the mapped sortkey value. There is no
40 // character which goes across "special rules". When the collator encountered
41 // such a sequence of characters, it just returns the sortkey value without
42 // further replacement.
44 // Since it is still likely to happen that a contraction sequence matches
45 // other character than the identical sequence (depending on CompareOptions
46 // and of course, defined sortkey value itself), comparison cannot be done
47 // at source char[] level.
49 // In IndexOf(), the first character in the target string or the target char
50 // itself is turned into sortkey bytes. If the character has a contraction and
51 // that is sortkey map, then it is used instead. If the contraction exists and
52 // that is replacement map, then the first character of the replacement string
53 // is searched instead. IndexOf() always searches only for the top character,
54 // and if it returned negative value, then it returns -1. Otherwise, it then
55 // tries IsPrefix() from that location. If it returns true, then it returns
56 // the index.
58 // LAMESPEC: IndexOf() is lame as a whole API. It never matches in the middle
59 // of expansion and there is no proper way to return such indexes within
60 // a single int return value.
62 // For example, try below in .NET:
63 // IndexOf("\u00E6", "a")
64 // IndexOf("\u00E6", "e")
68 using System;
69 using System.Collections;
70 using System.Globalization;
72 using Uni = Mono.Globalization.Unicode.MSCompatUnicodeTable;
73 using UUtil = Mono.Globalization.Unicode.MSCompatUnicodeTableUtil;
74 using COpt = System.Globalization.CompareOptions;
76 namespace Mono.Globalization.Unicode
78 internal class SimpleCollator
80 // this environment variable is for debugging quick check.
81 #pragma warning disable 169, 414
82 static bool QuickCheckDisabled =
83 Environment.internalGetEnvironmentVariable (
84 "MONO_COLLATION_QUICK_CHECK_DISABLED") == "yes";
85 #pragma warning restore 169, 414
87 unsafe internal struct Context
89 public Context (CompareOptions opt, byte* alwaysMatchFlags, byte* neverMatchFlags, byte* buffer1, byte* buffer2, byte* prev1, bool quickCheckPossible)
91 Option = opt;
92 AlwaysMatchFlags = alwaysMatchFlags;
93 NeverMatchFlags = neverMatchFlags;
94 Buffer1 = buffer1;
95 Buffer2 = buffer2;
96 PrevSortKey = prev1;
97 PrevCode = -1;
98 QuickCheckPossible = quickCheckPossible;
101 public readonly CompareOptions Option;
102 public readonly byte* NeverMatchFlags;
103 public readonly byte* AlwaysMatchFlags;
104 public byte* Buffer1;
105 public byte* Buffer2;
106 public int PrevCode;
107 public byte* PrevSortKey;
108 public readonly bool QuickCheckPossible;
110 public void ClearPrevInfo ()
112 PrevCode = -1;
113 PrevSortKey = null;
117 unsafe struct PreviousInfo
119 public int Code;
120 public byte* SortKey;
122 public PreviousInfo (bool dummy)
124 Code = -1;
125 SortKey = null;
129 struct Escape
131 public string Source;
132 public int Index;
133 public int Start;
134 public int End;
135 public int Optional;
138 static SimpleCollator invariant =
139 new SimpleCollator (CultureInfo.InvariantCulture);
141 readonly TextInfo textInfo; // for ToLower().
142 readonly bool frenchSort;
143 unsafe readonly byte* cjkCatTable;
144 unsafe readonly byte* cjkLv1Table;
145 readonly CodePointIndexer cjkIndexer;
146 unsafe readonly byte* cjkLv2Table;
147 readonly CodePointIndexer cjkLv2Indexer;
148 readonly int lcid;
149 readonly Contraction [] contractions;
150 readonly Level2Map [] level2Maps;
152 // This flag marks characters as "unsafe", where the character
153 // could be used as part of a contraction (whose length > 1).
154 readonly byte [] unsafeFlags;
156 const int UnsafeFlagLength = 0x300 / 8;
158 // readonly byte [] contractionFlags = new byte [16];
160 // This array is internally used inside IndexOf() to store
161 // "no need to check" ASCII characters.
163 // Now that it should be thread safe, this array is allocated
164 // at every time.
165 // byte [] neverMatchFlags = new byte [128 / 8];
167 #region .ctor() and split functions
169 public SimpleCollator (CultureInfo culture)
171 lcid = culture.LCID;
172 textInfo = culture.TextInfo;
174 unsafe {
175 SetCJKTable (culture, ref cjkIndexer,
176 ref cjkCatTable, ref cjkLv1Table,
177 ref cjkLv2Indexer, ref cjkLv2Table);
180 // Get tailoring info
181 TailoringInfo t = null;
182 for (CultureInfo ci = culture; ci.LCID != 127; ci = ci.Parent) {
183 t = Uni.GetTailoringInfo (ci.LCID);
184 if (t != null)
185 break;
187 if (t == null) // then use invariant
188 t = Uni.GetTailoringInfo (127);
190 frenchSort = t.FrenchSort;
191 Uni.BuildTailoringTables (culture, t, ref contractions,
192 ref level2Maps);
193 unsafeFlags = new byte [UnsafeFlagLength];
194 foreach (Contraction c in contractions) {
195 if (c.Source.Length > 1)
196 foreach (char ch in c.Source)
197 unsafeFlags [(int) ch / 8 ]
198 |= (byte) (1 << ((int) ch & 7));
199 // for (int i = 0; i < c.Source.Length; i++) {
200 // int ch = c.Source [i];
201 // if (ch > 127)
202 // continue;
203 // contractionFlags [ch / 8] |= (byte) (1 << (ch & 7));
204 // }
206 if (lcid != 127)
207 foreach (Contraction c in invariant.contractions) {
208 if (c.Source.Length > 1)
209 foreach (char ch in c.Source)
210 unsafeFlags [(int) ch / 8 ]
211 |= (byte) (1 << ((int) ch & 7));
212 // for (int i = 0; i < c.Source.Length; i++) {
213 // int ch = c.Source [i];
214 // if (ch > 127)
215 // continue;
216 // contractionFlags [ch / 8] |= (byte) (1 << (ch & 7));
217 // }
220 // FIXME: Since tailorings are mostly for latin
221 // (and in some cases Cyrillic) characters, it would
222 // be much better for performance to store "start
223 // indexes" for > 370 (culture-specific letters).
226 // dump tailoring table
227 Console.WriteLine ("******** building table for {0} : contractions - {1} diacritical - {2}",
228 culture.LCID, contractions.Length, level2Maps.Length);
229 foreach (Contraction c in contractions) {
230 foreach (char cc in c.Source)
231 Console.Write ("{0:X4} ", (int) cc);
232 Console.WriteLine (" -> '{0}'", c.Replacement);
237 unsafe private void SetCJKTable (
238 CultureInfo culture, ref CodePointIndexer cjkIndexer,
239 ref byte* catTable, ref byte* lv1Table,
240 ref CodePointIndexer lv2Indexer, ref byte* lv2Table)
242 string name = GetNeutralCulture (culture).Name;
244 Uni.FillCJK (name, ref cjkIndexer, ref catTable,
245 ref lv1Table, ref lv2Indexer, ref lv2Table);
248 static CultureInfo GetNeutralCulture (CultureInfo info)
250 CultureInfo ret = info;
251 while (ret.Parent != null && ret.Parent.LCID != 127)
252 ret = ret.Parent;
253 return ret;
256 #endregion
258 unsafe byte Category (int cp)
260 if (cp < 0x3000 || cjkCatTable == null)
261 return Uni.Category (cp);
262 int idx = cjkIndexer.ToIndex (cp);
263 return idx < 0 ? Uni.Category (cp) : cjkCatTable [idx];
266 unsafe byte Level1 (int cp)
268 if (cp < 0x3000 || cjkLv1Table == null)
269 return Uni.Level1 (cp);
270 int idx = cjkIndexer.ToIndex (cp);
271 return idx < 0 ? Uni.Level1 (cp) : cjkLv1Table [idx];
274 unsafe byte Level2 (int cp, ExtenderType ext)
276 if (ext == ExtenderType.Buggy)
277 return 5;
278 else if (ext == ExtenderType.Conditional)
279 return 0;
281 if (cp < 0x3000 || cjkLv2Table == null)
282 return Uni.Level2 (cp);
283 int idx = cjkLv2Indexer.ToIndex (cp);
284 byte ret = idx < 0 ? (byte) 0 : cjkLv2Table [idx];
285 if (ret != 0)
286 return ret;
287 ret = Uni.Level2 (cp);
288 if (level2Maps.Length == 0)
289 return ret;
290 for (int i = 0; i < level2Maps.Length; i++) {
291 if (level2Maps [i].Source == ret)
292 return level2Maps [i].Replace;
293 else if (level2Maps [i].Source > ret)
294 break;
296 return ret;
299 static bool IsHalfKana (int cp, COpt opt)
301 return (opt & COpt.IgnoreWidth) != 0 ||
302 Uni.IsHalfWidthKana ((char) cp);
305 Contraction GetContraction (string s, int start, int end)
307 // int si = s [start];
308 // if (si < 128 && (contractionFlags [si / 8] & (1 << (si & 7))) == 0)
309 // return null;
310 Contraction c = GetContraction (s, start, end, contractions);
311 if (c != null || lcid == 127)
312 return c;
313 return GetContraction (s, start, end, invariant.contractions);
316 Contraction GetContraction (string s, int start, int end, Contraction [] clist)
318 for (int i = 0; i < clist.Length; i++) {
319 Contraction ct = clist [i];
320 int diff = ct.Source [0] - s [start];
321 if (diff > 0)
322 return null; // it's already sorted
323 else if (diff < 0)
324 continue;
325 char [] chars = ct.Source;
326 if (end - start < chars.Length)
327 continue;
328 bool match = true;
329 for (int n = 0; n < chars.Length; n++)
330 if (s [start + n] != chars [n]) {
331 match = false;
332 break;
334 if (match)
335 return ct;
337 return null;
340 Contraction GetTailContraction (string s, int start, int end)
342 // int si = s [end - 1];
343 // if (si < 128 && (contractionFlags [si / 8] & (1 << (si & 7))) == 0)
344 // return null;
345 Contraction c = GetTailContraction (s, start, end, contractions);
346 if (c != null || lcid == 127)
347 return c;
348 return GetTailContraction (s, start, end, invariant.contractions);
351 Contraction GetTailContraction (string s, int start, int end, Contraction [] clist)
353 if (start == end || end < -1 || start >= s.Length || s.Length <= end + 1)
354 throw new SystemException (String.Format ("MONO internal error. Failed to get TailContraction. start = {0} end = {1} string = '{2}'", start, end, s));
355 for (int i = 0; i < clist.Length; i++) {
356 Contraction ct = clist [i];
357 char [] chars = ct.Source;
358 if (chars.Length > start - end)
359 continue;
360 if (chars [chars.Length - 1] != s [start])
361 continue;
363 bool match = true;
364 for (int n = 0, spos = start - chars.Length + 1;
365 n < chars.Length;
366 n++, spos++) {
367 if (s [spos] != chars [n]) {
368 match = false;
369 break;
372 if (match)
373 return ct;
375 return null;
378 Contraction GetContraction (char c)
380 Contraction ct = GetContraction (c, contractions);
381 if (ct != null || lcid == 127)
382 return ct;
383 return GetContraction (c, invariant.contractions);
386 Contraction GetContraction (char c, Contraction [] clist)
388 for (int i = 0; i < clist.Length; i++) {
389 Contraction ct = clist [i];
390 if (ct.Source [0] > c)
391 return null; // it's already sorted
392 if (ct.Source [0] == c && ct.Source.Length == 1)
393 return ct;
395 return null;
398 int FilterOptions (int i, COpt opt)
400 if ((opt & COpt.IgnoreWidth) != 0) {
401 int x = Uni.ToWidthCompat (i);
402 if (x != 0)
403 i = x;
405 if ((opt & COpt.OrdinalIgnoreCase) != 0)
406 i = textInfo.ToLower ((char) i);
407 if ((opt & COpt.IgnoreCase) != 0)
408 i = textInfo.ToLower ((char) i);
409 if ((opt & COpt.IgnoreKanaType) != 0)
410 i = Uni.ToKanaTypeInsensitive (i);
411 return i;
414 enum ExtenderType {
415 None,
416 Simple,
417 Voiced,
418 Conditional,
419 Buggy,
422 ExtenderType GetExtenderType (int i)
424 // LAMESPEC: Windows expects true for U+3005, but
425 // sometimes it does not represent to repeat just
426 // one character.
427 // Windows also expects true for U+3031 and U+3032,
428 // but they should *never* repeat one character.
430 // U+2015 becomes an extender only when it is Japanese
431 if (i == 0x2015)
432 return lcid == 16 ? ExtenderType.Conditional :
433 ExtenderType.None;
435 if (i < 0x3005 || i > 0xFF70)
436 return ExtenderType.None;
437 if (i >= 0xFE7C) {
438 switch (i) {
439 case 0xFE7C:
440 case 0xFE7D:
441 return ExtenderType.Simple;
442 case 0xFF70:
443 return ExtenderType.Conditional;
444 case 0xFF9E:
445 case 0xFF9F:
446 return ExtenderType.Voiced;
449 if (i > 0x30FE)
450 return ExtenderType.None;
451 switch (i) {
452 case 0x3005: // LAMESPEC: see above.
453 return ExtenderType.Buggy;
454 case 0x3031: // LAMESPEC: see above.
455 case 0x3032: // LAMESPEC: see above.
456 case 0x309D:
457 case 0x30FD:
458 return ExtenderType.Simple;
459 case 0x309E:
460 case 0x30FE:
461 return ExtenderType.Voiced;
462 case 0x30FC:
463 return ExtenderType.Conditional;
464 default:
465 return ExtenderType.None;
469 static byte ToDashTypeValue (ExtenderType ext, COpt opt)
471 if ((opt & COpt.IgnoreNonSpace) != 0) // LAMESPEC: huh, why?
472 return 3;
473 switch (ext) {
474 case ExtenderType.None:
475 return 3;
476 case ExtenderType.Conditional:
477 return 5;
478 default:
479 return 4;
483 int FilterExtender (int i, ExtenderType ext, COpt opt)
485 if (ext == ExtenderType.Conditional &&
486 Uni.HasSpecialWeight ((char) i)) {
487 bool half = IsHalfKana ((char) i, opt);
488 bool katakana = !Uni.IsHiragana ((char) i);
489 switch (Level1 (i) & 7) {
490 case 2:
491 return half ? 0xFF71 : katakana ? 0x30A2 : 0x3042;
492 case 3:
493 return half ? 0xFF72 : katakana ? 0x30A4 : 0x3044;
494 case 4:
495 return half ? 0xFF73 : katakana ? 0x30A6 : 0x3046;
496 case 5:
497 return half ? 0xFF74 : katakana ? 0x30A8 : 0x3048;
498 case 6:
499 return half ? 0xFF75 : katakana ? 0x30AA : 0x304A;
502 return i;
505 static bool IsIgnorable (int i, COpt opt)
507 return Uni.IsIgnorable (i, (byte) (1 +
508 ((opt & COpt.IgnoreSymbols) != 0 ? 2 : 0) +
509 ((opt & COpt.IgnoreNonSpace) != 0 ? 4 : 0)));
513 bool IsSafe (int i)
515 return i / 8 >= unsafeFlags.Length ? true : (unsafeFlags [i / 8] & (1 << (i % 8))) == 0;
518 #region GetSortKey()
520 public SortKey GetSortKey (string s)
522 return GetSortKey (s, CompareOptions.None);
525 public SortKey GetSortKey (string s, CompareOptions options)
527 return GetSortKey (s, 0, s.Length, options);
530 public SortKey GetSortKey (string s, int start, int length, CompareOptions options)
532 SortKeyBuffer buf = new SortKeyBuffer (lcid);
533 buf.Initialize (options, lcid, s, frenchSort);
534 int end = start + length;
535 GetSortKey (s, start, end, buf, options);
536 return buf.GetResultAndReset ();
539 unsafe void GetSortKey (string s, int start, int end,
540 SortKeyBuffer buf, CompareOptions opt)
542 byte* prevbuf = stackalloc byte [4];
543 ClearBuffer (prevbuf, 4);
544 Context ctx = new Context (opt, null, null, null, null, prevbuf, false);
546 for (int n = start; n < end; n++) {
547 int i = s [n];
549 ExtenderType ext = GetExtenderType (i);
550 if (ext != ExtenderType.None) {
551 i = FilterExtender (ctx.PrevCode, ext, opt);
552 if (i >= 0)
553 FillSortKeyRaw (i, ext, buf, opt);
554 else if (ctx.PrevSortKey != null) {
555 byte* b = ctx.PrevSortKey;
556 buf.AppendNormal (
557 b [0],
558 b [1],
559 b [2] != 1 ? b [2] : Level2 (i, ext),
560 b [3] != 1 ? b [3] : Uni.Level3 (i));
562 // otherwise do nothing.
563 // (if the extender is the first char
564 // in the string, then just ignore.)
565 continue;
568 if (IsIgnorable (i, opt))
569 continue;
570 i = FilterOptions (i, opt);
572 Contraction ct = GetContraction (s, n, end);
573 if (ct != null) {
574 if (ct.Replacement != null) {
575 GetSortKey (ct.Replacement, 0, ct.Replacement.Length, buf, opt);
576 } else {
577 byte* b = ctx.PrevSortKey;
578 for (int bi = 0; bi < ct.SortKey.Length; bi++)
579 b [bi] = ct.SortKey [bi];
580 buf.AppendNormal (
581 b [0],
582 b [1],
583 b [2] != 1 ? b [2] : Level2 (i, ext),
584 b [3] != 1 ? b [3] : Uni.Level3 (i));
585 ctx.PrevCode = -1;
587 n += ct.Source.Length - 1;
589 else {
590 if (!Uni.IsIgnorableNonSpacing (i))
591 ctx.PrevCode = i;
592 FillSortKeyRaw (i, ExtenderType.None, buf, opt);
597 void FillSortKeyRaw (int i, ExtenderType ext,
598 SortKeyBuffer buf, CompareOptions opt)
600 if (0x3400 <= i && i <= 0x4DB5) {
601 int diff = i - 0x3400;
602 buf.AppendCJKExtension (
603 (byte) (0x10 + diff / 254),
604 (byte) (diff % 254 + 2));
605 return;
608 UnicodeCategory uc = char.GetUnicodeCategory ((char) i);
609 switch (uc) {
610 case UnicodeCategory.PrivateUse:
611 int diff = i - 0xE000;
612 buf.AppendNormal (
613 (byte) (0xE5 + diff / 254),
614 (byte) (diff % 254 + 2),
617 return;
618 case UnicodeCategory.Surrogate:
619 FillSurrogateSortKeyRaw (i, buf);
620 return;
623 byte level2 = Level2 (i, ext);
624 if (Uni.HasSpecialWeight ((char) i)) {
625 byte level1 = Level1 (i);
626 buf.AppendKana (
627 Category (i),
628 level1,
629 level2,
630 Uni.Level3 (i),
631 Uni.IsJapaneseSmallLetter ((char) i),
632 ToDashTypeValue (ext, opt),
633 !Uni.IsHiragana ((char) i),
634 IsHalfKana ((char) i, opt)
636 if ((opt & COpt.IgnoreNonSpace) == 0 && ext == ExtenderType.Voiced)
637 // Append voice weight
638 buf.AppendNormal (1, 1, 1, 0);
640 else
641 buf.AppendNormal (
642 Category (i),
643 Level1 (i),
644 level2,
645 Uni.Level3 (i));
648 void FillSurrogateSortKeyRaw (int i, SortKeyBuffer buf)
650 int diffbase = 0;
651 int segment = 0;
652 byte lower = 0;
654 if (i < 0xD840) {
655 diffbase = 0xD800;
656 segment = 0x41;
657 lower = (byte) ((i == 0xD800) ? 0x3E : 0x3F);
658 } else if (0xD840 <= i && i < 0xD880) {
659 diffbase = 0xD840;
660 segment = 0xF2;
661 lower = 0x3E;
662 } else if (0xDB80 <= i && i < 0xDC00) {
663 diffbase = 0xDB80 - 0x40;
664 segment = 0xFE;
665 lower = 0x3E;
666 } else {
667 diffbase = 0xDC00 - 0xF8 + 2;
668 segment = 0x41;
669 lower = 0x3F;
671 int diff = i - diffbase;
673 buf.AppendNormal (
674 (byte) (segment + diff / 254),
675 (byte) (diff % 254 + 2),
676 lower,
677 lower);
680 #endregion
682 #region Compare()
684 public int Compare (string s1, string s2)
686 return Compare (s1, s2, CompareOptions.None);
689 public int Compare (string s1, string s2, CompareOptions options)
691 return Compare (s1, 0, s1.Length, s2, 0, s2.Length, options);
694 private int CompareOrdinal (string s1, int idx1, int len1,
695 string s2, int idx2, int len2)
697 int min = len1 < len2 ? len1 : len2;
698 int end1 = idx1 + min;
699 int end2 = idx2 + min;
700 if (idx1 < 0 || idx2 < 0 || end1 > s1.Length || end2 > s2.Length)
701 throw new SystemException (String.Format ("CompareInfo Internal Error: Should not happen. {0} {1} {2} {3} {4} {5}", idx1, idx2, len1, len2, s1.Length, s2.Length));
702 for (int i1 = idx1, i2 = idx2;
703 i1 < end1 && i2 < end2; i1++, i2++)
704 if (s1 [i1] != s2 [i2])
705 return s1 [i1] - s2 [i2];
706 return len1 == len2 ? 0 :
707 len1 == min ? - 1 : 1;
710 // mostly equivalent to CompareOrdinal, but the return value is
711 // not based on codepoints.
712 private int CompareQuick (string s1, int idx1, int len1,
713 string s2, int idx2, int len2, out bool sourceConsumed,
714 out bool targetConsumed, bool immediateBreakup)
716 sourceConsumed = false;
717 targetConsumed = false;
718 int min = len1 < len2 ? len1 : len2;
719 int end1 = idx1 + min;
720 int end2 = idx2 + min;
721 if (idx1 < 0 || idx2 < 0 || end1 > s1.Length || end2 > s2.Length)
722 throw new SystemException (String.Format ("CompareInfo Internal Error: Should not happen. {0} {1} {2} {3} {4} {5}", idx1, idx2, len1, len2, s1.Length, s2.Length));
723 for (int i1 = idx1, i2 = idx2;
724 i1 < end1 && i2 < end2; i1++, i2++)
725 if (s1 [i1] != s2 [i2]) {
726 if (immediateBreakup)
727 return -1;
728 int ret = Category (s1 [i1]) - Category (s2 [i2]);
729 if (ret == 0)
730 ret = Level1 (s1 [i1]) - Level1 (s2 [i2]);
731 // no level2 and 4
732 if (ret == 0)
733 ret = Uni.Level3 (s1 [i1]) - Uni.Level3 (s2 [i2]);
734 if (ret == 0)
735 throw new SystemException (String.Format ("CompareInfo Internal Error: Should not happen. '{0}' {2} {3} '{1}' {4} {5}", s1, s2, idx1, end1, idx2, end2));
736 return ret;
738 sourceConsumed = len1 <= len2;
739 targetConsumed = len1 >= len2;
740 return len1 == len2 ? 0 :
741 len1 == min ? - 1 : 1;
744 private int CompareOrdinalIgnoreCase (string s1, int idx1, int len1,
745 string s2, int idx2, int len2)
747 int min = len1 < len2 ? len1 : len2;
748 int end1 = idx1 + min;
749 int end2 = idx2 + min;
750 if (idx1 < 0 || idx2 < 0 || end1 > s1.Length || end2 > s2.Length)
751 throw new SystemException (String.Format ("CompareInfo Internal Error: Should not happen. {0} {1} {2} {3} {4} {5}", idx1, idx2, len1, len2, s1.Length, s2.Length));
752 TextInfo ti = invariant.textInfo;
753 for (int i1 = idx1, i2 = idx2;
754 i1 < end1 && i2 < end2; i1++, i2++)
755 if (ti.ToLower (s1 [i1]) != ti.ToLower (s2 [i2]))
756 return ti.ToLower (s1 [i1]) - ti.ToLower (s2 [i2]);
757 return len1 == len2 ? 0 :
758 len1 == min ? - 1 : 1;
761 public unsafe int Compare (string s1, int idx1, int len1,
762 string s2, int idx2, int len2, CompareOptions options)
764 // quick equality check
765 if (idx1 == idx2 && len1 == len2 &&
766 Object.ReferenceEquals (s1, s2))
767 return 0;
768 if (options == CompareOptions.Ordinal)
769 return CompareOrdinal (s1, idx1, len1, s2, idx2, len2);
770 if (options == CompareOptions.OrdinalIgnoreCase)
771 return CompareOrdinalIgnoreCase (s1, idx1, len1, s2, idx2, len2);
773 #if false // stable easy version, depends on GetSortKey().
774 SortKey sk1 = GetSortKey (s1, idx1, len1, options);
775 SortKey sk2 = GetSortKey (s2, idx2, len2, options);
776 byte [] d1 = sk1.KeyData;
777 byte [] d2 = sk2.KeyData;
778 int len = d1.Length > d2.Length ? d2.Length : d1.Length;
779 for (int i = 0; i < len; i++)
780 if (d1 [i] != d2 [i])
781 return d1 [i] < d2 [i] ? -1 : 1;
782 return d1.Length == d2.Length ? 0 : d1.Length < d2.Length ? -1 : 1;
783 #else
784 byte* sk1 = stackalloc byte [4];
785 byte* sk2 = stackalloc byte [4];
786 ClearBuffer (sk1, 4);
787 ClearBuffer (sk2, 4);
788 Context ctx = new Context (options, null, null, sk1, sk2, null,
789 QuickCheckPossible (s1, idx1, idx1 + len1, s2, idx2, idx2 + len2));
791 bool dummy, dummy2;
792 int ret = CompareInternal (s1, idx1, len1, s2, idx2, len2, out dummy, out dummy2, true, false, ref ctx);
793 return ret == 0 ? 0 : ret < 0 ? -1 : 1;
794 #endif
797 unsafe void ClearBuffer (byte* buffer, int size)
799 for (int i = 0; i < size; i++)
800 buffer [i] = 0;
803 bool QuickCheckPossible (string s1, int idx1, int end1,
804 string s2, int idx2, int end2)
806 #if true
807 // looks like it is buggy and inefficient, so just disable it.
808 return false;
809 #else
810 if (QuickCheckDisabled)
811 return false;
812 // if (s1.Length > 100 || s2.Length > 100)
813 // return false;
814 for (int i = idx1; i < end1; i++)
815 if (s1 [i] < 0x20 && (s1 [i] < '\x9' || s1 [i] > '\xD') || s1 [i] >= 0x80 || s1 [i] == '-' || s1 [i] == '\'')
816 return false;
817 for (int i = idx2; i < end2; i++)
818 if (s2 [i] < 0x20 && (s2 [i] < '\x9' || s2 [i] > '\xD') || s2 [i] >= 0x80 || s2 [i] == '-' || s2 [i] == '\'')
819 return false;
820 return true;
821 #endif
824 unsafe int CompareInternal (string s1, int idx1, int len1, string s2,
825 int idx2, int len2,
826 out bool targetConsumed, out bool sourceConsumed,
827 bool skipHeadingExtenders, bool immediateBreakup,
828 ref Context ctx)
830 COpt opt = ctx.Option;
831 int start1 = idx1;
832 int start2 = idx2;
833 int end1 = idx1 + len1;
834 int end2 = idx2 + len2;
835 targetConsumed = false;
836 sourceConsumed = false;
837 PreviousInfo prev2 = new PreviousInfo (false);
839 if (opt == CompareOptions.None && ctx.QuickCheckPossible)
840 return CompareQuick (s1, idx1, len1, s2, idx2, len2, out sourceConsumed, out targetConsumed, immediateBreakup);
842 // It holds final result that comes from the comparison
843 // at level 2 or lower. Even if Compare() found the
844 // difference at level 2 or lower, it still has to
845 // continue level 1 comparison. FinalResult is used
846 // when there was no further differences.
847 int finalResult = 0;
848 // It holds the comparison level to do. It starts from
849 // 5, and becomes 1 when we do primary-only comparison.
850 int currentLevel = 5;
852 int lv5At1 = -1;
853 int lv5At2 = -1;
854 int lv5Value1 = 0;
855 int lv5Value2 = 0;
857 // Skip heading extenders
858 if (skipHeadingExtenders) {
859 for (; idx1 < end1; idx1++)
860 if (GetExtenderType (s1 [idx1]) == ExtenderType.None)
861 break;
862 for (; idx2 < end2; idx2++)
863 if (GetExtenderType (s2 [idx2]) == ExtenderType.None)
864 break;
867 ExtenderType ext1 = ExtenderType.None;
868 ExtenderType ext2 = ExtenderType.None;
870 int quickCheckPos1 = idx1;
871 int quickCheckPos2 = idx2;
872 bool stringSort = (opt & COpt.StringSort) != 0;
873 bool ignoreNonSpace = (opt & COpt.IgnoreNonSpace) != 0;
874 Escape escape1 = new Escape ();
875 Escape escape2 = new Escape ();
877 while (true) {
878 for (; idx1 < end1; idx1++)
879 if (!IsIgnorable (s1 [idx1], opt))
880 break;
881 for (; idx2 < end2; idx2++)
882 if (!IsIgnorable (s2 [idx2], opt))
883 break;
885 if (idx1 >= end1) {
886 if (escape1.Source == null)
887 break;
888 s1 = escape1.Source;
889 start1 = escape1.Start;
890 idx1 = escape1.Index;
891 end1 = escape1.End;
892 quickCheckPos1 = escape1.Optional;
893 escape1.Source = null;
894 continue;
896 if (idx2 >= end2) {
897 if (escape2.Source == null)
898 break;
899 s2 = escape2.Source;
900 start2 = escape2.Start;
901 idx2 = escape2.Index;
902 end2 = escape2.End;
903 quickCheckPos2 = escape2.Optional;
904 escape2.Source = null;
905 continue;
907 #if true
908 // If comparison is unstable, then this part is one of the most doubtful part.
909 // Here we run quick codepoint comparison and run back to "stable" area to
910 // compare characters.
912 // Strictly to say, even the first character
913 // could be compared here, but it messed
914 // backward step, so I just avoided mess.
915 if (quickCheckPos1 < idx1 && quickCheckPos2 < idx2) {
916 while (idx1 < end1 && idx2 < end2 &&
917 s1 [idx1] == s2 [idx2]) {
918 idx1++;
919 idx2++;
921 if (idx1 == end1 || idx2 == end2)
922 continue; // check replacement
924 int backwardEnd1 = quickCheckPos1;
925 int backwardEnd2 = quickCheckPos2;
926 quickCheckPos1 = idx1;
927 quickCheckPos2 = idx2;
929 idx1--;
930 idx2--;
932 for (;idx1 > backwardEnd1; idx1--)
933 if (Category (s1 [idx1]) != 1)
934 break;
935 for (;idx2 > backwardEnd2; idx2--)
936 if (Category (s2 [idx2]) != 1)
937 break;
938 for (;idx1 > backwardEnd1; idx1--)
939 if (IsSafe (s1 [idx1]))
940 break;
941 for (;idx2 > backwardEnd2; idx2--)
942 if (IsSafe (s2 [idx2]))
943 break;
945 #endif
947 int cur1 = idx1;
948 int cur2 = idx2;
949 byte* sk1 = null;
950 byte* sk2 = null;
951 int i1 = FilterOptions (s1 [idx1], opt);
952 int i2 = FilterOptions (s2 [idx2], opt);
953 bool special1 = false;
954 bool special2 = false;
956 // If current character is an expander, then
957 // repeat the previous character.
958 ext1 = GetExtenderType (i1);
959 if (ext1 != ExtenderType.None) {
960 if (ctx.PrevCode < 0) {
961 if (ctx.PrevSortKey == null) {
962 // nothing to extend
963 idx1++;
964 continue;
966 sk1 = ctx.PrevSortKey;
968 else
969 i1 = FilterExtender (ctx.PrevCode, ext1, opt);
971 ext2 = GetExtenderType (i2);
972 if (ext2 != ExtenderType.None) {
973 if (prev2.Code < 0) {
974 if (prev2.SortKey == null) {
975 // nothing to extend
976 idx2++;
977 continue;
979 sk2 = prev2.SortKey;
981 else
982 i2 = FilterExtender (prev2.Code, ext2, opt);
985 byte cat1 = Category (i1);
986 byte cat2 = Category (i2);
988 // Handle special weight characters
989 if (cat1 == 6) {
990 if (!stringSort && currentLevel == 5) {
991 lv5At1 = escape1.Source != null ?
992 escape1.Index - escape1.Start :
993 cur1 - start1;
994 // here Windows has a bug that it does
995 // not consider thirtiary weight.
996 lv5Value1 = Level1 (i1) << 8 + Uni.Level3 (i1);
998 ctx.PrevCode = i1;
999 idx1++;
1001 if (cat2 == 6) {
1002 if (!stringSort && currentLevel == 5) {
1003 lv5At2 = escape2.Source != null ?
1004 escape2.Index - escape2.Start :
1005 cur2 - start2;
1006 // here Windows has a bug that it does
1007 // not consider thirtiary weight.
1008 lv5Value2 = Level1 (i2) << 8 + Uni.Level3 (i2);
1010 prev2.Code = i2;
1011 idx2++;
1013 if (cat1 == 6 || cat2 == 6) {
1014 if (currentLevel == 5) {
1015 if (lv5Value1 == lv5Value2) {
1016 // There is not really difference here.
1017 lv5At1 = lv5At2 = -1;
1018 lv5Value1 = lv5Value2 = 0;
1020 else
1021 currentLevel = 4;
1023 continue;
1026 Contraction ct1 = null;
1027 if (ext1 == ExtenderType.None)
1028 ct1 = GetContraction (s1, idx1, end1);
1030 int offset1 = 1;
1031 if (sk1 != null)
1032 offset1 = 1;
1033 else if (ct1 != null) {
1034 offset1 = ct1.Source.Length;
1035 if (ct1.SortKey != null) {
1036 sk1 = ctx.Buffer1;
1037 for (int i = 0; i < ct1.SortKey.Length; i++)
1038 sk1 [i] = ct1.SortKey [i];
1039 ctx.PrevCode = -1;
1040 ctx.PrevSortKey = sk1;
1042 else if (escape1.Source == null) {
1043 escape1.Source = s1;
1044 escape1.Start = start1;
1045 escape1.Index = cur1 + ct1.Source.Length;
1046 escape1.End = end1;
1047 escape1.Optional = quickCheckPos1;
1048 s1 = ct1.Replacement;
1049 idx1 = 0;
1050 start1 = 0;
1051 end1 = s1.Length;
1052 quickCheckPos1 = 0;
1053 continue;
1056 else {
1057 sk1 = ctx.Buffer1;
1058 sk1 [0] = cat1;
1059 sk1 [1] = Level1 (i1);
1060 if (!ignoreNonSpace && currentLevel > 1)
1061 sk1 [2] = Level2 (i1, ext1);
1062 if (currentLevel > 2)
1063 sk1 [3] = Uni.Level3 (i1);
1064 if (currentLevel > 3)
1065 special1 = Uni.HasSpecialWeight ((char) i1);
1066 if (cat1 > 1)
1067 ctx.PrevCode = i1;
1070 Contraction ct2 = null;
1071 if (ext2 == ExtenderType.None)
1072 ct2 = GetContraction (s2, idx2, end2);
1074 if (sk2 != null)
1075 idx2++;
1076 else if (ct2 != null) {
1077 idx2 += ct2.Source.Length;
1078 if (ct2.SortKey != null) {
1079 sk2 = ctx.Buffer2;
1080 for (int i = 0; i < ct2.SortKey.Length; i++)
1081 sk2 [i] = ct2.SortKey [i];
1082 prev2.Code = -1;
1083 prev2.SortKey = sk2;
1085 else if (escape2.Source == null) {
1086 escape2.Source = s2;
1087 escape2.Start = start2;
1088 escape2.Index = cur2 + ct2.Source.Length;
1089 escape2.End = end2;
1090 escape2.Optional = quickCheckPos2;
1091 s2 = ct2.Replacement;
1092 idx2 = 0;
1093 start2 = 0;
1094 end2 = s2.Length;
1095 quickCheckPos2 = 0;
1096 continue;
1099 else {
1100 sk2 = ctx.Buffer2;
1101 sk2 [0] = cat2;
1102 sk2 [1] = Level1 (i2);
1103 if (!ignoreNonSpace && currentLevel > 1)
1104 sk2 [2] = Level2 (i2, ext2);
1105 if (currentLevel > 2)
1106 sk2 [3] = Uni.Level3 (i2);
1107 if (currentLevel > 3)
1108 special2 = Uni.HasSpecialWeight ((char) i2);
1109 if (cat2 > 1)
1110 prev2.Code = i2;
1111 idx2++;
1114 // Add offset here so that it does not skip
1115 // idx1 while s2 has a replacement.
1116 idx1 += offset1;
1118 if (!ignoreNonSpace) {
1119 // add diacritical marks in s1 here
1120 while (idx1 < end1) {
1121 if (Category (s1 [idx1]) != 1)
1122 break;
1123 if (sk1 [2] == 0)
1124 sk1 [2] = 2;
1125 sk1 [2] = (byte) (sk1 [2] +
1126 Level2 (s1 [idx1], ExtenderType.None));
1127 idx1++;
1130 // add diacritical marks in s2 here
1131 while (idx2 < end2) {
1132 if (Category (s2 [idx2]) != 1)
1133 break;
1134 if (sk2 [2] == 0)
1135 sk2 [2] = 2;
1136 sk2 [2] = (byte) (sk2 [2] +
1137 Level2 (s2 [idx2], ExtenderType.None));
1138 idx2++;
1142 int ret = sk1 [0] - sk2 [0];
1143 ret = ret != 0 ? ret : sk1 [1] - sk2 [1];
1144 if (ret != 0)
1145 return ret;
1146 if (currentLevel == 1)
1147 continue;
1148 if (!ignoreNonSpace) {
1149 ret = sk1 [2] - sk2 [2];
1150 if (ret != 0) {
1151 finalResult = ret;
1152 if (immediateBreakup)
1153 return -1; // different
1154 currentLevel = frenchSort ? 2 : 1;
1155 continue;
1158 if (currentLevel == 2)
1159 continue;
1160 ret = sk1 [3] - sk2 [3];
1161 if (ret != 0) {
1162 finalResult = ret;
1163 if (immediateBreakup)
1164 return -1; // different
1165 currentLevel = 2;
1166 continue;
1168 if (currentLevel == 3)
1169 continue;
1170 if (special1 != special2) {
1171 if (immediateBreakup)
1172 return -1; // different
1173 finalResult = special1 ? 1 : -1;
1174 currentLevel = 3;
1175 continue;
1177 if (special1) {
1178 ret = CompareFlagPair (
1179 !Uni.IsJapaneseSmallLetter ((char) i1),
1180 !Uni.IsJapaneseSmallLetter ((char) i2));
1181 ret = ret != 0 ? ret :
1182 ToDashTypeValue (ext1, opt) -
1183 ToDashTypeValue (ext2, opt);
1184 ret = ret != 0 ? ret : CompareFlagPair (
1185 Uni.IsHiragana ((char) i1),
1186 Uni.IsHiragana ((char) i2));
1187 ret = ret != 0 ? ret : CompareFlagPair (
1188 !IsHalfKana ((char) i1, opt),
1189 !IsHalfKana ((char) i2, opt));
1190 if (ret != 0) {
1191 if (immediateBreakup)
1192 return -1; // different
1193 finalResult = ret;
1194 currentLevel = 3;
1195 continue;
1200 // If there were only level 3 or lower differences,
1201 // then we still have to find diacritical differences
1202 // if any.
1203 if (!ignoreNonSpace &&
1204 finalResult != 0 && currentLevel > 2) {
1205 while (idx1 < end1 && idx2 < end2) {
1206 if (!Uni.IsIgnorableNonSpacing (s1 [idx1]))
1207 break;
1208 if (!Uni.IsIgnorableNonSpacing (s2 [idx2]))
1209 break;
1210 finalResult = Level2 (FilterOptions (s1 [idx1], opt), ext1) - Level2 (FilterOptions (s2 [idx2], opt), ext2);
1211 if (finalResult != 0)
1212 break;
1213 idx1++;
1214 idx2++;
1215 // they should work only for the first character
1216 ext1 = ExtenderType.None;
1217 ext2 = ExtenderType.None;
1220 if (currentLevel == 1 && finalResult != 0) {
1221 while (idx1 < end1) {
1222 if (Uni.IsIgnorableNonSpacing (s1 [idx1]))
1223 idx1++;
1224 else
1225 break;
1227 while (idx2 < end2) {
1228 if (Uni.IsIgnorableNonSpacing (s2 [idx2]))
1229 idx2++;
1230 else
1231 break;
1234 // we still have to check level 5
1235 if (finalResult == 0) {
1236 if (lv5At1 < 0 && lv5At2 >= 0)
1237 finalResult = -1;
1238 else if (lv5At2 < 0 && lv5At1 >= 0)
1239 finalResult = 1;
1240 else {
1241 finalResult = lv5At1 - lv5At2;
1242 if (finalResult == 0)
1243 finalResult = lv5Value1 - lv5Value2;
1246 if (finalResult == 0) {
1247 if (idx2 == end2)
1248 targetConsumed = true;
1249 if (idx1 == end1)
1250 sourceConsumed = true;
1252 return idx1 != end1 ? 1 : idx2 == end2 ? finalResult : -1;
1255 int CompareFlagPair (bool b1, bool b2)
1257 return b1 == b2 ? 0 : b1 ? 1 : -1;
1260 #endregion
1262 #region IsPrefix() and IsSuffix()
1264 public bool IsPrefix (string src, string target, CompareOptions opt)
1266 return IsPrefix (src, target, 0, src.Length, opt);
1269 public unsafe bool IsPrefix (string s, string target, int start, int length, CompareOptions opt)
1271 if (target.Length == 0)
1272 return true;
1273 byte* sk1 = stackalloc byte [4];
1274 byte* sk2 = stackalloc byte [4];
1275 ClearBuffer (sk1, 4);
1276 ClearBuffer (sk2, 4);
1277 Context ctx = new Context (opt, null, null, sk1, sk2, null,
1278 QuickCheckPossible (s, start, start + length, target, 0, target.Length));
1279 return IsPrefix (s, target, start, length, true, ref ctx);
1282 unsafe bool IsPrefix (string s, string target, int start, int length, bool skipHeadingExtenders, ref Context ctx)
1284 bool consumed, dummy;
1285 CompareInternal (s, start, length,
1286 target, 0, target.Length,
1287 out consumed, out dummy, skipHeadingExtenders,
1288 true, ref ctx);
1289 return consumed;
1292 // IsSuffix()
1294 public bool IsSuffix (string src, string target, CompareOptions opt)
1296 return IsSuffix (src, target, src.Length - 1, src.Length, opt);
1299 public unsafe bool IsSuffix (string s, string target, int start, int length, CompareOptions opt)
1301 if (target.Length == 0)
1302 return true;
1303 int last = LastIndexOf (s, target, start, length, opt);
1304 return last >= 0 && Compare (s, last, s.Length - last, target, 0, target.Length, opt) == 0;
1306 // quick check : simple codepoint comparison
1307 if (s.Length >= target.Length) {
1308 int si = start;
1309 int se = start - length;
1310 for (int i = target.Length - 1; si >= se && i >= 0; i--, si--)
1311 if (s [si] != target [i])
1312 break;
1313 if (si == start + target.Length)
1314 return true;
1317 PreviousInfo prev = new PreviousInfo (false);
1318 byte* sk1 = stackalloc byte [4];
1319 byte* sk2 = stackalloc byte [4];
1320 ClearBuffer (sk1, 4);
1321 ClearBuffer (sk2, 4);
1322 return IsSuffix (opt, s, target, start, length, ref prev, sk1, sk2);
1326 unsafe bool IsSuffix (string s, string t, int start, int length, ref Context ctx)
1328 int tstart = 0;
1329 COpt opt = ctx.Option;
1331 for (;tstart < t.Length; tstart++)
1332 if (!IsIgnorable (t [tstart], opt))
1333 break;
1334 if (tstart == t.Length)
1335 return true; // as if target is String.Empty.
1337 #if false
1338 // This is still not working. If it is not likely to get working, then just remove it.
1339 int si = start;
1340 int send = start - length;
1341 int ti = t.Length - 1;
1342 int tend = -1;
1344 int sStep = start + 1;
1345 int tStep = t.Length;
1346 bool sourceConsumed, targetConsumed;
1347 while (true) {
1348 for (; send < si; si--)
1349 if (!IsIgnorable (s [si]))
1350 break;
1351 for (; tend < ti; ti--)
1352 if (!IsIgnorable (t [ti]))
1353 break;
1354 if (tend == ti)
1355 break;
1356 for (; send < si; si--)
1357 if (IsSafe (s [si]))
1358 break;
1359 for (; tend < ti; ti--)
1360 if (IsSafe (t [ti]))
1361 break;
1362 Console.WriteLine ("==== {0} {1} {2} {3} {4} {5} {6} {7} {8}", s, si, send, length, t, ti, tstart, sStep - si, tStep - ti);
1363 if (CompareInternal (opt, s, si, sStep - si,
1364 t, ti, tStep - ti,
1365 out targetConsumed, out sourceConsumed,
1366 true) != 0)
1367 return false;
1368 if (send == si)
1369 return false;
1370 sStep = si;
1371 tStep = ti;
1372 si--;
1373 ti--;
1375 return true;
1376 #else
1377 // FIXME: it is not efficient for very long target.
1378 bool sourceConsumed, targetConsumed;
1379 int mismatchCount = 0;
1380 for (int i = 0; i < length; i++) {
1381 ctx.ClearPrevInfo ();
1383 int ret = CompareInternal (s, start - i, i + 1,
1384 t, tstart, t.Length - tstart,
1385 out targetConsumed,
1386 // FIXME: could immediately breakup
1387 out sourceConsumed, true, true, ref ctx);
1388 if (ret == 0)
1389 return true;
1390 if (!sourceConsumed && targetConsumed)
1391 return false;
1392 if (!targetConsumed) {
1393 mismatchCount++;
1394 if (mismatchCount > Uni.MaxExpansionLength)
1395 // The largest length of an
1396 // expansion is 3, so if the
1397 // target was not consumed more
1398 // than 3 times, then the tail
1399 // character does not match.
1400 return false;
1403 return false;
1404 #endif
1407 #endregion
1409 #region IndexOf() / LastIndexOf()
1411 // string
1413 public int IndexOf (string s, string target, CompareOptions opt)
1415 return IndexOf (s, target, 0, s.Length, opt);
1418 int QuickIndexOf (string s, string target, int start, int length, out bool testWasUnable)
1420 int testedSourcePos = -1, testedTargetPos = -1;
1422 testWasUnable = true;
1423 if (target.Length == 0)
1424 return 0;
1425 else if (target.Length > length)
1426 return -1;
1427 testWasUnable = false;
1429 int end = start + length - target.Length + 1;
1430 for (int i = start; i < end; i++) {
1431 bool no = false;
1432 for (int j = 0; j < target.Length; j++) {
1433 if (testedTargetPos < j) {
1434 if (target [j] >= 0x80) {
1435 testWasUnable = true;
1436 return -1;
1438 else
1439 testedTargetPos = j;
1441 if (testedSourcePos < i + j) {
1442 if (s [i + j] >= 0x80) {
1443 testWasUnable = true;
1444 return -1;
1446 else
1447 testedSourcePos = i + j;
1449 if (s [i + j] != target [j]) {
1450 no = true;
1451 break;
1454 if (no)
1455 continue;
1456 return i;
1458 return -1;
1461 public unsafe int IndexOf (string s, string target, int start, int length, CompareOptions opt)
1463 if (opt == CompareOptions.Ordinal)
1464 return IndexOfOrdinal (s, target, start, length);
1465 if (opt == CompareOptions.OrdinalIgnoreCase)
1466 return IndexOfOrdinalIgnoreCase (s, target, start, length);
1467 if (opt == CompareOptions.None) {
1468 bool testWasUnable;
1469 int ret = QuickIndexOf (s, target, start, length, out testWasUnable);
1470 if (!testWasUnable)
1471 return ret;
1474 byte* alwaysMatchFlags = stackalloc byte [16];
1475 byte* neverMatchFlags = stackalloc byte [16];
1476 byte* targetSortKey = stackalloc byte [4];
1477 byte* sk1 = stackalloc byte [4];
1478 byte* sk2 = stackalloc byte [4];
1479 ClearBuffer (alwaysMatchFlags, 16);
1480 ClearBuffer (neverMatchFlags, 16);
1481 ClearBuffer (targetSortKey, 4);
1482 ClearBuffer (sk1, 4);
1483 ClearBuffer (sk2, 4);
1484 Context ctx = new Context (opt, alwaysMatchFlags, neverMatchFlags, sk1, sk2, null, false);
1486 return IndexOf (s, target, start, length,
1487 targetSortKey, ref ctx);
1490 // note that it wouldn't be used since ordinal IndexOf()
1491 // should be directed to icall.
1492 int IndexOfOrdinal (string s, string target, int start, int length)
1494 if (target.Length == 0)
1495 return 0;
1496 else if (target.Length > length)
1497 return -1;
1499 int end = start + length - target.Length + 1;
1500 for (int i = start; i < end; i++) {
1501 bool no = false;
1502 for (int j = 0; j < target.Length; j++) {
1503 if (s [i + j] != target [j]) {
1504 no = true;
1505 break;
1508 if (no)
1509 continue;
1510 return i;
1512 return -1;
1515 int IndexOfOrdinalIgnoreCase (string s, string target, int start, int length)
1517 if (target.Length == 0)
1518 return 0;
1519 else if (target.Length > length)
1520 return -1;
1522 int end = start + length - target.Length + 1;
1523 for (int i = start; i < end; i++) {
1524 bool no = false;
1525 for (int j = 0; j < target.Length; j++) {
1526 // I think almost all text has more lower letters than upper ones. Thus with this invariant comparison ToLower() should be faster since it costs less operations.
1527 if (textInfo.ToLower (s [i + j]) != textInfo.ToLower (target [j])) {
1528 no = true;
1529 break;
1532 if (no)
1533 continue;
1534 return i;
1536 return -1;
1539 // char
1541 public int IndexOf (string s, char target, CompareOptions opt)
1543 return IndexOf (s, target, 0, s.Length, opt);
1546 public unsafe int IndexOf (string s, char target, int start, int length, CompareOptions opt)
1548 if (opt == CompareOptions.Ordinal)
1549 return IndexOfOrdinal (s, target, start, length);
1550 if (opt == CompareOptions.OrdinalIgnoreCase)
1551 return IndexOfOrdinalIgnoreCase (s, target, start, length);
1552 byte* alwaysMatchFlags = stackalloc byte [16];
1553 byte* neverMatchFlags = stackalloc byte [16];
1554 byte* targetSortKey = stackalloc byte [4];
1555 byte* sk1 = stackalloc byte [4];
1556 byte* sk2 = stackalloc byte [4];
1557 ClearBuffer (alwaysMatchFlags, 16);
1558 ClearBuffer (neverMatchFlags, 16);
1559 ClearBuffer (targetSortKey, 4);
1560 ClearBuffer (sk1, 4);
1561 ClearBuffer (sk2, 4);
1562 Context ctx = new Context (opt, alwaysMatchFlags, neverMatchFlags, sk1, sk2, null, false);
1564 // If target is contraction, then use string search.
1565 Contraction ct = GetContraction (target);
1566 if (ct != null) {
1567 if (ct.Replacement != null)
1568 return IndexOf (s, ct.Replacement,
1569 start, length, targetSortKey, ref ctx);
1570 else {
1571 for (int i = 0; i < ct.SortKey.Length; i++)
1572 sk2 [i] = ct.SortKey [i];
1573 return IndexOfSortKey (s, start, length, sk2, char.MinValue, -1, true, ref ctx);
1575 } else {
1576 int ti = FilterOptions ((int) target, opt);
1577 targetSortKey [0] = Category (ti);
1578 targetSortKey [1] = Level1 (ti);
1579 if ((opt & COpt.IgnoreNonSpace) == 0)
1580 targetSortKey [2] =
1581 Level2 (ti, ExtenderType.None);
1582 targetSortKey [3] = Uni.Level3 (ti);
1583 return IndexOfSortKey (s, start, length,
1584 targetSortKey, target, ti,
1585 !Uni.HasSpecialWeight ((char) ti), ref ctx);
1589 // note that it wouldn't be used since ordinal IndexOf()
1590 // should be directed to icall.
1591 int IndexOfOrdinal (string s, char target, int start, int length)
1593 int end = start + length;
1594 for (int i = start; i < end; i++)
1595 if (s [i] == target)
1596 return i;
1597 return -1;
1600 int IndexOfOrdinalIgnoreCase (string s, char target, int start, int length)
1602 int end = start + length;
1603 target = textInfo.ToLower (target);
1604 for (int i = start; i < end; i++)
1605 if (textInfo.ToLower (s [i]) == target)
1606 return i;
1607 return -1;
1610 // Searches target byte[] keydata
1611 unsafe int IndexOfSortKey (string s, int start, int length, byte* sortkey, char target, int ti, bool noLv4, ref Context ctx)
1613 int end = start + length;
1614 int idx = start;
1616 while (idx < end) {
1617 int cur = idx;
1618 if (MatchesForward (s, ref idx, end, ti, sortkey, noLv4, ref ctx))
1619 return cur;
1621 return -1;
1624 // Searches string. Search head character (or keydata when
1625 // the head is contraction sortkey) and try IsPrefix().
1626 unsafe int IndexOf (string s, string target, int start, int length, byte* targetSortKey, ref Context ctx)
1628 COpt opt = ctx.Option;
1629 int tidx = 0;
1630 for (; tidx < target.Length; tidx++)
1631 if (!IsIgnorable (target [tidx], opt))
1632 break;
1633 if (tidx == target.Length)
1634 return start;
1635 Contraction ct = GetContraction (target, tidx, target.Length - tidx);
1636 string replace = ct != null ? ct.Replacement : null;
1637 byte* sk = replace == null ? targetSortKey : null;
1638 bool noLv4 = true;
1639 char tc = char.MinValue;
1640 int ti = -1;
1641 if (ct != null && sk != null) {
1642 for (int i = 0; i < ct.SortKey.Length; i++)
1643 sk [i] = ct.SortKey [i];
1644 } else if (sk != null) {
1645 tc = target [tidx];
1646 ti = FilterOptions (target [tidx], opt);
1647 sk [0] = Category (ti);
1648 sk [1] = Level1 (ti);
1649 if ((opt & COpt.IgnoreNonSpace) == 0)
1650 sk [2] = Level2 (ti, ExtenderType.None);
1651 sk [3] = Uni.Level3 (ti);
1652 noLv4 = !Uni.HasSpecialWeight ((char) ti);
1654 if (sk != null) {
1655 for (tidx++; tidx < target.Length; tidx++) {
1656 if (Category (target [tidx]) != 1)
1657 break;
1658 if (sk [2] == 0)
1659 sk [2] = 2;
1660 sk [2] = (byte) (sk [2] + Level2 (target [tidx], ExtenderType.None));
1664 do {
1665 int idx = 0;
1666 if (replace != null)
1667 idx = IndexOf (s, replace, start, length, targetSortKey, ref ctx);
1668 else
1669 idx = IndexOfSortKey (s, start, length, sk, tc, ti, noLv4, ref ctx);
1670 if (idx < 0)
1671 return -1;
1672 length -= idx - start;
1673 start = idx;
1674 if (IsPrefix (s, target, start, length, false, ref ctx))
1675 return idx;
1676 Contraction cts = GetContraction (s, start, length);
1677 if (cts != null) {
1678 start += cts.Source.Length;
1679 length -= cts.Source.Length;
1680 } else {
1681 start++;
1682 length--;
1684 } while (length > 0);
1685 return -1;
1689 // There are the same number of LastIndexOf() methods as
1690 // IndexOf() related methods, with the same functionalities
1691 // for each.
1694 // string
1696 public int LastIndexOf (string s, string target, CompareOptions opt)
1698 return LastIndexOf (s, target, s.Length - 1, s.Length, opt);
1701 public unsafe int LastIndexOf (string s, string target, int start, int length, CompareOptions opt)
1703 if (opt == CompareOptions.Ordinal)
1704 return LastIndexOfOrdinal (s, target, start, length);
1705 if (opt == CompareOptions.OrdinalIgnoreCase)
1706 return LastIndexOfOrdinalIgnoreCase (s, target, start, length);
1707 byte* alwaysMatchFlags = stackalloc byte [16];
1708 byte* neverMatchFlags = stackalloc byte [16];
1709 byte* targetSortKey = stackalloc byte [4];
1710 byte* sk1 = stackalloc byte [4];
1711 byte* sk2 = stackalloc byte [4];
1712 ClearBuffer (alwaysMatchFlags, 16);
1713 ClearBuffer (neverMatchFlags, 16);
1714 ClearBuffer (targetSortKey, 4);
1715 ClearBuffer (sk1, 4);
1716 ClearBuffer (sk2, 4);
1717 // For some unknown reason CompareQuick() does not work fine w/ LastIndexOf().
1718 Context ctx = new Context (opt, alwaysMatchFlags, neverMatchFlags, sk1, sk2, null, false);
1719 return LastIndexOf (s, target, start, length,
1720 targetSortKey, ref ctx);
1723 int LastIndexOfOrdinal (string s, string target, int start, int length)
1725 if (target.Length == 0)
1726 return 0;
1727 if (s.Length < target.Length || target.Length > length)
1728 return -1;
1729 int end = start - length + target.Length -1;
1730 char tail = target [target.Length - 1];
1731 for (int i = start; i > end;) {
1732 if (s [i] != tail) {
1733 i--;
1734 continue;
1736 int x = i - target.Length + 1;
1737 i--;
1738 bool mismatch = false;
1739 for (int j = target.Length - 2; j >= 0; j--)
1740 if (s [x + j] != target [j]) {
1741 mismatch = true;
1742 break;
1744 if (mismatch)
1745 continue;
1746 return x;
1748 return -1;
1751 int LastIndexOfOrdinalIgnoreCase (string s, string target, int start, int length)
1753 if (target.Length == 0)
1754 return 0;
1755 if (s.Length < length || target.Length > length)
1756 return -1;
1757 int end = start - length + target.Length - 1;
1758 char tail = textInfo.ToLower (target [target.Length - 1]);
1759 for (int i = start; i > end;) {
1760 if (textInfo.ToLower (s [i]) != tail) {
1761 i--;
1762 continue;
1764 int x = i - target.Length + 1;
1765 i--;
1766 bool mismatch = false;
1767 for (int j = target.Length - 2; j >= 0; j--)
1768 if (textInfo.ToLower (s [x + j]) != textInfo.ToLower (target [j])) {
1769 mismatch = true;
1770 break;
1772 if (mismatch)
1773 continue;
1774 return x;
1776 return -1;
1779 // char
1781 public int LastIndexOf (string s, char target, CompareOptions opt)
1783 return LastIndexOf (s, target, s.Length - 1, s.Length, opt);
1786 public unsafe int LastIndexOf (string s, char target, int start, int length, CompareOptions opt)
1788 if (opt == CompareOptions.Ordinal)
1789 return LastIndexOfOrdinal (s, target, start, length);
1790 if (opt == CompareOptions.OrdinalIgnoreCase)
1791 return LastIndexOfOrdinalIgnoreCase (s, target, start, length);
1792 byte* alwaysMatchFlags = stackalloc byte [16];
1793 byte* neverMatchFlags = stackalloc byte [16];
1794 byte* targetSortKey = stackalloc byte [4];
1795 byte* sk1 = stackalloc byte [4];
1796 byte* sk2 = stackalloc byte [4];
1797 ClearBuffer (alwaysMatchFlags, 16);
1798 ClearBuffer (neverMatchFlags, 16);
1799 ClearBuffer (targetSortKey, 4);
1800 ClearBuffer (sk1, 4);
1801 ClearBuffer (sk2, 4);
1802 Context ctx = new Context (opt, alwaysMatchFlags, neverMatchFlags, sk1, sk2, null, false);
1804 // If target is a replacement contraction, then use
1805 // string search.
1806 Contraction ct = GetContraction (target);
1807 if (ct != null) {
1808 if (ct.Replacement != null)
1809 return LastIndexOf (s,
1810 ct.Replacement, start, length,
1811 targetSortKey, ref ctx);
1812 else {
1813 for (int bi = 0; bi < ct.SortKey.Length; bi++)
1814 sk2 [bi] = ct.SortKey [bi];
1815 return LastIndexOfSortKey (s, start,
1816 start, length, sk2,
1817 -1, true, ref ctx);
1820 else {
1821 int ti = FilterOptions ((int) target, opt);
1822 targetSortKey [0] = Category (ti);
1823 targetSortKey [1] = Level1 (ti);
1824 if ((opt & COpt.IgnoreNonSpace) == 0)
1825 targetSortKey [2] = Level2 (ti, ExtenderType.None);
1826 targetSortKey [3] = Uni.Level3 (ti);
1827 return LastIndexOfSortKey (s, start, start,
1828 length, targetSortKey,
1829 ti, !Uni.HasSpecialWeight ((char) ti),
1830 ref ctx);
1834 int LastIndexOfOrdinal (string s, char target, int start, int length)
1836 if (s.Length == 0)
1837 return -1;
1838 int end = start - length;
1839 for (int i = start; i > end; i--)
1840 if (s [i] == target)
1841 return i;
1842 return -1;
1845 int LastIndexOfOrdinalIgnoreCase (string s, char target, int start, int length)
1847 if (s.Length == 0)
1848 return -1;
1849 int end = start - length;
1850 char c = textInfo.ToUpper (target);
1851 for (int i = start; i > end; i--)
1852 if (textInfo.ToUpper (s [i]) == c)
1853 return i;
1854 return -1;
1857 // Searches target byte[] keydata
1858 unsafe int LastIndexOfSortKey (string s, int start, int orgStart, int length, byte* sortkey, int ti, bool noLv4, ref Context ctx)
1860 int end = start - length;
1861 int idx = start;
1862 while (idx > end) {
1863 int cur = idx;
1864 if (MatchesBackward (s, ref idx, end, orgStart,
1865 ti, sortkey, noLv4, ref ctx))
1866 return cur;
1868 return -1;
1871 // Searches string. Search head character (or keydata when
1872 // the head is contraction sortkey) and try IsPrefix().
1873 unsafe int LastIndexOf (string s, string target, int start, int length, byte* targetSortKey, ref Context ctx)
1875 COpt opt = ctx.Option;
1876 int orgStart = start;
1877 int tidx = 0;
1878 for (; tidx < target.Length; tidx++)
1879 if (!IsIgnorable (target [tidx], opt))
1880 break;
1881 if (tidx == target.Length)
1882 return start;
1883 Contraction ct = GetContraction (target, tidx, target.Length - tidx);
1884 string replace = ct != null ? ct.Replacement : null;
1885 byte* sk = replace == null ? targetSortKey : null;
1887 bool noLv4 = true;
1888 int ti = -1;
1889 if (ct != null && sk != null) {
1890 for (int i = 0; i < ct.SortKey.Length; i++)
1891 sk [i] = ct.SortKey [i];
1892 } else if (sk != null) {
1893 ti = FilterOptions (target [tidx], opt);
1894 sk [0] = Category (ti);
1895 sk [1] = Level1 (ti);
1896 if ((opt & COpt.IgnoreNonSpace) == 0)
1897 sk [2] = Level2 (ti, ExtenderType.None);
1898 sk [3] = Uni.Level3 (ti);
1899 noLv4 = !Uni.HasSpecialWeight ((char) ti);
1901 if (sk != null) {
1902 for (tidx++; tidx < target.Length; tidx++) {
1903 if (Category (target [tidx]) != 1)
1904 break;
1905 if (sk [2] == 0)
1906 sk [2] = 2;
1907 sk [2] = (byte) (sk [2] + Level2 (target [tidx], ExtenderType.None));
1911 do {
1912 int idx = 0;
1914 if (replace != null)
1915 idx = LastIndexOf (s, replace,
1916 start, length,
1917 targetSortKey, ref ctx);
1918 else
1919 idx = LastIndexOfSortKey (s, start, orgStart, length, sk, ti, noLv4, ref ctx);
1920 if (idx < 0)
1921 return -1;
1922 length -= start - idx;
1923 start = idx;
1925 if (IsPrefix (s, target, idx, orgStart - idx + 1, false, ref ctx)) {
1926 for (;idx < orgStart; idx++)
1927 if (!IsIgnorable (s [idx], opt))
1928 break;
1929 return idx;
1931 Contraction cts = GetContraction (s, idx, orgStart - idx + 1);
1932 if (cts != null) {
1933 start -= cts.Source.Length;
1934 length -= cts.Source.Length;
1935 } else {
1936 start--;
1937 length--;
1939 } while (length > 0);
1940 return -1;
1943 unsafe bool MatchesForward (string s, ref int idx, int end, int ti, byte* sortkey, bool noLv4, ref Context ctx)
1945 int si = s [idx];
1946 if (ctx.AlwaysMatchFlags != null && si < 128 && (ctx.AlwaysMatchFlags [si / 8] & (1 << (si % 8))) != 0)
1947 return true;
1948 if (ctx.NeverMatchFlags != null &&
1949 si < 128 &&
1950 (ctx.NeverMatchFlags [si / 8] & (1 << (si % 8))) != 0) {
1951 idx++;
1952 return false;
1954 ExtenderType ext = GetExtenderType (s [idx]);
1955 Contraction ct = null;
1956 if (MatchesForwardCore (s, ref idx, end, ti, sortkey, noLv4, ext, ref ct, ref ctx)) {
1957 if (ctx.AlwaysMatchFlags != null && ct == null && ext == ExtenderType.None && si < 128)
1958 ctx.AlwaysMatchFlags [si / 8] |= (byte) (1 << (si % 8));
1959 return true;
1961 if (ctx.NeverMatchFlags != null && ct == null && ext == ExtenderType.None && si < 128)
1962 ctx.NeverMatchFlags [si / 8] |= (byte) (1 << (si % 8));
1963 return false;
1966 unsafe bool MatchesForwardCore (string s, ref int idx, int end, int ti, byte* sortkey, bool noLv4, ExtenderType ext, ref Contraction ct, ref Context ctx)
1968 COpt opt = ctx.Option;
1969 byte* charSortKey = ctx.Buffer1;
1970 bool ignoreNonSpace = (opt & COpt.IgnoreNonSpace) != 0;
1971 int si = -1;
1972 if (ext == ExtenderType.None)
1973 ct = GetContraction (s, idx, end);
1974 else if (ctx.PrevCode < 0) {
1975 if (ctx.PrevSortKey == null) {
1976 idx++;
1977 return false;
1979 charSortKey = ctx.PrevSortKey;
1981 else
1982 si = FilterExtender (ctx.PrevCode, ext, opt);
1983 // if lv4 exists, it never matches contraction
1984 if (ct != null) {
1985 idx += ct.Source.Length;
1986 if (!noLv4)
1987 return false;
1988 if (ct.SortKey != null) {
1989 for (int i = 0; i < 4; i++)
1990 charSortKey [i] = sortkey [i];
1991 ctx.PrevCode = -1;
1992 ctx.PrevSortKey = charSortKey;
1993 } else {
1994 // Here is the core of LAMESPEC
1995 // described at the top of the source.
1996 int dummy = 0;
1997 return MatchesForward (ct.Replacement, ref dummy,
1998 ct.Replacement.Length, ti, sortkey, noLv4, ref ctx);
2000 } else {
2001 if (si < 0)
2002 si = FilterOptions (s [idx], opt);
2003 idx++;
2004 charSortKey [0] = Category (si);
2005 bool noMatch = false;
2006 if (sortkey [0] == charSortKey [0])
2007 charSortKey [1] = Level1 (si);
2008 else
2009 noMatch = true;
2010 if (!ignoreNonSpace && sortkey [1] == charSortKey [1])
2011 charSortKey [2] = Level2 (si, ext);
2012 else if (!ignoreNonSpace)
2013 noMatch = true;
2014 if (noMatch) {
2015 for (; idx < end; idx++) {
2016 if (Category (s [idx]) != 1)
2017 break;
2019 return false;
2021 charSortKey [3] = Uni.Level3 (si);
2022 if (charSortKey [0] != 1)
2023 ctx.PrevCode = si;
2025 for (; idx < end; idx++) {
2026 if (Category (s [idx]) != 1)
2027 break;
2028 if (ignoreNonSpace)
2029 continue;
2030 if (charSortKey [2] == 0)
2031 charSortKey [2] = 2;
2032 charSortKey [2] = (byte) (charSortKey [2]
2033 + Level2 (s [idx], ExtenderType.None));
2036 return MatchesPrimitive (opt, charSortKey, si, ext, sortkey, ti, noLv4);
2039 unsafe bool MatchesPrimitive (COpt opt, byte* source, int si, ExtenderType ext, byte* target, int ti, bool noLv4)
2041 bool ignoreNonSpace = (opt & COpt.IgnoreNonSpace) != 0;
2042 if (source [0] != target [0] ||
2043 source [1] != target [1] ||
2044 (!ignoreNonSpace && source [2] != target [2]) ||
2045 source [3] != target [3])
2046 return false;
2047 if (noLv4 && (si < 0 || !Uni.HasSpecialWeight ((char) si)))
2048 return true;
2049 else if (noLv4)
2050 return false;
2051 // Since target can never be an extender, if the source
2052 // is an expander and it matters, then they never match.
2053 if (!ignoreNonSpace && ext == ExtenderType.Conditional)
2054 return false;
2055 if (Uni.IsJapaneseSmallLetter ((char) si) !=
2056 Uni.IsJapaneseSmallLetter ((char) ti) ||
2057 ToDashTypeValue (ext, opt) !=
2058 // FIXME: we will have to specify correct value for target
2059 ToDashTypeValue (ExtenderType.None, opt) ||
2060 !Uni.IsHiragana ((char) si) !=
2061 !Uni.IsHiragana ((char) ti) ||
2062 IsHalfKana ((char) si, opt) !=
2063 IsHalfKana ((char) ti, opt))
2064 return false;
2065 return true;
2068 unsafe bool MatchesBackward (string s, ref int idx, int end, int orgStart, int ti, byte* sortkey, bool noLv4, ref Context ctx)
2070 int si = s [idx];
2071 if (ctx.AlwaysMatchFlags != null && si < 128 && (ctx.AlwaysMatchFlags [si / 8] & (1 << (si % 8))) != 0)
2072 return true;
2073 if (ctx.NeverMatchFlags != null && si < 128 && (ctx.NeverMatchFlags [si / 8] & (1 << (si % 8))) != 0) {
2074 idx--;
2075 return false;
2077 ExtenderType ext = GetExtenderType (s [idx]);
2078 Contraction ct = null;
2079 if (MatchesBackwardCore (s, ref idx, end, orgStart, ti, sortkey, noLv4, ext, ref ct, ref ctx)) {
2080 if (ctx.AlwaysMatchFlags != null && ct == null && ext == ExtenderType.None && si < 128)
2081 ctx.AlwaysMatchFlags [si / 8] |= (byte) (1 << (si % 8));
2082 return true;
2084 if (ctx.NeverMatchFlags != null && ct == null && ext == ExtenderType.None && si < 128) {
2085 ctx.NeverMatchFlags [si / 8] |= (byte) (1 << (si % 8));
2087 return false;
2090 unsafe bool MatchesBackwardCore (string s, ref int idx, int end, int orgStart, int ti, byte* sortkey, bool noLv4, ExtenderType ext, ref Contraction ct, ref Context ctx)
2092 COpt opt = ctx.Option;
2093 byte* charSortKey = ctx.Buffer1;
2094 bool ignoreNonSpace = (opt & COpt.IgnoreNonSpace) != 0;
2095 int cur = idx;
2096 int si = -1;
2097 // To handle extenders in source, we need to
2098 // check next _primary_ character.
2099 if (ext != ExtenderType.None) {
2100 byte diacritical = 0;
2101 for (int tmp = 0; ; tmp--) {
2102 if (tmp < 0) // heading extender
2103 return false;
2104 if (IsIgnorable (s [tmp], opt))
2105 continue;
2106 int tmpi = FilterOptions (s [tmp], opt);
2107 byte category = Category (tmpi);
2108 if (category == 1) {
2109 diacritical = Level2 (tmpi, ExtenderType.None);
2110 continue;
2112 si = FilterExtender (tmpi, ext, opt);
2113 charSortKey [0] = category;
2114 charSortKey [1] = Level1 (si);
2115 if (!ignoreNonSpace)
2116 charSortKey [2] = Level2 (si, ext);
2117 charSortKey [3] = Uni.Level3 (si);
2118 if (ext != ExtenderType.Conditional &&
2119 diacritical != 0)
2120 charSortKey [2] =
2121 (charSortKey [2] == 0) ?
2122 (byte) (diacritical + 2) :
2123 diacritical;
2124 break;
2126 idx--;
2128 if (ext == ExtenderType.None)
2129 ct = GetTailContraction (s, idx, end);
2130 // if lv4 exists, it never matches contraction
2131 if (ct != null) {
2132 idx -= ct.Source.Length;
2133 if (!noLv4)
2134 return false;
2135 if (ct.SortKey != null) {
2136 for (int i = 0; i < 4; i++)
2137 charSortKey [i] = sortkey [i];
2138 ctx.PrevCode = -1;
2139 ctx.PrevSortKey = charSortKey;
2140 } else {
2141 // Here is the core of LAMESPEC
2142 // described at the top of the source.
2143 int dummy = ct.Replacement.Length - 1;
2144 return 0 <= LastIndexOfSortKey (
2145 ct.Replacement, dummy, dummy,
2146 ct.Replacement.Length, sortkey,
2147 ti, noLv4, ref ctx);
2149 } else if (ext == ExtenderType.None) {
2150 if (si < 0)
2151 si = FilterOptions (s [idx], opt);
2152 idx--;
2153 bool noMatch = false;
2154 charSortKey [0] = Category (si);
2155 if (charSortKey [0] == sortkey [0])
2156 charSortKey [1] = Level1 (si);
2157 else
2158 noMatch = true;
2159 if (!ignoreNonSpace && charSortKey [1] == sortkey [1])
2160 charSortKey [2] = Level2 (si, ext);
2161 else if (!ignoreNonSpace)
2162 noMatch = true;
2163 if (noMatch)
2164 return false;
2165 charSortKey [3] = Uni.Level3 (si);
2166 if (charSortKey [0] != 1)
2167 ctx.PrevCode = si;
2169 if (ext == ExtenderType.None) {
2170 for (int tmp = cur + 1; tmp < orgStart; tmp++) {
2171 if (Category (s [tmp]) != 1)
2172 break;
2173 if (ignoreNonSpace)
2174 continue;
2175 if (charSortKey [2] == 0)
2176 charSortKey [2] = 2;
2177 charSortKey [2] = (byte) (charSortKey [2]
2178 + Level2 (s [tmp], ExtenderType.None));
2181 return MatchesPrimitive (opt, charSortKey, si, ext, sortkey, ti, noLv4);
2183 #endregion