Fix linux build.
[mono-project.git] / mcs / class / corlib / Mono.Globalization.Unicode / SimpleCollator.cs
blobd7e70785590b07f2871f6ab15bb6996f04bceace
1 //
2 // SimpleCollator.cs : the core collator implementation
3 //
4 // Author:
5 // Atsushi Enomoto <atsushi@ximian.com>
6 //
7 // Copyright (C) 2005 Novell, Inc (http://www.novell.com)
8 //
9 // Permission is hereby granted, free of charge, to any person obtaining
10 // a copy of this software and associated documentation files (the
11 // "Software"), to deal in the Software without restriction, including
12 // without limitation the rights to use, copy, modify, merge, publish,
13 // distribute, sublicense, and/or sell copies of the Software, and to
14 // permit persons to whom the Software is furnished to do so, subject to
15 // the following conditions:
16 //
17 // The above copyright notice and this permission notice shall be
18 // included in all copies or substantial portions of the Software.
19 //
20 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
24 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
30 // Here's a summary for supporting contractions, expansions and diacritical
31 // remappings.
33 // Diacritical mapping is a simple tailoring rule that "remaps" diacritical
34 // weight value from one to another. For now it applies to all range of
35 // characters, but at some stage we might need to limit the remapping ranges.
37 // A Contraction consists of a string (actually char[]) and a sortkey entry
38 // (i.e. byte[]). It indicates that a sequence of characters is interpreted
39 // as a single character that has the mapped sortkey value. There is no
40 // character which goes across "special rules". When the collator encountered
41 // such a sequence of characters, it just returns the sortkey value without
42 // further replacement.
44 // Since it is still likely to happen that a contraction sequence matches
45 // other character than the identical sequence (depending on CompareOptions
46 // and of course, defined sortkey value itself), comparison cannot be done
47 // at source char[] level.
49 // In IndexOf(), the first character in the target string or the target char
50 // itself is turned into sortkey bytes. If the character has a contraction and
51 // that is sortkey map, then it is used instead. If the contraction exists and
52 // that is replacement map, then the first character of the replacement string
53 // is searched instead. IndexOf() always searches only for the top character,
54 // and if it returned negative value, then it returns -1. Otherwise, it then
55 // tries IsPrefix() from that location. If it returns true, then it returns
56 // the index.
58 // LAMESPEC: IndexOf() is lame as a whole API. It never matches in the middle
59 // of expansion and there is no proper way to return such indexes within
60 // a single int return value.
62 // For example, try below in .NET:
63 // IndexOf("\u00E6", "a")
64 // IndexOf("\u00E6", "e")
68 using System;
69 using System.Collections;
70 using System.Globalization;
72 using Uni = Mono.Globalization.Unicode.MSCompatUnicodeTable;
73 using UUtil = Mono.Globalization.Unicode.MSCompatUnicodeTableUtil;
74 using COpt = System.Globalization.CompareOptions;
76 namespace Mono.Globalization.Unicode
78 internal class SimpleCollator
80 // this environment variable is for debugging quick check.
81 #pragma warning disable 169, 414
82 static bool QuickCheckDisabled =
83 Environment.internalGetEnvironmentVariable (
84 "MONO_COLLATION_QUICK_CHECK_DISABLED") == "yes";
85 #pragma warning restore 169, 414
87 unsafe internal struct Context
89 public Context (CompareOptions opt, byte* alwaysMatchFlags, byte* neverMatchFlags, byte* buffer1, byte* buffer2, byte* prev1, bool quickCheckPossible)
91 Option = opt;
92 AlwaysMatchFlags = alwaysMatchFlags;
93 NeverMatchFlags = neverMatchFlags;
94 Buffer1 = buffer1;
95 Buffer2 = buffer2;
96 PrevSortKey = prev1;
97 PrevCode = -1;
98 QuickCheckPossible = quickCheckPossible;
101 public readonly CompareOptions Option;
102 public readonly byte* NeverMatchFlags;
103 public readonly byte* AlwaysMatchFlags;
104 public byte* Buffer1;
105 public byte* Buffer2;
106 public int PrevCode;
107 public byte* PrevSortKey;
108 public readonly bool QuickCheckPossible;
110 public void ClearPrevInfo ()
112 PrevCode = -1;
113 PrevSortKey = null;
117 unsafe struct PreviousInfo
119 public int Code;
120 public byte* SortKey;
122 public PreviousInfo (bool dummy)
124 Code = -1;
125 SortKey = null;
129 struct Escape
131 public string Source;
132 public int Index;
133 public int Start;
134 public int End;
135 public int Optional;
138 static SimpleCollator invariant =
139 new SimpleCollator (CultureInfo.InvariantCulture);
141 readonly TextInfo textInfo; // for ToLower().
142 readonly CodePointIndexer cjkIndexer;
143 readonly Contraction [] contractions;
144 readonly Level2Map [] level2Maps;
145 // This flag marks characters as "unsafe", where the character
146 // could be used as part of a contraction (whose length > 1).
147 readonly byte [] unsafeFlags;
149 unsafe readonly byte* cjkCatTable;
150 unsafe readonly byte* cjkLv1Table;
151 unsafe readonly byte* cjkLv2Table;
152 readonly CodePointIndexer cjkLv2Indexer;
153 readonly int lcid;
154 readonly bool frenchSort;
157 const int UnsafeFlagLength = 0x300 / 8;
159 // readonly byte [] contractionFlags = new byte [16];
161 // This array is internally used inside IndexOf() to store
162 // "no need to check" ASCII characters.
164 // Now that it should be thread safe, this array is allocated
165 // at every time.
166 // byte [] neverMatchFlags = new byte [128 / 8];
168 #region .ctor() and split functions
170 public SimpleCollator (CultureInfo culture)
172 lcid = culture.LCID;
173 textInfo = culture.TextInfo;
175 unsafe {
176 SetCJKTable (culture, ref cjkIndexer,
177 ref cjkCatTable, ref cjkLv1Table,
178 ref cjkLv2Indexer, ref cjkLv2Table);
181 // Get tailoring info
182 TailoringInfo t = null;
183 for (CultureInfo ci = culture; ci.LCID != 127; ci = ci.Parent) {
184 t = Uni.GetTailoringInfo (ci.LCID);
185 if (t != null)
186 break;
188 if (t == null) // then use invariant
189 t = Uni.GetTailoringInfo (127);
191 frenchSort = t.FrenchSort;
192 Uni.BuildTailoringTables (culture, t, ref contractions,
193 ref level2Maps);
194 unsafeFlags = new byte [UnsafeFlagLength];
195 Console.WriteLine ("contractions == {0}", contractions == null);
196 foreach (Contraction c in contractions) {
197 if (c.Source.Length > 1)
198 foreach (char ch in c.Source)
199 unsafeFlags [(int) ch / 8 ]
200 |= (byte) (1 << ((int) ch & 7));
201 // for (int i = 0; i < c.Source.Length; i++) {
202 // int ch = c.Source [i];
203 // if (ch > 127)
204 // continue;
205 // contractionFlags [ch / 8] |= (byte) (1 << (ch & 7));
206 // }
208 if (lcid != 127)
209 foreach (Contraction c in invariant.contractions) {
210 if (c.Source.Length > 1)
211 foreach (char ch in c.Source)
212 unsafeFlags [(int) ch / 8 ]
213 |= (byte) (1 << ((int) ch & 7));
214 // for (int i = 0; i < c.Source.Length; i++) {
215 // int ch = c.Source [i];
216 // if (ch > 127)
217 // continue;
218 // contractionFlags [ch / 8] |= (byte) (1 << (ch & 7));
219 // }
222 // FIXME: Since tailorings are mostly for latin
223 // (and in some cases Cyrillic) characters, it would
224 // be much better for performance to store "start
225 // indexes" for > 370 (culture-specific letters).
228 // dump tailoring table
229 Console.WriteLine ("******** building table for {0} : contractions - {1} diacritical - {2}",
230 culture.LCID, contractions.Length, level2Maps.Length);
231 foreach (Contraction c in contractions) {
232 foreach (char cc in c.Source)
233 Console.Write ("{0:X4} ", (int) cc);
234 Console.WriteLine (" -> '{0}'", c.Replacement);
239 unsafe private void SetCJKTable (
240 CultureInfo culture, ref CodePointIndexer cjkIndexer,
241 ref byte* catTable, ref byte* lv1Table,
242 ref CodePointIndexer lv2Indexer, ref byte* lv2Table)
244 string name = GetNeutralCulture (culture).Name;
246 Uni.FillCJK (name, ref cjkIndexer, ref catTable,
247 ref lv1Table, ref lv2Indexer, ref lv2Table);
250 static CultureInfo GetNeutralCulture (CultureInfo info)
252 CultureInfo ret = info;
253 while (ret.Parent != null && ret.Parent.LCID != 127)
254 ret = ret.Parent;
255 return ret;
258 #endregion
260 unsafe byte Category (int cp)
262 if (cp < 0x3000 || cjkCatTable == null)
263 return Uni.Category (cp);
264 int idx = cjkIndexer.ToIndex (cp);
265 return idx < 0 ? Uni.Category (cp) : cjkCatTable [idx];
268 unsafe byte Level1 (int cp)
270 if (cp < 0x3000 || cjkLv1Table == null)
271 return Uni.Level1 (cp);
272 int idx = cjkIndexer.ToIndex (cp);
273 return idx < 0 ? Uni.Level1 (cp) : cjkLv1Table [idx];
276 unsafe byte Level2 (int cp, ExtenderType ext)
278 if (ext == ExtenderType.Buggy)
279 return 5;
280 else if (ext == ExtenderType.Conditional)
281 return 0;
283 if (cp < 0x3000 || cjkLv2Table == null)
284 return Uni.Level2 (cp);
285 int idx = cjkLv2Indexer.ToIndex (cp);
286 byte ret = idx < 0 ? (byte) 0 : cjkLv2Table [idx];
287 if (ret != 0)
288 return ret;
289 ret = Uni.Level2 (cp);
290 if (level2Maps.Length == 0)
291 return ret;
292 for (int i = 0; i < level2Maps.Length; i++) {
293 if (level2Maps [i].Source == ret)
294 return level2Maps [i].Replace;
295 else if (level2Maps [i].Source > ret)
296 break;
298 return ret;
301 static bool IsHalfKana (int cp, COpt opt)
303 return (opt & COpt.IgnoreWidth) != 0 ||
304 Uni.IsHalfWidthKana ((char) cp);
307 Contraction GetContraction (string s, int start, int end)
309 // int si = s [start];
310 // if (si < 128 && (contractionFlags [si / 8] & (1 << (si & 7))) == 0)
311 // return null;
312 Contraction c = GetContraction (s, start, end, contractions);
313 if (c != null || lcid == 127)
314 return c;
315 return GetContraction (s, start, end, invariant.contractions);
318 Contraction GetContraction (string s, int start, int end, Contraction [] clist)
320 for (int i = 0; i < clist.Length; i++) {
321 Contraction ct = clist [i];
322 int diff = ct.Source [0] - s [start];
323 if (diff > 0)
324 return null; // it's already sorted
325 else if (diff < 0)
326 continue;
327 char [] chars = ct.Source;
328 if (end - start < chars.Length)
329 continue;
330 bool match = true;
331 for (int n = 0; n < chars.Length; n++)
332 if (s [start + n] != chars [n]) {
333 match = false;
334 break;
336 if (match)
337 return ct;
339 return null;
342 Contraction GetTailContraction (string s, int start, int end)
344 // int si = s [end - 1];
345 // if (si < 128 && (contractionFlags [si / 8] & (1 << (si & 7))) == 0)
346 // return null;
347 Contraction c = GetTailContraction (s, start, end, contractions);
348 if (c != null || lcid == 127)
349 return c;
350 return GetTailContraction (s, start, end, invariant.contractions);
353 Contraction GetTailContraction (string s, int start, int end, Contraction [] clist)
355 if (start == end || end < -1 || start >= s.Length || s.Length <= end + 1)
356 throw new SystemException (String.Format ("MONO internal error. Failed to get TailContraction. start = {0} end = {1} string = '{2}'", start, end, s));
357 for (int i = 0; i < clist.Length; i++) {
358 Contraction ct = clist [i];
359 char [] chars = ct.Source;
360 if (chars.Length > start - end)
361 continue;
362 if (chars [chars.Length - 1] != s [start])
363 continue;
365 bool match = true;
366 for (int n = 0, spos = start - chars.Length + 1;
367 n < chars.Length;
368 n++, spos++) {
369 if (s [spos] != chars [n]) {
370 match = false;
371 break;
374 if (match)
375 return ct;
377 return null;
380 Contraction GetContraction (char c)
382 Contraction ct = GetContraction (c, contractions);
383 if (ct != null || lcid == 127)
384 return ct;
385 return GetContraction (c, invariant.contractions);
388 Contraction GetContraction (char c, Contraction [] clist)
390 for (int i = 0; i < clist.Length; i++) {
391 Contraction ct = clist [i];
392 if (ct.Source [0] > c)
393 return null; // it's already sorted
394 if (ct.Source [0] == c && ct.Source.Length == 1)
395 return ct;
397 return null;
400 int FilterOptions (int i, COpt opt)
402 if ((opt & COpt.IgnoreWidth) != 0) {
403 int x = Uni.ToWidthCompat (i);
404 if (x != 0)
405 i = x;
407 if ((opt & COpt.OrdinalIgnoreCase) != 0)
408 i = textInfo.ToLower ((char) i);
409 if ((opt & COpt.IgnoreCase) != 0)
410 i = textInfo.ToLower ((char) i);
411 if ((opt & COpt.IgnoreKanaType) != 0)
412 i = Uni.ToKanaTypeInsensitive (i);
413 return i;
416 enum ExtenderType {
417 None,
418 Simple,
419 Voiced,
420 Conditional,
421 Buggy,
424 ExtenderType GetExtenderType (int i)
426 // LAMESPEC: Windows expects true for U+3005, but
427 // sometimes it does not represent to repeat just
428 // one character.
429 // Windows also expects true for U+3031 and U+3032,
430 // but they should *never* repeat one character.
432 // U+2015 becomes an extender only when it is Japanese
433 if (i == 0x2015)
434 return lcid == 16 ? ExtenderType.Conditional :
435 ExtenderType.None;
437 if (i < 0x3005 || i > 0xFF70)
438 return ExtenderType.None;
439 if (i >= 0xFE7C) {
440 switch (i) {
441 case 0xFE7C:
442 case 0xFE7D:
443 return ExtenderType.Simple;
444 case 0xFF70:
445 return ExtenderType.Conditional;
446 case 0xFF9E:
447 case 0xFF9F:
448 return ExtenderType.Voiced;
451 if (i > 0x30FE)
452 return ExtenderType.None;
453 switch (i) {
454 case 0x3005: // LAMESPEC: see above.
455 return ExtenderType.Buggy;
456 case 0x3031: // LAMESPEC: see above.
457 case 0x3032: // LAMESPEC: see above.
458 case 0x309D:
459 case 0x30FD:
460 return ExtenderType.Simple;
461 case 0x309E:
462 case 0x30FE:
463 return ExtenderType.Voiced;
464 case 0x30FC:
465 return ExtenderType.Conditional;
466 default:
467 return ExtenderType.None;
471 static byte ToDashTypeValue (ExtenderType ext, COpt opt)
473 if ((opt & COpt.IgnoreNonSpace) != 0) // LAMESPEC: huh, why?
474 return 3;
475 switch (ext) {
476 case ExtenderType.None:
477 return 3;
478 case ExtenderType.Conditional:
479 return 5;
480 default:
481 return 4;
485 int FilterExtender (int i, ExtenderType ext, COpt opt)
487 if (ext == ExtenderType.Conditional &&
488 Uni.HasSpecialWeight ((char) i)) {
489 bool half = IsHalfKana ((char) i, opt);
490 bool katakana = !Uni.IsHiragana ((char) i);
491 switch (Level1 (i) & 7) {
492 case 2:
493 return half ? 0xFF71 : katakana ? 0x30A2 : 0x3042;
494 case 3:
495 return half ? 0xFF72 : katakana ? 0x30A4 : 0x3044;
496 case 4:
497 return half ? 0xFF73 : katakana ? 0x30A6 : 0x3046;
498 case 5:
499 return half ? 0xFF74 : katakana ? 0x30A8 : 0x3048;
500 case 6:
501 return half ? 0xFF75 : katakana ? 0x30AA : 0x304A;
504 return i;
507 static bool IsIgnorable (int i, COpt opt)
509 return Uni.IsIgnorable (i, (byte) (1 +
510 ((opt & COpt.IgnoreSymbols) != 0 ? 2 : 0) +
511 ((opt & COpt.IgnoreNonSpace) != 0 ? 4 : 0)));
515 bool IsSafe (int i)
517 return i / 8 >= unsafeFlags.Length ? true : (unsafeFlags [i / 8] & (1 << (i % 8))) == 0;
520 #region GetSortKey()
522 public SortKey GetSortKey (string s)
524 return GetSortKey (s, CompareOptions.None);
527 public SortKey GetSortKey (string s, CompareOptions options)
529 return GetSortKey (s, 0, s.Length, options);
532 public SortKey GetSortKey (string s, int start, int length, CompareOptions options)
534 SortKeyBuffer buf = new SortKeyBuffer (lcid);
535 buf.Initialize (options, lcid, s, frenchSort);
536 int end = start + length;
537 GetSortKey (s, start, end, buf, options);
538 return buf.GetResultAndReset ();
541 unsafe void GetSortKey (string s, int start, int end,
542 SortKeyBuffer buf, CompareOptions opt)
544 byte* prevbuf = stackalloc byte [4];
545 ClearBuffer (prevbuf, 4);
546 Context ctx = new Context (opt, null, null, null, null, prevbuf, false);
548 for (int n = start; n < end; n++) {
549 int i = s [n];
551 ExtenderType ext = GetExtenderType (i);
552 if (ext != ExtenderType.None) {
553 i = FilterExtender (ctx.PrevCode, ext, opt);
554 if (i >= 0)
555 FillSortKeyRaw (i, ext, buf, opt);
556 else if (ctx.PrevSortKey != null) {
557 byte* b = ctx.PrevSortKey;
558 buf.AppendNormal (
559 b [0],
560 b [1],
561 b [2] != 1 ? b [2] : Level2 (i, ext),
562 b [3] != 1 ? b [3] : Uni.Level3 (i));
564 // otherwise do nothing.
565 // (if the extender is the first char
566 // in the string, then just ignore.)
567 continue;
570 if (IsIgnorable (i, opt))
571 continue;
572 i = FilterOptions (i, opt);
574 Contraction ct = GetContraction (s, n, end);
575 if (ct != null) {
576 if (ct.Replacement != null) {
577 GetSortKey (ct.Replacement, 0, ct.Replacement.Length, buf, opt);
578 } else {
579 byte* b = ctx.PrevSortKey;
580 for (int bi = 0; bi < ct.SortKey.Length; bi++)
581 b [bi] = ct.SortKey [bi];
582 buf.AppendNormal (
583 b [0],
584 b [1],
585 b [2] != 1 ? b [2] : Level2 (i, ext),
586 b [3] != 1 ? b [3] : Uni.Level3 (i));
587 ctx.PrevCode = -1;
589 n += ct.Source.Length - 1;
591 else {
592 if (!Uni.IsIgnorableNonSpacing (i))
593 ctx.PrevCode = i;
594 FillSortKeyRaw (i, ExtenderType.None, buf, opt);
599 void FillSortKeyRaw (int i, ExtenderType ext,
600 SortKeyBuffer buf, CompareOptions opt)
602 if (0x3400 <= i && i <= 0x4DB5) {
603 int diff = i - 0x3400;
604 buf.AppendCJKExtension (
605 (byte) (0x10 + diff / 254),
606 (byte) (diff % 254 + 2));
607 return;
610 UnicodeCategory uc = char.GetUnicodeCategory ((char) i);
611 switch (uc) {
612 case UnicodeCategory.PrivateUse:
613 int diff = i - 0xE000;
614 buf.AppendNormal (
615 (byte) (0xE5 + diff / 254),
616 (byte) (diff % 254 + 2),
619 return;
620 case UnicodeCategory.Surrogate:
621 FillSurrogateSortKeyRaw (i, buf);
622 return;
625 byte level2 = Level2 (i, ext);
626 if (Uni.HasSpecialWeight ((char) i)) {
627 byte level1 = Level1 (i);
628 buf.AppendKana (
629 Category (i),
630 level1,
631 level2,
632 Uni.Level3 (i),
633 Uni.IsJapaneseSmallLetter ((char) i),
634 ToDashTypeValue (ext, opt),
635 !Uni.IsHiragana ((char) i),
636 IsHalfKana ((char) i, opt)
638 if ((opt & COpt.IgnoreNonSpace) == 0 && ext == ExtenderType.Voiced)
639 // Append voice weight
640 buf.AppendNormal (1, 1, 1, 0);
642 else
643 buf.AppendNormal (
644 Category (i),
645 Level1 (i),
646 level2,
647 Uni.Level3 (i));
650 void FillSurrogateSortKeyRaw (int i, SortKeyBuffer buf)
652 int diffbase = 0;
653 int segment = 0;
654 byte lower = 0;
656 if (i < 0xD840) {
657 diffbase = 0xD800;
658 segment = 0x41;
659 lower = (byte) ((i == 0xD800) ? 0x3E : 0x3F);
660 } else if (0xD840 <= i && i < 0xD880) {
661 diffbase = 0xD840;
662 segment = 0xF2;
663 lower = 0x3E;
664 } else if (0xDB80 <= i && i < 0xDC00) {
665 diffbase = 0xDB80 - 0x40;
666 segment = 0xFE;
667 lower = 0x3E;
668 } else {
669 diffbase = 0xDC00 - 0xF8 + 2;
670 segment = 0x41;
671 lower = 0x3F;
673 int diff = i - diffbase;
675 buf.AppendNormal (
676 (byte) (segment + diff / 254),
677 (byte) (diff % 254 + 2),
678 lower,
679 lower);
682 #endregion
684 #region Compare()
686 public int Compare (string s1, string s2)
688 return Compare (s1, s2, CompareOptions.None);
691 public int Compare (string s1, string s2, CompareOptions options)
693 return Compare (s1, 0, s1.Length, s2, 0, s2.Length, options);
696 private int CompareOrdinal (string s1, int idx1, int len1,
697 string s2, int idx2, int len2)
699 int min = len1 < len2 ? len1 : len2;
700 int end1 = idx1 + min;
701 int end2 = idx2 + min;
702 if (idx1 < 0 || idx2 < 0 || end1 > s1.Length || end2 > s2.Length)
703 throw new SystemException (String.Format ("CompareInfo Internal Error: Should not happen. {0} {1} {2} {3} {4} {5}", idx1, idx2, len1, len2, s1.Length, s2.Length));
704 for (int i1 = idx1, i2 = idx2;
705 i1 < end1 && i2 < end2; i1++, i2++)
706 if (s1 [i1] != s2 [i2])
707 return s1 [i1] - s2 [i2];
708 return len1 == len2 ? 0 :
709 len1 == min ? - 1 : 1;
712 // mostly equivalent to CompareOrdinal, but the return value is
713 // not based on codepoints.
714 private int CompareQuick (string s1, int idx1, int len1,
715 string s2, int idx2, int len2, out bool sourceConsumed,
716 out bool targetConsumed, bool immediateBreakup)
718 sourceConsumed = false;
719 targetConsumed = false;
720 int min = len1 < len2 ? len1 : len2;
721 int end1 = idx1 + min;
722 int end2 = idx2 + min;
723 if (idx1 < 0 || idx2 < 0 || end1 > s1.Length || end2 > s2.Length)
724 throw new SystemException (String.Format ("CompareInfo Internal Error: Should not happen. {0} {1} {2} {3} {4} {5}", idx1, idx2, len1, len2, s1.Length, s2.Length));
725 for (int i1 = idx1, i2 = idx2;
726 i1 < end1 && i2 < end2; i1++, i2++)
727 if (s1 [i1] != s2 [i2]) {
728 if (immediateBreakup)
729 return -1;
730 int ret = Category (s1 [i1]) - Category (s2 [i2]);
731 if (ret == 0)
732 ret = Level1 (s1 [i1]) - Level1 (s2 [i2]);
733 // no level2 and 4
734 if (ret == 0)
735 ret = Uni.Level3 (s1 [i1]) - Uni.Level3 (s2 [i2]);
736 if (ret == 0)
737 throw new SystemException (String.Format ("CompareInfo Internal Error: Should not happen. '{0}' {2} {3} '{1}' {4} {5}", s1, s2, idx1, end1, idx2, end2));
738 return ret;
740 sourceConsumed = len1 <= len2;
741 targetConsumed = len1 >= len2;
742 return len1 == len2 ? 0 :
743 len1 == min ? - 1 : 1;
746 private int CompareOrdinalIgnoreCase (string s1, int idx1, int len1,
747 string s2, int idx2, int len2)
749 int min = len1 < len2 ? len1 : len2;
750 int end1 = idx1 + min;
751 int end2 = idx2 + min;
752 if (idx1 < 0 || idx2 < 0 || end1 > s1.Length || end2 > s2.Length)
753 throw new SystemException (String.Format ("CompareInfo Internal Error: Should not happen. {0} {1} {2} {3} {4} {5}", idx1, idx2, len1, len2, s1.Length, s2.Length));
754 TextInfo ti = invariant.textInfo;
755 for (int i1 = idx1, i2 = idx2;
756 i1 < end1 && i2 < end2; i1++, i2++)
757 if (ti.ToLower (s1 [i1]) != ti.ToLower (s2 [i2]))
758 return ti.ToLower (s1 [i1]) - ti.ToLower (s2 [i2]);
759 return len1 == len2 ? 0 :
760 len1 == min ? - 1 : 1;
763 public unsafe int Compare (string s1, int idx1, int len1,
764 string s2, int idx2, int len2, CompareOptions options)
766 // quick equality check
767 if (idx1 == idx2 && len1 == len2 &&
768 Object.ReferenceEquals (s1, s2))
769 return 0;
770 if (options == CompareOptions.Ordinal)
771 return CompareOrdinal (s1, idx1, len1, s2, idx2, len2);
772 if (options == CompareOptions.OrdinalIgnoreCase)
773 return CompareOrdinalIgnoreCase (s1, idx1, len1, s2, idx2, len2);
775 #if false // stable easy version, depends on GetSortKey().
776 SortKey sk1 = GetSortKey (s1, idx1, len1, options);
777 SortKey sk2 = GetSortKey (s2, idx2, len2, options);
778 byte [] d1 = sk1.KeyData;
779 byte [] d2 = sk2.KeyData;
780 int len = d1.Length > d2.Length ? d2.Length : d1.Length;
781 for (int i = 0; i < len; i++)
782 if (d1 [i] != d2 [i])
783 return d1 [i] < d2 [i] ? -1 : 1;
784 return d1.Length == d2.Length ? 0 : d1.Length < d2.Length ? -1 : 1;
785 #else
786 byte* sk1 = stackalloc byte [4];
787 byte* sk2 = stackalloc byte [4];
788 ClearBuffer (sk1, 4);
789 ClearBuffer (sk2, 4);
790 Context ctx = new Context (options, null, null, sk1, sk2, null,
791 QuickCheckPossible (s1, idx1, idx1 + len1, s2, idx2, idx2 + len2));
793 bool dummy, dummy2;
794 int ret = CompareInternal (s1, idx1, len1, s2, idx2, len2, out dummy, out dummy2, true, false, ref ctx);
795 return ret == 0 ? 0 : ret < 0 ? -1 : 1;
796 #endif
799 unsafe void ClearBuffer (byte* buffer, int size)
801 for (int i = 0; i < size; i++)
802 buffer [i] = 0;
805 bool QuickCheckPossible (string s1, int idx1, int end1,
806 string s2, int idx2, int end2)
808 #if true
809 // looks like it is buggy and inefficient, so just disable it.
810 return false;
811 #else
812 if (QuickCheckDisabled)
813 return false;
814 // if (s1.Length > 100 || s2.Length > 100)
815 // return false;
816 for (int i = idx1; i < end1; i++)
817 if (s1 [i] < 0x20 && (s1 [i] < '\x9' || s1 [i] > '\xD') || s1 [i] >= 0x80 || s1 [i] == '-' || s1 [i] == '\'')
818 return false;
819 for (int i = idx2; i < end2; i++)
820 if (s2 [i] < 0x20 && (s2 [i] < '\x9' || s2 [i] > '\xD') || s2 [i] >= 0x80 || s2 [i] == '-' || s2 [i] == '\'')
821 return false;
822 return true;
823 #endif
826 unsafe int CompareInternal (string s1, int idx1, int len1, string s2,
827 int idx2, int len2,
828 out bool targetConsumed, out bool sourceConsumed,
829 bool skipHeadingExtenders, bool immediateBreakup,
830 ref Context ctx)
832 COpt opt = ctx.Option;
833 int start1 = idx1;
834 int start2 = idx2;
835 int end1 = idx1 + len1;
836 int end2 = idx2 + len2;
837 targetConsumed = false;
838 sourceConsumed = false;
839 PreviousInfo prev2 = new PreviousInfo (false);
841 if (opt == CompareOptions.None && ctx.QuickCheckPossible)
842 return CompareQuick (s1, idx1, len1, s2, idx2, len2, out sourceConsumed, out targetConsumed, immediateBreakup);
844 // It holds final result that comes from the comparison
845 // at level 2 or lower. Even if Compare() found the
846 // difference at level 2 or lower, it still has to
847 // continue level 1 comparison. FinalResult is used
848 // when there was no further differences.
849 int finalResult = 0;
850 // It holds the comparison level to do. It starts from
851 // 5, and becomes 1 when we do primary-only comparison.
852 int currentLevel = 5;
854 int lv5At1 = -1;
855 int lv5At2 = -1;
856 int lv5Value1 = 0;
857 int lv5Value2 = 0;
859 // Skip heading extenders
860 if (skipHeadingExtenders) {
861 for (; idx1 < end1; idx1++)
862 if (GetExtenderType (s1 [idx1]) == ExtenderType.None)
863 break;
864 for (; idx2 < end2; idx2++)
865 if (GetExtenderType (s2 [idx2]) == ExtenderType.None)
866 break;
869 ExtenderType ext1 = ExtenderType.None;
870 ExtenderType ext2 = ExtenderType.None;
872 int quickCheckPos1 = idx1;
873 int quickCheckPos2 = idx2;
874 bool stringSort = (opt & COpt.StringSort) != 0;
875 bool ignoreNonSpace = (opt & COpt.IgnoreNonSpace) != 0;
876 Escape escape1 = new Escape ();
877 Escape escape2 = new Escape ();
879 while (true) {
880 for (; idx1 < end1; idx1++)
881 if (!IsIgnorable (s1 [idx1], opt))
882 break;
883 for (; idx2 < end2; idx2++)
884 if (!IsIgnorable (s2 [idx2], opt))
885 break;
887 if (idx1 >= end1) {
888 if (escape1.Source == null)
889 break;
890 s1 = escape1.Source;
891 start1 = escape1.Start;
892 idx1 = escape1.Index;
893 end1 = escape1.End;
894 quickCheckPos1 = escape1.Optional;
895 escape1.Source = null;
896 continue;
898 if (idx2 >= end2) {
899 if (escape2.Source == null)
900 break;
901 s2 = escape2.Source;
902 start2 = escape2.Start;
903 idx2 = escape2.Index;
904 end2 = escape2.End;
905 quickCheckPos2 = escape2.Optional;
906 escape2.Source = null;
907 continue;
909 #if true
910 // If comparison is unstable, then this part is one of the most doubtful part.
911 // Here we run quick codepoint comparison and run back to "stable" area to
912 // compare characters.
914 // Strictly to say, even the first character
915 // could be compared here, but it messed
916 // backward step, so I just avoided mess.
917 if (quickCheckPos1 < idx1 && quickCheckPos2 < idx2) {
918 while (idx1 < end1 && idx2 < end2 &&
919 s1 [idx1] == s2 [idx2]) {
920 idx1++;
921 idx2++;
923 if (idx1 == end1 || idx2 == end2)
924 continue; // check replacement
926 int backwardEnd1 = quickCheckPos1;
927 int backwardEnd2 = quickCheckPos2;
928 quickCheckPos1 = idx1;
929 quickCheckPos2 = idx2;
931 idx1--;
932 idx2--;
934 for (;idx1 > backwardEnd1; idx1--)
935 if (Category (s1 [idx1]) != 1)
936 break;
937 for (;idx2 > backwardEnd2; idx2--)
938 if (Category (s2 [idx2]) != 1)
939 break;
940 for (;idx1 > backwardEnd1; idx1--)
941 if (IsSafe (s1 [idx1]))
942 break;
943 for (;idx2 > backwardEnd2; idx2--)
944 if (IsSafe (s2 [idx2]))
945 break;
947 #endif
949 int cur1 = idx1;
950 int cur2 = idx2;
951 byte* sk1 = null;
952 byte* sk2 = null;
953 int i1 = FilterOptions (s1 [idx1], opt);
954 int i2 = FilterOptions (s2 [idx2], opt);
955 bool special1 = false;
956 bool special2 = false;
958 // If current character is an expander, then
959 // repeat the previous character.
960 ext1 = GetExtenderType (i1);
961 if (ext1 != ExtenderType.None) {
962 if (ctx.PrevCode < 0) {
963 if (ctx.PrevSortKey == null) {
964 // nothing to extend
965 idx1++;
966 continue;
968 sk1 = ctx.PrevSortKey;
970 else
971 i1 = FilterExtender (ctx.PrevCode, ext1, opt);
973 ext2 = GetExtenderType (i2);
974 if (ext2 != ExtenderType.None) {
975 if (prev2.Code < 0) {
976 if (prev2.SortKey == null) {
977 // nothing to extend
978 idx2++;
979 continue;
981 sk2 = prev2.SortKey;
983 else
984 i2 = FilterExtender (prev2.Code, ext2, opt);
987 byte cat1 = Category (i1);
988 byte cat2 = Category (i2);
990 // Handle special weight characters
991 if (cat1 == 6) {
992 if (!stringSort && currentLevel == 5) {
993 lv5At1 = escape1.Source != null ?
994 escape1.Index - escape1.Start :
995 cur1 - start1;
996 // here Windows has a bug that it does
997 // not consider thirtiary weight.
998 lv5Value1 = Level1 (i1) << 8 + Uni.Level3 (i1);
1000 ctx.PrevCode = i1;
1001 idx1++;
1003 if (cat2 == 6) {
1004 if (!stringSort && currentLevel == 5) {
1005 lv5At2 = escape2.Source != null ?
1006 escape2.Index - escape2.Start :
1007 cur2 - start2;
1008 // here Windows has a bug that it does
1009 // not consider thirtiary weight.
1010 lv5Value2 = Level1 (i2) << 8 + Uni.Level3 (i2);
1012 prev2.Code = i2;
1013 idx2++;
1015 if (cat1 == 6 || cat2 == 6) {
1016 if (currentLevel == 5) {
1017 if (lv5Value1 == lv5Value2) {
1018 // There is not really difference here.
1019 lv5At1 = lv5At2 = -1;
1020 lv5Value1 = lv5Value2 = 0;
1022 else
1023 currentLevel = 4;
1025 continue;
1028 Contraction ct1 = null;
1029 if (ext1 == ExtenderType.None)
1030 ct1 = GetContraction (s1, idx1, end1);
1032 int offset1 = 1;
1033 if (sk1 != null)
1034 offset1 = 1;
1035 else if (ct1 != null) {
1036 offset1 = ct1.Source.Length;
1037 if (ct1.SortKey != null) {
1038 sk1 = ctx.Buffer1;
1039 for (int i = 0; i < ct1.SortKey.Length; i++)
1040 sk1 [i] = ct1.SortKey [i];
1041 ctx.PrevCode = -1;
1042 ctx.PrevSortKey = sk1;
1044 else if (escape1.Source == null) {
1045 escape1.Source = s1;
1046 escape1.Start = start1;
1047 escape1.Index = cur1 + ct1.Source.Length;
1048 escape1.End = end1;
1049 escape1.Optional = quickCheckPos1;
1050 s1 = ct1.Replacement;
1051 idx1 = 0;
1052 start1 = 0;
1053 end1 = s1.Length;
1054 quickCheckPos1 = 0;
1055 continue;
1058 else {
1059 sk1 = ctx.Buffer1;
1060 sk1 [0] = cat1;
1061 sk1 [1] = Level1 (i1);
1062 if (!ignoreNonSpace && currentLevel > 1)
1063 sk1 [2] = Level2 (i1, ext1);
1064 if (currentLevel > 2)
1065 sk1 [3] = Uni.Level3 (i1);
1066 if (currentLevel > 3)
1067 special1 = Uni.HasSpecialWeight ((char) i1);
1068 if (cat1 > 1)
1069 ctx.PrevCode = i1;
1072 Contraction ct2 = null;
1073 if (ext2 == ExtenderType.None)
1074 ct2 = GetContraction (s2, idx2, end2);
1076 if (sk2 != null)
1077 idx2++;
1078 else if (ct2 != null) {
1079 idx2 += ct2.Source.Length;
1080 if (ct2.SortKey != null) {
1081 sk2 = ctx.Buffer2;
1082 for (int i = 0; i < ct2.SortKey.Length; i++)
1083 sk2 [i] = ct2.SortKey [i];
1084 prev2.Code = -1;
1085 prev2.SortKey = sk2;
1087 else if (escape2.Source == null) {
1088 escape2.Source = s2;
1089 escape2.Start = start2;
1090 escape2.Index = cur2 + ct2.Source.Length;
1091 escape2.End = end2;
1092 escape2.Optional = quickCheckPos2;
1093 s2 = ct2.Replacement;
1094 idx2 = 0;
1095 start2 = 0;
1096 end2 = s2.Length;
1097 quickCheckPos2 = 0;
1098 continue;
1101 else {
1102 sk2 = ctx.Buffer2;
1103 sk2 [0] = cat2;
1104 sk2 [1] = Level1 (i2);
1105 if (!ignoreNonSpace && currentLevel > 1)
1106 sk2 [2] = Level2 (i2, ext2);
1107 if (currentLevel > 2)
1108 sk2 [3] = Uni.Level3 (i2);
1109 if (currentLevel > 3)
1110 special2 = Uni.HasSpecialWeight ((char) i2);
1111 if (cat2 > 1)
1112 prev2.Code = i2;
1113 idx2++;
1116 // Add offset here so that it does not skip
1117 // idx1 while s2 has a replacement.
1118 idx1 += offset1;
1120 if (!ignoreNonSpace) {
1121 // add diacritical marks in s1 here
1122 while (idx1 < end1) {
1123 if (Category (s1 [idx1]) != 1)
1124 break;
1125 if (sk1 [2] == 0)
1126 sk1 [2] = 2;
1127 sk1 [2] = (byte) (sk1 [2] +
1128 Level2 (s1 [idx1], ExtenderType.None));
1129 idx1++;
1132 // add diacritical marks in s2 here
1133 while (idx2 < end2) {
1134 if (Category (s2 [idx2]) != 1)
1135 break;
1136 if (sk2 [2] == 0)
1137 sk2 [2] = 2;
1138 sk2 [2] = (byte) (sk2 [2] +
1139 Level2 (s2 [idx2], ExtenderType.None));
1140 idx2++;
1144 int ret = sk1 [0] - sk2 [0];
1145 ret = ret != 0 ? ret : sk1 [1] - sk2 [1];
1146 if (ret != 0)
1147 return ret;
1148 if (currentLevel == 1)
1149 continue;
1150 if (!ignoreNonSpace) {
1151 ret = sk1 [2] - sk2 [2];
1152 if (ret != 0) {
1153 finalResult = ret;
1154 if (immediateBreakup)
1155 return -1; // different
1156 currentLevel = frenchSort ? 2 : 1;
1157 continue;
1160 if (currentLevel == 2)
1161 continue;
1162 ret = sk1 [3] - sk2 [3];
1163 if (ret != 0) {
1164 finalResult = ret;
1165 if (immediateBreakup)
1166 return -1; // different
1167 currentLevel = 2;
1168 continue;
1170 if (currentLevel == 3)
1171 continue;
1172 if (special1 != special2) {
1173 if (immediateBreakup)
1174 return -1; // different
1175 finalResult = special1 ? 1 : -1;
1176 currentLevel = 3;
1177 continue;
1179 if (special1) {
1180 ret = CompareFlagPair (
1181 !Uni.IsJapaneseSmallLetter ((char) i1),
1182 !Uni.IsJapaneseSmallLetter ((char) i2));
1183 ret = ret != 0 ? ret :
1184 ToDashTypeValue (ext1, opt) -
1185 ToDashTypeValue (ext2, opt);
1186 ret = ret != 0 ? ret : CompareFlagPair (
1187 Uni.IsHiragana ((char) i1),
1188 Uni.IsHiragana ((char) i2));
1189 ret = ret != 0 ? ret : CompareFlagPair (
1190 !IsHalfKana ((char) i1, opt),
1191 !IsHalfKana ((char) i2, opt));
1192 if (ret != 0) {
1193 if (immediateBreakup)
1194 return -1; // different
1195 finalResult = ret;
1196 currentLevel = 3;
1197 continue;
1202 // If there were only level 3 or lower differences,
1203 // then we still have to find diacritical differences
1204 // if any.
1205 if (!ignoreNonSpace &&
1206 finalResult != 0 && currentLevel > 2) {
1207 while (idx1 < end1 && idx2 < end2) {
1208 if (!Uni.IsIgnorableNonSpacing (s1 [idx1]))
1209 break;
1210 if (!Uni.IsIgnorableNonSpacing (s2 [idx2]))
1211 break;
1212 finalResult = Level2 (FilterOptions (s1 [idx1], opt), ext1) - Level2 (FilterOptions (s2 [idx2], opt), ext2);
1213 if (finalResult != 0)
1214 break;
1215 idx1++;
1216 idx2++;
1217 // they should work only for the first character
1218 ext1 = ExtenderType.None;
1219 ext2 = ExtenderType.None;
1222 if (currentLevel == 1 && finalResult != 0) {
1223 while (idx1 < end1) {
1224 if (Uni.IsIgnorableNonSpacing (s1 [idx1]))
1225 idx1++;
1226 else
1227 break;
1229 while (idx2 < end2) {
1230 if (Uni.IsIgnorableNonSpacing (s2 [idx2]))
1231 idx2++;
1232 else
1233 break;
1236 // we still have to check level 5
1237 if (finalResult == 0) {
1238 if (lv5At1 < 0 && lv5At2 >= 0)
1239 finalResult = -1;
1240 else if (lv5At2 < 0 && lv5At1 >= 0)
1241 finalResult = 1;
1242 else {
1243 finalResult = lv5At1 - lv5At2;
1244 if (finalResult == 0)
1245 finalResult = lv5Value1 - lv5Value2;
1248 if (finalResult == 0) {
1249 if (idx2 == end2)
1250 targetConsumed = true;
1251 if (idx1 == end1)
1252 sourceConsumed = true;
1254 return idx1 != end1 ? 1 : idx2 == end2 ? finalResult : -1;
1257 int CompareFlagPair (bool b1, bool b2)
1259 return b1 == b2 ? 0 : b1 ? 1 : -1;
1262 #endregion
1264 #region IsPrefix() and IsSuffix()
1266 public bool IsPrefix (string src, string target, CompareOptions opt)
1268 return IsPrefix (src, target, 0, src.Length, opt);
1271 public unsafe bool IsPrefix (string s, string target, int start, int length, CompareOptions opt)
1273 if (target.Length == 0)
1274 return true;
1275 byte* sk1 = stackalloc byte [4];
1276 byte* sk2 = stackalloc byte [4];
1277 ClearBuffer (sk1, 4);
1278 ClearBuffer (sk2, 4);
1279 Context ctx = new Context (opt, null, null, sk1, sk2, null,
1280 QuickCheckPossible (s, start, start + length, target, 0, target.Length));
1281 return IsPrefix (s, target, start, length, true, ref ctx);
1284 unsafe bool IsPrefix (string s, string target, int start, int length, bool skipHeadingExtenders, ref Context ctx)
1286 bool consumed, dummy;
1287 CompareInternal (s, start, length,
1288 target, 0, target.Length,
1289 out consumed, out dummy, skipHeadingExtenders,
1290 true, ref ctx);
1291 return consumed;
1294 // IsSuffix()
1296 public bool IsSuffix (string src, string target, CompareOptions opt)
1298 return IsSuffix (src, target, src.Length - 1, src.Length, opt);
1301 public unsafe bool IsSuffix (string s, string target, int start, int length, CompareOptions opt)
1303 if (target.Length == 0)
1304 return true;
1305 int last = LastIndexOf (s, target, start, length, opt);
1306 return last >= 0 && Compare (s, last, s.Length - last, target, 0, target.Length, opt) == 0;
1308 // quick check : simple codepoint comparison
1309 if (s.Length >= target.Length) {
1310 int si = start;
1311 int se = start - length;
1312 for (int i = target.Length - 1; si >= se && i >= 0; i--, si--)
1313 if (s [si] != target [i])
1314 break;
1315 if (si == start + target.Length)
1316 return true;
1319 PreviousInfo prev = new PreviousInfo (false);
1320 byte* sk1 = stackalloc byte [4];
1321 byte* sk2 = stackalloc byte [4];
1322 ClearBuffer (sk1, 4);
1323 ClearBuffer (sk2, 4);
1324 return IsSuffix (opt, s, target, start, length, ref prev, sk1, sk2);
1328 unsafe bool IsSuffix (string s, string t, int start, int length, ref Context ctx)
1330 int tstart = 0;
1331 COpt opt = ctx.Option;
1333 for (;tstart < t.Length; tstart++)
1334 if (!IsIgnorable (t [tstart], opt))
1335 break;
1336 if (tstart == t.Length)
1337 return true; // as if target is String.Empty.
1339 #if false
1340 // This is still not working. If it is not likely to get working, then just remove it.
1341 int si = start;
1342 int send = start - length;
1343 int ti = t.Length - 1;
1344 int tend = -1;
1346 int sStep = start + 1;
1347 int tStep = t.Length;
1348 bool sourceConsumed, targetConsumed;
1349 while (true) {
1350 for (; send < si; si--)
1351 if (!IsIgnorable (s [si]))
1352 break;
1353 for (; tend < ti; ti--)
1354 if (!IsIgnorable (t [ti]))
1355 break;
1356 if (tend == ti)
1357 break;
1358 for (; send < si; si--)
1359 if (IsSafe (s [si]))
1360 break;
1361 for (; tend < ti; ti--)
1362 if (IsSafe (t [ti]))
1363 break;
1364 Console.WriteLine ("==== {0} {1} {2} {3} {4} {5} {6} {7} {8}", s, si, send, length, t, ti, tstart, sStep - si, tStep - ti);
1365 if (CompareInternal (opt, s, si, sStep - si,
1366 t, ti, tStep - ti,
1367 out targetConsumed, out sourceConsumed,
1368 true) != 0)
1369 return false;
1370 if (send == si)
1371 return false;
1372 sStep = si;
1373 tStep = ti;
1374 si--;
1375 ti--;
1377 return true;
1378 #else
1379 // FIXME: it is not efficient for very long target.
1380 bool sourceConsumed, targetConsumed;
1381 int mismatchCount = 0;
1382 for (int i = 0; i < length; i++) {
1383 ctx.ClearPrevInfo ();
1385 int ret = CompareInternal (s, start - i, i + 1,
1386 t, tstart, t.Length - tstart,
1387 out targetConsumed,
1388 // FIXME: could immediately breakup
1389 out sourceConsumed, true, true, ref ctx);
1390 if (ret == 0)
1391 return true;
1392 if (!sourceConsumed && targetConsumed)
1393 return false;
1394 if (!targetConsumed) {
1395 mismatchCount++;
1396 if (mismatchCount > Uni.MaxExpansionLength)
1397 // The largest length of an
1398 // expansion is 3, so if the
1399 // target was not consumed more
1400 // than 3 times, then the tail
1401 // character does not match.
1402 return false;
1405 return false;
1406 #endif
1409 #endregion
1411 #region IndexOf() / LastIndexOf()
1413 // string
1415 public int IndexOf (string s, string target, CompareOptions opt)
1417 return IndexOf (s, target, 0, s.Length, opt);
1420 int QuickIndexOf (string s, string target, int start, int length, out bool testWasUnable)
1422 int testedSourcePos = -1, testedTargetPos = -1;
1424 testWasUnable = true;
1425 if (target.Length == 0)
1426 return 0;
1427 else if (target.Length > length)
1428 return -1;
1429 testWasUnable = false;
1431 int end = start + length - target.Length + 1;
1432 for (int i = start; i < end; i++) {
1433 bool no = false;
1434 for (int j = 0; j < target.Length; j++) {
1435 if (testedTargetPos < j) {
1436 char c = target [j];
1437 if (c == 0 || c >= 0x80) {
1438 testWasUnable = true;
1439 return -1;
1441 else
1442 testedTargetPos = j;
1444 if (testedSourcePos < i + j) {
1445 char c = s [i + j];
1446 if (c == 0 || c >= 0x80) {
1447 testWasUnable = true;
1448 return -1;
1450 else
1451 testedSourcePos = i + j;
1453 if (s [i + j] != target [j]) {
1454 no = true;
1455 break;
1458 if (no)
1459 continue;
1460 return i;
1462 return -1;
1465 public unsafe int IndexOf (string s, string target, int start, int length, CompareOptions opt)
1467 if (opt == CompareOptions.Ordinal)
1468 return IndexOfOrdinal (s, target, start, length);
1469 if (opt == CompareOptions.OrdinalIgnoreCase)
1470 return IndexOfOrdinalIgnoreCase (s, target, start, length);
1471 if (opt == CompareOptions.None) {
1472 bool testWasUnable;
1473 int ret = QuickIndexOf (s, target, start, length, out testWasUnable);
1474 if (!testWasUnable)
1475 return ret;
1478 byte* alwaysMatchFlags = stackalloc byte [16];
1479 byte* neverMatchFlags = stackalloc byte [16];
1480 byte* targetSortKey = stackalloc byte [4];
1481 byte* sk1 = stackalloc byte [4];
1482 byte* sk2 = stackalloc byte [4];
1483 ClearBuffer (alwaysMatchFlags, 16);
1484 ClearBuffer (neverMatchFlags, 16);
1485 ClearBuffer (targetSortKey, 4);
1486 ClearBuffer (sk1, 4);
1487 ClearBuffer (sk2, 4);
1488 Context ctx = new Context (opt, alwaysMatchFlags, neverMatchFlags, sk1, sk2, null, false);
1490 return IndexOf (s, target, start, length,
1491 targetSortKey, ref ctx);
1494 // note that it wouldn't be used since ordinal IndexOf()
1495 // should be directed to icall.
1496 int IndexOfOrdinal (string s, string target, int start, int length)
1498 if (target.Length == 0)
1499 return 0;
1500 else if (target.Length > length)
1501 return -1;
1503 int end = start + length - target.Length + 1;
1504 for (int i = start; i < end; i++) {
1505 bool no = false;
1506 for (int j = 0; j < target.Length; j++) {
1507 if (s [i + j] != target [j]) {
1508 no = true;
1509 break;
1512 if (no)
1513 continue;
1514 return i;
1516 return -1;
1519 int IndexOfOrdinalIgnoreCase (string s, string target, int start, int length)
1521 if (target.Length == 0)
1522 return 0;
1523 else if (target.Length > length)
1524 return -1;
1526 int end = start + length - target.Length + 1;
1527 for (int i = start; i < end; i++) {
1528 bool no = false;
1529 for (int j = 0; j < target.Length; j++) {
1530 // I think almost all text has more lower letters than upper ones. Thus with this invariant comparison ToLower() should be faster since it costs less operations.
1531 if (textInfo.ToLower (s [i + j]) != textInfo.ToLower (target [j])) {
1532 no = true;
1533 break;
1536 if (no)
1537 continue;
1538 return i;
1540 return -1;
1543 // char
1545 public int IndexOf (string s, char target, CompareOptions opt)
1547 return IndexOf (s, target, 0, s.Length, opt);
1550 public unsafe int IndexOf (string s, char target, int start, int length, CompareOptions opt)
1552 if (opt == CompareOptions.Ordinal)
1553 return IndexOfOrdinal (s, target, start, length);
1554 if (opt == CompareOptions.OrdinalIgnoreCase)
1555 return IndexOfOrdinalIgnoreCase (s, target, start, length);
1556 byte* alwaysMatchFlags = stackalloc byte [16];
1557 byte* neverMatchFlags = stackalloc byte [16];
1558 byte* targetSortKey = stackalloc byte [4];
1559 byte* sk1 = stackalloc byte [4];
1560 byte* sk2 = stackalloc byte [4];
1561 ClearBuffer (alwaysMatchFlags, 16);
1562 ClearBuffer (neverMatchFlags, 16);
1563 ClearBuffer (targetSortKey, 4);
1564 ClearBuffer (sk1, 4);
1565 ClearBuffer (sk2, 4);
1566 Context ctx = new Context (opt, alwaysMatchFlags, neverMatchFlags, sk1, sk2, null, false);
1568 // If target is contraction, then use string search.
1569 Contraction ct = GetContraction (target);
1570 if (ct != null) {
1571 if (ct.Replacement != null)
1572 return IndexOf (s, ct.Replacement,
1573 start, length, targetSortKey, ref ctx);
1574 else {
1575 for (int i = 0; i < ct.SortKey.Length; i++)
1576 sk2 [i] = ct.SortKey [i];
1577 return IndexOfSortKey (s, start, length, sk2, char.MinValue, -1, true, ref ctx);
1579 } else {
1580 int ti = FilterOptions ((int) target, opt);
1581 targetSortKey [0] = Category (ti);
1582 targetSortKey [1] = Level1 (ti);
1583 if ((opt & COpt.IgnoreNonSpace) == 0)
1584 targetSortKey [2] =
1585 Level2 (ti, ExtenderType.None);
1586 targetSortKey [3] = Uni.Level3 (ti);
1587 return IndexOfSortKey (s, start, length,
1588 targetSortKey, target, ti,
1589 !Uni.HasSpecialWeight ((char) ti), ref ctx);
1593 // note that it wouldn't be used since ordinal IndexOf()
1594 // should be directed to icall.
1595 int IndexOfOrdinal (string s, char target, int start, int length)
1597 int end = start + length;
1598 for (int i = start; i < end; i++)
1599 if (s [i] == target)
1600 return i;
1601 return -1;
1604 int IndexOfOrdinalIgnoreCase (string s, char target, int start, int length)
1606 int end = start + length;
1607 target = textInfo.ToLower (target);
1608 for (int i = start; i < end; i++)
1609 if (textInfo.ToLower (s [i]) == target)
1610 return i;
1611 return -1;
1614 // Searches target byte[] keydata
1615 unsafe int IndexOfSortKey (string s, int start, int length, byte* sortkey, char target, int ti, bool noLv4, ref Context ctx)
1617 int end = start + length;
1618 int idx = start;
1620 while (idx < end) {
1621 int cur = idx;
1622 if (MatchesForward (s, ref idx, end, ti, sortkey, noLv4, ref ctx))
1623 return cur;
1625 return -1;
1628 // Searches string. Search head character (or keydata when
1629 // the head is contraction sortkey) and try IsPrefix().
1630 unsafe int IndexOf (string s, string target, int start, int length, byte* targetSortKey, ref Context ctx)
1632 COpt opt = ctx.Option;
1633 int tidx = 0;
1634 for (; tidx < target.Length; tidx++)
1635 if (!IsIgnorable (target [tidx], opt))
1636 break;
1637 if (tidx == target.Length)
1638 // FIXME: this is likely a hack. A string that is consists of \0 differs from those of other ignorable characters.
1639 return IndexOfOrdinal (target, '\0', 0, target.Length) >= 0 ? IndexOfOrdinal (s, target, start, length) : start;
1640 Contraction ct = GetContraction (target, tidx, target.Length - tidx);
1641 string replace = ct != null ? ct.Replacement : null;
1642 byte* sk = replace == null ? targetSortKey : null;
1643 bool noLv4 = true;
1644 char tc = char.MinValue;
1645 int ti = -1;
1646 if (ct != null && sk != null) {
1647 for (int i = 0; i < ct.SortKey.Length; i++)
1648 sk [i] = ct.SortKey [i];
1649 } else if (sk != null) {
1650 tc = target [tidx];
1651 ti = FilterOptions (target [tidx], opt);
1652 sk [0] = Category (ti);
1653 sk [1] = Level1 (ti);
1654 if ((opt & COpt.IgnoreNonSpace) == 0)
1655 sk [2] = Level2 (ti, ExtenderType.None);
1656 sk [3] = Uni.Level3 (ti);
1657 noLv4 = !Uni.HasSpecialWeight ((char) ti);
1659 if (sk != null) {
1660 for (tidx++; tidx < target.Length; tidx++) {
1661 if (Category (target [tidx]) != 1)
1662 break;
1663 if (sk [2] == 0)
1664 sk [2] = 2;
1665 sk [2] = (byte) (sk [2] + Level2 (target [tidx], ExtenderType.None));
1669 do {
1670 int idx = 0;
1671 if (replace != null)
1672 idx = IndexOf (s, replace, start, length, targetSortKey, ref ctx);
1673 else
1674 idx = IndexOfSortKey (s, start, length, sk, tc, ti, noLv4, ref ctx);
1675 if (idx < 0)
1676 return -1;
1677 length -= idx - start;
1678 start = idx;
1679 if (IsPrefix (s, target, start, length, false, ref ctx))
1680 return idx;
1681 Contraction cts = GetContraction (s, start, length);
1682 if (cts != null) {
1683 start += cts.Source.Length;
1684 length -= cts.Source.Length;
1685 } else {
1686 start++;
1687 length--;
1689 } while (length > 0);
1690 return -1;
1694 // There are the same number of LastIndexOf() methods as
1695 // IndexOf() related methods, with the same functionalities
1696 // for each.
1699 // string
1701 public int LastIndexOf (string s, string target, CompareOptions opt)
1703 return LastIndexOf (s, target, s.Length - 1, s.Length, opt);
1706 public unsafe int LastIndexOf (string s, string target, int start, int length, CompareOptions opt)
1708 if (opt == CompareOptions.Ordinal)
1709 return LastIndexOfOrdinal (s, target, start, length);
1710 if (opt == CompareOptions.OrdinalIgnoreCase)
1711 return LastIndexOfOrdinalIgnoreCase (s, target, start, length);
1712 byte* alwaysMatchFlags = stackalloc byte [16];
1713 byte* neverMatchFlags = stackalloc byte [16];
1714 byte* targetSortKey = stackalloc byte [4];
1715 byte* sk1 = stackalloc byte [4];
1716 byte* sk2 = stackalloc byte [4];
1717 ClearBuffer (alwaysMatchFlags, 16);
1718 ClearBuffer (neverMatchFlags, 16);
1719 ClearBuffer (targetSortKey, 4);
1720 ClearBuffer (sk1, 4);
1721 ClearBuffer (sk2, 4);
1722 // For some unknown reason CompareQuick() does not work fine w/ LastIndexOf().
1723 Context ctx = new Context (opt, alwaysMatchFlags, neverMatchFlags, sk1, sk2, null, false);
1724 return LastIndexOf (s, target, start, length,
1725 targetSortKey, ref ctx);
1728 int LastIndexOfOrdinal (string s, string target, int start, int length)
1730 if (target.Length == 0)
1731 return start;
1732 if (s.Length < target.Length || target.Length > length)
1733 return -1;
1734 int end = start - length + target.Length -1;
1735 char tail = target [target.Length - 1];
1736 for (int i = start; i > end;) {
1737 if (s [i] != tail) {
1738 i--;
1739 continue;
1741 int x = i - target.Length + 1;
1742 i--;
1743 bool mismatch = false;
1744 for (int j = target.Length - 2; j >= 0; j--)
1745 if (s [x + j] != target [j]) {
1746 mismatch = true;
1747 break;
1749 if (mismatch)
1750 continue;
1751 return x;
1753 return -1;
1756 int LastIndexOfOrdinalIgnoreCase (string s, string target, int start, int length)
1758 if (target.Length == 0)
1759 return start;
1760 if (s.Length < length || target.Length > length)
1761 return -1;
1762 int end = start - length + target.Length - 1;
1763 char tail = textInfo.ToLower (target [target.Length - 1]);
1764 for (int i = start; i > end;) {
1765 if (textInfo.ToLower (s [i]) != tail) {
1766 i--;
1767 continue;
1769 int x = i - target.Length + 1;
1770 i--;
1771 bool mismatch = false;
1772 for (int j = target.Length - 2; j >= 0; j--)
1773 if (textInfo.ToLower (s [x + j]) != textInfo.ToLower (target [j])) {
1774 mismatch = true;
1775 break;
1777 if (mismatch)
1778 continue;
1779 return x;
1781 return -1;
1784 // char
1786 public int LastIndexOf (string s, char target, CompareOptions opt)
1788 return LastIndexOf (s, target, s.Length - 1, s.Length, opt);
1791 public unsafe int LastIndexOf (string s, char target, int start, int length, CompareOptions opt)
1793 if (opt == CompareOptions.Ordinal)
1794 return LastIndexOfOrdinal (s, target, start, length);
1795 if (opt == CompareOptions.OrdinalIgnoreCase)
1796 return LastIndexOfOrdinalIgnoreCase (s, target, start, length);
1797 byte* alwaysMatchFlags = stackalloc byte [16];
1798 byte* neverMatchFlags = stackalloc byte [16];
1799 byte* targetSortKey = stackalloc byte [4];
1800 byte* sk1 = stackalloc byte [4];
1801 byte* sk2 = stackalloc byte [4];
1802 ClearBuffer (alwaysMatchFlags, 16);
1803 ClearBuffer (neverMatchFlags, 16);
1804 ClearBuffer (targetSortKey, 4);
1805 ClearBuffer (sk1, 4);
1806 ClearBuffer (sk2, 4);
1807 Context ctx = new Context (opt, alwaysMatchFlags, neverMatchFlags, sk1, sk2, null, false);
1809 // If target is a replacement contraction, then use
1810 // string search.
1811 Contraction ct = GetContraction (target);
1812 if (ct != null) {
1813 if (ct.Replacement != null)
1814 return LastIndexOf (s,
1815 ct.Replacement, start, length,
1816 targetSortKey, ref ctx);
1817 else {
1818 for (int bi = 0; bi < ct.SortKey.Length; bi++)
1819 sk2 [bi] = ct.SortKey [bi];
1820 return LastIndexOfSortKey (s, start,
1821 start, length, sk2,
1822 -1, true, ref ctx);
1825 else {
1826 int ti = FilterOptions ((int) target, opt);
1827 targetSortKey [0] = Category (ti);
1828 targetSortKey [1] = Level1 (ti);
1829 if ((opt & COpt.IgnoreNonSpace) == 0)
1830 targetSortKey [2] = Level2 (ti, ExtenderType.None);
1831 targetSortKey [3] = Uni.Level3 (ti);
1832 return LastIndexOfSortKey (s, start, start,
1833 length, targetSortKey,
1834 ti, !Uni.HasSpecialWeight ((char) ti),
1835 ref ctx);
1839 int LastIndexOfOrdinal (string s, char target, int start, int length)
1841 if (s.Length == 0)
1842 return -1;
1843 int end = start - length;
1844 for (int i = start; i > end; i--)
1845 if (s [i] == target)
1846 return i;
1847 return -1;
1850 int LastIndexOfOrdinalIgnoreCase (string s, char target, int start, int length)
1852 if (s.Length == 0)
1853 return -1;
1854 int end = start - length;
1855 char c = textInfo.ToUpper (target);
1856 for (int i = start; i > end; i--)
1857 if (textInfo.ToUpper (s [i]) == c)
1858 return i;
1859 return -1;
1862 // Searches target byte[] keydata
1863 unsafe int LastIndexOfSortKey (string s, int start, int orgStart, int length, byte* sortkey, int ti, bool noLv4, ref Context ctx)
1865 int end = start - length;
1866 int idx = start;
1867 while (idx > end) {
1868 int cur = idx;
1869 if (MatchesBackward (s, ref idx, end, orgStart,
1870 ti, sortkey, noLv4, ref ctx))
1871 return cur;
1873 return -1;
1876 // Searches string. Search head character (or keydata when
1877 // the head is contraction sortkey) and try IsPrefix().
1878 unsafe int LastIndexOf (string s, string target, int start, int length, byte* targetSortKey, ref Context ctx)
1880 COpt opt = ctx.Option;
1881 int orgStart = start;
1882 int tidx = 0;
1883 for (; tidx < target.Length; tidx++)
1884 if (!IsIgnorable (target [tidx], opt))
1885 break;
1886 if (tidx == target.Length)
1887 // FIXME: this is likely a hack. A string that is consists of \0 differs from those of other ignorable characters.
1888 return IndexOfOrdinal (target, '\0', 0, target.Length) >= 0 ? LastIndexOfOrdinal (s, target, start, length) : start;
1889 Contraction ct = GetContraction (target, tidx, target.Length - tidx);
1890 string replace = ct != null ? ct.Replacement : null;
1891 byte* sk = replace == null ? targetSortKey : null;
1893 bool noLv4 = true;
1894 int ti = -1;
1895 if (ct != null && sk != null) {
1896 for (int i = 0; i < ct.SortKey.Length; i++)
1897 sk [i] = ct.SortKey [i];
1898 } else if (sk != null) {
1899 ti = FilterOptions (target [tidx], opt);
1900 sk [0] = Category (ti);
1901 sk [1] = Level1 (ti);
1902 if ((opt & COpt.IgnoreNonSpace) == 0)
1903 sk [2] = Level2 (ti, ExtenderType.None);
1904 sk [3] = Uni.Level3 (ti);
1905 noLv4 = !Uni.HasSpecialWeight ((char) ti);
1907 if (sk != null) {
1908 for (tidx++; tidx < target.Length; tidx++) {
1909 if (Category (target [tidx]) != 1)
1910 break;
1911 if (sk [2] == 0)
1912 sk [2] = 2;
1913 sk [2] = (byte) (sk [2] + Level2 (target [tidx], ExtenderType.None));
1917 do {
1918 int idx = 0;
1920 if (replace != null)
1921 idx = LastIndexOf (s, replace,
1922 start, length,
1923 targetSortKey, ref ctx);
1924 else
1925 idx = LastIndexOfSortKey (s, start, orgStart, length, sk, ti, noLv4, ref ctx);
1926 if (idx < 0)
1927 return -1;
1928 length -= start - idx;
1929 start = idx;
1931 if (IsPrefix (s, target, idx, orgStart - idx + 1, false, ref ctx)) {
1932 for (;idx < orgStart; idx++)
1933 if (!IsIgnorable (s [idx], opt))
1934 break;
1935 return idx;
1937 Contraction cts = GetContraction (s, idx, orgStart - idx + 1);
1938 if (cts != null) {
1939 start -= cts.Source.Length;
1940 length -= cts.Source.Length;
1941 } else {
1942 start--;
1943 length--;
1945 } while (length > 0);
1946 return -1;
1949 unsafe bool MatchesForward (string s, ref int idx, int end, int ti, byte* sortkey, bool noLv4, ref Context ctx)
1951 int si = s [idx];
1952 if (ctx.AlwaysMatchFlags != null && si < 128 && (ctx.AlwaysMatchFlags [si / 8] & (1 << (si % 8))) != 0)
1953 return true;
1954 if (ctx.NeverMatchFlags != null &&
1955 si < 128 &&
1956 (ctx.NeverMatchFlags [si / 8] & (1 << (si % 8))) != 0) {
1957 idx++;
1958 return false;
1960 ExtenderType ext = GetExtenderType (s [idx]);
1961 Contraction ct = null;
1962 if (MatchesForwardCore (s, ref idx, end, ti, sortkey, noLv4, ext, ref ct, ref ctx)) {
1963 if (ctx.AlwaysMatchFlags != null && ct == null && ext == ExtenderType.None && si < 128)
1964 ctx.AlwaysMatchFlags [si / 8] |= (byte) (1 << (si % 8));
1965 return true;
1967 if (ctx.NeverMatchFlags != null && ct == null && ext == ExtenderType.None && si < 128)
1968 ctx.NeverMatchFlags [si / 8] |= (byte) (1 << (si % 8));
1969 return false;
1972 unsafe bool MatchesForwardCore (string s, ref int idx, int end, int ti, byte* sortkey, bool noLv4, ExtenderType ext, ref Contraction ct, ref Context ctx)
1974 COpt opt = ctx.Option;
1975 byte* charSortKey = ctx.Buffer1;
1976 bool ignoreNonSpace = (opt & COpt.IgnoreNonSpace) != 0;
1977 int si = -1;
1978 if (ext == ExtenderType.None)
1979 ct = GetContraction (s, idx, end);
1980 else if (ctx.PrevCode < 0) {
1981 if (ctx.PrevSortKey == null) {
1982 idx++;
1983 return false;
1985 charSortKey = ctx.PrevSortKey;
1987 else
1988 si = FilterExtender (ctx.PrevCode, ext, opt);
1989 // if lv4 exists, it never matches contraction
1990 if (ct != null) {
1991 idx += ct.Source.Length;
1992 if (!noLv4)
1993 return false;
1994 if (ct.SortKey != null) {
1995 for (int i = 0; i < 4; i++)
1996 charSortKey [i] = sortkey [i];
1997 ctx.PrevCode = -1;
1998 ctx.PrevSortKey = charSortKey;
1999 } else {
2000 // Here is the core of LAMESPEC
2001 // described at the top of the source.
2002 int dummy = 0;
2003 return MatchesForward (ct.Replacement, ref dummy,
2004 ct.Replacement.Length, ti, sortkey, noLv4, ref ctx);
2006 } else {
2007 if (si < 0)
2008 si = FilterOptions (s [idx], opt);
2009 idx++;
2010 charSortKey [0] = Category (si);
2011 bool noMatch = false;
2012 if (sortkey [0] == charSortKey [0])
2013 charSortKey [1] = Level1 (si);
2014 else
2015 noMatch = true;
2016 if (!ignoreNonSpace && sortkey [1] == charSortKey [1])
2017 charSortKey [2] = Level2 (si, ext);
2018 else if (!ignoreNonSpace)
2019 noMatch = true;
2020 if (noMatch) {
2021 for (; idx < end; idx++) {
2022 if (Category (s [idx]) != 1)
2023 break;
2025 return false;
2027 charSortKey [3] = Uni.Level3 (si);
2028 if (charSortKey [0] != 1)
2029 ctx.PrevCode = si;
2031 for (; idx < end; idx++) {
2032 if (Category (s [idx]) != 1)
2033 break;
2034 if (ignoreNonSpace)
2035 continue;
2036 if (charSortKey [2] == 0)
2037 charSortKey [2] = 2;
2038 charSortKey [2] = (byte) (charSortKey [2]
2039 + Level2 (s [idx], ExtenderType.None));
2042 return MatchesPrimitive (opt, charSortKey, si, ext, sortkey, ti, noLv4);
2045 unsafe bool MatchesPrimitive (COpt opt, byte* source, int si, ExtenderType ext, byte* target, int ti, bool noLv4)
2047 bool ignoreNonSpace = (opt & COpt.IgnoreNonSpace) != 0;
2048 if (source [0] != target [0] ||
2049 source [1] != target [1] ||
2050 (!ignoreNonSpace && source [2] != target [2]) ||
2051 source [3] != target [3])
2052 return false;
2053 if (noLv4 && (si < 0 || !Uni.HasSpecialWeight ((char) si)))
2054 return true;
2055 else if (noLv4)
2056 return false;
2057 // Since target can never be an extender, if the source
2058 // is an expander and it matters, then they never match.
2059 if (!ignoreNonSpace && ext == ExtenderType.Conditional)
2060 return false;
2061 if (Uni.IsJapaneseSmallLetter ((char) si) !=
2062 Uni.IsJapaneseSmallLetter ((char) ti) ||
2063 ToDashTypeValue (ext, opt) !=
2064 // FIXME: we will have to specify correct value for target
2065 ToDashTypeValue (ExtenderType.None, opt) ||
2066 !Uni.IsHiragana ((char) si) !=
2067 !Uni.IsHiragana ((char) ti) ||
2068 IsHalfKana ((char) si, opt) !=
2069 IsHalfKana ((char) ti, opt))
2070 return false;
2071 return true;
2074 unsafe bool MatchesBackward (string s, ref int idx, int end, int orgStart, int ti, byte* sortkey, bool noLv4, ref Context ctx)
2076 int si = s [idx];
2077 if (ctx.AlwaysMatchFlags != null && si < 128 && (ctx.AlwaysMatchFlags [si / 8] & (1 << (si % 8))) != 0)
2078 return true;
2079 if (ctx.NeverMatchFlags != null && si < 128 && (ctx.NeverMatchFlags [si / 8] & (1 << (si % 8))) != 0) {
2080 idx--;
2081 return false;
2083 ExtenderType ext = GetExtenderType (s [idx]);
2084 Contraction ct = null;
2085 if (MatchesBackwardCore (s, ref idx, end, orgStart, ti, sortkey, noLv4, ext, ref ct, ref ctx)) {
2086 if (ctx.AlwaysMatchFlags != null && ct == null && ext == ExtenderType.None && si < 128)
2087 ctx.AlwaysMatchFlags [si / 8] |= (byte) (1 << (si % 8));
2088 return true;
2090 if (ctx.NeverMatchFlags != null && ct == null && ext == ExtenderType.None && si < 128) {
2091 ctx.NeverMatchFlags [si / 8] |= (byte) (1 << (si % 8));
2093 return false;
2096 unsafe bool MatchesBackwardCore (string s, ref int idx, int end, int orgStart, int ti, byte* sortkey, bool noLv4, ExtenderType ext, ref Contraction ct, ref Context ctx)
2098 COpt opt = ctx.Option;
2099 byte* charSortKey = ctx.Buffer1;
2100 bool ignoreNonSpace = (opt & COpt.IgnoreNonSpace) != 0;
2101 int cur = idx;
2102 int si = -1;
2103 // To handle extenders in source, we need to
2104 // check next _primary_ character.
2105 if (ext != ExtenderType.None) {
2106 byte diacritical = 0;
2107 for (int tmp = idx; ; tmp--) {
2108 if (tmp < 0) // heading extender
2109 return false;
2110 if (IsIgnorable (s [tmp], opt))
2111 continue;
2112 int tmpi = FilterOptions (s [tmp], opt);
2113 byte category = Category (tmpi);
2114 if (category == 1) {
2115 diacritical = Level2 (tmpi, ExtenderType.None);
2116 continue;
2118 si = FilterExtender (tmpi, ext, opt);
2119 charSortKey [0] = category;
2120 charSortKey [1] = Level1 (si);
2121 if (!ignoreNonSpace)
2122 charSortKey [2] = Level2 (si, ext);
2123 charSortKey [3] = Uni.Level3 (si);
2124 if (ext != ExtenderType.Conditional &&
2125 diacritical != 0)
2126 charSortKey [2] =
2127 (charSortKey [2] == 0) ?
2128 (byte) (diacritical + 2) :
2129 diacritical;
2130 break;
2132 idx--;
2134 if (ext == ExtenderType.None)
2135 ct = GetTailContraction (s, idx, end);
2136 // if lv4 exists, it never matches contraction
2137 if (ct != null) {
2138 idx -= ct.Source.Length;
2139 if (!noLv4)
2140 return false;
2141 if (ct.SortKey != null) {
2142 for (int i = 0; i < 4; i++)
2143 charSortKey [i] = sortkey [i];
2144 ctx.PrevCode = -1;
2145 ctx.PrevSortKey = charSortKey;
2146 } else {
2147 // Here is the core of LAMESPEC
2148 // described at the top of the source.
2149 int dummy = ct.Replacement.Length - 1;
2150 return 0 <= LastIndexOfSortKey (
2151 ct.Replacement, dummy, dummy,
2152 ct.Replacement.Length, sortkey,
2153 ti, noLv4, ref ctx);
2155 } else if (ext == ExtenderType.None) {
2156 if (si < 0)
2157 si = FilterOptions (s [idx], opt);
2158 idx--;
2159 bool noMatch = false;
2160 charSortKey [0] = Category (si);
2161 if (charSortKey [0] == sortkey [0])
2162 charSortKey [1] = Level1 (si);
2163 else
2164 noMatch = true;
2165 if (!ignoreNonSpace && charSortKey [1] == sortkey [1])
2166 charSortKey [2] = Level2 (si, ext);
2167 else if (!ignoreNonSpace)
2168 noMatch = true;
2169 if (noMatch)
2170 return false;
2171 charSortKey [3] = Uni.Level3 (si);
2172 if (charSortKey [0] != 1)
2173 ctx.PrevCode = si;
2175 if (ext == ExtenderType.None) {
2176 for (int tmp = cur + 1; tmp < orgStart; tmp++) {
2177 if (Category (s [tmp]) != 1)
2178 break;
2179 if (ignoreNonSpace)
2180 continue;
2181 if (charSortKey [2] == 0)
2182 charSortKey [2] = 2;
2183 charSortKey [2] = (byte) (charSortKey [2]
2184 + Level2 (s [tmp], ExtenderType.None));
2187 return MatchesPrimitive (opt, charSortKey, si, ext, sortkey, ti, noLv4);
2189 #endregion