1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
5 using System
.Diagnostics
;
6 using System
.Runtime
.CompilerServices
;
7 using System
.Runtime
.InteropServices
;
8 using System
.Runtime
.Serialization
;
10 using System
.Text
.Unicode
;
11 using Internal
.Runtime
.CompilerServices
;
13 #pragma warning disable SA1121 // explicitly using type aliases instead of built-in types
15 using nuint
= System
.UInt64
;
16 using nint
= System
.Int64
;
18 using nuint
= System
.UInt32
;
19 using nint
= System
.Int32
;
22 namespace System
.Globalization
25 /// This class defines behaviors specific to a writing system.
26 /// A writing system is the collection of scripts and orthographic rules
27 /// required to represent a language as text.
29 public partial class TextInfo
: ICloneable
, IDeserializationCallback
// Byte-sized state enum. Elsewhere in this class its NotInitialized, True and False
// members are used to track a lazily computed boolean.
31 private enum Tristate
: byte
// Cached list separator; filled from _cultureData on first read or assigned through the ListSeparator setter.
38 private string? _listSeparator
;
// Read-only flag; changed only through SetReadOnlyState.
39 private bool _isReadOnly
= false;
// Copied from _cultureData.CultureName by the constructor.
41 private readonly string _cultureName
;
// Culture data that backs this instance (code pages, list separator, text direction).
42 private readonly CultureData _cultureData
;
44 // Name of the text info we're using (ie: _cultureData.TextInfoName)
45 private readonly string _textInfoName
;
// Lazily computed answer to "is ASCII casing for this culture the same as invariant?".
47 private Tristate _isAsciiCasingSameAsInvariant
= Tristate
.NotInitialized
;
/// <summary>
/// Gets the shared <see cref="TextInfo"/> built from <c>CultureData.Invariant</c>.
/// The instance is created on first access and cached in <c>s_invariant</c>.
/// </summary>
internal static TextInfo Invariant
{
    get
    {
        TextInfo? cached = s_invariant;
        if (cached == null)
        {
            cached = new TextInfo(CultureData.Invariant);
            s_invariant = cached;
        }

        return cached;
    }
}

// Cache behind the Invariant property; null until the first access.
private static volatile TextInfo? s_invariant;
/// <summary>
/// Creates a <see cref="TextInfo"/> backed by the supplied culture data.
/// </summary>
/// <param name="cultureData">Culture data that supplies the culture name and text info name.</param>
internal TextInfo(CultureData cultureData)
{
    // The culture data is the primary source; the two names are cached copies taken from it.
    _cultureData = cultureData;
    _cultureName = cultureData.CultureName;
    _textInfoName = cultureData.TextInfoName;

    FinishInitialization();
}
/// <summary>
/// Explicit <see cref="IDeserializationCallback"/> implementation; always throws
/// <see cref="PlatformNotSupportedException"/>.
/// </summary>
void IDeserializationCallback.OnDeserialization(object? sender) =>
    throw new PlatformNotSupportedException();
/// <summary>Gets the ANSI code page reported by the backing culture data.</summary>
public virtual int ANSICodePage
{
    get { return _cultureData.ANSICodePage; }
}

/// <summary>Gets the OEM code page reported by the backing culture data.</summary>
public virtual int OEMCodePage
{
    get { return _cultureData.OEMCodePage; }
}

/// <summary>Gets the Macintosh code page reported by the backing culture data.</summary>
public virtual int MacCodePage
{
    get { return _cultureData.MacCodePage; }
}

/// <summary>Gets the EBCDIC code page reported by the backing culture data.</summary>
public virtual int EBCDICCodePage
{
    get { return _cultureData.EBCDICCodePage; }
}
/// <summary>Gets the LCID of the culture looked up by the text info name.</summary>
public int LCID
{
    get
    {
        CultureInfo textInfoCulture = CultureInfo.GetCultureInfo(_textInfoName);
        return textInfoCulture.LCID;
    }
}

/// <summary>Gets the text info name held by this instance.</summary>
public string CultureName
{
    get { return _textInfoName; }
}

/// <summary>Gets a value indicating whether this instance has been marked read-only.</summary>
public bool IsReadOnly
{
    get { return _isReadOnly; }
}
/// <summary>
/// Creates a shallow copy of this instance; the copy is always writable.
/// </summary>
/// <returns>The writable copy.</returns>
public virtual object Clone()
{
    TextInfo copy = (TextInfo)MemberwiseClone();
    copy.SetReadOnlyState(false);
    return copy;
}
/// <summary>
/// Returns a read-only view of <paramref name="textInfo"/>: the input itself when it is
/// already read-only, otherwise a shallow copy marked read-only.
/// </summary>
/// <param name="textInfo">The instance to wrap.</param>
/// <exception cref="ArgumentNullException"><paramref name="textInfo"/> is null.</exception>
public static TextInfo ReadOnly(TextInfo textInfo)
{
    if (textInfo is null)
    {
        throw new ArgumentNullException(nameof(textInfo));
    }

    if (!textInfo.IsReadOnly)
    {
        TextInfo readOnlyCopy = (TextInfo)textInfo.MemberwiseClone();
        readOnlyCopy.SetReadOnlyState(true);
        return readOnlyCopy;
    }

    return textInfo;
}
/// <summary>Throws when this instance has been marked read-only.</summary>
/// <exception cref="InvalidOperationException">The instance is read-only.</exception>
private void VerifyWritable()
{
    if (!_isReadOnly)
    {
        return;
    }

    throw new InvalidOperationException(SR.InvalidOperation_ReadOnly);
}
/// <summary>Sets the read-only flag of this instance.</summary>
/// <param name="readOnly">The new value of the flag.</param>
internal void SetReadOnlyState(bool readOnly) => _isReadOnly = readOnly;
/// <summary>
/// Gets or sets the string used to separate items in a list. The getter falls back to the
/// culture data's separator the first time it is read and caches it.
/// </summary>
/// <exception cref="ArgumentNullException">The assigned value is null.</exception>
public virtual string ListSeparator
{
    get
    {
        string? separator = _listSeparator;
        if (separator == null)
        {
            separator = _cultureData.ListSeparator;
            _listSeparator = separator;
        }

        return separator;
    }
    set
    {
        if (value is null)
        {
            throw new ArgumentNullException(nameof(value));
        }

        VerifyWritable();
        _listSeparator = value;
    }
}
/// <summary>
/// Converts a character to lower case. ASCII input takes the invariant fast path when
/// globalization is in invariant mode or this culture cases ASCII like the invariant culture.
/// </summary>
/// <param name="c">The character to convert.</param>
/// <returns>The lower-case form of <paramref name="c"/>.</returns>
public virtual char ToLower(char c)
{
    bool useInvariantAsciiPath =
        GlobalizationMode.Invariant || (IsAscii(c) && IsAsciiCasingSameAsInvariant);

    return useInvariantAsciiPath
        ? ToLowerAsciiInvariant(c)
        : ChangeCase(c, toUpper: false);
}
/// <summary>Converts a string to lower case.</summary>
/// <param name="str">The string to convert.</param>
/// <returns>The lower-case form of <paramref name="str"/>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
public virtual string ToLower(string str)
{
    if (str is null)
    {
        throw new ArgumentNullException(nameof(str));
    }

    // Invariant globalization mode only knows ASCII casing.
    return GlobalizationMode.Invariant
        ? ToLowerAsciiInvariant(str)
        : ChangeCaseCommon<ToLowerConversion>(str);
}
/// <summary>
/// Changes the case of a single character through the pointer-based ChangeCase overload.
/// Must not be called in invariant globalization mode.
/// </summary>
/// <param name="c">The character to convert.</param>
/// <param name="toUpper">True to convert to upper case, false to convert to lower case.</param>
/// <returns>The converted character.</returns>
private unsafe char ChangeCase(char c, bool toUpper)
{
    Debug.Assert(!GlobalizationMode.Invariant);

    char converted = default;
    char* pInput = &c;
    char* pOutput = &converted;
    ChangeCase(pInput, 1, pOutput, 1, toUpper);
    return converted;
}
/// <summary>
/// Lower-cases <paramref name="source"/> into <paramref name="destination"/>, which must be at
/// least as long as the source.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal void ChangeCaseToLower(ReadOnlySpan<char> source, Span<char> destination)
{
    int charCount = source.Length;
    Debug.Assert(destination.Length >= charCount);

    ref char firstSourceChar = ref MemoryMarshal.GetReference(source);
    ref char firstDestinationChar = ref MemoryMarshal.GetReference(destination);
    ChangeCaseCommon<ToLowerConversion>(ref firstSourceChar, ref firstDestinationChar, charCount);
}
/// <summary>
/// Upper-cases <paramref name="source"/> into <paramref name="destination"/>, which must be at
/// least as long as the source.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal void ChangeCaseToUpper(ReadOnlySpan<char> source, Span<char> destination)
{
    int charCount = source.Length;
    Debug.Assert(destination.Length >= charCount);

    ref char firstSourceChar = ref MemoryMarshal.GetReference(source);
    ref char firstDestinationChar = ref MemoryMarshal.GetReference(destination);
    ChangeCaseCommon<ToUpperConversion>(ref firstSourceChar, ref firstDestinationChar, charCount);
}
/// <summary>
/// Span-based entry point that forwards to the ref-based ChangeCaseCommon overload.
/// <paramref name="destination"/> must be at least as long as <paramref name="source"/>.
/// </summary>
/// <typeparam name="TConversion">The marker struct selecting the conversion direction.</typeparam>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private void ChangeCaseCommon<TConversion>(ReadOnlySpan<char> source, Span<char> destination)
    where TConversion : struct
{
    int charCount = source.Length;
    Debug.Assert(destination.Length >= charCount);

    ref char firstSourceChar = ref MemoryMarshal.GetReference(source);
    ref char firstDestinationChar = ref MemoryMarshal.GetReference(destination);
    ChangeCaseCommon<TConversion>(ref firstSourceChar, ref firstDestinationChar, charCount);
}
/// <summary>
/// Core case-change routine over raw char references. TConversion selects the direction
/// (ToUpperConversion or ToLowerConversion). When this culture's ASCII casing matches the
/// invariant culture, ASCII data is converted in managed code; otherwise, or once non-ASCII
/// data is seen, the remaining chars are handed to the pointer-based ChangeCase overload.
/// </summary>
203 private unsafe void ChangeCaseCommon
<TConversion
>(ref char source
, ref char destination
, int charCount
) where TConversion
: struct
205 Debug
.Assert(typeof(TConversion
) == typeof(ToUpperConversion
) || typeof(TConversion
) == typeof(ToLowerConversion
));
206 bool toUpper
= typeof(TConversion
) == typeof(ToUpperConversion
); // JIT will treat this as a constant in release builds
208 Debug
.Assert(!GlobalizationMode
.Invariant
);
209 Debug
.Assert(charCount
>= 0);
// Pin both buffers so they can be addressed through raw pointers below.
216 fixed (char* pSource
= &source
)
217 fixed (char* pDestination
= &destination
)
219 nuint currIdx
= 0; // in chars
221 if (IsAsciiCasingSameAsInvariant
)
223 // Read 4 chars (two 32-bit integers) at a time
// Highest start index from which four chars can still be read.
227 nuint lastIndexWhereCanReadFourChars
= (uint)charCount
- 4;
230 // This is a mostly branchless case change routine. Generally speaking, we assume that the majority
231 // of input is ASCII, so the 'if' checks below should normally evaluate to false. However, within
232 // the ASCII data, we expect that characters of either case might be about equally distributed, so
233 // we want the case change operation itself to be branchless. This gives optimal performance in the
234 // common case. We also expect that developers aren't passing very long (16+ character) strings into
235 // this method, so we won't bother vectorizing until data shows us that it's worthwhile to do so.
// First pair of chars of the four-char group.
237 uint tempValue
= Unsafe
.ReadUnaligned
<uint>(pSource
+ currIdx
);
238 if (!Utf16Utility
.AllCharsInUInt32AreAscii(tempValue
))
242 tempValue
= (toUpper
) ? Utf16Utility
.ConvertAllAsciiCharsInUInt32ToUppercase(tempValue
) : Utf16Utility
.ConvertAllAsciiCharsInUInt32ToLowercase(tempValue
);
243 Unsafe
.WriteUnaligned
<uint>(pDestination
+ currIdx
, tempValue
);
// Second pair of chars of the four-char group.
245 tempValue
= Unsafe
.ReadUnaligned
<uint>(pSource
+ currIdx
+ 2);
246 if (!Utf16Utility
.AllCharsInUInt32AreAscii(tempValue
))
248 goto NonAsciiSkipTwoChars
;
250 tempValue
= (toUpper
) ? Utf16Utility
.ConvertAllAsciiCharsInUInt32ToUppercase(tempValue
) : Utf16Utility
.ConvertAllAsciiCharsInUInt32ToLowercase(tempValue
);
251 Unsafe
.WriteUnaligned
<uint>(pDestination
+ currIdx
+ 2, tempValue
);
253 } while (currIdx
<= lastIndexWhereCanReadFourChars
);
255 // At this point, there are fewer than 4 characters remaining to convert.
256 Debug
.Assert((uint)charCount
- currIdx
< 4);
259 // If there are 2 or 3 characters left to convert, we'll convert 2 of them now.
260 if ((charCount
& 2) != 0)
262 uint tempValue
= Unsafe
.ReadUnaligned
<uint>(pSource
+ currIdx
);
263 if (!Utf16Utility
.AllCharsInUInt32AreAscii(tempValue
))
267 tempValue
= (toUpper
) ? Utf16Utility
.ConvertAllAsciiCharsInUInt32ToUppercase(tempValue
) : Utf16Utility
.ConvertAllAsciiCharsInUInt32ToLowercase(tempValue
);
268 Unsafe
.WriteUnaligned
<uint>(pDestination
+ currIdx
, tempValue
);
272 // If there's a single character left to convert, do it now.
273 if ((charCount
& 1) != 0)
275 uint tempValue
= pSource
[currIdx
];
// Values above 0x7F are outside the ASCII range.
276 if (tempValue
> 0x7Fu
)
280 tempValue
= (toUpper
) ? Utf16Utility
.ConvertAllAsciiCharsInUInt32ToUppercase(tempValue
) : Utf16Utility
.ConvertAllAsciiCharsInUInt32ToLowercase(tempValue
);
281 pDestination
[currIdx
] = (char)tempValue
;
284 // And we're finished!
288 // If we reached this point, we found non-ASCII data.
289 // Fall back down the p/invoke code path.
291 NonAsciiSkipTwoChars:
295 Debug
.Assert(currIdx
< (uint)charCount
, "We somehow read past the end of the buffer.");
// Only the chars from currIdx onward still need converting.
296 charCount
-= (int)currIdx
;
299 // We encountered non-ASCII data and therefore can't perform invariant case conversion; or the requested culture
300 // has a case conversion that's different from the invariant culture, even for ASCII data (e.g., tr-TR converts
301 // 'i' (U+0069) to Latin Capital Letter I With Dot Above (U+0130)).
303 ChangeCase(pSource
+ currIdx
, charCount
, pDestination
+ currIdx
, charCount
, toUpper
);
/// <summary>
/// String-based case change. TConversion selects the direction (ToUpperConversion or
/// ToLowerConversion). When this culture's ASCII casing matches the invariant culture, the
/// source is first scanned for ASCII chars that would actually change; a result string is
/// allocated only once a change (or non-ASCII data) is found, and the already-scanned prefix
/// is copied into it unchanged.
/// </summary>
310 private unsafe string ChangeCaseCommon
<TConversion
>(string source
) where TConversion
: struct
312 Debug
.Assert(typeof(TConversion
) == typeof(ToUpperConversion
) || typeof(TConversion
) == typeof(ToLowerConversion
));
313 bool toUpper
= typeof(TConversion
) == typeof(ToUpperConversion
); // JIT will treat this as a constant in release builds
315 Debug
.Assert(!GlobalizationMode
.Invariant
);
316 Debug
.Assert(source
!= null);
318 // If the string is empty, we're done.
319 if (source
.Length
== 0)
324 fixed (char* pSource
= source
)
326 nuint currIdx
= 0; // in chars
328 // If this culture's casing for ASCII is the same as invariant, try to take
329 // a fast path that'll work in managed code and ASCII rather than calling out
330 // to the OS for culture-aware casing.
331 if (IsAsciiCasingSameAsInvariant
)
333 // Read 2 chars (one 32-bit integer) at a time
335 if (source
.Length
>= 2)
// Highest start index from which two chars can still be read.
337 nuint lastIndexWhereCanReadTwoChars
= (uint)source
.Length
- 2;
340 // See the comments in ChangeCaseCommon<TConversion>(ROS<char>, Span<char>) for a full explanation of the below code.
342 uint tempValue
= Unsafe
.ReadUnaligned
<uint>(pSource
+ currIdx
);
343 if (!Utf16Utility
.AllCharsInUInt32AreAscii(tempValue
))
// Upper-casing only has work to do if a lowercase ASCII char is present, and vice versa.
347 if ((toUpper
) ? Utf16Utility
.UInt32ContainsAnyLowercaseAsciiChar(tempValue
) : Utf16Utility
.UInt32ContainsAnyUppercaseAsciiChar(tempValue
))
349 goto AsciiMustChangeCase
;
353 } while (currIdx
<= lastIndexWhereCanReadTwoChars
);
356 // If there's a single character left to convert, do it now.
357 if ((source
.Length
& 1) != 0)
359 uint tempValue
= pSource
[currIdx
];
// Values above 0x7F are outside the ASCII range.
360 if (tempValue
> 0x7Fu
)
// Unsigned range check: is the char in 'a'..'z' (upper-casing) or 'A'..'Z' (lower-casing)?
364 if ((toUpper
) ? ((tempValue
- 'a') <= (uint)('z' - 'a')) : ((tempValue
- 'A') <= (uint)('Z' - 'A')))
366 goto AsciiMustChangeCase
;
370 // We got through all characters without finding anything that needed to change - done!
375 // We reached ASCII data that requires a case change.
376 // This will necessarily allocate a new string, but let's try to stay within the managed (non-localization tables)
377 // conversion code path if we can.
379 string result
= string.FastAllocateString(source
.Length
); // changing case uses simple folding: doesn't change UTF-16 code unit count
381 // copy existing known-good data into the result
382 Span
<char> resultSpan
= new Span
<char>(ref result
.GetRawStringData(), result
.Length
);
383 source
.AsSpan(0, (int)currIdx
).CopyTo(resultSpan
);
385 // and re-run the fast span-based logic over the remainder of the data
386 ChangeCaseCommon
<TConversion
>(source
.AsSpan((int)currIdx
), resultSpan
.Slice((int)currIdx
));
393 // We reached non-ASCII data *or* the requested culture doesn't map ASCII data the same way as the invariant culture.
394 // In either case we need to fall back to the localization tables.
396 string result
= string.FastAllocateString(source
.Length
); // changing case uses simple folding: doesn't change UTF-16 code unit count
400 // copy existing known-good data into the result
401 Span
<char> resultSpan
= new Span
<char>(ref result
.GetRawStringData(), result
.Length
);
402 source
.AsSpan(0, (int)currIdx
).CopyTo(resultSpan
);
405 // and run the culture-aware logic over the remainder of the data
406 fixed (char* pResult
= result
)
408 ChangeCase(pSource
+ currIdx
, source
.Length
- (int)currIdx
, pResult
+ currIdx
, result
.Length
- (int)currIdx
, toUpper
);
/// <summary>
/// ASCII-only lower-casing of a string. A new string is allocated once an 'A'..'Z' char is
/// found; the chars before it are copied as-is and that char is lowered by setting bit 0x20.
/// </summary>
415 internal static unsafe string ToLowerAsciiInvariant(string s
)
422 fixed (char* pSource
= s
)
// Unsigned range check: true only for 'A'..'Z'.
427 if ((uint)(pSource
[i
] - 'A') <= (uint)('Z' - 'A'))
439 string result
= string.FastAllocateString(s
.Length
);
440 fixed (char* pResult
= result
)
// Copy the prefix that needed no change.
442 for (int j
= 0; j
< i
; j
++)
444 pResult
[j
] = pSource
[j
];
// Lower-case the char found above; OR-ing 0x20 maps 'A'..'Z' to 'a'..'z'.
447 pResult
[i
] = (char)(pSource
[i
] | 0x20);
452 pResult
[i
] = ToLowerAsciiInvariant(pSource
[i
]);
/// <summary>
/// Writes the ASCII-lowercased form of each char in <paramref name="source"/> to the same
/// position in <paramref name="destination"/>; non-ASCII chars are copied unchanged.
/// </summary>
internal static void ToLowerAsciiInvariant(ReadOnlySpan<char> source, Span<char> destination)
{
    Debug.Assert(destination.Length >= source.Length);

    int writeIndex = 0;
    foreach (char ch in source)
    {
        destination[writeIndex] = ToLowerAsciiInvariant(ch);
        writeIndex++;
    }
}
/// <summary>
/// ASCII-only upper-casing of a string. A new string is allocated once an 'a'..'z' char is
/// found; the chars before it are copied as-is and that char is raised by clearing bit 0x20.
/// </summary>
471 private static unsafe string ToUpperAsciiInvariant(string s
)
478 fixed (char* pSource
= s
)
// Unsigned range check: true only for 'a'..'z'.
483 if ((uint)(pSource
[i
] - 'a') <= (uint)('z' - 'a'))
495 string result
= string.FastAllocateString(s
.Length
);
496 fixed (char* pResult
= result
)
// Copy the prefix that needed no change.
498 for (int j
= 0; j
< i
; j
++)
500 pResult
[j
] = pSource
[j
];
// Upper-case the char found above; clearing 0x20 maps 'a'..'z' to 'A'..'Z'.
503 pResult
[i
] = (char)(pSource
[i
] & ~
0x20);
508 pResult
[i
] = ToUpperAsciiInvariant(pSource
[i
]);
/// <summary>
/// Writes the ASCII-uppercased form of each char in <paramref name="source"/> to the same
/// position in <paramref name="destination"/>; non-ASCII chars are copied unchanged.
/// </summary>
internal static void ToUpperAsciiInvariant(ReadOnlySpan<char> source, Span<char> destination)
{
    Debug.Assert(destination.Length >= source.Length);

    int writeIndex = 0;
    foreach (char ch in source)
    {
        destination[writeIndex] = ToUpperAsciiInvariant(ch);
        writeIndex++;
    }
}
/// <summary>
/// Lower-cases a single char using ASCII rules only: 'A'..'Z' map to 'a'..'z', everything
/// else is returned unchanged.
/// </summary>
private static char ToLowerAsciiInvariant(char c)
{
    bool isAsciiUpper = c >= 'A' && c <= 'Z';
    return isAsciiUpper ? (char)(c + ('a' - 'A')) : c;
}
/// <summary>
/// Converts a character to upper case. ASCII input takes the invariant fast path when
/// globalization is in invariant mode or this culture cases ASCII like the invariant culture.
/// </summary>
/// <param name="c">The character to convert.</param>
/// <returns>The upper-case form of <paramref name="c"/>.</returns>
public virtual char ToUpper(char c)
{
    bool useInvariantAsciiPath =
        GlobalizationMode.Invariant || (IsAscii(c) && IsAsciiCasingSameAsInvariant);

    return useInvariantAsciiPath
        ? ToUpperAsciiInvariant(c)
        : ChangeCase(c, toUpper: true);
}
/// <summary>Converts a string to upper case.</summary>
/// <param name="str">The string to convert.</param>
/// <returns>The upper-case form of <paramref name="str"/>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
public virtual string ToUpper(string str)
{
    if (str is null)
    {
        throw new ArgumentNullException(nameof(str));
    }

    // Invariant globalization mode only knows ASCII casing.
    return GlobalizationMode.Invariant
        ? ToUpperAsciiInvariant(str)
        : ChangeCaseCommon<ToUpperConversion>(str);
}
/// <summary>
/// Upper-cases a single char using ASCII rules only: 'a'..'z' map to 'A'..'Z', everything
/// else is returned unchanged.
/// </summary>
internal static char ToUpperAsciiInvariant(char c)
{
    bool isAsciiLower = c >= 'a' && c <= 'z';
    return isAsciiLower ? (char)(c - ('a' - 'A')) : c;
}
/// <summary>Returns true when <paramref name="c"/> is in the ASCII range (U+0000..U+007F).</summary>
private static bool IsAscii(char c)
{
    return c <= '\u007F';
}
/// <summary>
/// Gets whether this culture cases ASCII characters the same way as the invariant culture.
/// The answer is computed on first use and cached in <c>_isAsciiCasingSameAsInvariant</c>.
/// </summary>
private bool IsAsciiCasingSameAsInvariant
{
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    get
    {
        // The slow path is kept out of line so that this getter stays small.
        if (_isAsciiCasingSameAsInvariant == Tristate.NotInitialized)
        {
            PopulateIsAsciiCasingSameAsInvariant();
        }

        Tristate state = _isAsciiCasingSameAsInvariant;
        Debug.Assert(state == Tristate.True || state == Tristate.False);
        return state == Tristate.True;
    }
}
/// <summary>
/// Computes and stores <c>_isAsciiCasingSameAsInvariant</c> by comparing the lowercase and
/// uppercase ASCII alphabets with the culture's <see cref="CompareInfo"/>, ignoring case.
/// </summary>
[MethodImpl(MethodImplOptions.NoInlining)]
private void PopulateIsAsciiCasingSameAsInvariant()
{
    CompareInfo cultureComparer = CultureInfo.GetCultureInfo(_textInfoName).CompareInfo;
    int comparison = cultureComparer.Compare(
        "abcdefghijklmnopqrstuvwxyz",
        "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
        CompareOptions.IgnoreCase);

    if (comparison == 0)
    {
        _isAsciiCasingSameAsInvariant = Tristate.True;
    }
    else
    {
        _isAsciiCasingSameAsInvariant = Tristate.False;
    }
}
/// <summary>
/// Gets the right-to-left flag of the backing culture data, i.e. whether the dominant
/// direction of text and of UI such as the relative position of buttons and scroll bars
/// is right to left.
/// </summary>
public bool IsRightToLeft
{
    get { return _cultureData.IsRightToLeft; }
}
/// <summary>
/// Two <see cref="TextInfo"/> instances are equal when their <see cref="CultureName"/> values
/// are equal; any other object (or null) is not equal.
/// </summary>
public override bool Equals(object? obj)
{
    if (obj is TextInfo otherTextInfo)
    {
        return CultureName.Equals(otherTextInfo.CultureName);
    }

    return false;
}
/// <summary>Returns the hash code of <see cref="CultureName"/>, consistent with <see cref="Equals(object)"/>.</summary>
public override int GetHashCode()
{
    return CultureName.GetHashCode();
}
/// <summary>Returns "TextInfo - " followed by the culture name from the backing culture data.</summary>
public override string ToString() => string.Concat("TextInfo - ", _cultureData.CultureName);
618 /// Titlecasing refers to a casing practice wherein the first letter of a word is an uppercase letter
619 /// and the rest of the letters are lowercase. The choice of which words to titlecase in headings
620 /// and titles is dependent on language and local conventions. For example, "The Merry Wives of Windsor"
621 /// is the appropriate titlecasing of that play's name in English, with the word "of" not titlecased.
622 /// In German, however, the title is "Die lustigen Weiber von Windsor," and both "lustigen" and "von"
623 /// are not titlecased. In French even fewer words are titlecased: "Les joyeuses commeres de Windsor."
625 /// Moreover, the determination of what actually constitutes a word is language dependent, and this can
626 /// influence which letter or letters of a "word" are uppercased when titlecasing strings. For example
627 /// "l'arbre" is considered two words in French, whereas "can't" is considered one word in English.
629 public unsafe string ToTitleCase(string str
)
633 throw new ArgumentNullException(nameof(str
));
641 StringBuilder result
= new StringBuilder();
// Lowercased copy of the whole input; created lazily, only if some word needs lowercasing.
642 string? lowercaseData
= null;
643 // Store if the current culture is Dutch (special case)
644 bool isDutchCulture
= CultureName
.StartsWith("nl-", StringComparison
.OrdinalIgnoreCase
);
646 for (int i
= 0; i
< str
.Length
; i
++)
649 UnicodeCategory charType
= CharUnicodeInfo
.InternalGetUnicodeCategory(str
, i
, out charLen
);
650 if (char.CheckLetter(charType
))
652 // Special case to check for Dutch specific titlecasing with "IJ" characters
653 // at the beginning of a word
654 if (isDutchCulture
&& i
< str
.Length
- 1 && (str
[i
] == 'i' || str
[i
] == 'I') && (str
[i
+1] == 'j' || str
[i
+1] == 'J'))
661 // Do the titlecasing for the first character of the word.
662 i
= AddTitlecaseLetter(ref result
, ref str
, i
, charLen
) + 1;
665 // Convert the characters until the end of this word
667 int lowercaseStart
= i
;
669 // Use hasLowerCase flag to prevent from lowercasing acronyms (like "URT", "USA", etc)
670 // This is in line with Word 2000 behavior of titlecasing.
671 bool hasLowerCase
= (charType
== UnicodeCategory
.LowercaseLetter
);
673 // Use a loop to find all of the other letters following this letter.
674 while (i
< str
.Length
)
676 charType
= CharUnicodeInfo
.InternalGetUnicodeCategory(str
, i
, out charLen
);
677 if (IsLetterCategory(charType
))
679 if (charType
== UnicodeCategory
.LowercaseLetter
)
685 else if (str
[i
] == '\'')
690 if (lowercaseData
== null)
692 lowercaseData
= ToLower(str
);
694 result
.Append(lowercaseData
, lowercaseStart
, i
- lowercaseStart
);
698 result
.Append(str
, lowercaseStart
, i
- lowercaseStart
);
703 else if (!IsWordSeparator(charType
))
705 // This category is considered to be part of the word.
706 // This is any category whose bit is clear in c_wordSeparatorMask.
711 // A word separator. Break out of the loop.
// Number of chars between the titlecased letter and the end of the word.
716 int count
= i
- lowercaseStart
;
722 if (lowercaseData
== null)
724 lowercaseData
= ToLower(str
);
726 result
.Append(lowercaseData
, lowercaseStart
, count
);
730 result
.Append(str
, lowercaseStart
, count
);
736 // not a letter, just append it
737 i
= AddNonLetter(ref result
, ref str
, i
, charLen
);
742 // not a letter, just append it
743 i
= AddNonLetter(ref result
, ref str
, i
, charLen
);
746 return result
.ToString();
/// <summary>
/// Appends the non-letter at <c>inputIndex</c> to <c>result</c> unchanged. charLen is 1 or 2
/// (asserted below); the two-append sequence covers the second char of a two-char element.
/// </summary>
749 private static int AddNonLetter(ref StringBuilder result
, ref string input
, int inputIndex
, int charLen
)
751 Debug
.Assert(charLen
== 1 || charLen
== 2, "[TextInfo.AddNonLetter] CharUnicodeInfo.InternalGetUnicodeCategory returned an unexpected charLen!");
// Append both chars, advancing inputIndex past the first one.
755 result
.Append(input
[inputIndex
++]);
756 result
.Append(input
[inputIndex
]);
// Append the single char.
760 result
.Append(input
[inputIndex
]);
/// <summary>
/// Appends the titlecased form of the letter at <c>inputIndex</c> to <c>result</c>.
/// Surrogate pairs are upper-cased as a two-char span (or copied unchanged in invariant
/// mode); a fixed set of digraph characters map to their dedicated titlecase code points;
/// every other character is appended via ToUpper.
/// </summary>
765 private int AddTitlecaseLetter(ref StringBuilder result
, ref string input
, int inputIndex
, int charLen
)
767 Debug
.Assert(charLen
== 1 || charLen
== 2, "[TextInfo.AddTitlecaseLetter] CharUnicodeInfo.InternalGetUnicodeCategory returned an unexpected charLen!");
771 // for surrogate pairs do a ToUpper operation on the substring
772 ReadOnlySpan
<char> src
= input
.AsSpan(inputIndex
, 2);
773 if (GlobalizationMode
.Invariant
)
775 result
.Append(src
); // surrogate pair in invariant mode, so changing case is a nop
// Two-char scratch buffer on the stack for the upper-cased pair.
779 Span
<char> dst
= stackalloc char[2];
780 ChangeCaseToUpper(src
, dst
);
787 switch (input
[inputIndex
])
789 // For AppCompat, the Titlecase Case Mapping data from NDP 2.0 is used below.
790 case (char) 0x01C4: // DZ with Caron -> Dz with Caron
791 case (char) 0x01C5: // Dz with Caron -> Dz with Caron
792 case (char) 0x01C6: // dz with Caron -> Dz with Caron
793 result
.Append((char) 0x01C5);
795 case (char) 0x01C7: // LJ -> Lj
796 case (char) 0x01C8: // Lj -> Lj
797 case (char) 0x01C9: // lj -> Lj
798 result
.Append((char) 0x01C8);
800 case (char) 0x01CA: // NJ -> Nj
801 case (char) 0x01CB: // Nj -> Nj
802 case (char) 0x01CC: // nj -> Nj
803 result
.Append((char) 0x01CB);
805 case (char) 0x01F1: // DZ -> Dz
806 case (char) 0x01F2: // Dz -> Dz
807 case (char) 0x01F3: // dz -> Dz
808 result
.Append((char) 0x01F2);
// Every other character: plain upper-casing.
811 result
.Append(ToUpper(input
[inputIndex
]));
818 // Used in ToTitleCase():
819 // When we find a starting letter, the following bit mask decides if a category should be
820 // considered as word separator or not.
// Bit N corresponds to the UnicodeCategory whose numeric value is N; a set bit marks a separator.
821 private const int c_wordSeparatorMask
=
822 /* false */ (0 << 0) | // UppercaseLetter = 0,
823 /* false */ (0 << 1) | // LowercaseLetter = 1,
824 /* false */ (0 << 2) | // TitlecaseLetter = 2,
825 /* false */ (0 << 3) | // ModifierLetter = 3,
826 /* false */ (0 << 4) | // OtherLetter = 4,
827 /* false */ (0 << 5) | // NonSpacingMark = 5,
828 /* false */ (0 << 6) | // SpacingCombiningMark = 6,
829 /* false */ (0 << 7) | // EnclosingMark = 7,
830 /* false */ (0 << 8) | // DecimalDigitNumber = 8,
831 /* false */ (0 << 9) | // LetterNumber = 9,
832 /* false */ (0 << 10) | // OtherNumber = 10,
833 /* true */ (1 << 11) | // SpaceSeparator = 11,
834 /* true */ (1 << 12) | // LineSeparator = 12,
835 /* true */ (1 << 13) | // ParagraphSeparator = 13,
836 /* true */ (1 << 14) | // Control = 14,
837 /* true */ (1 << 15) | // Format = 15,
838 /* false */ (0 << 16) | // Surrogate = 16,
839 /* false */ (0 << 17) | // PrivateUse = 17,
840 /* true */ (1 << 18) | // ConnectorPunctuation = 18,
841 /* true */ (1 << 19) | // DashPunctuation = 19,
842 /* true */ (1 << 20) | // OpenPunctuation = 20,
843 /* true */ (1 << 21) | // ClosePunctuation = 21,
844 /* true */ (1 << 22) | // InitialQuotePunctuation = 22,
845 /* true */ (1 << 23) | // FinalQuotePunctuation = 23,
846 /* true */ (1 << 24) | // OtherPunctuation = 24,
847 /* true */ (1 << 25) | // MathSymbol = 25,
848 /* true */ (1 << 26) | // CurrencySymbol = 26,
849 /* true */ (1 << 27) | // ModifierSymbol = 27,
850 /* true */ (1 << 28) | // OtherSymbol = 28,
851 /* false */ (0 << 29); // OtherNotAssigned = 29;
/// <summary>
/// Returns true when the bit for <paramref name="category"/> is set in
/// <c>c_wordSeparatorMask</c>, i.e. the category ends a word during titlecasing.
/// </summary>
private static bool IsWordSeparator(UnicodeCategory category)
{
    int categoryBit = (c_wordSeparatorMask >> (int)category) & 1;
    return categoryBit != 0;
}
/// <summary>
/// Returns true when <paramref name="uc"/> is one of the five letter categories
/// (uppercase, lowercase, titlecase, modifier or other letter).
/// </summary>
private static bool IsLetterCategory(UnicodeCategory uc)
{
    switch (uc)
    {
        case UnicodeCategory.UppercaseLetter:
        case UnicodeCategory.LowercaseLetter:
        case UnicodeCategory.TitlecaseLetter:
        case UnicodeCategory.ModifierLetter:
        case UnicodeCategory.OtherLetter:
            return true;

        default:
            return false;
    }
}
867 // A dummy struct that is used for 'ToUpper' in generic parameters.
// It carries no data; ChangeCaseCommon compares typeof(TConversion) against it to pick the direction.
868 private readonly struct ToUpperConversion { }
870 // A dummy struct that is used for 'ToLower' in generic parameters.
// It carries no data; ChangeCaseCommon compares typeof(TConversion) against it to pick the direction.
871 private readonly struct ToLowerConversion { }