Fix IDE0025 (use expression body for properties)
[mono-project.git] / netcore / System.Private.CoreLib / shared / System / Globalization / TextInfo.cs
blob52ea35043dbe5f5b6dd5d2ad0a95dcb11fca7d79
1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
5 using System.Diagnostics;
6 using System.Runtime.CompilerServices;
7 using System.Runtime.InteropServices;
8 using System.Runtime.Serialization;
9 using System.Text;
10 using System.Text.Unicode;
11 using Internal.Runtime.CompilerServices;
13 #pragma warning disable SA1121 // explicitly using type aliases instead of built-in types
14 #if BIT64
15 using nuint = System.UInt64;
16 using nint = System.Int64;
17 #else // BIT64
18 using nuint = System.UInt32;
19 using nint = System.Int32;
20 #endif // BIT64
22 namespace System.Globalization
24 /// <summary>
25 /// This class defines behaviors specific to a writing system.
26 /// A writing system is the collection of scripts and orthographic rules
27 /// required to represent a language as text.
28 /// </summary>
29 public partial class TextInfo : ICloneable, IDeserializationCallback
// Three-state flag used to cache a boolean that is computed lazily.
private enum Tristate : byte
{
    NotInitialized, // 0: value has not been computed yet
    False,          // 1
    True,           // 2
}
// List separator override; stays null until it is set or first read through ListSeparator.
private string? _listSeparator;

// Whether this instance rejects modifications (see VerifyWritable).
private bool _isReadOnly;

private readonly string _cultureName;
private readonly CultureData _cultureData;

// Name of the text info we're using (i.e. _cultureData.TextInfoName)
private readonly string _textInfoName;

// Lazily computed answer to "does this culture case ASCII the same way the invariant culture does?"
private Tristate _isAsciiCasingSameAsInvariant = Tristate.NotInitialized;
// Cache for the Invariant property below.
private static volatile TextInfo? s_invariant;

/// <summary>
/// Gets the text info built from <see cref="CultureData.Invariant"/>; the instance is
/// created on first access and cached in <c>s_invariant</c>.
/// </summary>
internal static TextInfo Invariant
    => s_invariant ??= new TextInfo(CultureData.Invariant);
/// <summary>
/// Initializes a text info from the supplied culture data.
/// </summary>
/// <param name="cultureData">Primary data source; the culture name and text info name are copied from it.</param>
internal TextInfo(CultureData cultureData)
{
    // The culture data is our primary data source; the names are cached alongside it.
    _cultureData = cultureData;
    _cultureName = cultureData.CultureName;
    _textInfoName = cultureData.TextInfoName;

    FinishInitialization();
}
/// <summary>
/// Deserialization callback; not supported, always throws.
/// </summary>
/// <exception cref="PlatformNotSupportedException">Always thrown.</exception>
void IDeserializationCallback.OnDeserialization(object? sender)
    => throw new PlatformNotSupportedException();
/// <summary>Gets the ANSI code page reported by the underlying culture data.</summary>
public virtual int ANSICodePage => _cultureData.ANSICodePage;

/// <summary>Gets the OEM code page reported by the underlying culture data.</summary>
public virtual int OEMCodePage => _cultureData.OEMCodePage;

/// <summary>Gets the Macintosh code page reported by the underlying culture data.</summary>
public virtual int MacCodePage => _cultureData.MacCodePage;

/// <summary>Gets the EBCDIC code page reported by the underlying culture data.</summary>
public virtual int EBCDICCodePage => _cultureData.EBCDICCodePage;
/// <summary>
/// Gets the LCID of the culture looked up by this instance's text info name.
/// </summary>
public int LCID => CultureInfo.GetCultureInfo(_textInfoName).LCID;

/// <summary>Gets the text info name this instance was created with.</summary>
public string CultureName => _textInfoName;

/// <summary>Gets a value indicating whether this instance is read-only.</summary>
public bool IsReadOnly => _isReadOnly;
/// <summary>
/// Creates a shallow copy of this instance. The copy is always writable,
/// even when this instance is read-only.
/// </summary>
/// <returns>The writable copy.</returns>
public virtual object Clone()
{
    TextInfo copy = (TextInfo)MemberwiseClone();
    copy.SetReadOnlyState(false);
    return copy;
}
/// <summary>
/// Returns a read-only text info: the input itself when it is already
/// read-only, otherwise a shallow copy marked read-only.
/// </summary>
/// <param name="textInfo">The instance to wrap.</param>
/// <exception cref="ArgumentNullException"><paramref name="textInfo"/> is null.</exception>
public static TextInfo ReadOnly(TextInfo textInfo)
{
    if (textInfo == null)
    {
        throw new ArgumentNullException(nameof(textInfo));
    }

    if (!textInfo.IsReadOnly)
    {
        TextInfo readOnlyCopy = (TextInfo)textInfo.MemberwiseClone();
        readOnlyCopy.SetReadOnlyState(true);
        return readOnlyCopy;
    }

    // Already read-only: no copy needed.
    return textInfo;
}
/// <summary>
/// Throws when this instance is read-only; otherwise does nothing.
/// </summary>
/// <exception cref="InvalidOperationException">The instance is read-only.</exception>
private void VerifyWritable()
{
    if (!_isReadOnly)
    {
        return;
    }

    throw new InvalidOperationException(SR.InvalidOperation_ReadOnly);
}
/// <summary>Sets the read-only flag of this instance.</summary>
/// <param name="readOnly">The new value of the flag.</param>
internal void SetReadOnlyState(bool readOnly) => _isReadOnly = readOnly;
/// <summary>
/// Gets or sets the string used to separate items in a list.
/// </summary>
/// <remarks>
/// Until a value is assigned, the getter caches and returns the separator supplied by the culture data.
/// </remarks>
/// <exception cref="ArgumentNullException">The assigned value is null.</exception>
/// <exception cref="InvalidOperationException">The instance is read-only.</exception>
public virtual string ListSeparator
{
    get => _listSeparator ??= _cultureData.ListSeparator;
    set
    {
        // The null check intentionally comes before the read-only check.
        if (value == null)
        {
            throw new ArgumentNullException(nameof(value));
        }

        VerifyWritable();
        _listSeparator = value;
    }
}
/// <summary>
/// Converts the character to lower case. Certain locales
/// have different casing semantics from the file systems in Win32.
/// </summary>
/// <param name="c">The character to convert.</param>
/// <returns>The lowercased character.</returns>
public virtual char ToLower(char c)
{
    // ASCII-only conversion applies in invariant globalization mode, or for an ASCII
    // character when this culture cases ASCII the same way the invariant culture does.
    bool useAsciiPath = GlobalizationMode.Invariant || (IsAscii(c) && IsAsciiCasingSameAsInvariant);

    return useAsciiPath
        ? ToLowerAsciiInvariant(c)
        : ChangeCase(c, toUpper: false);
}
/// <summary>
/// Converts the string to lower case.
/// </summary>
/// <param name="str">The string to convert.</param>
/// <returns>The lowercased string.</returns>
/// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
public virtual string ToLower(string str)
{
    if (str == null)
    {
        throw new ArgumentNullException(nameof(str));
    }

    // Invariant globalization mode only ever changes ASCII letters.
    return GlobalizationMode.Invariant
        ? ToLowerAsciiInvariant(str)
        : ChangeCaseCommon<ToLowerConversion>(str);
}
/// <summary>
/// Changes the case of a single character through the pointer-based ChangeCase overload.
/// Must not be called in invariant globalization mode.
/// </summary>
/// <param name="c">The character to convert.</param>
/// <param name="toUpper">true to upper-case, false to lower-case.</param>
private unsafe char ChangeCase(char c, bool toUpper)
{
    Debug.Assert(!GlobalizationMode.Invariant);

    // One-character source and destination buffers living on the stack.
    char converted = default;
    ChangeCase(&c, 1, &converted, 1, toUpper);
    return converted;
}
/// <summary>
/// Lower-cases <paramref name="source"/> into <paramref name="destination"/>,
/// which must be at least as long as the source.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal void ChangeCaseToLower(ReadOnlySpan<char> source, Span<char> destination)
{
    Debug.Assert(destination.Length >= source.Length);
    ChangeCaseCommon<ToLowerConversion>(source, destination);
}
/// <summary>
/// Upper-cases <paramref name="source"/> into <paramref name="destination"/>,
/// which must be at least as long as the source.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal void ChangeCaseToUpper(ReadOnlySpan<char> source, Span<char> destination)
{
    Debug.Assert(destination.Length >= source.Length);
    ChangeCaseCommon<ToUpperConversion>(source, destination);
}
/// <summary>
/// Span-based entry point: forwards to the ref-based overload using the first
/// element of each span and the source length.
/// </summary>
/// <typeparam name="TConversion">ToUpperConversion or ToLowerConversion, selecting the direction.</typeparam>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private void ChangeCaseCommon<TConversion>(ReadOnlySpan<char> source, Span<char> destination) where TConversion : struct
{
    Debug.Assert(destination.Length >= source.Length);

    ref char firstSource = ref MemoryMarshal.GetReference(source);
    ref char firstDestination = ref MemoryMarshal.GetReference(destination);
    ChangeCaseCommon<TConversion>(ref firstSource, ref firstDestination, source.Length);
}
/// <summary>
/// Converts <paramref name="charCount"/> chars starting at <paramref name="source"/> and writes
/// the result starting at <paramref name="destination"/>.
/// </summary>
/// <remarks>
/// When this culture cases ASCII like the invariant culture, leading ASCII data is converted in
/// managed code; everything from the first non-ASCII char onwards is handed to the pointer-based
/// ChangeCase overload. Otherwise the whole input goes to that overload.
/// </remarks>
/// <typeparam name="TConversion">ToUpperConversion or ToLowerConversion, selecting the direction.</typeparam>
private unsafe void ChangeCaseCommon<TConversion>(ref char source, ref char destination, int charCount) where TConversion : struct
{
    Debug.Assert(typeof(TConversion) == typeof(ToUpperConversion) || typeof(TConversion) == typeof(ToLowerConversion));
    bool toUpper = typeof(TConversion) == typeof(ToUpperConversion); // JIT will treat this as a constant in release builds

    Debug.Assert(!GlobalizationMode.Invariant);
    Debug.Assert(charCount >= 0);

    if (charCount == 0)
    {
        return;
    }

    fixed (char* pSource = &source)
    fixed (char* pDestination = &destination)
    {
        nuint offset = 0; // in chars

        if (IsAsciiCasingSameAsInvariant)
        {
            // Process 4 chars (two 32-bit integers) per iteration.
            if (charCount >= 4)
            {
                nuint lastQuadStart = (uint)charCount - 4;
                do
                {
                    // Mostly branchless: input is assumed to be predominantly ASCII, so the
                    // "not ASCII" checks should rarely fire, while the case change itself is done
                    // without branching because upper- and lower-case chars are expected to be
                    // about equally common. Long inputs are not expected here, so this is not
                    // vectorized.
                    uint pair = Unsafe.ReadUnaligned<uint>(pSource + offset);
                    if (!Utf16Utility.AllCharsInUInt32AreAscii(pair))
                    {
                        goto NonAscii;
                    }
                    pair = (toUpper) ? Utf16Utility.ConvertAllAsciiCharsInUInt32ToUppercase(pair) : Utf16Utility.ConvertAllAsciiCharsInUInt32ToLowercase(pair);
                    Unsafe.WriteUnaligned<uint>(pDestination + offset, pair);

                    pair = Unsafe.ReadUnaligned<uint>(pSource + offset + 2);
                    if (!Utf16Utility.AllCharsInUInt32AreAscii(pair))
                    {
                        // The first two chars of this group were already converted.
                        goto NonAsciiSkipTwoChars;
                    }
                    pair = (toUpper) ? Utf16Utility.ConvertAllAsciiCharsInUInt32ToUppercase(pair) : Utf16Utility.ConvertAllAsciiCharsInUInt32ToLowercase(pair);
                    Unsafe.WriteUnaligned<uint>(pDestination + offset + 2, pair);
                    offset += 4;
                } while (offset <= lastQuadStart);

                // Fewer than 4 chars remain at this point.
                Debug.Assert((uint)charCount - offset < 4);
            }

            // With 2 or 3 chars left, convert two of them now.
            if ((charCount & 2) != 0)
            {
                uint pair = Unsafe.ReadUnaligned<uint>(pSource + offset);
                if (!Utf16Utility.AllCharsInUInt32AreAscii(pair))
                {
                    goto NonAscii;
                }
                pair = (toUpper) ? Utf16Utility.ConvertAllAsciiCharsInUInt32ToUppercase(pair) : Utf16Utility.ConvertAllAsciiCharsInUInt32ToLowercase(pair);
                Unsafe.WriteUnaligned<uint>(pDestination + offset, pair);
                offset += 2;
            }

            // A single trailing char, if any.
            if ((charCount & 1) != 0)
            {
                uint single = pSource[offset];
                if (single > 0x7Fu)
                {
                    goto NonAscii;
                }
                single = (toUpper) ? Utf16Utility.ConvertAllAsciiCharsInUInt32ToUppercase(single) : Utf16Utility.ConvertAllAsciiCharsInUInt32ToLowercase(single);
                pDestination[offset] = (char)single;
            }

            // Everything was ASCII and has been converted.
            return;

            // Non-ASCII data was found: fall back to the p/invoke code path for the rest.

        NonAsciiSkipTwoChars:
            offset += 2;

        NonAscii:
            Debug.Assert(offset < (uint)charCount, "We somehow read past the end of the buffer.");
            charCount -= (int)offset;
        }

        // Either non-ASCII data was encountered, or the requested culture cases even ASCII
        // differently from the invariant culture (e.g., tr-TR converts 'i' (U+0069) to
        // Latin Capital Letter I With Dot Above (U+0130)).
        ChangeCase(pSource + offset, charCount, pDestination + offset, charCount, toUpper);
    }
}
/// <summary>
/// Returns <paramref name="source"/> converted to the requested case. The input instance itself
/// is returned when the managed ASCII scan finds nothing that needs to change.
/// </summary>
/// <typeparam name="TConversion">ToUpperConversion or ToLowerConversion, selecting the direction.</typeparam>
private unsafe string ChangeCaseCommon<TConversion>(string source) where TConversion : struct
{
    Debug.Assert(typeof(TConversion) == typeof(ToUpperConversion) || typeof(TConversion) == typeof(ToLowerConversion));
    bool toUpper = typeof(TConversion) == typeof(ToUpperConversion); // JIT will treat this as a constant in release builds

    Debug.Assert(!GlobalizationMode.Invariant);
    Debug.Assert(source != null);

    // Nothing to do for an empty string.
    if (source.Length == 0)
    {
        return string.Empty;
    }

    fixed (char* pSource = source)
    {
        nuint offset = 0; // in chars

        // When this culture cases ASCII like the invariant culture, scan in managed code
        // first instead of calling out to the OS for culture-aware casing.
        if (IsAsciiCasingSameAsInvariant)
        {
            // Scan 2 chars (one 32-bit integer) per iteration.
            if (source.Length >= 2)
            {
                nuint lastPairStart = (uint)source.Length - 2;
                do
                {
                    // Same approach as the ref-based ChangeCaseCommon overload, except that
                    // this loop only looks for the first char that needs work.
                    uint pair = Unsafe.ReadUnaligned<uint>(pSource + offset);
                    if (!Utf16Utility.AllCharsInUInt32AreAscii(pair))
                    {
                        goto NotAscii;
                    }
                    if ((toUpper) ? Utf16Utility.UInt32ContainsAnyLowercaseAsciiChar(pair) : Utf16Utility.UInt32ContainsAnyUppercaseAsciiChar(pair))
                    {
                        goto AsciiMustChangeCase;
                    }

                    offset += 2;
                } while (offset <= lastPairStart);
            }

            // A single trailing char, if any.
            if ((source.Length & 1) != 0)
            {
                uint single = pSource[offset];
                if (single > 0x7Fu)
                {
                    goto NotAscii;
                }
                if ((toUpper) ? ((single - 'a') <= (uint)('z' - 'a')) : ((single - 'A') <= (uint)('Z' - 'A')))
                {
                    goto AsciiMustChangeCase;
                }
            }

            // Every char was inspected and none needs to change.
            return source;

        AsciiMustChangeCase:
            {
                // ASCII data that requires a case change was found. A new string has to be
                // allocated, but the conversion can stay on the managed span-based path.
                string converted = string.FastAllocateString(source.Length); // changing case uses simple folding: doesn't change UTF-16 code unit count

                // Copy the prefix that is already known to be correct...
                Span<char> convertedSpan = new Span<char>(ref converted.GetRawStringData(), converted.Length);
                source.AsSpan(0, (int)offset).CopyTo(convertedSpan);

                // ...and convert the remainder with the span-based logic.
                ChangeCaseCommon<TConversion>(source.AsSpan((int)offset), convertedSpan.Slice((int)offset));
                return converted;
            }
        }

    NotAscii:
        {
            // Non-ASCII data was found *or* this culture does not case ASCII like the invariant
            // culture. Either way, fall back to the localization tables.
            string converted = string.FastAllocateString(source.Length); // changing case uses simple folding: doesn't change UTF-16 code unit count

            if (offset > 0)
            {
                // Copy the prefix that is already known to be correct.
                Span<char> convertedSpan = new Span<char>(ref converted.GetRawStringData(), converted.Length);
                source.AsSpan(0, (int)offset).CopyTo(convertedSpan);
            }

            // Run the culture-aware logic over the remainder.
            fixed (char* pConverted = converted)
            {
                ChangeCase(pSource + offset, source.Length - (int)offset, pConverted + offset, converted.Length - (int)offset, toUpper);
            }
            return converted;
        }
    }
}
/// <summary>
/// Lower-cases the ASCII letters 'A'-'Z' in <paramref name="s"/>; all other chars are left
/// untouched. Returns the input instance when it contains no such letter.
/// </summary>
internal static unsafe string ToLowerAsciiInvariant(string s)
{
    int length = s.Length;
    if (length == 0)
    {
        return string.Empty;
    }

    fixed (char* pSource = s)
    {
        // Find the first char that actually needs to change.
        int firstUpper = 0;
        while (firstUpper < length && (uint)(pSource[firstUpper] - 'A') > (uint)('Z' - 'A'))
        {
            firstUpper++;
        }

        if (firstUpper >= length)
        {
            // No upper-case ASCII letter anywhere: reuse the input.
            return s;
        }

        string result = string.FastAllocateString(length);
        fixed (char* pResult = result)
        {
            // Unchanged prefix.
            for (int j = 0; j < firstUpper; j++)
            {
                pResult[j] = pSource[j];
            }

            // This char is known to be in 'A'-'Z', so setting bit 0x20 lower-cases it.
            pResult[firstUpper] = (char)(pSource[firstUpper] | 0x20);

            // Remainder, one char at a time.
            for (int k = firstUpper + 1; k < length; k++)
            {
                pResult[k] = ToLowerAsciiInvariant(pSource[k]);
            }
        }

        return result;
    }
}
/// <summary>
/// Writes <paramref name="source"/> to <paramref name="destination"/> with the ASCII letters
/// 'A'-'Z' lower-cased. The destination must be at least as long as the source.
/// </summary>
internal static void ToLowerAsciiInvariant(ReadOnlySpan<char> source, Span<char> destination)
{
    Debug.Assert(destination.Length >= source.Length);

    int index = 0;
    foreach (char c in source)
    {
        destination[index] = ToLowerAsciiInvariant(c);
        index++;
    }
}
/// <summary>
/// Upper-cases the ASCII letters 'a'-'z' in <paramref name="s"/>; all other chars are left
/// untouched. Returns the input instance when it contains no such letter.
/// </summary>
private static unsafe string ToUpperAsciiInvariant(string s)
{
    int length = s.Length;
    if (length == 0)
    {
        return string.Empty;
    }

    fixed (char* pSource = s)
    {
        // Find the first char that actually needs to change.
        int firstLower = 0;
        while (firstLower < length && (uint)(pSource[firstLower] - 'a') > (uint)('z' - 'a'))
        {
            firstLower++;
        }

        if (firstLower >= length)
        {
            // No lower-case ASCII letter anywhere: reuse the input.
            return s;
        }

        string result = string.FastAllocateString(length);
        fixed (char* pResult = result)
        {
            // Unchanged prefix.
            for (int j = 0; j < firstLower; j++)
            {
                pResult[j] = pSource[j];
            }

            // This char is known to be in 'a'-'z', so clearing bit 0x20 upper-cases it.
            pResult[firstLower] = (char)(pSource[firstLower] & ~0x20);

            // Remainder, one char at a time.
            for (int k = firstLower + 1; k < length; k++)
            {
                pResult[k] = ToUpperAsciiInvariant(pSource[k]);
            }
        }

        return result;
    }
}
/// <summary>
/// Writes <paramref name="source"/> to <paramref name="destination"/> with the ASCII letters
/// 'a'-'z' upper-cased. The destination must be at least as long as the source.
/// </summary>
internal static void ToUpperAsciiInvariant(ReadOnlySpan<char> source, Span<char> destination)
{
    Debug.Assert(destination.Length >= source.Length);

    int index = 0;
    foreach (char c in source)
    {
        destination[index] = ToUpperAsciiInvariant(c);
        index++;
    }
}
/// <summary>
/// Lower-cases <paramref name="c"/> when it is one of 'A'-'Z'; any other char is returned unchanged.
/// </summary>
private static char ToLowerAsciiInvariant(char c)
{
    // A single unsigned comparison covers both ends of the 'A'..'Z' range.
    bool isUpperAscii = (uint)(c - 'A') <= (uint)('Z' - 'A');
    return isUpperAscii ? (char)(c | 0x20) : c;
}
/// <summary>
/// Converts the character to upper case. Certain locales
/// have different casing semantics from the file systems in Win32.
/// </summary>
/// <param name="c">The character to convert.</param>
/// <returns>The uppercased character.</returns>
public virtual char ToUpper(char c)
{
    // ASCII-only conversion applies in invariant globalization mode, or for an ASCII
    // character when this culture cases ASCII the same way the invariant culture does.
    bool useAsciiPath = GlobalizationMode.Invariant || (IsAscii(c) && IsAsciiCasingSameAsInvariant);

    return useAsciiPath
        ? ToUpperAsciiInvariant(c)
        : ChangeCase(c, toUpper: true);
}
/// <summary>
/// Converts the string to upper case.
/// </summary>
/// <param name="str">The string to convert.</param>
/// <returns>The uppercased string.</returns>
/// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
public virtual string ToUpper(string str)
{
    if (str == null)
    {
        throw new ArgumentNullException(nameof(str));
    }

    // Invariant globalization mode only ever changes ASCII letters.
    return GlobalizationMode.Invariant
        ? ToUpperAsciiInvariant(str)
        : ChangeCaseCommon<ToUpperConversion>(str);
}
/// <summary>
/// Upper-cases <paramref name="c"/> when it is one of 'a'-'z'; any other char is returned unchanged.
/// </summary>
internal static char ToUpperAsciiInvariant(char c)
{
    // A single unsigned comparison covers both ends of the 'a'..'z' range.
    bool isLowerAscii = (uint)(c - 'a') <= (uint)('z' - 'a');
    return isLowerAscii ? (char)(c & ~0x20) : c;
}
/// <summary>Returns true when <paramref name="c"/> is in the ASCII range (U+0000..U+007F).</summary>
private static bool IsAscii(char c) => c <= '\u007F';
/// <summary>
/// Gets whether this culture cases ASCII the same way the invariant culture does.
/// The answer is computed on first use and cached in <c>_isAsciiCasingSameAsInvariant</c>.
/// </summary>
private bool IsAsciiCasingSameAsInvariant
{
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    get
    {
        // The computation itself lives in a separate non-inlined method.
        if (_isAsciiCasingSameAsInvariant == Tristate.NotInitialized)
        {
            PopulateIsAsciiCasingSameAsInvariant();
        }

        Debug.Assert(_isAsciiCasingSameAsInvariant == Tristate.True || _isAsciiCasingSameAsInvariant == Tristate.False);
        return _isAsciiCasingSameAsInvariant == Tristate.True;
    }
}
/// <summary>
/// Computes and stores <c>_isAsciiCasingSameAsInvariant</c> by comparing the lower-case and
/// upper-case ASCII alphabets, ignoring case, with this text info's culture.
/// </summary>
[MethodImpl(MethodImplOptions.NoInlining)]
private void PopulateIsAsciiCasingSameAsInvariant()
{
    CompareInfo cultureComparer = CultureInfo.GetCultureInfo(_textInfoName).CompareInfo;
    int comparison = cultureComparer.Compare("abcdefghijklmnopqrstuvwxyz", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", CompareOptions.IgnoreCase);

    // Equal under IgnoreCase means ASCII letters fold the way they do for the invariant culture.
    _isAsciiCasingSameAsInvariant = comparison == 0 ? Tristate.True : Tristate.False;
}
/// <summary>
/// Returns true if the dominant direction of text and UI (such as the
/// relative position of buttons and scroll bars) is right-to-left,
/// as reported by the underlying culture data.
/// </summary>
public bool IsRightToLeft => _cultureData.IsRightToLeft;
/// <summary>
/// Two text infos are equal when their <see cref="CultureName"/> values are equal.
/// </summary>
public override bool Equals(object? obj)
{
    if (obj is TextInfo other)
    {
        return CultureName.Equals(other.CultureName);
    }

    return false;
}
/// <summary>Returns the hash code of <see cref="CultureName"/>, matching <see cref="Equals(object)"/>.</summary>
public override int GetHashCode()
    => CultureName.GetHashCode();
/// <summary>Returns "TextInfo - " followed by the culture name taken from the culture data.</summary>
public override string ToString() => "TextInfo - " + _cultureData.CultureName;
/// <summary>
/// Titlecasing refers to a casing practice wherein the first letter of a word is an uppercase letter
/// and the rest of the letters are lowercase. The choice of which words to titlecase in headings
/// and titles is dependent on language and local conventions. For example, "The Merry Wives of Windsor"
/// is the appropriate titlecasing of that play's name in English, with the word "of" not titlecased.
/// In German, however, the title is "Die lustigen Weiber von Windsor," and both "lustigen" and "von"
/// are not titlecased. In French even fewer words are titlecased: "Les joyeuses commeres de Windsor."
///
/// Moreover, the determination of what actually constitutes a word is language dependent, and this can
/// influence which letter or letters of a "word" are uppercased when titlecasing strings. For example
/// "l'arbre" is considered two words in French, whereas "can't" is considered one word in English.
/// </summary>
/// <param name="str">The string to convert.</param>
/// <returns>The titlecased string; an empty input is returned as-is.</returns>
/// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
public unsafe string ToTitleCase(string str)
{
    if (str == null)
    {
        throw new ArgumentNullException(nameof(str));
    }

    if (str.Length == 0)
    {
        return str;
    }

    StringBuilder result = new StringBuilder();

    // Lower-cased copy of the whole input; only produced once some word needs it.
    string? lowercaseData = null;

    // Store if the current culture is Dutch (special case)
    bool isDutchCulture = CultureName.StartsWith("nl-", StringComparison.OrdinalIgnoreCase);

    // Appends `length` chars starting at `start`, taken from the lower-cased copy of the
    // input when `lowercase` is set and from the input itself otherwise.
    void AppendWordTail(int start, int length, bool lowercase)
    {
        if (lowercase)
        {
            lowercaseData ??= ToLower(str);
            result.Append(lowercaseData, start, length);
        }
        else
        {
            result.Append(str, start, length);
        }
    }

    for (int i = 0; i < str.Length; i++)
    {
        UnicodeCategory charType = CharUnicodeInfo.InternalGetUnicodeCategory(str, i, out int charLen);
        if (!char.CheckLetter(charType))
        {
            // not a letter, just append it
            i = AddNonLetter(ref result, ref str, i, charLen);
            continue;
        }

        // Special case to check for Dutch specific titlecasing with "IJ" characters
        // at the beginning of a word
        if (isDutchCulture && i < str.Length - 1 && (str[i] == 'i' || str[i] == 'I') && (str[i + 1] == 'j' || str[i + 1] == 'J'))
        {
            result.Append("IJ");
            i += 2;
        }
        else
        {
            // Do the titlecasing for the first character of the word.
            i = AddTitlecaseLetter(ref result, ref str, i, charLen) + 1;
        }

        // The rest of the word, from here to its end, gets lower-cased.
        int lowercaseStart = i;

        // Use hasLowerCase flag to prevent from lowercasing acronyms (like "URT", "USA", etc)
        // This is in line with Word 2000 behavior of titlecasing.
        bool hasLowerCase = (charType == UnicodeCategory.LowercaseLetter);

        // Walk over everything that still belongs to this word.
        while (i < str.Length)
        {
            charType = CharUnicodeInfo.InternalGetUnicodeCategory(str, i, out charLen);
            if (IsLetterCategory(charType))
            {
                if (charType == UnicodeCategory.LowercaseLetter)
                {
                    hasLowerCase = true;
                }
                i += charLen;
            }
            else if (str[i] == '\'')
            {
                // Flush everything up to and including the apostrophe, then keep going;
                // whatever follows the apostrophe is always lower-cased.
                i++;
                AppendWordTail(lowercaseStart, i - lowercaseStart, hasLowerCase);
                lowercaseStart = i;
                hasLowerCase = true;
            }
            else if (!IsWordSeparator(charType))
            {
                // This category is considered to be part of the word.
                // This is any category that is not flagged in c_wordSeparatorMask.
                i += charLen;
            }
            else
            {
                // A word separator. Break out of the loop.
                break;
            }
        }

        int count = i - lowercaseStart;
        if (count > 0)
        {
            AppendWordTail(lowercaseStart, count, hasLowerCase);
        }

        if (i < str.Length)
        {
            // The word ended on a separator: not a letter, just append it
            i = AddNonLetter(ref result, ref str, i, charLen);
        }
    }

    return result.ToString();
}
/// <summary>
/// Appends the character at <paramref name="inputIndex"/> (one or two UTF-16 code units,
/// per <paramref name="charLen"/>) to <paramref name="result"/> unchanged.
/// </summary>
/// <returns>The index of the last code unit that was appended.</returns>
private static int AddNonLetter(ref StringBuilder result, ref string input, int inputIndex, int charLen)
{
    Debug.Assert(charLen == 1 || charLen == 2, "[TextInfo.AddNonLetter] CharUnicodeInfo.InternalGetUnicodeCategory returned an unexpected charLen!");

    result.Append(input[inputIndex]);
    if (charLen == 2)
    {
        // Surrogate pair: copy the second code unit as well.
        inputIndex++;
        result.Append(input[inputIndex]);
    }

    return inputIndex;
}
/// <summary>
/// Appends the titlecased form of the character at <paramref name="inputIndex"/>
/// (one or two UTF-16 code units, per <paramref name="charLen"/>) to <paramref name="result"/>.
/// </summary>
/// <returns>The index of the last code unit that was consumed.</returns>
private int AddTitlecaseLetter(ref StringBuilder result, ref string input, int inputIndex, int charLen)
{
    Debug.Assert(charLen == 1 || charLen == 2, "[TextInfo.AddTitlecaseLetter] CharUnicodeInfo.InternalGetUnicodeCategory returned an unexpected charLen!");

    if (charLen != 2)
    {
        char titlecased;
        switch (input[inputIndex])
        {
            // For AppCompat, the Titlecase Case Mapping data from NDP 2.0 is used below.
            case '\u01C4': // DZ with Caron -> Dz with Caron
            case '\u01C5': // Dz with Caron -> Dz with Caron
            case '\u01C6': // dz with Caron -> Dz with Caron
                titlecased = '\u01C5';
                break;

            case '\u01C7': // LJ -> Lj
            case '\u01C8': // Lj -> Lj
            case '\u01C9': // lj -> Lj
                titlecased = '\u01C8';
                break;

            case '\u01CA': // NJ -> Nj
            case '\u01CB': // Nj -> Nj
            case '\u01CC': // nj -> Nj
                titlecased = '\u01CB';
                break;

            case '\u01F1': // DZ -> Dz
            case '\u01F2': // Dz -> Dz
            case '\u01F3': // dz -> Dz
                titlecased = '\u01F2';
                break;

            default:
                titlecased = ToUpper(input[inputIndex]);
                break;
        }

        result.Append(titlecased);
        return inputIndex;
    }

    // Surrogate pair: upper-case the two code units together.
    ReadOnlySpan<char> pair = input.AsSpan(inputIndex, 2);
    if (GlobalizationMode.Invariant)
    {
        result.Append(pair); // surrogate pair in invariant mode, so changing case is a nop
    }
    else
    {
        Span<char> upperPair = stackalloc char[2];
        ChangeCaseToUpper(pair, upperPair);
        result.Append(upperPair);
    }

    // Report the second code unit as the last one consumed.
    return inputIndex + 1;
}
// Used in ToTitleCase():
// Once a starting letter has been found, this bit mask decides whether a Unicode category
// ends the current word. Bit N corresponds to the UnicodeCategory with numeric value N; every
// category not listed here (letters, marks, numbers, Surrogate, PrivateUse, OtherNotAssigned)
// is treated as part of the word.
private const int c_wordSeparatorMask =
    (1 << (int)UnicodeCategory.SpaceSeparator) |          // 11
    (1 << (int)UnicodeCategory.LineSeparator) |           // 12
    (1 << (int)UnicodeCategory.ParagraphSeparator) |      // 13
    (1 << (int)UnicodeCategory.Control) |                 // 14
    (1 << (int)UnicodeCategory.Format) |                  // 15
    (1 << (int)UnicodeCategory.ConnectorPunctuation) |    // 18
    (1 << (int)UnicodeCategory.DashPunctuation) |         // 19
    (1 << (int)UnicodeCategory.OpenPunctuation) |         // 20
    (1 << (int)UnicodeCategory.ClosePunctuation) |        // 21
    (1 << (int)UnicodeCategory.InitialQuotePunctuation) | // 22
    (1 << (int)UnicodeCategory.FinalQuotePunctuation) |   // 23
    (1 << (int)UnicodeCategory.OtherPunctuation) |        // 24
    (1 << (int)UnicodeCategory.MathSymbol) |              // 25
    (1 << (int)UnicodeCategory.CurrencySymbol) |          // 26
    (1 << (int)UnicodeCategory.ModifierSymbol) |          // 27
    (1 << (int)UnicodeCategory.OtherSymbol);              // 28
/// <summary>
/// Returns true when the bit for <paramref name="category"/> is set in <c>c_wordSeparatorMask</c>,
/// i.e. when a character of that category ends a word in ToTitleCase.
/// </summary>
private static bool IsWordSeparator(UnicodeCategory category)
    => ((c_wordSeparatorMask >> (int)category) & 1) != 0;
/// <summary>
/// Returns true for the five letter categories: uppercase, lowercase, titlecase,
/// modifier and other letters.
/// </summary>
private static bool IsLetterCategory(UnicodeCategory uc)
{
    switch (uc)
    {
        case UnicodeCategory.UppercaseLetter:
        case UnicodeCategory.LowercaseLetter:
        case UnicodeCategory.TitlecaseLetter:
        case UnicodeCategory.ModifierLetter:
        case UnicodeCategory.OtherLetter:
            return true;

        default:
            return false;
    }
}
/// <summary>
/// Empty marker type: passed as the generic argument of ChangeCaseCommon to request upper-casing.
/// </summary>
private readonly struct ToUpperConversion
{
}

/// <summary>
/// Empty marker type: passed as the generic argument of ChangeCaseCommon to request lower-casing.
/// </summary>
private readonly struct ToLowerConversion
{
}