netcore/System.Private.CoreLib/shared/System/Globalization/TextInfo.cs

   1 // Licensed to the .NET Foundation under one or more agreements.
   2 // The .NET Foundation licenses this file to you under the MIT license.
   3 // See the LICENSE file in the project root for more information.
   4
   5 using System.Diagnostics;
   6 using System.Runtime.CompilerServices;
   7 using System.Runtime.InteropServices;
   8 using System.Runtime.Serialization;
   9 using System.Text;
  10 using System.Text.Unicode;
  11 using Internal.Runtime.CompilerServices;
  12
  13 #pragma warning disable SA1121 // explicitly using type aliases instead of built-in types
  14 #if BIT64
  15 using nuint = System.UInt64;
  16 using nint = System.Int64;
  17 #else // BIT64
  18 using nuint = System.UInt32;
  19 using nint = System.Int32;
  20 #endif // BIT64
  21
  22 namespace System.Globalization
  23 {
  24     /// <summary>
    /// This class defines behaviors specific to a writing system.
  26     /// A writing system is the collection of scripts and orthographic rules
  27     /// required to represent a language as text.
  28     /// </summary>
  29     public partial class TextInfo : ICloneable, IDeserializationCallback
  30     {
        // Three-state flag backing a lazily computed boolean: the zero value
        // (NotInitialized) means "not computed yet"; False and True hold the result.
        private enum Tristate : byte
        {
            NotInitialized = 0,
            False = 1,
            True = 2
        }
  37
        // Cached list separator; stays null until it is first read or explicitly set.
        private string? _listSeparator;
        // When true, members that call VerifyWritable throw InvalidOperationException.
        private bool _isReadOnly = false;

        // Culture name and culture data captured by the constructor.
        private readonly string _cultureName;
        private readonly CultureData _cultureData;

        // Name of the text info we're using (ie: _cultureData.TextInfoName)
        private readonly string _textInfoName;

        // Cached answer to "does this culture case ASCII the same way as the invariant culture?";
        // computed the first time IsAsciiCasingSameAsInvariant is read.
        private Tristate _isAsciiCasingSameAsInvariant = Tristate.NotInitialized;
  48
  49         // Invariant text info
  50         internal static TextInfo Invariant => s_invariant ??= new TextInfo(CultureData.Invariant);
  51
  52         private static volatile TextInfo? s_invariant;
  53
  54         internal TextInfo(CultureData cultureData)
  55         {
  56             // This is our primary data source, we don't need most of the rest of this
  57             _cultureData = cultureData;
  58             _cultureName = _cultureData.CultureName;
  59             _textInfoName = _cultureData.TextInfoName;
  60
  61             FinishInitialization();
  62         }
  63
  64         void IDeserializationCallback.OnDeserialization(object? sender)
  65         {
  66             throw new PlatformNotSupportedException();
  67         }
  68
  69         public virtual int ANSICodePage => _cultureData.ANSICodePage;
  70
  71         public virtual int OEMCodePage => _cultureData.OEMCodePage;
  72
  73         public virtual int MacCodePage => _cultureData.MacCodePage;
  74
  75         public virtual int EBCDICCodePage => _cultureData.EBCDICCodePage;
  76
  77         // Just use the LCID from our text info name
  78         public int LCID => CultureInfo.GetCultureInfo(_textInfoName).LCID;
  79
  80         public string CultureName => _textInfoName;
  81
  82         public bool IsReadOnly => _isReadOnly;
  83
  84         public virtual object Clone()
  85         {
  86             object o = MemberwiseClone();
  87             ((TextInfo)o).SetReadOnlyState(false);
  88             return o;
  89         }
  90
  91         /// <summary>
  92         /// Create a cloned readonly instance or return the input one if it is
  93         /// readonly.
  94         /// </summary>
  95         public static TextInfo ReadOnly(TextInfo textInfo)
  96         {
  97             if (textInfo == null)
  98             {
  99                 throw new ArgumentNullException(nameof(textInfo));
 100             }
 101
 102             if (textInfo.IsReadOnly)
 103             {
 104                 return textInfo;
 105             }
 106
 107             TextInfo clonedTextInfo = (TextInfo)(textInfo.MemberwiseClone());
 108             clonedTextInfo.SetReadOnlyState(true);
 109             return clonedTextInfo;
 110         }
 111
 112         private void VerifyWritable()
 113         {
 114             if (_isReadOnly)
 115             {
 116                 throw new InvalidOperationException(SR.InvalidOperation_ReadOnly);
 117             }
 118         }
 119
 120         internal void SetReadOnlyState(bool readOnly)
 121         {
 122             _isReadOnly = readOnly;
 123         }
 124
 125
 126         /// <summary>
 127         /// Returns the string used to separate items in a list.
 128         /// </summary>
 129         public virtual string ListSeparator
 130         {
 131             get => _listSeparator ?? (_listSeparator = _cultureData.ListSeparator);
 132             set
 133             {
 134                 if (value == null)
 135                 {
 136                     throw new ArgumentNullException(nameof(value));
 137                 }
 138
 139                 VerifyWritable();
 140                 _listSeparator = value;
 141             }
 142         }
 143
 144         /// <summary>
 145         /// Converts the character or string to lower case.  Certain locales
 146         /// have different casing semantics from the file systems in Win32.
 147         /// </summary>
 148         public virtual char ToLower(char c)
 149         {
 150             if (GlobalizationMode.Invariant || (IsAscii(c) && IsAsciiCasingSameAsInvariant))
 151             {
 152                 return ToLowerAsciiInvariant(c);
 153             }
 154
 155             return ChangeCase(c, toUpper: false);
 156         }
 157
 158         public virtual string ToLower(string str)
 159         {
 160             if (str == null)
 161             {
 162                 throw new ArgumentNullException(nameof(str));
 163             }
 164
 165             if (GlobalizationMode.Invariant)
 166             {
 167                 return ToLowerAsciiInvariant(str);
 168             }
 169
 170             return ChangeCaseCommon<ToLowerConversion>(str);
 171         }
 172
 173         private unsafe char ChangeCase(char c, bool toUpper)
 174         {
 175             Debug.Assert(!GlobalizationMode.Invariant);
 176
 177             char dst = default;
 178             ChangeCase(&c, 1, &dst, 1, toUpper);
 179             return dst;
 180         }
 181
 182         [MethodImpl(MethodImplOptions.AggressiveInlining)]
 183         internal void ChangeCaseToLower(ReadOnlySpan<char> source, Span<char> destination)
 184         {
 185             Debug.Assert(destination.Length >= source.Length);
 186             ChangeCaseCommon<ToLowerConversion>(ref MemoryMarshal.GetReference(source), ref MemoryMarshal.GetReference(destination), source.Length);
 187         }
 188
 189         [MethodImpl(MethodImplOptions.AggressiveInlining)]
 190         internal void ChangeCaseToUpper(ReadOnlySpan<char> source, Span<char> destination)
 191         {
 192             Debug.Assert(destination.Length >= source.Length);
 193             ChangeCaseCommon<ToUpperConversion>(ref MemoryMarshal.GetReference(source), ref MemoryMarshal.GetReference(destination), source.Length);
 194         }
 195
 196         [MethodImpl(MethodImplOptions.AggressiveInlining)]
 197         private void ChangeCaseCommon<TConversion>(ReadOnlySpan<char> source, Span<char> destination) where TConversion : struct
 198         {
 199             Debug.Assert(destination.Length >= source.Length);
 200             ChangeCaseCommon<TConversion>(ref MemoryMarshal.GetReference(source), ref MemoryMarshal.GetReference(destination), source.Length);
 201         }
 202
        /// <summary>
        /// Changes the case of <paramref name="charCount"/> chars starting at <paramref name="source"/>,
        /// writing each result to the same offset from <paramref name="destination"/>.
        /// </summary>
        /// <typeparam name="TConversion">
        /// Marker type selecting the direction; asserted to be ToUpperConversion or ToLowerConversion.
        /// </typeparam>
        /// <param name="source">Reference to the first input char.</param>
        /// <param name="destination">Reference to the first output char.</param>
        /// <param name="charCount">Number of chars to convert; asserted to be non-negative.</param>
        /// <remarks>
        /// When this culture's ASCII casing matches invariant, ASCII data is converted in managed code
        /// two chars (one 32-bit value) at a time. At the first non-ASCII char - or immediately, when
        /// the culture's ASCII casing differs - the remaining chars are handed to the pointer-based
        /// ChangeCase overload.
        /// </remarks>
        private unsafe void ChangeCaseCommon<TConversion>(ref char source, ref char destination, int charCount) where TConversion : struct
        {
            Debug.Assert(typeof(TConversion) == typeof(ToUpperConversion) || typeof(TConversion) == typeof(ToLowerConversion));
            bool toUpper = typeof(TConversion) == typeof(ToUpperConversion); // JIT will treat this as a constant in release builds

            Debug.Assert(!GlobalizationMode.Invariant);
            Debug.Assert(charCount >= 0);

            // Nothing to convert; skip pinning entirely.
            if (charCount == 0)
            {
                goto Return;
            }

            fixed (char* pSource = &source)
            fixed (char* pDestination = &destination)
            {
                nuint currIdx = 0; // in chars

                if (IsAsciiCasingSameAsInvariant)
                {
                    // Read 4 chars (two 32-bit integers) at a time

                    if (charCount >= 4)
                    {
                        nuint lastIndexWhereCanReadFourChars = (uint)charCount - 4;
                        do
                        {
                            // This is a mostly branchless case change routine. Generally speaking, we assume that the majority
                            // of input is ASCII, so the 'if' checks below should normally evaluate to false. However, within
                            // the ASCII data, we expect that characters of either case might be about equally distributed, so
                            // we want the case change operation itself to be branchless. This gives optimal performance in the
                            // common case. We also expect that developers aren't passing very long (16+ character) strings into
                            // this method, so we won't bother vectorizing until data shows us that it's worthwhile to do so.

                            uint tempValue = Unsafe.ReadUnaligned<uint>(pSource + currIdx);
                            if (!Utf16Utility.AllCharsInUInt32AreAscii(tempValue))
                            {
                                goto NonAscii;
                            }
                            tempValue = (toUpper) ? Utf16Utility.ConvertAllAsciiCharsInUInt32ToUppercase(tempValue) : Utf16Utility.ConvertAllAsciiCharsInUInt32ToLowercase(tempValue);
                            Unsafe.WriteUnaligned<uint>(pDestination + currIdx, tempValue);

                            // Second pair of this group of four. The first pair has already been written,
                            // so a failure here must advance past it (see NonAsciiSkipTwoChars).
                            tempValue = Unsafe.ReadUnaligned<uint>(pSource + currIdx + 2);
                            if (!Utf16Utility.AllCharsInUInt32AreAscii(tempValue))
                            {
                                goto NonAsciiSkipTwoChars;
                            }
                            tempValue = (toUpper) ? Utf16Utility.ConvertAllAsciiCharsInUInt32ToUppercase(tempValue) : Utf16Utility.ConvertAllAsciiCharsInUInt32ToLowercase(tempValue);
                            Unsafe.WriteUnaligned<uint>(pDestination + currIdx + 2, tempValue);
                            currIdx += 4;
                        } while (currIdx <= lastIndexWhereCanReadFourChars);

                        // At this point, there are fewer than 4 characters remaining to convert.
                        Debug.Assert((uint)charCount - currIdx < 4);
                    }

                    // If there are 2 or 3 characters left to convert, we'll convert 2 of them now.
                    // (The loop above consumed a multiple of 4, so bit 1 of charCount tells us this.)
                    if ((charCount & 2) != 0)
                    {
                        uint tempValue = Unsafe.ReadUnaligned<uint>(pSource + currIdx);
                        if (!Utf16Utility.AllCharsInUInt32AreAscii(tempValue))
                        {
                            goto NonAscii;
                        }
                        tempValue = (toUpper) ? Utf16Utility.ConvertAllAsciiCharsInUInt32ToUppercase(tempValue) : Utf16Utility.ConvertAllAsciiCharsInUInt32ToLowercase(tempValue);
                        Unsafe.WriteUnaligned<uint>(pDestination + currIdx, tempValue);
                        currIdx += 2;
                    }

                    // If there's a single character left to convert, do it now.
                    if ((charCount & 1) != 0)
                    {
                        uint tempValue = pSource[currIdx];
                        if (tempValue > 0x7Fu)
                        {
                            goto NonAscii;
                        }
                        tempValue = (toUpper) ? Utf16Utility.ConvertAllAsciiCharsInUInt32ToUppercase(tempValue) : Utf16Utility.ConvertAllAsciiCharsInUInt32ToLowercase(tempValue);
                        pDestination[currIdx] = (char)tempValue;
                    }

                    // And we're finished!

                    goto Return;

                // If we reached this point, we found non-ASCII data.
                // Fall back down the p/invoke code path.

                NonAsciiSkipTwoChars:
                    currIdx += 2;

                NonAscii:
                    Debug.Assert(currIdx < (uint)charCount, "We somehow read past the end of the buffer.");
                    // Only the chars from currIdx onward still need converting.
                    charCount -= (int)currIdx;
                }

                // We encountered non-ASCII data and therefore can't perform invariant case conversion; or the requested culture
                // has a case conversion that's different from the invariant culture, even for ASCII data (e.g., tr-TR converts
                // 'i' (U+0069) to Latin Capital Letter I With Dot Above (U+0130)).

                ChangeCase(pSource + currIdx, charCount, pDestination + currIdx, charCount, toUpper);
            }

        Return:
            return;
        }
 309
        /// <summary>
        /// Returns <paramref name="source"/> with its case changed in the direction selected by
        /// <typeparamref name="TConversion"/>.
        /// </summary>
        /// <typeparam name="TConversion">
        /// Marker type selecting the direction; asserted to be ToUpperConversion or ToLowerConversion.
        /// </typeparam>
        /// <param name="source">The string to convert; asserted to be non-null.</param>
        /// <returns>
        /// <see cref="string.Empty"/> for an empty input; <paramref name="source"/> itself when the
        /// ASCII fast path finds nothing that needs to change; otherwise a newly allocated string of
        /// the same length.
        /// </returns>
        private unsafe string ChangeCaseCommon<TConversion>(string source) where TConversion : struct
        {
            Debug.Assert(typeof(TConversion) == typeof(ToUpperConversion) || typeof(TConversion) == typeof(ToLowerConversion));
            bool toUpper = typeof(TConversion) == typeof(ToUpperConversion); // JIT will treat this as a constant in release builds

            Debug.Assert(!GlobalizationMode.Invariant);
            Debug.Assert(source != null);

            // If the string is empty, we're done.
            if (source.Length == 0)
            {
                return string.Empty;
            }

            fixed (char* pSource = source)
            {
                nuint currIdx = 0; // in chars

                // If this culture's casing for ASCII is the same as invariant, try to take
                // a fast path that'll work in managed code and ASCII rather than calling out
                // to the OS for culture-aware casing.
                if (IsAsciiCasingSameAsInvariant)
                {
                    // Read 2 chars (one 32-bit integer) at a time

                    if (source.Length >= 2)
                    {
                        nuint lastIndexWhereCanReadTwoChars = (uint)source.Length - 2;
                        do
                        {
                            // See the comments in ChangeCaseCommon<TConversion>(ref char, ref char, int) for a full explanation of the below code.
                            // Unlike that overload, this loop only scans: nothing is written until a char that must change is found.

                            uint tempValue = Unsafe.ReadUnaligned<uint>(pSource + currIdx);
                            if (!Utf16Utility.AllCharsInUInt32AreAscii(tempValue))
                            {
                                goto NotAscii;
                            }
                            if ((toUpper) ? Utf16Utility.UInt32ContainsAnyLowercaseAsciiChar(tempValue) : Utf16Utility.UInt32ContainsAnyUppercaseAsciiChar(tempValue))
                            {
                                goto AsciiMustChangeCase;
                            }

                            currIdx += 2;
                        } while (currIdx <= lastIndexWhereCanReadTwoChars);
                    }

                    // If there's a single character left to convert, do it now.
                    if ((source.Length & 1) != 0)
                    {
                        uint tempValue = pSource[currIdx];
                        if (tempValue > 0x7Fu)
                        {
                            goto NotAscii;
                        }
                        // Unsigned range check: true when the char is an ASCII letter of the case being converted away from.
                        if ((toUpper) ? ((tempValue - 'a') <= (uint)('z' - 'a')) : ((tempValue - 'A') <= (uint)('Z' - 'A')))
                        {
                            goto AsciiMustChangeCase;
                        }
                    }

                    // We got through all characters without finding anything that needed to change - done!
                    return source;

                AsciiMustChangeCase:
                    {
                        // We reached ASCII data that requires a case change.
                        // This will necessarily allocate a new string, but let's try to stay within the managed (non-localization tables)
                        // conversion code path if we can.

                        string result = string.FastAllocateString(source.Length); // changing case uses simple folding: doesn't change UTF-16 code unit count

                        // copy existing known-good data into the result
                        Span<char> resultSpan = new Span<char>(ref result.GetRawStringData(), result.Length);
                        source.AsSpan(0, (int)currIdx).CopyTo(resultSpan);

                        // and re-run the fast span-based logic over the remainder of the data
                        ChangeCaseCommon<TConversion>(source.AsSpan((int)currIdx), resultSpan.Slice((int)currIdx));
                        return result;
                    }
                }

            NotAscii:
                {
                    // We reached non-ASCII data *or* the requested culture doesn't map ASCII data the same way as the invariant culture.
                    // In either case we need to fall back to the localization tables.

                    string result = string.FastAllocateString(source.Length); // changing case uses simple folding: doesn't change UTF-16 code unit count

                    if (currIdx > 0)
                    {
                        // copy existing known-good data into the result
                        Span<char> resultSpan = new Span<char>(ref result.GetRawStringData(), result.Length);
                        source.AsSpan(0, (int)currIdx).CopyTo(resultSpan);
                    }

                    // and run the culture-aware logic over the remainder of the data
                    fixed (char* pResult = result)
                    {
                        ChangeCase(pSource + currIdx, source.Length - (int)currIdx, pResult + currIdx, result.Length - (int)currIdx, toUpper);
                    }
                    return result;
                }
            }
        }
 414
 415         internal static unsafe string ToLowerAsciiInvariant(string s)
 416         {
 417             if (s.Length == 0)
 418             {
 419                 return string.Empty;
 420             }
 421
 422             fixed (char* pSource = s)
 423             {
 424                 int i = 0;
 425                 while (i < s.Length)
 426                 {
 427                     if ((uint)(pSource[i] - 'A') <= (uint)('Z' - 'A'))
 428                     {
 429                         break;
 430                     }
 431                     i++;
 432                 }
 433
 434                 if (i >= s.Length)
 435                 {
 436                     return s;
 437                 }
 438
 439                 string result = string.FastAllocateString(s.Length);
 440                 fixed (char* pResult = result)
 441                 {
 442                     for (int j = 0; j < i; j++)
 443                     {
 444                         pResult[j] = pSource[j];
 445                     }
 446
 447                     pResult[i] = (char)(pSource[i] | 0x20);
 448                     i++;
 449
 450                     while (i < s.Length)
 451                     {
 452                         pResult[i] = ToLowerAsciiInvariant(pSource[i]);
 453                         i++;
 454                     }
 455                 }
 456
 457                 return result;
 458             }
 459         }
 460
 461         internal static void ToLowerAsciiInvariant(ReadOnlySpan<char> source, Span<char> destination)
 462         {
 463             Debug.Assert(destination.Length >= source.Length);
 464
 465             for (int i = 0; i < source.Length; i++)
 466             {
 467                 destination[i] = ToLowerAsciiInvariant(source[i]);
 468             }
 469         }
 470
 471         private static unsafe string ToUpperAsciiInvariant(string s)
 472         {
 473             if (s.Length == 0)
 474             {
 475                 return string.Empty;
 476             }
 477
 478             fixed (char* pSource = s)
 479             {
 480                 int i = 0;
 481                 while (i < s.Length)
 482                 {
 483                     if ((uint)(pSource[i] - 'a') <= (uint)('z' - 'a'))
 484                     {
 485                         break;
 486                     }
 487                     i++;
 488                 }
 489
 490                 if (i >= s.Length)
 491                 {
 492                     return s;
 493                 }
 494
 495                 string result = string.FastAllocateString(s.Length);
 496                 fixed (char* pResult = result)
 497                 {
 498                     for (int j = 0; j < i; j++)
 499                     {
 500                         pResult[j] = pSource[j];
 501                     }
 502
 503                     pResult[i] = (char)(pSource[i] & ~0x20);
 504                     i++;
 505
 506                     while (i < s.Length)
 507                     {
 508                         pResult[i] = ToUpperAsciiInvariant(pSource[i]);
 509                         i++;
 510                     }
 511                 }
 512
 513                 return result;
 514             }
 515         }
 516
 517         internal static void ToUpperAsciiInvariant(ReadOnlySpan<char> source, Span<char> destination)
 518         {
 519             Debug.Assert(destination.Length >= source.Length);
 520
 521             for (int i = 0; i < source.Length; i++)
 522             {
 523                 destination[i] = ToUpperAsciiInvariant(source[i]);
 524             }
 525         }
 526
 527         private static char ToLowerAsciiInvariant(char c)
 528         {
 529             if ((uint)(c - 'A') <= (uint)('Z' - 'A'))
 530             {
 531                 c = (char)(c | 0x20);
 532             }
 533             return c;
 534         }
 535
 536         /// <summary>
 537         /// Converts the character or string to upper case.  Certain locales
 538         /// have different casing semantics from the file systems in Win32.
 539         /// </summary>
 540         public virtual char ToUpper(char c)
 541         {
 542             if (GlobalizationMode.Invariant || (IsAscii(c) && IsAsciiCasingSameAsInvariant))
 543             {
 544                 return ToUpperAsciiInvariant(c);
 545             }
 546
 547             return ChangeCase(c, toUpper: true);
 548         }
 549
 550         public virtual string ToUpper(string str)
 551         {
 552             if (str == null)
 553             {
 554                 throw new ArgumentNullException(nameof(str));
 555             }
 556
 557             if (GlobalizationMode.Invariant)
 558             {
 559                 return ToUpperAsciiInvariant(str);
 560             }
 561
 562             return ChangeCaseCommon<ToUpperConversion>(str);
 563         }
 564
 565         internal static char ToUpperAsciiInvariant(char c)
 566         {
 567             if ((uint)(c - 'a') <= (uint)('z' - 'a'))
 568             {
 569                 c = (char)(c & ~0x20);
 570             }
 571             return c;
 572         }
 573
 574         private static bool IsAscii(char c) => c < 0x80;
 575
 576         private bool IsAsciiCasingSameAsInvariant
 577         {
 578             [MethodImpl(MethodImplOptions.AggressiveInlining)]
 579             get
 580             {
 581                 if (_isAsciiCasingSameAsInvariant == Tristate.NotInitialized)
 582                 {
 583                     PopulateIsAsciiCasingSameAsInvariant();
 584                 }
 585
 586                 Debug.Assert(_isAsciiCasingSameAsInvariant == Tristate.True || _isAsciiCasingSameAsInvariant == Tristate.False);
 587                 return _isAsciiCasingSameAsInvariant == Tristate.True;
 588             }
 589         }
 590
 591         [MethodImpl(MethodImplOptions.NoInlining)]
 592         private void PopulateIsAsciiCasingSameAsInvariant()
 593         {
 594             bool compareResult = CultureInfo.GetCultureInfo(_textInfoName).CompareInfo.Compare("abcdefghijklmnopqrstuvwxyz", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", CompareOptions.IgnoreCase) == 0;
 595             _isAsciiCasingSameAsInvariant = (compareResult) ? Tristate.True : Tristate.False;
 596         }
 597
 598         /// <summary>
 599         /// Returns true if the dominant direction of text and UI such as the
 600         /// relative position of buttons and scroll bars
 601         /// </summary>
 602         public bool IsRightToLeft => _cultureData.IsRightToLeft;
 603
 604         public override bool Equals(object? obj)
 605         {
 606             return obj is TextInfo otherTextInfo
 607                 && CultureName.Equals(otherTextInfo.CultureName);
 608         }
 609
 610         public override int GetHashCode() => CultureName.GetHashCode();
 611
 612         public override string ToString()
 613         {
 614             return "TextInfo - " + _cultureData.CultureName;
 615         }
 616
        /// <summary>
        /// Titlecasing refers to a casing practice wherein the first letter of a word is an uppercase letter
        /// and the rest of the letters are lowercase.  The choice of which words to titlecase in headings
        /// and titles is dependent on language and local conventions.  For example, "The Merry Wives of Windsor"
        /// is the appropriate titlecasing of that play's name in English, with the word "of" not titlecased.
        /// In German, however, the title is "Die lustigen Weiber von Windsor," and both "lustigen" and "von"
        /// are not titlecased.  In French even fewer words are titlecased: "Les joyeuses commeres de Windsor."
        ///
        /// Moreover, the determination of what actually constitutes a word is language dependent, and this can
        /// influence which letter or letters of a "word" are uppercased when titlecasing strings.  For example
        /// "l'arbre" is considered two words in French, whereas "can't" is considered one word in English.
        /// </summary>
        /// <param name="str">The string to convert to title case.</param>
        /// <returns>The title-cased string; an empty input is returned unchanged.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
        public unsafe string ToTitleCase(string str)
        {
            if (str == null)
            {
                throw new ArgumentNullException(nameof(str));
            }

            if (str.Length == 0)
            {
                return str;
            }

            StringBuilder result = new StringBuilder();
            // Lower-cased copy of the whole input, created only if some word needs lowercasing.
            string? lowercaseData = null;
            // Store if the current culture is Dutch (special case)
            bool isDutchCulture = CultureName.StartsWith("nl-", StringComparison.OrdinalIgnoreCase);

            for (int i = 0; i < str.Length; i++)
            {
                int charLen;
                UnicodeCategory charType = CharUnicodeInfo.InternalGetUnicodeCategory(str, i, out charLen);
                if (char.CheckLetter(charType))
                {
                    // Special case to check for Dutch specific titlecasing with "IJ" characters
                    // at the beginning of a word
                    if (isDutchCulture && i < str.Length - 1 && (str[i] == 'i' || str[i] == 'I') && (str[i+1] == 'j' || str[i+1] == 'J'))
                    {
                        result.Append("IJ");
                        i += 2;
                    }
                    else
                    {
                        // Do the titlecasing for the first character of the word.
                        i = AddTitlecaseLetter(ref result, ref str, i, charLen) + 1;
                    }

                    // Convert the characters until the end of this word
                    // to lowercase.
                    int lowercaseStart = i;

                    // Use hasLowerCase flag to prevent from lowercasing acronyms (like "URT", "USA", etc)
                    // This is in line with Word 2000 behavior of titlecasing.
                    bool hasLowerCase = (charType == UnicodeCategory.LowercaseLetter);

                    // Use a loop to find all of the other letters following this letter.
                    while (i < str.Length)
                    {
                        charType = CharUnicodeInfo.InternalGetUnicodeCategory(str, i, out charLen);
                        if (IsLetterCategory(charType))
                        {
                            if (charType == UnicodeCategory.LowercaseLetter)
                            {
                                hasLowerCase = true;
                            }
                            i += charLen;
                        }
                        else if (str[i] == '\'')
                        {
                            // An apostrophe stays inside the word: flush everything up to and
                            // including it, then treat the rest of the word as lowercase.
                            i++;
                            if (hasLowerCase)
                            {
                                if (lowercaseData == null)
                                {
                                    lowercaseData = ToLower(str);
                                }
                                result.Append(lowercaseData, lowercaseStart, i - lowercaseStart);
                            }
                            else
                            {
                                result.Append(str, lowercaseStart, i - lowercaseStart);
                            }
                            lowercaseStart = i;
                            hasLowerCase = true;
                        }
                        else if (!IsWordSeparator(charType))
                        {
                            // This category is considered to be part of the word.
                            // This is any category for which IsWordSeparator returns false.
                            i+= charLen;
                        }
                        else
                        {
                            // A word separator. Break out of the loop.
                            break;
                        }
                    }

                    // Flush the not-yet-appended tail of the word.
                    int count = i - lowercaseStart;

                    if (count > 0)
                    {
                        if (hasLowerCase)
                        {
                            if (lowercaseData == null)
                            {
                                lowercaseData = ToLower(str);
                            }
                            result.Append(lowercaseData, lowercaseStart, count);
                        }
                        else
                        {
                            result.Append(str, lowercaseStart, count);
                        }
                    }

                    if (i < str.Length)
                    {
                        // not a letter, just append it
                        i = AddNonLetter(ref result, ref str, i, charLen);
                    }
                }
                else
                {
                    // not a letter, just append it
                    i = AddNonLetter(ref result, ref str, i, charLen);
                }
            }
            return result.ToString();
        }
 748
 749         private static int AddNonLetter(ref StringBuilder result, ref string input, int inputIndex, int charLen)
 750         {
 751             Debug.Assert(charLen == 1 || charLen == 2, "[TextInfo.AddNonLetter] CharUnicodeInfo.InternalGetUnicodeCategory returned an unexpected charLen!");
 752             if (charLen == 2)
 753             {
 754                 // Surrogate pair
 755                 result.Append(input[inputIndex++]);
 756                 result.Append(input[inputIndex]);
 757             }
 758             else
 759             {
 760                 result.Append(input[inputIndex]);
 761             }
 762             return inputIndex;
 763         }
 764
 765         private int AddTitlecaseLetter(ref StringBuilder result, ref string input, int inputIndex, int charLen)
 766         {
 767             Debug.Assert(charLen == 1 || charLen == 2, "[TextInfo.AddTitlecaseLetter] CharUnicodeInfo.InternalGetUnicodeCategory returned an unexpected charLen!");
 768
 769             if (charLen == 2)
 770             {
 771                 // for surrogate pairs do a ToUpper operation on the substring
 772                 ReadOnlySpan<char> src = input.AsSpan(inputIndex, 2);
 773                 if (GlobalizationMode.Invariant)
 774                 {
 775                     result.Append(src); // surrogate pair in invariant mode, so changing case is a nop
 776                 }
 777                 else
 778                 {
 779                     Span<char> dst = stackalloc char[2];
 780                     ChangeCaseToUpper(src, dst);
 781                     result.Append(dst);
 782                 }
 783                 inputIndex++;
 784             }
 785             else
 786             {
 787                 switch (input[inputIndex])
 788                 {
 789                     // For AppCompat, the Titlecase Case Mapping data from NDP 2.0 is used below.
 790                     case (char) 0x01C4:  // DZ with Caron -> Dz with Caron
 791                     case (char) 0x01C5:  // Dz with Caron -> Dz with Caron
 792                     case (char) 0x01C6:  // dz with Caron -> Dz with Caron
 793                         result.Append((char) 0x01C5);
 794                         break;
 795                     case (char) 0x01C7:  // LJ -> Lj
 796                     case (char) 0x01C8:  // Lj -> Lj
 797                     case (char) 0x01C9:  // lj -> Lj
 798                         result.Append((char) 0x01C8);
 799                         break;
 800                     case (char) 0x01CA:  // NJ -> Nj
 801                     case (char) 0x01CB:  // Nj -> Nj
 802                     case (char) 0x01CC:  // nj -> Nj
 803                         result.Append((char) 0x01CB);
 804                         break;
 805                     case (char) 0x01F1:  // DZ -> Dz
 806                     case (char) 0x01F2:  // Dz -> Dz
 807                     case (char) 0x01F3:  // dz -> Dz
 808                         result.Append((char) 0x01F2);
 809                         break;
 810                     default:
 811                         result.Append(ToUpper(input[inputIndex]));
 812                         break;
 813                 }
 814             }
 815             return inputIndex;
 816         }
 817
 818         // Used in ToTitleCase():
 819         // When we find a starting letter, the following array decides if a category should be
 820         // considered as word seprator or not.
 821         private const int c_wordSeparatorMask =
 822             /* false */ (0 <<  0) | // UppercaseLetter = 0,
 823             /* false */ (0 <<  1) | // LowercaseLetter = 1,
 824             /* false */ (0 <<  2) | // TitlecaseLetter = 2,
 825             /* false */ (0 <<  3) | // ModifierLetter = 3,
 826             /* false */ (0 <<  4) | // OtherLetter = 4,
 827             /* false */ (0 <<  5) | // NonSpacingMark = 5,
 828             /* false */ (0 <<  6) | // SpacingCombiningMark = 6,
 829             /* false */ (0 <<  7) | // EnclosingMark = 7,
 830             /* false */ (0 <<  8) | // DecimalDigitNumber = 8,
 831             /* false */ (0 <<  9) | // LetterNumber = 9,
 832             /* false */ (0 << 10) | // OtherNumber = 10,
 833             /* true  */ (1 << 11) | // SpaceSeparator = 11,
 834             /* true  */ (1 << 12) | // LineSeparator = 12,
 835             /* true  */ (1 << 13) | // ParagraphSeparator = 13,
 836             /* true  */ (1 << 14) | // Control = 14,
 837             /* true  */ (1 << 15) | // Format = 15,
 838             /* false */ (0 << 16) | // Surrogate = 16,
 839             /* false */ (0 << 17) | // PrivateUse = 17,
 840             /* true  */ (1 << 18) | // ConnectorPunctuation = 18,
 841             /* true  */ (1 << 19) | // DashPunctuation = 19,
 842             /* true  */ (1 << 20) | // OpenPunctuation = 20,
 843             /* true  */ (1 << 21) | // ClosePunctuation = 21,
 844             /* true  */ (1 << 22) | // InitialQuotePunctuation = 22,
 845             /* true  */ (1 << 23) | // FinalQuotePunctuation = 23,
 846             /* true  */ (1 << 24) | // OtherPunctuation = 24,
 847             /* true  */ (1 << 25) | // MathSymbol = 25,
 848             /* true  */ (1 << 26) | // CurrencySymbol = 26,
 849             /* true  */ (1 << 27) | // ModifierSymbol = 27,
 850             /* true  */ (1 << 28) | // OtherSymbol = 28,
 851             /* false */ (0 << 29);  // OtherNotAssigned = 29;
 852
 853         private static bool IsWordSeparator(UnicodeCategory category)
 854         {
 855             return (c_wordSeparatorMask & (1 << (int) category)) != 0;
 856         }
 857
 858         private static bool IsLetterCategory(UnicodeCategory uc)
 859         {
 860             return (uc == UnicodeCategory.UppercaseLetter
 861                  || uc == UnicodeCategory.LowercaseLetter
 862                  || uc == UnicodeCategory.TitlecaseLetter
 863                  || uc == UnicodeCategory.ModifierLetter
 864                  || uc == UnicodeCategory.OtherLetter);
 865         }
 866
        // A dummy struct that is used for 'ToUpper' in generic parameters.
        // It declares no members; it exists only to be supplied as a type argument.
        private readonly struct ToUpperConversion { }
 869
        // A dummy struct that is used for 'ToLower' in generic parameters.
        // It declares no members; it exists only to be supplied as a type argument.
        private readonly struct ToLowerConversion { }
 872     }
 873 }