More Corelib cleanup (dotnet/coreclr#26872)
[mono-project.git] / netcore / System.Private.CoreLib / shared / System / Char.cs
blobda9e1224f980656f241016763a031a54f961926d
1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
5 /*============================================================
6 **
7 **
8 **
9 ** Purpose: This is the value class representing a Unicode character
10 ** Char methods until we create this functionality.
13 ===========================================================*/
15 using System.Diagnostics;
16 using System.Globalization;
17 using System.Runtime.InteropServices;
18 using System.Text;
20 namespace System
22 [Serializable]
23 [StructLayout(LayoutKind.Sequential)]
24 [System.Runtime.CompilerServices.TypeForwardedFrom("mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089")]
25 public readonly struct Char : IComparable, IComparable<char>, IEquatable<char>, IConvertible
28 // Member Variables
private readonly char m_value; // Do not rename (binary serialization)

//
// Public Constants
//

// Largest value a char can hold (U+FFFF).
public const char MaxValue = '\uFFFF';

// Smallest value a char can hold (U+0000).
public const char MinValue = '\u0000';

// Bit layout of one Latin1CharInfo entry: three flag bits above a 5-bit UnicodeCategory.
private const byte IsWhiteSpaceFlag = 0b1000_0000;
private const byte IsUpperCaseLetterFlag = 0b0100_0000;
private const byte IsLowerCaseLetterFlag = 0b0010_0000;
private const byte UnicodeCategoryMask = 0b0001_1111;
// Per-character data for U+0000..U+00FF (C0 controls, Basic Latin, C1 controls,
// Latin-1 Supplement). Each byte packs:
//   0x80       - set when the character is whitespace
//   0x40       - set when the character is an uppercase letter
//   0x20       - set when the character is a lowercase letter
//   low 5 bits - the character's UnicodeCategory
//
// n.b. The table is frozen at an earlier version of the Unicode standard (2.0, perhaps?),
// so the categories here may differ from what CharUnicodeInfo or Rune report, since those
// generally follow the latest Unicode standard.
private static ReadOnlySpan<byte> Latin1CharInfo => new byte[]
{
    0x0E, 0x0E, 0x0E, 0x0E, 0x0E, 0x0E, 0x0E, 0x0E, 0x0E, 0x8E, 0x8E, 0x8E, 0x8E, 0x8E, 0x0E, 0x0E, // U+0000..U+000F
    0x0E, 0x0E, 0x0E, 0x0E, 0x0E, 0x0E, 0x0E, 0x0E, 0x0E, 0x0E, 0x0E, 0x0E, 0x0E, 0x0E, 0x0E, 0x0E, // U+0010..U+001F
    0x8B, 0x18, 0x18, 0x18, 0x1A, 0x18, 0x18, 0x18, 0x14, 0x15, 0x18, 0x19, 0x18, 0x13, 0x18, 0x18, // U+0020..U+002F
    0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x18, 0x18, 0x19, 0x19, 0x19, 0x18, // U+0030..U+003F
    0x18, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, // U+0040..U+004F
    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x14, 0x18, 0x15, 0x1B, 0x12, // U+0050..U+005F
    0x1B, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // U+0060..U+006F
    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x14, 0x19, 0x15, 0x19, 0x0E, // U+0070..U+007F
    0x0E, 0x0E, 0x0E, 0x0E, 0x0E, 0x8E, 0x0E, 0x0E, 0x0E, 0x0E, 0x0E, 0x0E, 0x0E, 0x0E, 0x0E, 0x0E, // U+0080..U+008F
    0x0E, 0x0E, 0x0E, 0x0E, 0x0E, 0x0E, 0x0E, 0x0E, 0x0E, 0x0E, 0x0E, 0x0E, 0x0E, 0x0E, 0x0E, 0x0E, // U+0090..U+009F
    0x8B, 0x18, 0x1A, 0x1A, 0x1A, 0x1A, 0x1C, 0x1C, 0x1B, 0x1C, 0x21, 0x16, 0x19, 0x13, 0x1C, 0x1B, // U+00A0..U+00AF
    0x1C, 0x19, 0x0A, 0x0A, 0x1B, 0x21, 0x1C, 0x18, 0x1B, 0x0A, 0x21, 0x17, 0x0A, 0x0A, 0x0A, 0x18, // U+00B0..U+00BF
    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, // U+00C0..U+00CF
    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x19, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x21, // U+00D0..U+00DF
    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // U+00E0..U+00EF
    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x19, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, // U+00F0..U+00FF
};
// True when ch has an entry in Latin1CharInfo, i.e. lies in U+0000..U+00FF
// (ASCII plus the Latin-1 Supplement).
private static bool IsLatin1(char ch) => (uint)ch < (uint)Latin1CharInfo.Length;
// True when ch lies in the ASCII range U+0000..U+007F.
private static bool IsAscii(char ch) => ch <= '\u007F';
// Looks up the UnicodeCategory stored in the low five bits of ch's Latin1CharInfo entry.
// The caller must pass a character <= U+00FF; this is only asserted in debug builds.
private static UnicodeCategory GetLatin1UnicodeCategory(char ch)
{
    Debug.Assert(IsLatin1(ch), "char.GetLatin1UnicodeCategory(): ch should be <= 00ff");

    int packedInfo = Latin1CharInfo[ch];
    return (UnicodeCategory)(packedInfo & UnicodeCategoryMask);
}
95 // Private Constants
// Overridden Instance Methods
// Hash code for a 16-bit character: the value is replicated into both
// the low and the high 16 bits of the result.
public override int GetHashCode()
{
    int codeUnit = m_value;
    return codeUnit | (codeUnit << 16);
}
// Equality against a boxed value: true only when obj is a boxed char
// holding the same value as this instance.
public override bool Equals(object? obj) => obj is char other && m_value == other.m_value;
// Strongly-typed equality: true when obj holds the same value as this instance.
[System.Runtime.Versioning.NonVersionable]
public bool Equals(char obj) => m_value == obj;
// Compares this instance with a boxed char and returns the signed difference of the
// two values: negative when this instance is smaller, zero when equal, positive when larger.
// null is considered to be less than any instance, so 1 is returned for it.
// Throws ArgumentException when value is neither null nor a char.
public int CompareTo(object? value)
{
    if (value is null)
    {
        return 1;
    }

    if (value is char other)
    {
        return m_value - other.m_value;
    }

    throw new ArgumentException(SR.Arg_MustBeChar);
}
// Returns the signed difference between this instance's value and value.
public int CompareTo(char value) => m_value - value;
// Overrides System.Object.ToString; delegates to the static char.ToString(char).
public override string ToString() => char.ToString(m_value);
// The provider argument is not consulted; the result is the same as ToString().
public string ToString(IFormatProvider? provider) => char.ToString(m_value);
162 // Formatting Methods
/*===================================ToString===================================
**This static method takes a character and returns the string representation of it.
==============================================================================*/
public static string ToString(char c)
{
    return string.CreateFromChar(c);
}
// Converts a string of exactly one character into that character.
// Throws ArgumentNullException for null and FormatException for any other length.
public static char Parse(string s)
{
    if (s is null)
    {
        throw new ArgumentNullException(nameof(s));
    }

    return s.Length == 1
        ? s[0]
        : throw new FormatException(SR.Format_NeedSingleChar);
}
// Non-throwing counterpart of Parse: succeeds only when s is a non-null string of
// length one. On failure result is set to '\0'.
public static bool TryParse(string? s, out char result)
{
    if (s is { Length: 1 })
    {
        result = s[0];
        return true;
    }

    result = '\0';
    return false;
}
201 // Static Methods
/*=================================IsDigit======================================
** Returns true when c is a decimal digit. In the Latin-1 range only '0'..'9'
** qualify; other characters are classified by CharUnicodeInfo.
==============================================================================*/
public static bool IsDigit(char c)
{
    return IsLatin1(c)
        ? IsInRange(c, '0', '9')
        : CharUnicodeInfo.GetUnicodeCategory(c) == UnicodeCategory.DecimalDigitNumber;
}
// Inclusive range test done with a single unsigned comparison: when c is below min the
// subtraction wraps around to a large unsigned value and the comparison fails.
internal static bool IsInRange(char c, char min, char max)
{
    return (uint)(c - min) <= (uint)(max - min);
}
// Same single-comparison inclusive range test, applied to UnicodeCategory values.
private static bool IsInRange(UnicodeCategory c, UnicodeCategory min, UnicodeCategory max)
{
    return (uint)(c - min) <= (uint)(max - min);
}
/*=================================CheckLetter==================================
** True when uc lies in the category range UppercaseLetter..OtherLetter.
==============================================================================*/
internal static bool CheckLetter(UnicodeCategory uc) =>
    IsInRange(uc, UnicodeCategory.UppercaseLetter, UnicodeCategory.OtherLetter);
/*=================================IsLetter=====================================
** Returns true when c is considered to be a letter.
==============================================================================*/
public static bool IsLetter(char c)
{
    if (!IsLatin1(c))
    {
        return CheckLetter(CharUnicodeInfo.GetUnicodeCategory(c));
    }

    // For the version of the Unicode standard the Char type is locked to, the Latin-1
    // range has no letters outside the "upper" and "lower" categories, so testing the
    // two case flags is sufficient.
    const int letterFlags = IsUpperCaseLetterFlag | IsLowerCaseLetterFlag;
    return (Latin1CharInfo[c] & letterFlags) != 0;
}
// Tests the whitespace flag of c's Latin1CharInfo entry. The caller must pass a
// character <= U+00FF; this is only asserted in debug builds.
private static bool IsWhiteSpaceLatin1(char c)
{
    Debug.Assert(IsLatin1(c));

    int packedInfo = Latin1CharInfo[c];
    return (packedInfo & IsWhiteSpaceFlag) != 0;
}
/*===============================IsWhiteSpace===================================
** Returns true when c is considered to be a whitespace character. Latin-1
** characters use the table flag; all others count as whitespace when their
** category is one of the separator categories.
==============================================================================*/
public static bool IsWhiteSpace(char c)
{
    return IsLatin1(c)
        ? IsWhiteSpaceLatin1(c)
        : CheckSeparator(CharUnicodeInfo.GetUnicodeCategory(c));
}
/*===================================IsUpper====================================
**Arguments: c -- the character to be checked.
**Returns:  True if c is an uppercase character.
==============================================================================*/
public static bool IsUpper(char c)
{
    if (!IsLatin1(c))
    {
        return CharUnicodeInfo.GetUnicodeCategory(c) == UnicodeCategory.UppercaseLetter;
    }

    return (Latin1CharInfo[c] & IsUpperCaseLetterFlag) != 0;
}
/*===================================IsLower====================================
**Arguments: c -- the character to be checked.
**Returns:  True if c is a lowercase character.
==============================================================================*/
public static bool IsLower(char c)
{
    if (!IsLatin1(c))
    {
        return CharUnicodeInfo.GetUnicodeCategory(c) == UnicodeCategory.LowercaseLetter;
    }

    return (Latin1CharInfo[c] & IsLowerCaseLetterFlag) != 0;
}
// True when uc lies in the category range ConnectorPunctuation..OtherPunctuation.
internal static bool CheckPunctuation(UnicodeCategory uc) =>
    IsInRange(uc, UnicodeCategory.ConnectorPunctuation, UnicodeCategory.OtherPunctuation);
/*================================IsPunctuation=================================
**Arguments: c -- the character to be checked.
**Returns:  True if c is a punctuation mark.
==============================================================================*/
public static bool IsPunctuation(char c)
{
    UnicodeCategory category = IsLatin1(c)
        ? GetLatin1UnicodeCategory(c)
        : CharUnicodeInfo.GetUnicodeCategory(c);
    return CheckPunctuation(category);
}
/*=============================CheckLetterOrDigit===============================
** True when uc is one of the letter categories or DecimalDigitNumber.
==============================================================================*/
internal static bool CheckLetterOrDigit(UnicodeCategory uc)
{
    if (CheckLetter(uc))
    {
        return true;
    }

    return uc == UnicodeCategory.DecimalDigitNumber;
}
// Returns true when c is a letter or a decimal digit.
public static bool IsLetterOrDigit(char c)
{
    UnicodeCategory category = IsLatin1(c)
        ? GetLatin1UnicodeCategory(c)
        : CharUnicodeInfo.GetUnicodeCategory(c);
    return CheckLetterOrDigit(category);
}
/*===================================ToUpper====================================
** Converts c to upper case using the casing rules of the supplied culture.
** Throws ArgumentNullException when culture is null.
==============================================================================*/
public static char ToUpper(char c, CultureInfo culture)
{
    if (culture is null)
    {
        throw new ArgumentNullException(nameof(culture));
    }

    TextInfo casing = culture.TextInfo;
    return casing.ToUpper(c);
}
/*=================================ToUpper======================================
** Converts c to upper case using the casing rules of CultureInfo.CurrentCulture.
==============================================================================*/
public static char ToUpper(char c)
{
    TextInfo casing = CultureInfo.CurrentCulture.TextInfo;
    return casing.ToUpper(c);
}
// Converts c to upper case using the casing rules of CultureInfo.InvariantCulture.
public static char ToUpperInvariant(char c) => CultureInfo.InvariantCulture.TextInfo.ToUpper(c);
/*===================================ToLower====================================
** Converts c to lower case using the casing rules of the supplied culture.
** Throws ArgumentNullException when culture is null.
==============================================================================*/
public static char ToLower(char c, CultureInfo culture)
{
    if (culture is null)
    {
        throw new ArgumentNullException(nameof(culture));
    }

    TextInfo casing = culture.TextInfo;
    return casing.ToLower(c);
}
/*=================================ToLower======================================
** Converts c to lower case using the casing rules of CultureInfo.CurrentCulture.
==============================================================================*/
public static char ToLower(char c)
{
    TextInfo casing = CultureInfo.CurrentCulture.TextInfo;
    return casing.ToLower(c);
}
// Converts c to lower case using the casing rules of CultureInfo.InvariantCulture.
public static char ToLowerInvariant(char c) => CultureInfo.InvariantCulture.TextInfo.ToLower(c);
390 // IConvertible implementation
// Type code reported for this type.
public TypeCode GetTypeCode() => TypeCode.Char;

// Explicit IConvertible members. Integral conversions are forwarded to the matching
// Convert.ToXxx(char) overload; Boolean, Single, Double, Decimal and DateTime are not
// supported and throw InvalidCastException. The provider argument is only used by ToType.

bool IConvertible.ToBoolean(IFormatProvider? provider) =>
    throw new InvalidCastException(SR.Format(SR.InvalidCast_FromTo, "Char", "Boolean"));

char IConvertible.ToChar(IFormatProvider? provider) => m_value;

sbyte IConvertible.ToSByte(IFormatProvider? provider) => Convert.ToSByte(m_value);

byte IConvertible.ToByte(IFormatProvider? provider) => Convert.ToByte(m_value);

short IConvertible.ToInt16(IFormatProvider? provider) => Convert.ToInt16(m_value);

ushort IConvertible.ToUInt16(IFormatProvider? provider) => Convert.ToUInt16(m_value);

int IConvertible.ToInt32(IFormatProvider? provider) => Convert.ToInt32(m_value);

uint IConvertible.ToUInt32(IFormatProvider? provider) => Convert.ToUInt32(m_value);

long IConvertible.ToInt64(IFormatProvider? provider) => Convert.ToInt64(m_value);

ulong IConvertible.ToUInt64(IFormatProvider? provider) => Convert.ToUInt64(m_value);

float IConvertible.ToSingle(IFormatProvider? provider) =>
    throw new InvalidCastException(SR.Format(SR.InvalidCast_FromTo, "Char", "Single"));

double IConvertible.ToDouble(IFormatProvider? provider) =>
    throw new InvalidCastException(SR.Format(SR.InvalidCast_FromTo, "Char", "Double"));

decimal IConvertible.ToDecimal(IFormatProvider? provider) =>
    throw new InvalidCastException(SR.Format(SR.InvalidCast_FromTo, "Char", "Decimal"));

DateTime IConvertible.ToDateTime(IFormatProvider? provider) =>
    throw new InvalidCastException(SR.Format(SR.InvalidCast_FromTo, "Char", "DateTime"));

object IConvertible.ToType(Type type, IFormatProvider? provider) =>
    Convert.DefaultToType((IConvertible)this, type, provider);
// Returns true when c's Unicode category is Control.
public static bool IsControl(char c)
{
    UnicodeCategory category = IsLatin1(c)
        ? GetLatin1UnicodeCategory(c)
        : CharUnicodeInfo.GetUnicodeCategory(c);
    return category == UnicodeCategory.Control;
}
// Returns true when the character at s[index] has the Unicode category Control.
// Throws ArgumentNullException for a null string and ArgumentOutOfRangeException
// when index is outside the string.
public static bool IsControl(string s, int index)
{
    if (s is null)
    {
        throw new ArgumentNullException(nameof(s));
    }

    // A single unsigned comparison rejects both negative and too-large indices.
    if ((uint)index >= (uint)s.Length)
    {
        throw new ArgumentOutOfRangeException(nameof(index));
    }

    char ch = s[index];
    UnicodeCategory category = IsLatin1(ch)
        ? GetLatin1UnicodeCategory(ch)
        : CharUnicodeInfo.GetUnicodeCategory(s, index);
    return category == UnicodeCategory.Control;
}
// Returns true when the character at s[index] is a decimal digit.
// Throws ArgumentNullException for a null string and ArgumentOutOfRangeException
// when index is outside the string.
public static bool IsDigit(string s, int index)
{
    if (s is null)
    {
        throw new ArgumentNullException(nameof(s));
    }

    // A single unsigned comparison rejects both negative and too-large indices.
    if ((uint)index >= (uint)s.Length)
    {
        throw new ArgumentOutOfRangeException(nameof(index));
    }

    char ch = s[index];
    return IsLatin1(ch)
        ? IsInRange(ch, '0', '9')
        : CharUnicodeInfo.GetUnicodeCategory(s, index) == UnicodeCategory.DecimalDigitNumber;
}
// Returns true when the character at s[index] is a letter.
// Throws ArgumentNullException for a null string and ArgumentOutOfRangeException
// when index is outside the string.
public static bool IsLetter(string s, int index)
{
    if (s is null)
    {
        throw new ArgumentNullException(nameof(s));
    }

    // A single unsigned comparison rejects both negative and too-large indices.
    if ((uint)index >= (uint)s.Length)
    {
        throw new ArgumentOutOfRangeException(nameof(index));
    }

    char ch = s[index];
    if (!IsLatin1(ch))
    {
        return CheckLetter(CharUnicodeInfo.GetUnicodeCategory(s, index));
    }

    // The Latin-1 range has no letters outside the "upper" and "lower" categories,
    // so testing the two case flags is sufficient.
    const int letterFlags = IsUpperCaseLetterFlag | IsLowerCaseLetterFlag;
    return (Latin1CharInfo[ch] & letterFlags) != 0;
}
// Returns true when the character at s[index] is a letter or a decimal digit.
// Throws ArgumentNullException for a null string and ArgumentOutOfRangeException
// when index is outside the string.
public static bool IsLetterOrDigit(string s, int index)
{
    if (s is null)
    {
        throw new ArgumentNullException(nameof(s));
    }

    // A single unsigned comparison rejects both negative and too-large indices.
    if ((uint)index >= (uint)s.Length)
    {
        throw new ArgumentOutOfRangeException(nameof(index));
    }

    char ch = s[index];
    UnicodeCategory category = IsLatin1(ch)
        ? GetLatin1UnicodeCategory(ch)
        : CharUnicodeInfo.GetUnicodeCategory(s, index);
    return CheckLetterOrDigit(category);
}
// Returns true when the character at s[index] is a lowercase letter.
// Throws ArgumentNullException for a null string and ArgumentOutOfRangeException
// when index is outside the string.
public static bool IsLower(string s, int index)
{
    if (s is null)
    {
        throw new ArgumentNullException(nameof(s));
    }

    // A single unsigned comparison rejects both negative and too-large indices.
    if ((uint)index >= (uint)s.Length)
    {
        throw new ArgumentOutOfRangeException(nameof(index));
    }

    char ch = s[index];
    if (!IsLatin1(ch))
    {
        return CharUnicodeInfo.GetUnicodeCategory(s, index) == UnicodeCategory.LowercaseLetter;
    }

    return (Latin1CharInfo[ch] & IsLowerCaseLetterFlag) != 0;
}
/*=================================CheckNumber==================================
** True when uc lies in the category range DecimalDigitNumber..OtherNumber.
==============================================================================*/
internal static bool CheckNumber(UnicodeCategory uc) =>
    IsInRange(uc, UnicodeCategory.DecimalDigitNumber, UnicodeCategory.OtherNumber);
// Returns true when c belongs to one of the number categories.
public static bool IsNumber(char c)
{
    // ASCII is a subset of Latin-1, so it can be tested first: there only '0'..'9' count.
    if (IsAscii(c))
    {
        return IsInRange(c, '0', '9');
    }

    UnicodeCategory category = IsLatin1(c)
        ? GetLatin1UnicodeCategory(c)
        : CharUnicodeInfo.GetUnicodeCategory(c);
    return CheckNumber(category);
}
// Returns true when the character at s[index] belongs to one of the number categories.
// Throws ArgumentNullException for a null string and ArgumentOutOfRangeException
// when index is outside the string.
public static bool IsNumber(string s, int index)
{
    if (s is null)
    {
        throw new ArgumentNullException(nameof(s));
    }

    // A single unsigned comparison rejects both negative and too-large indices.
    if ((uint)index >= (uint)s.Length)
    {
        throw new ArgumentOutOfRangeException(nameof(index));
    }

    char ch = s[index];

    // ASCII is a subset of Latin-1, so it can be tested first: there only '0'..'9' count.
    if (IsAscii(ch))
    {
        return IsInRange(ch, '0', '9');
    }

    UnicodeCategory category = IsLatin1(ch)
        ? GetLatin1UnicodeCategory(ch)
        : CharUnicodeInfo.GetUnicodeCategory(s, index);
    return CheckNumber(category);
}
////////////////////////////////////////////////////////////////////////
//
//  IsPunctuation
//
//  Returns true when the character at s[index] is a punctuation character.
//  Throws ArgumentNullException for a null string and
//  ArgumentOutOfRangeException when index is outside the string.
//
////////////////////////////////////////////////////////////////////////
public static bool IsPunctuation(string s, int index)
{
    if (s is null)
    {
        throw new ArgumentNullException(nameof(s));
    }

    // A single unsigned comparison rejects both negative and too-large indices.
    if ((uint)index >= (uint)s.Length)
    {
        throw new ArgumentOutOfRangeException(nameof(index));
    }

    char ch = s[index];
    UnicodeCategory category = IsLatin1(ch)
        ? GetLatin1UnicodeCategory(ch)
        : CharUnicodeInfo.GetUnicodeCategory(s, index);
    return CheckPunctuation(category);
}
/*================================= CheckSeparator ============================
** True when uc lies in the category range SpaceSeparator..ParagraphSeparator.
==============================================================================*/
internal static bool CheckSeparator(UnicodeCategory uc) =>
    IsInRange(uc, UnicodeCategory.SpaceSeparator, UnicodeCategory.ParagraphSeparator);
// Separator test for the Latin-1 range. Only U+0020 (SPACE) and U+00A0 (NO-BREAK SPACE)
// qualify; there is no LineSeparator or ParagraphSeparator in this range.
private static bool IsSeparatorLatin1(char c)
{
    switch (c)
    {
        case '\u0020':
        case '\u00A0':
            return true;
        default:
            return false;
    }
}
// Returns true when c belongs to one of the separator categories.
public static bool IsSeparator(char c)
{
    return IsLatin1(c)
        ? IsSeparatorLatin1(c)
        : CheckSeparator(CharUnicodeInfo.GetUnicodeCategory(c));
}
// Returns true when the character at s[index] belongs to one of the separator categories.
// Throws ArgumentNullException for a null string and ArgumentOutOfRangeException
// when index is outside the string.
public static bool IsSeparator(string s, int index)
{
    if (s is null)
    {
        throw new ArgumentNullException(nameof(s));
    }

    // A single unsigned comparison rejects both negative and too-large indices.
    if ((uint)index >= (uint)s.Length)
    {
        throw new ArgumentOutOfRangeException(nameof(index));
    }

    char ch = s[index];
    return IsLatin1(ch)
        ? IsSeparatorLatin1(ch)
        : CheckSeparator(CharUnicodeInfo.GetUnicodeCategory(s, index));
}
// Returns true when c is a surrogate code unit, i.e. lies between
// CharUnicodeInfo.HIGH_SURROGATE_START and CharUnicodeInfo.LOW_SURROGATE_END inclusive.
public static bool IsSurrogate(char c) =>
    IsInRange(c, CharUnicodeInfo.HIGH_SURROGATE_START, CharUnicodeInfo.LOW_SURROGATE_END);
// Returns true when the code unit at s[index] is a surrogate.
// Throws ArgumentNullException for a null string and ArgumentOutOfRangeException
// when index is outside the string.
public static bool IsSurrogate(string s, int index)
{
    if (s is null)
    {
        throw new ArgumentNullException(nameof(s));
    }

    // A single unsigned comparison rejects both negative and too-large indices.
    if ((uint)index >= (uint)s.Length)
    {
        throw new ArgumentOutOfRangeException(nameof(index));
    }

    char codeUnit = s[index];
    return IsSurrogate(codeUnit);
}
/*================================= CheckSymbol ================================
** True when uc lies in the category range MathSymbol..OtherSymbol.
==============================================================================*/
internal static bool CheckSymbol(UnicodeCategory uc) =>
    IsInRange(uc, UnicodeCategory.MathSymbol, UnicodeCategory.OtherSymbol);
// Returns true when c belongs to one of the symbol categories.
public static bool IsSymbol(char c)
{
    UnicodeCategory category = IsLatin1(c)
        ? GetLatin1UnicodeCategory(c)
        : CharUnicodeInfo.GetUnicodeCategory(c);
    return CheckSymbol(category);
}
// Returns true when the character at s[index] belongs to one of the symbol categories.
// Throws ArgumentNullException for a null string and ArgumentOutOfRangeException
// when index is outside the string.
public static bool IsSymbol(string s, int index)
{
    if (s is null)
    {
        throw new ArgumentNullException(nameof(s));
    }

    // A single unsigned comparison rejects both negative and too-large indices.
    if ((uint)index >= (uint)s.Length)
    {
        throw new ArgumentOutOfRangeException(nameof(index));
    }

    char ch = s[index];
    UnicodeCategory category = IsLatin1(ch)
        ? GetLatin1UnicodeCategory(ch)
        : CharUnicodeInfo.GetUnicodeCategory(s, index);
    return CheckSymbol(category);
}
// Returns true when the character at s[index] is an uppercase letter.
// Throws ArgumentNullException for a null string and ArgumentOutOfRangeException
// when index is outside the string.
public static bool IsUpper(string s, int index)
{
    if (s is null)
    {
        throw new ArgumentNullException(nameof(s));
    }

    // A single unsigned comparison rejects both negative and too-large indices.
    if ((uint)index >= (uint)s.Length)
    {
        throw new ArgumentOutOfRangeException(nameof(index));
    }

    char ch = s[index];
    if (!IsLatin1(ch))
    {
        return CharUnicodeInfo.GetUnicodeCategory(s, index) == UnicodeCategory.UppercaseLetter;
    }

    return (Latin1CharInfo[ch] & IsUpperCaseLetterFlag) != 0;
}
// Returns true when the character at s[index] is considered whitespace.
// Throws ArgumentNullException for a null string and ArgumentOutOfRangeException
// when index is outside the string.
public static bool IsWhiteSpace(string s, int index)
{
    if (s is null)
    {
        throw new ArgumentNullException(nameof(s));
    }

    // A single unsigned comparison rejects both negative and too-large indices.
    if ((uint)index >= (uint)s.Length)
    {
        throw new ArgumentOutOfRangeException(nameof(index));
    }

    char candidate = s[index];
    return IsLatin1(candidate)
        ? IsWhiteSpaceLatin1(candidate)
        : CheckSeparator(CharUnicodeInfo.GetUnicodeCategory(s, index));
}
// Returns the UnicodeCategory of c: from the Latin-1 table when c <= U+00FF,
// otherwise from CharUnicodeInfo.
public static UnicodeCategory GetUnicodeCategory(char c)
{
    return IsLatin1(c)
        ? GetLatin1UnicodeCategory(c)
        : CharUnicodeInfo.GetUnicodeCategory((int)c);
}
// Returns the UnicodeCategory of the character at s[index].
// Throws ArgumentNullException for a null string and ArgumentOutOfRangeException
// when index is outside the string.
public static UnicodeCategory GetUnicodeCategory(string s, int index)
{
    if (s is null)
    {
        throw new ArgumentNullException(nameof(s));
    }

    // A single unsigned comparison rejects both negative and too-large indices.
    if ((uint)index >= (uint)s.Length)
    {
        throw new ArgumentOutOfRangeException(nameof(index));
    }

    char ch = s[index];
    return IsLatin1(ch)
        ? GetLatin1UnicodeCategory(ch)
        : CharUnicodeInfo.InternalGetUnicodeCategory(s, index);
}
// Returns the numeric value of c as reported by CharUnicodeInfo.GetNumericValue.
public static double GetNumericValue(char c) => CharUnicodeInfo.GetNumericValue(c);
// Returns the numeric value of the character at s[index] as reported by
// CharUnicodeInfo.GetNumericValue. Throws ArgumentNullException for a null string
// and ArgumentOutOfRangeException when index is outside the string.
public static double GetNumericValue(string s, int index)
{
    if (s is null)
    {
        throw new ArgumentNullException(nameof(s));
    }

    // A single unsigned comparison rejects both negative and too-large indices.
    if ((uint)index >= (uint)s.Length)
    {
        throw new ArgumentOutOfRangeException(nameof(index));
    }

    return CharUnicodeInfo.GetNumericValue(s, index);
}
/*================================= IsHighSurrogate ============================
** True when c lies between CharUnicodeInfo.HIGH_SURROGATE_START and
** CharUnicodeInfo.HIGH_SURROGATE_END inclusive.
==============================================================================*/
public static bool IsHighSurrogate(char c) =>
    IsInRange(c, CharUnicodeInfo.HIGH_SURROGATE_START, CharUnicodeInfo.HIGH_SURROGATE_END);
// Returns true when the code unit at s[index] is a high surrogate.
// Throws ArgumentNullException for a null string and ArgumentOutOfRangeException
// when index is outside the string.
public static bool IsHighSurrogate(string s, int index)
{
    if (s is null)
    {
        throw new ArgumentNullException(nameof(s));
    }

    // Same single unsigned comparison the other (string, int) overloads use; it rejects
    // both negative and too-large indices, exactly like "index < 0 || index >= s.Length".
    if ((uint)index >= (uint)s.Length)
    {
        throw new ArgumentOutOfRangeException(nameof(index));
    }

    return IsHighSurrogate(s[index]);
}
/*================================= IsLowSurrogate =============================
** True when c lies between CharUnicodeInfo.LOW_SURROGATE_START and
** CharUnicodeInfo.LOW_SURROGATE_END inclusive.
==============================================================================*/
public static bool IsLowSurrogate(char c) =>
    IsInRange(c, CharUnicodeInfo.LOW_SURROGATE_START, CharUnicodeInfo.LOW_SURROGATE_END);
// Returns true when the code unit at s[index] is a low surrogate.
// Throws ArgumentNullException for a null string and ArgumentOutOfRangeException
// when index is outside the string.
public static bool IsLowSurrogate(string s, int index)
{
    if (s is null)
    {
        throw new ArgumentNullException(nameof(s));
    }

    // Same single unsigned comparison the other (string, int) overloads use; it rejects
    // both negative and too-large indices, exactly like "index < 0 || index >= s.Length".
    if ((uint)index >= (uint)s.Length)
    {
        throw new ArgumentOutOfRangeException(nameof(index));
    }

    return IsLowSurrogate(s[index]);
}
/*================================= IsSurrogatePair ============================
** Returns true when s[index] and s[index + 1] form a surrogate pair. Returns
** false when index refers to the last code unit of the string.
** Throws ArgumentNullException for a null string and ArgumentOutOfRangeException
** when index is outside the string.
==============================================================================*/
public static bool IsSurrogatePair(string s, int index)
{
    if (s is null)
    {
        throw new ArgumentNullException(nameof(s));
    }

    // Same single unsigned comparison the other (string, int) overloads use; it rejects
    // both negative and too-large indices, exactly like "index < 0 || index >= s.Length".
    if ((uint)index >= (uint)s.Length)
    {
        throw new ArgumentOutOfRangeException(nameof(index));
    }

    // index + 1 cannot overflow here because index < s.Length.
    int next = index + 1;
    return next < s.Length && IsSurrogatePair(s[index], s[next]);
}
// Returns true when highSurrogate is a high surrogate and lowSurrogate is a low surrogate.
public static bool IsSurrogatePair(char highSurrogate, char lowSurrogate)
{
    // Both surrogate ranges are exactly 0x400 elements wide, which is a power of two.
    // Rebase each value to the start of its own range and combine the two offsets with a
    // bitwise OR: the result stays within the range width only when both offsets do, so a
    // single comparison validates both characters. A value below its range start wraps
    // around to a large unsigned number and fails the test.
    uint highOffset = (uint)highSurrogate - CharUnicodeInfo.HIGH_SURROGATE_START;
    uint lowOffset = (uint)lowSurrogate - CharUnicodeInfo.LOW_SURROGATE_START;
    uint combined = highOffset | lowOffset;

    return combined <= CharUnicodeInfo.HIGH_SURROGATE_RANGE;
}
// The end codepoint for Unicode plane 0.
internal const int UNICODE_PLANE00_END = 0xFFFF;
// The starting codepoint for Unicode plane 1. Plane 1 contains 0x010000 ~ 0x01ffff.
internal const int UNICODE_PLANE01_START = 0x01_0000;
// The end codepoint for Unicode plane 16. This is the maximum code point value allowed
// for Unicode. Plane 16 contains 0x100000 ~ 0x10ffff.
internal const int UNICODE_PLANE16_END = 0x10_FFFF;
/*================================= ConvertFromUtf32 ===========================
** Converts a UTF-32 value into its string form (the ToString() of the
** corresponding Rune). Throws ArgumentOutOfRangeException when utf32 is not a
** valid Unicode scalar value.
==============================================================================*/
public static string ConvertFromUtf32(int utf32)
{
    uint scalar = (uint)utf32;
    if (UnicodeUtility.IsValidUnicodeScalar(scalar))
    {
        return Rune.UnsafeCreate(scalar).ToString();
    }

    throw new ArgumentOutOfRangeException(nameof(utf32), SR.ArgumentOutOfRange_InvalidUTF32);
}
/*=============================ConvertToUtf32===================================
** Converts a surrogate pair to its UTF-32 value. Throws (via
** ConvertToUtf32_ThrowInvalidArgs) when either argument is outside its
** surrogate range.
==============================================================================*/
public static int ConvertToUtf32(char highSurrogate, char lowSurrogate)
{
    // Widen both to 32 bits and rebase each candidate onto the start of its own range.
    uint highOffset = (uint)highSurrogate - CharUnicodeInfo.HIGH_SURROGATE_START;
    uint lowOffset = (uint)lowSurrogate - CharUnicodeInfo.LOW_SURROGATE_START;

    // The two surrogate ranges have the same length, so one comparison on the OR of the
    // offsets validates both inputs. The failure path lives in a separate helper, which
    // picks the exception message naming the offending argument.
    if ((highOffset | lowOffset) > CharUnicodeInfo.HIGH_SURROGATE_RANGE)
    {
        ConvertToUtf32_ThrowInvalidArgs(highOffset);
    }

    // The (0x40 << 10) term accounts for uuuuu = wwww + 1 in the surrogate encoding.
    // lowOffset is known to be in range here, so it equals lowSurrogate - LOW_SURROGATE_START.
    int highPart = (int)highOffset << 10;
    return highPart + (int)lowOffset + (0x40 << 10);
}
// Throw helper for ConvertToUtf32(char, char); it never returns normally.
// When the high surrogate offset is out of range the exception names "highSurrogate";
// otherwise the low surrogate must have been the invalid one, so it names "lowSurrogate".
[StackTraceHidden]
private static void ConvertToUtf32_ThrowInvalidArgs(uint highSurrogateOffset)
{
    bool highIsInvalid = highSurrogateOffset > CharUnicodeInfo.HIGH_SURROGATE_RANGE;

    throw highIsInvalid
        ? new ArgumentOutOfRangeException(
            paramName: "highSurrogate",
            message: SR.ArgumentOutOfRange_InvalidHighSurrogate)
        : new ArgumentOutOfRangeException(
            paramName: "lowSurrogate",
            message: SR.ArgumentOutOfRange_InvalidLowSurrogate);
}
/*=============================ConvertToUtf32===================================
** Converts the character or surrogate pair starting at s[index] to its UTF-32
** value.
** The char at index must be either a BMP (non-surrogate) character or the high
** surrogate of a pair.
** Throws ArgumentNullException when s is null.
** Throws ArgumentOutOfRangeException when index is outside the string.
** Throws ArgumentException when a high surrogate is not followed by a low
** surrogate, or when s[index] is a low surrogate.
==============================================================================*/
public static int ConvertToUtf32(string s, int index)
{
    if (s is null)
    {
        throw new ArgumentNullException(nameof(s));
    }

    // Same single unsigned comparison the other (string, int) overloads use; it rejects
    // both negative and too-large indices, exactly like "index < 0 || index >= s.Length".
    if ((uint)index >= (uint)s.Length)
    {
        throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_Index);
    }

    // Offset of s[index] from the start of the surrogate block: 0..0x3ff is a high
    // surrogate, 0x400..0x7ff is a low surrogate, anything else is a BMP character.
    int firstOffset = (int)s[index] - CharUnicodeInfo.HIGH_SURROGATE_START;
    if (firstOffset < 0 || firstOffset > 0x7ff)
    {
        // Not a surrogate: the UTF-32 value is the code unit itself.
        return (int)s[index];
    }

    if (firstOffset > 0x3ff)
    {
        // A low surrogate at index has no high surrogate in front of it.
        throw new ArgumentException(SR.Format(SR.Argument_InvalidLowSurrogate, index), nameof(s));
    }

    // High surrogate: it must be followed by a low surrogate.
    if (index < s.Length - 1)
    {
        int secondOffset = (int)s[index + 1] - CharUnicodeInfo.LOW_SURROGATE_START;
        if (secondOffset >= 0 && secondOffset <= 0x3ff)
        {
            return (firstOffset * 0x400) + secondOffset + UNICODE_PLANE01_START;
        }
    }

    // Either the string ends after the high surrogate or the next char is not a low surrogate.
    throw new ArgumentException(SR.Format(SR.Argument_InvalidHighSurrogate, index), nameof(s));
}