Fix StyleCop warning SA1005 (single line comment spacing)
[mono-project.git] / netcore / System.Private.CoreLib / shared / System / Globalization / DateTimeFormatInfoScanner.cs
blob9541a0023b5c1a4cb93da8de55f515ee5e479f78
1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
5 ////////////////////////////////////////////////////////////////////////////
6 //
7 // DateTimeFormatInfoScanner
8 //
9 // Scan a specified DateTimeFormatInfo to search for data used in DateTime.Parse()
11 // The data includes:
13 // DateWords: such as "de" used in es-ES (Spanish) LongDatePattern.
14 // Postfix: such as "ta" used in fi-FI after the month name.
16 // This class is shared among mscorlib.dll and sysglobl.dll.
17 // Use conditional CULTURE_AND_REGIONINFO_BUILDER_ONLY to differentiate between
18 // methods for mscorlib.dll and sysglobl.dll.
20 ////////////////////////////////////////////////////////////////////////////
22 using System.Collections.Generic;
23 using System.Text;
25 namespace System.Globalization
// Format flags (IFORMATFLAGS in the LocaleEx.txt header).
// Every member other than None occupies its own bit, so the values can be OR-ed together.
internal enum FORMATFLAGS
{
    None = 0,
    UseGenitiveMonth = 1 << 0,
    UseLeapYearMonth = 1 << 1,
    UseSpacesInMonthNames = 1 << 2,
    UseHebrewParsing = 1 << 3,

    // Has spaces or non-breaking space in the day names.
    UseSpacesInDayNames = 1 << 4,

    // Has token starting with numbers.
    UseDigitPrefixInTokens = 1 << 5,
}
// Numeric identifiers of the calendars, stored as ushort.
internal enum CalendarId : ushort
{
    UNINITIALIZED_VALUE = 0,

    // Gregorian (localized) calendar
    GREGORIAN = 1,

    // Gregorian (U.S.) calendar
    GREGORIAN_US = 2,

    // Japanese Emperor Era calendar
    JAPAN = 3,

    /* SSS_WARNINGS_OFF */
    TAIWAN = 4, // Taiwan Era calendar /* SSS_WARNINGS_ON */

    // Korean Tangun Era calendar
    KOREA = 5,

    // Hijri (Arabic Lunar) calendar
    HIJRI = 6,

    // Thai calendar
    THAI = 7,

    // Hebrew (Lunar) calendar
    HEBREW = 8,

    // Gregorian Middle East French calendar
    GREGORIAN_ME_FRENCH = 9,

    // Gregorian Arabic calendar
    GREGORIAN_ARABIC = 10,

    // Gregorian Transliterated English calendar
    GREGORIAN_XLIT_ENGLISH = 11,

    GREGORIAN_XLIT_FRENCH = 12,

    // Note that all calendars after this point are MANAGED ONLY for now.
    JULIAN = 13,
    JAPANESELUNISOLAR = 14,
    CHINESELUNISOLAR = 15,

    // The next four IDs are reserved to match Office but are not implemented in our code.
    SAKA = 16,
    LUNAR_ETO_CHN = 17,
    LUNAR_ETO_KOR = 18,
    LUNAR_ETO_ROKUYOU = 19,

    KOREANLUNISOLAR = 20,
    TAIWANLUNISOLAR = 21,
    PERSIAN = 22,
    UMALQURA = 23,

    // Last calendar ID; same numeric value as UMALQURA.
    LAST_CALENDAR = UMALQURA
}
71 internal class DateTimeFormatInfoScanner
// Special prefix-like flag chars used in the date word list.
// Both are taken from the Unicode Private Use Area since we won't be using those chars in real data.

// Distinguishes a month postfix from a regular date word. A month postfix
// is "ta" in a long date pattern like "d. MMMM'ta 'yyyy" for fi-FI.
// In that case the entry is stored as MonthPostfixChar followed by "ta" ("\xe000ta").
internal const char MonthPostfixChar = '\xe000';

// Marks an ignorable symbol in the date word list.
//
// hu-HU has:
//      short date pattern: yyyy. MM. dd.;yyyy-MM-dd;yy-MM-dd
//      long date pattern: yyyy. MMMM d.
// Here, "." is the date separator (derived from the short date pattern). However,
// "." also appears at the end of the long date pattern. In this case, we just treat
// "." as an ignorable symbol so that the DateTime.Parse() state machine will not
// treat the additional date separator at the end of the y,m,d pattern as an error
// condition.
internal const char IgnorableSymbolChar = '\xe001';

// Known CJK date suffixes (year, month, day).
internal const string CJKYearSuff = "\u5e74";
internal const string CJKMonthSuff = "\u6708";
internal const string CJKDaySuff = "\u65e5";

// Known CJK / Chinese time suffixes (hour, minute, second).
internal const string CJKHourSuff = "\u6642";
internal const string ChineseHourSuff = "\u65f6";
internal const string CJKMinuteSuff = "\u5206";
internal const string CJKSecondSuff = "\u79d2";

// Known Korean date suffixes (year, month, day).
internal const string KoreanYearSuff = "\ub144";
internal const string KoreanMonthSuff = "\uc6d4";
internal const string KoreanDaySuff = "\uc77c";

// Known Korean time suffixes (hour, minute, second).
internal const string KoreanHourSuff = "\uc2dc";
internal const string KoreanMinuteSuff = "\ubd84";
internal const string KoreanSecondSuff = "\ucd08";
// The collection of date words & postfixes found so far.
internal List<string> m_dateWords = new List<string>();

// Backing field for KnownWords; stays null until the table is first requested.
private static volatile Dictionary<string, string>? s_knownWords;

// Table of words that AddDateWordOrPostfix must not record as date words.
// Only the keys matter; every value is string.Empty.
// The table is built on first access and then cached in s_knownWords.
private static Dictionary<string, string> KnownWords
{
    get
    {
        Dictionary<string, string>? table = s_knownWords;
        if (table == null)
        {
            table = new Dictionary<string, string>(16)
            {
                // Skip these special symbols.
                ["/"] = string.Empty,
                ["-"] = string.Empty,
                ["."] = string.Empty,

                // Skip known CJK suffixes.
                [CJKYearSuff] = string.Empty,
                [CJKMonthSuff] = string.Empty,
                [CJKDaySuff] = string.Empty,
                [KoreanYearSuff] = string.Empty,
                [KoreanMonthSuff] = string.Empty,
                [KoreanDaySuff] = string.Empty,
                [KoreanHourSuff] = string.Empty,
                [KoreanMinuteSuff] = string.Empty,
                [KoreanSecondSuff] = string.Empty,
                [CJKHourSuff] = string.Empty,
                [ChineseHourSuff] = string.Empty,
                [CJKMinuteSuff] = string.Empty,
                [CJKSecondSuff] = string.Empty
            };
            s_knownWords = table;
        }

        return table;
    }
}
// Advances past characters that cannot start a date word.
//
// Parameters:
//      pattern: The pattern to be scanned.
//      currentIndex: The index at which the scan starts.
//
// Returns:
//      The index of the first character at or after currentIndex that is a letter,
//      a single quote or a '.'. A character preceded by a backslash is examined in the
//      same way, i.e. the scan stops on it if it is a letter, a single quote or a '.'.
//      The result is pattern.Length when the end of the string is reached first
//      (including the case where the pattern ends with a lone backslash).
internal static int SkipWhiteSpacesAndNonLetter(string pattern, int currentIndex)
{
    int length = pattern.Length;

    while (currentIndex < length)
    {
        char current = pattern[currentIndex];

        if (current == '\\')
        {
            // Escaped character: step over the backslash and look at what follows.
            currentIndex++;
            if (currentIndex >= length)
            {
                // The backslash was the last character of the pattern.
                break;
            }

            current = pattern[currentIndex];
            if (current == '\'')
            {
                // Re-enter the loop positioned on the quote; the stop test below
                // then ends the scan at this index.
                continue;
            }

            // Otherwise fall through and classify the escaped character.
        }

        bool stopsScan = char.IsLetter(current) || current == '\'' || current == '.';
        if (stopsScan)
        {
            break;
        }

        // Not a character a date word can start with; move on.
        currentIndex++;
    }

    return currentIndex;
}
// Records a date word or month postfix in m_dateWords, avoiding duplicates.
//
// Parameters:
//      formatPostfix: What kind of postfix this is.
//          Possible values:
//              null: This is a regular date word.
//              "MMMM": month postfix.
//      str: The date word or postfix to be added.
//
// Nothing is added when str is empty or is one of the KnownWords. A str of exactly "."
// is forwarded to AddIgnorableSymbols instead of being stored as a word.
internal void AddDateWordOrPostfix(string? formatPostfix, string str)
{
    if (str.Length == 0)
    {
        return;
    }

    // Some cultures use . like an abbreviation
    if (str == ".")
    {
        AddIgnorableSymbols(".");
        return;
    }

    if (KnownWords.ContainsKey(str))
    {
        return;
    }

    if (m_dateWords == null)
    {
        m_dateWords = new List<string>();
    }

    if (formatPostfix == "MMMM")
    {
        // Month postfixes are stored as MonthPostfixChar + real month postfix.
        string postfixEntry = MonthPostfixChar + str;
        if (!m_dateWords.Contains(postfixEntry))
        {
            m_dateWords.Add(postfixEntry);
        }

        return;
    }

    if (!m_dateWords.Contains(str))
    {
        m_dateWords.Add(str);
    }

    if (str[^1] == '.')
    {
        // Old versions ignored the trailing dot in date words. Support this as well
        // by also storing the word without its final dot.
        string withoutDot = str[0..^1];
        if (!m_dateWords.Contains(withoutDot))
        {
            m_dateWords.Add(withoutDot);
        }
    }
}
// Scans the quoted text of a pattern from the specified index and adds the date
// words/postfix found in it via AddDateWordOrPostfix.
//
// Parameters:
//      pattern: The pattern to be scanned.
//      index: The starting index to be scanned (the character after the opening quote).
//      formatPostfix: The kind of postfix to be scanned.
//          Possible values:
//              null: This is a regular date word.
//              "MMMM": month postfix.
//
// Returns:
//      The index just past the closing single quote, or the index at which the scan ran
//      off the end of the pattern. Text still pending when the pattern ends without a
//      closing quote is not added.
internal int AddDateWords(string pattern, int index, string? formatPostfix)
{
    // Skip any whitespace/non-letters so that we start from the first word.
    int start = SkipWhiteSpacesAndNonLetter(pattern, index);
    if (start != index)
    {
        // Something was skipped before the first word, so this will not be a postfix.
        formatPostfix = null;
    }

    index = start;

    // Accumulates the characters of the word currently being read.
    StringBuilder word = new StringBuilder();

    while (index < pattern.Length)
    {
        char current = pattern[index];
        index++;

        if (current == '\'')
        {
            // End of the quoted text. Add the word (if it is new) and stop.
            AddDateWordOrPostfix(formatPostfix, word.ToString());
            break;
        }

        if (current == '\\')
        {
            // Escaped character: drop the backslash and take the next character
            // literally, if there is one.
            if (index < pattern.Length)
            {
                word.Append(pattern[index]);
                index++;
            }

            continue;
        }

        if (char.IsWhiteSpace(current))
        {
            // A whitespace terminates the current date word/postfix.
            AddDateWordOrPostfix(formatPostfix, word.ToString());

            // Done with the postfix (if any). The rest will be regular date words.
            formatPostfix = null;

            // Start collecting a new word.
            word.Length = 0;
            continue;
        }

        word.Append(current);
    }

    return index;
}
// A simple helper that measures a run of one repeated character.
//
// Parameters:
//      pattern: The pattern to be scanned.
//      ch: The character whose repetitions are counted.
//      index: The position of the first character of the run. That character is not
//             compared against ch; it is always counted as the first occurrence.
//      count: Receives the length of the run (at least 1).
//
// Returns:
//      The index of the first character after the run (pattern.Length if the run
//      extends to the end of the pattern).
internal static int ScanRepeatChar(string pattern, char ch, int index, out int count)
{
    int next = index + 1;
    while (next < pattern.Length && pattern[next] == ch)
    {
        next++;
    }

    count = next - index;

    // Return the updated position.
    return next;
}
// Adds text that is a date separator but is treated like an ignorable symbol.
// The entry is stored in m_dateWords as IgnorableSymbolChar followed by the text,
// and is only added if it is not already present.
// E.g.
// hu-HU has:
//      short date pattern: yyyy. MM. dd.;yyyy-MM-dd;yy-MM-dd
//      long date pattern: yyyy. MMMM d.
// Here, "." is the date separator (derived from the short date pattern). However,
// "." also appears at the end of the long date pattern. In this case, we just treat
// "." as an ignorable symbol so that the DateTime.Parse() state machine will not
// treat the additional date separator at the end of the y,m,d pattern as an error
// condition.
internal void AddIgnorableSymbols(string? text)
{
    // Create the date word list on demand.
    if (m_dateWords == null)
    {
        m_dateWords = new List<string>();
    }

    string entry = IgnorableSymbolChar + text;
    if (m_dateWords.Contains(entry))
    {
        return;
    }

    m_dateWords.Add(entry);
}
// Flags used to track which of the year, month and day patterns have been seen
// while scanning a date pattern.
private enum FoundDatePattern
{
    None = 0,
    FoundYearPatternFlag = 1 << 0,
    FoundMonthPatternFlag = 1 << 1,
    FoundDayPatternFlag = 1 << 2,

    // All three of the flags above.
    FoundYMDPatternFlag = FoundYearPatternFlag | FoundMonthPatternFlag | FoundDayPatternFlag,
}

// The year/month/day patterns found so far; equals FoundYMDPatternFlag once all have been seen.
private FoundDatePattern _ymdFlags = FoundDatePattern.None;
// Given a date format pattern, scans it for date words and postfixes.
//
// A date word should always be put in a single quoted string. It will
// start from a letter, so whitespace and symbols are ignored before
// the first letter.
//
// Examples of date word:
//  'de' in es-SP: dddd, dd' de 'MMMM' de 'yyyy
//  "\x0433." in bg-BG: dd.M.yyyy '\x0433.'
//
// Example of postfix:
//  month postfix:
//      "ta" in fi-FI: d. MMMM'ta 'yyyy
//  Currently, only month postfix is supported.
//
// While scanning, _ymdFlags tracks whether a year, a month and a day ("d"/"dd") pattern
// have all been seen; a '.' met in that state is recorded as an ignorable symbol.
//
// Usage:
//  Always call this with a Framework-style pattern, instead of a Windows style pattern.
//  Windows style pattern uses '' for single quote, while .NET uses \'
internal void ScanDateWord(string pattern)
{
    // Start with no year/month/day pattern seen.
    _ymdFlags = FoundDatePattern.None;

    int pos = 0;
    while (pos < pattern.Length)
    {
        char current = pattern[pos];

        if (current == '\'')
        {
            // Found a beginning quote. Scan until the end quote.
            pos = AddDateWords(pattern, pos + 1, null);
        }
        else if (current == 'M')
        {
            pos = ScanRepeatChar(pattern, 'M', pos, out int monthCount);

            // A quoted string directly after "MMMM" (or longer) may hold a month postfix.
            bool quoteFollowsLongMonth =
                monthCount >= 4 && pos < pattern.Length && pattern[pos] == '\'';
            if (quoteFollowsLongMonth)
            {
                pos = AddDateWords(pattern, pos + 1, "MMMM");
            }

            _ymdFlags |= FoundDatePattern.FoundMonthPatternFlag;
        }
        else if (current == 'y')
        {
            pos = ScanRepeatChar(pattern, 'y', pos, out _);
            _ymdFlags |= FoundDatePattern.FoundYearPatternFlag;
        }
        else if (current == 'd')
        {
            pos = ScanRepeatChar(pattern, 'd', pos, out int dayCount);
            if (dayCount <= 2)
            {
                // Only count "d" & "dd".
                // ddd, dddd are day names. Do not count them.
                _ymdFlags |= FoundDatePattern.FoundDayPatternFlag;
            }
        }
        else if (current == '\\')
        {
            // Found an escaped char not in a quoted string. Skip the current backslash
            // and its next character.
            pos += 2;
        }
        else if (current == '.')
        {
            if (_ymdFlags == FoundDatePattern.FoundYMDPatternFlag)
            {
                // A dot found after all of the y, m, d patterns have been seen is
                // treated as an ignorable symbol. See AddIgnorableSymbols for details.
                AddIgnorableSymbols(".");
                _ymdFlags = FoundDatePattern.None;
            }

            pos++;
        }
        else
        {
            if (_ymdFlags == FoundDatePattern.FoundYMDPatternFlag && !char.IsWhiteSpace(current))
            {
                // We are not seeing "." after YMD. Clear the flag.
                _ymdFlags = FoundDatePattern.None;
            }

            // We are not in a quote. Skip the current character.
            pos++;
        }
    }
}
// Given a DTFI, gets all of the date words from its date patterns and time patterns.
//
// The patterns are scanned in this order: long date ('D'), short date ('d'),
// year/month ('y'), the month/day pattern, long time ('T') and short time ('t').
//
// Returns:
//      A copy of the collected date words, or null when none have been collected.
internal string[]? GetDateWordsOfDTFI(DateTimeFormatInfo dtfi)
{
    // Scans every pattern the DTFI reports for one standard format character.
    void ScanAllPatterns(char format)
    {
        foreach (string pattern in dtfi.GetAllDateTimePatterns(format))
        {
            ScanDateWord(pattern);
        }
    }

    // Enumerate all LongDatePatterns to get the date words and scan for month postfix.
    ScanAllPatterns('D');

    // Scan the short date patterns.
    ScanAllPatterns('d');

    // Scan the YearMonth patterns.
    ScanAllPatterns('y');

    // Scan the month/day pattern.
    ScanDateWord(dtfi.MonthDayPattern);

    // Scan the long time patterns.
    ScanAllPatterns('T');

    // Scan the short time patterns.
    ScanAllPatterns('t');

    if (m_dateWords == null || m_dateWords.Count == 0)
    {
        return null;
    }

    return m_dateWords.ToArray();
}
// Compares the regular month names with the genitive ones and returns the format flag.
//
// Returns:
//      FORMATFLAGS.UseGenitiveMonth if the full or the abbreviated genitive names differ
//      from their regular counterparts; otherwise FORMATFLAGS.None.
internal static FORMATFLAGS GetFormatFlagGenitiveMonth(string[] monthNames, string[] genitveMonthNames, string[] abbrevMonthNames, string[] genetiveAbbrevMonthNames)
{
    bool genitiveMatchesRegular =
        EqualStringArrays(monthNames, genitveMonthNames) &&
        EqualStringArrays(abbrevMonthNames, genetiveAbbrevMonthNames);

    if (genitiveMatchesRegular)
    {
        return FORMATFLAGS.None;
    }

    return FORMATFLAGS.UseGenitiveMonth;
}
// Scans the month names to see if they contain whitespace or start with a digit,
// and returns the matching format flags.
//
// Returns:
//      A combination of FORMATFLAGS.UseDigitPrefixInTokens (set when
//      ArrayElementsBeginWithDigit reports true for any of the four arrays) and
//      FORMATFLAGS.UseSpacesInMonthNames (set when ArrayElementsHaveSpace reports true
//      for any of the four arrays).
internal static FORMATFLAGS GetFormatFlagUseSpaceInMonthNames(string[] monthNames, string[] genitveMonthNames, string[] abbrevMonthNames, string[] genetiveAbbrevMonthNames)
{
    FORMATFLAGS flags = FORMATFLAGS.None;

    if (ArrayElementsBeginWithDigit(monthNames) ||
        ArrayElementsBeginWithDigit(genitveMonthNames) ||
        ArrayElementsBeginWithDigit(abbrevMonthNames) ||
        ArrayElementsBeginWithDigit(genetiveAbbrevMonthNames))
    {
        flags |= FORMATFLAGS.UseDigitPrefixInTokens;
    }

    if (ArrayElementsHaveSpace(monthNames) ||
        ArrayElementsHaveSpace(genitveMonthNames) ||
        ArrayElementsHaveSpace(abbrevMonthNames) ||
        ArrayElementsHaveSpace(genetiveAbbrevMonthNames))
    {
        flags |= FORMATFLAGS.UseSpacesInMonthNames;
    }

    return flags;
}
// Scans the day names and returns the matching format flag.
//
// Returns:
//      FORMATFLAGS.UseSpacesInDayNames if any full or abbreviated day name contains
//      whitespace; otherwise FORMATFLAGS.None.
internal static FORMATFLAGS GetFormatFlagUseSpaceInDayNames(string[] dayNames, string[] abbrevDayNames)
{
    if (ArrayElementsHaveSpace(dayNames) || ArrayElementsHaveSpace(abbrevDayNames))
    {
        return FORMATFLAGS.UseSpacesInDayNames;
    }

    return FORMATFLAGS.None;
}
// Checks whether the calendar ID is that of the Hebrew calendar and returns the
// Hebrew format flags if so.
//
// Returns:
//      UseHebrewParsing | UseLeapYearMonth when calID equals CalendarId.HEBREW;
//      otherwise FORMATFLAGS.None.
internal static FORMATFLAGS GetFormatFlagUseHebrewCalendar(int calID)
{
    if (calID != (int)CalendarId.HEBREW)
    {
        return FORMATFLAGS.None;
    }

    return FORMATFLAGS.UseHebrewParsing | FORMATFLAGS.UseLeapYearMonth;
}
//-----------------------------------------------------------------------------
// EqualStringArrays
//      Compares two string arrays element by element.
//      Returns true if both are the same array instance, or if they have the same
//      length and every element of the first array equals the element at the same
//      position in the second array. Otherwise it returns false.
//-----------------------------------------------------------------------------
private static bool EqualStringArrays(string[] array1, string[] array2)
{
    // Shortcut if they're the same array
    if (ReferenceEquals(array1, array2))
    {
        return true;
    }

    // This is effectively impossible
    int length = array1.Length;
    if (length != array2.Length)
    {
        return false;
    }

    // Check each string
    int position = 0;
    while (position < length)
    {
        if (!string.Equals(array1[position], array2[position]))
        {
            return false;
        }

        position++;
    }

    return true;
}
//-----------------------------------------------------------------------------
// ArrayElementsHaveSpace
//      Checks every element of the input array for a whitespace character
//      (as classified by char.IsWhiteSpace).
//      Returns true as soon as one is found in any element; otherwise returns false.
//-----------------------------------------------------------------------------
private static bool ArrayElementsHaveSpace(string[] array)
{
    foreach (string element in array)
    {
        // Each character is checked by hand rather than through a string search call.
        foreach (char c in element)
        {
            if (char.IsWhiteSpace(c))
            {
                return true;
            }
        }
    }

    return false;
}
// Checks whether the array has an element that starts with a digit ('0'-'9') in a
// way that is not one of the known, cheaper-to-handle forms.
//
// Only the first element that starts with a digit decides the result; later elements
// are not inspected. For that element the result is false when it
//      - consists of digits only,
//      - is digits followed by exactly one known month suffix char
//        ('\x6708' CJKMonthSuff or '\xc6d4' KoreanMonthSuff), or
//      - is digits followed by exactly the four chars "' \x6708'",
// and true otherwise. The result is false when no element starts with a digit.
private static bool ArrayElementsBeginWithDigit(string[] array)
{
    foreach (string element in array)
    {
        if (element.Length == 0 || !IsAsciiDigit(element[0]))
        {
            continue;
        }

        // Skip other digits.
        int digitCount = 1;
        while (digitCount < element.Length && IsAsciiDigit(element[digitCount]))
        {
            digitCount++;
        }

        int charsAfterDigits = element.Length - digitCount;

        if (charsAfterDigits == 0)
        {
            return false;
        }

        if (charsAfterDigits == 1)
        {
            // Skip known CJK month suffix.
            // CJK uses month names like "1\x6708"; since \x6708 is a known month suffix,
            // we don't need UseDigitPrefixInTokens, which is slower.
            char suffix = element[digitCount];
            if (suffix == '\x6708' || // CJKMonthSuff
                suffix == '\xc6d4')   // KoreanMonthSuff
            {
                return false;
            }
        }

        if (charsAfterDigits == 4)
        {
            // Skip known CJK month suffix.
            // Starting with Windows 8, the CJK months for some cultures look like: "1' \x6708'"
            // instead of just "1\x6708"
            if (element[digitCount] == '\'' && element[digitCount + 1] == ' ' &&
                element[digitCount + 2] == '\x6708' && element[digitCount + 3] == '\'')
            {
                return false;
            }
        }

        return true;
    }

    return false;

    static bool IsAsciiDigit(char c) => c >= '0' && c <= '9';
}