1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
5 ////////////////////////////////////////////////////////////////////////////
7 // DateTimeFormatInfoScanner
9 // Scan a specified DateTimeFormatInfo to search for data used in DateTime.Parse()
13 // DateWords: such as "de" used in es-ES (Spanish) LongDatePattern.
14 // Postfix: such as "ta" used in fi-FI after the month name.
16 // This class is shared among mscorlib.dll and sysglobl.dll.
17 // Use conditional CULTURE_AND_REGIONINFO_BUILDER_ONLY to differentiate between
18 // methods for mscorlib.dll and sysglobl.dll.
20 ////////////////////////////////////////////////////////////////////////////
using System.Collections.Generic;
using System.Text;
25 namespace System
.Globalization
27 // from LocaleEx.txt header
/// <summary>
/// Bit flags describing properties of a culture's month names, day names and
/// calendar. Members are powers of two and may be combined with bitwise OR.
/// </summary>
[Flags]
internal enum FORMATFLAGS
{
    // No flag set. Same value as the literal 0 the GetFormatFlag* helpers return.
    None = 0x00000000,
    // The regular and genitive month names differ.
    UseGenitiveMonth = 0x00000001,
    // Reported together with UseHebrewParsing for the Hebrew calendar.
    UseLeapYearMonth = 0x00000002,
    // At least one month name contains a white-space character.
    UseSpacesInMonthNames = 0x00000004,
    // The calendar is the Hebrew calendar.
    UseHebrewParsing = 0x00000008,
    UseSpacesInDayNames = 0x00000010,    // Has spaces or non-breaking space in the day names.
    UseDigitPrefixInTokens = 0x00000020, // Has token starting with numbers.
}
/// <summary>
/// Numeric calendar identifiers. The underlying type is <see cref="ushort"/>.
/// </summary>
internal enum CalendarId : ushort
{
    UNINITIALIZED_VALUE = 0,
    GREGORIAN = 1,               // Gregorian (localized) calendar
    GREGORIAN_US = 2,            // Gregorian (U.S.) calendar
    JAPAN = 3,                   // Japanese Emperor Era calendar
    /* SSS_WARNINGS_OFF */
    TAIWAN = 4,                  // Taiwan Era calendar /* SSS_WARNINGS_ON */
    KOREA = 5,                   // Korean Tangun Era calendar
    HIJRI = 6,                   // Hijri (Arabic Lunar) calendar
    THAI = 7,                    // Thai calendar
    HEBREW = 8,                  // Hebrew (Lunar) calendar
    GREGORIAN_ME_FRENCH = 9,     // Gregorian Middle East French calendar
    GREGORIAN_ARABIC = 10,       // Gregorian Arabic calendar
    GREGORIAN_XLIT_ENGLISH = 11, // Gregorian Transliterated English calendar
    GREGORIAN_XLIT_FRENCH = 12,
    // Note that all calendars after this point are MANAGED ONLY for now.
    JULIAN = 13,
    JAPANESELUNISOLAR = 14,
    CHINESELUNISOLAR = 15,
    SAKA = 16,                   // reserved to match Office but not implemented in our code
    LUNAR_ETO_CHN = 17,          // reserved to match Office but not implemented in our code
    LUNAR_ETO_KOR = 18,          // reserved to match Office but not implemented in our code
    LUNAR_ETO_ROKUYOU = 19,      // reserved to match Office but not implemented in our code
    KOREANLUNISOLAR = 20,
    TAIWANLUNISOLAR = 21,
    PERSIAN = 22,
    UMALQURA = 23,
    LAST_CALENDAR = 23           // Last calendar ID
}
71 internal class DateTimeFormatInfoScanner
// Marker characters placed in front of an entry in the date word list.
// They are taken from the PUA area since those chars are not used in real data.

// Tells a month postfix apart from a regular date word. A month postfix is the
// "ta" in a long date pattern like "d. MMMM'ta 'yyyy" for fi-FI; it is stored
// in the date word list as "\xe000ta".
internal const char MonthPostfixChar = '\xe000';

// Tags an ignorable symbol in the date word list. Example:
//   short date pattern: yyyy. MM. dd.;yyyy-MM-dd;yy-MM-dd
//   long date pattern:  yyyy. MMMM d.
// Here "." is the date separator (derived from the short date pattern), but it
// also appears at the end of the long date pattern. Such a "." is recorded as an
// ignorable symbol so that the DateTime.Parse() state machine does not treat the
// extra separator after the y,m,d pattern as an error.
internal const char IgnorableSymbolChar = '\xe001';

// Year suffixes.
internal const string CJKYearSuff = "\u5e74";
internal const string KoreanYearSuff = "\ub144";

// Month suffixes.
internal const string CJKMonthSuff = "\u6708";
internal const string KoreanMonthSuff = "\uc6d4";

// Day suffixes.
internal const string CJKDaySuff = "\u65e5";
internal const string KoreanDaySuff = "\uc77c";

// Hour suffixes.
internal const string CJKHourSuff = "\u6642";
internal const string ChineseHourSuff = "\u65f6";
internal const string KoreanHourSuff = "\uc2dc";

// Minute suffixes.
internal const string CJKMinuteSuff = "\u5206";
internal const string KoreanMinuteSuff = "\ubd84";

// Second suffixes.
internal const string CJKSecondSuff = "\u79d2";
internal const string KoreanSecondSuff = "\ucd08";

// The collection of date words and postfixes found so far.
internal List<string> m_dateWords = new List<string>();
114 // Hashtable for the known words.
// Backing store for KnownWords; null until the table is first requested.
private static volatile Dictionary<string, string>? s_knownWords;

/// <summary>
/// Words that must never be recorded as date words: the common date separators
/// and the known CJK/Korean year, month, day, hour, minute and second suffixes.
/// Only the keys matter; every value is the empty string.
/// </summary>
/// <remarks>
/// The table is built on first access and cached in <c>s_knownWords</c>, so
/// repeated lookups do not allocate a new dictionary each time.
/// </remarks>
private static Dictionary<string, string> KnownWords =>
    s_knownWords ??=
    new Dictionary<string, string>(16)
    {
        // Add known words into the hash table.

        // Skip these special symbols.
        { "/", string.Empty },
        { "-", string.Empty },
        { ".", string.Empty },

        // Skip known CJK suffixes.
        { CJKYearSuff, string.Empty },
        { CJKMonthSuff, string.Empty },
        { CJKDaySuff, string.Empty },
        { KoreanYearSuff, string.Empty },
        { KoreanMonthSuff, string.Empty },
        { KoreanDaySuff, string.Empty },
        { KoreanHourSuff, string.Empty },
        { KoreanMinuteSuff, string.Empty },
        { KoreanSecondSuff, string.Empty },
        { CJKHourSuff, string.Empty },
        { ChineseHourSuff, string.Empty },
        { CJKMinuteSuff, string.Empty },
        { CJKSecondSuff, string.Empty }
    };
144 ////////////////////////////////////////////////////////////////////////////
147 // pattern: The pattern to be scanned.
148 // currentIndex: the current index to start the scan.
151 // Return the index with the first character that is a letter, which will
152 // be the start of a date word.
153 // Note that the index can be pattern.Length if we reach the end of the string.
155 ////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Advances from <paramref name="currentIndex"/> past characters that cannot
/// start a date word.
/// </summary>
/// <param name="pattern">The pattern to be scanned.</param>
/// <param name="currentIndex">The index at which to start the scan.</param>
/// <returns>
/// The index of the first character that is a letter, a single quote or a '.',
/// or <c>pattern.Length</c> when the end of the string is reached first.
/// </returns>
internal static int SkipWhiteSpacesAndNonLetter(string pattern, int currentIndex)
{
    while (currentIndex < pattern.Length)
    {
        char ch = pattern[currentIndex];
        if (ch == '\\')
        {
            // Escaped character. Look ahead one character.
            currentIndex++;
            if (currentIndex >= pattern.Length)
            {
                // The backslash was the last character: end of string.
                break;
            }

            ch = pattern[currentIndex];
            if (ch == '\'')
            {
                // Step over the backslash only; the quote itself is examined
                // again on the next pass of the loop.
                continue;
            }
            // Fall through to check if the escaped character is a letter.
        }

        if (char.IsLetter(ch) || ch == '\'' || ch == '.')
        {
            break;
        }

        // Skip the current char since it is not a letter.
        currentIndex++;
    }

    return currentIndex;
}
192 ////////////////////////////////////////////////////////////////////////////
194 // A helper to add the found date word or month postfix into ArrayList for date words.
197 // formatPostfix: What kind of postfix this is.
199 // null: This is a regular date word
200 // "MMMM": month postfix
201 // word: The date word or postfix to be added.
203 ////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Records a date word or month postfix in <c>m_dateWords</c> unless it is
/// empty, a known word, or already present.
/// </summary>
/// <param name="formatPostfix">
/// <c>"MMMM"</c> when <paramref name="str"/> is a month postfix; any other
/// value (normally <c>null</c>) when it is a regular date word.
/// </param>
/// <param name="str">The date word or postfix to be added.</param>
internal void AddDateWordOrPostfix(string? formatPostfix, string str)
{
    if (str.Length == 0)
    {
        return;
    }

    // Some cultures use . like an abbreviation
    if (str == ".")
    {
        AddIgnorableSymbols(".");
        return;
    }

    if (KnownWords.TryGetValue(str, out _))
    {
        // Separators and known CJK suffixes are never date words.
        return;
    }

    if (m_dateWords == null)
    {
        m_dateWords = new List<string>();
    }

    if (formatPostfix == "MMMM")
    {
        // Store the postfix as MonthPostfixChar + real month postfix.
        string temp = MonthPostfixChar + str;
        if (!m_dateWords.Contains(temp))
        {
            m_dateWords.Add(temp);
        }
        return;
    }

    if (!m_dateWords.Contains(str))
    {
        m_dateWords.Add(str);
    }

    if (str[^1] == '.')
    {
        // Old version ignore the trailing dot in the date words. Support this as well.
        string strWithoutDot = str[0..^1];
        if (!m_dateWords.Contains(strWithoutDot))
        {
            m_dateWords.Add(strWithoutDot);
        }
    }
}
250 ////////////////////////////////////////////////////////////////////////////
252 // Scan the pattern from the specified index and add the date word/postfix
256 // pattern: The pattern to be scanned.
257 // index: The starting index to be scanned.
258 // formatPostfix: The kind of postfix to be scanned.
260 // null: This is a regular date word
261 // "MMMM": month postfix
264 ////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Scans the inside of a quoted section of <paramref name="pattern"/> and adds
/// each white-space separated word through <c>AddDateWordOrPostfix</c>.
/// </summary>
/// <param name="pattern">The pattern to be scanned.</param>
/// <param name="index">The starting index to be scanned.</param>
/// <param name="formatPostfix">
/// The kind of postfix to be scanned: <c>null</c> for a regular date word,
/// <c>"MMMM"</c> for a month postfix.
/// </param>
/// <returns>
/// The index just past the closing single quote, or <c>pattern.Length</c> if the
/// pattern ends before a closing quote is seen.
/// </returns>
internal int AddDateWords(string pattern, int index, string? formatPostfix)
{
    // Skip any whitespaces so we will start from a letter.
    int newIndex = SkipWhiteSpacesAndNonLetter(pattern, index);
    if (newIndex != index && formatPostfix != null)
    {
        // There are whitespaces. This will not be a postfix.
        formatPostfix = null;
    }
    index = newIndex;

    // Accumulates the characters of the word currently being read.
    StringBuilder dateWord = new StringBuilder();

    while (index < pattern.Length)
    {
        char ch = pattern[index];
        if (ch == '\'')
        {
            // We have seen the end of quote. Add the word if we do not see it before,
            // and break the while loop.
            AddDateWordOrPostfix(formatPostfix, dateWord.ToString());
            index++;
            break;
        }
        else if (ch == '\\')
        {
            // Escaped character. Skip the backslash and take the next
            // character literally, if there is one.
            index++;
            if (index < pattern.Length)
            {
                dateWord.Append(pattern[index]);
                index++;
            }
        }
        else if (char.IsWhiteSpace(ch))
        {
            // Found a whitespace. We have to add the current date word/postfix.
            AddDateWordOrPostfix(formatPostfix, dateWord.ToString());
            if (formatPostfix != null)
            {
                // Done with postfix. The rest will be regular date word.
                formatPostfix = null;
            }
            // Reset the dateWord.
            dateWord.Length = 0;
            index++;
        }
        else
        {
            dateWord.Append(ch);
            index++;
        }
    }

    return index;
}
329 ////////////////////////////////////////////////////////////////////////////
331 // A simple helper to find the repeat count for a specified char.
333 ////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Counts how many times <paramref name="ch"/> repeats in
/// <paramref name="pattern"/> starting at <paramref name="index"/>.
/// </summary>
/// <param name="pattern">The pattern to be scanned.</param>
/// <param name="ch">The character whose run is measured.</param>
/// <param name="index">
/// The index of the first occurrence; that character is counted without being
/// compared against <paramref name="ch"/>.
/// </param>
/// <param name="count">Receives the length of the run (at least 1).</param>
/// <returns>The index of the first character after the run.</returns>
internal static int ScanRepeatChar(string pattern, char ch, int index, out int count)
{
    // The character at the starting index is taken as the first of the run.
    count = 1;
    while (++index < pattern.Length && pattern[index] == ch)
    {
        count++;
    }
    // Return the updated position.
    return index;
}
345 ////////////////////////////////////////////////////////////////////////////
347 // Add the text that is a date separator but is treated like an ignorable symbol.
350 // short date pattern: yyyy. MM. dd.;yyyy-MM-dd;yy-MM-dd
351 // long date pattern: yyyy. MMMM d.
352 // Here, "." is the date separator (derived from short date pattern). However,
353 // "." also appears at the end of long date pattern. In this case, we just treat
354 // "." as an ignorable symbol so that the DateTime.Parse() state machine will not
355 // treat the additional date separator at the end of y,m,d pattern as an error.
358 ////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Records <paramref name="text"/> in <c>m_dateWords</c> as an ignorable symbol,
/// i.e. prefixed with <c>IgnorableSymbolChar</c>, unless that entry already exists.
/// </summary>
/// <param name="text">The symbol text to record.</param>
internal void AddIgnorableSymbols(string? text)
{
    // Create the date word list on first use.
    m_dateWords ??= new List<string>();

    // The marker prefix tells this entry apart from regular date words.
    string entry = IgnorableSymbolChar + text;
    if (m_dateWords.Contains(entry))
    {
        return;
    }

    m_dateWords.Add(entry);
}
377 // Flag used to trace the date patterns (yy/yyyy/M/MM/MMM/MMMM/d/dd) that we have seen.
/// <summary>
/// Bit flags recording which of the year, month and day patterns have been
/// seen while scanning a date pattern.
/// </summary>
[Flags]
private enum FoundDatePattern
{
    // Nothing seen yet; also the value the flags are reset to.
    None = 0x0000,
    FoundYearPatternFlag = 0x0001,
    FoundMonthPatternFlag = 0x0002,
    FoundDayPatternFlag = 0x0004,
    // All three of year, month and day have been seen (0x0007).
    FoundYMDPatternFlag = FoundYearPatternFlag | FoundMonthPatternFlag | FoundDayPatternFlag,
}

// Check if we have found all of the year/month/day pattern.
private FoundDatePattern _ymdFlags = FoundDatePattern.None;
392 ////////////////////////////////////////////////////////////////////////////
394 // Given a date format pattern, scan for date word or postfix.
396 // A date word should be always put in a single quoted string. And it will
397 // start from a letter, so whitespace and symbols will be ignored before
400 // Examples of date word:
401 // 'de' in es-ES: dddd, dd' de 'MMMM' de 'yyyy
402 // "\x0433." in bg-BG: dd.M.yyyy '\x0433.'
404 // Example of postfix:
406 // "ta" in fi-FI: d. MMMM'ta 'yyyy
407 // Currently, only month postfix is supported.
410 // Always call this with Framework-style pattern, instead of Windows style pattern.
411 // Windows style pattern uses '' for single quote, while .NET uses \'
413 ////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Scans one date/time format pattern for date words and month postfixes and
/// records them in <c>m_dateWords</c>.
/// </summary>
/// <param name="pattern">
/// The pattern to scan, in Framework style (a single quote is escaped as \').
/// </param>
internal void ScanDateWord(string pattern)
{
    // Check if we have found all of the year/month/day pattern.
    _ymdFlags = FoundDatePattern.None;

    int i = 0;
    while (i < pattern.Length)
    {
        char ch = pattern[i];
        int chCount;

        switch (ch)
        {
            case '\'':
                // Find a beginning quote. Search until the end quote.
                i = AddDateWords(pattern, i + 1, null);
                break;

            case 'M':
                i = ScanRepeatChar(pattern, 'M', i, out chCount);
                if (chCount >= 4)
                {
                    // A quote directly after the full month name may hold a month postfix.
                    if (i < pattern.Length && pattern[i] == '\'')
                    {
                        i = AddDateWords(pattern, i + 1, "MMMM");
                    }
                }
                _ymdFlags |= FoundDatePattern.FoundMonthPatternFlag;
                break;

            case 'y':
                i = ScanRepeatChar(pattern, 'y', i, out chCount);
                _ymdFlags |= FoundDatePattern.FoundYearPatternFlag;
                break;

            case 'd':
                i = ScanRepeatChar(pattern, 'd', i, out chCount);
                if (chCount <= 2)
                {
                    // Only count "d" & "dd".
                    // ddd, dddd are day names. Do not count them.
                    _ymdFlags |= FoundDatePattern.FoundDayPatternFlag;
                }
                break;

            case '\\':
                // Found an escaped char not in a quoted string. Skip the current backslash
                // and its next character.
                i += 2;
                break;

            case '.':
                if (_ymdFlags == FoundDatePattern.FoundYMDPatternFlag)
                {
                    // A dot seen right after all of the y, m, d patterns is
                    // recorded as an ignorable symbol, and the flags start over.
                    AddIgnorableSymbols(".");
                    _ymdFlags = FoundDatePattern.None;
                }
                i++;
                break;

            default:
                if (_ymdFlags == FoundDatePattern.FoundYMDPatternFlag && !char.IsWhiteSpace(ch))
                {
                    // We are not seeing "." after YMD. Clear the flag.
                    _ymdFlags = FoundDatePattern.None;
                }
                // We are not in quote. Skip the current character.
                i++;
                break;
        }
    }
}
484 ////////////////////////////////////////////////////////////////////////////
486 // Given a DTFI, get all of the date words from date patterns and time patterns.
488 ////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Scans the date and time patterns of <paramref name="dtfi"/> and returns the
/// date words collected in <c>m_dateWords</c>.
/// </summary>
/// <param name="dtfi">The format info whose patterns are scanned.</param>
/// <returns>
/// A copy of the collected date words, or <c>null</c> when none were found.
/// </returns>
internal string[]? GetDateWordsOfDTFI(DateTimeFormatInfo dtfi)
{
    // The scan order below decides the order of the returned words.

    // Enumerate all long date patterns, get the date words and scan for month postfix.
    ScanAll(dtfi.GetAllDateTimePatterns('D'));

    // Scan the short date patterns.
    ScanAll(dtfi.GetAllDateTimePatterns('d'));

    // Scan the YearMonth patterns.
    ScanAll(dtfi.GetAllDateTimePatterns('y'));

    // Scan the month/day pattern.
    ScanDateWord(dtfi.MonthDayPattern);

    // Scan the long time patterns.
    ScanAll(dtfi.GetAllDateTimePatterns('T'));

    // Scan the short time patterns.
    ScanAll(dtfi.GetAllDateTimePatterns('t'));

    if (m_dateWords != null && m_dateWords.Count > 0)
    {
        return m_dateWords.ToArray();
    }

    return null;

    // Runs ScanDateWord over every pattern in the array, in order.
    void ScanAll(string[] patterns)
    {
        foreach (string pattern in patterns)
        {
            ScanDateWord(pattern);
        }
    }
}
545 ////////////////////////////////////////////////////////////////////////////
547 // Scan the month names to see if genitive month names are used, and return
550 ////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Compares the regular month names with the genitive ones and reports whether
/// the genitive month flag is needed.
/// </summary>
/// <returns>
/// <c>FORMATFLAGS.UseGenitiveMonth</c> when either the full or the abbreviated
/// names differ from their genitive counterparts; otherwise 0.
/// </returns>
internal static FORMATFLAGS GetFormatFlagGenitiveMonth(string[] monthNames, string[] genitveMonthNames, string[] abbrevMonthNames, string[] genetiveAbbrevMonthNames)
{
    bool namesMatch =
        EqualStringArrays(monthNames, genitveMonthNames) &&
        EqualStringArrays(abbrevMonthNames, genetiveAbbrevMonthNames);

    if (namesMatch)
    {
        return 0;
    }

    return FORMATFLAGS.UseGenitiveMonth;
}
558 ////////////////////////////////////////////////////////////////////////////
560 // Scan the month names to see if spaces are used or start with a digit, and return the format flag
562 ////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Scans the four month name arrays and reports whether any name begins with a
/// digit and whether any name contains white space.
/// </summary>
/// <returns>
/// A combination of <c>FORMATFLAGS.UseDigitPrefixInTokens</c> and
/// <c>FORMATFLAGS.UseSpacesInMonthNames</c>, or 0 when neither applies.
/// </returns>
internal static FORMATFLAGS GetFormatFlagUseSpaceInMonthNames(string[] monthNames, string[] genitveMonthNames, string[] abbrevMonthNames, string[] genetiveAbbrevMonthNames)
{
    FORMATFLAGS result = 0;

    if (ArrayElementsBeginWithDigit(monthNames) ||
        ArrayElementsBeginWithDigit(genitveMonthNames) ||
        ArrayElementsBeginWithDigit(abbrevMonthNames) ||
        ArrayElementsBeginWithDigit(genetiveAbbrevMonthNames))
    {
        result |= FORMATFLAGS.UseDigitPrefixInTokens;
    }

    if (ArrayElementsHaveSpace(monthNames) ||
        ArrayElementsHaveSpace(genitveMonthNames) ||
        ArrayElementsHaveSpace(abbrevMonthNames) ||
        ArrayElementsHaveSpace(genetiveAbbrevMonthNames))
    {
        result |= FORMATFLAGS.UseSpacesInMonthNames;
    }

    return result;
}
580 ////////////////////////////////////////////////////////////////////////////
582 // Scan the day names and set the correct format flag.
584 ////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Scans the full and abbreviated day names for white space.
/// </summary>
/// <returns>
/// <c>FORMATFLAGS.UseSpacesInDayNames</c> when any name in either array contains
/// a white-space character; otherwise 0.
/// </returns>
internal static FORMATFLAGS GetFormatFlagUseSpaceInDayNames(string[] dayNames, string[] abbrevDayNames)
{
    if (ArrayElementsHaveSpace(dayNames) || ArrayElementsHaveSpace(abbrevDayNames))
    {
        return FORMATFLAGS.UseSpacesInDayNames;
    }

    return 0;
}
592 ////////////////////////////////////////////////////////////////////////////
594 // Check the calendar to see if it is HebrewCalendar and set the Hebrew format flag if necessary.
596 ////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Reports the format flags required by the calendar with the given ID.
/// </summary>
/// <param name="calID">The calendar ID, compared against <c>CalendarId.HEBREW</c>.</param>
/// <returns>
/// <c>UseHebrewParsing | UseLeapYearMonth</c> for the Hebrew calendar; otherwise 0.
/// </returns>
internal static FORMATFLAGS GetFormatFlagUseHebrewCalendar(int calID)
{
    if (calID != (int)CalendarId.HEBREW)
    {
        return 0;
    }

    return FORMATFLAGS.UseHebrewParsing | FORMATFLAGS.UseLeapYearMonth;
}
604 //-----------------------------------------------------------------------------
606 // Compares two string arrays and returns true if every element of the first
607 // array equals the element at the same index of the second array.
608 // otherwise it returns false.
609 //-----------------------------------------------------------------------------
/// <summary>
/// Determines whether two string arrays have the same length and equal strings
/// at every index.
/// </summary>
/// <returns><c>true</c> when the arrays match; otherwise <c>false</c>.</returns>
private static bool EqualStringArrays(string[] array1, string[] array2)
{
    // Shortcut if they're the same array
    if (array1 == array2)
    {
        return true;
    }

    // This is effectively impossible
    if (array1.Length != array2.Length)
    {
        return false;
    }

    // Check each string
    for (int i = 0; i < array1.Length; i++)
    {
        if (array1[i] != array2[i])
        {
            return false;
        }
    }

    return true;
}
637 //-----------------------------------------------------------------------------
638 // ArrayElementsHaveSpace
639 // It checks all input array elements if any of them has space character
640 // returns true if found space character in one of the array elements.
641 // otherwise returns false.
642 //-----------------------------------------------------------------------------
/// <summary>
/// Checks whether any string in <paramref name="array"/> contains a character
/// for which <see cref="char.IsWhiteSpace(char)"/> is true.
/// </summary>
/// <returns>
/// <c>true</c> as soon as a white-space character is found; <c>false</c> when
/// no element contains one (including for an empty array).
/// </returns>
private static bool ArrayElementsHaveSpace(string[] array)
{
    foreach (string element in array)
    {
        // Checked character by character rather than through IndexOf.
        foreach (char c in element)
        {
            if (char.IsWhiteSpace(c))
            {
                return true;
            }
        }
    }

    return false;
}
663 ////////////////////////////////////////////////////////////////////////////
665 // Check if any element of the array start with a digit.
667 ////////////////////////////////////////////////////////////////////////////
668 private static bool ArrayElementsBeginWithDigit(string[] array
)
670 for (int i
= 0; i
< array
.Length
; i
++)
672 // it is faster to check for space character manually instead of calling IndexOf
673 // so we don't have to go to native code side.
674 if (array
[i
].Length
> 0 &&
675 array
[i
][0] >= '0' && array
[i
][0] <= '9')
678 while (index
< array
[i
].Length
&& array
[i
][index
] >= '0' && array
[i
][index
] <= '9')
680 // Skip other digits.
683 if (index
== array
[i
].Length
)
688 if (index
== array
[i
].Length
- 1)
690 // Skip known CJK month suffix.
691 // CJK uses month name like "1\x6708", since \x6708 is a known month suffix,
692 // we don't need the UseDigitPrefixInTokens since it is slower.
693 switch (array
[i
][index
])
695 case '\x6708': // CJKMonthSuff
696 case '\xc6d4': // KoreanMonthSuff
701 if (index
== array
[i
].Length
- 4)
703 // Skip known CJK month suffix.
704 // Starting with Windows 8, the CJK months for some cultures looks like: "1' \x6708'"
705 // instead of just "1\x6708"
706 if (array
[i
][index
] == '\'' && array
[i
][index
+ 1] == ' ' &&
707 array
[i
][index
+ 2] == '\x6708' && array
[i
][index
+ 3] == '\'')