**** Merged from MCS ****
[mono-project.git] / mcs / class / System / System.Text.RegularExpressions / category.cs
blob6b465e407901ad35d4a9920760bcfc6d58203b1e
1 //
2 // assembly: System
3 // namespace: System.Text.RegularExpressions
4 // file: category.cs
5 //
6 // author: Dan Lewis (dlewis@gmx.co.uk)
7 // (c) 2002
9 //
10 // Permission is hereby granted, free of charge, to any person obtaining
11 // a copy of this software and associated documentation files (the
12 // "Software"), to deal in the Software without restriction, including
13 // without limitation the rights to use, copy, modify, merge, publish,
14 // distribute, sublicense, and/or sell copies of the Software, and to
15 // permit persons to whom the Software is furnished to do so, subject to
16 // the following conditions:
17 //
18 // The above copyright notice and this permission notice shall be
19 // included in all copies or substantial portions of the Software.
20 //
21 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
25 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
30 using System;
31 using System.Globalization;
33 namespace System.Text.RegularExpressions {
/// <summary>
/// Identifies a character class that a single character can be tested
/// against by CategoryUtils.IsCategory.
/// </summary>
/// <remarks>
/// Members are numbered implicitly in declaration order, so inserting or
/// reordering members changes their numeric values. Members carrying the
/// "Unicode" prefix are looked up by name in CategoryUtils.CategoryFromName.
/// </remarks>
enum Category : ushort {
	None = 0,

	// ---- canonical classes ----

	Any,			// .  : every character except newline
	AnySingleline,		// .  : every character (s option)
	Word,			// \w : word characters
	Digit,			// \d : digit characters
	WhiteSpace,		// \s : whitespace characters

	// ---- ECMAScript classes ----

	EcmaAny,
	EcmaAnySingleline,
	EcmaWord,		// [a-zA-Z_0-9]
	EcmaDigit,		// [0-9]
	EcmaWhiteSpace,		// [ \f\n\r\t\v]

	// ---- Unicode general categories (major classes) ----

	UnicodeL,		// L : Letter
	UnicodeM,		// M : Mark
	UnicodeN,		// N : Number
	UnicodeZ,		// Z : Separator
	UnicodeP,		// P : Punctuation
	UnicodeS,		// S : Symbol
	UnicodeC,		// C : Other

	// ---- Unicode general categories (subclasses) ----

	UnicodeLu,		// Lu : UppercaseLetter
	UnicodeLl,		// Ll : LowercaseLetter
	UnicodeLt,		// Lt : TitlecaseLetter
	UnicodeLm,		// Lm : ModifierLetter
	UnicodeLo,		// Lo : OtherLetter
	UnicodeMn,		// Mn : NonspacingMark
	UnicodeMe,		// Me : EnclosingMark
	UnicodeMc,		// Mc : SpacingMark
	UnicodeNd,		// Nd : DecimalNumber
	UnicodeNl,		// Nl : LetterNumber
	UnicodeNo,		// No : OtherNumber
	UnicodeZs,		// Zs : SpaceSeparator
	UnicodeZl,		// Zl : LineSeparator
	UnicodeZp,		// Zp : ParagraphSeparator
	UnicodePd,		// Pd : DashPunctuation
	UnicodePs,		// Ps : OpenPunctuation
	UnicodePi,		// Pi : InitialPunctuation
	UnicodePe,		// Pe : ClosePunctuation
	UnicodePf,		// Pf : FinalPunctuation
	UnicodePc,		// Pc : ConnectorPunctuation
	UnicodePo,		// Po : OtherPunctuation
	UnicodeSm,		// Sm : MathSymbol
	UnicodeSc,		// Sc : CurrencySymbol
	UnicodeSk,		// Sk : ModifierSymbol
	UnicodeSo,		// So : OtherSymbol
	UnicodeCc,		// Cc : Control
	UnicodeCf,		// Cf : Format
	UnicodeCo,		// Co : PrivateUse
	UnicodeCs,		// Cs : Surrogate
	UnicodeCn,		// Cn : Unassigned

	// ---- Unicode block ranges ----
	//
	// Note: blocks marked with a star (*) are valid Unicode block ranges,
	// but the MS parser does not seem to accept them in the \p{...} form.
	// The reason is not known.

	UnicodeBasicLatin,
	UnicodeLatin1Supplement,			// *
	UnicodeLatinExtendedA,				// *
	UnicodeLatinExtendedB,				// *
	UnicodeIPAExtensions,
	UnicodeSpacingModifierLetters,
	UnicodeCombiningDiacriticalMarks,
	UnicodeGreek,
	UnicodeCyrillic,
	UnicodeArmenian,
	UnicodeHebrew,
	UnicodeArabic,
	UnicodeSyriac,
	UnicodeThaana,
	UnicodeDevanagari,
	UnicodeBengali,
	UnicodeGurmukhi,
	UnicodeGujarati,
	UnicodeOriya,
	UnicodeTamil,
	UnicodeTelugu,
	UnicodeKannada,
	UnicodeMalayalam,
	UnicodeSinhala,
	UnicodeThai,
	UnicodeLao,
	UnicodeTibetan,
	UnicodeMyanmar,
	UnicodeGeorgian,
	UnicodeHangulJamo,
	UnicodeEthiopic,
	UnicodeCherokee,
	UnicodeUnifiedCanadianAboriginalSyllabics,
	UnicodeOgham,
	UnicodeRunic,
	UnicodeKhmer,
	UnicodeMongolian,
	UnicodeLatinExtendedAdditional,
	UnicodeGreekExtended,
	UnicodeGeneralPunctuation,
	UnicodeSuperscriptsandSubscripts,
	UnicodeCurrencySymbols,
	UnicodeCombiningMarksforSymbols,
	UnicodeLetterlikeSymbols,
	UnicodeNumberForms,
	UnicodeArrows,
	UnicodeMathematicalOperators,
	UnicodeMiscellaneousTechnical,
	UnicodeControlPictures,
	UnicodeOpticalCharacterRecognition,
	UnicodeEnclosedAlphanumerics,
	UnicodeBoxDrawing,
	UnicodeBlockElements,
	UnicodeGeometricShapes,
	UnicodeMiscellaneousSymbols,
	UnicodeDingbats,
	UnicodeBraillePatterns,
	UnicodeCJKRadicalsSupplement,
	UnicodeKangxiRadicals,
	UnicodeIdeographicDescriptionCharacters,
	UnicodeCJKSymbolsandPunctuation,
	UnicodeHiragana,
	UnicodeKatakana,
	UnicodeBopomofo,
	UnicodeHangulCompatibilityJamo,
	UnicodeKanbun,
	UnicodeBopomofoExtended,
	UnicodeEnclosedCJKLettersandMonths,
	UnicodeCJKCompatibility,
	UnicodeCJKUnifiedIdeographsExtensionA,
	UnicodeCJKUnifiedIdeographs,
	UnicodeYiSyllables,
	UnicodeYiRadicals,
	UnicodeHangulSyllables,
	UnicodeHighSurrogates,
	UnicodeHighPrivateUseSurrogates,
	UnicodeLowSurrogates,
	UnicodePrivateUse,
	UnicodeCJKCompatibilityIdeographs,
	UnicodeAlphabeticPresentationForms,
	UnicodeArabicPresentationFormsA,		// *
	UnicodeCombiningHalfMarks,
	UnicodeCJKCompatibilityForms,
	UnicodeSmallFormVariants,
	UnicodeArabicPresentationFormsB,		// *
	UnicodeSpecials,
	UnicodeHalfwidthandFullwidthForms,

	UnicodeOldItalic,
	UnicodeGothic,
	UnicodeDeseret,
	UnicodeByzantineMusicalSymbols,
	UnicodeMusicalSymbols,
	UnicodeMathematicalAlphanumericSymbols,
	UnicodeCJKUnifiedIdeographsExtensionB,
	UnicodeCJKCompatibilityIdeographsSupplement,
	UnicodeTags,

	// Sentinel: keep this as the final member so it always holds the
	// highest value in the enumeration.
	LastValue
}
/// <summary>
/// Helpers for resolving category names to <see cref="Category"/> values and
/// for testing whether a character belongs to a given category.
/// </summary>
class CategoryUtils {
	/// <summary>
	/// Resolves a Unicode category or block name to its <see cref="Category"/>
	/// member by prepending "Unicode" and looking the result up by name.
	/// </summary>
	/// <param name="name">
	/// Category or block name without the "Unicode" prefix (for example "Lu"
	/// or "IsGreek"). A leading "Is" is removed before the lookup.
	/// </param>
	/// <returns>
	/// The matching member, or <see cref="Category.None"/> when
	/// <paramref name="name"/> is null or does not name a member.
	/// </returns>
	public static Category CategoryFromName (string name) {
		// A null name previously escaped as NullReferenceException, which the
		// ArgumentException handler below does not cover.
		if (name == null)
			return Category.None;

		// Remove the prefix used for block ranges. The prefix is a fixed
		// token, so compare ordinally rather than by current culture.
		if (name.StartsWith ("Is", StringComparison.Ordinal))
			name = name.Substring (2);

		try {
			return (Category)Enum.Parse (typeof (Category), "Unicode" + name);
		}
		catch (ArgumentException) {
			// Enum.Parse signals an unknown name with ArgumentException.
			return Category.None;
		}
	}

	/// <summary>
	/// Tests whether a character belongs to the given category.
	/// </summary>
	/// <param name="cat">The category to test against.</param>
	/// <param name="c">The character (a single UTF-16 code unit) to test.</param>
	/// <returns>
	/// true if <paramref name="c"/> is a member of <paramref name="cat"/>;
	/// false otherwise, including for <see cref="Category.None"/>, for block
	/// ranges that lie above U+FFFF, and for values with no case below.
	/// </returns>
	public static bool IsCategory (Category cat, char c) {
		switch (cat) {
		case Category.None:
			return false;

		// canonical classes

		case Category.Any:
			return c != '\n';

		case Category.AnySingleline:
			return true;

		case Category.Word:
			return
				Char.IsLetterOrDigit (c) ||
				IsCategory (UnicodeCategory.ConnectorPunctuation, c);

		case Category.Digit:
			return Char.IsDigit (c);

		case Category.WhiteSpace:
			return Char.IsWhiteSpace (c);

		// ECMA categories

		case Category.EcmaAny:
			return c != '\n';

		case Category.EcmaAnySingleline:
			return true;

		case Category.EcmaWord:
			return
				('a' <= c && c <= 'z') ||
				('A' <= c && c <= 'Z') ||
				('0' <= c && c <= '9') ||
				'_' == c;

		case Category.EcmaDigit:
			// The upper bound must be the character '9'. Comparing against
			// the integer 9 made this test false for every character.
			return '0' <= c && c <= '9';

		case Category.EcmaWhiteSpace:
			return
				c == ' ' ||
				c == '\f' ||
				c == '\n' ||
				c == '\r' ||
				c == '\t' ||
				c == '\v';

		// Unicode categories...

		// letter

		case Category.UnicodeLu: return IsCategory (UnicodeCategory.UppercaseLetter, c);
		case Category.UnicodeLl: return IsCategory (UnicodeCategory.LowercaseLetter, c);
		case Category.UnicodeLt: return IsCategory (UnicodeCategory.TitlecaseLetter, c);
		case Category.UnicodeLm: return IsCategory (UnicodeCategory.ModifierLetter, c);
		case Category.UnicodeLo: return IsCategory (UnicodeCategory.OtherLetter, c);

		// mark

		case Category.UnicodeMn: return IsCategory (UnicodeCategory.NonSpacingMark, c);
		case Category.UnicodeMe: return IsCategory (UnicodeCategory.EnclosingMark, c);
		case Category.UnicodeMc: return IsCategory (UnicodeCategory.SpacingCombiningMark, c);

		// number

		case Category.UnicodeNd: return IsCategory (UnicodeCategory.DecimalDigitNumber, c);
		case Category.UnicodeNl: return IsCategory (UnicodeCategory.LetterNumber, c);
		case Category.UnicodeNo: return IsCategory (UnicodeCategory.OtherNumber, c);

		// separator

		case Category.UnicodeZs: return IsCategory (UnicodeCategory.SpaceSeparator, c);
		case Category.UnicodeZl: return IsCategory (UnicodeCategory.LineSeparator, c);
		case Category.UnicodeZp: return IsCategory (UnicodeCategory.ParagraphSeparator, c);

		// punctuation

		case Category.UnicodePd: return IsCategory (UnicodeCategory.DashPunctuation, c);
		case Category.UnicodePs: return IsCategory (UnicodeCategory.OpenPunctuation, c);
		case Category.UnicodePi: return IsCategory (UnicodeCategory.InitialQuotePunctuation, c);
		case Category.UnicodePe: return IsCategory (UnicodeCategory.ClosePunctuation, c);
		case Category.UnicodePf: return IsCategory (UnicodeCategory.FinalQuotePunctuation, c);
		case Category.UnicodePc: return IsCategory (UnicodeCategory.ConnectorPunctuation, c);
		case Category.UnicodePo: return IsCategory (UnicodeCategory.OtherPunctuation, c);

		// symbol

		case Category.UnicodeSm: return IsCategory (UnicodeCategory.MathSymbol, c);
		case Category.UnicodeSc: return IsCategory (UnicodeCategory.CurrencySymbol, c);
		case Category.UnicodeSk: return IsCategory (UnicodeCategory.ModifierSymbol, c);
		case Category.UnicodeSo: return IsCategory (UnicodeCategory.OtherSymbol, c);

		// other

		case Category.UnicodeCc: return IsCategory (UnicodeCategory.Control, c);
		case Category.UnicodeCf: return IsCategory (UnicodeCategory.Format, c);
		case Category.UnicodeCo: return IsCategory (UnicodeCategory.PrivateUse, c);
		case Category.UnicodeCs: return IsCategory (UnicodeCategory.Surrogate, c);
		case Category.UnicodeCn: return IsCategory (UnicodeCategory.OtherNotAssigned, c);

		// Major classes: look the character's category up once and check it
		// against every subclass of the major class.

		case Category.UnicodeL:	// letter
			switch (Char.GetUnicodeCategory (c)) {
			case UnicodeCategory.UppercaseLetter:
			case UnicodeCategory.LowercaseLetter:
			case UnicodeCategory.TitlecaseLetter:
			case UnicodeCategory.ModifierLetter:
			case UnicodeCategory.OtherLetter:
				return true;
			default:
				return false;
			}

		case Category.UnicodeM:	// mark
			switch (Char.GetUnicodeCategory (c)) {
			case UnicodeCategory.NonSpacingMark:
			case UnicodeCategory.EnclosingMark:
			case UnicodeCategory.SpacingCombiningMark:
				return true;
			default:
				return false;
			}

		case Category.UnicodeN:	// number
			switch (Char.GetUnicodeCategory (c)) {
			case UnicodeCategory.DecimalDigitNumber:
			case UnicodeCategory.LetterNumber:
			case UnicodeCategory.OtherNumber:
				return true;
			default:
				return false;
			}

		case Category.UnicodeZ:	// separator
			switch (Char.GetUnicodeCategory (c)) {
			case UnicodeCategory.SpaceSeparator:
			case UnicodeCategory.LineSeparator:
			case UnicodeCategory.ParagraphSeparator:
				return true;
			default:
				return false;
			}

		case Category.UnicodeP:	// punctuation
			switch (Char.GetUnicodeCategory (c)) {
			case UnicodeCategory.DashPunctuation:
			case UnicodeCategory.OpenPunctuation:
			case UnicodeCategory.InitialQuotePunctuation:
			case UnicodeCategory.ClosePunctuation:
			case UnicodeCategory.FinalQuotePunctuation:
			case UnicodeCategory.ConnectorPunctuation:
			case UnicodeCategory.OtherPunctuation:
				return true;
			default:
				return false;
			}

		case Category.UnicodeS:	// symbol
			switch (Char.GetUnicodeCategory (c)) {
			case UnicodeCategory.MathSymbol:
			case UnicodeCategory.CurrencySymbol:
			case UnicodeCategory.ModifierSymbol:
			case UnicodeCategory.OtherSymbol:
				return true;
			default:
				return false;
			}

		case Category.UnicodeC:	// other
			switch (Char.GetUnicodeCategory (c)) {
			case UnicodeCategory.Control:
			case UnicodeCategory.Format:
			case UnicodeCategory.PrivateUse:
			case UnicodeCategory.Surrogate:
			case UnicodeCategory.OtherNotAssigned:
				return true;
			default:
				return false;
			}

		// Unicode block ranges...

		case Category.UnicodeBasicLatin:
			return '\u0000' <= c && c <= '\u007F';

		case Category.UnicodeLatin1Supplement:
			return '\u0080' <= c && c <= '\u00FF';

		case Category.UnicodeLatinExtendedA:
			return '\u0100' <= c && c <= '\u017F';

		case Category.UnicodeLatinExtendedB:
			return '\u0180' <= c && c <= '\u024F';

		case Category.UnicodeIPAExtensions:
			return '\u0250' <= c && c <= '\u02AF';

		case Category.UnicodeSpacingModifierLetters:
			return '\u02B0' <= c && c <= '\u02FF';

		case Category.UnicodeCombiningDiacriticalMarks:
			return '\u0300' <= c && c <= '\u036F';

		case Category.UnicodeGreek:
			return '\u0370' <= c && c <= '\u03FF';

		case Category.UnicodeCyrillic:
			return '\u0400' <= c && c <= '\u04FF';

		case Category.UnicodeArmenian:
			return '\u0530' <= c && c <= '\u058F';

		case Category.UnicodeHebrew:
			return '\u0590' <= c && c <= '\u05FF';

		case Category.UnicodeArabic:
			return '\u0600' <= c && c <= '\u06FF';

		case Category.UnicodeSyriac:
			return '\u0700' <= c && c <= '\u074F';

		case Category.UnicodeThaana:
			return '\u0780' <= c && c <= '\u07BF';

		case Category.UnicodeDevanagari:
			return '\u0900' <= c && c <= '\u097F';

		case Category.UnicodeBengali:
			return '\u0980' <= c && c <= '\u09FF';

		case Category.UnicodeGurmukhi:
			return '\u0A00' <= c && c <= '\u0A7F';

		case Category.UnicodeGujarati:
			return '\u0A80' <= c && c <= '\u0AFF';

		case Category.UnicodeOriya:
			return '\u0B00' <= c && c <= '\u0B7F';

		case Category.UnicodeTamil:
			return '\u0B80' <= c && c <= '\u0BFF';

		case Category.UnicodeTelugu:
			return '\u0C00' <= c && c <= '\u0C7F';

		case Category.UnicodeKannada:
			return '\u0C80' <= c && c <= '\u0CFF';

		case Category.UnicodeMalayalam:
			return '\u0D00' <= c && c <= '\u0D7F';

		case Category.UnicodeSinhala:
			return '\u0D80' <= c && c <= '\u0DFF';

		case Category.UnicodeThai:
			return '\u0E00' <= c && c <= '\u0E7F';

		case Category.UnicodeLao:
			return '\u0E80' <= c && c <= '\u0EFF';

		case Category.UnicodeTibetan:
			return '\u0F00' <= c && c <= '\u0FFF';

		case Category.UnicodeMyanmar:
			return '\u1000' <= c && c <= '\u109F';

		case Category.UnicodeGeorgian:
			return '\u10A0' <= c && c <= '\u10FF';

		case Category.UnicodeHangulJamo:
			return '\u1100' <= c && c <= '\u11FF';

		case Category.UnicodeEthiopic:
			return '\u1200' <= c && c <= '\u137F';

		case Category.UnicodeCherokee:
			return '\u13A0' <= c && c <= '\u13FF';

		case Category.UnicodeUnifiedCanadianAboriginalSyllabics:
			return '\u1400' <= c && c <= '\u167F';

		case Category.UnicodeOgham:
			return '\u1680' <= c && c <= '\u169F';

		case Category.UnicodeRunic:
			return '\u16A0' <= c && c <= '\u16FF';

		case Category.UnicodeKhmer:
			return '\u1780' <= c && c <= '\u17FF';

		case Category.UnicodeMongolian:
			return '\u1800' <= c && c <= '\u18AF';

		case Category.UnicodeLatinExtendedAdditional:
			return '\u1E00' <= c && c <= '\u1EFF';

		case Category.UnicodeGreekExtended:
			return '\u1F00' <= c && c <= '\u1FFF';

		case Category.UnicodeGeneralPunctuation:
			return '\u2000' <= c && c <= '\u206F';

		case Category.UnicodeSuperscriptsandSubscripts:
			return '\u2070' <= c && c <= '\u209F';

		case Category.UnicodeCurrencySymbols:
			return '\u20A0' <= c && c <= '\u20CF';

		case Category.UnicodeCombiningMarksforSymbols:
			return '\u20D0' <= c && c <= '\u20FF';

		case Category.UnicodeLetterlikeSymbols:
			return '\u2100' <= c && c <= '\u214F';

		case Category.UnicodeNumberForms:
			return '\u2150' <= c && c <= '\u218F';

		case Category.UnicodeArrows:
			return '\u2190' <= c && c <= '\u21FF';

		case Category.UnicodeMathematicalOperators:
			return '\u2200' <= c && c <= '\u22FF';

		case Category.UnicodeMiscellaneousTechnical:
			return '\u2300' <= c && c <= '\u23FF';

		case Category.UnicodeControlPictures:
			return '\u2400' <= c && c <= '\u243F';

		case Category.UnicodeOpticalCharacterRecognition:
			return '\u2440' <= c && c <= '\u245F';

		case Category.UnicodeEnclosedAlphanumerics:
			return '\u2460' <= c && c <= '\u24FF';

		case Category.UnicodeBoxDrawing:
			return '\u2500' <= c && c <= '\u257F';

		case Category.UnicodeBlockElements:
			return '\u2580' <= c && c <= '\u259F';

		case Category.UnicodeGeometricShapes:
			return '\u25A0' <= c && c <= '\u25FF';

		case Category.UnicodeMiscellaneousSymbols:
			return '\u2600' <= c && c <= '\u26FF';

		case Category.UnicodeDingbats:
			return '\u2700' <= c && c <= '\u27BF';

		case Category.UnicodeBraillePatterns:
			return '\u2800' <= c && c <= '\u28FF';

		case Category.UnicodeCJKRadicalsSupplement:
			return '\u2E80' <= c && c <= '\u2EFF';

		case Category.UnicodeKangxiRadicals:
			return '\u2F00' <= c && c <= '\u2FDF';

		case Category.UnicodeIdeographicDescriptionCharacters:
			return '\u2FF0' <= c && c <= '\u2FFF';

		case Category.UnicodeCJKSymbolsandPunctuation:
			return '\u3000' <= c && c <= '\u303F';

		case Category.UnicodeHiragana:
			return '\u3040' <= c && c <= '\u309F';

		case Category.UnicodeKatakana:
			return '\u30A0' <= c && c <= '\u30FF';

		case Category.UnicodeBopomofo:
			return '\u3100' <= c && c <= '\u312F';

		case Category.UnicodeHangulCompatibilityJamo:
			return '\u3130' <= c && c <= '\u318F';

		case Category.UnicodeKanbun:
			return '\u3190' <= c && c <= '\u319F';

		case Category.UnicodeBopomofoExtended:
			return '\u31A0' <= c && c <= '\u31BF';

		case Category.UnicodeEnclosedCJKLettersandMonths:
			return '\u3200' <= c && c <= '\u32FF';

		case Category.UnicodeCJKCompatibility:
			return '\u3300' <= c && c <= '\u33FF';

		case Category.UnicodeCJKUnifiedIdeographsExtensionA:
			return '\u3400' <= c && c <= '\u4DB5';

		case Category.UnicodeCJKUnifiedIdeographs:
			return '\u4E00' <= c && c <= '\u9FFF';

		case Category.UnicodeYiSyllables:
			return '\uA000' <= c && c <= '\uA48F';

		case Category.UnicodeYiRadicals:
			return '\uA490' <= c && c <= '\uA4CF';

		case Category.UnicodeHangulSyllables:
			return '\uAC00' <= c && c <= '\uD7A3';

		case Category.UnicodeHighSurrogates:
			return '\uD800' <= c && c <= '\uDB7F';

		case Category.UnicodeHighPrivateUseSurrogates:
			return '\uDB80' <= c && c <= '\uDBFF';

		case Category.UnicodeLowSurrogates:
			return '\uDC00' <= c && c <= '\uDFFF';

		case Category.UnicodePrivateUse:
			return '\uE000' <= c && c <= '\uF8FF';

		case Category.UnicodeCJKCompatibilityIdeographs:
			return '\uF900' <= c && c <= '\uFAFF';

		case Category.UnicodeAlphabeticPresentationForms:
			return '\uFB00' <= c && c <= '\uFB4F';

		case Category.UnicodeArabicPresentationFormsA:
			return '\uFB50' <= c && c <= '\uFDFF';

		case Category.UnicodeCombiningHalfMarks:
			return '\uFE20' <= c && c <= '\uFE2F';

		case Category.UnicodeCJKCompatibilityForms:
			return '\uFE30' <= c && c <= '\uFE4F';

		case Category.UnicodeSmallFormVariants:
			return '\uFE50' <= c && c <= '\uFE6F';

		case Category.UnicodeArabicPresentationFormsB:
			return '\uFE70' <= c && c <= '\uFEFE';

		case Category.UnicodeHalfwidthandFullwidthForms:
			return '\uFF00' <= c && c <= '\uFFEF';

		case Category.UnicodeSpecials:
			// Two disjoint pieces: the single code point U+FEFF and the
			// range U+FFF0..U+FFFD.
			return
				c == '\uFEFF' ||
				('\uFFF0' <= c && c <= '\uFFFD');

		// These block ranges begin above 0x10000, which a single 16-bit
		// char cannot hold, so no character is ever reported as a member.

		case Category.UnicodeOldItalic:
		case Category.UnicodeGothic:
		case Category.UnicodeDeseret:
		case Category.UnicodeByzantineMusicalSymbols:
		case Category.UnicodeMusicalSymbols:
		case Category.UnicodeMathematicalAlphanumericSymbols:
		case Category.UnicodeCJKUnifiedIdeographsExtensionB:
		case Category.UnicodeCJKCompatibilityIdeographsSupplement:
		case Category.UnicodeTags:
			return false;

		default:
			return false;
		}
	}

	/// <summary>
	/// Tests whether the Unicode general category of a character equals the
	/// given category.
	/// </summary>
	/// <param name="uc">The Unicode general category to compare against.</param>
	/// <param name="c">The character to classify.</param>
	/// <returns>true if Char.GetUnicodeCategory reports <paramref name="uc"/> for <paramref name="c"/>.</returns>
	private static bool IsCategory (UnicodeCategory uc, char c) {
		return Char.GetUnicodeCategory (c) == uc;
	}
}