**** Merged from MCS ****
[mono-project.git] / mcs / class / System / System.Text.RegularExpressions / parser.cs
blob80ce31eb301bcdddfd638653f56d2507cfd67d2b
1 //
2 // assembly: System
3 // namespace: System.Text.RegularExpressions
4 // file: parser.cs
5 //
6 // author: Dan Lewis (dlewis@gmx.co.uk)
7 // (c) 2002
9 //
10 // Permission is hereby granted, free of charge, to any person obtaining
11 // a copy of this software and associated documentation files (the
12 // "Software"), to deal in the Software without restriction, including
13 // without limitation the rights to use, copy, modify, merge, publish,
14 // distribute, sublicense, and/or sell copies of the Software, and to
15 // permit persons to whom the Software is furnished to do so, subject to
16 // the following conditions:
17 //
18 // The above copyright notice and this permission notice shall be
19 // included in all copies or substantial portions of the Software.
20 //
21 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
25 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
30 using System;
31 using System.Collections;
32 using System.Globalization;
34 namespace System.Text.RegularExpressions.Syntax {
36 class Parser {
// Parses a run of decimal digits from str beginning at index ptr.
// Delegates to ParseNumber with base 10, a minimum of one digit and an
// upper digit limit of Int32.MaxValue; the result and the updated ptr are
// exactly what ParseNumber produces.
public static int ParseDecimal (string str, ref int ptr) {
    const int radix = 10;
    const int min_digits = 1;
    return ParseNumber (str, ref ptr, radix, min_digits, Int32.MaxValue);
}
// Parses between one and three octal digits from str beginning at index
// ptr by delegating to ParseNumber (base 8, min 1, max 3).
public static int ParseOctal (string str, ref int ptr) {
    const int radix = 8;
    const int min_digits = 1;
    const int max_digits = 3;
    return ParseNumber (str, ref ptr, radix, min_digits, max_digits);
}
// Parses exactly `digits` hexadecimal digits from str beginning at index
// ptr: the same value is handed to ParseNumber as both the minimum and the
// maximum digit count.
public static int ParseHex (string str, ref int ptr, int digits) {
    const int radix = 16;
    return ParseNumber (str, ref ptr, radix, digits, digits);
}
// Parses an unsigned number written in base b from str, starting at ptr.
//
//   str  - text to read from
//   ptr  - index of the first candidate digit; advanced past the digits
//          consumed on success, left untouched on failure
//   b    - numeric base handed to ParseDigit
//   min  - minimum number of digits that must be present
//   max  - maximum number of digits to consume; a value below min means
//          "no limit"
//
// Returns the parsed value, or -1 when fewer than min digits were found.
// The value saturates at Int32.MaxValue instead of wrapping around, so an
// over-long digit run can never come back as a small or negative number
// (which callers would mistake for a valid value or for the -1 failure
// code).
public static int ParseNumber (string str, ref int ptr, int b, int min, int max) {
    if (max < min)
        max = Int32.MaxValue;

    int p = ptr;
    int digits = 0;
    long n = 0;    // wide accumulator: n * b + d cannot overflow a long here

    while (digits < max && p < str.Length) {
        int d = ParseDigit (str[p], b, digits);
        if (d < 0)
            break;    // not a digit in this base: leave it unconsumed

        n = n * b + d;
        if (n > Int32.MaxValue)
            n = Int32.MaxValue;    // saturate rather than wrap

        ++ p;
        ++ digits;
    }

    if (digits < min)
        return -1;

    ptr = p;
    return (int) n;
}
// Reads a group name from str at index ptr.
//
// When the first character is a digit (per Char.IsDigit) the name is
// numeric: it is parsed as a decimal number and returned as a string if it
// is greater than zero, otherwise null is returned. In every other case
// characters are consumed for as long as IsNameChar accepts them, and the
// consumed text is returned (null when nothing was consumed).
//
// str is indexed without a bounds check, so running past the end raises
// IndexOutOfRangeException.
public static string ParseName (string str, ref int ptr) {
    if (Char.IsDigit (str[ptr])) {
        int number = ParseNumber (str, ref ptr, 10, 1, 0);
        return number > 0 ? number.ToString () : null;
    }

    int first = ptr;
    while (IsNameChar (str[ptr]))
        ++ ptr;

    int length = ptr - first;
    return length > 0 ? str.Substring (first, length) : null;
}
// Returns a copy of str in which every character this parser treats
// specially is escaped.
//
// The characters \ * + ? | { [ ( ) ^ $ . # and space are prefixed with a
// backslash; tab, newline, carriage return and form feed are written as
// \t, \n, \r and \f. Everything else is copied unchanged.
//
// The output is assembled in a StringBuilder so the cost stays linear in
// the input length (repeated string concatenation copies the whole result
// on every appended character).
public static string Escape (string str) {
    StringBuilder result = new StringBuilder (str.Length + 8);

    foreach (char c in str) {
        switch (c) {
        case '\\': case '*': case '+': case '?': case '|':
        case '{': case '[': case '(': case ')': case '^':
        case '$': case '.': case '#': case ' ':
            result.Append ('\\').Append (c);
            break;

        case '\t': result.Append ("\\t"); break;
        case '\n': result.Append ("\\n"); break;
        case '\r': result.Append ("\\r"); break;
        case '\f': result.Append ("\\f"); break;

        default: result.Append (c); break;
        }
    }

    return result.ToString ();
}
// Converts escape sequences in str back to the characters they denote by
// running ParseString on a freshly constructed Parser.
public static string Unescape (string str) {
    Parser parser = new Parser ();
    return parser.ParseString (str);
}
121 // public instance
// Creates a parser with an empty capture list and an empty reference table.
public Parser () {
    refs = new Hashtable ();
    caps = new ArrayList ();
}
// Parses pattern into an expression tree.
//
// All per-parse state (pattern text, read position, capture list,
// reference table, group counter) is reset first. The tree is then built by
// ParseGroup, group references are bound by ResolveReferences, and the
// resulting group count is stored on the returned RegularExpression.
//
// The parsing helpers index the pattern without bounds checks; an
// IndexOutOfRangeException raised while parsing is reported as the parse
// error "Unexpected end of pattern.".
public RegularExpression ParseRegularExpression (string pattern, RegexOptions options) {
    this.ptr = 0;
    this.num_groups = 0;
    this.pattern = pattern;

    caps.Clear ();
    refs.Clear ();

    try {
        RegularExpression root = new RegularExpression ();

        ParseGroup (root, options, null);
        ResolveReferences ();
        root.GroupCount = num_groups;

        return root;
    }
    catch (IndexOutOfRangeException) {
        throw NewParseException ("Unexpected end of pattern.");
    }
}
// Builds the table that maps group names to group numbers for the most
// recently parsed pattern.
//
// The table always contains "0" -> 0. Every named group contributes
// name -> number (the first occurrence wins when a name is repeated).
// Every remaining group number in 1..num_groups that is not already
// covered by a named group is added under its decimal string.
//
// The numeric loop runs up to and including num_groups: group numbers are
// handed out starting at 1, so stopping one short would leave the highest
// numbered group out of the table, and caps.Count is not a reliable bound
// because groups that repeat a name share one number.
public IDictionary GetMapping () {
    Hashtable mapping = new Hashtable ();
    Hashtable named_numbers = new Hashtable ();

    mapping.Add ("0", 0);

    foreach (CapturingGroup group in caps) {
        if (group.Name != null && !mapping.Contains (group.Name)) {
            mapping.Add (group.Name, group.Number);
            named_numbers [group.Number] = group.Number;
        }
    }

    for (int i = 1; i <= num_groups; i++) {
        if (named_numbers.Contains (i))
            continue;    // this number is already reachable through a name

        string key = i.ToString ();
        if (!mapping.Contains (key))
            mapping.Add (key, i);
    }

    return mapping;
}
171 // private methods
// Parses the body of one group, starting at the current read position,
// and appends the result to `group`.
//
//   group     - receives the parsed content; when it is a
//               RegularExpression the call is the top-level parse
//   options   - options in effect; an inline option construct may change
//               them for the rest of this group
//   assertion - when non-null, the alternatives separated by '|' become the
//               assertion's TrueExpression and FalseExpression, and the
//               assertion itself is what gets appended to `group`
//
// Each loop iteration (1) reads one expression or literal character,
// (2) wraps it in a Repetition if a quantifier follows, and (3) appends it.
// Consecutive plain characters are collected into a single Literal.
//
// Throws a parse exception for a stray quantifier, for more than two
// branches in a conditional, for an unmatched ')' at top level and for a
// missing ')' in a nested group.
private void ParseGroup (Group group, RegexOptions options, Assertion assertion) {
    bool is_top_level = group is RegularExpression;

    Alternation alternation = null;
    string literal = null;

    Group current = new Group ();
    Expression expr = null;
    bool closed = false;

    while (true) {
        ConsumeWhitespace (IsIgnorePatternWhitespace (options));
        if (ptr >= pattern.Length)
            break;

        // (1) Parse for Expressions

        char ch = pattern[ptr ++];

        switch (ch) {
        case '^': {
            Position pos =
                IsMultiline (options) ? Position.StartOfLine : Position.Start;
            expr = new PositionAssertion (pos);
            break;
        }

        case '$': {
            Position pos =
                IsMultiline (options) ? Position.EndOfLine : Position.End;
            expr = new PositionAssertion (pos);
            break;
        }

        case '.': {
            Category cat =
                IsSingleline (options) ? Category.AnySingleline : Category.Any;
            expr = new CharacterClass (cat, false);
            break;
        }

        case '\\': {
            int c = ParseEscape ();
            if (c >= 0)
                ch = (char)c;
            else {
                expr = ParseSpecial (options);

                if (expr == null)
                    ch = pattern[ptr ++];    // default escape
            }
            break;
        }

        case '[': {
            expr = ParseCharacterClass (options);
            break;
        }

        case '(': {
            // remember the case sensitivity in effect before the construct
            bool ignore = IsIgnoreCase (options);
            expr = ParseGroupingConstruct (ref options);
            if (expr == null) {
                // The construct produced no expression (inline options or a
                // comment). If it switched case sensitivity, the literal
                // collected so far was written under the OLD setting, so it
                // is flushed with `ignore`, not with the new option value.
                if (literal != null && IsIgnoreCase (options) != ignore) {
                    current.AppendExpression (new Literal (literal, ignore));
                    literal = null;
                }

                continue;
            }
            break;
        }

        case ')': {
            closed = true;
            goto EndOfGroup;
        }

        case '|': {
            if (literal != null) {
                current.AppendExpression (new Literal (literal, IsIgnoreCase (options)));
                literal = null;
            }

            if (assertion != null) {
                if (assertion.TrueExpression == null)
                    assertion.TrueExpression = current;
                else if (assertion.FalseExpression == null)
                    assertion.FalseExpression = current;
                else
                    throw NewParseException ("Too many | in (?()|).");
            }
            else {
                if (alternation == null)
                    alternation = new Alternation ();

                alternation.AddAlternative (current);
            }

            current = new Group ();
            continue;
        }

        case '*': case '+': case '?': {
            throw NewParseException ("Bad quantifier.");
        }

        default:
            break;    // literal character
        }

        ConsumeWhitespace (IsIgnorePatternWhitespace (options));

        // (2) Check for Repetitions

        if (ptr < pattern.Length) {
            char k = pattern[ptr];

            if (k == '?' || k == '*' || k == '+' || k == '{') {
                ++ ptr;

                int min = 0, max = 0;
                bool lazy = false;

                switch (k) {
                case '?': min = 0; max = 1; break;
                case '*': min = 0; max = 0xffff; break;
                case '+': min = 1; max = 0xffff; break;
                case '{': ParseRepetitionBounds (out min, out max, options); break;
                }

                // a '?' directly after the quantifier makes it lazy
                ConsumeWhitespace (IsIgnorePatternWhitespace (options));
                if (ptr < pattern.Length && pattern[ptr] == '?') {
                    ++ ptr;
                    lazy = true;
                }

                Repetition repetition = new Repetition (min, max, lazy);

                // a quantifier after a plain character applies to that
                // single character only
                if (expr == null)
                    repetition.Expression = new Literal (ch.ToString (), IsIgnoreCase (options));
                else
                    repetition.Expression = expr;

                expr = repetition;
            }
        }

        // (3) Append Expression and/or Literal

        if (expr == null) {
            if (literal == null)
                literal = "";
            literal += ch;
        }
        else {
            if (literal != null) {
                current.AppendExpression (new Literal (literal, IsIgnoreCase (options)));
                literal = null;
            }

            current.AppendExpression (expr);
            expr = null;
        }

        if (is_top_level && ptr >= pattern.Length)
            goto EndOfGroup;
    }

EndOfGroup:
    if (is_top_level && closed)
        throw NewParseException ("Too many )'s.");
    if (!is_top_level && !closed)
        throw NewParseException ("Not enough )'s.");

    // clean up literals and alternations

    if (literal != null)
        current.AppendExpression (new Literal (literal, IsIgnoreCase (options)));

    if (assertion != null) {
        if (assertion.TrueExpression == null)
            assertion.TrueExpression = current;
        else
            assertion.FalseExpression = current;

        group.AppendExpression (assertion);
    }
    else if (alternation != null) {
        alternation.AddAlternative (current);
        group.AppendExpression (alternation);
    }
    else
        group.AppendExpression (current);
}
// Parses whatever follows an opening '(' (already consumed by the caller).
//
// Without a leading '?' this is an ordinary group: a plain Group under
// ExplicitCapture, otherwise a CapturingGroup that is recorded in caps.
// With a leading '?' the next character selects the construct:
//   ':'            non-capturing group
//   '>'            non-backtracking group
//   i m n s x -    inline options, either scoped to a child group (':')
//                  or applied to the enclosing group (')')
//   '<' '=' '!'    lookahead / lookbehind, or a <name> group
//   '\''           named or balancing group
//   '('            conditional on an expression or on a capture
//   '#'            comment
//
// Returns the parsed expression, or null when the construct yields none
// (an options change, which is written back through `options`, or a
// comment). Throws a parse exception on malformed input.
private Expression ParseGroupingConstruct (ref RegexOptions options) {
    if (pattern[ptr] != '?') {
        Group plain;

        if (IsExplicitCapture (options)) {
            plain = new Group ();
        }
        else {
            plain = new CapturingGroup ();
            caps.Add (plain);
        }

        ParseGroup (plain, options, null);
        return plain;
    }

    ++ ptr;    // skip the '?'

    switch (pattern[ptr]) {
    case ':': {    // non-capturing group
        ++ ptr;
        Group inner = new Group ();
        ParseGroup (inner, options, null);
        return inner;
    }

    case '>': {    // non-backtracking group
        ++ ptr;
        Group atomic = new NonBacktrackingGroup ();
        ParseGroup (atomic, options, null);
        return atomic;
    }

    case 'i': case 'm': case 'n':
    case 's': case 'x': case '-': {    // options
        RegexOptions changed = options;

        ParseOptions (ref changed, false);
        if (pattern[ptr] == '-') {
            ++ ptr;
            ParseOptions (ref changed, true);
        }

        char terminator = pattern[ptr];

        if (terminator == ':') {    // pass options to child group
            ++ ptr;
            Group scoped = new Group ();
            ParseGroup (scoped, changed, null);
            return scoped;
        }

        if (terminator == ')') {    // change options of enclosing group
            ++ ptr;
            options = changed;
            return null;
        }

        throw NewParseException ("Bad options");
    }

    case '<': case '=': case '!': {    // lookahead/lookbehind
        ExpressionAssertion look = new ExpressionAssertion ();
        if (!ParseAssertionType (look))
            goto case '\'';    // it's a (?<name> ) construct

        Group body = new Group ();
        ParseGroup (body, options, null);

        look.TestExpression = body;
        return look;
    }

    case '\'': {    // named/balancing group
        char delim = pattern[ptr] == '<' ? '>' : '\'';

        ++ ptr;
        string name = ParseName ();

        if (pattern[ptr] == delim) {
            // capturing group

            if (name == null)
                throw NewParseException ("Bad group name.");

            ++ ptr;
            CapturingGroup named = new CapturingGroup ();
            named.Name = name;
            caps.Add (named);
            ParseGroup (named, options, null);

            return named;
        }

        if (pattern[ptr] != '-')
            throw NewParseException ("Bad group name.");

        // balancing group

        ++ ptr;
        string balance_name = ParseName ();
        if (balance_name == null || pattern[ptr] != delim)
            throw NewParseException ("Bad balancing group name.");

        ++ ptr;
        BalancingGroup balancing = new BalancingGroup ();
        balancing.Name = name;

        if (balancing.IsNamed)
            caps.Add (balancing);

        // the balanced group is bound later, in ResolveReferences
        refs.Add (balancing, balance_name);

        ParseGroup (balancing, options, null);

        return balancing;
    }

    case '(': {    // expression/capture test
        Assertion condition;

        ++ ptr;
        int rewind = ptr;
        string name = ParseName ();

        if (name != null && pattern[ptr] == ')') {    // capture test
            ++ ptr;
            condition = new CaptureAssertion ();
            refs.Add (condition, name);
        }
        else {    // expression test
            // FIXME MS implementation doesn't seem to
            // implement this version of (?(x) ...)

            ptr = rewind;
            ExpressionAssertion expr_test = new ExpressionAssertion ();

            if (pattern[ptr] == '?') {
                ++ ptr;
                if (!ParseAssertionType (expr_test))
                    throw NewParseException ("Bad conditional.");
            }
            else {
                expr_test.Negate = false;
                expr_test.Reverse = false;
            }

            Group tested = new Group ();
            ParseGroup (tested, options, null);
            expr_test.TestExpression = tested;
            condition = expr_test;
        }

        Group branches = new Group ();
        ParseGroup (branches, options, condition);
        return branches;
    }

    case '#': {    // comment
        ++ ptr;
        for (;;) {
            char skipped = pattern[ptr ++];
            if (skipped == ')')
                break;
            if (ptr >= pattern.Length)
                throw NewParseException ("Unterminated (?#...) comment.");
        }
        return null;
    }

    default:    // error
        throw NewParseException ("Bad grouping construct.");
    }
}
// Recognises the marker of a lookaround assertion at the current position:
// "=" / "!" for lookahead and "<=" / "<!" for lookbehind.
//
// On a match, Negate is set for the '!' forms, Reverse is set for the '<'
// forms, the marker is consumed and true is returned. Otherwise nothing is
// consumed, the assertion is left untouched and false is returned.
private bool ParseAssertionType (ExpressionAssertion assertion) {
    bool lookbehind = pattern[ptr] == '<';
    char kind = lookbehind ? pattern[ptr + 1] : pattern[ptr];

    if (kind != '=' && kind != '!')
        return false;

    assertion.Negate = kind == '!';
    assertion.Reverse = lookbehind;
    ptr += lookbehind ? 2 : 1;

    return true;
}
// Consumes a run of inline option letters (i, m, n, s, x) at the current
// position and applies each one to `options`: the matching flag is set
// when negate is false and cleared when negate is true. Stops, without
// consuming it, at the first character that is not an option letter.
private void ParseOptions (ref RegexOptions options, bool negate) {
    while (true) {
        RegexOptions flag;

        switch (pattern[ptr]) {
        case 'i': flag = RegexOptions.IgnoreCase; break;
        case 'm': flag = RegexOptions.Multiline; break;
        case 'n': flag = RegexOptions.ExplicitCapture; break;
        case 's': flag = RegexOptions.Singleline; break;
        case 'x': flag = RegexOptions.IgnorePatternWhitespace; break;
        default:
            return;
        }

        options = negate ? (options & ~flag) : (options | flag);
        ++ ptr;
    }
}
// Parses a bracketed character set; the opening '[' has already been
// consumed by the caller.
//
// A leading '^' negates the set and a ']' in first position is taken
// literally. Inside the set, \d \w \s \p{..} and their upper-case forms add
// categories (the ECMAScript variants are used when that option is on,
// except for \p / \P), \b denotes a backspace, and x-y adds a range. A '-'
// with no usable left-hand character, or one left over at the end of the
// set, is added as a literal '-'.
//
// Throws a parse exception for a reversed range or a missing ']'.
private Expression ParseCharacterClass (RegexOptions options) {
    bool negate = pattern[ptr] == '^';
    if (negate)
        ++ ptr;

    bool ecma = IsECMAScript (options);
    CharacterClass cls = new CharacterClass (negate, IsIgnoreCase (options));

    if (pattern[ptr] == ']') {
        cls.AddCharacter (']');
        ++ ptr;
    }

    int previous = -1;           // last single character added, or -1
    bool pending_dash = false;   // a '-' was seen and awaits its right side
    bool terminated = false;

    while (ptr < pattern.Length) {
        int c = pattern[ptr ++];

        if (c == ']') {
            terminated = true;
            break;
        }

        if (c == '-') {
            pending_dash = true;
            continue;
        }

        if (c == '\\') {
            c = ParseEscape ();
            if (c < 0) {
                // didn't recognize escape

                c = pattern[ptr ++];
                switch (c) {
                case 'b':
                    c = '\b';
                    break;

                case 'd': case 'D':
                    cls.AddCategory (ecma ? Category.EcmaDigit : Category.Digit, c == 'D');
                    previous = -1;
                    continue;

                case 'w': case 'W':
                    cls.AddCategory (ecma ? Category.EcmaWord : Category.Word, c == 'W');
                    previous = -1;
                    continue;

                case 's': case 'S':
                    cls.AddCategory (ecma ? Category.EcmaWhiteSpace : Category.WhiteSpace, c == 'S');
                    previous = -1;
                    continue;

                case 'p': case 'P':
                    cls.AddCategory (ParseUnicodeCategory (), c == 'P');    // ignore ecma
                    previous = -1;
                    continue;

                default:
                    break;    // add escaped character
                }
            }
        }

        if (!pending_dash) {
            cls.AddCharacter ((char)c);
            previous = c;
            continue;
        }

        if (c < previous)
            throw NewParseException ("[x-y] range in reverse order.");

        if (previous >= 0) {
            cls.AddRange ((char)previous, (char)c);
        }
        else {
            // nothing on the left of the dash: both are plain characters
            cls.AddCharacter ((char)c);
            cls.AddCharacter ('-');
        }

        pending_dash = false;
        previous = -1;
    }

    if (!terminated)
        throw NewParseException ("Unterminated [] set.");

    if (pending_dash)
        cls.AddCharacter ('-');

    return cls;
}
// Parses the inside of a {x}, {x,}, {,y} or {x,y} quantifier; the opening
// '{' has already been consumed by the caller.
//
//   min - receives the lower bound; 0 when none was written
//   max - receives the upper bound; 0xffff (the value this parser also
//         uses for '*' and '+') when none was written
//
// An upper bound of zero that was written explicitly, as in x{0} or
// x{0,0}, is kept as zero rather than being treated as "no upper bound",
// and a missing lower bound yields 0 rather than the internal -1 marker.
//
// Throws a parse exception for malformed syntax, for a bound of 0xffff or
// more, and for x > y.
private void ParseRepetitionBounds (out int min, out int max, RegexOptions options) {
    int n, m;    // -1 means "not present"
    bool skip_ws = IsIgnorePatternWhitespace (options);

    /* check syntax */

    ConsumeWhitespace (skip_ws);

    if (pattern[ptr] == ',') {
        n = -1;
    } else {
        n = ParseNumber (10, 1, 0);
        ConsumeWhitespace (skip_ws);
    }

    switch (pattern[ptr ++]) {
    case '}':
        m = n;
        break;
    case ',':
        ConsumeWhitespace (skip_ws);
        m = ParseNumber (10, 1, 0);
        ConsumeWhitespace (skip_ws);
        if (pattern[ptr ++] != '}')
            throw NewParseException ("Illegal {x,y} - bad value of y.");
        break;
    default:
        throw NewParseException ("Illegal {x,y}");
    }

    /* check bounds and ordering */

    if (n >= 0xffff || m >= 0xffff)
        throw NewParseException ("Illegal {x, y} - maximum of 65535.");
    if (m >= 0 && m < n)
        throw NewParseException ("Illegal {x, y} with x > y.");

    /* assign min and max */

    min = n < 0 ? 0 : n;
    max = m >= 0 ? m : 0xffff;
}
// Parses the "{name}" part of a \p or \P escape and returns the category
// that CategoryUtils.CategoryFromName associates with the name.
//
// Throws a parse exception when either brace or the name is missing, or
// when the name maps to Category.None.
private Category ParseUnicodeCategory () {
    const string incomplete = "Incomplete \\p{X} character escape.";

    char opening = pattern[ptr ++];
    if (opening != '{')
        throw NewParseException (incomplete);

    string name = ParseName (pattern, ref ptr);
    if (name == null)
        throw NewParseException (incomplete);

    Category category = CategoryUtils.CategoryFromName (name);
    if (category == Category.None)
        throw NewParseException ("Unknown property '" + name + "'.");

    char closing = pattern[ptr ++];
    if (closing != '}')
        throw NewParseException (incomplete);

    return category;
}
// Parses an escape that stands for an expression rather than a single
// character; the backslash has already been consumed by the caller.
//
// Handles the category escapes (\d \w \s \p and their upper-case
// negations), the position escapes (\A \Z \z \G \b \B), numbered
// backreferences (\1 ...) and named backreferences (\k<name> or
// \k'name'). Backreferences are recorded in refs and bound to their
// group later, by ResolveReferences.
//
// Returns the expression, or null with the read position restored when
// the escape is not one of the above.
private Expression ParseSpecial (RegexOptions options) {
    int start = ptr;
    bool ecma = IsECMAScript (options);
    Expression result = null;

    char escape = pattern[ptr ++];

    switch (escape) {

    // categories

    case 'd': case 'D':
        result = new CharacterClass (ecma ? Category.EcmaDigit : Category.Digit, escape == 'D');
        break;

    case 'w': case 'W':
        result = new CharacterClass (ecma ? Category.EcmaWord : Category.Word, escape == 'W');
        break;

    case 's': case 'S':
        result = new CharacterClass (ecma ? Category.EcmaWhiteSpace : Category.WhiteSpace, escape == 'S');
        break;

    case 'p': case 'P':
        // this is odd - ECMAScript isn't supposed to support Unicode,
        // yet \p{..} compiles and runs under the MS implementation
        // identically to canonical mode. That's why the value of ecma
        // is ignored here.

        result = new CharacterClass (ParseUnicodeCategory (), escape == 'P');
        break;

    // positions

    case 'A': result = new PositionAssertion (Position.StartOfString); break;
    case 'Z': result = new PositionAssertion (Position.End); break;
    case 'z': result = new PositionAssertion (Position.EndOfString); break;
    case 'G': result = new PositionAssertion (Position.StartOfScan); break;
    case 'b': result = new PositionAssertion (Position.Boundary); break;
    case 'B': result = new PositionAssertion (Position.NonBoundary); break;

    // references

    case '1': case '2': case '3': case '4': case '5':
    case '6': case '7': case '8': case '9': {
        ptr --;    // re-read the first digit as part of the number
        int number = ParseNumber (10, 1, 0);
        if (number < 0) {
            ptr = start;
            return null;
        }

        // FIXME test if number is within number of assigned groups
        // this may present a problem for right-to-left matching

        Reference numbered = new Reference (IsIgnoreCase (options));
        refs.Add (numbered, number.ToString ());
        result = numbered;
        break;
    }

    case 'k': {
        char delim = pattern[ptr ++];
        if (delim == '<')
            delim = '>';
        else if (delim != '\'')
            throw NewParseException ("Malformed \\k<...> named backreference.");

        string name = ParseName ();
        if (name == null || pattern[ptr] != delim)
            throw NewParseException ("Malformed \\k<...> named backreference.");

        ++ ptr;
        Reference named = new Reference (IsIgnoreCase (options));
        refs.Add (named, name);
        result = named;
        break;
    }

    default:
        result = null;
        break;
    }

    if (result == null)
        ptr = start;

    return result;
}
// Parses an escape that stands for a single character; the backslash has
// already been consumed by the caller.
//
// Handles \a \t \r \v \f \n \e \\, octal codes introduced by \0, \xHH,
// \uHHHH and control characters written as \cX.
//
// Returns the character code, or -1 with the read position restored when
// the escape is not one of the above (the caller then tries other
// interpretations).
//
// For \cX, X may be any character from '@' to '_'; lower-case letters are
// accepted as well and treated like their upper-case counterparts, so \ca
// and \cA both denote U+0001.
//
// Throws ArgumentException when the pattern ends right after the
// backslash, when \x or \u has too few hex digits, or when the character
// after \c is not a valid control character.
private int ParseEscape () {
    int p = ptr;
    int c;

    if (p >= pattern.Length)
        throw new ArgumentException (
            String.Format ("Parsing \"{0}\" - Illegal \\ at end of " +
                           "pattern.", pattern), pattern);

    switch (pattern[ptr ++]) {

    // standard escapes (except \b)

    case 'a': return '\u0007';
    case 't': return '\u0009';
    case 'r': return '\u000d';
    case 'v': return '\u000b';
    case 'f': return '\u000c';
    case 'n': return '\u000a';
    case 'e': return '\u001b';
    case '\\': return '\\';

    // character codes

    case '0': {
        int prevptr = ptr;
        int result = ParseOctal (pattern, ref ptr);

        // a lone \0 with no further octal digits is the NUL character
        if (result == -1 && prevptr == ptr)
            return 0;

        return result;
    }

    case 'x':
        c = ParseHex (pattern, ref ptr, 2);
        if (c < 0)
            throw NewParseException ("Insufficient hex digits");

        return c;

    case 'u':
        c = ParseHex (pattern, ref ptr, 4);
        if (c < 0)
            throw NewParseException ("Insufficient hex digits");

        return c;

    // control characters

    case 'c':
        c = pattern[ptr ++];

        // \ca is interpreted as \cA
        if (c >= 'a' && c <= 'z')
            c -= 'a' - 'A';

        if (c >= '@' && c <= '_')
            return c - '@';

        throw NewParseException ("Unrecognized control character.");

    // unknown escape

    default:
        ptr = p;
        return -1;
    }
}
// Reads a group name from the current pattern at the current position,
// using the static ParseName; the read position is advanced in place.
private string ParseName () {
    string name = Parser.ParseName (pattern, ref ptr);
    return name;
}
// Tells whether c may appear in a group name: modifier letters are
// rejected, connector punctuation (such as '_') is accepted, and any other
// character is accepted exactly when it is a letter or a digit.
private static bool IsNameChar (char c) {
    switch (Char.GetUnicodeCategory (c)) {
    case UnicodeCategory.ModifierLetter:
        return false;
    case UnicodeCategory.ConnectorPunctuation:
        return true;
    default:
        return Char.IsLetterOrDigit (c);
    }
}
// Parses a number in base b from the current pattern at the current
// position, using the static ParseNumber; the read position is advanced in
// place when the parse succeeds.
private int ParseNumber (int b, int min, int max) {
    int value = Parser.ParseNumber (pattern, ref ptr, b, min, max);
    return value;
}
// Parses a decimal number from the current pattern at the current
// position, using the static ParseDecimal.
private int ParseDecimal () {
    int value = Parser.ParseDecimal (pattern, ref ptr);
    return value;
}
// Returns the numeric value of c as a digit in base b, or -1 when c is not
// a digit in that base. Only bases 8, 10 and 16 are supported; any other
// base yields -1. Hex letters are accepted in either case. The parameter
// n is not used.
private static int ParseDigit (char c, int b, int n) {
    if (b != 8 && b != 10 && b != 16)
        return -1;

    int value;
    if (c >= '0' && c <= '9')
        value = c - '0';
    else if (c >= 'a' && c <= 'f')
        value = 10 + c - 'a';
    else if (c >= 'A' && c <= 'F')
        value = 10 + c - 'A';
    else
        return -1;

    // a digit is valid only when it is smaller than the base
    return value < b ? value : -1;
}
// Skips text that carries no meaning at the current position.
//
// "(?#...)" comments are always skipped. When `ignore` is true, runs of
// white space and '#' comments (up to and including the next newline) are
// skipped as well. Returns as soon as the next character is none of these
// or the end of the pattern is reached.
//
// The "(?#" scan does not check for the end of the pattern, so a comment
// without a closing ')' raises IndexOutOfRangeException.
private void ConsumeWhitespace (bool ignore) {
    while (ptr < pattern.Length) {
        char next = pattern[ptr];

        if (next == '(') {
            bool is_comment =
                ptr + 3 < pattern.Length &&
                pattern[ptr + 1] == '?' &&
                pattern[ptr + 2] == '#';

            if (!is_comment)
                return;

            ptr += 3;
            for (;;) {
                if (pattern[ptr ++] == ')')
                    break;
            }
        }
        else if (!ignore) {
            return;
        }
        else if (next == '#') {
            while (ptr < pattern.Length) {
                if (pattern[ptr ++] == '\n')
                    break;
            }
        }
        else if (Char.IsWhiteSpace (next)) {
            do {
                ++ ptr;
            } while (ptr < pattern.Length && Char.IsWhiteSpace (pattern[ptr]));
        }
        else {
            return;
        }
    }
}
// Returns pattern with its escape sequences replaced by the characters
// they denote.
//
// Each backslash is handed to ParseEscape; when that does not recognise
// the escape, the character after the backslash is taken as is, except
// that 'b' becomes a backspace. Overwrites the parser's pattern and read
// position.
private string ParseString (string pattern) {
    this.pattern = pattern;
    this.ptr = 0;

    StringBuilder unescaped = new StringBuilder (pattern.Length);

    while (ptr < pattern.Length) {
        int code = pattern[ptr ++];

        if (code == '\\') {
            code = ParseEscape ();

            if (code < 0) {
                // unknown escape: keep the escaped character itself
                code = pattern[ptr ++];
                if (code == 'b')
                    code = '\b';
            }
        }

        unescaped.Append ((char) code);
    }

    return unescaped.ToString ();
}
// Assigns group numbers and binds every recorded reference to its group.
//
// Unnamed groups are numbered first, in the order they appear in caps,
// starting at 1; named groups follow. A group whose name is already taken
// (by an earlier group of the same name, or by the decimal string of an
// unnamed group's number) shares that group's number instead of getting a
// new one. num_groups is incremented once per number handed out.
//
// Each entry of refs is then resolved by name. Throws a parse exception
// when a reference names a group that does not exist.
private void ResolveReferences () {
    Hashtable by_name = new Hashtable ();
    int next_number = 1;

    // number unnamed groups

    foreach (CapturingGroup group in caps) {
        if (group.Name != null)
            continue;

        by_name.Add (next_number.ToString (), group);
        group.Number = next_number;
        ++ next_number;
        ++ num_groups;
    }

    // number named groups

    foreach (CapturingGroup group in caps) {
        if (group.Name == null)
            continue;

        if (by_name.Contains (group.Name)) {
            CapturingGroup earlier = (CapturingGroup) by_name[group.Name];
            group.Number = earlier.Number;
            continue;
        }

        by_name.Add (group.Name, group);
        group.Number = next_number;
        ++ next_number;
        ++ num_groups;
    }

    // resolve references

    foreach (Expression expr in refs.Keys) {
        string name = (string) refs[expr];

        if (!by_name.Contains (name)) {
            throw NewParseException ("Reference to undefined group " +
                (Char.IsDigit (name[0]) ? "number " : "name ") +
                name);
        }

        CapturingGroup target = (CapturingGroup) by_name[name];

        Reference reference = expr as Reference;
        if (reference != null) {
            reference.CapturingGroup = target;
            continue;
        }

        CaptureAssertion capture_test = expr as CaptureAssertion;
        if (capture_test != null) {
            capture_test.CapturingGroup = target;
            continue;
        }

        BalancingGroup balancing = expr as BalancingGroup;
        if (balancing != null)
            balancing.Balance = target;
    }
}
1121 // flag helper functions
// True when the IgnoreCase flag is set in options.
private static bool IsIgnoreCase (RegexOptions options) {
    const RegexOptions flag = RegexOptions.IgnoreCase;
    return (options & flag) == flag;
}
// True when the Multiline flag is set in options.
private static bool IsMultiline (RegexOptions options) {
    const RegexOptions flag = RegexOptions.Multiline;
    return (options & flag) == flag;
}
// True when the ExplicitCapture flag is set in options.
private static bool IsExplicitCapture (RegexOptions options) {
    const RegexOptions flag = RegexOptions.ExplicitCapture;
    return (options & flag) == flag;
}
// True when the Singleline flag is set in options.
private static bool IsSingleline (RegexOptions options) {
    const RegexOptions flag = RegexOptions.Singleline;
    return (options & flag) == flag;
}
// True when the IgnorePatternWhitespace flag is set in options.
private static bool IsIgnorePatternWhitespace (RegexOptions options) {
    const RegexOptions flag = RegexOptions.IgnorePatternWhitespace;
    return (options & flag) == flag;
}
// True when the RightToLeft flag is set in options.
private static bool IsRightToLeft (RegexOptions options) {
    const RegexOptions flag = RegexOptions.RightToLeft;
    return (options & flag) == flag;
}
// True when the ECMAScript flag is set in options.
private static bool IsECMAScript (RegexOptions options) {
    const RegexOptions flag = RegexOptions.ECMAScript;
    return (options & flag) == flag;
}
1151 // exception creation
// Builds (but does not throw) the exception used to report a parse error:
// an ArgumentException whose message is msg prefixed with the pattern
// being parsed. The pattern is also passed as the constructor's second
// argument.
private ArgumentException NewParseException (string msg) {
    string text = String.Concat ("parsing \"", pattern, "\" - ", msg);
    return new ArgumentException (text, pattern);
}
// parse state: the text being parsed, the index of the next character to
// read from it, and the running count of group numbers handed out by
// ResolveReferences
private string pattern;
private int ptr;
private int num_groups;

// capturing groups, in the order they were encountered in the pattern
private ArrayList caps;

// expressions that refer to a group (backreferences, capture tests,
// balancing groups), each mapped to the name of the group it refers to
private Hashtable refs;