2 // Based upon interpreter.cs, written by Dan Lewis (dlewis@gmx.co.uk)
5 using System
.Collections
;
6 using System
.Globalization
;
7 using System
.Diagnostics
;
9 namespace System
.Text
.RegularExpressions
{
// Signature of a compiled matcher entry point. Scan invokes a delegate of this
// type with the interpreter instance and a start position; the bool result
// reports whether a match was found and a position is handed back through
// strpos_result.
11 internal delegate bool EvalDelegate (RxInterpreter interp
, int strpos
, ref int strpos_result
);
13 sealed class RxInterpreter
: BaseMachine
{
21 EvalDelegate eval_del
; // optimized EvalByteCode method created by the CILCompiler
23 Mark
[] marks
= null; // mark stack
24 int mark_start
; // start of current checkpoint
25 int mark_end
; // end of checkpoint/next free mark
27 IntStack stack
; // utility stack
29 RepeatContext repeat
; // current repeat context
30 RepeatContext deep
; // points to the most-nested repeat context
32 /* The readonly ensures the JIT can optimize out if (trace_rx) statements */
33 public static readonly bool trace_rx
= Environment
.GetEnvironmentVariable ("MONO_TRACE_RX") != null;
// Growable stack of ints kept in the `values` array, with `count` tracking the
// number of live entries.
// NOTE(review): several lines of this struct (field declarations and some member
// headers) are absent from this listing; the comments below describe only the
// statements that are visible.
37 internal struct IntStack
{
// Removes and returns the most recently stored value (count is decremented first).
42 return values
[--count
];
// Appends a value, enlarging the backing array when it is full.
44 public void Push (int value)
// Array is full: grow by half of the current length and copy the live entries over.
48 } else if (count
== values
.Length
) {
49 int new_size
= values
.Length
;
50 new_size
+= new_size
>> 1;
51 int [] new_values
= new int [new_size
];
52 for (int i
= 0; i
< count
; ++i
)
53 new_values
[i
] = values
[i
];
// Store the value in the next free slot and advance count.
56 values
[count
++] = value;
// Reads the top entry without removing it.
59 get { return values [count - 1]; }
// The message says the stack may only shrink; presumably this guards a count
// setter against growing the stack -- TODO confirm, the enclosing member is not visible.
65 throw new SystemException ("can only truncate the stack");
// Bookkeeping record for one repeat construct. Instances are linked to another
// context through `previous`.
// NOTE(review): part of the constructor body and several property headers are
// missing from this listing.
71 private class RepeatContext
{
72 public RepeatContext (RepeatContext previous
, int min
, int max
, bool lazy
, int expr_pc
) {
73 this.previous
= previous
;
77 this.expr_pc
= expr_pc
;
// Setter that overwrites the count field.
85 set { count = value; }
// Setter that overwrites the start field.
90 set { start = value; }
// True once count has reached the min bound.
93 public bool IsMinimum
{
94 get { return min <= count; }
// True once count has reached the max bound.
97 public bool IsMaximum
{
98 get { return max <= count; }
// The expr_pc value supplied to the constructor.
105 public int Expression
{
106 get { return expr_pc; }
// The context supplied as `previous` to the constructor.
109 public RepeatContext Previous
{
110 get { return previous; }
114 private int min
, max
;
117 private RepeatContext previous
;
// Assembles an int from bytes of `code` near `pc`. The visible statements OR in
// code[pc + 1], code[pc + 2] and code[pc + 3] shifted left by 8, 16 and 24 bits,
// i.e. least-significant byte first.
// NOTE(review): the initial assignment of val and the return statement are not
// in this listing; presumably val starts from code[pc] -- confirm.
122 static int ReadInt (byte[] code
, int pc
)
125 val
|= (int)code
[pc
+ 1] << 8;
126 val
|= (int)code
[pc
+ 2] << 16;
127 val
|= (int)code
[pc
+ 3] << 24;
// Creates an interpreter for the given bytecode program.
//   program:  the compiled regex bytecode.
//   eval_del: optional compiled matcher; Scan checks it against null before use.
131 public RxInterpreter (byte[] program
, EvalDelegate eval_del
)
133 this.program
= program
;
134 this.eval_del
= eval_del
;
// Group count is one more than the 16-bit value stored low byte first in
// program[1] and program[2].
135 group_count
= 1 + (program
[1] | ((int)program
[2] << 8));
// One slot per group; other methods use each slot as an index into `marks`.
136 groups
= new int [group_count
];
137 stack
= new IntStack ();
// Runs the program against the input beginning at `start` and returns the Match
// built by GenerateMatch.
// NOTE(review): setup lines and the no-match path are missing from this listing.
142 public override Match
Scan (Regex regex
, string text
, int start
, int end
) {
144 string_start
= start
;
// Use the compiled delegate when one was supplied to the constructor.
149 if (eval_del
!= null) {
150 match
= eval_del (this, start
, ref res
);
// Interpret the bytecode from program offset 11 (presumably the else branch of
// the test above -- the line between them is not visible).
152 match
= EvalByteCode (11, start
, ref res
);
// Record the reported position as the End of group 0's current mark.
154 marks
[groups
[0]].End
= res
;
156 return GenerateMatch (regex
);
157 //Match m = new Match (regex, this, text, end, 0, match_start, res - match_start);
163 // capture management
// Records ptr as the Start of the mark currently associated with group gid.
164 private void Open (int gid
, int ptr
) {
165 int m
= groups
[gid
];
// The current mark index is below mark_start or the mark is already defined.
// The branch body is not in this listing; presumably a fresh mark is obtained
// here -- TODO confirm.
166 if (m
< mark_start
|| marks
[m
].IsDefined
) {
171 marks
[m
].Start
= ptr
;
// Records ptr as the End of the mark currently associated with group gid.
174 private void Close (int gid
, int ptr
) {
175 marks
[groups
[gid
]].End
= ptr
;
// Balancing-group step. Looks at the current mark of balance_gid; when gid > 0
// and capture is set, opens gid at the position just past that mark, and then
// points balance_gid at the mark's predecessor.
// NOTE(review): return statements and the remainder of the capture branch are
// not in this listing.
178 private bool Balance (int gid
, int balance_gid
, bool capture
, int ptr
) {
179 int b
= groups
[balance_gid
];
181 if(b
== -1 || marks
[b
].Index
< 0) {
182 //Group not previously matched
// Debug-only check that the mark being balanced against is defined.
185 Debug
.Assert (marks
[b
].IsDefined
, "Regex", "Balancng group not closed");
186 if (gid
> 0 && capture
){
// Start gid at Index + Length of the balanced mark.
187 Open (gid
, marks
[b
].Index
+ marks
[b
].Length
);
// Drop the balanced group's current mark by stepping to its Previous link.
191 groups
[balance_gid
] = marks
[b
].Previous
;
// Begins a new checkpoint by moving mark_start up to mark_end.
// NOTE(review): the return statement is not in this listing.
196 private int Checkpoint () {
197 mark_start
= mark_end
;
// Iterates over every group; the visible statement steps a mark index back
// along the Previous chain.
// NOTE(review): the lines that read the starting index and test it against cp
// are missing; presumably each group is rewound to a mark created before
// checkpoint cp -- TODO confirm.
201 private void Backtrack (int cp
) {
202 for (int i
= 0; i
< groups
.Length
; ++ i
) {
205 m
= marks
[m
].Previous
;
// Resets the per-group mark state for the first n marks, where n is the number
// of groups.
210 private void ResetGroups () {
211 int n
= groups
.Length
;
// Mark array sized at ten entries per group. A line is missing just before this
// one, so the allocation may be conditional -- confirm.
213 marks
= new Mark
[n
* 10];
215 for (int i
= 0; i
< n
; ++ i
) {
// Start and Previous are set to -1; other per-mark assignments may sit in
// lines not shown here.
218 marks
[i
].Start
= -1;
220 marks
[i
].Previous
= -1;
// Starting from group gid's current mark, follows Previous links until a mark
// with IsDefined set is reached or the index drops below zero.
// NOTE(review): the return statement is not in this listing; presumably m is
// returned (callers compare the result with 0).
226 private int GetLastDefined (int gid
) {
227 int m
= groups
[gid
];
228 while (m
>= 0 && !marks
[m
].IsDefined
)
229 m
= marks
[m
].Previous
;
// Sets up a mark whose Previous link is `previous`.
// NOTE(review): the lines that choose the index m, advance mark_end and return
// are not in this listing.
234 private int CreateMark (int previous
) {
// Mark array is full: copy its contents into an array of twice the length.
235 if (mark_end
== marks
.Length
) {
236 Mark
[] dest
= new Mark
[marks
.Length
* 2];
237 marks
.CopyTo (dest
, 0);
// Start and End both become -1 and the mark is chained to `previous`.
242 marks
[m
].Start
= marks
[m
].End
= -1;
243 marks
[m
].Previous
= previous
;
// Walks group gid's mark chain from its current mark backwards via Previous.
//   first_mark_index: starts at -1 and is set to a mark index m the first time
//                     the assignment below is reached.
//   n_caps:           its updates are not in this listing; presumably the number
//                     of defined marks in the chain -- TODO confirm.
248 private void GetGroupInfo (int gid
, out int first_mark_index
, out int n_caps
)
250 first_mark_index
= -1;
252 for (int m
= groups
[gid
]; m
>= 0; m
= marks
[m
].Previous
) {
// Undefined marks are tested for here; the statement run for them is missing
// (presumably they are skipped).
253 if (!marks
[m
].IsDefined
)
// Only assigned while first_mark_index is still negative, so later marks in the
// walk do not overwrite it.
255 if (first_mark_index
< 0)
256 first_mark_index
= m
;
// Fills g.Captures with Capture objects built from the marks that come before
// first_mark_index in the Previous chain.
// NOTE(review): the declaration and update of i are not in this listing.
261 private void PopulateGroup (Group g
, int first_mark_index
, int n_caps
)
// The walk begins at the predecessor of first_mark_index, not at the mark itself.
264 for (int m
= marks
[first_mark_index
].Previous
; m
>= 0; m
= marks
[m
].Previous
) {
// Undefined marks are tested for here; the statement run for them is missing
// (presumably they are skipped).
265 if (!marks
[m
].IsDefined
)
267 Capture cap
= new Capture (str
, marks
[m
].Index
, marks
[m
].Length
);
// Captures are written at slot n_caps - 1 - i, i.e. indexed from the back.
268 g
.Captures
.SetValue (cap
, n_caps
- 1 - i
);
// Builds the Match object for the current mark state.
// NOTE(review): braces, the declaration of g and the first_mark_index < 0 branch
// body are missing from this listing.
273 private Match
GenerateMatch (Regex regex
)
275 int n_caps
, first_mark_index
;
// Group 0 describes the overall match.
277 GetGroupInfo (0, out first_mark_index
, out n_caps
);
279 // Avoid fully populating the Match instance if not needed
280 if (!needs_groups_or_captures
)
281 return new Match (regex
, this, str
, string_end
, 0, marks
[first_mark_index
].Index
, marks
[first_mark_index
].Length
);
// Full path: construct the Match with groups.Length groups and n_caps captures,
// then fill in group 0's captures.
283 Match retval
= new Match (regex
, this, str
, string_end
, groups
.Length
,
284 marks
[first_mark_index
].Index
, marks
[first_mark_index
].Length
, n_caps
);
285 PopulateGroup (retval
, first_mark_index
, n_caps
);
// Remaining groups, from 1 upward.
287 for (int gid
= 1; gid
< groups
.Length
; ++ gid
) {
288 GetGroupInfo (gid
, out first_mark_index
, out n_caps
);
// A negative index means GetGroupInfo never assigned a mark for this group.
289 if (first_mark_index
< 0) {
// Build the Group from the mark GetGroupInfo reported and fill its captures.
292 g
= new Group (str
, marks
[first_mark_index
].Index
, marks
[first_mark_index
].Length
, n_caps
);
293 PopulateGroup (g
, first_mark_index
, n_caps
);
// Attach the group at index gid of the result.
295 retval
.Groups
.SetValue (g
, gid
);
300 // used by the IL backend
// Sets the Start of group 0's current mark to pos.
301 internal void SetStartOfMatch (int pos
)
303 marks
[groups
[0]].Start
= pos
;
// Returns true when c is a letter or digit, or when its Unicode category is
// ConnectorPunctuation.
306 static bool IsWordChar (char c
)
308 return Char
.IsLetterOrDigit (c
) || Char
.GetUnicodeCategory (c
) == UnicodeCategory
.ConnectorPunctuation
;
311 bool EvalByteCode (int pc
, int strpos
, ref int strpos_result
)
313 // luckily the IL engine can deal with char_group_end at compile time
314 // this code offset needs to be checked only in opcodes that handle
315 // a single char and that are included in a TestCharGroup expression:
316 // the engine is supposed to jump to this offset as soon as the
317 // first opcode in the expression matches
318 // The code pattern becomes:
319 // on successful match: check if char_group_end is nonzero and jump to
320 // test_char_group_passed after adjusting strpos
321 // on failure: try the next expression by simply advancing pc
322 int char_group_end
= 0;
323 int length
, start
, end
;
326 Console
.WriteLine ("evaluating: {0} at pc: {1}, strpos: {2}, cge: {3}", (RxOp
)program
[pc
], pc
, strpos
, char_group_end
);
327 //Console.WriteLine ("deep: " + (deep == null ? 0 : deep.GetHashCode ()) + " repeat: " + (this.repeat == null ? 0 : this.repeat.GetHashCode ()));
329 switch ((RxOp
)program
[pc
]) {
331 if (char_group_end
!= 0) {
336 strpos_result
= strpos
;
340 case RxOp
.AnyPosition
:
343 case RxOp
.StartOfString
:
348 case RxOp
.StartOfLine
:
349 if (strpos
== 0 || str
[strpos
- 1] == '\n') {
354 case RxOp
.StartOfScan
:
355 if (strpos
!= string_start
)
360 if (strpos
== string_end
|| (strpos
== string_end
- 1 && str
[strpos
] == '\n')) {
365 case RxOp
.EndOfString
:
366 if (strpos
!= string_end
)
371 if (strpos
== string_end
|| str
[strpos
] == '\n') {
376 case RxOp
.WordBoundary
:
380 if (IsWordChar (str
[strpos
])) {
384 } else if (strpos
== string_end
) {
385 if (IsWordChar (str
[strpos
- 1])) {
390 if (IsWordChar (str
[strpos
]) != IsWordChar (str
[strpos
- 1])) {
396 case RxOp
.NoWordBoundary
:
400 if (!IsWordChar (str
[strpos
])) {
404 } else if (strpos
== string_end
) {
405 if (!IsWordChar (str
[strpos
- 1])) {
410 if (IsWordChar (str
[strpos
]) == IsWordChar (str
[strpos
- 1])) {
417 length
= program
[pc
+ 3] | ((int)program
[pc
+ 4] << 8);
418 pc
+= program
[pc
+ 1] | ((int)program
[pc
+ 2] << 8);
420 RxOp anch_op
= (RxOp
)(program
[pc
] & 0x00ff);
422 // Optimize some common cases
424 if (anch_op
== RxOp
.StartOfString
) {
427 if (groups
.Length
> 1) {
429 marks
[groups
[0]].Start
= strpos
;
431 if (EvalByteCode (pc
+ 1, strpos
, ref res
)) {
432 marks
[groups
[0]].Start
= strpos
;
433 if (groups
.Length
> 1)
434 marks
[groups
[0]].End
= res
;
442 // FIXME: Add more special cases from interpreter.cs
444 // it's important to test also the end of the string
445 // position for things like: "" =~ /$/
446 end
= string_end
+ 1;
447 while (strpos
< end
) {
449 if (groups
.Length
> 1) {
451 marks
[groups
[0]].Start
= strpos
;
453 if (EvalByteCode (pc
, strpos
, ref res
)) {
454 // match_start = strpos;
455 marks
[groups
[0]].Start
= strpos
;
456 if (groups
.Length
> 1)
457 marks
[groups
[0]].End
= res
;
464 case RxOp
.AnchorReverse
:
465 length
= program
[pc
+ 3] | ((int)program
[pc
+ 4] << 8);
466 pc
+= program
[pc
+ 1] | ((int)program
[pc
+ 2] << 8);
467 // it's important to test also the end of the string
468 // position for things like: "" =~ /$/
470 while (strpos
>= 0) {
472 if (groups
.Length
> 1) {
474 marks
[groups
[0]].Start
= strpos
;
476 if (EvalByteCode (pc
, strpos
, ref res
)) {
477 // match_start = strpos;
478 marks
[groups
[0]].Start
= strpos
;
479 if (groups
.Length
> 1)
480 marks
[groups
[0]].End
= res
;
488 length
= GetLastDefined (program
[pc
+ 1] | ((int)program
[pc
+ 2] << 8));
491 start
= marks
[length
].Index
;
492 length
= marks
[length
].Length
;
493 if (strpos
+ length
> string_end
)
495 for (end
= start
+ length
; start
< end
; ++start
) {
496 if (str
[strpos
] != str
[start
])
502 case RxOp
.ReferenceIgnoreCase
:
503 length
= GetLastDefined (program
[pc
+ 1] | ((int)program
[pc
+ 2] << 8));
506 start
= marks
[length
].Index
;
507 length
= marks
[length
].Length
;
508 if (strpos
+ length
> string_end
)
510 for (end
= start
+ length
; start
< end
; ++start
) {
511 if (str
[strpos
] != str
[start
] && Char
.ToLower (str
[strpos
]) != Char
.ToLower (str
[start
]))
517 case RxOp
.ReferenceReverse
: {
518 length
= GetLastDefined (program
[pc
+ 1] | ((int)program
[pc
+ 2] << 8));
521 start
= marks
[length
].Index
;
522 length
= marks
[length
].Length
;
523 if (strpos
- length
< 0)
525 int p
= strpos
- length
;
526 for (end
= start
+ length
; start
< end
; ++start
, ++p
) {
527 if (str
[p
] != str
[start
])
535 if (GetLastDefined (program
[pc
+ 3] | ((int)program
[pc
+ 4] << 8)) >= 0)
538 pc
+= program
[pc
+ 1] | ((int)program
[pc
+ 2] << 8);
540 case RxOp
.SubExpression
: {
542 if (EvalByteCode (pc
+ 3, strpos
, ref res
)) {
543 pc
+= program
[pc
+ 1] | ((int)program
[pc
+ 2] << 8);
552 if (EvalByteCode (pc
+ 5, strpos
, ref res
)) {
553 pc
+= program
[pc
+ 1] | ((int)program
[pc
+ 2] << 8);
555 pc
+= program
[pc
+ 3] | ((int)program
[pc
+ 4] << 8);
560 Open (program
[pc
+ 1] | ((int)program
[pc
+ 2] << 8), strpos
);
563 case RxOp
.CloseGroup
:
564 Close (program
[pc
+ 1] | ((int)program
[pc
+ 2] << 8), strpos
);
567 case RxOp
.BalanceStart
: {
570 if (!EvalByteCode (pc
+ 8, strpos
, ref res
))
573 int gid
= program
[pc
+ 1] | ((int)program
[pc
+ 2] << 8);
574 int balance_gid
= program
[pc
+ 3] | ((int)program
[pc
+ 4] << 8);
575 bool capture
= program
[pc
+ 5] > 0;
576 if (!Balance (gid
, balance_gid
, capture
, strpos
))
580 pc
+= program
[pc
+ 6] | ((int)program
[pc
+ 7] << 8);
588 pc
+= program
[pc
+ 1] | ((int)program
[pc
+ 2] << 8);
590 case RxOp
.TestCharGroup
:
591 char_group_end
= pc
+ (program
[pc
+ 1] | ((int)program
[pc
+ 2] << 8));
596 length
= program
[pc
+ 1];
597 if (strpos
+ length
> string_end
)
599 end
= start
+ length
;
600 for (; start
< end
; ++start
) {
601 if (str
[strpos
] != program
[start
])
607 case RxOp
.StringIgnoreCase
:
609 length
= program
[pc
+ 1];
610 if (strpos
+ length
> string_end
)
612 end
= start
+ length
;
613 for (; start
< end
; ++start
) {
614 if (str
[strpos
] != program
[start
] && Char
.ToLower (str
[strpos
]) != program
[start
])
620 case RxOp
.StringReverse
: {
622 length
= program
[pc
+ 1];
625 int p
= strpos
- length
;
626 end
= start
+ length
;
627 for (; start
< end
; ++start
, ++p
) {
628 if (str
[p
] != program
[start
])
635 case RxOp
.StringIgnoreCaseReverse
: {
637 length
= program
[pc
+ 1];
640 int p
= strpos
- length
;
641 end
= start
+ length
;
642 for (; start
< end
; ++start
, ++p
) {
643 if (str
[p
] != program
[start
] && Char
.ToLower (str
[p
]) != program
[start
])
650 case RxOp
.UnicodeString
: {
652 length
= program
[pc
+ 1] | ((int)program
[pc
+ 2] << 8);
653 if (strpos
+ length
> string_end
)
655 end
= start
+ length
* 2;
656 for (; start
< end
; start
+= 2) {
657 int c
= program
[start
] | ((int)program
[start
+ 1] << 8);
658 if (str
[strpos
] != c
)
665 case RxOp
.UnicodeStringIgnoreCase
: {
667 length
= program
[pc
+ 1] | ((int)program
[pc
+ 2] << 8);
668 if (strpos
+ length
> string_end
)
670 end
= start
+ length
* 2;
671 for (; start
< end
; start
+= 2) {
672 int c
= program
[start
] | ((int)program
[start
+ 1] << 8);
673 if (str
[strpos
] != c
&& Char
.ToLower (str
[strpos
]) != c
)
680 case RxOp
.UnicodeStringReverse
: {
682 length
= program
[pc
+ 1] | ((int)program
[pc
+ 2] << 8);
685 int p
= strpos
- length
;
686 end
= start
+ length
* 2;
687 for (; start
< end
; start
+= 2, p
+= 2) {
688 int c
= program
[start
] | ((int)program
[start
+ 1] << 8);
696 case RxOp
.UnicodeStringIgnoreCaseReverse
: {
698 length
= program
[pc
+ 1] | ((int)program
[pc
+ 2] << 8);
701 int p
= strpos
- length
;
702 end
= start
+ length
* 2;
703 for (; start
< end
; start
+= 2, p
+= 2) {
704 int c
= program
[start
] | ((int)program
[start
+ 1] << 8);
705 if (str
[p
] != c
&& Char
.ToLower (str
[p
]) != c
)
714 * The opcodes below are basically specialized versions of one
715 * generic opcode, which has three parameters:
716 * - reverse (Reverse), revert (No), ignore-case (IgnoreCase)
717 * Thus each opcode has 8 variants.
718 * FIXME: Maybe move all unusual variations
719 * (Reverse+IgnoreCase+Unicode) into a generic GenericChar opcode
720 * like in the old interpreter.
721 * FIXME: Move all the Reverse opcodes to a separate method.
725 if (strpos
< string_end
&& (COND (str
[strpos
]))) {
728 if (char_group_end
!= 0)
729 goto test_char_group_passed
;
734 * If we are inside a char group, the cases are ANDed
735 * together, so we have to continue checking the
736 * other cases, and we need to increase strpos after
738 * The char group is terminated by a True, hence the
740 * FIXME: Optimize this.
743 if (char_group_end
== 0 || (pc
+ 1 == char_group_end
))
745 if (pc
+ 1 == char_group_end
)
746 goto test_char_group_passed
;
751 if (char_group_end
== 0)
756 /* Fail both inside and outside a char group */
761 // Same as above, but use:
762 // - strpos > 0 instead of strpos < string_len
763 // - COND (str [strpos - 1]) instead of COND (str [strpos])
764 // - strpos -- instead of strpos ++
767 // GENERATED BY gen-interp.cs, DO NOT MODIFY
772 if (strpos
< string_end
) {
773 char c
= str
[strpos
];
774 if (((c
== program
[pc
+ 1]))) {
776 if (char_group_end
!= 0)
777 goto test_char_group_passed
;
782 if (char_group_end
== 0)
790 if (strpos
< string_end
) {
791 char c
= str
[strpos
];
792 if (((c
>= program
[pc
+ 1] && c
<= program
[pc
+ 2]))) {
794 if (char_group_end
!= 0)
795 goto test_char_group_passed
;
800 if (char_group_end
== 0)
807 case RxOp
.UnicodeRange
:
808 if (strpos
< string_end
) {
809 char c
= str
[strpos
];
810 if (((c
>= (program
[pc
+ 1] | ((int)program
[pc
+ 2] << 8))) && (c
<= (program
[pc
+ 3] | ((int)program
[pc
+ 4] << 8))))) {
812 if (char_group_end
!= 0)
813 goto test_char_group_passed
;
818 if (char_group_end
== 0)
825 case RxOp
.UnicodeChar
:
826 if (strpos
< string_end
) {
827 char c
= str
[strpos
];
828 if (((c
== (program
[pc
+ 1] | ((int)program
[pc
+ 2] << 8))))) {
830 if (char_group_end
!= 0)
831 goto test_char_group_passed
;
836 if (char_group_end
== 0)
843 case RxOp
.CategoryAny
:
844 if (strpos
< string_end
) {
845 char c
= str
[strpos
];
848 if (char_group_end
!= 0)
849 goto test_char_group_passed
;
854 if (char_group_end
== 0)
859 /* CategoryAnySingleline */
861 case RxOp
.CategoryAnySingleline
:
862 if (strpos
< string_end
) {
863 char c
= str
[strpos
];
866 if (char_group_end
!= 0)
867 goto test_char_group_passed
;
872 if (char_group_end
== 0)
879 case RxOp
.CategoryWord
:
880 if (strpos
< string_end
) {
881 char c
= str
[strpos
];
882 if (((Char
.IsLetterOrDigit (c
) || Char
.GetUnicodeCategory (c
) == UnicodeCategory
.ConnectorPunctuation
))) {
884 if (char_group_end
!= 0)
885 goto test_char_group_passed
;
890 if (char_group_end
== 0)
897 case RxOp
.CategoryDigit
:
898 if (strpos
< string_end
) {
899 char c
= str
[strpos
];
900 if (((Char
.IsDigit (c
)))) {
902 if (char_group_end
!= 0)
903 goto test_char_group_passed
;
908 if (char_group_end
== 0)
913 /* CategoryWhiteSpace */
915 case RxOp
.CategoryWhiteSpace
:
916 if (strpos
< string_end
) {
917 char c
= str
[strpos
];
918 if (((Char
.IsWhiteSpace (c
)))) {
920 if (char_group_end
!= 0)
921 goto test_char_group_passed
;
926 if (char_group_end
== 0)
931 /* CategoryEcmaWord */
933 case RxOp
.CategoryEcmaWord
:
934 if (strpos
< string_end
) {
935 char c
= str
[strpos
];
936 if ((('a' <= c
&& c
<= 'z' || 'A' <= c
&& c
<= 'Z' || '0' <= c
&& c
<= '9' || c
== '_'))) {
938 if (char_group_end
!= 0)
939 goto test_char_group_passed
;
944 if (char_group_end
== 0)
949 /* CategoryEcmaWhiteSpace */
951 case RxOp
.CategoryEcmaWhiteSpace
:
952 if (strpos
< string_end
) {
953 char c
= str
[strpos
];
954 if (((c
== ' ' || c
== '\t' || c
== '\n' || c
== '\r' || c
== '\f' || c
== '\v'))) {
956 if (char_group_end
!= 0)
957 goto test_char_group_passed
;
962 if (char_group_end
== 0)
967 /* CategoryUnicodeSpecials */
969 case RxOp
.CategoryUnicodeSpecials
:
970 if (strpos
< string_end
) {
971 char c
= str
[strpos
];
972 if ((('\uFEFF' <= c
&& c
<= '\uFEFF' || '\uFFF0' <= c
&& c
<= '\uFFFD'))) {
974 if (char_group_end
!= 0)
975 goto test_char_group_passed
;
980 if (char_group_end
== 0)
985 /* CategoryUnicode */
987 case RxOp
.CategoryUnicode
:
988 if (strpos
< string_end
) {
989 char c
= str
[strpos
];
990 if (((Char
.GetUnicodeCategory (c
) == (UnicodeCategory
)program
[pc
+ 1]))) {
992 if (char_group_end
!= 0)
993 goto test_char_group_passed
;
998 if (char_group_end
== 0)
1003 /* CategoryGeneral */
1005 case RxOp
.CategoryGeneral
:
1006 if (strpos
< string_end
) {
1007 char c
= str
[strpos
];
1008 if (((CategoryUtils
.IsCategory ((Category
)program
[pc
+ 1], c
)))) {
1010 if (char_group_end
!= 0)
1011 goto test_char_group_passed
;
1016 if (char_group_end
== 0)
1024 if (strpos
< string_end
) {
1025 char c
= str
[strpos
];
1026 int c2
= (int)c
; c2
-= program
[pc
+ 1]; length
= program
[pc
+ 2];
1027 if (((c2
>= 0 && c2
< (length
<< 3) && (program
[pc
+ 3 + (c2
>> 3)] & (1 << (c2
& 0x7))) != 0))) {
1029 if (char_group_end
!= 0)
1030 goto test_char_group_passed
;
1031 pc
+= 3 + program
[pc
+ 2];
1035 if (char_group_end
== 0)
1037 pc
+= 3 + program
[pc
+ 2];
1042 case RxOp
.UnicodeBitmap
:
1043 if (strpos
< string_end
) {
1044 char c
= str
[strpos
];
1045 int c2
= (int)c
; c2
-= (program
[pc
+ 1] | ((int)program
[pc
+ 2] << 8)); length
= (program
[pc
+ 3] | ((int)program
[pc
+ 4] << 8));
1046 if (((c2
>= 0 && c2
< (length
<< 3) && (program
[pc
+ 5 + (c2
>> 3)] & (1 << (c2
& 0x7))) != 0))) {
1048 if (char_group_end
!= 0)
1049 goto test_char_group_passed
;
1050 pc
+= 5 + (program
[pc
+ 3] | ((int)program
[pc
+ 4] << 8));
1054 if (char_group_end
== 0)
1056 pc
+= 5 + (program
[pc
+ 3] | ((int)program
[pc
+ 4] << 8));
1058 case RxOp
.CharIgnoreCase
:
1059 if (strpos
< string_end
) {
1060 char c
= Char
.ToLower (str
[strpos
]);
1061 if (((c
== program
[pc
+ 1]))) {
1063 if (char_group_end
!= 0)
1064 goto test_char_group_passed
;
1069 if (char_group_end
== 0)
1073 case RxOp
.RangeIgnoreCase
:
1074 if (strpos
< string_end
) {
1075 char c
= Char
.ToLower (str
[strpos
]);
1076 if (((c
>= program
[pc
+ 1] && c
<= program
[pc
+ 2]))) {
1078 if (char_group_end
!= 0)
1079 goto test_char_group_passed
;
1084 if (char_group_end
== 0)
1088 case RxOp
.UnicodeRangeIgnoreCase
:
1089 if (strpos
< string_end
) {
1090 char c
= Char
.ToLower (str
[strpos
]);
1091 if (((c
>= (program
[pc
+ 1] | ((int)program
[pc
+ 2] << 8))) && (c
<= (program
[pc
+ 3] | ((int)program
[pc
+ 4] << 8))))) {
1093 if (char_group_end
!= 0)
1094 goto test_char_group_passed
;
1099 if (char_group_end
== 0)
1103 case RxOp
.UnicodeCharIgnoreCase
:
1104 if (strpos
< string_end
) {
1105 char c
= Char
.ToLower (str
[strpos
]);
1106 if (((c
== (program
[pc
+ 1] | ((int)program
[pc
+ 2] << 8))))) {
1108 if (char_group_end
!= 0)
1109 goto test_char_group_passed
;
1114 if (char_group_end
== 0)
1118 case RxOp
.BitmapIgnoreCase
:
1119 if (strpos
< string_end
) {
1120 char c
= Char
.ToLower (str
[strpos
]);
1121 int c2
= (int)c
; c2
-= program
[pc
+ 1]; length
= program
[pc
+ 2];
1122 if (((c2
>= 0 && c2
< (length
<< 3) && (program
[pc
+ 3 + (c2
>> 3)] & (1 << (c2
& 0x7))) != 0))) {
1124 if (char_group_end
!= 0)
1125 goto test_char_group_passed
;
1126 pc
+= 3 + program
[pc
+ 2];
1130 if (char_group_end
== 0)
1132 pc
+= 3 + program
[pc
+ 2];
1134 case RxOp
.UnicodeBitmapIgnoreCase
:
1135 if (strpos
< string_end
) {
1136 char c
= Char
.ToLower (str
[strpos
]);
1137 int c2
= (int)c
; c2
-= (program
[pc
+ 1] | ((int)program
[pc
+ 2] << 8)); length
= (program
[pc
+ 3] | ((int)program
[pc
+ 4] << 8));
1138 if (((c2
>= 0 && c2
< (length
<< 3) && (program
[pc
+ 5 + (c2
>> 3)] & (1 << (c2
& 0x7))) != 0))) {
1140 if (char_group_end
!= 0)
1141 goto test_char_group_passed
;
1142 pc
+= 5 + (program
[pc
+ 3] | ((int)program
[pc
+ 4] << 8));
1146 if (char_group_end
== 0)
1148 pc
+= 5 + (program
[pc
+ 3] | ((int)program
[pc
+ 4] << 8));
1151 if (strpos
< string_end
) {
1152 char c
= str
[strpos
];
1153 if (!((c
== program
[pc
+ 1]))) {
1155 if (char_group_end
== 0 || (pc
+ 1 == char_group_end
)) {
1157 if (pc
+ 1 == char_group_end
)
1158 goto test_char_group_passed
;
1165 if (strpos
< string_end
) {
1166 char c
= str
[strpos
];
1167 if (!((c
>= program
[pc
+ 1] && c
<= program
[pc
+ 2]))) {
1169 if (char_group_end
== 0 || (pc
+ 1 == char_group_end
)) {
1171 if (pc
+ 1 == char_group_end
)
1172 goto test_char_group_passed
;
1178 case RxOp
.NoUnicodeRange
:
1179 if (strpos
< string_end
) {
1180 char c
= str
[strpos
];
1181 if (!((c
>= (program
[pc
+ 1] | ((int)program
[pc
+ 2] << 8))) && (c
<= (program
[pc
+ 3] | ((int)program
[pc
+ 4] << 8))))) {
1183 if (char_group_end
== 0 || (pc
+ 1 == char_group_end
)) {
1185 if (pc
+ 1 == char_group_end
)
1186 goto test_char_group_passed
;
1192 case RxOp
.NoUnicodeChar
:
1193 if (strpos
< string_end
) {
1194 char c
= str
[strpos
];
1195 if (!((c
== (program
[pc
+ 1] | ((int)program
[pc
+ 2] << 8))))) {
1197 if (char_group_end
== 0 || (pc
+ 1 == char_group_end
)) {
1199 if (pc
+ 1 == char_group_end
)
1200 goto test_char_group_passed
;
1206 case RxOp
.NoCategoryAny
:
1207 if (strpos
< string_end
) {
1208 char c
= str
[strpos
];
1209 if (!((c
!= '\n'))) {
1211 if (char_group_end
== 0 || (pc
+ 1 == char_group_end
)) {
1213 if (pc
+ 1 == char_group_end
)
1214 goto test_char_group_passed
;
1220 case RxOp
.NoCategoryAnySingleline
:
1221 if (strpos
< string_end
) {
1222 char c
= str
[strpos
];
1225 if (char_group_end
== 0 || (pc
+ 1 == char_group_end
)) {
1227 if (pc
+ 1 == char_group_end
)
1228 goto test_char_group_passed
;
1234 case RxOp
.NoCategoryWord
:
1235 if (strpos
< string_end
) {
1236 char c
= str
[strpos
];
1237 if (!((Char
.IsLetterOrDigit (c
) || Char
.GetUnicodeCategory (c
) == UnicodeCategory
.ConnectorPunctuation
))) {
1239 if (char_group_end
== 0 || (pc
+ 1 == char_group_end
)) {
1241 if (pc
+ 1 == char_group_end
)
1242 goto test_char_group_passed
;
1248 case RxOp
.NoCategoryDigit
:
1249 if (strpos
< string_end
) {
1250 char c
= str
[strpos
];
1251 if (!((Char
.IsDigit (c
)))) {
1253 if (char_group_end
== 0 || (pc
+ 1 == char_group_end
)) {
1255 if (pc
+ 1 == char_group_end
)
1256 goto test_char_group_passed
;
1262 case RxOp
.NoCategoryWhiteSpace
:
1263 if (strpos
< string_end
) {
1264 char c
= str
[strpos
];
1265 if (!((Char
.IsWhiteSpace (c
)))) {
1267 if (char_group_end
== 0 || (pc
+ 1 == char_group_end
)) {
1269 if (pc
+ 1 == char_group_end
)
1270 goto test_char_group_passed
;
1276 case RxOp
.NoCategoryEcmaWord
:
1277 if (strpos
< string_end
) {
1278 char c
= str
[strpos
];
1279 if (!(('a' <= c
&& c
<= 'z' || 'A' <= c
&& c
<= 'Z' || '0' <= c
&& c
<= '9' || c
== '_'))) {
1281 if (char_group_end
== 0 || (pc
+ 1 == char_group_end
)) {
1283 if (pc
+ 1 == char_group_end
)
1284 goto test_char_group_passed
;
1290 case RxOp
.NoCategoryEcmaWhiteSpace
:
1291 if (strpos
< string_end
) {
1292 char c
= str
[strpos
];
1293 if (!((c
== ' ' || c
== '\t' || c
== '\n' || c
== '\r' || c
== '\f' || c
== '\v'))) {
1295 if (char_group_end
== 0 || (pc
+ 1 == char_group_end
)) {
1297 if (pc
+ 1 == char_group_end
)
1298 goto test_char_group_passed
;
1304 case RxOp
.NoCategoryUnicodeSpecials
:
1305 if (strpos
< string_end
) {
1306 char c
= str
[strpos
];
1307 if (!(('\uFEFF' <= c
&& c
<= '\uFEFF' || '\uFFF0' <= c
&& c
<= '\uFFFD'))) {
1309 if (char_group_end
== 0 || (pc
+ 1 == char_group_end
)) {
1311 if (pc
+ 1 == char_group_end
)
1312 goto test_char_group_passed
;
1318 case RxOp
.NoCategoryUnicode
:
1319 if (strpos
< string_end
) {
1320 char c
= str
[strpos
];
1321 if (!((Char
.GetUnicodeCategory (c
) == (UnicodeCategory
)program
[pc
+ 1]))) {
1323 if (char_group_end
== 0 || (pc
+ 1 == char_group_end
)) {
1325 if (pc
+ 1 == char_group_end
)
1326 goto test_char_group_passed
;
1332 case RxOp
.NoCategoryGeneral
:
1333 if (strpos
< string_end
) {
1334 char c
= str
[strpos
];
1335 if (!((CategoryUtils
.IsCategory ((Category
)program
[pc
+ 1], c
)))) {
1337 if (char_group_end
== 0 || (pc
+ 1 == char_group_end
)) {
1339 if (pc
+ 1 == char_group_end
)
1340 goto test_char_group_passed
;
1347 if (strpos
< string_end
) {
1348 char c
= str
[strpos
];
1349 int c2
= (int)c
; c2
-= program
[pc
+ 1]; length
= program
[pc
+ 2];
1350 if (!((c2
>= 0 && c2
< (length
<< 3) && (program
[pc
+ 3 + (c2
>> 3)] & (1 << (c2
& 0x7))) != 0))) {
1351 pc
+= 3 + program
[pc
+ 2];
1352 if (char_group_end
== 0 || (pc
+ 1 == char_group_end
)) {
1354 if (pc
+ 1 == char_group_end
)
1355 goto test_char_group_passed
;
1361 case RxOp
.NoUnicodeBitmap
:
1362 if (strpos
< string_end
) {
1363 char c
= str
[strpos
];
1364 int c2
= (int)c
; c2
-= (program
[pc
+ 1] | ((int)program
[pc
+ 2] << 8)); length
= (program
[pc
+ 3] | ((int)program
[pc
+ 4] << 8));
1365 if (!((c2
>= 0 && c2
< (length
<< 3) && (program
[pc
+ 5 + (c2
>> 3)] & (1 << (c2
& 0x7))) != 0))) {
1366 pc
+= 5 + (program
[pc
+ 3] | ((int)program
[pc
+ 4] << 8));
1367 if (char_group_end
== 0 || (pc
+ 1 == char_group_end
)) {
1369 if (pc
+ 1 == char_group_end
)
1370 goto test_char_group_passed
;
1376 case RxOp
.NoCharIgnoreCase
:
1377 if (strpos
< string_end
) {
1378 char c
= Char
.ToLower (str
[strpos
]);
1379 if (!((c
== program
[pc
+ 1]))) {
1381 if (char_group_end
== 0 || (pc
+ 1 == char_group_end
)) {
1383 if (pc
+ 1 == char_group_end
)
1384 goto test_char_group_passed
;
1390 case RxOp
.NoRangeIgnoreCase
:
1391 if (strpos
< string_end
) {
1392 char c
= Char
.ToLower (str
[strpos
]);
1393 if (!((c
>= program
[pc
+ 1] && c
<= program
[pc
+ 2]))) {
1395 if (char_group_end
== 0 || (pc
+ 1 == char_group_end
)) {
1397 if (pc
+ 1 == char_group_end
)
1398 goto test_char_group_passed
;
1404 case RxOp
.NoUnicodeRangeIgnoreCase
:
1405 if (strpos
< string_end
) {
1406 char c
= Char
.ToLower (str
[strpos
]);
1407 if (!((c
>= (program
[pc
+ 1] | ((int)program
[pc
+ 2] << 8))) && (c
<= (program
[pc
+ 3] | ((int)program
[pc
+ 4] << 8))))) {
1409 if (char_group_end
== 0 || (pc
+ 1 == char_group_end
)) {
1411 if (pc
+ 1 == char_group_end
)
1412 goto test_char_group_passed
;
1418 case RxOp
.NoUnicodeCharIgnoreCase
:
1419 if (strpos
< string_end
) {
1420 char c
= Char
.ToLower (str
[strpos
]);
1421 if (!((c
== (program
[pc
+ 1] | ((int)program
[pc
+ 2] << 8))))) {
1423 if (char_group_end
== 0 || (pc
+ 1 == char_group_end
)) {
1425 if (pc
+ 1 == char_group_end
)
1426 goto test_char_group_passed
;
1432 case RxOp
.NoBitmapIgnoreCase
:
1433 if (strpos
< string_end
) {
1434 char c
= Char
.ToLower (str
[strpos
]);
1435 int c2
= (int)c
; c2
-= program
[pc
+ 1]; length
= program
[pc
+ 2];
1436 if (!((c2
>= 0 && c2
< (length
<< 3) && (program
[pc
+ 3 + (c2
>> 3)] & (1 << (c2
& 0x7))) != 0))) {
1437 pc
+= 3 + program
[pc
+ 2];
1438 if (char_group_end
== 0 || (pc
+ 1 == char_group_end
)) {
1440 if (pc
+ 1 == char_group_end
)
1441 goto test_char_group_passed
;
1447 case RxOp
.NoUnicodeBitmapIgnoreCase
:
1448 if (strpos
< string_end
) {
1449 char c
= Char
.ToLower (str
[strpos
]);
1450 int c2
= (int)c
; c2
-= (program
[pc
+ 1] | ((int)program
[pc
+ 2] << 8)); length
= (program
[pc
+ 3] | ((int)program
[pc
+ 4] << 8));
1451 if (!((c2
>= 0 && c2
< (length
<< 3) && (program
[pc
+ 5 + (c2
>> 3)] & (1 << (c2
& 0x7))) != 0))) {
1452 pc
+= 5 + (program
[pc
+ 3] | ((int)program
[pc
+ 4] << 8));
1453 if (char_group_end
== 0 || (pc
+ 1 == char_group_end
)) {
1455 if (pc
+ 1 == char_group_end
)
1456 goto test_char_group_passed
;
1462 case RxOp
.CharReverse
:
1464 char c
= str
[strpos
- 1];
1465 if (((c
== program
[pc
+ 1]))) {
1467 if (char_group_end
!= 0)
1468 goto test_char_group_passed
;
1473 if (char_group_end
== 0)
1477 case RxOp
.RangeReverse
:
1479 char c
= str
[strpos
- 1];
1480 if (((c
>= program
[pc
+ 1] && c
<= program
[pc
+ 2]))) {
1482 if (char_group_end
!= 0)
1483 goto test_char_group_passed
;
1488 if (char_group_end
== 0)
1492 case RxOp
.UnicodeRangeReverse
:
1494 char c
= str
[strpos
- 1];
1495 if (((c
>= (program
[pc
+ 1] | ((int)program
[pc
+ 2] << 8))) && (c
<= (program
[pc
+ 3] | ((int)program
[pc
+ 4] << 8))))) {
1497 if (char_group_end
!= 0)
1498 goto test_char_group_passed
;
1503 if (char_group_end
== 0)
1507 case RxOp
.UnicodeCharReverse
:
1509 char c
= str
[strpos
- 1];
1510 if (((c
== (program
[pc
+ 1] | ((int)program
[pc
+ 2] << 8))))) {
1512 if (char_group_end
!= 0)
1513 goto test_char_group_passed
;
1518 if (char_group_end
== 0)
1522 case RxOp
.CategoryAnyReverse
:
1524 char c
= str
[strpos
- 1];
1525 if (((c
!= '\n'))) {
1527 if (char_group_end
!= 0)
1528 goto test_char_group_passed
;
1533 if (char_group_end
== 0)
1537 case RxOp
.CategoryAnySinglelineReverse
:
1539 char c
= str
[strpos
- 1];
1542 if (char_group_end
!= 0)
1543 goto test_char_group_passed
;
1548 if (char_group_end
== 0)
1552 case RxOp
.CategoryWordReverse
:
1554 char c
= str
[strpos
- 1];
1555 if (((Char
.IsLetterOrDigit (c
) || Char
.GetUnicodeCategory (c
) == UnicodeCategory
.ConnectorPunctuation
))) {
1557 if (char_group_end
!= 0)
1558 goto test_char_group_passed
;
1563 if (char_group_end
== 0)
1567 case RxOp
.CategoryDigitReverse
:
1569 char c
= str
[strpos
- 1];
1570 if (((Char
.IsDigit (c
)))) {
1572 if (char_group_end
!= 0)
1573 goto test_char_group_passed
;
1578 if (char_group_end
== 0)
1582 case RxOp
.CategoryWhiteSpaceReverse
:
1584 char c
= str
[strpos
- 1];
1585 if (((Char
.IsWhiteSpace (c
)))) {
1587 if (char_group_end
!= 0)
1588 goto test_char_group_passed
;
1593 if (char_group_end
== 0)
1597 case RxOp
.CategoryEcmaWordReverse
:
1599 char c
= str
[strpos
- 1];
1600 if ((('a' <= c
&& c
<= 'z' || 'A' <= c
&& c
<= 'Z' || '0' <= c
&& c
<= '9' || c
== '_'))) {
1602 if (char_group_end
!= 0)
1603 goto test_char_group_passed
;
1608 if (char_group_end
== 0)
1612 case RxOp
.CategoryEcmaWhiteSpaceReverse
:
1614 char c
= str
[strpos
- 1];
1615 if (((c
== ' ' || c
== '\t' || c
== '\n' || c
== '\r' || c
== '\f' || c
== '\v'))) {
1617 if (char_group_end
!= 0)
1618 goto test_char_group_passed
;
1623 if (char_group_end
== 0)
1627 case RxOp
.CategoryUnicodeSpecialsReverse
:
1629 char c
= str
[strpos
- 1];
1630 if ((('\uFEFF' <= c
&& c
<= '\uFEFF' || '\uFFF0' <= c
&& c
<= '\uFFFD'))) {
1632 if (char_group_end
!= 0)
1633 goto test_char_group_passed
;
1638 if (char_group_end
== 0)
1642 case RxOp
.CategoryUnicodeReverse
:
1644 char c
= str
[strpos
- 1];
1645 if (((Char
.GetUnicodeCategory (c
) == (UnicodeCategory
)program
[pc
+ 1]))) {
1647 if (char_group_end
!= 0)
1648 goto test_char_group_passed
;
1653 if (char_group_end
== 0)
1657 case RxOp
.CategoryGeneralReverse
:
1659 char c
= str
[strpos
- 1];
1660 if (((CategoryUtils
.IsCategory ((Category
)program
[pc
+ 1], c
)))) {
1662 if (char_group_end
!= 0)
1663 goto test_char_group_passed
;
1668 if (char_group_end
== 0)
1672 case RxOp
.BitmapReverse
:
1674 char c
= str
[strpos
- 1];
1675 int c2
= (int)c
; c2
-= program
[pc
+ 1]; length
= program
[pc
+ 2];
1676 if (((c2
>= 0 && c2
< (length
<< 3) && (program
[pc
+ 3 + (c2
>> 3)] & (1 << (c2
& 0x7))) != 0))) {
1678 if (char_group_end
!= 0)
1679 goto test_char_group_passed
;
1680 pc
+= 3 + program
[pc
+ 2];
1684 if (char_group_end
== 0)
1686 pc
+= 3 + program
[pc
+ 2];
1688 case RxOp
.UnicodeBitmapReverse
:
1690 char c
= str
[strpos
- 1];
1691 int c2
= (int)c
; c2
-= (program
[pc
+ 1] | ((int)program
[pc
+ 2] << 8)); length
= (program
[pc
+ 3] | ((int)program
[pc
+ 4] << 8));
1692 if (((c2
>= 0 && c2
< (length
<< 3) && (program
[pc
+ 5 + (c2
>> 3)] & (1 << (c2
& 0x7))) != 0))) {
1694 if (char_group_end
!= 0)
1695 goto test_char_group_passed
;
1696 pc
+= 5 + (program
[pc
+ 3] | ((int)program
[pc
+ 4] << 8));
1700 if (char_group_end
== 0)
1702 pc
+= 5 + (program
[pc
+ 3] | ((int)program
[pc
+ 4] << 8));
1704 case RxOp
.CharIgnoreCaseReverse
:
1706 char c
= Char
.ToLower (str
[strpos
- 1]);
1707 if (((c
== program
[pc
+ 1]))) {
1709 if (char_group_end
!= 0)
1710 goto test_char_group_passed
;
1715 if (char_group_end
== 0)
1719 case RxOp
.RangeIgnoreCaseReverse
:
1721 char c
= Char
.ToLower (str
[strpos
- 1]);
1722 if (((c
>= program
[pc
+ 1] && c
<= program
[pc
+ 2]))) {
1724 if (char_group_end
!= 0)
1725 goto test_char_group_passed
;
1730 if (char_group_end
== 0)
1734 case RxOp
.UnicodeRangeIgnoreCaseReverse
:
1736 char c
= Char
.ToLower (str
[strpos
- 1]);
1737 if (((c
>= (program
[pc
+ 1] | ((int)program
[pc
+ 2] << 8))) && (c
<= (program
[pc
+ 3] | ((int)program
[pc
+ 4] << 8))))) {
1739 if (char_group_end
!= 0)
1740 goto test_char_group_passed
;
1745 if (char_group_end
== 0)
1749 case RxOp
.UnicodeCharIgnoreCaseReverse
:
1751 char c
= Char
.ToLower (str
[strpos
- 1]);
1752 if (((c
== (program
[pc
+ 1] | ((int)program
[pc
+ 2] << 8))))) {
1754 if (char_group_end
!= 0)
1755 goto test_char_group_passed
;
1760 if (char_group_end
== 0)
1764 case RxOp
.BitmapIgnoreCaseReverse
:
1766 char c
= Char
.ToLower (str
[strpos
- 1]);
1767 int c2
= (int)c
; c2
-= program
[pc
+ 1]; length
= program
[pc
+ 2];
1768 if (((c2
>= 0 && c2
< (length
<< 3) && (program
[pc
+ 3 + (c2
>> 3)] & (1 << (c2
& 0x7))) != 0))) {
1770 if (char_group_end
!= 0)
1771 goto test_char_group_passed
;
1772 pc
+= 3 + program
[pc
+ 2];
1776 if (char_group_end
== 0)
1778 pc
+= 3 + program
[pc
+ 2];
1780 case RxOp
.UnicodeBitmapIgnoreCaseReverse
:
1782 char c
= Char
.ToLower (str
[strpos
- 1]);
1783 int c2
= (int)c
; c2
-= (program
[pc
+ 1] | ((int)program
[pc
+ 2] << 8)); length
= (program
[pc
+ 3] | ((int)program
[pc
+ 4] << 8));
1784 if (((c2
>= 0 && c2
< (length
<< 3) && (program
[pc
+ 5 + (c2
>> 3)] & (1 << (c2
& 0x7))) != 0))) {
1786 if (char_group_end
!= 0)
1787 goto test_char_group_passed
;
1788 pc
+= 5 + (program
[pc
+ 3] | ((int)program
[pc
+ 4] << 8));
1792 if (char_group_end
== 0)
1794 pc
+= 5 + (program
[pc
+ 3] | ((int)program
[pc
+ 4] << 8));
1796 case RxOp
.NoCharReverse
:
1798 char c
= str
[strpos
- 1];
1799 if (!((c
== program
[pc
+ 1]))) {
1801 if (char_group_end
== 0 || (pc
+ 1 == char_group_end
)) {
1803 if (pc
+ 1 == char_group_end
)
1804 goto test_char_group_passed
;
1810 case RxOp
.NoRangeReverse
:
1812 char c
= str
[strpos
- 1];
1813 if (!((c
>= program
[pc
+ 1] && c
<= program
[pc
+ 2]))) {
1815 if (char_group_end
== 0 || (pc
+ 1 == char_group_end
)) {
1817 if (pc
+ 1 == char_group_end
)
1818 goto test_char_group_passed
;
1824 case RxOp
.NoUnicodeRangeReverse
:
1826 char c
= str
[strpos
- 1];
1827 if (!((c
>= (program
[pc
+ 1] | ((int)program
[pc
+ 2] << 8))) && (c
<= (program
[pc
+ 3] | ((int)program
[pc
+ 4] << 8))))) {
1829 if (char_group_end
== 0 || (pc
+ 1 == char_group_end
)) {
1831 if (pc
+ 1 == char_group_end
)
1832 goto test_char_group_passed
;
1838 case RxOp
.NoUnicodeCharReverse
:
1840 char c
= str
[strpos
- 1];
1841 if (!((c
== (program
[pc
+ 1] | ((int)program
[pc
+ 2] << 8))))) {
1843 if (char_group_end
== 0 || (pc
+ 1 == char_group_end
)) {
1845 if (pc
+ 1 == char_group_end
)
1846 goto test_char_group_passed
;
1852 case RxOp
.NoCategoryAnyReverse
:
1854 char c
= str
[strpos
- 1];
1855 if (!((c
!= '\n'))) {
1857 if (char_group_end
== 0 || (pc
+ 1 == char_group_end
)) {
1859 if (pc
+ 1 == char_group_end
)
1860 goto test_char_group_passed
;
1866 case RxOp
.NoCategoryAnySinglelineReverse
:
1868 char c
= str
[strpos
- 1];
1871 if (char_group_end
== 0 || (pc
+ 1 == char_group_end
)) {
1873 if (pc
+ 1 == char_group_end
)
1874 goto test_char_group_passed
;
1880 case RxOp
.NoCategoryWordReverse
:
1882 char c
= str
[strpos
- 1];
1883 if (!((Char
.IsLetterOrDigit (c
) || Char
.GetUnicodeCategory (c
) == UnicodeCategory
.ConnectorPunctuation
))) {
1885 if (char_group_end
== 0 || (pc
+ 1 == char_group_end
)) {
1887 if (pc
+ 1 == char_group_end
)
1888 goto test_char_group_passed
;
1894 case RxOp
.NoCategoryDigitReverse
:
1896 char c
= str
[strpos
- 1];
1897 if (!((Char
.IsDigit (c
)))) {
1899 if (char_group_end
== 0 || (pc
+ 1 == char_group_end
)) {
1901 if (pc
+ 1 == char_group_end
)
1902 goto test_char_group_passed
;
1908 case RxOp
.NoCategoryWhiteSpaceReverse
:
1910 char c
= str
[strpos
- 1];
1911 if (!((Char
.IsWhiteSpace (c
)))) {
1913 if (char_group_end
== 0 || (pc
+ 1 == char_group_end
)) {
1915 if (pc
+ 1 == char_group_end
)
1916 goto test_char_group_passed
;
1922 case RxOp
.NoCategoryEcmaWordReverse
:
1924 char c
= str
[strpos
- 1];
1925 if (!(('a' <= c
&& c
<= 'z' || 'A' <= c
&& c
<= 'Z' || '0' <= c
&& c
<= '9' || c
== '_'))) {
1927 if (char_group_end
== 0 || (pc
+ 1 == char_group_end
)) {
1929 if (pc
+ 1 == char_group_end
)
1930 goto test_char_group_passed
;
1936 case RxOp
.NoCategoryEcmaWhiteSpaceReverse
:
1938 char c
= str
[strpos
- 1];
1939 if (!((c
== ' ' || c
== '\t' || c
== '\n' || c
== '\r' || c
== '\f' || c
== '\v'))) {
1941 if (char_group_end
== 0 || (pc
+ 1 == char_group_end
)) {
1943 if (pc
+ 1 == char_group_end
)
1944 goto test_char_group_passed
;
1950 case RxOp
.NoCategoryUnicodeSpecialsReverse
:
1952 char c
= str
[strpos
- 1];
1953 if (!(('\uFEFF' <= c
&& c
<= '\uFEFF' || '\uFFF0' <= c
&& c
<= '\uFFFD'))) {
1955 if (char_group_end
== 0 || (pc
+ 1 == char_group_end
)) {
1957 if (pc
+ 1 == char_group_end
)
1958 goto test_char_group_passed
;
1964 case RxOp
.NoCategoryUnicodeReverse
:
1966 char c
= str
[strpos
- 1];
1967 if (!((Char
.GetUnicodeCategory (c
) == (UnicodeCategory
)program
[pc
+ 1]))) {
1969 if (char_group_end
== 0 || (pc
+ 1 == char_group_end
)) {
1971 if (pc
+ 1 == char_group_end
)
1972 goto test_char_group_passed
;
1978 case RxOp
.NoCategoryGeneralReverse
:
1980 char c
= str
[strpos
- 1];
1981 if (!((CategoryUtils
.IsCategory ((Category
)program
[pc
+ 1], c
)))) {
1983 if (char_group_end
== 0 || (pc
+ 1 == char_group_end
)) {
1985 if (pc
+ 1 == char_group_end
)
1986 goto test_char_group_passed
;
1992 case RxOp
.NoBitmapReverse
:
1994 char c
= str
[strpos
- 1];
1995 int c2
= (int)c
; c2
-= program
[pc
+ 1]; length
= program
[pc
+ 2];
1996 if (!((c2
>= 0 && c2
< (length
<< 3) && (program
[pc
+ 3 + (c2
>> 3)] & (1 << (c2
& 0x7))) != 0))) {
1997 pc
+= 3 + program
[pc
+ 2];
1998 if (char_group_end
== 0 || (pc
+ 1 == char_group_end
)) {
2000 if (pc
+ 1 == char_group_end
)
2001 goto test_char_group_passed
;
2007 case RxOp
.NoUnicodeBitmapReverse
:
2009 char c
= str
[strpos
- 1];
2010 int c2
= (int)c
; c2
-= (program
[pc
+ 1] | ((int)program
[pc
+ 2] << 8)); length
= (program
[pc
+ 3] | ((int)program
[pc
+ 4] << 8));
2011 if (!((c2
>= 0 && c2
< (length
<< 3) && (program
[pc
+ 5 + (c2
>> 3)] & (1 << (c2
& 0x7))) != 0))) {
2012 pc
+= 5 + (program
[pc
+ 3] | ((int)program
[pc
+ 4] << 8));
2013 if (char_group_end
== 0 || (pc
+ 1 == char_group_end
)) {
2015 if (pc
+ 1 == char_group_end
)
2016 goto test_char_group_passed
;
2022 case RxOp
.NoCharIgnoreCaseReverse
:
2024 char c
= Char
.ToLower (str
[strpos
- 1]);
2025 if (!((c
== program
[pc
+ 1]))) {
2027 if (char_group_end
== 0 || (pc
+ 1 == char_group_end
)) {
2029 if (pc
+ 1 == char_group_end
)
2030 goto test_char_group_passed
;
2036 case RxOp
.NoRangeIgnoreCaseReverse
:
2038 char c
= Char
.ToLower (str
[strpos
- 1]);
2039 if (!((c
>= program
[pc
+ 1] && c
<= program
[pc
+ 2]))) {
2041 if (char_group_end
== 0 || (pc
+ 1 == char_group_end
)) {
2043 if (pc
+ 1 == char_group_end
)
2044 goto test_char_group_passed
;
2050 case RxOp
.NoUnicodeRangeIgnoreCaseReverse
:
2052 char c
= Char
.ToLower (str
[strpos
- 1]);
2053 if (!((c
>= (program
[pc
+ 1] | ((int)program
[pc
+ 2] << 8))) && (c
<= (program
[pc
+ 3] | ((int)program
[pc
+ 4] << 8))))) {
2055 if (char_group_end
== 0 || (pc
+ 1 == char_group_end
)) {
2057 if (pc
+ 1 == char_group_end
)
2058 goto test_char_group_passed
;
2064 case RxOp
.NoUnicodeCharIgnoreCaseReverse
:
2066 char c
= Char
.ToLower (str
[strpos
- 1]);
2067 if (!((c
== (program
[pc
+ 1] | ((int)program
[pc
+ 2] << 8))))) {
2069 if (char_group_end
== 0 || (pc
+ 1 == char_group_end
)) {
2071 if (pc
+ 1 == char_group_end
)
2072 goto test_char_group_passed
;
2078 case RxOp
.NoBitmapIgnoreCaseReverse
:
2080 char c
= Char
.ToLower (str
[strpos
- 1]);
2081 int c2
= (int)c
; c2
-= program
[pc
+ 1]; length
= program
[pc
+ 2];
2082 if (!((c2
>= 0 && c2
< (length
<< 3) && (program
[pc
+ 3 + (c2
>> 3)] & (1 << (c2
& 0x7))) != 0))) {
2083 pc
+= 3 + program
[pc
+ 2];
2084 if (char_group_end
== 0 || (pc
+ 1 == char_group_end
)) {
2086 if (pc
+ 1 == char_group_end
)
2087 goto test_char_group_passed
;
2093 case RxOp
.NoUnicodeBitmapIgnoreCaseReverse
:
2095 char c
= Char
.ToLower (str
[strpos
- 1]);
2096 int c2
= (int)c
; c2
-= (program
[pc
+ 1] | ((int)program
[pc
+ 2] << 8)); length
= (program
[pc
+ 3] | ((int)program
[pc
+ 4] << 8));
2097 if (!((c2
>= 0 && c2
< (length
<< 3) && (program
[pc
+ 5 + (c2
>> 3)] & (1 << (c2
& 0x7))) != 0))) {
2098 pc
+= 5 + (program
[pc
+ 3] | ((int)program
[pc
+ 4] << 8));
2099 if (char_group_end
== 0 || (pc
+ 1 == char_group_end
)) {
2101 if (pc
+ 1 == char_group_end
)
2102 goto test_char_group_passed
;
2109 // END OF GENERATED CODE
2113 if (EvalByteCode (pc
+ 3, strpos
, ref res
)) {
2114 strpos_result
= res
;
2117 //Console.WriteLine ("branch offset: {0}", program [pc + 1] | ((int)program [pc + 2] << 8));
2118 pc
+= program
[pc
+ 1] | ((int)program
[pc
+ 2] << 8);
2122 case RxOp
.RepeatLazy
: {
2124 * Repetition is modelled by two opcodes: Repeat and Until which
2125 * contain the qualified regex between them, i.e.:
2126 * Repeat, <bytecode for the inner regex>, Until, <Tail expr>
2127 * It is processed as follows:
2128 * Repeat, [Until, <inner expr>]*, <Tail>
2129 * This means that nested quantifiers are processed a bit
2130 * strangely: when the inner quantifier fails to match, its
2131 * tail is processed which includes the outer Until.
2133 * This code is from the old interpreter.cs.
2135 * FIXME: Rethink this.
2140 this.repeat
= new RepeatContext (
2141 this.repeat
, // previous context
2142 ReadInt (program
, pc
+ 3), // minimum
2143 ReadInt (program
, pc
+ 7), // maximum
2144 (RxOp
)program
[pc
] == RxOp
.RepeatLazy
, // lazy
2145 pc
+ 11 // subexpression
2148 int until
= pc
+ (program
[pc
+ 1] | ((int)program
[pc
+ 2] << 8));
2149 if (!EvalByteCode (until
, strpos
, ref res
)) {
2150 this.repeat
= this.repeat
.Previous
;
2155 strpos_result
= strpos
;
2159 RepeatContext current
= this.repeat
;
2163 // Can we avoid recursion?
2165 // Backtracking can be forced in nested quantifiers from the tail of this quantifier.
2166 // Thus, we cannot, in general, use a simple loop on repeat.Expression to handle
2169 // If 'deep' was unmolested, that implies that there were no nested quantifiers.
2170 // Thus, we can safely avoid recursion.
2172 if (deep
== current
)
2175 start
= current
.Start
;
2176 int start_count
= current
.Count
;
2178 // First match at least 'start' items without backtracking
2179 while (!current
.IsMinimum
) {
2181 current
.Start
= strpos
;
2183 if (!EvalByteCode (current
.Expression
, strpos
, ref res
)) {
2184 current
.Start
= start
;
2185 current
.Count
= start_count
;
2189 if (deep
!= current
) // recursive mode
2193 if (strpos
== current
.Start
) {
2194 // degenerate match ... match tail or fail
2195 this.repeat
= current
.Previous
;
2197 if (EvalByteCode (pc
+ 1, strpos
, ref res
)) {
2201 this.repeat
= current
;
2205 if (current
.IsLazy
) {
2207 // match tail first ...
2208 this.repeat
= current
.Previous
;
2210 int cp
= Checkpoint ();
2211 if (EvalByteCode (pc
+ 1, strpos
, ref res
)) {
2218 // ... then match more
2219 this.repeat
= current
;
2220 if (current
.IsMaximum
)
2223 current
.Start
= strpos
;
2225 if (!EvalByteCode (current
.Expression
, strpos
, ref res
)) {
2226 current
.Start
= start
;
2227 current
.Count
= start_count
;
2231 if (deep
!= current
) // recursive mode
2233 // Degenerate match: ptr has not moved since the last (failed) tail match.
2234 // So, next and subsequent tail matches will fail.
2235 if (strpos
== current
.Start
)
2239 int stack_size
= stack
.Count
;
2241 // match greedily as much as possible
2242 while (!current
.IsMaximum
) {
2243 int cp
= Checkpoint ();
2244 int old_ptr
= strpos
;
2245 int old_start
= current
.Start
;
2249 Console
.WriteLine ("recurse with count {0}.", current
.Count
);
2250 current
.Start
= strpos
;
2252 if (!EvalByteCode (current
.Expression
, strpos
, ref res
)) {
2254 current
.Start
= old_start
;
2259 if (deep
!= current
) {
2260 // recursive mode: no more backtracking, truncate the stack
2261 stack
.Count
= stack_size
;
2265 stack
.Push (old_ptr
);
2267 // Degenerate match: no point going on
2268 if (strpos
== current
.Start
)
2273 Console
.WriteLine ("matching tail: {0} pc={1}", strpos
, pc
+ 1);
2274 // then, match the tail, backtracking as necessary.
2275 this.repeat
= current
.Previous
;
2278 if (EvalByteCode (pc
+ 1, strpos
, ref res
)) {
2280 stack
.Count
= stack_size
;
2283 if (stack
.Count
== stack_size
) {
2284 this.repeat
= current
;
2289 strpos
= stack
.Pop ();
2290 Backtrack (stack
.Pop ());
2292 Console
.WriteLine ("backtracking to {0} expr={1} pc={2}", strpos
, current
.Expression
, pc
);
2297 case RxOp
.FastRepeat
:
2298 case RxOp
.FastRepeatLazy
: {
2300 * A FastRepeat is a simplified version of Repeat which does
2301 * not contain another repeat inside, so backtracking is
2304 bool lazy
= program
[pc
] == (byte)RxOp
.FastRepeatLazy
;
2306 int tail
= pc
+ (program
[pc
+ 1] | ((int)program
[pc
+ 2] << 8));
2307 start
= ReadInt (program
, pc
+ 3);
2308 end
= ReadInt (program
, pc
+ 7);
2309 //Console.WriteLine ("min: {0}, max: {1} tail: {2}", start, end, tail);
2314 // First match at least 'start' items
2315 while (length
< start
) {
2316 if (!EvalByteCode (pc
+ 11, strpos
, ref res
))
2325 int cp
= Checkpoint ();
2326 if (EvalByteCode (tail
, strpos
, ref res
)) {
2328 goto repeat_success
;
2336 if (!EvalByteCode (pc
+ 11, strpos
, ref res
))
2342 // Then match as many items as possible, recording
2343 // backtracking information
2344 int old_stack_size
= stack
.Count
;
2345 while (length
< end
) {
2346 int cp
= Checkpoint ();
2347 if (!EvalByteCode (pc
+ 11, strpos
, ref res
)) {
2352 stack
.Push (strpos
);
2358 throw new Exception ();
2360 // Then, match the tail, backtracking as necessary.
2362 if (EvalByteCode (tail
, strpos
, ref res
)) {
2364 stack
.Count
= old_stack_size
;
2365 goto repeat_success
;
2367 if (stack
.Count
== old_stack_size
)
2371 strpos
= stack
.Pop ();
2372 Backtrack (stack
.Pop ());
2374 Console
.WriteLine ("backtracking to: {0}", strpos
);
2379 // We matched the tail too so just return
2384 Console
.WriteLine ("evaluating: {0} at pc: {1}, strpos: {2}", (RxOp
)program
[pc
], pc
, strpos
);
2385 throw new NotSupportedException ();
2390 strpos_result
= strpos
;
2394 test_char_group_passed:
2395 pc
= char_group_end
;
2398 } // end of while (true)