2 using System
.Collections
;
3 using System
.Globalization
;
4 using System
.Reflection
;
5 using System
.Reflection
.Emit
;
8 using System
.Collections
.Generic
;
11 namespace System
.Text
.RegularExpressions
{
14 // Compiler which generates IL bytecode to perform the matching instead of
15 // interpreting a program.
16 // For simplicity, we inherit from RxCompiler, and generate the IL code based
17 // on the program generated by it. This also allows us to fallback to interpretation
18 // if we can't handle something.
19 // This is net 2.0, since 1.0 doesn't support DynamicMethods
20 // FIXME: Add support for 1.0, and CompileToAssembly
21 // FIXME: Overwrite RxCompiler methods so we don't have to decode char
26 class CILCompiler
: RxCompiler
, ICompiler
{
// Per-program-offset cache used by GetEvalMethod: eval_methods [pc] holds the
// method generated for the code starting at pc, and eval_methods_defined [pc]
// records that generation for that pc has already been started.
DynamicMethod[] eval_methods;
bool[] eval_methods_defined;

/*
 * To avoid the overhead of decoding the countless opcode variants created
 * by RxCompiler, we save the original, 'generic' version and its flags
 * in these two tables.
 */
// Both tables are keyed by the program offset (curpos) at which the op was
// emitted; the values are an RxOp and an OpFlags value stored as int.
private Dictionary<int, int> generic_ops;
private Dictionary<int, int> op_flags;
// Reflection handles for the RxInterpreter and Mark members referenced by the
// generated IL. They are resolved once, when the type is initialized.

// Instance fields of RxInterpreter (non-public).
static FieldInfo fi_str = typeof (RxInterpreter).GetField ("str", BindingFlags.Instance | BindingFlags.NonPublic);
static FieldInfo fi_string_start = typeof (RxInterpreter).GetField ("string_start", BindingFlags.Instance | BindingFlags.NonPublic);
static FieldInfo fi_string_end = typeof (RxInterpreter).GetField ("string_end", BindingFlags.Instance | BindingFlags.NonPublic);
// Disabled together with the code that would store to match_start.
// static FieldInfo fi_match_start = typeof (RxInterpreter).GetField ("match_start", BindingFlags.Instance|BindingFlags.NonPublic);
static FieldInfo fi_program = typeof (RxInterpreter).GetField ("program", BindingFlags.Instance | BindingFlags.NonPublic);
static FieldInfo fi_marks = typeof (RxInterpreter).GetField ("marks", BindingFlags.Instance | BindingFlags.NonPublic);
static FieldInfo fi_groups = typeof (RxInterpreter).GetField ("groups", BindingFlags.Instance | BindingFlags.NonPublic);

// Fields of Mark; looked up with both Public and NonPublic so either visibility resolves.
static FieldInfo fi_mark_start = typeof (Mark).GetField ("Start", BindingFlags.Instance | BindingFlags.Public | BindingFlags.NonPublic);
static FieldInfo fi_mark_end = typeof (Mark).GetField ("End", BindingFlags.Instance | BindingFlags.Public | BindingFlags.NonPublic);

// RxInterpreter methods called from the generated IL. IsWordChar is static,
// the others are instance methods.
static MethodInfo mi_is_word_char = typeof (RxInterpreter).GetMethod ("IsWordChar", BindingFlags.Static | BindingFlags.NonPublic);
static MethodInfo mi_reset_groups = typeof (RxInterpreter).GetMethod ("ResetGroups", BindingFlags.Instance | BindingFlags.NonPublic);
static MethodInfo mi_open = typeof (RxInterpreter).GetMethod ("Open", BindingFlags.Instance | BindingFlags.NonPublic);
static MethodInfo mi_close = typeof (RxInterpreter).GetMethod ("Close", BindingFlags.Instance | BindingFlags.NonPublic);
// Creates a compiler whose generic-op and flag tables start out empty.
public CILCompiler () {
	op_flags = new Dictionary<int, int> ();
	generic_ops = new Dictionary<int, int> ();
}
/*
 * Copy the bytecode emitted so far, try to compile it to IL and wrap the
 * result in an RxInterpreterFactory. When no eval method could be generated
 * the factory is created with a null delegate instead.
 */
IMachineFactory ICompiler.GetMachineFactory () {
	// Private copy of the first curpos bytes of the program buffer.
	byte[] code = new byte [curpos];
	Buffer.BlockCopy (program, 0, code, 0, curpos);

	// Reset the per-offset method cache for this program.
	eval_methods = new DynamicMethod [code.Length];
	eval_methods_defined = new bool [code.Length];

	// The main eval method
	// NOTE(review): 11 is presumably the offset of the first opcode after the
	// program header - confirm against RxCompiler.
	DynamicMethod main = GetEvalMethod (code, 11);

	// CreateEvalMethod documents a null result for unsupported opcodes;
	// calling CreateDelegate on it would throw, so hand over a null delegate.
	if (main == null) {
		return new RxInterpreterFactory (code, null);
	}

	return new RxInterpreterFactory (code, (EvalDelegate)main.CreateDelegate (typeof (EvalDelegate)));
}
/*
 * Return the eval method for the code starting at pc, generating it on the
 * first request and serving it from the eval_methods cache afterwards.
 */
DynamicMethod GetEvalMethod (byte[] program, int pc) {
	if (!eval_methods_defined [pc]) {
		// The slot is flagged before generation starts, so a request for the
		// same pc made while CreateEvalMethod is still running gets the
		// not-yet-filled slot instead of triggering generation again.
		eval_methods_defined [pc] = true;
		eval_methods [pc] = CreateEvalMethod (program, pc);
	}

	return eval_methods [pc];
}
// Look up a non-public instance method of RxInterpreter by name.
// Type.GetMethod returns null when no such method exists.
private MethodInfo GetInterpreterMethod (string name) {
	BindingFlags lookup = BindingFlags.Instance | BindingFlags.NonPublic;
	return typeof (RxInterpreter).GetMethod (name, lookup);
}
/*
 * Read a 32-bit integer stored in little-endian byte order at code [pc .. pc + 3].
 * The caller must make sure that four bytes are available at pc.
 */
private int ReadInt (byte[] code, int pc) {
	int val = code [pc];
	val |= code [pc + 1] << 8;
	val |= code [pc + 2] << 16;
	// A top byte >= 0x80 sets the sign bit, yielding a negative value.
	val |= code [pc + 3] << 24;
	return val;
}
/*
 * Combine the four boolean options into a single OpFlags value. Each true
 * argument sets the corresponding flag; all false yields the zero value.
 */
static OpFlags MakeFlags (bool negate, bool ignore, bool reverse, bool lazy) {
	OpFlags flags = 0;

	if (negate) {
		flags |= OpFlags.Negate;
	}
	if (ignore) {
		flags |= OpFlags.IgnoreCase;
	}
	if (reverse) {
		flags |= OpFlags.RightToLeft;
	}
	if (lazy) {
		flags |= OpFlags.Lazy;
	}

	return flags;
}
/*
 * Remember, for the opcode about to be emitted at curpos, which generic
 * opcode it stands for and with which flags it was requested. The IL
 * generator later reads these tables instead of decoding the specialized
 * opcode variant.
 */
void EmitGenericOp (RxOp op, bool negate, bool ignore, bool reverse, bool lazy) {
	generic_ops [curpos] = (int)op;
	// Pass 'lazy' through as well; it used to be dropped by a hard-coded false.
	op_flags [curpos] = (int)MakeFlags (negate, ignore, reverse, lazy);
}
/*
 * Record the generic form (GenericChar plus flags) for the current program
 * offset, then let RxCompiler emit the actual opcode.
 */
void ICompiler.EmitCharacter (char c, bool negate, bool ignore, bool reverse) {
	// Character ops carry no laziness.
	const bool lazy = false;

	// Must run before the base call: the tables are keyed by the current curpos.
	EmitGenericOp (RxOp.GenericChar, negate, ignore, reverse, lazy);
	base.EmitCharacter (c, negate, ignore, reverse);
}
/*
 * Record the generic form of a range op for the current program offset, then
 * let RxCompiler emit the actual opcode. Ranges whose bounds are both below
 * 256 are recorded as GenericRange, all others as GenericUnicodeRange.
 */
void ICompiler.EmitRange (char lo, char hi, bool negate, bool ignore, bool reverse) {
	// Exactly one generic op is recorded; recording both would let the second
	// overwrite the first, because both go into the same curpos slot.
	RxOp generic_op = (lo < 256 && hi < 256) ? RxOp.GenericRange : RxOp.GenericUnicodeRange;

	EmitGenericOp (generic_op, negate, ignore, reverse, false);
	base.EmitRange (lo, hi, negate, ignore, reverse);
}
/*
 * Save the negate/reverse flags for the current program offset, then let
 * RxCompiler emit the category opcode(s).
 */
void ICompiler.EmitCategory (Category cat, bool negate, bool reverse) {
	// This is decomposed into different opcodes by RxCompiler, so
	// only save the flags
	OpFlags flags = MakeFlags (negate, false, reverse, false);
	op_flags [curpos] = (int)flags;

	base.EmitCategory (cat, negate, reverse);
}
// A "not category" test is emitted as the same category test with the negate
// flag inverted, going through this class's own ICompiler.EmitCategory.
void ICompiler.EmitNotCategory (Category cat, bool negate, bool reverse)
{
	ICompiler self = this;
	self.EmitCategory (cat, !negate, reverse);
}
// Branch targets for the code generated within this frame: label_pass is
// taken on a match, label_fail otherwise.
public Label label_pass;
public Label label_fail;
// Int local holding the resulting string position when the frame passes.
public LocalBuilder local_strpos_res;

// Allocates the two labels and the result local on the given IL generator.
// The labels are only defined here; the code using the frame marks them.
public Frame (ILGenerator ilgen) {
	label_fail = ilgen.DefineLabel ();
	label_pass = ilgen.DefineLabel ();
	local_strpos_res = ilgen.DeclareLocal (typeof (int));
}
/*
 * Create a dynamic method which is equivalent to the RxInterpreter.EvalByteCode
 * method specialized to the given program and a given pc. Return the newly
 * created method or null if a not-supported opcode was encountered.
 */
DynamicMethod CreateEvalMethod (byte[] program, int pc) {
	// Signature of the generated method: bool Eval_<pc> (RxInterpreter, int, ref int).
	// It is owned by RxInterpreter and skips visibility checks, so the IL may
	// touch the interpreter's non-public members.
	Type[] param_types = new Type [] { typeof (RxInterpreter), typeof (int), typeof (int).MakeByRefType () };
	DynamicMethod m = new DynamicMethod ("Eval_" + pc, typeof (bool), param_types, typeof (RxInterpreter), true);
	ILGenerator ilgen = m.GetILGenerator ();

	/*
	 * Recursive calls to EvalByteCode are inlined manually by calling
	 * EmitEvalMethodBody with the pc of the recursive call. Frame objects hold
	 * the information required to link together the code generated by the recursive
	 * call with the rest of the code.
	 */
	Frame frame = new Frame (ilgen);

	m = EmitEvalMethodBody (m, ilgen, frame, program, pc, false, false, out pc);
	if (m == null) {
		// Nothing usable was generated; report that to the caller.
		return null;
	}

	// Pass epilogue: store the result position through the by-ref argument
	// (arg 2) and return true.
	ilgen.MarkLabel (frame.label_pass);
	ilgen.Emit (OpCodes.Ldarg_2);
	ilgen.Emit (OpCodes.Ldloc, frame.local_strpos_res);
	ilgen.Emit (OpCodes.Stind_I4);
	ilgen.Emit (OpCodes.Ldc_I4_1);
	ilgen.Emit (OpCodes.Ret);

	// Fail epilogue: return false.
	ilgen.MarkLabel (frame.label_fail);
	ilgen.Emit (OpCodes.Ldc_I4_0);
	ilgen.Emit (OpCodes.Ret);

	return m;
}
190 * Emit IL code for a sequence of opcodes starting at pc. If there is a match,
191 * set frame.local_strpos_res to the position of the match, then branch to
192 * frame.label_pass. Else branch to frame.label_fail. If one_op is true, only
193 * generate code for one opcode and set out_pc to the next pc after the opcode.
194 * If no_bump is true, don't bump strpos in char matching opcodes.
195 * Keep this in synch with RxInterpreter.EvalByteCode (). It is in sync with
196 * the version in r96072.
197 * FIXME: In the new interpreter and the IL compiler, '<.+>' does not match '<FOO>'
198 * Also, '<.+?' matches '<FOO>', but the match is '<FOO>' instead of '<F'
199 * FIXME: Modify the regex tests so they are run with RegexOptions.Compiled as
202 private DynamicMethod
EmitEvalMethodBody (DynamicMethod m
, ILGenerator ilgen
,
203 Frame frame
, byte[] program
, int pc
,
204 bool one_op
, bool no_bump
,
207 int start
, length
, end
;
211 int group_count
= 1 + (program
[1] | (program
[2] << 8));
214 RxOp op
= (RxOp
)program
[pc
];
216 //Console.WriteLine (op);
218 // FIXME: Optimize this
219 if (generic_ops
.ContainsKey (pc
))
220 op
= (RxOp
)generic_ops
[pc
];
224 case RxOp
.AnchorReverse
: {
225 bool reverse
= (RxOp
)program
[pc
] == RxOp
.AnchorReverse
;
226 length
= program
[pc
+ 3] | (program
[pc
+ 4] << 8);
227 pc
+= program
[pc
+ 1] | (program
[pc
+ 2] << 8);
229 // Optimize some common cases by inlining the code generated for the
231 RxOp anch_op
= (RxOp
)program
[pc
];
232 // FIXME: Do this even if the archor op is not the last in the regex
233 if (!reverse
&& group_count
== 1 && anch_op
== RxOp
.Char
&& (RxOp
)program
[pc
+ 2] == RxOp
.True
) {
236 * while (strpos < string_end) {
237 * if (str [strpos] == program [pc + 1]) {
238 * match_start = strpos;
239 * strpos_result = strpos + 1;
240 * marks [groups [0]].Start = strpos;
241 * if (groups.Length > 1)
242 * marks [groups [0]].End = res;
249 // Add some locals to avoid an indirection
250 LocalBuilder local_string_end
= ilgen
.DeclareLocal (typeof (int));
251 ilgen
.Emit (OpCodes
.Ldarg_0
);
252 ilgen
.Emit (OpCodes
.Ldfld
, fi_string_end
);
253 ilgen
.Emit (OpCodes
.Stloc
, local_string_end
);
254 LocalBuilder local_str
= ilgen
.DeclareLocal (typeof (string));
255 ilgen
.Emit (OpCodes
.Ldarg_0
);
256 ilgen
.Emit (OpCodes
.Ldfld
, fi_str
);
257 ilgen
.Emit (OpCodes
.Stloc
, local_str
);
259 //while (strpos < string_end) {
260 // -> Done at the end of the loop like mcs does
261 Label l1
= ilgen
.DefineLabel ();
262 Label l2
= ilgen
.DefineLabel ();
263 ilgen
.Emit (OpCodes
.Br
, l2
);
264 ilgen
.MarkLabel (l1
);
266 // if (str [strpos] == program [pc + 1]) {
267 Label l3
= ilgen
.DefineLabel ();
268 ilgen
.Emit (OpCodes
.Ldloc
, local_str
);
269 ilgen
.Emit (OpCodes
.Ldarg_1
);
270 ilgen
.Emit (OpCodes
.Callvirt
, typeof (string).GetMethod ("get_Chars"));
271 ilgen
.Emit (OpCodes
.Conv_I4
);
272 ilgen
.Emit (OpCodes
.Ldc_I4
, (int)program
[pc
+ 1]);
273 ilgen
.Emit (OpCodes
.Beq
, l3
);
275 // The true case is done after the loop
279 ilgen
.Emit (OpCodes
.Ldarg_1
);
280 ilgen
.Emit (OpCodes
.Ldc_I4_1
);
281 ilgen
.Emit (OpCodes
.Add
);
282 ilgen
.Emit (OpCodes
.Starg
, 1);
284 ilgen
.MarkLabel (l2
);
285 ilgen
.Emit (OpCodes
.Ldarg_1
);
286 ilgen
.Emit (OpCodes
.Ldloc
, local_string_end
);
287 ilgen
.Emit (OpCodes
.Blt
, l1
);
290 ilgen
.Emit (OpCodes
.Br
, frame
.label_fail
);
293 ilgen
.MarkLabel (l3
);
294 // match_start = strpos;
295 // match_start doesn't seem to be used
297 ilgen.Emit (OpCodes.Ldarg_0);
298 ilgen.Emit (OpCodes.Ldarg_1);
299 ilgen.Emit (OpCodes.Stfld, fi_match_start);
301 // strpos_result = strpos + 1;
302 ilgen
.Emit (OpCodes
.Ldarg_1
);
303 ilgen
.Emit (OpCodes
.Ldc_I4_1
);
304 ilgen
.Emit (OpCodes
.Add
);
305 ilgen
.Emit (OpCodes
.Stloc
, frame
.local_strpos_res
);
306 // call SetStartOfMatch (strpos)
307 ilgen
.Emit (OpCodes
.Ldarg_0
);
308 ilgen
.Emit (OpCodes
.Ldarg_1
);
309 ilgen
.Emit (OpCodes
.Call
, GetInterpreterMethod ("SetStartOfMatch"));
311 ilgen
.Emit (OpCodes
.Br
, frame
.label_pass
);
316 //Console.WriteLine ("Anchor op " + anch_op);
318 // Add some locals to avoid an indirection
319 LocalBuilder local_string_end
= ilgen
.DeclareLocal (typeof (int));
320 ilgen
.Emit (OpCodes
.Ldarg_0
);
321 ilgen
.Emit (OpCodes
.Ldfld
, fi_string_end
);
322 ilgen
.Emit (OpCodes
.Ldc_I4_1
);
323 ilgen
.Emit (OpCodes
.Add
);
324 ilgen
.Emit (OpCodes
.Stloc
, local_string_end
);
326 //while (strpos < string_end + 1) {
327 // -> Done at the end of the loop like mcs does
328 Label l1
= ilgen
.DefineLabel ();
329 Label l2
= ilgen
.DefineLabel ();
330 ilgen
.Emit (OpCodes
.Br
, l2
);
331 ilgen
.MarkLabel (l1
);
333 //if (groups.Length > 1) {
335 // marks [groups [0]].Start = strpos;
337 if (group_count
> 1) {
338 ilgen
.Emit (OpCodes
.Ldarg_0
);
339 ilgen
.Emit (OpCodes
.Call
, mi_reset_groups
);
341 ilgen
.Emit (OpCodes
.Ldarg_0
);
342 ilgen
.Emit (OpCodes
.Ldfld
, fi_marks
);
343 ilgen
.Emit (OpCodes
.Ldarg_0
);
344 ilgen
.Emit (OpCodes
.Ldfld
, fi_groups
);
345 ilgen
.Emit (OpCodes
.Ldc_I4_0
);
346 ilgen
.Emit (OpCodes
.Ldelem_I4
);
347 ilgen
.Emit (OpCodes
.Ldelema
, typeof (Mark
));
348 ilgen
.Emit (OpCodes
.Ldarg_1
);
349 ilgen
.Emit (OpCodes
.Stfld
, fi_mark_start
);
352 // if (EvalByteCode (pc, strpos, ref res)) {
354 Frame new_frame
= new Frame (ilgen
);
356 // old_stros = strpos;
357 LocalBuilder local_old_strpos
= ilgen
.DeclareLocal (typeof (int));
358 ilgen
.Emit (OpCodes
.Ldarg_1
);
359 ilgen
.Emit (OpCodes
.Stloc
, local_old_strpos
);
361 m
= EmitEvalMethodBody (m
, ilgen
, new_frame
, program
, pc
, false, false, out out_pc
);
366 ilgen
.MarkLabel (new_frame
.label_pass
);
367 // match_start = old_strpos;
368 // match_start doesn't seem to be used
370 ilgen.Emit (OpCodes.Ldarg_0);
371 ilgen.Emit (OpCodes.Ldloc, local_old_strpos);
372 ilgen.Emit (OpCodes.Stfld, fi_match_start);
374 // strpos_result = res;
375 ilgen
.Emit (OpCodes
.Ldloc
, new_frame
.local_strpos_res
);
376 ilgen
.Emit (OpCodes
.Stloc
, frame
.local_strpos_res
);
377 // marks [groups [0]].Start = old_strpos;
378 ilgen
.Emit (OpCodes
.Ldarg_0
);
379 ilgen
.Emit (OpCodes
.Ldfld
, fi_marks
);
380 ilgen
.Emit (OpCodes
.Ldarg_0
);
381 ilgen
.Emit (OpCodes
.Ldfld
, fi_groups
);
382 ilgen
.Emit (OpCodes
.Ldc_I4_0
);
383 ilgen
.Emit (OpCodes
.Ldelem_I4
);
384 ilgen
.Emit (OpCodes
.Ldelema
, typeof (Mark
));
385 ilgen
.Emit (OpCodes
.Ldloc
, local_old_strpos
);
386 ilgen
.Emit (OpCodes
.Stfld
, fi_mark_start
);
387 // if (groups.Length > 1)
388 // marks [groups [0]].End = res;
389 if (group_count
> 1) {
390 ilgen
.Emit (OpCodes
.Ldarg_0
);
391 ilgen
.Emit (OpCodes
.Ldfld
, fi_marks
);
392 ilgen
.Emit (OpCodes
.Ldarg_0
);
393 ilgen
.Emit (OpCodes
.Ldfld
, fi_groups
);
394 ilgen
.Emit (OpCodes
.Ldc_I4_0
);
395 ilgen
.Emit (OpCodes
.Ldelem_I4
);
396 ilgen
.Emit (OpCodes
.Ldelema
, typeof (Mark
));
397 ilgen
.Emit (OpCodes
.Ldloc
, new_frame
.local_strpos_res
);
398 ilgen
.Emit (OpCodes
.Stfld
, fi_mark_end
);
402 ilgen
.Emit (OpCodes
.Br
, frame
.label_pass
);
405 ilgen
.MarkLabel (new_frame
.label_fail
);
406 // strpos = old_strpos +- 1;
407 ilgen
.Emit (OpCodes
.Ldloc
, local_old_strpos
);
408 ilgen
.Emit (OpCodes
.Ldc_I4_1
);
410 ilgen
.Emit (OpCodes
.Sub
);
412 ilgen
.Emit (OpCodes
.Add
);
413 ilgen
.Emit (OpCodes
.Starg
, 1);
415 ilgen
.MarkLabel (l2
);
417 ilgen
.Emit (OpCodes
.Ldarg_1
);
418 ilgen
.Emit (OpCodes
.Ldc_I4_0
);
419 ilgen
.Emit (OpCodes
.Bge
, l1
);
421 ilgen
.Emit (OpCodes
.Ldarg_1
);
422 ilgen
.Emit (OpCodes
.Ldloc
, local_string_end
);
423 ilgen
.Emit (OpCodes
.Blt
, l1
);
426 ilgen
.Emit (OpCodes
.Br
, frame
.label_fail
);
432 //if (EvalByteCode (pc + 3, strpos, ref res)) {
434 // Emit the rest of the code inline instead of making a recursive call
435 Frame new_frame
= new Frame (ilgen
);
437 // old_strpos = strpos;
438 LocalBuilder local_old_strpos
= ilgen
.DeclareLocal (typeof (int));
439 ilgen
.Emit (OpCodes
.Ldarg_1
);
440 ilgen
.Emit (OpCodes
.Stloc
, local_old_strpos
);
442 m
= EmitEvalMethodBody (m
, ilgen
, new_frame
, program
, pc
+ 3, false, false, out out_pc
);
447 ilgen
.MarkLabel (new_frame
.label_pass
);
448 // strpos_result = res;
449 ilgen
.Emit (OpCodes
.Ldloc
, new_frame
.local_strpos_res
);
450 ilgen
.Emit (OpCodes
.Stloc
, frame
.local_strpos_res
);
452 ilgen
.Emit (OpCodes
.Br
, frame
.label_pass
);
455 ilgen
.MarkLabel (new_frame
.label_fail
);
456 // strpos = old_strpos;
457 ilgen
.Emit (OpCodes
.Ldloc
, local_old_strpos
);
458 ilgen
.Emit (OpCodes
.Starg
, 1);
460 pc
+= program
[pc
+ 1] | (program
[pc
+ 2] << 8);
463 case RxOp
.GenericChar
:
464 case RxOp
.GenericRange
: {
465 OpFlags flags
= (OpFlags
)op_flags
[pc
];
466 bool negate
= (flags
& OpFlags
.Negate
) > 0;
467 bool ignore
= (flags
& OpFlags
.IgnoreCase
) > 0;
468 bool reverse
= (flags
& OpFlags
.RightToLeft
) > 0;
470 //if (strpos < string_end) {
471 Label l1
= ilgen
.DefineLabel ();
473 ilgen
.Emit (OpCodes
.Ldarg_1
);
474 ilgen
.Emit (OpCodes
.Ldc_I4_0
);
475 ilgen
.Emit (OpCodes
.Ble
, l1
);
477 ilgen
.Emit (OpCodes
.Ldarg_1
);
478 ilgen
.Emit (OpCodes
.Ldarg_0
);
479 ilgen
.Emit (OpCodes
.Ldfld
, fi_string_end
);
480 ilgen
.Emit (OpCodes
.Bge
, l1
);
483 // int c = str [strpos];
484 LocalBuilder local_c
= ilgen
.DeclareLocal (typeof (char));
485 ilgen
.Emit (OpCodes
.Ldarg_0
);
486 ilgen
.Emit (OpCodes
.Ldfld
, fi_str
);
487 ilgen
.Emit (OpCodes
.Ldarg_1
);
489 ilgen
.Emit (OpCodes
.Ldc_I4_1
);
490 ilgen
.Emit (OpCodes
.Sub
);
492 ilgen
.Emit (OpCodes
.Callvirt
, typeof (string).GetMethod ("get_Chars"));
494 ilgen
.Emit (OpCodes
.Call
, typeof (Char
).GetMethod ("ToLower", new Type
[] { typeof (char) }
));
496 if (op
== RxOp
.GenericChar
) {
497 ilgen
.Emit (OpCodes
.Conv_I4
);
498 ilgen
.Emit (OpCodes
.Ldc_I4
, (int)program
[pc
+ 1]);
499 ilgen
.Emit (negate
? OpCodes
.Beq
: OpCodes
.Bne_Un
, l1
);
502 } else if (op
== RxOp
.GenericRange
) {
503 ilgen
.Emit (OpCodes
.Stloc
, local_c
);
505 // if (c >= program [pc + 1] && c <= program [pc + 2]) {
507 Label l3
= ilgen
.DefineLabel ();
509 ilgen
.Emit (OpCodes
.Ldloc
, local_c
);
510 ilgen
.Emit (OpCodes
.Ldc_I4
, (int)program
[pc
+ 1]);
511 ilgen
.Emit (OpCodes
.Blt
, l3
);
512 ilgen
.Emit (OpCodes
.Ldloc
, local_c
);
513 ilgen
.Emit (OpCodes
.Ldc_I4
, (int)program
[pc
+ 2]);
514 ilgen
.Emit (OpCodes
.Bgt
, l3
);
515 ilgen
.Emit (OpCodes
.Br
, l1
);
516 ilgen
.MarkLabel (l3
);
518 ilgen
.Emit (OpCodes
.Ldloc
, local_c
);
519 ilgen
.Emit (OpCodes
.Ldc_I4
, (int)program
[pc
+ 1]);
520 ilgen
.Emit (OpCodes
.Blt
, l1
);
521 ilgen
.Emit (OpCodes
.Ldloc
, local_c
);
522 ilgen
.Emit (OpCodes
.Ldc_I4
, (int)program
[pc
+ 2]);
523 ilgen
.Emit (OpCodes
.Bgt
, l1
);
528 throw new NotSupportedException ();
531 //ilgen.EmitWriteLine ("HIT:" + (char)program [pc + 1]);
533 // strpos++ / strpos--;
534 ilgen
.Emit (OpCodes
.Ldarg_1
);
535 ilgen
.Emit (OpCodes
.Ldc_I4_1
);
537 ilgen
.Emit (OpCodes
.Sub
);
539 ilgen
.Emit (OpCodes
.Add
);
540 ilgen
.Emit (OpCodes
.Starg
, 1);
542 Label l2
= ilgen
.DefineLabel ();
543 ilgen
.Emit (OpCodes
.Br
, l2
);
545 ilgen
.MarkLabel (l1
);
547 ilgen
.Emit (OpCodes
.Br
, frame
.label_fail
);
548 ilgen
.MarkLabel (l2
);
553 //strpos_result = strpos;
554 ilgen
.Emit (OpCodes
.Ldarg_1
);
555 ilgen
.Emit (OpCodes
.Stloc
, frame
.local_strpos_res
);
557 ilgen
.Emit (OpCodes
.Br
, frame
.label_pass
);
563 ilgen
.Emit (OpCodes
.Br
, frame
.label_fail
);
567 case RxOp
.AnyPosition
: {
571 case RxOp
.StartOfString
: {
574 ilgen
.Emit (OpCodes
.Ldarg_1
);
575 ilgen
.Emit (OpCodes
.Ldc_I4_0
);
576 ilgen
.Emit (OpCodes
.Bgt
, frame
.label_fail
);
580 case RxOp
.StartOfLine
: {
581 // FIXME: windows line endings
582 //if (!(strpos == 0 || str [strpos - 1] == '\n'))
584 Label l
= ilgen
.DefineLabel ();
585 ilgen
.Emit (OpCodes
.Ldarg_1
);
586 ilgen
.Emit (OpCodes
.Ldc_I4_0
);
587 ilgen
.Emit (OpCodes
.Beq
, l
);
588 ilgen
.Emit (OpCodes
.Ldarg_0
);
589 ilgen
.Emit (OpCodes
.Ldfld
, fi_str
);
590 ilgen
.Emit (OpCodes
.Ldarg_1
);
591 ilgen
.Emit (OpCodes
.Ldc_I4_1
);
592 ilgen
.Emit (OpCodes
.Sub
);
593 ilgen
.Emit (OpCodes
.Callvirt
, typeof (string).GetMethod ("get_Chars"));
594 ilgen
.Emit (OpCodes
.Ldc_I4
, (int)'\n');
595 ilgen
.Emit (OpCodes
.Beq
, l
);
596 ilgen
.Emit (OpCodes
.Br
, frame
.label_fail
);
602 case RxOp
.StartOfScan
: {
603 //if (strpos != string_start)
605 ilgen
.Emit (OpCodes
.Ldarg_1
);
606 ilgen
.Emit (OpCodes
.Ldarg_0
);
607 ilgen
.Emit (OpCodes
.Ldfld
, fi_string_start
);
608 ilgen
.Emit (OpCodes
.Bne_Un
, frame
.label_fail
);
613 //if (!(strpos == string_end || (strpos == string_end - 1 && str [strpos] == '\n')))
615 Label l
= ilgen
.DefineLabel ();
617 ilgen
.Emit (OpCodes
.Ldarg_1
);
618 ilgen
.Emit (OpCodes
.Ldarg_0
);
619 ilgen
.Emit (OpCodes
.Ldfld
, fi_string_end
);
620 ilgen
.Emit (OpCodes
.Beq
, l
);
622 Label l2
= ilgen
.DefineLabel ();
623 ilgen
.Emit (OpCodes
.Ldarg_1
);
624 ilgen
.Emit (OpCodes
.Ldarg_0
);
625 ilgen
.Emit (OpCodes
.Ldfld
, fi_string_end
);
626 ilgen
.Emit (OpCodes
.Ldc_I4_1
);
627 ilgen
.Emit (OpCodes
.Sub
);
628 ilgen
.Emit (OpCodes
.Bne_Un
, l2
);
629 ilgen
.Emit (OpCodes
.Ldarg_0
);
630 ilgen
.Emit (OpCodes
.Ldfld
, fi_str
);
631 ilgen
.Emit (OpCodes
.Ldarg_1
);
632 ilgen
.Emit (OpCodes
.Callvirt
, typeof (string).GetMethod ("get_Chars"));
633 ilgen
.Emit (OpCodes
.Ldc_I4
, (int)'\n');
634 ilgen
.Emit (OpCodes
.Bne_Un
, l2
);
635 ilgen
.Emit (OpCodes
.Br
, l
);
636 ilgen
.MarkLabel (l2
);
638 ilgen
.Emit (OpCodes
.Br
, frame
.label_fail
);
644 case RxOp
.EndOfString
: {
645 //if (strpos != string_end)
647 ilgen
.Emit (OpCodes
.Ldarg_1
);
648 ilgen
.Emit (OpCodes
.Ldarg_0
);
649 ilgen
.Emit (OpCodes
.Ldfld
, fi_string_end
);
650 ilgen
.Emit (OpCodes
.Bne_Un
, frame
.label_fail
);
654 case RxOp
.EndOfLine
: {
655 //if (!(strpos == string_end || str [strpos] == '\n'))
657 Label l_match
= ilgen
.DefineLabel ();
658 ilgen
.Emit (OpCodes
.Ldarg_1
);
659 ilgen
.Emit (OpCodes
.Ldarg_0
);
660 ilgen
.Emit (OpCodes
.Ldfld
, fi_string_end
);
661 ilgen
.Emit (OpCodes
.Beq
, l_match
);
662 ilgen
.Emit (OpCodes
.Ldarg_0
);
663 ilgen
.Emit (OpCodes
.Ldfld
, fi_str
);
664 ilgen
.Emit (OpCodes
.Ldarg_1
);
665 ilgen
.Emit (OpCodes
.Callvirt
, typeof (string).GetMethod ("get_Chars"));
666 ilgen
.Emit (OpCodes
.Ldc_I4
, (int)'\n');
667 ilgen
.Emit (OpCodes
.Beq
, l_match
);
668 ilgen
.Emit (OpCodes
.Br
, frame
.label_fail
);
669 ilgen
.MarkLabel (l_match
);
674 case RxOp
.WordBoundary
:
675 case RxOp
.NoWordBoundary
: {
676 bool negate
= op
== RxOp
.NoWordBoundary
;
678 //if (string_end == 0)
680 ilgen
.Emit (OpCodes
.Ldarg_0
);
681 ilgen
.Emit (OpCodes
.Ldfld
, fi_string_end
);
682 ilgen
.Emit (OpCodes
.Ldc_I4_0
);
683 ilgen
.Emit (OpCodes
.Beq
, frame
.label_fail
);
685 Label l_match
= ilgen
.DefineLabel ();
688 Label l1
= ilgen
.DefineLabel ();
689 ilgen
.Emit (OpCodes
.Ldarg_1
);
690 ilgen
.Emit (OpCodes
.Ldc_I4_0
);
691 ilgen
.Emit (OpCodes
.Bne_Un
, l1
);
692 //if (!IsWordChar (str [strpos])) {
694 ilgen
.Emit (OpCodes
.Ldarg_0
);
695 ilgen
.Emit (OpCodes
.Ldfld
, fi_str
);
696 ilgen
.Emit (OpCodes
.Ldarg_1
);
697 ilgen
.Emit (OpCodes
.Callvirt
, typeof (string).GetMethod ("get_Chars"));
698 ilgen
.Emit (OpCodes
.Call
, mi_is_word_char
);
699 ilgen
.Emit (negate
? OpCodes
.Brtrue
: OpCodes
.Brfalse
, frame
.label_fail
);
700 ilgen
.Emit (OpCodes
.Br
, l_match
);
702 //} else if (strpos == string_end) {
703 ilgen
.MarkLabel (l1
);
704 Label l2
= ilgen
.DefineLabel ();
705 ilgen
.Emit (OpCodes
.Ldarg_1
);
706 ilgen
.Emit (OpCodes
.Ldarg_0
);
707 ilgen
.Emit (OpCodes
.Ldfld
, fi_string_end
);
708 ilgen
.Emit (OpCodes
.Bne_Un
, l2
);
709 //if (!IsWordChar (str [strpos - 1])) {
711 ilgen
.Emit (OpCodes
.Ldarg_0
);
712 ilgen
.Emit (OpCodes
.Ldfld
, fi_str
);
713 ilgen
.Emit (OpCodes
.Ldarg_1
);
714 ilgen
.Emit (OpCodes
.Ldc_I4_1
);
715 ilgen
.Emit (OpCodes
.Sub
);
716 ilgen
.Emit (OpCodes
.Callvirt
, typeof (string).GetMethod ("get_Chars"));
717 ilgen
.Emit (OpCodes
.Call
, mi_is_word_char
);
718 ilgen
.Emit (negate
? OpCodes
.Brtrue
: OpCodes
.Brfalse
, frame
.label_fail
);
719 ilgen
.Emit (OpCodes
.Br
, l_match
);
722 ilgen
.MarkLabel (l2
);
723 //if (IsWordChar (str [strpos]) == IsWordChar (str [strpos - 1])) {
725 ilgen
.Emit (OpCodes
.Ldarg_0
);
726 ilgen
.Emit (OpCodes
.Ldfld
, fi_str
);
727 ilgen
.Emit (OpCodes
.Ldarg_1
);
728 ilgen
.Emit (OpCodes
.Callvirt
, typeof (string).GetMethod ("get_Chars"));
729 ilgen
.Emit (OpCodes
.Call
, mi_is_word_char
);
730 ilgen
.Emit (OpCodes
.Ldarg_0
);
731 ilgen
.Emit (OpCodes
.Ldfld
, fi_str
);
732 ilgen
.Emit (OpCodes
.Ldarg_1
);
733 ilgen
.Emit (OpCodes
.Ldc_I4_1
);
734 ilgen
.Emit (OpCodes
.Sub
);
735 ilgen
.Emit (OpCodes
.Callvirt
, typeof (string).GetMethod ("get_Chars"));
736 ilgen
.Emit (OpCodes
.Call
, mi_is_word_char
);
737 ilgen
.Emit (negate
? OpCodes
.Bne_Un
: OpCodes
.Beq
, frame
.label_fail
);
738 ilgen
.Emit (OpCodes
.Br
, l_match
);
740 ilgen
.MarkLabel (l_match
);
746 case RxOp
.BitmapIgnoreCase
: {
747 bool ignore
= (op
== RxOp
.BitmapIgnoreCase
);
749 //if (strpos < string_end) {
750 Label l1
= ilgen
.DefineLabel ();
751 Label l2
= ilgen
.DefineLabel ();
752 ilgen
.Emit (OpCodes
.Ldarg_1
);
753 ilgen
.Emit (OpCodes
.Ldarg_0
);
754 ilgen
.Emit (OpCodes
.Ldfld
, fi_string_end
);
755 ilgen
.Emit (OpCodes
.Bge
, l1
);
756 // int c = str [strpos];
757 LocalBuilder local_c
= ilgen
.DeclareLocal (typeof (int));
758 ilgen
.Emit (OpCodes
.Ldarg_0
);
759 ilgen
.Emit (OpCodes
.Ldfld
, fi_str
);
760 ilgen
.Emit (OpCodes
.Ldarg_1
);
761 ilgen
.Emit (OpCodes
.Callvirt
, typeof (string).GetMethod ("get_Chars"));
762 ilgen
.Emit (OpCodes
.Conv_I4
);
764 ilgen
.Emit (OpCodes
.Call
, typeof (Char
).GetMethod ("ToLower", new Type
[] { typeof (char) }
));
765 // c -= program [pc + 1];
766 ilgen
.Emit (OpCodes
.Ldc_I4
, (int)program
[pc
+ 1]);
767 ilgen
.Emit (OpCodes
.Sub
);
768 ilgen
.Emit (OpCodes
.Stloc
, local_c
);
769 length
= program
[pc
+ 2];
770 // if (c < 0 || c >= (length << 3))
772 ilgen
.Emit (OpCodes
.Ldloc
, local_c
);
773 ilgen
.Emit (OpCodes
.Ldc_I4_0
);
774 ilgen
.Emit (OpCodes
.Blt
, frame
.label_fail
);
775 ilgen
.Emit (OpCodes
.Ldloc
, local_c
);
776 ilgen
.Emit (OpCodes
.Ldc_I4
, length
<< 3);
777 ilgen
.Emit (OpCodes
.Bge
, frame
.label_fail
);
780 // Optimized version for small bitmaps
782 uint bitmap
= program
[pc
];
785 bitmap
|= ((uint)program
[pc
+ 1] << 8);
787 bitmap
|= ((uint)program
[pc
+ 2] << 16);
789 bitmap
|= ((uint)program
[pc
+ 3] << 24);
791 //if ((bitmap >> c) & 1)
792 ilgen
.Emit (OpCodes
.Ldc_I4
, bitmap
);
793 ilgen
.Emit (OpCodes
.Ldloc
, local_c
);
794 ilgen
.Emit (OpCodes
.Shr_Un
);
795 ilgen
.Emit (OpCodes
.Ldc_I4_1
);
796 ilgen
.Emit (OpCodes
.And
);
797 ilgen
.Emit (OpCodes
.Brfalse
, l1
);
799 // if ((program [pc + (c >> 3)] & (1 << (c & 0x7))) != 0) {
800 ilgen
.Emit (OpCodes
.Ldarg_0
);
801 ilgen
.Emit (OpCodes
.Ldfld
, fi_program
);
802 ilgen
.Emit (OpCodes
.Ldloc
, local_c
);
803 ilgen
.Emit (OpCodes
.Ldc_I4_3
);
804 ilgen
.Emit (OpCodes
.Shr
);
805 ilgen
.Emit (OpCodes
.Ldc_I4
, pc
);
806 ilgen
.Emit (OpCodes
.Add
);
807 ilgen
.Emit (OpCodes
.Ldelem_I1
);
808 ilgen
.Emit (OpCodes
.Ldc_I4_1
);
809 ilgen
.Emit (OpCodes
.Ldloc
, local_c
);
810 ilgen
.Emit (OpCodes
.Ldc_I4
, 7);
811 ilgen
.Emit (OpCodes
.And
);
812 ilgen
.Emit (OpCodes
.Shl
);
813 ilgen
.Emit (OpCodes
.And
);
814 ilgen
.Emit (OpCodes
.Ldc_I4_0
);
815 ilgen
.Emit (OpCodes
.Beq
, l1
);
818 ilgen
.Emit (OpCodes
.Ldarg_1
);
819 ilgen
.Emit (OpCodes
.Ldc_I4_1
);
820 ilgen
.Emit (OpCodes
.Add
);
821 ilgen
.Emit (OpCodes
.Starg
, 1);
823 ilgen
.Emit (OpCodes
.Br
, l2
);
827 ilgen
.MarkLabel (l1
);
828 ilgen
.Emit (OpCodes
.Br
, frame
.label_fail
);
830 ilgen
.MarkLabel (l2
);
836 case RxOp
.NoBitmapIgnoreCase
: {
837 // Not currently used
838 Console
.WriteLine ("Opcode " + op
+ " not supported.");
842 case RxOp
.StringIgnoreCase
:
843 case RxOp
.StringReverse
:
844 case RxOp
.StringIgnoreCaseReverse
: {
845 bool ignore
= (op
== RxOp
.StringIgnoreCase
|| op
== RxOp
.StringIgnoreCaseReverse
);
846 bool reverse
= (op
== RxOp
.StringReverse
|| op
== RxOp
.StringIgnoreCaseReverse
);
849 length
= program
[pc
+ 1];
850 //if (strpos + length > string_end)
853 ilgen
.Emit (OpCodes
.Ldarg_1
);
854 ilgen
.Emit (OpCodes
.Ldc_I4
, length
);
855 ilgen
.Emit (OpCodes
.Blt
, frame
.label_fail
);
857 ilgen
.Emit (OpCodes
.Ldarg_1
);
858 ilgen
.Emit (OpCodes
.Ldc_I4
, length
);
859 ilgen
.Emit (OpCodes
.Add
);
860 ilgen
.Emit (OpCodes
.Ldarg_0
);
861 ilgen
.Emit (OpCodes
.Ldfld
, fi_string_end
);
862 ilgen
.Emit (OpCodes
.Bgt
, frame
.label_fail
);
865 /* Avoid unsafe code in Moonlight build */
866 #if false && !NET_2_1
869 throw new NotImplementedException ();
871 LocalBuilder local_strptr
= ilgen
.DeclareLocal (typeof (char).MakePointerType ());
872 // char *strptr = &str.start_char + strpos
873 ilgen
.Emit (OpCodes
.Ldarg_0
);
874 ilgen
.Emit (OpCodes
.Ldfld
, fi_str
);
875 ilgen
.Emit (OpCodes
.Ldflda
, typeof (String
).GetField ("start_char", BindingFlags
.Instance
|BindingFlags
.NonPublic
));
876 ilgen
.Emit (OpCodes
.Ldarg_1
);
877 ilgen
.Emit (OpCodes
.Ldc_I4_1
);
878 ilgen
.Emit (OpCodes
.Shl
);
879 ilgen
.Emit (OpCodes
.Add
);
880 ilgen
.Emit (OpCodes
.Stloc
, local_strptr
);
882 end
= start
+ length
;
883 for (i
= 0; i
< length
; ++i
) {
884 // if (*(strptr + i) != program [start + i])
886 ilgen
.Emit (OpCodes
.Ldloc
, local_strptr
);
887 ilgen
.Emit (OpCodes
.Ldc_I4
, i
* 2);
888 ilgen
.Emit (OpCodes
.Add
);
889 ilgen
.Emit (OpCodes
.Ldind_I2
);
891 ilgen
.Emit (OpCodes
.Call
, typeof (Char
).GetMethod ("ToLower", new Type
[] { typeof (char) }
));
892 ilgen
.Emit (OpCodes
.Ldc_I4
, (int)program
[start
+ i
]);
893 ilgen
.Emit (OpCodes
.Bne_Un
, frame
.label_fail
);
897 ilgen
.Emit (OpCodes
.Ldarg_1
);
898 ilgen
.Emit (OpCodes
.Ldc_I4
, length
);
899 ilgen
.Emit (OpCodes
.Add
);
900 ilgen
.Emit (OpCodes
.Starg
, 1);
903 // Allocate a local for 'str' to save an indirection
904 LocalBuilder local_str
= ilgen
.DeclareLocal (typeof (string));
905 ilgen
.Emit (OpCodes
.Ldarg_0
);
906 ilgen
.Emit (OpCodes
.Ldfld
, fi_str
);
907 ilgen
.Emit (OpCodes
.Stloc
, local_str
);
911 ilgen
.Emit (OpCodes
.Ldarg_1
);
912 ilgen
.Emit (OpCodes
.Ldc_I4
, length
);
913 ilgen
.Emit (OpCodes
.Sub
);
914 ilgen
.Emit (OpCodes
.Starg
, 1);
917 // FIXME: Emit a loop for long strings
918 end
= start
+ length
;
919 for (; start
< end
; ++start
) {
920 //if (str [strpos] != program [start])
922 ilgen
.Emit (OpCodes
.Ldloc
, local_str
);
923 ilgen
.Emit (OpCodes
.Ldarg_1
);
924 ilgen
.Emit (OpCodes
.Callvirt
, typeof (string).GetMethod ("get_Chars"));
926 ilgen
.Emit (OpCodes
.Call
, typeof (Char
).GetMethod ("ToLower", new Type
[] { typeof (char) }
));
927 ilgen
.Emit (OpCodes
.Ldc_I4
, (int)program
[start
]);
928 ilgen
.Emit (OpCodes
.Bne_Un
, frame
.label_fail
);
930 ilgen
.Emit (OpCodes
.Ldarg_1
);
931 ilgen
.Emit (OpCodes
.Ldc_I4_1
);
932 ilgen
.Emit (OpCodes
.Add
);
933 ilgen
.Emit (OpCodes
.Starg
, 1);
938 ilgen
.Emit (OpCodes
.Ldarg_1
);
939 ilgen
.Emit (OpCodes
.Ldc_I4
, length
);
940 ilgen
.Emit (OpCodes
.Sub
);
941 ilgen
.Emit (OpCodes
.Starg
, 1);
948 case RxOp
.OpenGroup
: {
949 //Open (program [pc + 1] | (program [pc + 2] << 8), strpos);
950 int group_id
= program
[pc
+ 1] | (program
[pc
+ 2] << 8);
951 ilgen
.Emit (OpCodes
.Ldarg_0
);
952 ilgen
.Emit (OpCodes
.Ldc_I4
, group_id
);
953 ilgen
.Emit (OpCodes
.Ldarg_1
);
954 ilgen
.Emit (OpCodes
.Call
, mi_open
);
959 case RxOp
.CloseGroup
: {
960 //Close (program [pc + 1] | (program [pc + 2] << 8), strpos);
961 int group_id
= program
[pc
+ 1] | (program
[pc
+ 2] << 8);
962 ilgen
.Emit (OpCodes
.Ldarg_0
);
963 ilgen
.Emit (OpCodes
.Ldc_I4
, group_id
);
964 ilgen
.Emit (OpCodes
.Ldarg_1
);
965 ilgen
.Emit (OpCodes
.Call
, mi_close
);
971 pc
+= program
[pc
+ 1] | (program
[pc
+ 2] << 8);
974 case RxOp
.TestCharGroup
: {
975 int char_group_end
= pc
+ program
[pc
+ 1] | (program
[pc
+ 2] << 8);
978 Label label_match
= ilgen
.DefineLabel ();
980 /* Determine the negate/reverse flags by examining the first op */
981 OpFlags flags
= (OpFlags
)op_flags
[pc
];
983 /* Determine whenever this is a negated character class */
984 /* If it is, then the conditions are ANDed together, not ORed */
985 bool revert
= (flags
& OpFlags
.Negate
) > 0;
986 bool reverse
= (flags
& OpFlags
.RightToLeft
) > 0;
989 * Generate code for all the matching ops in the group
991 while (pc
< char_group_end
) {
992 Frame new_frame
= new Frame (ilgen
);
993 m
= EmitEvalMethodBody (m
, ilgen
, new_frame
, program
, pc
, true, true, out pc
);
999 ilgen
.MarkLabel (new_frame
.label_pass
);
1000 ilgen
.Emit (OpCodes
.Br
, label_match
);
1003 // Just fall through to the next test
1004 ilgen
.MarkLabel (new_frame
.label_fail
);
1007 // Just fall through to the next test
1008 ilgen
.MarkLabel (new_frame
.label_pass
);
1009 Label l2
= ilgen
.DefineLabel ();
1010 ilgen
.Emit (OpCodes
.Br
, l2
);
1014 ilgen
.MarkLabel (new_frame
.label_fail
);
1015 ilgen
.Emit (OpCodes
.Br
, frame
.label_fail
);
1017 ilgen
.MarkLabel (l2
);
1023 ilgen
.Emit (OpCodes
.Br
, label_match
);
1025 // If we reached here, all the matching ops have failed
1026 ilgen
.Emit (OpCodes
.Br
, frame
.label_fail
);
1029 ilgen
.MarkLabel (label_match
);
1031 // strpos++ / strpos--;
1032 ilgen
.Emit (OpCodes
.Ldarg_1
);
1033 ilgen
.Emit (OpCodes
.Ldc_I4_1
);
1035 ilgen
.Emit (OpCodes
.Sub
);
1037 ilgen
.Emit (OpCodes
.Add
);
1038 ilgen
.Emit (OpCodes
.Starg
, 1);
1043 // FIXME: This is the old repeat, need to reimplement it as
1044 // FastRepeat/FastRepeatLazy. The general Repeat/Until opcodes
1045 // are so complex that it is probably not worth emitting them as IL.
1047 start
= ReadInt (program
, pc
+ 3);
1048 end
= ReadInt (program
, pc
+ 7);
1050 LocalBuilder local_length
= ilgen
.DeclareLocal (typeof (int));
1052 Label label_repeat_success
= ilgen
.DefineLabel ();
1056 //while (length < end) {
1057 // -> done at the end of the loop
1058 Label l1
= ilgen
.DefineLabel ();
1059 Label l2
= ilgen
.DefineLabel ();
1060 ilgen
.Emit (OpCodes
.Br
, l2
);
1061 ilgen
.MarkLabel (l1
);
1063 //if (!EvalByteCode (pc + 11, strpos, ref res)) {
1065 Frame new_frame
= new Frame (ilgen
);
1067 // old_strpos = strpos;
1068 LocalBuilder local_old_strpos
= ilgen
.DeclareLocal (typeof (int));
1069 ilgen
.Emit (OpCodes
.Ldarg_1
);
1070 ilgen
.Emit (OpCodes
.Stloc
, local_old_strpos
);
1072 m
= EmitEvalMethodBody (m
, ilgen
, new_frame
, program
, pc
+ 11, false, false, out out_pc
);
1077 ilgen
.MarkLabel (new_frame
.label_fail
);
1078 //if (length >= start) {
1079 // goto repeat_success;
1081 ilgen
.Emit (OpCodes
.Ldloc
, local_length
);
1082 ilgen
.Emit (OpCodes
.Ldc_I4
, start
);
1083 ilgen
.Emit (OpCodes
.Bge
, label_repeat_success
);
1085 ilgen
.Emit (OpCodes
.Br
, frame
.label_fail
);
1088 ilgen
.MarkLabel (new_frame
.label_pass
);
1090 ilgen
.Emit (OpCodes
.Ldloc
, new_frame
.local_strpos_res
);
1091 ilgen
.Emit (OpCodes
.Starg
, 1);
1093 ilgen
.Emit (OpCodes
.Ldloc
, local_length
);
1094 ilgen
.Emit (OpCodes
.Ldc_I4_1
);
1095 ilgen
.Emit (OpCodes
.Add
);
1096 ilgen
.Emit (OpCodes
.Stloc
, local_length
);
1098 ilgen
.MarkLabel (l2
);
1099 ilgen
.Emit (OpCodes
.Ldloc
, local_length
);
1100 ilgen
.Emit (OpCodes
.Ldc_I4
, end
);
1101 ilgen
.Emit (OpCodes
.Blt
, l1
);
1103 //if (length != end)
1105 ilgen
.Emit (OpCodes
.Ldloc
, local_length
);
1106 ilgen
.Emit (OpCodes
.Ldc_I4
, end
);
1107 ilgen
.Emit (OpCodes
.Bne_Un
, frame
.label_fail
);
1110 ilgen
.MarkLabel (label_repeat_success
);
1112 pc
+= program
[pc
+ 1] | (program
[pc
+ 2] << 8);
1116 //if (strpos < string_end && str [strpos] != '\n') {
1117 ilgen
.Emit (OpCodes
.Ldarg_1
);
1118 ilgen
.Emit (OpCodes
.Ldarg_0
);
1119 ilgen
.Emit (OpCodes
.Ldfld
, fi_string_end
);
1120 ilgen
.Emit (OpCodes
.Bge
, frame
.label_fail
);
1121 ilgen
.Emit (OpCodes
.Ldarg_0
);
1122 ilgen
.Emit (OpCodes
.Ldfld
, fi_str
);
1123 ilgen
.Emit (OpCodes
.Ldarg_1
);
1124 ilgen
.Emit (OpCodes
.Callvirt
, typeof (string).GetMethod ("get_Chars"));
1125 ilgen
.Emit (OpCodes
.Ldc_I4
, (int)'\n');
1126 ilgen
.Emit (OpCodes
.Beq
, frame
.label_fail
);
1128 ilgen
.Emit (OpCodes
.Ldarg_1
);
1129 ilgen
.Emit (OpCodes
.Ldc_I4_1
);
1130 ilgen
.Emit (OpCodes
.Add
);
1131 ilgen
.Emit (OpCodes
.Starg
, 1);
1137 case RxOp
.CategoryAny
:
1138 case RxOp
.CategoryAnySingleline
:
1139 case RxOp
.CategoryWord
:
1140 case RxOp
.CategoryDigit
:
1141 case RxOp
.CategoryWhiteSpace
:
1142 case RxOp
.CategoryEcmaWord
:
1143 case RxOp
.CategoryEcmaWhiteSpace
:
1144 case RxOp
.CategoryUnicodeSpecials
:
1145 case RxOp
.CategoryUnicode
:
1146 case RxOp
.NoCategoryAny
:
1147 case RxOp
.NoCategoryAnySingleline
:
1148 case RxOp
.NoCategoryWord
:
1149 case RxOp
.NoCategoryDigit
:
1150 case RxOp
.NoCategoryWhiteSpace
:
1151 case RxOp
.NoCategoryEcmaWord
:
1152 case RxOp
.NoCategoryEcmaWhiteSpace
:
1153 case RxOp
.NoCategoryUnicodeSpecials
:
1154 case RxOp
.NoCategoryUnicode
:
1155 case RxOp
.CategoryAnyReverse
:
1156 case RxOp
.CategoryAnySinglelineReverse
:
1157 case RxOp
.CategoryWordReverse
:
1158 case RxOp
.CategoryDigitReverse
:
1159 case RxOp
.CategoryWhiteSpaceReverse
:
1160 case RxOp
.CategoryEcmaWordReverse
:
1161 case RxOp
.CategoryEcmaWhiteSpaceReverse
:
1162 case RxOp
.CategoryUnicodeSpecialsReverse
:
1163 case RxOp
.CategoryUnicodeReverse
:
1164 case RxOp
.NoCategoryAnyReverse
:
1165 case RxOp
.NoCategoryAnySinglelineReverse
:
1166 case RxOp
.NoCategoryWordReverse
:
1167 case RxOp
.NoCategoryDigitReverse
:
1168 case RxOp
.NoCategoryWhiteSpaceReverse
:
1169 case RxOp
.NoCategoryEcmaWordReverse
:
1170 case RxOp
.NoCategoryEcmaWhiteSpaceReverse
:
1171 case RxOp
.NoCategoryUnicodeSpecialsReverse
:
1172 case RxOp
.NoCategoryUnicodeReverse
: {
1173 OpFlags flags
= (OpFlags
)op_flags
[pc
];
1174 bool negate
= (flags
& OpFlags
.Negate
) > 0;
1175 bool reverse
= (flags
& OpFlags
.RightToLeft
) > 0;
1177 /* Get back the normal opcodes */
1179 op
= (RxOp
)((int)op
- 2);
1181 op
= (RxOp
)((int)op
- 1);
1183 //if (strpos < string_end) {
1184 Label l_nomatch
= ilgen
.DefineLabel ();
1186 ilgen
.Emit (OpCodes
.Ldarg_1
);
1187 ilgen
.Emit (OpCodes
.Ldc_I4_0
);
1188 ilgen
.Emit (OpCodes
.Ble
, l_nomatch
);
1190 ilgen
.Emit (OpCodes
.Ldarg_1
);
1191 ilgen
.Emit (OpCodes
.Ldarg_0
);
1192 ilgen
.Emit (OpCodes
.Ldfld
, fi_string_end
);
1193 ilgen
.Emit (OpCodes
.Bge
, l_nomatch
);
1196 // int c = str [strpos];
1197 LocalBuilder local_c
= ilgen
.DeclareLocal (typeof (char));
1198 ilgen
.Emit (OpCodes
.Ldarg_0
);
1199 ilgen
.Emit (OpCodes
.Ldfld
, fi_str
);
1200 ilgen
.Emit (OpCodes
.Ldarg_1
);
1202 ilgen
.Emit (OpCodes
.Ldc_I4_1
);
1203 ilgen
.Emit (OpCodes
.Sub
);
1205 ilgen
.Emit (OpCodes
.Callvirt
, typeof (string).GetMethod ("get_Chars"));
1206 ilgen
.Emit (OpCodes
.Stloc
, local_c
);
1208 Label l_match
= ilgen
.DefineLabel ();
1210 Label l_true
, l_false
;
1212 l_true
= negate
? l_nomatch
: l_match
;
1213 l_false
= negate
? l_match
: l_nomatch
;
1216 case RxOp
.CategoryAny
:
1217 // if (str [strpos] != '\n') {
1218 ilgen
.Emit (OpCodes
.Ldloc
, local_c
);
1219 ilgen
.Emit (OpCodes
.Ldc_I4
, (int)'\n');
1220 ilgen
.Emit (OpCodes
.Bne_Un
, l_true
);
1222 case RxOp
.CategoryAnySingleline
:
1223 ilgen
.Emit (OpCodes
.Br
, l_true
);
1225 case RxOp
.CategoryWord
:
1226 // if (Char.IsLetterOrDigit (c) || Char.GetUnicodeCategory (c) == UnicodeCategory.ConnectorPunctuation) {
1227 ilgen
.Emit (OpCodes
.Ldloc
, local_c
);
1228 ilgen
.Emit (OpCodes
.Call
, typeof (Char
).GetMethod ("IsLetterOrDigit", new Type
[] { typeof (char) }
));
1229 ilgen
.Emit (OpCodes
.Brtrue
, l_true
);
1230 ilgen
.Emit (OpCodes
.Ldloc
, local_c
);
1231 ilgen
.Emit (OpCodes
.Call
, typeof (Char
).GetMethod ("GetUnicodeCategory", new Type
[] { typeof (char) }
));
1232 ilgen
.Emit (OpCodes
.Ldc_I4
, (int)UnicodeCategory
.ConnectorPunctuation
);
1233 ilgen
.Emit (OpCodes
.Beq
, l_true
);
1235 case RxOp
.CategoryDigit
:
1236 // if (Char.IsDigit (c)) {
1237 ilgen
.Emit (OpCodes
.Ldloc
, local_c
);
1238 ilgen
.Emit (OpCodes
.Call
, typeof (Char
).GetMethod ("IsDigit", new Type
[] { typeof (char) }
));
1239 ilgen
.Emit (OpCodes
.Brtrue
, l_true
);
1241 case RxOp
.CategoryWhiteSpace
:
1242 // if (Char.IsWhiteSpace (c)) {
1243 ilgen
.Emit (OpCodes
.Ldloc
, local_c
);
1244 ilgen
.Emit (OpCodes
.Call
, typeof (Char
).GetMethod ("IsWhiteSpace", new Type
[] { typeof (char) }
));
1245 ilgen
.Emit (OpCodes
.Brtrue
, l_true
);
1247 case RxOp
.CategoryEcmaWord
:
1248 // if ('a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' || c == '_') {
1249 ilgen
.Emit (OpCodes
.Ldloc
, local_c
);
1250 ilgen
.Emit (OpCodes
.Ldc_I4
, (int)'a' - 1);
1251 ilgen
.Emit (OpCodes
.Cgt
);
1252 ilgen
.Emit (OpCodes
.Ldloc
, local_c
);
1253 ilgen
.Emit (OpCodes
.Ldc_I4
, (int)'z' + 1);
1254 ilgen
.Emit (OpCodes
.Clt
);
1255 ilgen
.Emit (OpCodes
.And
);
1256 ilgen
.Emit (OpCodes
.Brtrue
, l_true
);
1258 ilgen
.Emit (OpCodes
.Ldloc
, local_c
);
1259 ilgen
.Emit (OpCodes
.Ldc_I4
, (int)'A' - 1);
1260 ilgen
.Emit (OpCodes
.Cgt
);
1261 ilgen
.Emit (OpCodes
.Ldloc
, local_c
);
1262 ilgen
.Emit (OpCodes
.Ldc_I4
, (int)'Z' + 1);
1263 ilgen
.Emit (OpCodes
.Clt
);
1264 ilgen
.Emit (OpCodes
.And
);
1265 ilgen
.Emit (OpCodes
.Brtrue
, l_true
);
1267 ilgen
.Emit (OpCodes
.Ldloc
, local_c
);
1268 ilgen
.Emit (OpCodes
.Ldc_I4
, (int)'0' - 1);
1269 ilgen
.Emit (OpCodes
.Cgt
);
1270 ilgen
.Emit (OpCodes
.Ldloc
, local_c
);
1271 ilgen
.Emit (OpCodes
.Ldc_I4
, (int)'9' + 1);
1272 ilgen
.Emit (OpCodes
.Clt
);
1273 ilgen
.Emit (OpCodes
.And
);
1274 ilgen
.Emit (OpCodes
.Brtrue
, l_true
);
1276 ilgen
.Emit (OpCodes
.Ldloc
, local_c
);
1277 ilgen
.Emit (OpCodes
.Ldc_I4
, (int)'_');
1278 ilgen
.Emit (OpCodes
.Beq
, l_true
);
1280 case RxOp
.CategoryEcmaWhiteSpace
:
1281 // if (c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == '\v') {
1282 ilgen
.Emit (OpCodes
.Ldloc
, local_c
);
1283 ilgen
.Emit (OpCodes
.Ldc_I4
, (int)' ');
1284 ilgen
.Emit (OpCodes
.Beq
, l_true
);
1285 ilgen
.Emit (OpCodes
.Ldloc
, local_c
);
1286 ilgen
.Emit (OpCodes
.Ldc_I4
, (int)'\t');
1287 ilgen
.Emit (OpCodes
.Beq
, l_true
);
1288 ilgen
.Emit (OpCodes
.Ldloc
, local_c
);
1289 ilgen
.Emit (OpCodes
.Ldc_I4
, (int)'\n');
1290 ilgen
.Emit (OpCodes
.Beq
, l_true
);
1291 ilgen
.Emit (OpCodes
.Ldloc
, local_c
);
1292 ilgen
.Emit (OpCodes
.Ldc_I4
, (int)'\r');
1293 ilgen
.Emit (OpCodes
.Beq
, l_true
);
1294 ilgen
.Emit (OpCodes
.Ldloc
, local_c
);
1295 ilgen
.Emit (OpCodes
.Ldc_I4
, (int)'\f');
1296 ilgen
.Emit (OpCodes
.Beq
, l_true
);
1297 ilgen
.Emit (OpCodes
.Ldloc
, local_c
);
1298 ilgen
.Emit (OpCodes
.Ldc_I4
, (int)'\v');
1299 ilgen
.Emit (OpCodes
.Beq
, l_true
);
1301 case RxOp
.CategoryUnicodeSpecials
:
1302 // if ('\uFEFF' <= c && c <= '\uFEFF' || '\uFFF0' <= c && c <= '\uFFFD') {
1303 ilgen
.Emit (OpCodes
.Ldloc
, local_c
);
1304 ilgen
.Emit (OpCodes
.Ldc_I4
, (int)'\uFEFF' - 1);
1305 ilgen
.Emit (OpCodes
.Cgt
);
1306 ilgen
.Emit (OpCodes
.Ldloc
, local_c
);
1307 ilgen
.Emit (OpCodes
.Ldc_I4
, (int)'\uFEFF' + 1);
1308 ilgen
.Emit (OpCodes
.Clt
);
1309 ilgen
.Emit (OpCodes
.And
);
1310 ilgen
.Emit (OpCodes
.Brtrue
, l_true
);
1312 ilgen
.Emit (OpCodes
.Ldloc
, local_c
);
1313 ilgen
.Emit (OpCodes
.Ldc_I4
, (int)'\uFFF0' - 1);
1314 ilgen
.Emit (OpCodes
.Cgt
);
1315 ilgen
.Emit (OpCodes
.Ldloc
, local_c
);
1316 ilgen
.Emit (OpCodes
.Ldc_I4
, (int)'\uFFFD' + 1);
1317 ilgen
.Emit (OpCodes
.Clt
);
1318 ilgen
.Emit (OpCodes
.And
);
1319 ilgen
.Emit (OpCodes
.Brtrue
, l_true
);
1321 case RxOp
.CategoryUnicode
:
1322 // if (Char.GetUnicodeCategory (c) == (UnicodeCategory)program [pc + 1]) {
1323 ilgen
.Emit (OpCodes
.Ldloc
, local_c
);
1324 ilgen
.Emit (OpCodes
.Call
, typeof (Char
).GetMethod ("GetUnicodeCategory", new Type
[] { typeof (char) }
));
1325 ilgen
.Emit (OpCodes
.Ldc_I4
, (int)program
[pc
+ 1]);
1326 ilgen
.Emit (OpCodes
.Beq
, l_true
);
1330 ilgen
.Emit (OpCodes
.Br
, l_false
);
1332 ilgen
.MarkLabel (l_match
);
1335 ilgen
.Emit (OpCodes
.Ldarg_1
);
1336 ilgen
.Emit (OpCodes
.Ldc_I4_1
);
1338 ilgen
.Emit (OpCodes
.Sub
);
1340 ilgen
.Emit (OpCodes
.Add
);
1341 ilgen
.Emit (OpCodes
.Starg
, 1);
1343 Label l2
= ilgen
.DefineLabel ();
1344 ilgen
.Emit (OpCodes
.Br
, l2
);
1346 ilgen
.MarkLabel (l_nomatch
);
1348 ilgen
.Emit (OpCodes
.Br
, frame
.label_fail
);
1350 ilgen
.MarkLabel (l2
);
1352 if (op
== RxOp
.CategoryUnicode
)
1359 Console
.WriteLine ("Opcode " + op
+ " not supported.");
1375 class CILCompiler
: RxCompiler
{