2008-08-30 Zoltan Varga <vargaz@gmail.com>
[mcs.git] / class / System / System.Text.RegularExpressions / CILCompiler.cs
blob7b643e79ef09b7e98d7183ee9de3f7651c1974c5
1 using System;
2 using System.Collections;
3 using System.Globalization;
4 using System.Reflection;
5 using System.Reflection.Emit;
7 #if NET_2_0
8 using System.Collections.Generic;
9 #endif
11 namespace System.Text.RegularExpressions {
14 // Compiler which generates IL bytecode to perform the matching instead of
15 // interpreting a program.
16 // For simplicity, we inherit from RxCompiler, and generate the IL code based
17 // on the program generated by it. This also allows us to fall back to interpretation
18 // if we can't handle something.
19 // This is net 2.0, since 1.0 doesn't support DynamicMethods
20 // FIXME: Add support for 1.0, and CompileToAssembly
21 // FIXME: Override RxCompiler methods so we don't have to decode char
22 // matching opcodes
25 #if NET_2_0
26 class CILCompiler : RxCompiler, ICompiler {
// Cache of generated eval methods indexed by program counter, plus a
// parallel table recording which entries have already been requested.
private DynamicMethod[] eval_methods;
private bool[] eval_methods_defined;

/*
 * To avoid the overhead of decoding the countless opcode variants created
 * by RxCompiler, we save the original, 'generic' version and its flags
 * in these two tables. Both are keyed by the program position of the opcode.
 */
private Dictionary<int, int> generic_ops;
private Dictionary<int, int> op_flags;

// Reflection handles for the RxInterpreter / Mark members that the
// generated IL reads, writes or calls.
private static FieldInfo fi_str =
	typeof (RxInterpreter).GetField ("str", BindingFlags.Instance | BindingFlags.NonPublic);
private static FieldInfo fi_string_start =
	typeof (RxInterpreter).GetField ("string_start", BindingFlags.Instance | BindingFlags.NonPublic);
private static FieldInfo fi_string_end =
	typeof (RxInterpreter).GetField ("string_end", BindingFlags.Instance | BindingFlags.NonPublic);
// Disabled: match_start is not stored by the generated code.
// static FieldInfo fi_match_start = typeof (RxInterpreter).GetField ("match_start", BindingFlags.Instance|BindingFlags.NonPublic);
private static FieldInfo fi_program =
	typeof (RxInterpreter).GetField ("program", BindingFlags.Instance | BindingFlags.NonPublic);
private static FieldInfo fi_marks =
	typeof (RxInterpreter).GetField ("marks", BindingFlags.Instance | BindingFlags.NonPublic);
private static FieldInfo fi_groups =
	typeof (RxInterpreter).GetField ("groups", BindingFlags.Instance | BindingFlags.NonPublic);
private static FieldInfo fi_mark_start =
	typeof (Mark).GetField ("Start", BindingFlags.Instance | BindingFlags.Public | BindingFlags.NonPublic);
private static FieldInfo fi_mark_end =
	typeof (Mark).GetField ("End", BindingFlags.Instance | BindingFlags.Public | BindingFlags.NonPublic);
private static MethodInfo mi_is_word_char =
	typeof (RxInterpreter).GetMethod ("IsWordChar", BindingFlags.Static | BindingFlags.NonPublic);
private static MethodInfo mi_reset_groups =
	typeof (RxInterpreter).GetMethod ("ResetGroups", BindingFlags.Instance | BindingFlags.NonPublic);
private static MethodInfo mi_open =
	typeof (RxInterpreter).GetMethod ("Open", BindingFlags.Instance | BindingFlags.NonPublic);
private static MethodInfo mi_close =
	typeof (RxInterpreter).GetMethod ("Close", BindingFlags.Instance | BindingFlags.NonPublic);
// Creates a compiler with empty generic-opcode and opcode-flag tables.
public CILCompiler ()
{
	this.generic_ops = new Dictionary<int, int> ();
	this.op_flags = new Dictionary<int, int> ();
}
// Builds the machine factory for the program emitted so far. The factory
// receives a copy of the bytecode and, when IL generation succeeded, a
// delegate for the compiled eval method; otherwise it receives null in
// place of the delegate.
IMachineFactory ICompiler.GetMachineFactory ()
{
	// Snapshot exactly the curpos bytes that have been emitted.
	byte[] bytecode = new byte [curpos];
	Buffer.BlockCopy (program, 0, bytecode, 0, curpos);

	// One cache slot per program position.
	eval_methods = new DynamicMethod [bytecode.Length];
	eval_methods_defined = new bool [bytecode.Length];

	// The main eval method.
	// NOTE(review): 11 is presumably the offset of the first opcode after
	// the program header - confirm against RxCompiler.
	DynamicMethod entry = GetEvalMethod (bytecode, 11);

	if (entry == null)
		return new RxInterpreterFactory (bytecode, null);

	return new RxInterpreterFactory (bytecode, (EvalDelegate)entry.CreateDelegate (typeof (EvalDelegate)));
}
// Returns the eval method for the given pc, creating it on first request.
// The cached value may be null when CreateEvalMethod could not generate
// code for the program starting at that pc.
DynamicMethod GetEvalMethod (byte[] program, int pc)
{
	if (!eval_methods_defined [pc]) {
		// FIXME: Recursion ?
		// The slot is flagged before the method is created.
		eval_methods_defined [pc] = true;
		eval_methods [pc] = CreateEvalMethod (program, pc);
	}

	return eval_methods [pc];
}
// Looks up a non-public instance method of RxInterpreter by name.
private MethodInfo GetInterpreterMethod (string name)
{
	const BindingFlags lookup = BindingFlags.Instance | BindingFlags.NonPublic;
	Type interpreter = typeof (RxInterpreter);
	return interpreter.GetMethod (name, lookup);
}
// Decodes a 32-bit little-endian integer from code [pc .. pc + 3].
private int ReadInt (byte[] code, int pc)
{
	return code [pc]
		| (code [pc + 1] << 8)
		| (code [pc + 2] << 16)
		| (code [pc + 3] << 24);
}
// Packs the four matching options into an OpFlags bit set; each true
// argument contributes its corresponding flag.
static OpFlags MakeFlags (bool negate, bool ignore, bool reverse, bool lazy)
{
	return (negate ? OpFlags.Negate : 0)
		| (ignore ? OpFlags.IgnoreCase : 0)
		| (reverse ? OpFlags.RightToLeft : 0)
		| (lazy ? OpFlags.Lazy : 0);
}
// Records, for the opcode about to be emitted at the current program
// position (curpos), its 'generic' opcode and its flags, so that code
// generation can look them up by pc without decoding the specialized
// opcode variant.
//
//   op       - the generic opcode to remember
//   negate, ignore, reverse, lazy - options packed into the saved flags
//
// The lazy argument is now forwarded to MakeFlags; previously it was
// accepted but silently replaced by false.
void EmitGenericOp (RxOp op, bool negate, bool ignore, bool reverse, bool lazy)
{
	generic_ops [curpos] = (int)op;
	op_flags [curpos] = (int)MakeFlags (negate, ignore, reverse, lazy);
}
// Remembers the generic char opcode and flags for the current position,
// then lets the base compiler emit the actual opcode.
void ICompiler.EmitCharacter (char c, bool negate, bool ignore, bool reverse)
{
	const bool lazy = false;

	EmitGenericOp (RxOp.GenericChar, negate, ignore, reverse, lazy);
	base.EmitCharacter (c, negate, ignore, reverse);
}
// Remembers the generic range opcode and flags for the current position,
// then lets the base compiler emit the actual opcode. Ranges whose bounds
// are both below 256 are recorded as GenericRange, all others as
// GenericUnicodeRange.
void ICompiler.EmitRange (char lo, char hi, bool negate, bool ignore, bool reverse)
{
	bool both_below_256 = lo < 256 && hi < 256;
	RxOp generic = both_below_256 ? RxOp.GenericRange : RxOp.GenericUnicodeRange;

	EmitGenericOp (generic, negate, ignore, reverse, false);
	base.EmitRange (lo, hi, negate, ignore, reverse);
}
// Saves only the flags for the current position: RxCompiler decomposes a
// category into different opcodes, so no single generic opcode is stored.
void ICompiler.EmitCategory (Category cat, bool negate, bool reverse)
{
	OpFlags flags = MakeFlags (negate, false, reverse, false);
	op_flags [curpos] = (int)flags;

	base.EmitCategory (cat, negate, reverse);
}
// A negated category is emitted as the same category with the negate
// flag inverted, routed through this class's own EmitCategory.
void ICompiler.EmitNotCategory (Category cat, bool negate, bool reverse)
{
	ICompiler self = this;
	self.EmitCategory (cat, !negate, reverse);
}
// Linkage information for one inlined evaluation: the labels that the
// generated code branches to on success or failure, and the local that
// receives the resulting string position.
class Frame
{
	public Label label_pass, label_fail;
	public LocalBuilder local_strpos_res;

	// Defines the fail label, then the pass label, then an int local,
	// all on the given generator.
	public Frame (ILGenerator ilgen)
	{
		this.label_fail = ilgen.DefineLabel ();
		this.label_pass = ilgen.DefineLabel ();
		this.local_strpos_res = ilgen.DeclareLocal (typeof (int));
	}
}
148 * Create a dynamic method which is equivalent to the RxInterpreter.EvalByteCode
149 * method specialized to the given program and a given pc. Return the newly
150 * created method or null if a not-supported opcode was encountered.
// Builds the dynamic method "Eval_<pc>" for the program starting at pc.
// Returns the method, or null when EmitEvalMethodBody returns null.
DynamicMethod CreateEvalMethod (byte[] program, int pc)
{
	/*
	 * Args:
	 * interp - 0
	 * strpos - 1
	 * strpos_result - 2
	 */
	Type[] signature = new Type [] {
		typeof (RxInterpreter),
		typeof (int),
		typeof (int).MakeByRefType ()
	};
	DynamicMethod method = new DynamicMethod ("Eval_" + pc, typeof (bool), signature, typeof (RxInterpreter), true);
	ILGenerator ilgen = method.GetILGenerator ();

	/*
	 * Recursive calls to EvalByteCode are inlined manually by calling
	 * EmitEvalMethodBody with the pc of the recursive call. Frame objects hold
	 * the information required to link together the code generated by the
	 * recursive call with the rest of the code.
	 */
	Frame frame = new Frame (ilgen);

	// The pc reported back by the body emitter is not needed here.
	int next_pc;
	method = EmitEvalMethodBody (method, ilgen, frame, program, pc, false, false, out next_pc);
	if (method == null)
		return null;

	// Pass: store the result position through the by-ref argument and return true.
	ilgen.MarkLabel (frame.label_pass);
	ilgen.Emit (OpCodes.Ldarg_2);
	ilgen.Emit (OpCodes.Ldloc, frame.local_strpos_res);
	ilgen.Emit (OpCodes.Stind_I4);
	ilgen.Emit (OpCodes.Ldc_I4_1);
	ilgen.Emit (OpCodes.Ret);

	// Fail: return false.
	ilgen.MarkLabel (frame.label_fail);
	ilgen.Emit (OpCodes.Ldc_I4_0);
	ilgen.Emit (OpCodes.Ret);

	return method;
}
190 * Emit IL code for a sequence of opcodes starting at pc. If there is a match,
191 * set frame.local_strpos_res to the position of the match, then branch to
192 * frame.label_pass. Else branch to frame.label_fail. If one_op is true, only
193 * generate code for one opcode and set out_pc to the next pc after the opcode.
194 * If no_bump is true, don't bump strpos in char matching opcodes.
195 * Keep this in sync with RxInterpreter.EvalByteCode (). It is in sync with
196 * the version in r96072.
197 * FIXME: In the new interpreter and the IL compiler, '<.+>' does not match '<FOO>'
198 * Also, '<.+?' matches '<FOO>', but the match is '<FOO>' instead of '<F'
199 * FIXME: Modify the regex tests so they are run with RegexOptions.Compiled as
200 * well.
202 private DynamicMethod EmitEvalMethodBody (DynamicMethod m, ILGenerator ilgen,
203 Frame frame, byte[] program, int pc,
204 bool one_op, bool no_bump,
205 out int out_pc)
207 int start, length, end;
209 out_pc = 0;
211 int group_count = 1 + (program [1] | (program [2] << 8));
213 while (true) {
214 RxOp op = (RxOp)program [pc];
216 //Console.WriteLine (op);
218 // FIXME: Optimize this
219 if (generic_ops.ContainsKey (pc))
220 op = (RxOp)generic_ops [pc];
222 switch (op) {
223 case RxOp.Anchor:
224 case RxOp.AnchorReverse: {
225 bool reverse = (RxOp)program [pc] == RxOp.AnchorReverse;
226 length = program [pc + 3] | (program [pc + 4] << 8);
227 pc += program [pc + 1] | (program [pc + 2] << 8);
229 // Optimize some common cases by inlining the code generated for the
230 // anchor body
231 RxOp anch_op = (RxOp)program [pc];
232 // FIXME: Do this even if the archor op is not the last in the regex
233 if (!reverse && group_count == 1 && anch_op == RxOp.Char && (RxOp)program [pc + 2] == RxOp.True) {
236 * while (strpos < string_end) {
237 * if (str [strpos] == program [pc + 1]) {
238 * match_start = strpos;
239 * strpos_result = strpos + 1;
240 * marks [groups [0]].Start = strpos;
241 * if (groups.Length > 1)
242 * marks [groups [0]].End = res;
243 * return true;
245 * strpos ++;
247 * return false;
249 // Add some locals to avoid an indirection
250 LocalBuilder local_string_end = ilgen.DeclareLocal (typeof (int));
251 ilgen.Emit (OpCodes.Ldarg_0);
252 ilgen.Emit (OpCodes.Ldfld, fi_string_end);
253 ilgen.Emit (OpCodes.Stloc, local_string_end);
254 LocalBuilder local_str = ilgen.DeclareLocal (typeof (string));
255 ilgen.Emit (OpCodes.Ldarg_0);
256 ilgen.Emit (OpCodes.Ldfld, fi_str);
257 ilgen.Emit (OpCodes.Stloc, local_str);
259 //while (strpos < string_end) {
260 // -> Done at the end of the loop like mcs does
261 Label l1 = ilgen.DefineLabel ();
262 Label l2 = ilgen.DefineLabel ();
263 ilgen.Emit (OpCodes.Br, l2);
264 ilgen.MarkLabel (l1);
266 // if (str [strpos] == program [pc + 1]) {
267 Label l3 = ilgen.DefineLabel ();
268 ilgen.Emit (OpCodes.Ldloc, local_str);
269 ilgen.Emit (OpCodes.Ldarg_1);
270 ilgen.Emit (OpCodes.Callvirt, typeof (string).GetMethod ("get_Chars"));
271 ilgen.Emit (OpCodes.Conv_I4);
272 ilgen.Emit (OpCodes.Ldc_I4, (int)program [pc + 1]);
273 ilgen.Emit (OpCodes.Beq, l3);
275 // The true case is done after the loop
277 // }
278 // strpos++;
279 ilgen.Emit (OpCodes.Ldarg_1);
280 ilgen.Emit (OpCodes.Ldc_I4_1);
281 ilgen.Emit (OpCodes.Add);
282 ilgen.Emit (OpCodes.Starg, 1);
284 ilgen.MarkLabel (l2);
285 ilgen.Emit (OpCodes.Ldarg_1);
286 ilgen.Emit (OpCodes.Ldloc, local_string_end);
287 ilgen.Emit (OpCodes.Blt, l1);
289 //return false;
290 ilgen.Emit (OpCodes.Br, frame.label_fail);
292 // True case
293 ilgen.MarkLabel (l3);
294 // match_start = strpos;
295 // match_start doesn't seem to be used
297 ilgen.Emit (OpCodes.Ldarg_0);
298 ilgen.Emit (OpCodes.Ldarg_1);
299 ilgen.Emit (OpCodes.Stfld, fi_match_start);
301 // strpos_result = strpos + 1;
302 ilgen.Emit (OpCodes.Ldarg_1);
303 ilgen.Emit (OpCodes.Ldc_I4_1);
304 ilgen.Emit (OpCodes.Add);
305 ilgen.Emit (OpCodes.Stloc, frame.local_strpos_res);
306 // call SetStartOfMatch (strpos)
307 ilgen.Emit (OpCodes.Ldarg_0);
308 ilgen.Emit (OpCodes.Ldarg_1);
309 ilgen.Emit (OpCodes.Call, GetInterpreterMethod ("SetStartOfMatch"));
310 // return true;
311 ilgen.Emit (OpCodes.Br, frame.label_pass);
313 } else {
314 // General case
316 //Console.WriteLine ("Anchor op " + anch_op);
318 // Add some locals to avoid an indirection
319 LocalBuilder local_string_end = ilgen.DeclareLocal (typeof (int));
320 ilgen.Emit (OpCodes.Ldarg_0);
321 ilgen.Emit (OpCodes.Ldfld, fi_string_end);
322 ilgen.Emit (OpCodes.Ldc_I4_1);
323 ilgen.Emit (OpCodes.Add);
324 ilgen.Emit (OpCodes.Stloc, local_string_end);
326 //while (strpos < string_end + 1) {
327 // -> Done at the end of the loop like mcs does
328 Label l1 = ilgen.DefineLabel ();
329 Label l2 = ilgen.DefineLabel ();
330 ilgen.Emit (OpCodes.Br, l2);
331 ilgen.MarkLabel (l1);
333 //if (groups.Length > 1) {
334 // ResetGroups ();
335 // marks [groups [0]].Start = strpos;
337 if (group_count > 1) {
338 ilgen.Emit (OpCodes.Ldarg_0);
339 ilgen.Emit (OpCodes.Call, mi_reset_groups);
341 ilgen.Emit (OpCodes.Ldarg_0);
342 ilgen.Emit (OpCodes.Ldfld, fi_marks);
343 ilgen.Emit (OpCodes.Ldarg_0);
344 ilgen.Emit (OpCodes.Ldfld, fi_groups);
345 ilgen.Emit (OpCodes.Ldc_I4_0);
346 ilgen.Emit (OpCodes.Ldelem_I4);
347 ilgen.Emit (OpCodes.Ldelema, typeof (Mark));
348 ilgen.Emit (OpCodes.Ldarg_1);
349 ilgen.Emit (OpCodes.Stfld, fi_mark_start);
352 // if (EvalByteCode (pc, strpos, ref res)) {
354 Frame new_frame = new Frame (ilgen);
356 // old_stros = strpos;
357 LocalBuilder local_old_strpos = ilgen.DeclareLocal (typeof (int));
358 ilgen.Emit (OpCodes.Ldarg_1);
359 ilgen.Emit (OpCodes.Stloc, local_old_strpos);
361 m = EmitEvalMethodBody (m, ilgen, new_frame, program, pc, false, false, out out_pc);
362 if (m == null)
363 return null;
365 // Pass
366 ilgen.MarkLabel (new_frame.label_pass);
367 // match_start = old_strpos;
368 // match_start doesn't seem to be used
370 ilgen.Emit (OpCodes.Ldarg_0);
371 ilgen.Emit (OpCodes.Ldloc, local_old_strpos);
372 ilgen.Emit (OpCodes.Stfld, fi_match_start);
374 // strpos_result = res;
375 ilgen.Emit (OpCodes.Ldloc, new_frame.local_strpos_res);
376 ilgen.Emit (OpCodes.Stloc, frame.local_strpos_res);
377 // marks [groups [0]].Start = old_strpos;
378 ilgen.Emit (OpCodes.Ldarg_0);
379 ilgen.Emit (OpCodes.Ldfld, fi_marks);
380 ilgen.Emit (OpCodes.Ldarg_0);
381 ilgen.Emit (OpCodes.Ldfld, fi_groups);
382 ilgen.Emit (OpCodes.Ldc_I4_0);
383 ilgen.Emit (OpCodes.Ldelem_I4);
384 ilgen.Emit (OpCodes.Ldelema, typeof (Mark));
385 ilgen.Emit (OpCodes.Ldloc, local_old_strpos);
386 ilgen.Emit (OpCodes.Stfld, fi_mark_start);
387 // if (groups.Length > 1)
388 // marks [groups [0]].End = res;
389 if (group_count > 1) {
390 ilgen.Emit (OpCodes.Ldarg_0);
391 ilgen.Emit (OpCodes.Ldfld, fi_marks);
392 ilgen.Emit (OpCodes.Ldarg_0);
393 ilgen.Emit (OpCodes.Ldfld, fi_groups);
394 ilgen.Emit (OpCodes.Ldc_I4_0);
395 ilgen.Emit (OpCodes.Ldelem_I4);
396 ilgen.Emit (OpCodes.Ldelema, typeof (Mark));
397 ilgen.Emit (OpCodes.Ldloc, new_frame.local_strpos_res);
398 ilgen.Emit (OpCodes.Stfld, fi_mark_end);
401 // return true;
402 ilgen.Emit (OpCodes.Br, frame.label_pass);
404 // Fail
405 ilgen.MarkLabel (new_frame.label_fail);
406 // strpos = old_strpos +- 1;
407 ilgen.Emit (OpCodes.Ldloc, local_old_strpos);
408 ilgen.Emit (OpCodes.Ldc_I4_1);
409 if (reverse)
410 ilgen.Emit (OpCodes.Sub);
411 else
412 ilgen.Emit (OpCodes.Add);
413 ilgen.Emit (OpCodes.Starg, 1);
415 ilgen.MarkLabel (l2);
416 if (reverse) {
417 ilgen.Emit (OpCodes.Ldarg_1);
418 ilgen.Emit (OpCodes.Ldc_I4_0);
419 ilgen.Emit (OpCodes.Bge, l1);
420 } else {
421 ilgen.Emit (OpCodes.Ldarg_1);
422 ilgen.Emit (OpCodes.Ldloc, local_string_end);
423 ilgen.Emit (OpCodes.Blt, l1);
425 //return false;
426 ilgen.Emit (OpCodes.Br, frame.label_fail);
429 goto End;
431 case RxOp.Branch: {
432 //if (EvalByteCode (pc + 3, strpos, ref res)) {
434 // Emit the rest of the code inline instead of making a recursive call
435 Frame new_frame = new Frame (ilgen);
437 // old_strpos = strpos;
438 LocalBuilder local_old_strpos = ilgen.DeclareLocal (typeof (int));
439 ilgen.Emit (OpCodes.Ldarg_1);
440 ilgen.Emit (OpCodes.Stloc, local_old_strpos);
442 m = EmitEvalMethodBody (m, ilgen, new_frame, program, pc + 3, false, false, out out_pc);
443 if (m == null)
444 return null;
446 // Pass
447 ilgen.MarkLabel (new_frame.label_pass);
448 // strpos_result = res;
449 ilgen.Emit (OpCodes.Ldloc, new_frame.local_strpos_res);
450 ilgen.Emit (OpCodes.Stloc, frame.local_strpos_res);
451 // return true;
452 ilgen.Emit (OpCodes.Br, frame.label_pass);
454 // Fail
455 ilgen.MarkLabel (new_frame.label_fail);
456 // strpos = old_strpos;
457 ilgen.Emit (OpCodes.Ldloc, local_old_strpos);
458 ilgen.Emit (OpCodes.Starg, 1);
460 pc += program [pc + 1] | (program [pc + 2] << 8);
461 break;
463 case RxOp.GenericChar:
464 case RxOp.GenericRange: {
465 OpFlags flags = (OpFlags)op_flags [pc];
466 bool negate = (flags & OpFlags.Negate) > 0;
467 bool ignore = (flags & OpFlags.IgnoreCase) > 0;
468 bool reverse = (flags & OpFlags.RightToLeft) > 0;
470 //if (strpos < string_end) {
471 Label l1 = ilgen.DefineLabel ();
472 if (reverse) {
473 ilgen.Emit (OpCodes.Ldarg_1);
474 ilgen.Emit (OpCodes.Ldc_I4_0);
475 ilgen.Emit (OpCodes.Ble, l1);
476 } else {
477 ilgen.Emit (OpCodes.Ldarg_1);
478 ilgen.Emit (OpCodes.Ldarg_0);
479 ilgen.Emit (OpCodes.Ldfld, fi_string_end);
480 ilgen.Emit (OpCodes.Bge, l1);
483 // int c = str [strpos];
484 LocalBuilder local_c = ilgen.DeclareLocal (typeof (char));
485 ilgen.Emit (OpCodes.Ldarg_0);
486 ilgen.Emit (OpCodes.Ldfld, fi_str);
487 ilgen.Emit (OpCodes.Ldarg_1);
488 if (reverse) {
489 ilgen.Emit (OpCodes.Ldc_I4_1);
490 ilgen.Emit (OpCodes.Sub);
492 ilgen.Emit (OpCodes.Callvirt, typeof (string).GetMethod ("get_Chars"));
493 if (ignore)
494 ilgen.Emit (OpCodes.Call, typeof (Char).GetMethod ("ToLower", new Type [] { typeof (char) }));
496 if (op == RxOp.GenericChar) {
497 ilgen.Emit (OpCodes.Conv_I4);
498 ilgen.Emit (OpCodes.Ldc_I4, (int)program [pc + 1]);
499 ilgen.Emit (negate ? OpCodes.Beq : OpCodes.Bne_Un, l1);
501 pc += 2;
502 } else if (op == RxOp.GenericRange) {
503 ilgen.Emit (OpCodes.Stloc, local_c);
505 // if (c >= program [pc + 1] && c <= program [pc + 2]) {
506 if (negate) {
507 Label l3 = ilgen.DefineLabel ();
509 ilgen.Emit (OpCodes.Ldloc, local_c);
510 ilgen.Emit (OpCodes.Ldc_I4, (int)program [pc + 1]);
511 ilgen.Emit (OpCodes.Blt, l3);
512 ilgen.Emit (OpCodes.Ldloc, local_c);
513 ilgen.Emit (OpCodes.Ldc_I4, (int)program [pc + 2]);
514 ilgen.Emit (OpCodes.Bgt, l3);
515 ilgen.Emit (OpCodes.Br, l1);
516 ilgen.MarkLabel (l3);
517 } else {
518 ilgen.Emit (OpCodes.Ldloc, local_c);
519 ilgen.Emit (OpCodes.Ldc_I4, (int)program [pc + 1]);
520 ilgen.Emit (OpCodes.Blt, l1);
521 ilgen.Emit (OpCodes.Ldloc, local_c);
522 ilgen.Emit (OpCodes.Ldc_I4, (int)program [pc + 2]);
523 ilgen.Emit (OpCodes.Bgt, l1);
526 pc += 3;
527 } else {
528 throw new NotSupportedException ();
531 //ilgen.EmitWriteLine ("HIT:" + (char)program [pc + 1]);
532 if (!no_bump) {
533 // strpos++ / strpos--;
534 ilgen.Emit (OpCodes.Ldarg_1);
535 ilgen.Emit (OpCodes.Ldc_I4_1);
536 if (reverse)
537 ilgen.Emit (OpCodes.Sub);
538 else
539 ilgen.Emit (OpCodes.Add);
540 ilgen.Emit (OpCodes.Starg, 1);
542 Label l2 = ilgen.DefineLabel ();
543 ilgen.Emit (OpCodes.Br, l2);
545 ilgen.MarkLabel (l1);
546 //return false;
547 ilgen.Emit (OpCodes.Br, frame.label_fail);
548 ilgen.MarkLabel (l2);
550 break;
552 case RxOp.True: {
553 //strpos_result = strpos;
554 ilgen.Emit (OpCodes.Ldarg_1);
555 ilgen.Emit (OpCodes.Stloc, frame.local_strpos_res);
556 // return true;
557 ilgen.Emit (OpCodes.Br, frame.label_pass);
558 pc++;
559 goto End;
561 case RxOp.False: {
562 // return false;
563 ilgen.Emit (OpCodes.Br, frame.label_fail);
564 pc++;
565 goto End;
567 case RxOp.AnyPosition: {
568 pc++;
569 break;
571 case RxOp.StartOfString: {
572 //if (strpos != 0)
573 // return false;
574 ilgen.Emit (OpCodes.Ldarg_1);
575 ilgen.Emit (OpCodes.Ldc_I4_0);
576 ilgen.Emit (OpCodes.Bgt, frame.label_fail);
577 pc++;
578 break;
580 case RxOp.StartOfLine: {
581 // FIXME: windows line endings
582 //if (!(strpos == 0 || str [strpos - 1] == '\n'))
583 // return false;
584 Label l = ilgen.DefineLabel ();
585 ilgen.Emit (OpCodes.Ldarg_1);
586 ilgen.Emit (OpCodes.Ldc_I4_0);
587 ilgen.Emit (OpCodes.Beq, l);
588 ilgen.Emit (OpCodes.Ldarg_0);
589 ilgen.Emit (OpCodes.Ldfld, fi_str);
590 ilgen.Emit (OpCodes.Ldarg_1);
591 ilgen.Emit (OpCodes.Ldc_I4_1);
592 ilgen.Emit (OpCodes.Sub);
593 ilgen.Emit (OpCodes.Callvirt, typeof (string).GetMethod ("get_Chars"));
594 ilgen.Emit (OpCodes.Ldc_I4, (int)'\n');
595 ilgen.Emit (OpCodes.Beq, l);
596 ilgen.Emit (OpCodes.Br, frame.label_fail);
597 ilgen.MarkLabel (l);
599 pc++;
600 break;
602 case RxOp.StartOfScan: {
603 //if (strpos != string_start)
604 // return false;
605 ilgen.Emit (OpCodes.Ldarg_1);
606 ilgen.Emit (OpCodes.Ldarg_0);
607 ilgen.Emit (OpCodes.Ldfld, fi_string_start);
608 ilgen.Emit (OpCodes.Bne_Un, frame.label_fail);
609 pc++;
610 break;
612 case RxOp.End: {
613 //if (!(strpos == string_end || (strpos == string_end - 1 && str [strpos] == '\n')))
614 // return false;
615 Label l = ilgen.DefineLabel ();
617 ilgen.Emit (OpCodes.Ldarg_1);
618 ilgen.Emit (OpCodes.Ldarg_0);
619 ilgen.Emit (OpCodes.Ldfld, fi_string_end);
620 ilgen.Emit (OpCodes.Beq, l);
622 Label l2 = ilgen.DefineLabel ();
623 ilgen.Emit (OpCodes.Ldarg_1);
624 ilgen.Emit (OpCodes.Ldarg_0);
625 ilgen.Emit (OpCodes.Ldfld, fi_string_end);
626 ilgen.Emit (OpCodes.Ldc_I4_1);
627 ilgen.Emit (OpCodes.Sub);
628 ilgen.Emit (OpCodes.Bne_Un, l2);
629 ilgen.Emit (OpCodes.Ldarg_0);
630 ilgen.Emit (OpCodes.Ldfld, fi_str);
631 ilgen.Emit (OpCodes.Ldarg_1);
632 ilgen.Emit (OpCodes.Callvirt, typeof (string).GetMethod ("get_Chars"));
633 ilgen.Emit (OpCodes.Ldc_I4, (int)'\n');
634 ilgen.Emit (OpCodes.Bne_Un, l2);
635 ilgen.Emit (OpCodes.Br, l);
636 ilgen.MarkLabel (l2);
638 ilgen.Emit (OpCodes.Br, frame.label_fail);
639 ilgen.MarkLabel (l);
641 pc++;
642 break;
644 case RxOp.EndOfString: {
645 //if (strpos != string_end)
646 // return false;
647 ilgen.Emit (OpCodes.Ldarg_1);
648 ilgen.Emit (OpCodes.Ldarg_0);
649 ilgen.Emit (OpCodes.Ldfld, fi_string_end);
650 ilgen.Emit (OpCodes.Bne_Un, frame.label_fail);
651 pc++;
652 break;
654 case RxOp.EndOfLine: {
655 //if (!(strpos == string_end || str [strpos] == '\n'))
656 // return false;
657 Label l_match = ilgen.DefineLabel ();
658 ilgen.Emit (OpCodes.Ldarg_1);
659 ilgen.Emit (OpCodes.Ldarg_0);
660 ilgen.Emit (OpCodes.Ldfld, fi_string_end);
661 ilgen.Emit (OpCodes.Beq, l_match);
662 ilgen.Emit (OpCodes.Ldarg_0);
663 ilgen.Emit (OpCodes.Ldfld, fi_str);
664 ilgen.Emit (OpCodes.Ldarg_1);
665 ilgen.Emit (OpCodes.Callvirt, typeof (string).GetMethod ("get_Chars"));
666 ilgen.Emit (OpCodes.Ldc_I4, (int)'\n');
667 ilgen.Emit (OpCodes.Beq, l_match);
668 ilgen.Emit (OpCodes.Br, frame.label_fail);
669 ilgen.MarkLabel (l_match);
671 pc++;
672 break;
674 case RxOp.WordBoundary:
675 case RxOp.NoWordBoundary: {
676 bool negate = op == RxOp.NoWordBoundary;
678 //if (string_end == 0)
679 // return false;
680 ilgen.Emit (OpCodes.Ldarg_0);
681 ilgen.Emit (OpCodes.Ldfld, fi_string_end);
682 ilgen.Emit (OpCodes.Ldc_I4_0);
683 ilgen.Emit (OpCodes.Beq, frame.label_fail);
685 Label l_match = ilgen.DefineLabel ();
687 //if (strpos == 0) {
688 Label l1 = ilgen.DefineLabel ();
689 ilgen.Emit (OpCodes.Ldarg_1);
690 ilgen.Emit (OpCodes.Ldc_I4_0);
691 ilgen.Emit (OpCodes.Bne_Un, l1);
692 //if (!IsWordChar (str [strpos])) {
693 // return false;
694 ilgen.Emit (OpCodes.Ldarg_0);
695 ilgen.Emit (OpCodes.Ldfld, fi_str);
696 ilgen.Emit (OpCodes.Ldarg_1);
697 ilgen.Emit (OpCodes.Callvirt, typeof (string).GetMethod ("get_Chars"));
698 ilgen.Emit (OpCodes.Call, mi_is_word_char);
699 ilgen.Emit (negate ? OpCodes.Brtrue : OpCodes.Brfalse, frame.label_fail);
700 ilgen.Emit (OpCodes.Br, l_match);
702 //} else if (strpos == string_end) {
703 ilgen.MarkLabel (l1);
704 Label l2 = ilgen.DefineLabel ();
705 ilgen.Emit (OpCodes.Ldarg_1);
706 ilgen.Emit (OpCodes.Ldarg_0);
707 ilgen.Emit (OpCodes.Ldfld, fi_string_end);
708 ilgen.Emit (OpCodes.Bne_Un, l2);
709 //if (!IsWordChar (str [strpos - 1])) {
710 // return false;
711 ilgen.Emit (OpCodes.Ldarg_0);
712 ilgen.Emit (OpCodes.Ldfld, fi_str);
713 ilgen.Emit (OpCodes.Ldarg_1);
714 ilgen.Emit (OpCodes.Ldc_I4_1);
715 ilgen.Emit (OpCodes.Sub);
716 ilgen.Emit (OpCodes.Callvirt, typeof (string).GetMethod ("get_Chars"));
717 ilgen.Emit (OpCodes.Call, mi_is_word_char);
718 ilgen.Emit (negate ? OpCodes.Brtrue : OpCodes.Brfalse, frame.label_fail);
719 ilgen.Emit (OpCodes.Br, l_match);
721 //} else {
722 ilgen.MarkLabel (l2);
723 //if (IsWordChar (str [strpos]) == IsWordChar (str [strpos - 1])) {
724 // return false;
725 ilgen.Emit (OpCodes.Ldarg_0);
726 ilgen.Emit (OpCodes.Ldfld, fi_str);
727 ilgen.Emit (OpCodes.Ldarg_1);
728 ilgen.Emit (OpCodes.Callvirt, typeof (string).GetMethod ("get_Chars"));
729 ilgen.Emit (OpCodes.Call, mi_is_word_char);
730 ilgen.Emit (OpCodes.Ldarg_0);
731 ilgen.Emit (OpCodes.Ldfld, fi_str);
732 ilgen.Emit (OpCodes.Ldarg_1);
733 ilgen.Emit (OpCodes.Ldc_I4_1);
734 ilgen.Emit (OpCodes.Sub);
735 ilgen.Emit (OpCodes.Callvirt, typeof (string).GetMethod ("get_Chars"));
736 ilgen.Emit (OpCodes.Call, mi_is_word_char);
737 ilgen.Emit (negate ? OpCodes.Bne_Un : OpCodes.Beq, frame.label_fail);
738 ilgen.Emit (OpCodes.Br, l_match);
740 ilgen.MarkLabel (l_match);
742 pc++;
743 break;
745 case RxOp.Bitmap:
746 case RxOp.BitmapIgnoreCase: {
747 bool ignore = (op == RxOp.BitmapIgnoreCase);
749 //if (strpos < string_end) {
750 Label l1 = ilgen.DefineLabel ();
751 Label l2 = ilgen.DefineLabel ();
752 ilgen.Emit (OpCodes.Ldarg_1);
753 ilgen.Emit (OpCodes.Ldarg_0);
754 ilgen.Emit (OpCodes.Ldfld, fi_string_end);
755 ilgen.Emit (OpCodes.Bge, l1);
756 // int c = str [strpos];
757 LocalBuilder local_c = ilgen.DeclareLocal (typeof (int));
758 ilgen.Emit (OpCodes.Ldarg_0);
759 ilgen.Emit (OpCodes.Ldfld, fi_str);
760 ilgen.Emit (OpCodes.Ldarg_1);
761 ilgen.Emit (OpCodes.Callvirt, typeof (string).GetMethod ("get_Chars"));
762 ilgen.Emit (OpCodes.Conv_I4);
763 if (ignore)
764 ilgen.Emit (OpCodes.Call, typeof (Char).GetMethod ("ToLower", new Type [] { typeof (char) }));
765 // c -= program [pc + 1];
766 ilgen.Emit (OpCodes.Ldc_I4, (int)program [pc + 1]);
767 ilgen.Emit (OpCodes.Sub);
768 ilgen.Emit (OpCodes.Stloc, local_c);
769 length = program [pc + 2];
770 // if (c < 0 || c >= (length << 3))
771 // return false;
772 ilgen.Emit (OpCodes.Ldloc, local_c);
773 ilgen.Emit (OpCodes.Ldc_I4_0);
774 ilgen.Emit (OpCodes.Blt, frame.label_fail);
775 ilgen.Emit (OpCodes.Ldloc, local_c);
776 ilgen.Emit (OpCodes.Ldc_I4, length << 3);
777 ilgen.Emit (OpCodes.Bge, frame.label_fail);
778 pc += 3;
780 // Optimized version for small bitmaps
781 if (length <= 4) {
782 uint bitmap = program [pc];
784 if (length > 1)
785 bitmap |= ((uint)program [pc + 1] << 8);
786 if (length > 2)
787 bitmap |= ((uint)program [pc + 2] << 16);
788 if (length > 3)
789 bitmap |= ((uint)program [pc + 3] << 24);
791 //if ((bitmap >> c) & 1)
792 ilgen.Emit (OpCodes.Ldc_I4, bitmap);
793 ilgen.Emit (OpCodes.Ldloc, local_c);
794 ilgen.Emit (OpCodes.Shr_Un);
795 ilgen.Emit (OpCodes.Ldc_I4_1);
796 ilgen.Emit (OpCodes.And);
797 ilgen.Emit (OpCodes.Brfalse, l1);
798 } else {
799 // if ((program [pc + (c >> 3)] & (1 << (c & 0x7))) != 0) {
800 ilgen.Emit (OpCodes.Ldarg_0);
801 ilgen.Emit (OpCodes.Ldfld, fi_program);
802 ilgen.Emit (OpCodes.Ldloc, local_c);
803 ilgen.Emit (OpCodes.Ldc_I4_3);
804 ilgen.Emit (OpCodes.Shr);
805 ilgen.Emit (OpCodes.Ldc_I4, pc);
806 ilgen.Emit (OpCodes.Add);
807 ilgen.Emit (OpCodes.Ldelem_I1);
808 ilgen.Emit (OpCodes.Ldc_I4_1);
809 ilgen.Emit (OpCodes.Ldloc, local_c);
810 ilgen.Emit (OpCodes.Ldc_I4, 7);
811 ilgen.Emit (OpCodes.And);
812 ilgen.Emit (OpCodes.Shl);
813 ilgen.Emit (OpCodes.And);
814 ilgen.Emit (OpCodes.Ldc_I4_0);
815 ilgen.Emit (OpCodes.Beq, l1);
817 // strpos++;
818 ilgen.Emit (OpCodes.Ldarg_1);
819 ilgen.Emit (OpCodes.Ldc_I4_1);
820 ilgen.Emit (OpCodes.Add);
821 ilgen.Emit (OpCodes.Starg, 1);
822 // continue;
823 ilgen.Emit (OpCodes.Br, l2);
824 // }
826 //return false;
827 ilgen.MarkLabel (l1);
828 ilgen.Emit (OpCodes.Br, frame.label_fail);
830 ilgen.MarkLabel (l2);
832 pc += length;
833 break;
835 case RxOp.NoBitmap:
836 case RxOp.NoBitmapIgnoreCase: {
837 // Not currently used
838 Console.WriteLine ("Opcode " + op + " not supported.");
839 return null;
841 case RxOp.String:
842 case RxOp.StringIgnoreCase:
843 case RxOp.StringReverse:
844 case RxOp.StringIgnoreCaseReverse: {
845 bool ignore = (op == RxOp.StringIgnoreCase || op == RxOp.StringIgnoreCaseReverse);
846 bool reverse = (op == RxOp.StringReverse || op == RxOp.StringIgnoreCaseReverse);
848 start = pc + 2;
849 length = program [pc + 1];
850 //if (strpos + length > string_end)
851 // return false;
852 if (reverse) {
853 ilgen.Emit (OpCodes.Ldarg_1);
854 ilgen.Emit (OpCodes.Ldc_I4, length);
855 ilgen.Emit (OpCodes.Blt, frame.label_fail);
856 } else {
857 ilgen.Emit (OpCodes.Ldarg_1);
858 ilgen.Emit (OpCodes.Ldc_I4, length);
859 ilgen.Emit (OpCodes.Add);
860 ilgen.Emit (OpCodes.Ldarg_0);
861 ilgen.Emit (OpCodes.Ldfld, fi_string_end);
862 ilgen.Emit (OpCodes.Bgt, frame.label_fail);
865 /* Avoid unsafe code in Moonlight build */
866 #if false && !NET_2_1
867 // FIXME:
868 if (reverse)
869 throw new NotImplementedException ();
870 int i;
871 LocalBuilder local_strptr = ilgen.DeclareLocal (typeof (char).MakePointerType ());
872 // char *strptr = &str.start_char + strpos
873 ilgen.Emit (OpCodes.Ldarg_0);
874 ilgen.Emit (OpCodes.Ldfld, fi_str);
875 ilgen.Emit (OpCodes.Ldflda, typeof (String).GetField ("start_char", BindingFlags.Instance|BindingFlags.NonPublic));
876 ilgen.Emit (OpCodes.Ldarg_1);
877 ilgen.Emit (OpCodes.Ldc_I4_1);
878 ilgen.Emit (OpCodes.Shl);
879 ilgen.Emit (OpCodes.Add);
880 ilgen.Emit (OpCodes.Stloc, local_strptr);
882 end = start + length;
883 for (i = 0; i < length; ++i) {
884 // if (*(strptr + i) != program [start + i])
885 // return false;
886 ilgen.Emit (OpCodes.Ldloc, local_strptr);
887 ilgen.Emit (OpCodes.Ldc_I4, i * 2);
888 ilgen.Emit (OpCodes.Add);
889 ilgen.Emit (OpCodes.Ldind_I2);
890 if (ignore)
891 ilgen.Emit (OpCodes.Call, typeof (Char).GetMethod ("ToLower", new Type [] { typeof (char) }));
892 ilgen.Emit (OpCodes.Ldc_I4, (int)program [start + i]);
893 ilgen.Emit (OpCodes.Bne_Un, frame.label_fail);
896 // strpos += length
897 ilgen.Emit (OpCodes.Ldarg_1);
898 ilgen.Emit (OpCodes.Ldc_I4, length);
899 ilgen.Emit (OpCodes.Add);
900 ilgen.Emit (OpCodes.Starg, 1);
902 #else
903 // Allocate a local for 'str' to save an indirection
904 LocalBuilder local_str = ilgen.DeclareLocal (typeof (string));
905 ilgen.Emit (OpCodes.Ldarg_0);
906 ilgen.Emit (OpCodes.Ldfld, fi_str);
907 ilgen.Emit (OpCodes.Stloc, local_str);
909 if (reverse) {
910 // strpos -= length;
911 ilgen.Emit (OpCodes.Ldarg_1);
912 ilgen.Emit (OpCodes.Ldc_I4, length);
913 ilgen.Emit (OpCodes.Sub);
914 ilgen.Emit (OpCodes.Starg, 1);
917 // FIXME: Emit a loop for long strings
918 end = start + length;
919 for (; start < end; ++start) {
920 //if (str [strpos] != program [start])
921 // return false;
922 ilgen.Emit (OpCodes.Ldloc, local_str);
923 ilgen.Emit (OpCodes.Ldarg_1);
924 ilgen.Emit (OpCodes.Callvirt, typeof (string).GetMethod ("get_Chars"));
925 if (ignore)
926 ilgen.Emit (OpCodes.Call, typeof (Char).GetMethod ("ToLower", new Type [] { typeof (char) }));
927 ilgen.Emit (OpCodes.Ldc_I4, (int)program [start]);
928 ilgen.Emit (OpCodes.Bne_Un, frame.label_fail);
929 //strpos++;
930 ilgen.Emit (OpCodes.Ldarg_1);
931 ilgen.Emit (OpCodes.Ldc_I4_1);
932 ilgen.Emit (OpCodes.Add);
933 ilgen.Emit (OpCodes.Starg, 1);
936 if (reverse) {
937 // strpos -= length;
938 ilgen.Emit (OpCodes.Ldarg_1);
939 ilgen.Emit (OpCodes.Ldc_I4, length);
940 ilgen.Emit (OpCodes.Sub);
941 ilgen.Emit (OpCodes.Starg, 1);
943 #endif
945 pc = end;
946 break;
948 case RxOp.OpenGroup: {
949 //Open (program [pc + 1] | (program [pc + 2] << 8), strpos);
950 int group_id = program [pc + 1] | (program [pc + 2] << 8);
951 ilgen.Emit (OpCodes.Ldarg_0);
952 ilgen.Emit (OpCodes.Ldc_I4, group_id);
953 ilgen.Emit (OpCodes.Ldarg_1);
954 ilgen.Emit (OpCodes.Call, mi_open);
956 pc += 3;
957 break;
959 case RxOp.CloseGroup: {
960 //Close (program [pc + 1] | (program [pc + 2] << 8), strpos);
961 int group_id = program [pc + 1] | (program [pc + 2] << 8);
962 ilgen.Emit (OpCodes.Ldarg_0);
963 ilgen.Emit (OpCodes.Ldc_I4, group_id);
964 ilgen.Emit (OpCodes.Ldarg_1);
965 ilgen.Emit (OpCodes.Call, mi_close);
967 pc += 3;
968 break;
970 case RxOp.Jump: {
971 pc += program [pc + 1] | (program [pc + 2] << 8);
972 break;
case RxOp.TestCharGroup: {
	/*
	 * A group of character tests. The two bytes after the opcode hold
	 * the offset (low byte first) from the opcode to the end of the
	 * group; the ops in between are the individual tests.
	 *
	 * The offset has to be assembled before it is added to pc: '+' binds
	 * tighter than '|', so without the parentheses the high byte would be
	 * ORed into (pc + low byte) and the computed end could be wrong as
	 * soon as the high byte is non-zero.
	 */
	int char_group_end = pc + (program [pc + 1] | (program [pc + 2] << 8));
	pc += 3;

	// Branch target for "the group as a whole matched"
	Label label_match = ilgen.DefineLabel ();

	/* Determine the negate/reverse flags by examining the first op */
	OpFlags flags = (OpFlags)op_flags [pc];

	/* Determine whether this is a negated character class */
	/* If it is, then the conditions are ANDed together, not ORed */
	bool revert = (flags & OpFlags.Negate) > 0;
	bool reverse = (flags & OpFlags.RightToLeft) > 0;

	/*
	 * Generate code for all the matching ops in the group.
	 * Each op is emitted on its own (one_op == true) into a fresh frame,
	 * so that its pass/fail labels can be wired up individually below.
	 * The recursive call also advances pc past the op it handled.
	 */
	while (pc < char_group_end) {
		Frame new_frame = new Frame (ilgen);
		m = EmitEvalMethodBody (m, ilgen, new_frame, program, pc, true, true, out pc);
		if (m == null)
			return null;

		if (!revert) {
			// Pass: one matching test is enough
			ilgen.MarkLabel (new_frame.label_pass);
			ilgen.Emit (OpCodes.Br, label_match);

			// Fail
			// Just fall through to the next test
			ilgen.MarkLabel (new_frame.label_fail);
		} else {
			// Pass
			// Just fall through to the next test
			ilgen.MarkLabel (new_frame.label_pass);
			Label l2 = ilgen.DefineLabel ();
			ilgen.Emit (OpCodes.Br, l2);

			// Fail
			// Fail completely: every test of a negated group must pass
			ilgen.MarkLabel (new_frame.label_fail);
			ilgen.Emit (OpCodes.Br, frame.label_fail);

			ilgen.MarkLabel (l2);
		}
	}

	if (revert) {
		/* Success: none of the tests branched to the failure label */
		ilgen.Emit (OpCodes.Br, label_match);
	} else {
		// If we reached here, all the matching ops have failed
		ilgen.Emit (OpCodes.Br, frame.label_fail);
	}

	ilgen.MarkLabel (label_match);

	// strpos++ / strpos--;
	// The position is bumped here, once for the whole group.
	ilgen.Emit (OpCodes.Ldarg_1);
	ilgen.Emit (OpCodes.Ldc_I4_1);
	if (reverse)
		ilgen.Emit (OpCodes.Sub);
	else
		ilgen.Emit (OpCodes.Add);
	ilgen.Emit (OpCodes.Starg, 1);

	break;
}
case RxOp.Repeat: {
	// FIXME: This is the old repeat, need to reimplement it as
	// FastRepeat/FastRepeatLazy. The general Repeat/Until opcodes
	// are somewhat complex, probably not worth emitting them as IL.

	/*
	 * Shape of the generated code (strpos is argument 1):
	 *
	 *         length = 0;
	 *         goto check;
	 * body:   old_strpos = strpos;
	 *         <ops of the repeated expression>
	 * fail:   strpos = old_strpos;
	 *         if (length >= start) goto repeat_success;
	 *         goto <enclosing fail>;
	 * pass:   strpos = res; length++;
	 * check:  if (length < end) goto body;
	 *         if (length != end) goto <enclosing fail>;
	 * repeat_success:
	 */

	// Lower and upper bound of the repeat count, read from the operands
	start = ReadInt (program, pc + 3);
	end = ReadInt (program, pc + 7);

	LocalBuilder local_length = ilgen.DeclareLocal (typeof (int));

	Label label_repeat_success = ilgen.DefineLabel ();

	//length = 0;
	// This has to be emitted explicitly: the local keeps its value when
	// the generated code for this repeat is executed again (e.g. when it
	// is part of the body of an enclosing repeat), so relying on the
	// zero-initialization of locals only works for the first execution.
	ilgen.Emit (OpCodes.Ldc_I4_0);
	ilgen.Emit (OpCodes.Stloc, local_length);

	//while (length < end) {
	// -> done at the end of the loop
	Label l1 = ilgen.DefineLabel ();
	Label l2 = ilgen.DefineLabel ();
	ilgen.Emit (OpCodes.Br, l2);
	ilgen.MarkLabel (l1);

	//if (!EvalByteCode (pc + 11, strpos, ref res)) {

	Frame new_frame = new Frame (ilgen);

	// old_strpos = strpos;
	LocalBuilder local_old_strpos = ilgen.DeclareLocal (typeof (int));
	ilgen.Emit (OpCodes.Ldarg_1);
	ilgen.Emit (OpCodes.Stloc, local_old_strpos);

	// The repeated expression starts 11 bytes after the Repeat opcode
	m = EmitEvalMethodBody (m, ilgen, new_frame, program, pc + 11, false, false, out out_pc);
	if (m == null)
		return null;

	// Fail
	ilgen.MarkLabel (new_frame.label_fail);
	// strpos = old_strpos;
	// The ops of the body update strpos in place as they match, so a
	// failed iteration can leave it in the middle of a partial match.
	// Go back to where this iteration started before continuing.
	ilgen.Emit (OpCodes.Ldloc, local_old_strpos);
	ilgen.Emit (OpCodes.Starg, 1);
	//if (length >= start) {
	//  goto repeat_success;
	//}
	ilgen.Emit (OpCodes.Ldloc, local_length);
	ilgen.Emit (OpCodes.Ldc_I4, start);
	ilgen.Emit (OpCodes.Bge, label_repeat_success);
	//return false;
	ilgen.Emit (OpCodes.Br, frame.label_fail);

	// Pass
	ilgen.MarkLabel (new_frame.label_pass);
	// strpos = res;
	ilgen.Emit (OpCodes.Ldloc, new_frame.local_strpos_res);
	ilgen.Emit (OpCodes.Starg, 1);
	// length++;
	ilgen.Emit (OpCodes.Ldloc, local_length);
	ilgen.Emit (OpCodes.Ldc_I4_1);
	ilgen.Emit (OpCodes.Add);
	ilgen.Emit (OpCodes.Stloc, local_length);

	// Loop condition: while (length < end)
	ilgen.MarkLabel (l2);
	ilgen.Emit (OpCodes.Ldloc, local_length);
	ilgen.Emit (OpCodes.Ldc_I4, end);
	ilgen.Emit (OpCodes.Blt, l1);

	//if (length != end)
	//	return false;
	ilgen.Emit (OpCodes.Ldloc, local_length);
	ilgen.Emit (OpCodes.Ldc_I4, end);
	ilgen.Emit (OpCodes.Bne_Un, frame.label_fail);

	//repeat_success:
	ilgen.MarkLabel (label_repeat_success);

	// Continue code generation behind the repeat; the two bytes after the
	// opcode hold the offset (low byte first) relative to the opcode.
	pc += program [pc + 1] | (program [pc + 2] << 8);
	break;
}
1115 #if FALSE
1116 //if (strpos < string_end && str [strpos] != '\n') {
1117 ilgen.Emit (OpCodes.Ldarg_1);
1118 ilgen.Emit (OpCodes.Ldarg_0);
1119 ilgen.Emit (OpCodes.Ldfld, fi_string_end);
1120 ilgen.Emit (OpCodes.Bge, frame.label_fail);
1121 ilgen.Emit (OpCodes.Ldarg_0);
1122 ilgen.Emit (OpCodes.Ldfld, fi_str);
1123 ilgen.Emit (OpCodes.Ldarg_1);
1124 ilgen.Emit (OpCodes.Callvirt, typeof (string).GetMethod ("get_Chars"));
1125 ilgen.Emit (OpCodes.Ldc_I4, (int)'\n');
1126 ilgen.Emit (OpCodes.Beq, frame.label_fail);
1127 // strpos++;
1128 ilgen.Emit (OpCodes.Ldarg_1);
1129 ilgen.Emit (OpCodes.Ldc_I4_1);
1130 ilgen.Emit (OpCodes.Add);
1131 ilgen.Emit (OpCodes.Starg, 1);
1133 pc++;
1134 break;
1136 #endif
case RxOp.CategoryAny:
case RxOp.CategoryAnySingleline:
case RxOp.CategoryWord:
case RxOp.CategoryDigit:
case RxOp.CategoryWhiteSpace:
case RxOp.CategoryEcmaWord:
case RxOp.CategoryEcmaWhiteSpace:
case RxOp.CategoryUnicodeSpecials:
case RxOp.CategoryUnicode:
case RxOp.NoCategoryAny:
case RxOp.NoCategoryAnySingleline:
case RxOp.NoCategoryWord:
case RxOp.NoCategoryDigit:
case RxOp.NoCategoryWhiteSpace:
case RxOp.NoCategoryEcmaWord:
case RxOp.NoCategoryEcmaWhiteSpace:
case RxOp.NoCategoryUnicodeSpecials:
case RxOp.NoCategoryUnicode:
case RxOp.CategoryAnyReverse:
case RxOp.CategoryAnySinglelineReverse:
case RxOp.CategoryWordReverse:
case RxOp.CategoryDigitReverse:
case RxOp.CategoryWhiteSpaceReverse:
case RxOp.CategoryEcmaWordReverse:
case RxOp.CategoryEcmaWhiteSpaceReverse:
case RxOp.CategoryUnicodeSpecialsReverse:
case RxOp.CategoryUnicodeReverse:
case RxOp.NoCategoryAnyReverse:
case RxOp.NoCategoryAnySinglelineReverse:
case RxOp.NoCategoryWordReverse:
case RxOp.NoCategoryDigitReverse:
case RxOp.NoCategoryWhiteSpaceReverse:
case RxOp.NoCategoryEcmaWordReverse:
case RxOp.NoCategoryEcmaWhiteSpaceReverse:
case RxOp.NoCategoryUnicodeSpecialsReverse:
case RxOp.NoCategoryUnicodeReverse: {
	/*
	 * All variants of the category tests are handled here. Whether the
	 * variant is negated and/or matches right-to-left is taken from the
	 * flags recorded for this position in op_flags.
	 */
	OpFlags category_flags = (OpFlags)op_flags [pc];
	bool is_negated = (category_flags & OpFlags.Negate) > 0;
	bool is_backwards = (category_flags & OpFlags.RightToLeft) > 0;

	/*
	 * Turn the variant back into the plain opcode: a right-to-left
	 * variant is 2 above it, a negated one 1 above it (this depends on
	 * the numbering of RxOp, which is declared elsewhere).
	 */
	int plain_opcode = (int)op;
	if (is_backwards)
		plain_opcode -= 2;
	if (is_negated)
		plain_opcode -= 1;
	op = (RxOp)plain_opcode;

	// Emitted bounds check:
	//   forward:  if (strpos >= string_end) goto reject;
	//   backward: if (strpos <= 0) goto reject;
	Label label_reject = ilgen.DefineLabel ();
	ilgen.Emit (OpCodes.Ldarg_1);
	if (is_backwards) {
		ilgen.Emit (OpCodes.Ldc_I4_0);
		ilgen.Emit (OpCodes.Ble, label_reject);
	} else {
		ilgen.Emit (OpCodes.Ldarg_0);
		ilgen.Emit (OpCodes.Ldfld, fi_string_end);
		ilgen.Emit (OpCodes.Bge, label_reject);
	}

	// Emitted: c = str [strpos], or c = str [strpos - 1] when going backwards
	LocalBuilder local_ch = ilgen.DeclareLocal (typeof (char));
	ilgen.Emit (OpCodes.Ldarg_0);
	ilgen.Emit (OpCodes.Ldfld, fi_str);
	ilgen.Emit (OpCodes.Ldarg_1);
	if (is_backwards) {
		ilgen.Emit (OpCodes.Ldc_I4_1);
		ilgen.Emit (OpCodes.Sub);
	}
	ilgen.Emit (OpCodes.Callvirt, typeof (string).GetMethod ("get_Chars"));
	ilgen.Emit (OpCodes.Stloc, local_ch);

	Label label_accept = ilgen.DefineLabel ();

	// Where to go when the category condition holds / does not hold.
	// For a negated variant the two targets trade places.
	Label on_true, on_false;
	if (is_negated) {
		on_true = label_reject;
		on_false = label_accept;
	} else {
		on_true = label_accept;
		on_false = label_reject;
	}

	// Signature used to look up the Char.Xxx (char) helper methods
	Type [] char_arg = new Type [] { typeof (char) };

	// Each section emits branches to on_true for the characters that
	// belong to the category and falls through otherwise.
	switch (op) {
	case RxOp.CategoryAny:
		// c != '\n'
		ilgen.Emit (OpCodes.Ldloc, local_ch);
		ilgen.Emit (OpCodes.Ldc_I4, (int)'\n');
		ilgen.Emit (OpCodes.Bne_Un, on_true);
		break;
	case RxOp.CategoryAnySingleline:
		// Every character qualifies
		ilgen.Emit (OpCodes.Br, on_true);
		break;
	case RxOp.CategoryWord:
		// Char.IsLetterOrDigit (c) ||
		// Char.GetUnicodeCategory (c) == UnicodeCategory.ConnectorPunctuation
		ilgen.Emit (OpCodes.Ldloc, local_ch);
		ilgen.Emit (OpCodes.Call, typeof (Char).GetMethod ("IsLetterOrDigit", char_arg));
		ilgen.Emit (OpCodes.Brtrue, on_true);
		ilgen.Emit (OpCodes.Ldloc, local_ch);
		ilgen.Emit (OpCodes.Call, typeof (Char).GetMethod ("GetUnicodeCategory", char_arg));
		ilgen.Emit (OpCodes.Ldc_I4, (int)UnicodeCategory.ConnectorPunctuation);
		ilgen.Emit (OpCodes.Beq, on_true);
		break;
	case RxOp.CategoryDigit:
	case RxOp.CategoryWhiteSpace: {
		// Char.IsDigit (c) resp. Char.IsWhiteSpace (c)
		string predicate_name = (op == RxOp.CategoryDigit) ? "IsDigit" : "IsWhiteSpace";
		ilgen.Emit (OpCodes.Ldloc, local_ch);
		ilgen.Emit (OpCodes.Call, typeof (Char).GetMethod (predicate_name, char_arg));
		ilgen.Emit (OpCodes.Brtrue, on_true);
		break;
	}
	case RxOp.CategoryEcmaWord: {
		// 'a' <= c <= 'z' || 'A' <= c <= 'Z' || '0' <= c <= '9' || c == '_'
		char [][] word_ranges = {
			new char [] { 'a', 'z' },
			new char [] { 'A', 'Z' },
			new char [] { '0', '9' }
		};
		foreach (char [] range in word_ranges) {
			// (c > low - 1) & (c < high + 1)
			ilgen.Emit (OpCodes.Ldloc, local_ch);
			ilgen.Emit (OpCodes.Ldc_I4, (int)range [0] - 1);
			ilgen.Emit (OpCodes.Cgt);
			ilgen.Emit (OpCodes.Ldloc, local_ch);
			ilgen.Emit (OpCodes.Ldc_I4, (int)range [1] + 1);
			ilgen.Emit (OpCodes.Clt);
			ilgen.Emit (OpCodes.And);
			ilgen.Emit (OpCodes.Brtrue, on_true);
		}

		ilgen.Emit (OpCodes.Ldloc, local_ch);
		ilgen.Emit (OpCodes.Ldc_I4, (int)'_');
		ilgen.Emit (OpCodes.Beq, on_true);
		break;
	}
	case RxOp.CategoryEcmaWhiteSpace: {
		// c is one of ' ', '\t', '\n', '\r', '\f', '\v'
		char [] ecma_spaces = { ' ', '\t', '\n', '\r', '\f', '\v' };
		foreach (char space in ecma_spaces) {
			ilgen.Emit (OpCodes.Ldloc, local_ch);
			ilgen.Emit (OpCodes.Ldc_I4, (int)space);
			ilgen.Emit (OpCodes.Beq, on_true);
		}
		break;
	}
	case RxOp.CategoryUnicodeSpecials: {
		// c == '\uFEFF' || '\uFFF0' <= c <= '\uFFFD'
		char [][] special_ranges = {
			new char [] { '\uFEFF', '\uFEFF' },
			new char [] { '\uFFF0', '\uFFFD' }
		};
		foreach (char [] range in special_ranges) {
			// (c > low - 1) & (c < high + 1)
			ilgen.Emit (OpCodes.Ldloc, local_ch);
			ilgen.Emit (OpCodes.Ldc_I4, (int)range [0] - 1);
			ilgen.Emit (OpCodes.Cgt);
			ilgen.Emit (OpCodes.Ldloc, local_ch);
			ilgen.Emit (OpCodes.Ldc_I4, (int)range [1] + 1);
			ilgen.Emit (OpCodes.Clt);
			ilgen.Emit (OpCodes.And);
			ilgen.Emit (OpCodes.Brtrue, on_true);
		}
		break;
	}
	case RxOp.CategoryUnicode:
		// Char.GetUnicodeCategory (c) == the category stored after the opcode
		ilgen.Emit (OpCodes.Ldloc, local_ch);
		ilgen.Emit (OpCodes.Call, typeof (Char).GetMethod ("GetUnicodeCategory", char_arg));
		ilgen.Emit (OpCodes.Ldc_I4, (int)program [pc + 1]);
		ilgen.Emit (OpCodes.Beq, on_true);
		break;
	}

	// None of the branches above was taken: the condition is false
	ilgen.Emit (OpCodes.Br, on_false);

	ilgen.MarkLabel (label_accept);

	// strpos++ (strpos-- when going backwards)
	ilgen.Emit (OpCodes.Ldarg_1);
	ilgen.Emit (OpCodes.Ldc_I4_1);
	ilgen.Emit (is_backwards ? OpCodes.Sub : OpCodes.Add);
	ilgen.Emit (OpCodes.Starg, 1);

	// Jump over the rejection code
	Label label_done = ilgen.DefineLabel ();
	ilgen.Emit (OpCodes.Br, label_done);

	// Rejected: the enclosing frame fails
	ilgen.MarkLabel (label_reject);
	ilgen.Emit (OpCodes.Br, frame.label_fail);

	ilgen.MarkLabel (label_done);

	// CategoryUnicode carries one operand byte, the other ops have none
	pc += (op == RxOp.CategoryUnicode) ? 2 : 1;
	break;
}
default: {
	// An opcode without IL support: report it on the console and hand
	// null back to the caller instead of a compiled method.
	string complaint = "Opcode " + op + " not supported.";
	Console.WriteLine (complaint);
	return null;
}
1363 if (one_op)
1364 break;
1367 End:
1369 out_pc = pc;
1371 return m;
1374 #else
1375 class CILCompiler : RxCompiler {
1377 #endif