**** Merged from MCS ****
[mono-project.git] / mcs / class / System / System.Text.RegularExpressions / compiler.cs
bloba5443ff69f84c73aed2ccd663d0072875ddfdede
1 //
2 // assembly: System
3 // namespace: System.Text.RegularExpressions
4 // file: compiler.cs
5 //
6 // author: Dan Lewis (dlewis@gmx.co.uk)
7 // (c) 2002
9 //
10 // Permission is hereby granted, free of charge, to any person obtaining
11 // a copy of this software and associated documentation files (the
12 // "Software"), to deal in the Software without restriction, including
13 // without limitation the rights to use, copy, modify, merge, publish,
14 // distribute, sublicense, and/or sell copies of the Software, and to
15 // permit persons to whom the Software is furnished to do so, subject to
16 // the following conditions:
17 //
18 // The above copyright notice and this permission notice shall be
19 // included in all copies or substantial portions of the Software.
20 //
21 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
25 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
30 using System;
31 using System.Collections;
33 namespace System.Text.RegularExpressions {
34 abstract class LinkRef {
35 // empty
38 interface ICompiler {
39 void Reset ();
40 IMachineFactory GetMachineFactory ();
42 // instruction emission
44 void EmitFalse ();
45 void EmitTrue ();
47 // character matching
49 void EmitCharacter (char c, bool negate, bool ignore, bool reverse);
50 void EmitCategory (Category cat, bool negate, bool reverse);
51 void EmitRange (char lo, char hi, bool negate, bool ignore, bool reverse);
52 void EmitSet (char lo, BitArray set, bool negate, bool ignore, bool reverse);
54 // other operators
56 void EmitString (string str, bool ignore, bool reverse);
57 void EmitPosition (Position pos);
58 void EmitOpen (int gid);
59 void EmitClose (int gid);
60 void EmitBalanceStart(int gid, int balance, bool capture, LinkRef tail);
61 void EmitBalance ();
62 void EmitReference (int gid, bool ignore, bool reverse);
64 // constructs
66 void EmitIfDefined (int gid, LinkRef tail);
67 void EmitSub (LinkRef tail);
68 void EmitTest (LinkRef yes, LinkRef tail);
69 void EmitBranch (LinkRef next);
70 void EmitJump (LinkRef target);
71 void EmitRepeat (int min, int max, bool lazy, LinkRef until);
72 void EmitUntil (LinkRef repeat);
73 void EmitIn (LinkRef tail);
74 void EmitInfo (int count, int min, int max);
75 void EmitFastRepeat (int min, int max, bool lazy, LinkRef tail);
76 void EmitAnchor (bool reverse, int offset, LinkRef tail);
78 // event for the CILCompiler
79 void EmitBranchEnd();
80 void EmitAlternationEnd();
82 LinkRef NewLink ();
83 void ResolveLink (LinkRef link);
86 class InterpreterFactory : IMachineFactory {
87 public InterpreterFactory (ushort[] pattern) {
88 this.pattern = pattern;
91 public IMachine NewInstance () {
92 return new Interpreter (pattern);
95 public int GroupCount {
96 get { return pattern[0]; }
99 public IDictionary Mapping {
100 get { return mapping; }
101 set { mapping = value; }
104 private IDictionary mapping;
105 private ushort[] pattern;
108 class PatternCompiler : ICompiler {
109 public static ushort EncodeOp (OpCode op, OpFlags flags) {
110 return (ushort)((int)op | ((int)flags & 0xff00));
113 public static void DecodeOp (ushort word, out OpCode op, out OpFlags flags) {
114 op = (OpCode)(word & 0x00ff);
115 flags = (OpFlags)(word & 0xff00);
118 public PatternCompiler () {
119 pgm = new ArrayList ();
122 // ICompiler implementation
124 public void Reset () {
125 pgm.Clear ();
128 public IMachineFactory GetMachineFactory () {
129 ushort[] image = new ushort[pgm.Count];
130 pgm.CopyTo (image);
132 return new InterpreterFactory (image);
135 public void EmitFalse () {
136 Emit (OpCode.False);
139 public void EmitTrue () {
140 Emit (OpCode.True);
143 public void EmitCharacter (char c, bool negate, bool ignore, bool reverse) {
144 Emit (OpCode.Character, MakeFlags (negate, ignore, reverse, false));
146 if (ignore)
147 c = Char.ToLower (c);
149 Emit ((ushort)c);
152 public void EmitCategory (Category cat, bool negate, bool reverse) {
153 Emit (OpCode.Category, MakeFlags (negate, false, reverse, false));
154 Emit ((ushort)cat);
157 public void EmitRange (char lo, char hi, bool negate, bool ignore, bool reverse) {
158 Emit (OpCode.Range, MakeFlags (negate, ignore, reverse, false));
159 Emit ((ushort)lo);
160 Emit ((ushort)hi);
163 public void EmitSet (char lo, BitArray set, bool negate, bool ignore, bool reverse) {
164 Emit (OpCode.Set, MakeFlags (negate, ignore, reverse, false));
165 Emit ((ushort)lo);
167 int len = (set.Length + 0xf) >> 4;
168 Emit ((ushort)len);
170 int b = 0;
171 while (len -- != 0) {
172 ushort word = 0;
173 for (int i = 0; i < 16; ++ i) {
174 if (b >= set.Length)
175 break;
177 if (set[b ++])
178 word |= (ushort)(1 << i);
181 Emit (word);
185 public void EmitString (string str, bool ignore, bool reverse) {
186 Emit (OpCode.String, MakeFlags (false, ignore, reverse, false));
187 int len = str.Length;
188 Emit ((ushort)len);
190 if (ignore)
191 str = str.ToLower ();
193 for (int i = 0; i < len; ++ i)
194 Emit ((ushort)str[i]);
197 public void EmitPosition (Position pos) {
198 Emit (OpCode.Position, 0);
199 Emit ((ushort)pos);
202 public void EmitOpen (int gid) {
203 Emit (OpCode.Open);
204 Emit ((ushort)gid);
207 public void EmitClose (int gid) {
208 Emit (OpCode.Close);
209 Emit ((ushort)gid);
214 public void EmitBalanceStart (int gid, int balance, bool capture, LinkRef tail) {
215 BeginLink (tail);
216 Emit (OpCode.BalanceStart);
217 Emit ((ushort)gid);
218 Emit ((ushort)balance);
219 Emit ((ushort)(capture ? 1 : 0));
220 EmitLink (tail);
223 public void EmitBalance () {
224 Emit (OpCode.Balance);
227 public void EmitReference (int gid, bool ignore, bool reverse) {
228 Emit (OpCode.Reference, MakeFlags (false, ignore, reverse, false));
229 Emit ((ushort)gid);
232 public void EmitIfDefined (int gid, LinkRef tail) {
233 BeginLink (tail);
234 Emit (OpCode.IfDefined);
235 EmitLink (tail);
236 Emit ((ushort)gid);
239 public void EmitSub (LinkRef tail) {
240 BeginLink (tail);
241 Emit (OpCode.Sub);
242 EmitLink (tail);
245 public void EmitTest (LinkRef yes, LinkRef tail) {
246 BeginLink (yes);
247 BeginLink (tail);
248 Emit (OpCode.Test);
249 EmitLink (yes);
250 EmitLink (tail);
253 public void EmitBranch (LinkRef next) {
254 BeginLink (next);
255 Emit (OpCode.Branch, 0);
256 EmitLink (next);
259 public void EmitJump (LinkRef target) {
260 BeginLink (target);
261 Emit (OpCode.Jump, 0);
262 EmitLink (target);
265 public void EmitRepeat (int min, int max, bool lazy, LinkRef until) {
266 BeginLink (until);
267 Emit (OpCode.Repeat, MakeFlags (false, false, false, lazy));
268 EmitLink (until);
269 Emit ((ushort)min);
270 Emit ((ushort)max);
273 public void EmitUntil (LinkRef repeat) {
274 ResolveLink (repeat);
275 Emit (OpCode.Until);
278 public void EmitFastRepeat (int min, int max, bool lazy, LinkRef tail) {
279 BeginLink (tail);
280 Emit (OpCode.FastRepeat, MakeFlags (false, false, false, lazy));
281 EmitLink (tail);
282 Emit ((ushort)min);
283 Emit ((ushort)max);
286 public void EmitIn (LinkRef tail) {
287 BeginLink (tail);
288 Emit (OpCode.In);
289 EmitLink (tail);
292 public void EmitAnchor (bool reverse, int offset, LinkRef tail) {
293 BeginLink (tail);
294 Emit (OpCode.Anchor, MakeFlags(false, false, reverse, false));
295 EmitLink (tail);
296 Emit ((ushort)offset);
299 public void EmitInfo (int count, int min, int max) {
300 Emit (OpCode.Info);
301 Emit ((ushort)count);
302 Emit ((ushort)min);
303 Emit ((ushort)max);
306 public LinkRef NewLink () {
307 return new PatternLinkStack ();
310 public void ResolveLink (LinkRef lref) {
311 PatternLinkStack stack = (PatternLinkStack)lref;
313 while (stack.Pop ())
314 pgm[stack.OffsetAddress] = (ushort)stack.GetOffset (CurrentAddress);
317 public void EmitBranchEnd(){}
318 public void EmitAlternationEnd(){}
320 // private members
322 private static OpFlags MakeFlags (bool negate, bool ignore, bool reverse, bool lazy) {
323 OpFlags flags = 0;
324 if (negate) flags |= OpFlags.Negate;
325 if (ignore) flags |= OpFlags.IgnoreCase;
326 if (reverse) flags |= OpFlags.RightToLeft;
327 if (lazy) flags |= OpFlags.Lazy;
329 return flags;
332 private void Emit (OpCode op) {
333 Emit (op, (OpFlags)0);
336 private void Emit (OpCode op, OpFlags flags) {
337 Emit (EncodeOp (op, flags));
340 private void Emit (ushort word) {
341 pgm.Add (word);
344 private int CurrentAddress {
345 get { return pgm.Count; }
348 private void BeginLink (LinkRef lref) {
349 PatternLinkStack stack = (PatternLinkStack)lref;
350 stack.BaseAddress = CurrentAddress;
353 private void EmitLink (LinkRef lref) {
354 PatternLinkStack stack = (PatternLinkStack)lref;
355 stack.OffsetAddress = CurrentAddress;
356 Emit ((ushort)0); // placeholder
357 stack.Push ();
361 private class PatternLinkStack : LinkStack {
362 public PatternLinkStack () {
365 public int BaseAddress {
366 set { link.base_addr = value; }
369 public int OffsetAddress {
370 get { return link.offset_addr; }
371 set { link.offset_addr = value; }
374 public int GetOffset (int target_addr) {
375 return target_addr - link.base_addr;
378 // LinkStack implementation
380 protected override object GetCurrent () { return link; }
381 protected override void SetCurrent (object l) { link = (Link)l; }
383 private struct Link {
384 public int base_addr;
385 public int offset_addr;
388 Link link;
391 private ArrayList pgm;
394 abstract class LinkStack : LinkRef {
395 public LinkStack () {
396 stack = new Stack ();
399 public void Push () {
400 stack.Push (GetCurrent ());
403 public bool Pop () {
404 if (stack.Count > 0) {
405 SetCurrent (stack.Pop ());
406 return true;
409 return false;
412 protected abstract object GetCurrent ();
413 protected abstract void SetCurrent (object l);
415 private Stack stack;
418 //Used by CILCompiler and Interpreter
419 internal struct Mark {
420 public int Start, End;
421 public int Previous;
423 public bool IsDefined {
424 get { return Start >= 0 && End >= 0; }
427 public int Index {
428 get { return Start < End ? Start : End; }
431 public int Length {
432 get { return Start < End ? End - Start : Start - End; }