3 // namespace: System.Text.RegularExpressions
6 // author: Dan Lewis (dlewis@gmx.co.uk)
10 // Permission is hereby granted, free of charge, to any person obtaining
11 // a copy of this software and associated documentation files (the
12 // "Software"), to deal in the Software without restriction, including
13 // without limitation the rights to use, copy, modify, merge, publish,
14 // distribute, sublicense, and/or sell copies of the Software, and to
15 // permit persons to whom the Software is furnished to do so, subject to
16 // the following conditions:
18 // The above copyright notice and this permission notice shall be
19 // included in all copies or substantial portions of the Software.
21 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
25 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
31 using System
.Collections
;
33 namespace System
.Text
.RegularExpressions
{
34 abstract class LinkRef
{
40 IMachineFactory
GetMachineFactory ();
42 // instruction emission
49 void EmitCharacter (char c
, bool negate
, bool ignore
, bool reverse
);
50 void EmitCategory (Category cat
, bool negate
, bool reverse
);
51 void EmitRange (char lo
, char hi
, bool negate
, bool ignore
, bool reverse
);
52 void EmitSet (char lo
, BitArray
set, bool negate
, bool ignore
, bool reverse
);
56 void EmitString (string str
, bool ignore
, bool reverse
);
57 void EmitPosition (Position pos
);
58 void EmitOpen (int gid
);
59 void EmitClose (int gid
);
60 void EmitBalanceStart(int gid
, int balance
, bool capture
, LinkRef tail
);
62 void EmitReference (int gid
, bool ignore
, bool reverse
);
66 void EmitIfDefined (int gid
, LinkRef tail
);
67 void EmitSub (LinkRef tail
);
68 void EmitTest (LinkRef yes
, LinkRef tail
);
69 void EmitBranch (LinkRef next
);
70 void EmitJump (LinkRef target
);
71 void EmitRepeat (int min
, int max
, bool lazy
, LinkRef until
);
72 void EmitUntil (LinkRef repeat
);
73 void EmitIn (LinkRef tail
);
74 void EmitInfo (int count
, int min
, int max
);
75 void EmitFastRepeat (int min
, int max
, bool lazy
, LinkRef tail
);
76 void EmitAnchor (bool reverse
, int offset
, LinkRef tail
);
78 // event for the CILCompiler
80 void EmitAlternationEnd();
83 void ResolveLink (LinkRef link
);
86 class InterpreterFactory
: IMachineFactory
{
87 public InterpreterFactory (ushort[] pattern
) {
88 this.pattern
= pattern
;
91 public IMachine
NewInstance () {
92 return new Interpreter (pattern
);
95 public int GroupCount
{
96 get { return pattern[0]; }
99 public IDictionary Mapping
{
100 get { return mapping; }
101 set { mapping = value; }
104 private IDictionary mapping
;
105 private ushort[] pattern
;
108 class PatternCompiler
: ICompiler
{
109 public static ushort EncodeOp (OpCode op
, OpFlags flags
) {
110 return (ushort)((int)op
| ((int)flags
& 0xff00));
113 public static void DecodeOp (ushort word
, out OpCode op
, out OpFlags flags
) {
114 op
= (OpCode
)(word
& 0x00ff);
115 flags
= (OpFlags
)(word
& 0xff00);
118 public PatternCompiler () {
119 pgm
= new ArrayList ();
122 // ICompiler implementation
124 public void Reset () {
128 public IMachineFactory
GetMachineFactory () {
129 ushort[] image
= new ushort[pgm
.Count
];
132 return new InterpreterFactory (image
);
135 public void EmitFalse () {
139 public void EmitTrue () {
143 public void EmitCharacter (char c
, bool negate
, bool ignore
, bool reverse
) {
144 Emit (OpCode
.Character
, MakeFlags (negate
, ignore
, reverse
, false));
147 c
= Char
.ToLower (c
);
152 public void EmitCategory (Category cat
, bool negate
, bool reverse
) {
153 Emit (OpCode
.Category
, MakeFlags (negate
, false, reverse
, false));
157 public void EmitRange (char lo
, char hi
, bool negate
, bool ignore
, bool reverse
) {
158 Emit (OpCode
.Range
, MakeFlags (negate
, ignore
, reverse
, false));
163 public void EmitSet (char lo
, BitArray
set, bool negate
, bool ignore
, bool reverse
) {
164 Emit (OpCode
.Set
, MakeFlags (negate
, ignore
, reverse
, false));
167 int len
= (set.Length
+ 0xf) >> 4;
171 while (len
-- != 0) {
173 for (int i
= 0; i
< 16; ++ i
) {
178 word
|= (ushort)(1 << i
);
185 public void EmitString (string str
, bool ignore
, bool reverse
) {
186 Emit (OpCode
.String
, MakeFlags (false, ignore
, reverse
, false));
187 int len
= str
.Length
;
191 str
= str
.ToLower ();
193 for (int i
= 0; i
< len
; ++ i
)
194 Emit ((ushort)str
[i
]);
197 public void EmitPosition (Position pos
) {
198 Emit (OpCode
.Position
, 0);
202 public void EmitOpen (int gid
) {
207 public void EmitClose (int gid
) {
214 public void EmitBalanceStart (int gid
, int balance
, bool capture
, LinkRef tail
) {
216 Emit (OpCode
.BalanceStart
);
218 Emit ((ushort)balance
);
219 Emit ((ushort)(capture
? 1 : 0));
223 public void EmitBalance () {
224 Emit (OpCode
.Balance
);
227 public void EmitReference (int gid
, bool ignore
, bool reverse
) {
228 Emit (OpCode
.Reference
, MakeFlags (false, ignore
, reverse
, false));
232 public void EmitIfDefined (int gid
, LinkRef tail
) {
234 Emit (OpCode
.IfDefined
);
239 public void EmitSub (LinkRef tail
) {
245 public void EmitTest (LinkRef yes
, LinkRef tail
) {
253 public void EmitBranch (LinkRef next
) {
255 Emit (OpCode
.Branch
, 0);
259 public void EmitJump (LinkRef target
) {
261 Emit (OpCode
.Jump
, 0);
265 public void EmitRepeat (int min
, int max
, bool lazy
, LinkRef until
) {
267 Emit (OpCode
.Repeat
, MakeFlags (false, false, false, lazy
));
273 public void EmitUntil (LinkRef repeat
) {
274 ResolveLink (repeat
);
278 public void EmitFastRepeat (int min
, int max
, bool lazy
, LinkRef tail
) {
280 Emit (OpCode
.FastRepeat
, MakeFlags (false, false, false, lazy
));
286 public void EmitIn (LinkRef tail
) {
292 public void EmitAnchor (bool reverse
, int offset
, LinkRef tail
) {
294 Emit (OpCode
.Anchor
, MakeFlags(false, false, reverse
, false));
296 Emit ((ushort)offset
);
299 public void EmitInfo (int count
, int min
, int max
) {
301 Emit ((ushort)count
);
306 public LinkRef
NewLink () {
307 return new PatternLinkStack ();
310 public void ResolveLink (LinkRef lref
) {
311 PatternLinkStack stack
= (PatternLinkStack
)lref
;
314 pgm
[stack
.OffsetAddress
] = (ushort)stack
.GetOffset (CurrentAddress
);
317 public void EmitBranchEnd(){}
318 public void EmitAlternationEnd(){}
322 private static OpFlags
MakeFlags (bool negate
, bool ignore
, bool reverse
, bool lazy
) {
324 if (negate
) flags
|= OpFlags
.Negate
;
325 if (ignore
) flags
|= OpFlags
.IgnoreCase
;
326 if (reverse
) flags
|= OpFlags
.RightToLeft
;
327 if (lazy
) flags
|= OpFlags
.Lazy
;
332 private void Emit (OpCode op
) {
333 Emit (op
, (OpFlags
)0);
336 private void Emit (OpCode op
, OpFlags flags
) {
337 Emit (EncodeOp (op
, flags
));
340 private void Emit (ushort word
) {
344 private int CurrentAddress
{
345 get { return pgm.Count; }
348 private void BeginLink (LinkRef lref
) {
349 PatternLinkStack stack
= (PatternLinkStack
)lref
;
350 stack
.BaseAddress
= CurrentAddress
;
353 private void EmitLink (LinkRef lref
) {
354 PatternLinkStack stack
= (PatternLinkStack
)lref
;
355 stack
.OffsetAddress
= CurrentAddress
;
356 Emit ((ushort)0); // placeholder
361 private class PatternLinkStack
: LinkStack
{
362 public PatternLinkStack () {
365 public int BaseAddress
{
366 set { link.base_addr = value; }
369 public int OffsetAddress
{
370 get { return link.offset_addr; }
371 set { link.offset_addr = value; }
374 public int GetOffset (int target_addr
) {
375 return target_addr
- link
.base_addr
;
378 // LinkStack implementation
380 protected override object GetCurrent () { return link; }
381 protected override void SetCurrent (object l
) { link = (Link)l; }
383 private struct Link
{
384 public int base_addr
;
385 public int offset_addr
;
391 private ArrayList pgm
;
394 abstract class LinkStack
: LinkRef
{
395 public LinkStack () {
396 stack
= new Stack ();
399 public void Push () {
400 stack
.Push (GetCurrent ());
404 if (stack
.Count
> 0) {
405 SetCurrent (stack
.Pop ());
412 protected abstract object GetCurrent ();
413 protected abstract void SetCurrent (object l
);
418 //Used by CILCompiler and Interpreter
419 internal struct Mark
{
420 public int Start
, End
;
423 public bool IsDefined
{
424 get { return Start >= 0 && End >= 0; }
428 get { return Start < End ? Start : End; }
432 get { return Start < End ? End - Start : Start - End; }