2 coded by Ketmar // Vampire Avalon (psyc://ketmar.no-ip.org/~Ketmar)
3 Understanding is not required. Only obedience.
5 This program is free software. It comes without any warranty, to
6 the extent permitted by applicable law. You can redistribute it
7 and/or modify it under the terms of the Do What The Fuck You Want
8 To Public License, Version 2, as published by Sam Hocevar. See
9 http://sam.zoy.org/wtfpl/COPYING for more details.
11 " simple regular expression class
17 ^ at the start of re: match should start at the start of the line
18 $ at the end of re: match should end at the end of the line
20 any atom can be followed by '?', '+' or '*'; append another '?' to make the repetition non-greedy
26 atoms string position hasSOL hasEOL
27 matchStart matchEnd captures captureCount |
30 self error: 'use "SimpleRegExp new:" to create SimpleRegExp instance'
36 obj parseString: aStr.
40 ^match: aRex for: aStr [
"Convenience entry: creates an instance for the pattern aRex by sending new:
 to self and keeps it in obj.
 NOTE(review): the statements that run the match against aStr, and the value
 answered, are on lines not visible in this excerpt - confirm."
42 obj := self new: aRex.
47 "-------- getters --------"
66 ^(captures at: idx) at: 1
70 ^(captures at: idx) at: 2
77 "-------- atom adder --------"
79 atoms := atoms with: aSymbol
82 addAtom: aSymbol arg: aArg [
"Records one parsed atom as the pair (aSymbol, aArg), built with Array with:with:.
 atoms is reassigned to the result of atoms with:, so with: presumably answers
 an extended collection rather than growing the receiver in place - TODO confirm."
83 atoms := atoms with: (Array with: aSymbol with: aArg)
87 "-------- char eater --------"
"Look-ahead read: answers nil once position is past the end of string,
 otherwise the character at position. position itself is not changed.
 NOTE(review): presumably the body of peekChar; its header line is not part
 of this excerpt."
89 position > string size ifTrue: [ ^nil ]
90 ifFalse: [^ string at: position ].
"Consuming read: answers nil at end of input without moving; otherwise the
 character at position is stored in c and position advances by one.
 NOTE(review): presumably the body of nextChar; the header, the declaration
 of c and the final answer statement are not visible in this excerpt."
95 position > string size ifTrue: [ ^nil ]
97 c := string at: position.
98 position := position + 1.
103 "-------- parser --------"
105 rexStr := string := aStr.
107 atoms := Array new: 0.
112 self peekChar == $? ifTrue: [
119 parserAddWild: aName arg: aArg [
"Adds a repeat atom whose matcher selector is derived from aName.
 aName  - base name of the repeat matcher, e.g. zeroOrOne or zeroOrMore
 aArg   - the atom being repeated; stored as the argument of the new atom
 The selector is aName, plus NonGreedy when isNonGreedy answers true, plus
 :from: - i.e. one of zeroOrOne:from:, zeroOrOneNonGreedy:from:,
 zeroOrMore:from: or zeroOrMoreNonGreedy:from:."
120 self isNonGreedy ifTrue: [ aName := aName + 'NonGreedy' ].
121 aName := aName + ':from:'.
"the assembled string is turned into a Symbol so it can be performed later"
122 self addAtom: aName asSymbol arg: aArg
126 | c curAtom cpt cptClosed |
"Pattern parser main loop: reads the pattern one character at a time with
 nextChar and records matcher atoms through addAtom:arg: and
 parserAddWild:arg:.
 NOTE(review): presumably the body of parse; the header line and several
 interior lines are not part of this excerpt."
128 hasSOL := false. hasEOL := false.
"a leading ^ is tested here; NOTE(review): the branch body is not visible,
 presumably it consumes the character and sets hasSOL"
129 self peekChar == $^ ifTrue: [
133 [ c := self nextChar ] whileNotNil: [
"an opening ( starts a capture group: the character after it becomes the
 current one, the capture counter is bumped and a captureStart: atom is added"
134 (cpt := c == $() ifTrue: [
135 (c := self nextChar) ifNil: [ self error: 'unexpected end of RegExp' ].
136 captureCount := captureCount + 1.
137 self addAtom: #captureStart: arg: captureCount.
"atom kinds: [ delegates to parseRange, . becomes an any: atom"
140 case: $[ do: [ curAtom := self parseRange ];
141 case: $. do: [ curAtom := Array with: #any: with: nil ];
"when nothing is left to read, hasEOL is set and parsing ends; otherwise the
 $ is kept as an ordinary literal character"
143 position > string size ifTrue: [ hasEOL := true. ^self ].
144 curAtom := Array with: #char: with: $$.
"every other character is a literal char: atom"
146 else: [:c | curAtom := Array with: #char: with: c ].
"NOTE(review): look-ahead for a closing ) directly after the atom; only part
 of this branch is visible. cptClosed is set when a repeat suffix follows,
 otherwise position is stepped back by one."
148 self peekChar == $) ifTrue: [
149 position > (string size + 1) ifFalse: [
151 ('?*+' includes: self peekChar) ifTrue: [ cptClosed := true. ] ifFalse: [ position := position - 1 ]
"optional repeat suffix: ? and * go through parserAddWild:arg:, + is encoded
 as one mandatory match followed by zeroOrMore, anything else means exactly
 one match"
154 Case test: self peekChar;
155 case: $? do: [ self nextChar. self parserAddWild: 'zeroOrOne' arg: curAtom ];
156 case: $* do: [ self nextChar. self parserAddWild: 'zeroOrMore' arg: curAtom ];
157 case: $+ do: [ self nextChar.
158 self addAtom: #one:from: arg: curAtom.
159 self parserAddWild: 'zeroOrMore' arg: curAtom.
161 else: [:c | self addAtom: #one:from: arg: curAtom. ].
"close the capture group: unless cptClosed is true, the next character must
 be ) or an error is raised; then the captureEnd: atom is added"
164 cptClosed ifFalse: [ self nextChar == $) ifFalse: [ self error: 'missing ")"' ]].
165 self addAtom: #captureEnd: arg: captureCount.
"Parses a bracket expression and answers a two-element Array: the selector
 range: or rangeNot: plus the string range holding the accepted characters.
 Running out of pattern text raises an error.
 NOTE(review): presumably the body of parseRange; the header, the temporaries
 and the initialisation of range are not visible in this excerpt."
172 (c := self peekChar) ifNil: [ self error: 'unexpected end of RegExp' ].
"a leading ^ negates the class"
173 (isNot := c == $^) ifTrue: [ self nextChar ].
174 (c := self peekChar) ifNil: [ self error: 'unexpected end of RegExp' ].
"a ] in first position is taken literally"
176 (c == $]) ifTrue: [ self nextChar. range := range + ']' ].
177 [ c := self nextChar ] whileNotNil: [
"answer point inside the loop; NOTE(review): its guard is on a line not shown,
 presumably a test for the closing ]"
179 ^Array with: (isNot ifTrue: [ #rangeNot: ] ifFalse: [ #range: ]) with: range.
"c followed by - starts an interval from c to ce"
181 self peekChar == $- ifTrue: [
183 (ce := self nextChar) ifNil: [ self error: 'unexpected end of RegExp' ].
"NOTE(review): this path appends a literal - and answers at once; its guard is
 not visible, presumably ce being the closing ]"
185 range := range + '-'.
186 ^Array with: (isNot ifTrue: [ #rangeNot: ] ifFalse: [ #range: ]) with: range.
"expand the interval: one Char is appended per value from c value to ce value"
189 c value to: ce value do: [:c | range := range + (Char new: c) ].
190 ] ifFalse: [ range := range + c ].
"the loop ended because nextChar answered nil before the class was closed"
192 self error: 'unexpected end of RegExp'
196 "-------- captures --------"
"Creates the two-slot record for capture aNo on first use and stores the
 current position in slot 1.
 NOTE(review): presumably the body of captureStart: aNo; header not visible."
199 (c := captures at: aNo) ifNil: [ captures at: aNo put: (c := Array new: 2) ].
200 c at: 1 put: position.
"Creates the two-slot record for capture aNo on first use and stores
 position - 1 in slot 2, i.e. the index just before the current position.
 NOTE(review): presumably the body of captureEnd: aNo; header not visible."
205 (c := captures at: aNo) ifNil: [ captures at: aNo put: (c := Array new: 2) ].
206 c at: 2 put: position - 1.
210 "-------- simple matchers --------"
212 ^self nextChar notNil
216 ^self nextChar = aChar
221 (c := self nextChar) ifNil: [ ^false ].
"Negated character class: answers false at end of input; otherwise consumes
 one character and answers true when aString does not include it.
 NOTE(review): presumably the body of rangeNot: aString; header not visible."
227 (c := self nextChar) ifNil: [ ^false ].
228 ^(aString includes: c) not
232 "-------- repeat matchers --------"
233 one: aAtom from: aPosNext [
"Matches aAtom exactly once. aAtom is a pair: a matcher selector at index 1
 and its argument at index 2, performed on self.
 Answers nil when the atom does not match.
 NOTE(review): the value answered on success is on a line not shown."
234 (self perform: (aAtom at: 1) with: (aAtom at: 2)) ifFalse: [ ^nil ].
238 zeroOrOne: aAtom from: aPosNext [
"Greedy optional atom: the atom is tried first, and with it consumed the
 remaining atoms are matched from aPosNext; success answers true.
 NOTE(review): the branch structure around these statements is only partly
 visible in this excerpt."
239 (self perform: (aAtom at: 1) with: (aAtom at: 2))
241 "try to match the rest"
242 (self matchFrom: aPosNext) ifTrue: [ ^true ].
"step back over the character taken by the atom"
245 position := position - 1.
249 zeroOrMore: aAtom from: aPosNext [
"Greedy repetition with backtracking: findLastMatch: is run for aAtom first;
 then, while position is beyond stpos, the remaining atoms are tried from
 aPosNext and one character is given back after each failed attempt.
 NOTE(review): stpos is set on a line not shown, presumably to the position
 on entry; what happens after the loop is not visible either."
252 self findLastMatch: aAtom.
253 [ position > stpos ] whileTrue: [
254 (self matchFrom: aPosNext) ifTrue: [ ^true ].
255 position := position - 1.
260 findLastMatch: aAtom [
261 "find the last match for aAtom"
"Repeats the matcher sym with arg until position passes the end of string.
 NOTE(review): sym and arg are set on lines not shown, presumably from
 aAtom at: 1 and aAtom at: 2."
265 [ position > string size ] whileFalse: [
266 (self perform: sym with: arg) ifFalse: [
"on a failed match position is stepped back by one"
267 position := position - 1.
274 zeroOrOneNonGreedy: aAtom from: aPosNext [
"Non-greedy optional atom: the remaining atoms are tried first without
 consuming anything; only if that fails is aAtom matched once. A failed atom
 answers nil."
275 "try to match the rest"
276 (self matchFrom: aPosNext) ifTrue: [ ^true ].
277 (self perform: (aAtom at: 1) with: (aAtom at: 2)) ifFalse: [ ^nil ]. "failure"
"NOTE(review): a statement between these two lines is not shown; here position
 is stepped back by one"
279 position := position - 1.
283 zeroOrMoreNonGreedy: aAtom from: aPosNext [
"Non-greedy repetition: the remaining atoms are tried from aPosNext before
 each further repetition of the atom; a repetition that fails answers nil.
 NOTE(review): sym and arg are set on lines not shown, and the looping
 selector that joins the two blocks below is not visible either."
287 [ (self matchFrom: aPosNext) ifTrue: [ ^true ].
288 position > string size ]
290 (self perform: sym with: arg) ifFalse: [ ^nil ]. "failure"
296 "-------- sequence matcher --------"
298 | opos res atom sym arg |
"Runs the atoms from index aPos onward against string at the current position.
 NOTE(review): presumably the body of matchFrom: aPos; the header, the setting
 of opos and arg, and the stepping of aPos are on lines not shown."
300 [ aPos > atoms size ] whileFalse: [
301 sym := (atom := atoms at: aPos) at: 1.
"capture markers are handled by direct sends"
304 sym == #captureStart: ifTrue: [ self captureStart: arg ]
306 sym == #captureEnd: ifTrue: [ self captureEnd: arg ]
"every other atom is performed with its argument and aPos; a nil result
 restores position from opos and answers false, a true result answers true"
308 (res := (self perform: sym with: arg with: aPos)) ifNil: [ position := opos. ^false ].
309 res ifTrue: [ ^true ].
"after the loop: when hasEOL is set the match only succeeds if position is past
 the end of string; on success matchEnd becomes position - 1"
313 hasEOL ifTrue: [ res := position > string size ] ifFalse: [ res := true ].
314 res ifTrue: [ matchEnd := position - 1 ].
320 "-------- matcher entry point --------"
"Matcher entry point: the first attempt starts at position 1 with a fresh
 captures Array of captureCount slots. If it fails and hasSOL is set, false is
 answered at once; otherwise the attempt is repeated with matchStart and
 position set to stpos, until stpos passes the end of string.
 NOTE(review): the header, the initial value of stpos and its increment are on
 lines not visible in this excerpt."
324 position := matchStart := 1.
325 captures := Array new: captureCount.
326 (self matchFrom: 1) ifTrue: [ ^true ].
327 hasSOL ifTrue: [ ^false ].
329 [ stpos > string size ] whileFalse: [
330 matchStart := position := stpos.
331 (self matchFrom: 1) ifTrue: [ ^true ].