1 ;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: GLISP; -*-
2 ;;; ---------------------------------------------------------------------------
3 ;;; Title: Very simple (non-deterministic) regular expression matching
4 ;;; Created: 1999-01-21
5 ;;; Author: Gilbert Baumann <gilbert@base-engineering.com>
6 ;;; License: LGPL (See file COPYING for details).
7 ;;; ---------------------------------------------------------------------------
8 ;;; (c) copyright 1999 by Gilbert Baumann
10 ;;; Permission is hereby granted, free of charge, to any person obtaining
11 ;;; a copy of this software and associated documentation files (the
12 ;;; "Software"), to deal in the Software without restriction, including
13 ;;; without limitation the rights to use, copy, modify, merge, publish,
14 ;;; distribute, sublicense, and/or sell copies of the Software, and to
15 ;;; permit persons to whom the Software is furnished to do so, subject to
16 ;;; the following conditions:
18 ;;; The above copyright notice and this permission notice shall be
19 ;;; included in all copies or substantial portions of the Software.
21 ;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 ;;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23 ;;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
24 ;;; IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
25 ;;; CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
26 ;;; TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
27 ;;; SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
34 ;; atom -- match the atom
35 ;; (p predicate) -- match, iff (funcall p elt) is non-NIL
36 ;; (& a0 .. an) -- match a0a1..an
37 ;; (/ a0 .. an) -- match a0 or a1 ... or an
38 ;; (* a0 .. an) -- iteration, match any number of (& a0 ... an)
39 ;; (+ . rest) == (/ (& . rest) (* . rest))
40 ;; (? . rest) == (/ (& . rest) (&))
41 ;; (= var subexpr) == assign the subexpr to the match variable 'var'
44 ;; (- a b) -- match a, but not b
45 ;; (and a b) -- matches if a and b matches
46 ;; (or a b) == (/ a b)
47 ;; (not x) == matches if x does not match
50 ;; This syntax has to be merged with clex as well.
52 (defvar *match-macros
* (make-hash-table :test
#'eq
))
54 (defmacro define-match-macro
(name args
&body body
)
55 `(eval-when (compile load eval
)
56 (setf (gethash ',name
*match-macros
*)
58 (destructuring-bind ,args
(cdr whole
)
62 (defun symcat (&rest syms
)
63 (let ((pack (dolist (k syms nil
)
65 (return (symbol-package k
))))))
67 (error "No package for ~S of ~S." 'symcat syms
))
69 (intern (apply #'concatenate
'string
(mapcar #'string syms
))
72 (defun sym-equal (a b
)
73 (string= (symbol-name a
) (symbol-name b
)))
75 (defun bau-funcall (fun &rest args
)
76 (cond ((and (consp fun
) (eq (car fun
) 'lambda
))
78 ((and (consp fun
) (eq (car fun
) 'function
))
79 (cons (cadr fun
) args
))
81 (list* 'funcall fun args
))))
83 (defun compile-srx (srx action
&key
(string-type 'vector
) (test '#'eql
))
85 (labels ((cmp (x cont-expr
)
88 (with-unique-names (string start end
)
89 `(lambda (,string
,start
,end
)
90 (declare (type fixnum
,start
,end
)
91 (type ,string-type
,string
))
92 (if (and (< ,start
,end
)
93 ,(bau-funcall test
`(aref ,string
,start
) `',x
))
94 ,(bau-funcall cont-expr string
`(the fixnum
(1+ ,start
)) end
)))))
96 ((sym-equal (car x
) 'p
)
97 (destructuring-bind (p) (cdr x
)
98 (with-unique-names (string start end
)
99 `(lambda (,string
,start
,end
)
100 (declare (type fixnum
,start
,end
)
101 (type ,string-type
,string
))
102 (if (and (< ,start
,end
)
103 ,(bau-funcall p
`(aref ,string
,start
)))
104 ,(bau-funcall cont-expr string
`(the fixnum
(1+ ,start
)) end
))))))
106 ((sym-equal (car x
) '/)
107 (with-unique-names (ccfn string string2 start end end2 j
)
108 `(lambda (,string
,start
,end
)
109 (declare (type fixnum
,start
,end
)
110 (type ,string-type
,string
))
111 (labels ((,ccfn
(,string2
,j
,end2
)
112 (declare (type fixnum
,j
,end2
)
113 (type ,string-type
,string2
))
114 ,(bau-funcall cont-expr string2 j end2
)))
115 ,@(mapcar (lambda (a)
116 `(,(cmp a
`#',ccfn
) ,string
,start
,end
))
119 ((sym-equal (car x
) '*)
120 (with-unique-names (ccfn string string2 start end end2 j
)
121 (let ((subexpr (cons '& (cdr x
))))
122 `(lambda (,string
,start
,end
)
123 (declare (type fixnum
,start
,end
)
124 (type ,string-type
,string
))
125 (labels ((,ccfn
(,string2
,j
,end2
)
126 (declare (type fixnum
,j
,end2
)
127 (type ,string-type
,string2
))
128 (,(cmp subexpr
`#',ccfn
) ,string2
,j
,end2
)
129 ,(bau-funcall cont-expr string j end
)))
130 (,ccfn
,string
,start
,end
))))))
132 ((sym-equal (car x
) '&)
134 (1 (with-unique-names (string start end
)
135 `(lambda (,string
,start
,end
)
136 (declare (type fixnum
,start
,end
)
137 (type ,string-type
,string
))
138 ,(bau-funcall cont-expr string start end
))))
139 (2 (cmp (cadr x
) cont-expr
))
141 (with-unique-names (string start end
)
142 `(lambda (,string
,start
,end
)
143 (declare (type fixnum
,start
,end
)
144 (type ,string-type
,string
))
146 (with-unique-names (string j end
)
147 `#'(lambda (,string
,j
,end
)
148 (declare (type fixnum
,j
,end
)
149 (type ,string-type
,string
))
150 (,(cmp (cons '& (cddr x
)) cont-expr
) ,string
,j
,end
))))
151 ,string
,start
,end
))))))
153 ((sym-equal (car x
) '=)
154 (destructuring-bind (var subexpr
) (cdr x
)
156 (with-unique-names (string i0 end
)
157 `(lambda (,string
,i0
,end
)
158 (declare (type fixnum
,i0
,end
)
159 (type ,string-type
,string
))
161 (with-unique-names (string i1 end
)
162 `#'(lambda (,string
,i1
,end
)
163 (declare (type fixnum
,i1
,end
)
164 (type ,string-type
,string
))
165 (setf ,(symcat var
"-START") ,i0
166 ,(symcat var
"-END") ,i1
)
167 ,(bau-funcall cont-expr string i1 end
))))
168 ,string
,i0
,end
)))))
170 ((sym-equal (car x
) '+)
171 (cmp `(& ,@(cdr x
) (* ,@(cdr x
))) cont-expr
))
173 ((sym-equal (car x
) '?
)
174 (cmp `(/ (&) (& ,@(cdr x
))) cont-expr
))
177 (let ((mmf (gethash (car x
) *match-macros
*)))
179 (cmp (funcall mmf x
) cont-expr
))
181 (error "Unknown symbolic regular expression: ~S." x
))))) )))
183 (with-unique-names (string start end continuation match
)
184 (let ((cf (cmp srx
`#',continuation
)))
185 `(lambda (,string
,start
,end
)
186 (declare ;;#.cl-user:+optimize-very-fast+
187 (type fixnum
,start
,end
)
188 (type ,string-type
,string
))
190 (let ,(mapcan (lambda (var) (list (symcat var
"-START") (symcat var
"-END"))) vars
)
191 (labels (,(with-unique-names (string j end
)
192 `(,continuation
(,string
,j
,end
)
193 (declare (type fixnum
,j
,end
)
194 (type ,string-type
,string
))
195 (declare (ignore ,string
))
198 (return-from ,match
,action
))))))
199 (,cf
,string
,start
,end
)))
202 (defmacro if-match
((string &key start end type
(test '#'eql
)) srx
&body actions
)
203 (let ((str (gensym "str")))
204 `(let ((,str
,string
))
205 (,(compile-srx srx
`(progn .
,actions
)
206 :string-type
(or type
'vector
)
210 ,(if end end
`(length ,str
))))))