Umlaute raus!
[closure-html.git] / src / glisp / match.lisp
blob929297b1bccdea26ae4264dd3d52e5bba32bde94
1 ;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: GLISP; -*-
2 ;;; ---------------------------------------------------------------------------
3 ;;; Title: Very simple (non-deterministic) regular expression matching
4 ;;; Created: 1999-01-21
5 ;;; Author: Gilbert Baumann <gilbert@base-engineering.com>
6 ;;; License: LGPL (See file COPYING for details).
7 ;;; ---------------------------------------------------------------------------
8 ;;; (c) copyright 1999 by Gilbert Baumann
10 ;;; Permission is hereby granted, free of charge, to any person obtaining
11 ;;; a copy of this software and associated documentation files (the
12 ;;; "Software"), to deal in the Software without restriction, including
13 ;;; without limitation the rights to use, copy, modify, merge, publish,
14 ;;; distribute, sublicense, and/or sell copies of the Software, and to
15 ;;; permit persons to whom the Software is furnished to do so, subject to
16 ;;; the following conditions:
17 ;;;
18 ;;; The above copyright notice and this permission notice shall be
19 ;;; included in all copies or substantial portions of the Software.
20 ;;;
21 ;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 ;;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23 ;;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
24 ;;; IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
25 ;;; CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
26 ;;; TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
27 ;;; SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
29 (in-package :GLISP)
31 ;; Syntax
32 ;; ------
34 ;; atom -- match the atom
35 ;; (p predicate) -- match, iff (funcall p elt) is non-NIL
36 ;; (& a0 .. an) -- match a0a1..an
37 ;; (/ a0 .. an) -- match a0 or a1 ... or an
38 ;; (* a0 .. an) -- iteration, match any number of (& a0 ... an)
39 ;; (+ . rest) == (/ (& . rest) (* . rest))
40 ;; (? . rest) == (/ (& . rest) (&))
41 ;; (= var subexpr) == assign the subexpr to the match variable 'var'
43 ;; not implemented:
44 ;; (- a b) -- match a, but not b
45 ;; (and a b) -- matches if a and b matches
46 ;; (or a b) == (/ a b)
47 ;; (not x) == matches if x does not match
48 ;;
50 ;; This syntax has to be merged with clex as well.
52 (defvar *match-macros* (make-hash-table :test #'eq))
54 (defmacro define-match-macro (name args &body body)
55 `(eval-when (compile load eval)
56 (setf (gethash ',name *match-macros*)
57 #'(lambda (whole)
58 (destructuring-bind ,args (cdr whole)
59 ,@body)))
60 ',name))
62 (defun symcat (&rest syms)
63 (let ((pack (dolist (k syms nil)
64 (when (symbolp k)
65 (return (symbol-package k))))))
66 (cond ((null pack)
67 (error "No package for ~S of ~S." 'symcat syms))
69 (intern (apply #'concatenate 'string (mapcar #'string syms))
70 pack)))))
72 (defun sym-equal (a b)
73 (string= (symbol-name a) (symbol-name b)))
75 (defun bau-funcall (fun &rest args)
76 (cond ((and (consp fun) (eq (car fun) 'lambda))
77 (cons fun args))
78 ((and (consp fun) (eq (car fun) 'function))
79 (cons (cadr fun) args))
81 (list* 'funcall fun args))))
83 (defun compile-srx (srx action &key (string-type 'vector) (test '#'eql))
84 (let ((vars nil))
85 (labels ((cmp (x cont-expr)
86 (cond
87 ((atom x)
88 (with-unique-names (string start end)
89 `(lambda (,string ,start ,end)
90 (declare (type fixnum ,start ,end)
91 (type ,string-type ,string))
92 (if (and (< ,start ,end)
93 ,(bau-funcall test `(aref ,string ,start) `',x))
94 ,(bau-funcall cont-expr string `(the fixnum (1+ ,start)) end)))))
96 ((sym-equal (car x) 'p)
97 (destructuring-bind (p) (cdr x)
98 (with-unique-names (string start end)
99 `(lambda (,string ,start ,end)
100 (declare (type fixnum ,start ,end)
101 (type ,string-type ,string))
102 (if (and (< ,start ,end)
103 ,(bau-funcall p `(aref ,string ,start)))
104 ,(bau-funcall cont-expr string `(the fixnum (1+ ,start)) end))))))
106 ((sym-equal (car x) '/)
107 (with-unique-names (ccfn string string2 start end end2 j)
108 `(lambda (,string ,start ,end)
109 (declare (type fixnum ,start ,end)
110 (type ,string-type ,string))
111 (labels ((,ccfn (,string2 ,j ,end2)
112 (declare (type fixnum ,j ,end2)
113 (type ,string-type ,string2))
114 ,(bau-funcall cont-expr string2 j end2)))
115 ,@(mapcar (lambda (a)
116 `(,(cmp a `#',ccfn) ,string ,start ,end))
117 (cdr x))))))
119 ((sym-equal (car x) '*)
120 (with-unique-names (ccfn string string2 start end end2 j)
121 (let ((subexpr (cons '& (cdr x))))
122 `(lambda (,string ,start ,end)
123 (declare (type fixnum ,start ,end)
124 (type ,string-type ,string))
125 (labels ((,ccfn (,string2 ,j ,end2)
126 (declare (type fixnum ,j ,end2)
127 (type ,string-type ,string2))
128 (,(cmp subexpr `#',ccfn) ,string2 ,j ,end2)
129 ,(bau-funcall cont-expr string j end)))
130 (,ccfn ,string ,start ,end))))))
132 ((sym-equal (car x) '&)
133 (case (length x)
134 (1 (with-unique-names (string start end)
135 `(lambda (,string ,start ,end)
136 (declare (type fixnum ,start ,end)
137 (type ,string-type ,string))
138 ,(bau-funcall cont-expr string start end))))
139 (2 (cmp (cadr x) cont-expr))
140 (otherwise
141 (with-unique-names (string start end)
142 `(lambda (,string ,start ,end)
143 (declare (type fixnum ,start ,end)
144 (type ,string-type ,string))
145 (,(cmp (cadr x)
146 (with-unique-names (string j end)
147 `#'(lambda (,string ,j ,end)
148 (declare (type fixnum ,j ,end)
149 (type ,string-type ,string))
150 (,(cmp (cons '& (cddr x)) cont-expr) ,string ,j ,end))))
151 ,string ,start ,end))))))
153 ((sym-equal (car x) '=)
154 (destructuring-bind (var subexpr) (cdr x)
155 (pushnew var vars)
156 (with-unique-names (string i0 end)
157 `(lambda (,string ,i0 ,end)
158 (declare (type fixnum ,i0 ,end)
159 (type ,string-type ,string))
160 (,(cmp subexpr
161 (with-unique-names (string i1 end)
162 `#'(lambda (,string ,i1 ,end)
163 (declare (type fixnum ,i1 ,end)
164 (type ,string-type ,string))
165 (setf ,(symcat var "-START") ,i0
166 ,(symcat var "-END") ,i1)
167 ,(bau-funcall cont-expr string i1 end))))
168 ,string ,i0 ,end)))))
170 ((sym-equal (car x) '+)
171 (cmp `(& ,@(cdr x) (* ,@(cdr x))) cont-expr))
173 ((sym-equal (car x) '?)
174 (cmp `(/ (&) (& ,@(cdr x))) cont-expr))
177 (let ((mmf (gethash (car x) *match-macros*)))
178 (cond (mmf
179 (cmp (funcall mmf x) cont-expr))
181 (error "Unknown symbolic regular expression: ~S." x))))) )))
183 (with-unique-names (string start end continuation match)
184 (let ((cf (cmp srx `#',continuation)))
185 `(lambda (,string ,start ,end)
186 (declare ;;#.cl-user:+optimize-very-fast+
187 (type fixnum ,start ,end)
188 (type ,string-type ,string))
189 (block ,match
190 (let ,(mapcan (lambda (var) (list (symcat var "-START") (symcat var "-END"))) vars)
191 (labels (,(with-unique-names (string j end)
192 `(,continuation (,string ,j ,end)
193 (declare (type fixnum ,j ,end)
194 (type ,string-type ,string))
195 (declare (ignore ,string))
196 (if (= ,j ,end)
197 (let ()
198 (return-from ,match ,action))))))
199 (,cf ,string ,start ,end)))
200 nil)))))))
202 (defmacro if-match ((string &key start end type (test '#'eql)) srx &body actions)
203 (let ((str (gensym "str")))
204 `(let ((,str ,string))
205 (,(compile-srx srx `(progn .,actions)
206 :string-type (or type 'vector)
207 :test test)
208 ,str
209 ,(if start start 0)
210 ,(if end end `(length ,str))))))