Use CXML's rune implementation and XML parser.
[closure-html.git] / src / util / clex.lisp
blobbc54942b293a52e5fe8858ef8492b6090aec39ca
1 ;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: CLEX; -*-
2 ;;; --------------------------------------------------------------------------------------
3 ;;; Title: A flex like scanner generator for Common LISP
4 ;;; Created: 1997-10-12
5 ;;; Author: Gilbert Baumann <gilbert@base-engineering.com>
6 ;;; License: LGPL (See file COPYING for details).
7 ;;; --------------------------------------------------------------------------------------
8 ;;; (c) copyright 1997-1999 by Gilbert Baumann
10 ;;; Permission is hereby granted, free of charge, to any person obtaining
11 ;;; a copy of this software and associated documentation files (the
12 ;;; "Software"), to deal in the Software without restriction, including
13 ;;; without limitation the rights to use, copy, modify, merge, publish,
14 ;;; distribute, sublicense, and/or sell copies of the Software, and to
15 ;;; permit persons to whom the Software is furnished to do so, subject to
16 ;;; the following conditions:
17 ;;;
18 ;;; The above copyright notice and this permission notice shall be
19 ;;; included in all copies or substantial portions of the Software.
20 ;;;
21 ;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 ;;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23 ;;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
24 ;;; IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
25 ;;; CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
26 ;;; TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
27 ;;; SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
29 (defpackage :clex
30 (:use :glisp :runes)
31 (:export
32 #:deflexer #:backup #:begin #:initial #:bag))
34 (in-package :CLEX)
36 ;;; NOTE -- It turns out that this code is a magintude slower under CMUCL
37 ;;; compared to CLISP or ACL. Probably they do not have a good implementation of
38 ;;; bit vectors.
40 ;;; We encode our FSA's directly as linked datastructures; A state is represented by:
42 (defstruct (state (:type vector))
43 (final 0)
44 transitions ;simple alist of (sigma . next-state)
45 id ;numeric id of state
46 eps-transitions) ;list of all states reached by epsilon (empty transitions)
48 (defun state-add-link (this char that)
49 "Add a transition to state `this'; reading `char' proceeds to `that'."
50 (cond ((eq char 'eps)
51 (pushnew that (state-eps-transitions this)))
53 (dolist (k (state-transitions this)
54 (push (cons (list char) that) (state-transitions this)))
55 (when (eq (cdr k) that)
56 (pushnew char (car k))
57 (return nil))) )))
59 ;;; When constructing FSA's from regular expressions we abstract by the notation
60 ;;; of FSA's as boxen with an entry and an exit state.
62 (defstruct fsa
63 start ;entry state
64 end) ;exit state
66 (defun fsa-empty ()
67 "Accepts the empty word."
68 (let ((q (make-state)))
69 (make-fsa :start q :end q)))
71 (defun fsa-trivial (char)
72 "Accepts the trivial word consisting out of exactly one `char'."
73 (let ((q0 (make-state))
74 (q1 (make-state)))
75 (state-add-link q0 char q1)
76 (make-fsa :start q0 :end q1)))
78 (defun fsa-concat (a1 a2)
79 "Concatenation of `a1' and `a2'. Hence `a1 a2'."
80 (state-add-link (fsa-end a1) 'eps (fsa-start a2))
81 (make-fsa :start (fsa-start a1)
82 :end (fsa-end a2)))
84 (defun fsa-iterate (a)
85 "Iteration of `a'. Hence `a*'"
86 (let ((q0 (make-state))
87 (q1 (make-state)))
88 (state-add-link q0 'eps (fsa-start a))
89 (state-add-link q0 'eps q1)
90 (state-add-link q1 'eps q0)
91 (state-add-link (fsa-end a) 'eps q1)
92 (make-fsa :start q0 :end q1)))
94 (defun fsa-branch (&rest as)
95 "Alternation of a0..an; Hence `a0 | a1 | ... | an'."
96 (let ((q0 (make-state))
97 (q1 (make-state)))
98 (dolist (a as)
99 (state-add-link q0 'eps (fsa-start a))
100 (state-add-link (fsa-end a) 'eps q1))
101 (make-fsa :start q0 :end q1)))
103 ;;;; ----------------------------------------------------------------------------------------------------
104 ;;;; Converting regular expressions to (ND)FSA
105 ;;;;
107 ;;; However we choose here a Lispy syntax for regular expressions:
109 ;;; a singelton
110 ;;; (and a0 .. an) concatation
111 ;;; (or a0 .. an) alternation
112 ;;; (* a) iteration
114 ;;; Further the abbrevs.:
115 ;;; (+ a) == (and a (* a))
116 ;;; (? a) == (or a (and))
117 ;;; (a0 ... an) == (and a0 ... an)
119 ;;; When a string embeded into a regular expression is seen, the list
120 ;;; of characters is spliced in. So formally:
121 ;;; (a0 .. ai "xyz" aj .. an) == (a0 .. ai #\x #\y #\z aj .. an)
123 ;;; This is useful for matching words:
124 ;;; "foo" --> (and "foo") --> (and #\f #\o #\o) == The word 'foo'
125 ;;; or for denoting small sets:
126 ;;; (or "+-") --> (or #\+ #\-) == One of '+' or '-'
128 (defun loose-eq (x y)
129 (cond ((eq x y))
130 ((and (symbolp x) (symbolp y))
131 (string= (symbol-name x) (symbol-name y)))))
133 (defun regexp->fsa (term)
134 (setf term (regexp-expand-splicing term))
135 (cond ((and (atom term) (not (stringp term)))
136 (fsa-trivial term))
137 ((loose-eq (car term) 'AND) (regexp/and->fsa term))
138 ((loose-eq (car term) 'OR) (regexp/or->fsa term))
139 ((loose-eq (car term) '*) (fsa-iterate (regexp->fsa (cadr term))))
140 ((loose-eq (car term) '+) (regexp->fsa `(AND ,(cadr term) (* ,(cadr term)))))
141 ((loose-eq (car term) '?) (regexp->fsa `(OR (AND) ,(cadr term))))
142 ((loose-eq (car term) 'RANGE)
143 (regexp->fsa `(OR .,(loop for i from (char-code (cadr term)) to (char-code (caddr term))
144 collect (code-char i)))))
146 (regexp->fsa `(AND .,term))) ))
148 (defun regexp/or->fsa (term)
149 ;; I optimize here a bit: I recognized, that ORs are mainly just
150 ;; (large) sets of characters. The extra epsilon transitions are not
151 ;; neccessary on single atoms, so I omit them here. -- This reduces the
152 ;; number of states quite a bit in the first place.
153 (let ((q0 (make-state))
154 (q1 (make-state)))
155 (dolist (a (cdr term))
156 (cond ((atom a)
157 (state-add-link q0 a q1))
158 ((let ((a (regexp->fsa a)))
159 (state-add-link q0 'eps (fsa-start a))
160 (state-add-link (fsa-end a) 'eps q1)))))
161 (make-fsa :start q0 :end q1)))
163 (defun regexp/and->fsa (term)
164 (cond ((null (cdr term)) (fsa-empty))
165 ((null (cddr term)) (regexp->fsa (cadr term)))
166 ((fsa-concat (regexp->fsa (cadr term)) (regexp->fsa `(and .,(cddr term)))))))
168 (defun regexp-expand-splicing (term)
169 (cond ((consp term)
170 (mapcan #'(lambda (x)
171 (cond ((stringp x) (coerce x 'list))
172 ((list x))))
173 term))
174 (t term)))
176 ;;;; ----------------------------------------------------------------------------------------------------
177 ;;;; Converting a ND-FSA to a D-FSA
178 ;;;;
180 ;;; Since we have to compare and unionfy sets of states a lot, I use bit-vectors
181 ;;; to represent these sets for speed. However let me abstract that a bit:
183 ;;; (All these are defined as macros simply for speed. Inlining would be an
184 ;;; option here, when it would be reliable. With defining macros I enforce
185 ;;; inlining).
187 (defmacro make-empty-set (n)
188 "Create the empty set on the domain [0,n)."
189 `(make-array ,n :element-type 'bit :initial-element 0))
191 (defmacro nset-put (bag new)
192 "Destructively calculate bag = bag U {new}."
193 `(setf (sbit (the (simple-array bit (*)) ,bag) (the fixnum ,new)) 1))
195 (defmacro element-of-set-p (elm set)
196 "Determine whether `elm' is element of the set `set'."
197 `(eq 1 (sbit (the (simple-array bit (*)) ,set) (the fixnum ,elm))))
199 (defmacro set-size (set)
200 "Return the upper bound of the domain of `set'."
201 `(length ,set))
203 (defmacro do-bits ((var set &optional result) &body body)
204 "Iterate body with `var' over all elements of `set'."
205 (let ((g/set (gensym)))
206 `(let ((,g/set ,set))
207 (dotimes (,var (set-size ,g/set) ,result)
208 (when (element-of-set-p ,var ,g/set)
209 ,@body)))))
211 ;;; Since the sets we defined above only take non-negative integers, we have to
212 ;;; number our states. This is done once by NUMBER-STATES.
214 (defun number-states (starts)
215 "Number all state reachable form `starts', continuosly from 0. Each state got
216 it's number stuck into the `id' slot.
217 Returns two values: `n' the number of states and `tab' a table to lookup a
218 state given the number it got attached to."
219 (let ((n 0)
220 (tab (make-array 0 :adjustable t :fill-pointer 0 :initial-element nil)))
221 (labels ((walk (x)
222 (unless (state-id x)
223 (vector-push-extend x tab 300)
224 (setf (state-id x) (prog1 n (incf n)))
225 (dolist (tr (state-transitions x))
226 (walk (cdr tr)))
227 (dolist (y (state-eps-transitions x))
228 (walk y)))))
229 (dolist (s starts) (walk s))
230 (values n tab))))
232 ;;; We need to calculate the epsilon closure of a given state. Due to the
233 ;;; precise workings of our algorithm below, we only need this augmenting
234 ;;; version.
236 (defun fsa-epsilon-closure/set (x state-set)
237 "Augment the epsilon closure of the state `state' into `state-set'."
238 (unless (element-of-set-p (state-id x) state-set)
239 (nset-put state-set (state-id x))
240 (dolist (k (state-eps-transitions x))
241 (fsa-epsilon-closure/set k state-set))))
243 (defun ndfsa->dfsa (starts)
244 (let ((batch nil)
245 (known nil))
246 (multiple-value-bind (n tab) (number-states starts)
247 (labels ((name-state-set (state-set)
248 (or (cdr (assoc state-set known :test #'equal))
249 (let ((new (make-state)))
250 (push (cons state-set new) known)
251 (push state-set batch)
252 new)))
253 (add-state-set (state-set)
254 (let ((new-tr nil)
255 (new-tr-real nil)
256 (name (name-state-set state-set))
257 (new-final 0))
258 (do-bits (s0 state-set)
259 (let ((s (aref tab s0)))
260 (setf new-final (max new-final (state-final s)))
261 (dolist (tr (state-transitions s))
262 (let ((to (cdr tr)))
263 (dolist (z (car tr))
264 (let ((looked (getf new-tr z nil)))
265 (if looked
266 (fsa-epsilon-closure/set to looked)
267 (let ((sts (make-empty-set n)))
268 (fsa-epsilon-closure/set to sts)
269 (setf (getf new-tr z) sts) ))))))))
270 (setq new-tr (frob2 new-tr))
271 (do ((q new-tr (cddr q)))
272 ((null q))
273 (let ((z (car q))
274 (to (cadr q)))
275 (push (cons z (name-state-set to)) new-tr-real)))
276 (setf (state-transitions name) new-tr-real
277 (state-final name) new-final))))
278 (prog1
279 (mapcar #'(lambda (s)
280 (name-state-set (let ((sts (make-empty-set n)))
281 (fsa-epsilon-closure/set s sts)
282 sts)))
283 starts)
284 (do ()
285 ((null batch))
286 (add-state-set (pop batch)))) ))))
288 (defun frob2 (res &aux res2)
289 (do ((q res (cddr q)))
290 ((null q) res2)
291 (do ((p res2 (cddr p)))
292 ((null p)
293 (setf res2 (list* (list (car q)) (cadr q) res2)))
294 (when (equal (cadr q) (cadr p))
295 (setf (car p) (cons (car q) (car p)))
296 (return)))))
298 ;;;; ----------------------------------------------------------------------------------------------------
299 ;;;; API
300 ;;;;
302 ;;; Features to think about:
303 ;;; - case insensitive scanner
304 ;;; - compression of tables
305 ;;; - debugging aids
306 ;;; - non-interactive high speed scanning?
307 ;;; - make BAG a macro? So that non used bags are not considered?
308 ;;; - REJECT?
309 ;;; - support for include?
310 ;;; - support for putting back input?
311 ;;; - count lines/columns? Track source?
312 ;;; - richer set of regexp primitives e.g. "[a-z]" style sets
313 ;;; - could we offer complement regexp?
314 ;;; - trailing context
315 ;;; - sub-state stacks?
316 ;;; - user variables to include ['global' / 'lexical']
317 ;;; - identifing sub-expression of regexps (ala \(..\) and \n)
320 #-(OR CMU GCL)
321 (defun loadable-states-form (starts)
322 `',starts)
324 #+(OR CMU GCL)
325 ;; Leider ist das CMUCL so dumm, dass es scheinbar nicht faehig ist die
326 ;; selbstbezuegliche Structur ',starts in ein FASL file zu dumpen ;-(
327 ;; Deswegen hier dieser read-from-string Hack.
328 (defun loadable-states-form (starts)
329 `(LET ((*PACKAGE* (FIND-PACKAGE ',(package-name *package*))))
330 (READ-FROM-STRING ',(let ((*print-circle* t)
331 (*print-readably* t)
332 (*print-pretty* nil))
333 (prin1-to-string starts)))))
335 (defmacro old/deflexer (name macro-defs &rest rule-defs)
336 (let ((macros nil) starts clauses (n-fin 0))
337 (dolist (k macro-defs)
338 (push (cons (car k) (sublis macros (cadr k))) macros))
339 ;;canon clauses -- each element of rule-defs becomes (start expr end action)
340 (setq rule-defs
341 (mapcar #'(lambda (x)
342 (cond ((and (consp (car x)) (eq (caar x) 'in))
343 (list (cadar x) (sublis macros (caddar x)) (progn (incf n-fin) n-fin) (cdr x)))
344 ((list 'initial (sublis macros (car x)) (progn (incf n-fin) n-fin) (cdr x)))))
345 (reverse rule-defs)))
346 ;;collect all start states in alist (<name> . <state>)
347 (setq starts (mapcar #'(lambda (name)
348 (cons name (make-state)))
349 (remove-duplicates (mapcar #'car rule-defs))))
350 ;;build the nd-fsa's
351 (dolist (r rule-defs)
352 (destructuring-bind (start expr end action) r
353 (let ((q0 (cdr (assoc start starts)))
354 (fsa (regexp->fsa `(and ,expr))))
355 ;;link start state
356 (state-add-link q0 'eps (fsa-start fsa))
357 ;;mark final state
358 (setf (state-final (fsa-end fsa)) end)
359 ;; build a clause for CASE
360 (push `((,end) .,action) clauses))))
361 ;; hmm... we have to sort the final states after building the dfsa
362 ;; or introduce fixnum identifier and instead of union take the minimum
363 ;; above in ndfsa->dfsa.
364 (progn
365 (mapcar #'(lambda (x y) (setf (cdr x) y))
366 starts (ndfsa->dfsa (mapcar #'cdr starts))))
367 ;; (print (number-states starts))
368 `(DEFUN ,(intern (format nil "MAKE-~A-LEXER" name)) (INPUT)
369 (LET* ((STARTS ,(loadable-states-form starts))
370 (SUB-STATE 'INITIAL)
371 (STATE NIL)
372 (LOOK-AHEAD NIL)
373 (BAGG/CH (G/MAKE-STRING 100 :FILL-POINTER 0 :ADJUSTABLE T))
374 (BAGG/STATE (MAKE-ARRAY 100 :FILL-POINTER 0 :ADJUSTABLE T))
375 (CH NIL))
376 #'(LAMBDA ()
377 (BLOCK NIL
378 (LABELS ((BEGIN (X)
379 (SETQ SUB-STATE X))
380 (BACKUP (CH)
381 (COND ((STRINGP CH)
382 (WHEN (> (LENGTH CH) 0)
383 (PUSH (CONS 0 CH) LOOK-AHEAD)))
384 (T (PUSH CH LOOK-AHEAD))))
385 (PUSH* (CH STATE)
386 (VECTOR-PUSH-EXTEND CH BAGG/CH 10)
387 (VECTOR-PUSH-EXTEND STATE BAGG/STATE 10) )
388 (POP*/CH ()
389 (LET ((FP (LENGTH BAGG/CH)))
390 (PROG1 (AREF BAGG/CH (1- FP))
391 (SETF (FILL-POINTER BAGG/STATE) (1- FP))
392 (SETF (FILL-POINTER BAGG/CH) (1- FP)))))
393 (TOS*/STATE ()
394 (AREF BAGG/STATE (1- (LENGTH BAGG/STATE))) )
395 (EMPTY*? ()
396 (= (LENGTH BAGG/CH) 0))
397 (REWIND* ()
398 (SETF (FILL-POINTER BAGG/CH) 0)
399 (SETF (FILL-POINTER BAGG/STATE) 0) )
400 (STRING* ()
401 (COPY-SEQ BAGG/CH))
402 #+(OR)
403 (FIND-NEXT-STATE (CH STATE)
404 (DOLIST (K (STATE-TRANSITIONS STATE))
405 (WHEN (MEMBER CH (CAR K))
406 (RETURN (CDR K)))))
407 (GETCH ()
408 (COND ((NULL LOOK-AHEAD) (READ-CHAR INPUT NIL NIL))
409 ((CONSP (CAR LOOK-AHEAD))
410 (LET ((S (CDAR LOOK-AHEAD)))
411 (PROG1
412 (AREF S (CAAR LOOK-AHEAD))
413 (INCF (CAAR LOOK-AHEAD))
414 (WHEN (= (CAAR LOOK-AHEAD) (LENGTH S))
415 (POP LOOK-AHEAD)))))
416 (T (POP LOOK-AHEAD)) )))
417 (DECLARE (INLINE BACKUP GETCH))
418 (TAGBODY
419 START (SETQ STATE (CDR (ASSOC SUB-STATE STARTS)))
420 (WHEN (NULL STATE)
421 (ERROR "Sub-state ~S is not defined." SUB-STATE))
422 (REWIND*)
423 LOOP (SETQ CH (GETCH))
424 (LET ((NEXT-STATE
425 (BLOCK FOO
426 (DOLIST (K (STATE-TRANSITIONS STATE))
427 (DOLIST (Q (CAR K))
428 (WHEN (EQL CH Q)
429 (RETURN-FROM FOO (CDR K)))))) ))
430 (COND ((NULL NEXT-STATE)
431 (BACKUP CH)
432 (DO ()
433 ((OR (EMPTY*?) (NOT (EQ 0 (TOS*/STATE)))))
434 (BACKUP (POP*/CH)))
435 (COND ((AND (EMPTY*?) (NULL CH))
436 (RETURN :EOF))
437 ((EMPTY*?)
438 (ERROR "oops ~S ~S" ch (mapcar #'car (state-transitions state))))
440 (LET ((HALTING-STATE (TOS*/STATE)))
441 (LET ((BAG* NIL))
442 (SYMBOL-MACROLET ((BAG (IF BAG*
443 BAG*
444 (SETF BAG* (STRING*)))))
445 (CASE HALTING-STATE
446 ,@clauses)))
447 (GO START)))))
449 (PUSH* CH (STATE-FINAL NEXT-STATE))
450 (SETQ STATE NEXT-STATE)
451 (GO LOOP))))))))))))
453 ;;;; ----------------------------------------------------------------------------------------------------
454 ;;;;
456 (defun parse-char-set (string i)
457 (let ((res nil)
458 (complement-p nil))
459 (incf i) ;skip '['
460 ;;the first char is special
461 (cond ((char= (char string i) #\]) (incf i) (push #\] res))
462 ((char= (char string i) #\^) (incf i) (setq complement-p t))
463 ((char= (char string i) #\-) (incf i) (push #\- res)))
464 (do ()
465 ((char= (char string i) #\])
466 (values (if complement-p (cons 'cset res) (cons 'set res)) (+ i 1)))
467 (cond ((char= (char string (+ i 1)) #\-)
468 ;;it's a range
469 (push (cons (char string i) (char string (+ i 2))) res)
470 (incf i 3))
472 ;;singleton
473 (push (char string i) res)
474 (incf i))))))
476 ;;;; ------------------------------------------------------------------------------------------
478 (defparameter *full-table-p* t)
480 (defun mungle-transitions (trs)
481 (if *full-table-p*
482 (let ((res (make-array 256 :initial-element nil)))
483 (dolist (tr trs)
484 (dolist (ch (car tr))
485 (setf (aref res (char-code ch)) (cdr tr))))
486 res)
487 trs))
489 (defun over-all-states (fun starts)
490 ;; Apply `fun' to each state reachable from starts.
491 (let ((yet nil))
492 (labels ((walk (q)
493 (unless (member q yet)
494 (push q yet)
495 (let ((trs (state-transitions q)))
496 (funcall fun q)
497 (dolist (tr trs)
498 (walk (cdr tr)))))))
499 (mapc #'walk starts))))
501 (defmacro deflexer (name macro-defs &rest rule-defs)
502 (let ((macros nil) starts clauses (n-fin 0))
503 (dolist (k macro-defs)
504 (push (cons (car k) (sublis macros (cadr k))) macros))
505 ;;canon clauses -- each element of rule-defs becomes (start expr end action)
506 (setq rule-defs
507 (mapcar #'(lambda (x)
508 (cond ((and (consp (car x)) (string-equal (caar x) :in))
509 (list (cadar x) (sublis macros (caddar x)) (progn (incf n-fin) n-fin) (cdr x)))
510 ((list 'initial (sublis macros (car x)) (progn (incf n-fin) n-fin) (cdr x)))))
511 (reverse rule-defs)))
512 ;;collect all start states in alist (<name> . <state>)
513 (setq starts (mapcar #'(lambda (name)
514 (cons name (make-state)))
515 (remove-duplicates (mapcar #'car rule-defs))))
516 ;;build the nd-fsa's
517 (dolist (r rule-defs)
518 (destructuring-bind (start expr end action) r
519 (let ((q0 (cdr (assoc start starts)))
520 (fsa (regexp->fsa `(and ,expr))))
521 ;;link start state
522 (state-add-link q0 'eps (fsa-start fsa))
523 ;;mark final state
524 (setf (state-final (fsa-end fsa)) end)
525 ;; build a clause for CASE
526 (push `((,end) .,action) clauses))))
527 ;; hmm... we have to sort the final states after building the dfsa
528 ;; or introduce fixnum identifier and instead of union take the minimum
529 ;; above in ndfsa->dfsa.
530 (progn
531 (mapcar #'(lambda (x y) (setf (cdr x) y))
532 starts (ndfsa->dfsa (mapcar #'cdr starts))))
533 ;;(terpri)(princ `(,(number-states starts) states))(finish-output)
534 (let ((n 0))
535 (over-all-states (lambda (state)
536 (incf n)
537 (setf (state-transitions state)
538 (mungle-transitions (state-transitions state))))
539 (mapcar #'cdr starts))
540 (format T "~&~D states." n))
541 `(DEFUN ,(intern (format nil "MAKE-~A-LEXER" name)) (INPUT)
542 (LET* ((STARTS ,(loadable-states-form starts))
543 (SUB-STATE 'INITIAL)
544 (STATE NIL)
545 (LOOK-AHEAD NIL)
546 (BAGG/CH (G/MAKE-STRING 100 :FILL-POINTER 0 :ADJUSTABLE T))
547 (BAGG/STATE (MAKE-ARRAY 100 :FILL-POINTER 0 :ADJUSTABLE T))
548 (CH NIL))
549 #'(LAMBDA ()
550 (BLOCK NIL
551 (LABELS ((BEGIN (X)
552 (SETQ SUB-STATE X))
553 (BACKUP (CH)
554 (COND ((STRINGP CH)
555 (WHEN (> (LENGTH CH) 0)
556 (PUSH (CONS 0 CH) LOOK-AHEAD)))
557 (T (PUSH CH LOOK-AHEAD))))
558 (PUSH* (CH STATE)
559 (VECTOR-PUSH-EXTEND CH BAGG/CH 10)
560 (VECTOR-PUSH-EXTEND STATE BAGG/STATE 10) )
561 (POP*/CH ()
562 (LET ((FP (LENGTH BAGG/CH)))
563 (PROG1 (CHAR BAGG/CH (1- FP))
564 (SETF (FILL-POINTER BAGG/STATE) (1- FP))
565 (SETF (FILL-POINTER BAGG/CH) (1- FP)))))
566 (TOS*/STATE ()
567 (AREF BAGG/STATE (1- (LENGTH BAGG/STATE))) )
568 (EMPTY*? ()
569 (= (LENGTH BAGG/CH) 0))
570 (REWIND* ()
571 (SETF (FILL-POINTER BAGG/CH) 0)
572 (SETF (FILL-POINTER BAGG/STATE) 0) )
573 (STRING* ()
574 (COPY-SEQ BAGG/CH))
575 (GETCH ()
576 (COND ((NULL LOOK-AHEAD) (READ-CHAR INPUT NIL NIL))
577 ((CONSP (CAR LOOK-AHEAD))
578 (LET ((S (CDAR LOOK-AHEAD)))
579 (PROG1
580 (CHAR S (CAAR LOOK-AHEAD))
581 (INCF (CAAR LOOK-AHEAD))
582 (WHEN (= (CAAR LOOK-AHEAD) (LENGTH S))
583 (POP LOOK-AHEAD)))))
584 (T (POP LOOK-AHEAD)) ))
585 ,(if *full-table-p*
586 `(FIND-NEXT-STATE (STATE CH)
587 (IF (CHARACTERP CH)
588 (SVREF (STATE-TRANSITIONS STATE) (CHAR-CODE CH))
589 NIL))
590 `(FIND-NEXT-STATE (STATE CH)
591 (BLOCK FOO
592 (DOLIST (K (STATE-TRANSITIONS STATE))
593 (DOLIST (Q (CAR K))
594 (WHEN (CHAR= CH Q)
595 (RETURN-FROM FOO (CDR K)))))))) )
596 (DECLARE (INLINE BACKUP GETCH FIND-NEXT-STATE))
597 (TAGBODY
598 START (SETQ STATE (CDR (ASSOC SUB-STATE STARTS)))
599 (WHEN (NULL STATE)
600 (ERROR "Sub-state ~S is not defined." SUB-STATE))
601 (REWIND*)
602 LOOP (SETQ CH (GETCH))
603 (LET ((NEXT-STATE (FIND-NEXT-STATE STATE CH)) )
604 (COND ((NULL NEXT-STATE)
605 (BACKUP CH)
606 (DO ()
607 ((OR (EMPTY*?) (NOT (EQ 0 (TOS*/STATE)))))
608 (BACKUP (POP*/CH)))
609 (COND ((AND (EMPTY*?) (NULL CH))
610 (RETURN :EOF))
611 ((EMPTY*?)
612 (ERROR "oops ~S ~S" ch (mapcar #'car (state-transitions state))))
614 (LET ((HALTING-STATE (TOS*/STATE)))
615 (LET ((BAG* NIL))
616 (SYMBOL-MACROLET ((BAG (IF BAG*
617 BAG*
618 (SETF BAG* (STRING*)))))
619 (CASE HALTING-STATE
620 ,@clauses)))
621 (GO START)))))
623 (PUSH* CH (STATE-FINAL NEXT-STATE))
624 (SETQ STATE NEXT-STATE)
625 (GO LOOP))))))))))))