1 ;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: LALR; -*-
2 ;;; ---------------------------------------------------------------------------
3 ;;; Title: LALR parser generator
5 ;;; Author: Mark Johnson <mj@cs.brown.edu>
6 ;;; ---------------------------------------------------------------------------
7 ;;; (c) 1988 Mark Johnson
10 (:use
:cl
:glisp
:runes
)
11 (:export
#:DEFINE-GRAMMAR
))
17 ;;; This is an LALR parser generator.
18 ;;; (c) 1988 Mark Johnson. mj@cs.brown.edu
19 ;;; This is *not* the property of Xerox Corporation!
21 ;;; Modified to cache the first terminals, the epsilon derivations
22 ;;; the rules that expand a category, and the items that expand
25 ;;; There is a sample grammar at the end of this file.
26 ;;; Use your text-editor to search for "Test grammar" to find it.
28 ;;; (in-package 'LALR)
29 ;;; (export '(make-parser lalr-parser *lalr-debug* grammar lexforms $ parse))
31 ;;; (shadow '(first rest))
32 ;;; (defmacro first (x) `(car ,x))
33 ;;; (defmacro rest (x) `(cdr ,x))
35 ;;; The external interface is MAKE-PARSER. It takes three arguments, a
36 ;;; CFG grammar, a list of the lexical or terminal categories, and an
37 ;;; atomic end marker. It produces a list which is the Lisp code for
38 ;;; an LALR(1) parser for that grammar. If that list is compiled, then
39 ;;; the function LALR-PARSER is defined. LALR-PARSER is a function with
40 ;;; two arguments, NEXT-INPUT and PARSE-ERROR.
42 ;;; The first argument to LALR-PARSER, NEXT-INPUT must be a function with
43 ;;; zero arguments; every time NEXT-INPUT is called it should return
44 ;;; two values, the first is the category of the next lexical
45 ;;; form in the input and the second is the value of that form.
46 ;;; Each call to NEXT-INPUT should advance one lexical item in the
47 ;;; input. When the input is consumed, NEXT-INPUT should return a
48 ;;; CONS whose CAR is the atomic end marker used in the call to MAKE-PARSER.
50 ;;; The second argument to LALR-PARSER, PARSE-ERROR will be called
51 ;;; if the parse fails because the input is ill-formed.
54 ;;; There is a sample at the end of this file.
56 ;;; definitions of constants and global variables used
58 (defconstant *TOPCAT
* '$Start
)
68 (defvar *lalr-debug
* NIL
"Inserts debugging code into parser if non-NIL")
69 (defvar stateList
'())
71 (defvar *first-terminals-cache
* nil
)
73 (defmacro fixnum
= (x y
) `(= (the fixnum
,x
) (the fixnum
,y
)))
75 (defun make-parser (grammar lex endMarker
&key
(name 'lalr-parser
))
76 "Takes a grammar and produces the Lisp code for a parser for that grammar"
77 (setq *ENDMARKER
* endMarker
)
79 ;;; cache some data that will be useful later
81 (setq gstart
(caar grammar
))
82 (setq grules
(let ((i 0))
83 (mapcar #'(lambda (r) (transform-rule r
(incf i
)))
85 (setq gcats
(get-all-cats))
88 (setq gexpansions
(make-hash-table :test
#'eq
))
89 (setq *first-terminals-cache
* (make-hash-table :test
#'equal
))
91 (setf (gethash cat gexpansions
) (compute-expansion cat
))))
93 (setq gepsilons
(remove-if-not #'derives-eps gcats
))
96 (setq gstarts
(make-hash-table :test
#'eq
))
97 (setf (gethash *ENDMARKER
* gstarts
) (list *ENDMARKER
*))
99 (setf (gethash cat gstarts
) (first-terms (list cat
)))) )
100 ;;; now actually build the parser
102 (when (and (listp *lalr-debug
*) (member 'print-table
*lalr-debug
*))
103 (Print-Table stateList
))
104 (format T
"~%; Table ready (total of ~R rules --> ~R states)."
107 (format T
"~%; Dumping: ")
108 (prog1 (build-parser name
)
111 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
113 ;;; Rules and Grammars
116 (defstruct rule no mother daughters action
)
118 (defun transform-rule (rule no
)
121 :daughters
(butlast (cddr rule
))
122 :action
(car (last rule
))))
124 (defun compute-expansion (cat)
125 (remove-if-not #'(lambda (rule)
126 (eq (rule-mother rule
) cat
))
129 (defmacro expand
(cat)
130 `(gethash ,cat gexpansions
) )
132 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
134 ;;; Properties of grammars
136 (defun get-all-cats ()
137 (labels ((try (dejaVu cat
)
138 (if (find cat dejaVu
)
140 (tryRules (cons cat dejaVu
) (compute-expansion cat
))))
141 (tryRules (dejaVu rules
)
143 (tryRules (tryCats dejaVu
(rule-daughters (car rules
)))
146 (tryCats (dejaVu cats
)
148 (tryCats (try dejaVu
(car cats
)) (cdr cats
))
152 (defun derives-eps (c)
153 "t if c can be rewritten as the null string"
154 (labels ((try (dejaVu cat
)
155 (unless (find cat dejaVu
)
157 (every #'(lambda (c1) (try (cons cat dejaVu
) c1
))
162 (defmacro derives-epsilon
(c)
163 "looks up the cache to see if c derives the null string"
164 `(member ,c gepsilons
))
166 (defun first-terms (catList)
167 "the leading terminals of an expansion of catList"
168 (labels ((firstDs (cats)
170 (if (derives-epsilon (car cats
))
171 (cons (car cats
) (firstDs (cdr cats
)))
174 (if (member cat dejaVu
)
176 (tryList (cons cat dejaVu
)
177 (mapcan #'(lambda (r)
178 (firstDs (rule-daughters r
)))
180 (tryList (dejaVu cats
)
182 (tryList (try dejaVu
(car cats
)) (cdr cats
))
184 (remove-if-not #'(lambda (term)
185 (or (eq *ENDMARKER
* term
)
187 (tryList '() (firstDs catList
)))))
189 (defun first-terminals (cat-list)
191 (if (derives-epsilon (first cat-list
))
192 (union (gethash (first cat-list
) gstarts
)
193 (first-terminals (rest cat-list
)))
194 (gethash (first cat-list
) gstarts
))
198 (defun first-terminals* (cat-list-0 cat-1
)
199 (let ((key (cons cat-list-0 cat-1
)))
200 (multiple-value-bind (v found?
) (gethash key
*first-terminals-cache
*)
202 (t (setf (gethash key
*first-terminals-cache
*)
205 (dolist (c0 cat-list-0
)
206 (setf res
(union res
(gethash c0 gstarts
)))
207 (unless (derives-epsilon c0
)
208 (return-from foo res
)))
209 (union res
(gethash cat-1 gstarts
)) )))) ))))
211 (defmacro first-terminals
* (cat-list-0 cat-1
)
212 `(let ((cat-list-0 ,cat-list-0
)
214 (let ((key (cons cat-list-0 cat-1
)))
215 (multiple-value-bind (v found?
) (gethash key
*first-terminals-cache
*)
217 (t (setf (gethash key
*first-terminals-cache
*)
220 (dolist (c0 cat-list-0
)
221 (setf res
(union res
(gethash c0 gstarts
)))
222 (unless (derives-epsilon c0
)
223 (return-from foo res
)))
224 (union res
(gethash cat-1 gstarts
)) )))) )))))
226 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
228 ;;; LALR(1) parsing table constructor
231 (defstruct item rule pos la
)
233 (defmacro item-daughters
(i) `(rule-daughters (item-rule ,i
)))
235 (defmacro item-right
(i) `(nthcdr (item-pos ,i
) (item-daughters ,i
)))
237 (defmacro item-equal
(i1 i2
)
238 `(and (eq (item-rule ,i1
) (item-rule ,i2
))
239 (fixnum= (item-pos ,i1
) (item-pos ,i2
))
240 (eq (item-la ,i1
) (item-la ,i2
))))
242 (defmacro item-core-equal
(c1 c2
)
243 "T if the cores of c1 and c2 are equal"
244 `(and (eq (item-rule ,c1
) (item-rule ,c2
))
245 (fixnum= (item-pos ,c1
) (item-pos ,c2
))))
247 (defun close-items (items)
248 "computes the closure of a set of items"
249 (declare (optimize (speed 3)))
252 (let ((i (pop to-do
)))
253 (let ((rgt (item-right i
)))
255 (dolist (la (first-terminals* (rest rgt
) (item-la i
) ))
256 (dolist (r (expand (first rgt
)))
257 (unless (dolist (i items
)
258 (if (and (eq r
(item-rule i
))
260 (fixnum= (item-pos i
) 0))
262 (let ((new (make-item :rule r
:pos
0 :la la
)))
264 (push new to-do
))))))))))
266 (defun shift-items (items cat
)
267 "shifts a set of items over cat"
268 (labels ((shift-item (item)
269 (if (eq (first (item-right item
)) cat
)
270 (make-item :rule
(item-rule item
)
271 :pos
(1+ (item-pos item
))
272 :la
(item-la item
)))))
273 (let ((new-items '()))
275 (let ((n (shift-item i
)))
277 (push n new-items
))))
280 (defun items-right (items)
281 "returns the set of categories appearing to the right of the dot"
284 (let ((d (first (item-right i
))))
285 (if (and d
(not (find d right
)))
289 (defun compact-items (items)
290 "collapses items with the same core to compact items"
293 (let ((ci (dolist (s soFar
)
294 (if (item-core-equal s i
)
297 (push (item-la i
) (item-la ci
))
298 (push (make-item :rule
(item-rule i
)
300 :la
(list (item-la i
)))
303 :key
#'(lambda (i) (rule-no (item-rule i
))))))
305 (defmacro expand-citems
(citems)
306 "expands a list of compact items into items"
309 (dolist (la (item-la ci
))
310 (push (make-item :rule
(item-rule ci
)
316 (defun subsumes-citems (ci1s ci2s
)
317 "T if the sorted set of items ci2s subsumes the sorted set ci1s"
318 (and (fixnum= (length ci1s
) (length ci2s
))
319 (every #'(lambda (ci1 ci2
)
320 (and (item-core-equal ci1 ci2
)
321 (subsetp (item-la ci1
) (item-la ci2
))))
324 (defun merge-citems (ci1s ci2s
)
325 "Adds the las of ci1s to ci2s. ci2s should subsume ci1s"
326 (mapcar #'(lambda (ci1 ci2
)
327 (setf (item-la ci2
) (nunion (item-la ci1
) (item-la ci2
))))
331 ;;; The actual table construction functions
333 (defstruct state name citems shifts conflict
)
334 (defstruct shift cat where
)
336 (defparameter nextStateNo -
1)
338 ;(defun lookup (citems)
339 ; "finds a state with the same core items as citems if it exits"
340 ; (find-if #'(lambda (state)
341 ; (and (= (length citems) (length (state-citems state)))
342 ; (every #'(lambda (ci1 ci2)
343 ; (item-core-equal ci1 ci2))
344 ; citems (state-citems state))
348 (defun lookup (citems)
349 "finds a state with the same core items as citems if it exits"
350 (dolist (state stateList
)
351 (if (and (fixnum= (length citems
) (length (state-citems state
)))
352 (do ((ci1s citems
(cdr ci1s
))
353 (ci2s (state-citems state
) (cdr ci2s
)))
355 (unless (item-core-equal (car ci1s
) (car ci2s
))
359 (defun add-state (citems)
360 "creates a new state and adds it to the state list"
362 (make-state :name
(intern (format nil
"STATE-~D" (incf nextStateNo
)))
364 (push newState stateList
)
367 (defun get-state-name (items)
368 "returns the state name for this set of items"
369 (let* ((citems (compact-items items
))
370 (state (lookup citems
)))
372 (setq state
(add-state citems
))
373 (build-state state items
))
374 ((subsumes-citems citems
(state-citems state
))
377 (merge-citems citems
(state-citems state
))
378 (follow-state items
)))
382 (defun build-state (state items
)
383 "creates the states that this state can goto"
384 (let ((closure (close-items items
)))
385 (dolist (cat (items-right closure
))
386 (push (make-shift :cat cat
387 :where
(get-state-name (shift-items closure cat
)))
388 (state-shifts state
)))))
390 (defun follow-state (items)
391 "percolates look-ahead onto descendant states of this state"
392 (let ((closure (close-items items
)))
393 (dolist (cat (items-right closure
))
394 (get-state-name (shift-items closure cat
)))))
396 (defun build-table ()
397 "Actually builds the table"
399 (setq nextStateNo -
1)
400 (get-state-name (list (make-item :rule
(make-rule :no
0
402 :daughters
(list gstart
))
405 (setq stateList
(nreverse stateList
)))
407 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
409 ;;; LALR(1) parsing table printer
412 (defun print-table (stateList)
413 "Prints the state table"
414 (dolist (state stateList
)
415 (format t
"~%~%~a:" (state-name state
))
416 (dolist (citem (state-citems state
))
417 (format t
"~% ~a -->~{ ~a~} .~{ ~a~}, ~{~a ~}"
418 (rule-mother (item-rule citem
))
419 (subseq (rule-daughters (item-rule citem
)) 0 (item-pos citem
))
420 (subseq (rule-daughters (item-rule citem
)) (item-pos citem
))
422 (dolist (shift (state-shifts state
))
423 (format t
"~% On ~a shift ~a" (shift-cat shift
) (shift-where shift
)))
424 (dolist (reduce (compact-items
425 (delete-if #'(lambda (i) (item-right i
))
427 (expand-citems (state-citems state
))))))
428 (format t
"~% On~{ ~a~} reduce~{ ~a~} --> ~a"
430 (rule-daughters (item-rule reduce
))
431 (rule-mother (item-rule reduce
)))))
434 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
436 ;;; LALR(1) parser constructor
439 ;;; next-input performs lexical analysis. It must return two values.
440 ;;; the category and the value.
442 (defmacro define-grammar
(name lex-forms
&rest grammar
)
443 (make-parser grammar lex-forms
:eof
:name name
))
445 (defun build-parser (name)
446 "returns an lalr(1) parser. next-input must return 2 values!"
447 `(DEFUN ,name
(NEXT-INPUT PARSE-ERROR
)
448 (LET ((CAT-LA '()) ;category lookahead
449 (VAL-LA '()) ;value lookahead
450 (VAL-STACK '()) ;value stack
451 (STATE-STACK '()) ;state stack
452 (CUR-STATE ',(state-name (first stateList
)))) ;current state
453 (LABELS ((INPUT-PEEK ()
455 (MULTIPLE-VALUE-SETQ (CAT-LA VAL-LA
) (FUNCALL NEXT-INPUT
))
456 (SETF CAT-LA
(LIST CAT-LA
)
457 VAL-LA
(LIST VAL-LA
)))
460 (PUSH NAME STATE-STACK
)
462 (PUSH (POP VAL-LA
) VAL-STACK
))
463 (REDUCE-CAT (NAME CAT NDAUGHTERS ACTION
)
464 (IF (EQ CAT
',*topcat
*)
465 (RETURN-FROM ,name
(POP VAL-STACK
))
466 (LET ((DAUGHTER-VALUES '())
468 (DOTIMES (I NDAUGHTERS
)
469 (PUSH (POP VAL-STACK
) DAUGHTER-VALUES
)
470 (SETQ STATE
(POP STATE-STACK
)))
472 (PUSH (APPLY ACTION DAUGHTER-VALUES
) VAL-LA
)
473 (SETQ CUR-STATE STATE
)))))
476 ,@(mapcar #'translate-state stateList
)) )))))
478 (defun translate-state (state)
479 "translates a state into lisp code that could appear in a TAGBODY form"
480 ;;(format T " ~(~S~)" (state-name state))
482 (let ((reduces (compact-items
483 (delete-if #'(lambda (i) (item-right i
))
485 (expand-citems (state-citems state
))))))
486 (symbols-sofar '())) ; to ensure that a symbol never occurs twice
487 (labels ((translate-shift (shift)
488 (push (shift-cat shift
) symbols-sofar
)
489 `(,(shift-cat shift
) ;case key
490 (SHIFT-FROM ',(state-name state
))
491 (SETQ CUR-STATE
',(shift-where shift
))))
493 (translate-reduce (item)
494 (when (intersection (item-la item
) symbols-sofar
)
495 (format t
"~&Warning, Not LALR(1)!!: ~a, ~a --> ~{~a ~}~%"
497 (rule-mother (item-rule item
))
498 (rule-daughters (item-rule item
)))
499 (setf (item-la item
) (nset-difference (item-la item
) symbols-sofar
)))
500 (dolist (la (item-la item
))
501 (push la symbols-sofar
))
503 `(,(item-la item
) ;case key
504 (REDUCE-CAT ',(state-name state
)
505 ',(rule-mother (item-rule item
))
507 ,(rule-action (item-rule item
))))))
508 `(,(state-name state
)
510 ,@(mapcar #'translate-shift
(state-shifts state
))
511 ,@(mapcar #'translate-reduce reduces
)
512 (otherwise (funcall parse-error
)))))))