1 ;;; LALR parser generator.
2 ;;; Julian Squires / 2005
6 ;;; When we preprocess the grammar, give every symbol a unique
7 ;;; integer, and then use bitvectors for all set operations. Keep a
8 ;;; bitvector to track terminal/nonterminal-ness.
9 ;;; (any benefit to doing this?)
11 ;;; Add a suite of tests using RT.
13 ;;; Write some usage information.
15 ;;; Add some operator precedence controls.
17 ;;; Code to convert yacc file into suitable grammar.
19 (in-package :lalr-parser-generator
)
21 ;;;; Special variables.
23 (defparameter *start-symbol
* 'start
)
24 (defparameter *end-symbol
* '$
)
27 "The default grammar used by the LALR parser generator; set by
29 (defvar *first-set
* nil
)
30 (defvar *follow-set
* nil
)
31 (defvar *nullable-set
* nil
)
35 ;;; We could use a structure instead of a list here, and it would
36 ;;; probably be much more efficient. For the moment, it doesn't
39 (defstruct item
(lhs) (rhs) (dot) (lookahead))
41 (defun dot-at-end-p (item) (endp (item-dot item
)))
43 (defun symbol-at-dot (item) (car (item-dot item
)))
45 (defun advance-dot (item)
46 "Returns the item dot, advanced by one symbol. Note:
48 (cdr (item-dot item
)))
51 ;;; item sets -- see also macros.lisp.
53 (defun make-item-set (&rest items
)
54 (let ((set (make-array '(0) :adjustable t
:fill-pointer
0)))
59 (defun add-to-set (item set
)
60 "Returns position of ITEM in SET."
61 (or (dotimes (i (length set
))
62 (when (and (equal (item-lhs item
) (item-lhs (aref set i
)))
63 (equal (item-rhs item
) (item-rhs (aref set i
)))
64 (equal (item-dot item
) (item-dot (aref set i
))))
65 (unless (equal (item-lookahead item
) (item-lookahead
67 (setf (item-lookahead (aref set i
))
68 (union (item-lookahead item
)
69 (item-lookahead (aref set i
)))))
71 ;(position item set :test #'equalp)
72 (vector-push-extend item set
)))
77 (defun process-grammar (grammar)
78 "Processes GRAMMAR, returns a grammar suitable for binding to
79 *GRAMMAR. Augments the grammar with a new start rule."
80 ;; split grammar into hash table of non-terminals, terminals.
82 ;; the grammar is a list of non-terminals followed by their
85 ;; we compile the basic hash table of non-terminals by iterating
86 ;; through the grammar, storing the lists of productions.
87 (let ((grammar-hash (make-hash-table)))
88 (do ((list-> grammar
(cddr list-
>)))
90 (setf (gethash (car list-
>) grammar-hash
)
93 ;; augment grammar with start symbol
94 (dolist (i (list *start-symbol
* *end-symbol
*))
95 (assert (null (gethash i grammar-hash
)) nil
96 "~A is a reserved non-terminal, unfortunately. Try
97 calling MAKE-PARSER with a different END-SYMBOL or START-SYMBOL
99 (setf (gethash *start-symbol
* grammar-hash
)
100 (list (list (car grammar
) *end-symbol
*)))
103 (defun non-terminal-p (symbol) (gethash symbol
*grammar
*))
104 (defun grammar-productions (symbol) (gethash symbol
*grammar
*))
107 ;;;; PARSE TABLE CONSTRUCTION
109 (defun first-sets (symbol-list)
110 (do* ((x-> symbol-list
(cdr x-
>))
111 (s (and x-
> (gethash (car x-
>) *first-set
*))
112 (union s
(and x-
> (gethash (car x-
>) *first-set
*)))))
113 ((or (null x-
>) (not (gethash (car x-
>) *nullable-set
*)))
116 (defun lalr-closure (item-set)
117 "Returns the closure of ITEM-SET."
118 (do-until-unchanged (item-set)
119 (do-for-each-item (i item-set
)
120 (when (non-terminal-p (symbol-at-dot i
))
121 (dolist (r (grammar-productions (symbol-at-dot i
)))
122 (add-to-set (make-item :lhs
(symbol-at-dot i
) :rhs r
:dot r
123 :lookahead
(union (first-sets (advance-dot i
))
128 (defun lalr-goto (item-set grammar-symbol
)
129 "Returns the closure of ITEM-SET after having read GRAMMAR-SYMBOL."
130 (let ((j (make-item-set)))
131 (do-for-each-item (i item-set
)
132 (when (eql (symbol-at-dot i
) grammar-symbol
)
133 (add-to-set (make-item :lhs
(item-lhs i
) :rhs
(item-rhs i
)
135 :lookahead
(item-lookahead i
))
139 (defun make-start-item ()
140 "Makes the item S' -> .S$, as appropriate for the grammar."
141 (make-item :lhs
*start-symbol
*
142 :rhs
(first (gethash *start-symbol
* *grammar
*))
143 :dot
(first (gethash *start-symbol
* *grammar
*))))
145 (defun make-almost-done-item ()
146 "Makes the item S' -> S.$, as appropriate for the grammar."
147 (let* ((start-item (make-start-item))
148 (dot (do ((dot (advance-dot start-item
) (cdr dot
)))
149 ((or (null dot
) (eql (car dot
) *end-symbol
*)) dot
))))
150 (assert (not (null dot
)))
151 (make-item :lhs
(item-lhs start-item
) :rhs
(item-rhs start-item
)
154 ;;; The code gets progressively uglier as I refine the data
155 ;;; structures. Shame on me.
158 (defun item-set-equal (set-a set-b
)
159 (do ((a set-a
(cdr a
))
161 ((and (endp a
) (endp b
)) t
)
162 (unless (and (equal (item-lhs (car a
)) (item-lhs (car b
)))
163 (equal (item-rhs (car a
)) (item-rhs (car b
)))
164 (equal (item-dot (car a
)) (item-dot (car b
))))
167 (defun item-set-equal-ignoring-la (set-a set-b
)
169 (when (= (length set-a
) (length set-b
))
170 (dotimes (i (length set-a
))
171 (unless (and (equal (item-lhs (aref set-a i
))
172 (item-lhs (aref set-b i
)))
173 (equal (item-rhs (aref set-a i
))
174 (item-rhs (aref set-b i
)))
175 (equal (item-dot (aref set-a i
))
176 (item-dot (aref set-b i
))))
177 (return-from body nil
)))
181 (defun merge-la-in-sets (dst src
)
182 (dotimes (i (length dst
))
183 (unless (equal (item-lookahead (aref dst i
))
184 (item-lookahead (aref src i
)))
185 (setf (item-lookahead (aref dst i
))
186 (union (item-lookahead (aref dst i
))
187 (item-lookahead (aref src i
)))))))
189 (defun add-to-states (set states
)
191 (dotimes (i (length states
))
192 (when (item-set-equal-ignoring-la set
(aref states i
))
193 ;; find items which are same but for LA, merge LA.
194 (merge-la-in-sets (aref states i
) set
)
196 (vector-push-extend set states
)))
198 (defun compute-shifts ()
199 "Compute shift actions and states for the generated parser. Returns
200 a list of shift actions and the state table."
201 (let ((shift-table nil
)
202 (states (make-array '(1) :adjustable t
:fill-pointer
1
205 (make-item-set (make-start-item))))))
206 (do-until-unchanged (states shift-table
)
207 (dotimes (i (length states
))
208 (do-for-each-item (item (aref states i
))
209 (when (and (not (dot-at-end-p item
))
210 (not (eql (symbol-at-dot item
) *end-symbol
*)))
211 (let* ((x (symbol-at-dot item
))
212 (new-set (lalr-goto (aref states i
) x
))
213 (j (add-to-states new-set states
)))
214 (pushnew (list i x j
) shift-table
:test
#'equalp
))))))
215 (values shift-table states
)))
218 (defun compute-reductions (states)
219 "Compute reduce actions for the generated parser. Depends on
220 *STATE* already being filled, and returns the reduce actions."
221 (let ((reduce-table nil
))
222 (dotimes (i (length states
))
223 (do-for-each-item (item (aref states i
))
224 (when (dot-at-end-p item
)
225 (dolist (j (item-lookahead item
))
226 (pushnew (list i j item
) reduce-table
:test
#'equalp
)))))
230 (defun add-accept-actions (parse-table states
)
232 (n-states (length states
))
233 (item (make-almost-done-item)))
235 (when (find item
(aref states i
) :test
#'equalp
)
236 (add-to-parse-table parse-table n-states i
*end-symbol
* `(accept)))))
239 (defun add-to-parse-table (parse-table n-states i x action
)
240 "Adds ACTION to the parse table at (X,I). Applies braindead
241 conflict resolution rule to any conflicts detected."
242 (sunless (gethash x parse-table
)
243 (setf it
(make-array (list n-states
) :initial-element nil
)))
244 (aif (aref (gethash x parse-table
) i
)
245 ;; XXX should probably collate the number of conflicts
247 ;; XXX should resolve reduce/reduce conflicts by reducing by
249 (warn "~&Conflict at ~A,~A -> last action ~A, new action ~A."
251 ;; (assert (null (aref (gethash x parse-table) i)))
252 (setf (aref (gethash x parse-table
) i
) action
)))
255 (defun create-parse-table (shifts reductions states
)
256 "Constructs a parse table usable by PARSE, from the list of shift
257 and reduce actions, and the set of parse states."
258 (let ((parse-table (make-hash-table))
259 (n-states (length states
)))
260 (dolist (shift shifts
)
261 (destructuring-bind (i x j
) shift
262 (add-to-parse-table parse-table n-states i x
263 (list (if (non-terminal-p x
) 'goto
'shift
) j
))))
265 (dolist (reduce reductions
)
266 (destructuring-bind (i x j
) reduce
267 (add-to-parse-table parse-table n-states i x
268 `(reduce ,(item-lhs j
) ,(length (item-rhs j
))))))
269 (add-accept-actions parse-table states
)
274 ;;; XXX certainly not the most attractive way to do this, but I've
276 (defun write-parser-function (table package stream fn-name
)
277 (let* ((*package
* (find-package "LALR-PARSER-GENERATOR"))
278 (fn-name (intern (if (stringp fn-name
)
280 (symbol-name fn-name
)))))
281 (format stream
";; Automatically generated by LALR-PARSER-GENERATOR.")
282 (format stream
"~&(in-package ~S)~%" (package-name package
))
283 (pprint `(flet ((unmash (entries)
284 (let ((ht (make-hash-table)))
286 (setf (gethash (car e
) ht
) (cdr e
)))
288 (let ((table (unmash ',(let ((untable))
289 (maphash (lambda (k v
)
294 (defun ,fn-name
(next-token)
295 "NEXT-TOKEN is a function which returns a cons of the next token in
296 the input (the CAR being the symbol name, the CDR being any
297 information the lexer would like to preserve), and advances the input
298 one token. Returns what might pass for a parse tree in some
300 (do* ((stack (list 0))
301 (token (funcall next-token
))
303 (row (gethash (car token
) table
)
304 (gethash (car token
) table
)))
307 (error "~A is not a valid token in this grammar." token
))
308 (let ((action (aref row
(first stack
))))
311 (push token result-stack
)
312 (setf token
(funcall next-token
))
313 (push (second action
) stack
))
315 (push (list (second action
)) result-stack
)
316 (dotimes (i (third action
))
318 (push (pop (cdr result-stack
)) (cdar result-stack
)))
319 (destructuring-bind (goto state
)
320 (aref (gethash (second action
) table
) (first stack
))
321 (assert (eql goto
'goto
) (state) "Malformed parse table!")
323 (accept (return (car result-stack
)))
324 (t (error "Parse error at ~A" token
)))))))))))
327 (defun parse (table next-token
)
328 "TABLE is a table generated by CREATE-PARSE-TABLE, NEXT-TOKEN is a
329 function which returns a cons of the next token in the input (the CAR
330 being the symbol name, the CDR being any information the lexer would
331 like to preserve), and advances the input one token. Returns what
332 might pass for a parse tree in some countries."
333 (declare (optimize (debug 3)))
334 (do* ((stack (list 0))
335 (token (funcall next-token
))
337 (row (gethash (car token
) table
)
338 (gethash (car token
) table
)))
341 (error "~A is not a valid token in this grammar." token
))
342 (let ((action (aref row
(first stack
))))
345 (push token result-stack
)
346 (setf token
(funcall next-token
))
347 (push (second action
) stack
))
349 (push (list (second action
)) result-stack
)
350 (dotimes (i (third action
))
352 (push (pop (cdr result-stack
)) (cdar result-stack
)))
353 (destructuring-bind (goto state
)
354 (aref (gethash (second action
) table
) (first stack
))
355 (assert (eql goto
'goto
) (state) "Malformed parse table!")
357 (accept (return (car result-stack
)))
358 (t (error "Parse error at ~A" token
))))))
361 ;;;; External functions
363 (defun make-parser (grammar &key end-symbol start-symbol
364 (stream *standard-output
*)
367 (awhen end-symbol
(setf *end-symbol
* it
))
368 (awhen start-symbol
(setf *start-symbol
* it
))
369 (let ((*grammar
* (process-grammar grammar
)))
370 (process-grammar grammar
)
371 (multiple-value-bind (*first-set
* *follow-set
* *nullable-set
*)
372 (compute-prediction-sets *grammar
*)
373 (multiple-value-bind (shifts states
) (compute-shifts)
374 (let ((table (create-parse-table shifts
375 (compute-reductions states
)
377 (write-parser-function table package stream fn-name
))))))
382 (defun test-parser (grammar string
)
383 (let ((*grammar
* (process-grammar grammar
)))
384 (multiple-value-bind (*first-set
* *follow-set
* *nullable-set
*)
385 (compute-prediction-sets *grammar
*)
386 (with-input-from-string (*standard-input
* string
)
387 (multiple-value-bind (shifts states
) (compute-shifts)
388 (parse (create-parse-table shifts
389 (compute-reductions states
)
391 (lambda () (cons (read) nil
))))))))
393 (defparameter *lr0-test-grammar
*
394 '(sentence ((open list close
)
397 (list comma sentence
))))
399 (defparameter *slr-test-grammar
*
403 (defparameter *simple-nullable-test-grammar
*
411 (defparameter *simple-lalr-test-grammar
*
414 T
((n) (OPEN E CLOSE
))))