1 ;;; LALR parser generator.
2 ;;; Julian Squires / 2005
6 ;;; Add some operator precedence controls.
8 ;;; Write some usage information.
10 ;;; When we preprocess the grammar, give every symbol a unique
11 ;;; integer, and then use bitvectors for all set operations. Keep a
12 ;;; bitvector to track terminal/nonterminal-ness.
14 ;;; Code to convert yacc file into suitable grammar.
18 (in-package :lalr-parser-generator
)
20 ;;;; Special variables.
22 (defparameter *start-symbol
* 'start
)
23 (defparameter *end-symbol
* '$
)
26 "The default grammar used by the LALR parser generator; set by
28 (defvar *first-set
* nil
)
29 (defvar *follow-set
* nil
)
30 (defvar *nullable-set
* nil
)
35 (defstruct item
(lhs) (rhs) (dot) (lookahead))
37 ;;; XXX should these dot functions operate on the dot itself, rather
38 ;;; than calling item-dot? That would make it easier to hide the fact
39 ;;; that dot is just a list.
40 (defun dot-at-end-p (item) (endp (item-dot item
)))
41 (defun symbol-at-dot (item) (car (item-dot item
)))
42 (defun advance-dot (item)
43 "Returns the item dot, advanced by one symbol. Note:
45 (cdr (item-dot item
)))
48 ;;; item sets -- see also macros.lisp.
50 (defun make-item-set (&rest items
)
51 (let ((set (make-array '(0) :adjustable t
:fill-pointer
0)))
52 (dolist (i items
) (add-to-set i set
))
56 (defun items-equal-except-lookahead-p (a b
)
57 (every (lambda (x) (equal (funcall x a
) (funcall x b
)))
58 (list #'item-lhs
#'item-rhs
#'item-dot
)))
60 (defun add-to-set (item set
)
61 "Returns position of ITEM in SET."
62 (let* ((i (or (position item set
:test
#'items-equal-except-lookahead-p
)
63 (vector-push-extend item set
)))
64 (la-of-a (item-lookahead item
))
65 (la-of-b (item-lookahead (aref set i
))))
66 (unless (equal la-of-a la-of-b
)
67 (unionf (item-lookahead (aref set i
)) la-of-a
))))
69 (defun item-set-equal-ignoring-la (set-a set-b
)
70 (when (= (length set-a
) (length set-b
))
71 (every #'items-equal-except-lookahead-p set-a set-b
)))
76 (defun process-grammar (grammar)
77 "Processes GRAMMAR, returns a grammar suitable for binding to
78 *GRAMMAR*. Augments the grammar with a new start rule."
79 ;; we compile the basic hash table of non-terminals by iterating
80 ;; through the grammar, storing the lists of productions.
81 (let ((grammar-hash (make-hash-table)))
82 (do ((list-> grammar
(cddr list-
>)))
84 (setf (gethash (car list-
>) grammar-hash
)
87 (augment-grammar grammar-hash
(car grammar
))
90 (defun augment-grammar (hash first-real-symbol
)
91 ;; augment grammar with start symbol
92 (dolist (i (list *start-symbol
* *end-symbol
*))
93 (assert (null (gethash i hash
)) nil
94 "~A is a reserved non-terminal, unfortunately. Try
95 calling MAKE-PARSER with a different END-SYMBOL or START-SYMBOL
97 (setf (gethash *start-symbol
* hash
)
98 (list (list first-real-symbol
*end-symbol
*))))
100 (defun non-terminal-p (symbol) (gethash symbol
*grammar
*))
101 (defun grammar-productions (symbol) (gethash symbol
*grammar
*))
104 ;;;; PARSE TABLE CONSTRUCTION
106 (defun first-sets (symbol-list)
107 "Returns the union of the first sets of each symbol in SYMBOL-LIST,
108 until either a nullable symbol is found or we run out of symbols."
109 (do* ((x-> symbol-list
(cdr x-
>))
110 (s (and x-
> (gethash (car x-
>) *first-set
*))
111 (union s
(and x-
> (gethash (car x-
>) *first-set
*)))))
112 ((or (null x-
>) (not (gethash (car x-
>) *nullable-set
*)))
115 (defun lalr-closure (item-set)
116 "Returns the closure of ITEM-SET."
117 (do-until-unchanged (item-set)
118 (do-for-each-item (i item-set
)
119 (when (non-terminal-p (symbol-at-dot i
))
120 (dolist (r (grammar-productions (symbol-at-dot i
)))
121 (add-to-set (make-item :lhs
(symbol-at-dot i
) :rhs r
:dot r
122 :lookahead
(union (first-sets (advance-dot i
))
127 (defun lalr-goto (item-set grammar-symbol
)
128 "Returns the closure of ITEM-SET after having read GRAMMAR-SYMBOL."
129 (let ((j (make-item-set)))
130 (do-for-each-item (i item-set
)
131 (when (eql (symbol-at-dot i
) grammar-symbol
)
132 (add-to-set (make-item :lhs
(item-lhs i
) :rhs
(item-rhs i
)
134 :lookahead
(item-lookahead i
))
138 (defun make-start-item ()
139 "Makes the item S' -> .S$, as appropriate for the grammar."
140 (make-item :lhs
*start-symbol
*
141 :rhs
(first (gethash *start-symbol
* *grammar
*))
142 :dot
(first (gethash *start-symbol
* *grammar
*))))
144 (defun make-almost-done-item ()
145 "Makes the item S' -> S.$, as appropriate for the grammar."
146 (let* ((start-item (make-start-item))
147 (dot (do ((dot (advance-dot start-item
) (cdr dot
)))
148 ((or (null dot
) (eql (car dot
) *end-symbol
*)) dot
))))
149 (assert (not (null dot
)))
150 (make-item :lhs
(item-lhs start-item
) :rhs
(item-rhs start-item
)
155 (defun merge-lookahead-in-sets (src dst
)
156 (macrolet ((la (set) `(item-lookahead (aref ,set i
))))
157 (dotimes (i (length dst
))
158 (unless (equal (la dst
) (la src
))
159 (unionf (la dst
) (la src
))))))
161 (defun add-to-states (set states
)
162 "Adds SET to STATES, either by merging it with another set which is
163 identical save for look-ahead, or push it onto the end. Returns the
165 (flet ((merge-existing ()
166 (loop for i below
(length states
)
167 and other-set across states
168 when
(item-set-equal-ignoring-la set other-set
)
169 do
(merge-lookahead-in-sets set other-set
)
171 (or (merge-existing) (vector-push-extend set states
))))
173 (defun make-initial-state ()
174 (lalr-closure (make-item-set (make-start-item))))
176 (defun compute-shifts (table)
177 "Computes shift actions and states for the generated parser. Adds
178 shifts to the parse table as we find them. Returns the state table."
179 (let ((states (make-array '(1) :adjustable t
:fill-pointer
1
180 :initial-element
(make-initial-state))))
181 (do-until-unchanged (states) ;; XXX also table?
182 (dotimes (i (length states
))
183 (do-for-each-item (item (aref states i
))
184 (maybe-shift table states item i
))))
187 ;; XXX awful name; refactor.
188 (defun maybe-shift (table states item i
)
189 (unless (or (dot-at-end-p item
)
190 (eql (symbol-at-dot item
) *end-symbol
*))
191 (let* ((symbol (symbol-at-dot item
))
192 (new-set (lalr-goto (aref states i
) symbol
))
193 (j (add-to-states new-set states
))
194 (action (list (if (non-terminal-p symbol
) 'goto
'shift
) j
)))
195 (add-to-parse-table table i symbol action
))))
198 (defun compute-reductions (table states
)
199 "Compute reduce actions for the generated parser, based on STATES.
200 Fills in TABLE with the reduce actions."
201 (dotimes (i (length states
))
202 (do-for-each-item (item (aref states i
))
203 (when (dot-at-end-p item
)
204 (dolist (symbol (item-lookahead item
))
205 (let ((action `(reduce ,(item-lhs item
)
206 ,(length (item-rhs item
)))))
207 (add-to-parse-table table i symbol action
)))))))
210 (defun add-accept-actions (parse-table states
)
211 "Finds states whose next token should be $ (EOF) and adds accept
212 actions to the parse table for those states."
213 (loop with n-states
= (length states
)
214 and item
= (make-almost-done-item)
215 for i from
0 below n-states
216 when
(find item
(aref states i
) :test
#'equalp
)
217 do
(add-to-parse-table parse-table i
*end-symbol
* '(accept))))
220 (defun add-to-parse-table (parse-table state symbol action
)
221 "Adds ACTION to the parse table at (SYMBOL,STATE). Applies braindead
222 conflict resolution rule to any conflicts detected."
223 (sunless (gethash symbol parse-table
)
224 (setf it
(make-hash-table :test
'equal
)))
225 (let ((row (gethash symbol parse-table
)))
226 (awhen (gethash state row
)
227 (when (equal action it
) (return-from add-to-parse-table
))
228 (setf action
(resolve-collision action it symbol state
))
229 (warn "Preferring ~A." action
))
230 (setf (gethash state row
) action
)))
233 (defun resolve-collision (new-action old-action symbol state
)
234 ;; XXX should probably collate the number of conflicts
236 (warn "~&Conflict at ~A,~A -> last action ~A, new action ~A."
237 symbol state old-action new-action
)
238 (cond ((and (eql (car old-action
) 'shift
) (eql (car new-action
) 'reduce
))
239 old-action
) ; S/R => prefer shift.
240 ((and (eql (car old-action
) 'reduce
) (eql (car new-action
) 'reduce
))
241 ;; R/R => prefer longer reduction.
242 (if (>= (third new-action
) (third old-action
))
245 (t (error "This is an unexpected conflict (~A, ~A). Call a wizard."
246 old-action new-action
))))
249 (defun create-parse-table ()
250 "Constructs a parse table usable by PARSE."
251 (let* ((parse-table (make-hash-table))
252 (states (compute-shifts parse-table
)))
253 (compute-reductions parse-table states
)
254 (add-accept-actions parse-table states
)
259 (defun hash->tree
(table)
261 (maphash #'(lambda (k v
) (push (cons k
(typecase v
262 (hash-table (hash->tree v
))
268 (defun tree->hash
(tree)
269 (let ((ht (make-hash-table)))
271 (setf (gethash (car x
) ht
)
272 (if (listp (second x
))
277 ;;; XXX certainly not the most attractive way to do this, but I've
279 (defun write-parser-function (table package stream fn-name
)
280 (let* ((*package
* (find-package "LALR-PARSER-GENERATOR"))
281 (fn-name (intern (if (stringp fn-name
)
283 (symbol-name fn-name
)))))
284 (format stream
";; Automatically generated by LALR-PARSER-GENERATOR.")
285 (format stream
"~&(in-package ~S)~%" (package-name package
))
286 (pprint `(labels ((tree->hash
(tree)
287 (let ((ht (make-hash-table)))
289 (setf (gethash (car x
) ht
)
290 (if (listp (second x
))
294 (let ((table (tree->hash
',(hash->tree table
))))
295 (defun ,fn-name
(next-token)
296 "NEXT-TOKEN is a function which returns a cons of the next token in
297 the input (the CAR being the symbol name, the CDR being any
298 information the lexer would like to preserve), and advances the input
299 one token. Returns what might pass for a parse tree in some
301 (loop with stack
= (list 0)
302 and token
= (funcall next-token
)
304 for row
= (gethash (car token
) table
)
306 (gethash (first stack
) row
)
307 (error "~A is not a valid token in this grammar."
309 do
(case (first action
)
310 (shift (push token result-stack
)
311 (setf token
(funcall next-token
))
312 (push (second action
) stack
))
313 (reduce (push (list (second action
)) result-stack
)
314 (dotimes (i (third action
))
316 (push (pop (cdr result-stack
)) (cdar result-stack
)))
317 (destructuring-bind (goto state
)
318 (gethash (first stack
) (gethash (second action
) table
))
319 (assert (eql goto
'goto
) () "Malformed parse table!")
321 (accept (return (car result-stack
)))
322 (t (error "Parse error at ~A" token
))))))) stream
)))
325 (defun parse (table next-token
)
326 "TABLE is a table generated by CREATE-PARSE-TABLE, NEXT-TOKEN is a
327 function which returns a cons of the next token in the input (the CAR
328 being the symbol name, the CDR being any information the lexer would
329 like to preserve), and advances the input one token. Returns what
330 might pass for a parse tree in some countries."
331 (loop with stack
= (list 0)
332 and token
= (funcall next-token
)
334 for row
= (gethash (car token
) table
)
336 (gethash (first stack
) row
)
337 (error "~A is not a valid token in this grammar."
339 do
(case (first action
)
340 (shift (push token result-stack
)
341 (setf token
(funcall next-token
))
342 (push (second action
) stack
))
343 (reduce (push (list (second action
)) result-stack
)
344 (dotimes (i (third action
))
346 (push (pop (cdr result-stack
)) (cdar result-stack
)))
347 (destructuring-bind (goto state
)
348 (gethash (first stack
) (gethash (second action
) table
))
349 (assert (eql goto
'goto
) () "Malformed parse table!")
351 (accept (return (car result-stack
)))
352 (t (error "Parse error at ~A" token
)))))
354 ;;;; External functions
356 ;; XXX document this, improve interface
357 (defun make-parser (grammar &key end-symbol start-symbol
358 (stream *standard-output
*)
361 "Writes a parser for GRAMMAR onto STREAM, with symbols in PACKAGE;
362 notably, with the parser name being FN-NAME (default of PARSE)."
363 (awhen end-symbol
(setf *end-symbol
* it
))
364 (awhen start-symbol
(setf *start-symbol
* it
))
365 (let ((*grammar
* (process-grammar grammar
)))
366 (multiple-value-bind (*first-set
* *follow-set
* *nullable-set
*)
367 (compute-prediction-sets *grammar
*)
368 (let ((table (create-parse-table)))
369 (write-parser-function table package stream fn-name
)))))
374 (defun test-parser (grammar string
)
375 (let ((*grammar
* (process-grammar grammar
))
377 (multiple-value-bind (*first-set
* *follow-set
* *nullable-set
*)
378 (compute-prediction-sets *grammar
*)
379 (with-input-from-string (*standard-input
* string
)
380 (parse (create-parse-table)
381 #'(lambda () (cons (handler-case (read)
382 (end-of-file () *end-symbol
*))