Load GIF images using the Skippy library, instead of the external
[closure-html.git] / src / util / lalr.lisp
blobf0302d2a95b5ede645b1602c31a09f27b0172ef3
1 ;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: LALR; -*-
2 ;;; ---------------------------------------------------------------------------
3 ;;; Title: LALR parser generator
4 ;;; Created: 1988
5 ;;; Author: Mark Johnson <mj@cs.brown.edu>
6 ;;; ---------------------------------------------------------------------------
7 ;;; (c) 1988 Mark Johnson
9 (defpackage :lalr
10 (:use :cl :glisp :runes)
11 (:export #:DEFINE-GRAMMAR))
13 (in-package :LALR)
15 ;;; lalr.lisp
16 ;;;
17 ;;; This is an LALR parser generator.
18 ;;; (c) 1988 Mark Johnson. mj@cs.brown.edu
19 ;;; This is *not* the property of Xerox Corporation!
21 ;;; Modified to cache the first terminals, the epsilon derivations
22 ;;; the rules that expand a category, and the items that expand
23 ;;; a category
25 ;;; There is a sample grammar at the end of this file.
26 ;;; Use your text-editor to search for "Test grammar" to find it.
28 ;;; (in-package 'LALR)
29 ;;; (export '(make-parser lalr-parser *lalr-debug* grammar lexforms $ parse))
31 ;;; (shadow '(first rest))
32 ;;; (defmacro first (x) `(car ,x))
33 ;;; (defmacro rest (x) `(cdr ,x))
35 ;;; The external interface is MAKE-PARSER. It takes three arguments, a
36 ;;; CFG grammar, a list of the lexical or terminal categories, and an
37 ;;; atomic end marker. It produces a list which is the Lisp code for
38 ;;; an LALR(1) parser for that grammar. If that list is compiled, then
39 ;;; the function LALR-PARSER is defined. LALR-PARSER is a function with
40 ;;; two arguments, NEXT-INPUT and PARSE-ERROR.
41 ;;;
42 ;;; The first argument to LALR-PARSER, NEXT-INPUT must be a function with
43 ;;; zero arguments; every time NEXT-INPUT is called it should return
44 ;;; two values, the first is the category of the next lexical
45 ;;; form in the input and the second is the value of that form.
46 ;;; Each call to NEXT-INPUT should advance one lexical item in the
47 ;;; input. When the input is consumed, NEXT-INPUT should return a
48 ;;; CONS whose CAR is the atomic end marker used in the call to MAKE-PARSER.
49 ;;;
50 ;;; The second argument to LALR-PARSER, PARSE-ERROR will be called
51 ;;; if the parse fails because the input is ill-formed.
52 ;;;
53 ;;;
54 ;;; There is a sample at the end of this file.
56 ;;; definitions of constants and global variables used
58 (defconstant *TOPCAT* '$Start)
59 (defvar *ENDMARKER*)
60 (defvar glex)
61 (defvar grules)
62 (defvar gstart)
63 (defvar gstarts)
64 (defvar gcats)
65 (defvar gfirsts)
66 (defvar gepsilons)
67 (defvar gexpansions)
68 (defvar *lalr-debug* NIL "Inserts debugging code into parser if non-NIL")
69 (defvar stateList '())
71 (defvar *first-terminals-cache* nil)
73 (defmacro fixnum= (x y) `(= (the fixnum ,x) (the fixnum ,y)))
75 (defun make-parser (grammar lex endMarker &key (name 'lalr-parser))
76 "Takes a grammar and produces the Lisp code for a parser for that grammar"
77 (setq *ENDMARKER* endMarker)
79 ;;; cache some data that will be useful later
80 (setq glex lex)
81 (setq gstart (caar grammar))
82 (setq grules (let ((i 0))
83 (mapcar #'(lambda (r) (transform-rule r (incf i)))
84 grammar)))
85 (setq gcats (get-all-cats))
87 (progn
88 (setq gexpansions (make-hash-table :test #'eq))
89 (setq *first-terminals-cache* (make-hash-table :test #'equal))
90 (dolist (cat gcats)
91 (setf (gethash cat gexpansions) (compute-expansion cat))))
93 (setq gepsilons (remove-if-not #'derives-eps gcats))
95 (progn
96 (setq gstarts (make-hash-table :test #'eq))
97 (setf (gethash *ENDMARKER* gstarts) (list *ENDMARKER*))
98 (dolist (cat gcats)
99 (setf (gethash cat gstarts) (first-terms (list cat)))) )
100 ;;; now actually build the parser
101 (build-table)
102 (when (and (listp *lalr-debug*) (member 'print-table *lalr-debug*))
103 (Print-Table stateList))
104 (format T "~%; Table ready (total of ~R rules --> ~R states)."
105 (length grammar)
106 (length stateList))
107 (format T "~%; Dumping: ")
108 (prog1 (build-parser name)
109 (format T "~&") ))
111 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
113 ;;; Rules and Grammars
116 (defstruct rule no mother daughters action)
118 (defun transform-rule (rule no)
119 (make-rule :no no
120 :mother (first rule)
121 :daughters (butlast (cddr rule))
122 :action (car (last rule))))
124 (defun compute-expansion (cat)
125 (remove-if-not #'(lambda (rule)
126 (eq (rule-mother rule) cat))
127 grules))
129 (defmacro expand (cat)
130 `(gethash ,cat gexpansions) )
132 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
134 ;;; Properties of grammars
136 (defun get-all-cats ()
137 (labels ((try (dejaVu cat)
138 (if (find cat dejaVu)
139 dejaVu
140 (tryRules (cons cat dejaVu) (compute-expansion cat))))
141 (tryRules (dejaVu rules)
142 (if rules
143 (tryRules (tryCats dejaVu (rule-daughters (car rules)))
144 (cdr rules))
145 dejaVu))
146 (tryCats (dejaVu cats)
147 (if cats
148 (tryCats (try dejaVu (car cats)) (cdr cats))
149 dejaVu)))
150 (try '() gstart)))
152 (defun derives-eps (c)
153 "t if c can be rewritten as the null string"
154 (labels ((try (dejaVu cat)
155 (unless (find cat dejaVu)
156 (some #'(lambda (r)
157 (every #'(lambda (c1) (try (cons cat dejaVu) c1))
158 (rule-daughters r)))
159 (expand cat)))))
160 (try '() c)))
162 (defmacro derives-epsilon (c)
163 "looks up the cache to see if c derives the null string"
164 `(member ,c gepsilons))
166 (defun first-terms (catList)
167 "the leading terminals of an expansion of catList"
168 (labels ((firstDs (cats)
169 (if cats
170 (if (derives-epsilon (car cats))
171 (cons (car cats) (firstDs (cdr cats)))
172 (list (car cats)))))
173 (try (dejaVu cat)
174 (if (member cat dejaVu)
175 dejaVu
176 (tryList (cons cat dejaVu)
177 (mapcan #'(lambda (r)
178 (firstDs (rule-daughters r)))
179 (expand cat)))))
180 (tryList (dejaVu cats)
181 (if cats
182 (tryList (try dejaVu (car cats)) (cdr cats))
183 dejaVu)))
184 (remove-if-not #'(lambda (term)
185 (or (eq *ENDMARKER* term)
186 (find term glex)))
187 (tryList '() (firstDs catList)))))
189 (defun first-terminals (cat-list)
190 (if cat-list
191 (if (derives-epsilon (first cat-list))
192 (union (gethash (first cat-list) gstarts)
193 (first-terminals (rest cat-list)))
194 (gethash (first cat-list) gstarts))
195 '()))
197 #+IGNORE
198 (defun first-terminals* (cat-list-0 cat-1)
199 (let ((key (cons cat-list-0 cat-1)))
200 (multiple-value-bind (v found?) (gethash key *first-terminals-cache*)
201 (cond (found? v)
202 (t (setf (gethash key *first-terminals-cache*)
203 (block foo
204 (let ((res nil))
205 (dolist (c0 cat-list-0)
206 (setf res (union res (gethash c0 gstarts)))
207 (unless (derives-epsilon c0)
208 (return-from foo res)))
209 (union res (gethash cat-1 gstarts)) )))) ))))
211 (defmacro first-terminals* (cat-list-0 cat-1)
212 `(let ((cat-list-0 ,cat-list-0)
213 (cat-1 ,cat-1))
214 (let ((key (cons cat-list-0 cat-1)))
215 (multiple-value-bind (v found?) (gethash key *first-terminals-cache*)
216 (cond (found? v)
217 (t (setf (gethash key *first-terminals-cache*)
218 (block foo
219 (let ((res nil))
220 (dolist (c0 cat-list-0)
221 (setf res (union res (gethash c0 gstarts)))
222 (unless (derives-epsilon c0)
223 (return-from foo res)))
224 (union res (gethash cat-1 gstarts)) )))) )))))
226 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
228 ;;; LALR(1) parsing table constructor
231 (defstruct item rule pos la)
233 (defmacro item-daughters (i) `(rule-daughters (item-rule ,i)))
235 (defmacro item-right (i) `(nthcdr (item-pos ,i) (item-daughters ,i)))
237 (defmacro item-equal (i1 i2)
238 `(and (eq (item-rule ,i1) (item-rule ,i2))
239 (fixnum= (item-pos ,i1) (item-pos ,i2))
240 (eq (item-la ,i1) (item-la ,i2))))
242 (defmacro item-core-equal (c1 c2)
243 "T if the cores of c1 and c2 are equal"
244 `(and (eq (item-rule ,c1) (item-rule ,c2))
245 (fixnum= (item-pos ,c1) (item-pos ,c2))))
247 (defun close-items (items)
248 "computes the closure of a set of items"
249 (declare (optimize (speed 3)))
250 (do ((to-do items))
251 ((null to-do) items)
252 (let ((i (pop to-do)))
253 (let ((rgt (item-right i)))
254 (when rgt
255 (dolist (la (first-terminals* (rest rgt) (item-la i) ))
256 (dolist (r (expand (first rgt)))
257 (unless (dolist (i items)
258 (if (and (eq r (item-rule i))
259 (eq (item-la i) la)
260 (fixnum= (item-pos i) 0))
261 (return t)))
262 (let ((new (make-item :rule r :pos 0 :la la)))
263 (push new items)
264 (push new to-do))))))))))
266 (defun shift-items (items cat)
267 "shifts a set of items over cat"
268 (labels ((shift-item (item)
269 (if (eq (first (item-right item)) cat)
270 (make-item :rule (item-rule item)
271 :pos (1+ (item-pos item))
272 :la (item-la item)))))
273 (let ((new-items '()))
274 (dolist (i items)
275 (let ((n (shift-item i)))
276 (if n
277 (push n new-items))))
278 new-items)))
280 (defun items-right (items)
281 "returns the set of categories appearing to the right of the dot"
282 (let ((right '()))
283 (dolist (i items)
284 (let ((d (first (item-right i))))
285 (if (and d (not (find d right)))
286 (push d right))))
287 right))
289 (defun compact-items (items)
290 "collapses items with the same core to compact items"
291 (let ((soFar '()))
292 (dolist (i items)
293 (let ((ci (dolist (s soFar)
294 (if (item-core-equal s i)
295 (return s)))))
296 (if ci
297 (push (item-la i) (item-la ci))
298 (push (make-item :rule (item-rule i)
299 :pos (item-pos i)
300 :la (list (item-la i)))
301 soFar))))
302 (sort soFar #'<
303 :key #'(lambda (i) (rule-no (item-rule i))))))
305 (defmacro expand-citems (citems)
306 "expands a list of compact items into items"
307 `(let ((items '()))
308 (dolist (ci ,citems)
309 (dolist (la (item-la ci))
310 (push (make-item :rule (item-rule ci)
311 :pos (item-pos ci)
312 :la la)
313 items)))
314 items))
316 (defun subsumes-citems (ci1s ci2s)
317 "T if the sorted set of items ci2s subsumes the sorted set ci1s"
318 (and (fixnum= (length ci1s) (length ci2s))
319 (every #'(lambda (ci1 ci2)
320 (and (item-core-equal ci1 ci2)
321 (subsetp (item-la ci1) (item-la ci2))))
322 ci1s ci2s)))
324 (defun merge-citems (ci1s ci2s)
325 "Adds the las of ci1s to ci2s. ci2s should subsume ci1s"
326 (mapcar #'(lambda (ci1 ci2)
327 (setf (item-la ci2) (nunion (item-la ci1) (item-la ci2))))
328 ci1s ci2s)
329 ci2s)
331 ;;; The actual table construction functions
333 (defstruct state name citems shifts conflict)
334 (defstruct shift cat where)
336 (defparameter nextStateNo -1)
338 ;(defun lookup (citems)
339 ; "finds a state with the same core items as citems if it exits"
340 ; (find-if #'(lambda (state)
341 ; (and (= (length citems) (length (state-citems state)))
342 ; (every #'(lambda (ci1 ci2)
343 ; (item-core-equal ci1 ci2))
344 ; citems (state-citems state))
345 ; ))
346 ; stateList))
348 (defun lookup (citems)
349 "finds a state with the same core items as citems if it exits"
350 (dolist (state stateList)
351 (if (and (fixnum= (length citems) (length (state-citems state)))
352 (do ((ci1s citems (cdr ci1s))
353 (ci2s (state-citems state) (cdr ci2s)))
354 ((null ci1s) t)
355 (unless (item-core-equal (car ci1s) (car ci2s))
356 (return nil))))
357 (return state))))
359 (defun add-state (citems)
360 "creates a new state and adds it to the state list"
361 (let ((newState
362 (make-state :name (intern (format nil "STATE-~D" (incf nextStateNo)))
363 :citems citems)))
364 (push newState stateList)
365 newState))
367 (defun get-state-name (items)
368 "returns the state name for this set of items"
369 (let* ((citems (compact-items items))
370 (state (lookup citems)))
371 (cond ((null state)
372 (setq state (add-state citems))
373 (build-state state items))
374 ((subsumes-citems citems (state-citems state))
375 nil)
377 (merge-citems citems (state-citems state))
378 (follow-state items)))
379 (state-name state)))
382 (defun build-state (state items)
383 "creates the states that this state can goto"
384 (let ((closure (close-items items)))
385 (dolist (cat (items-right closure))
386 (push (make-shift :cat cat
387 :where (get-state-name (shift-items closure cat)))
388 (state-shifts state)))))
390 (defun follow-state (items)
391 "percolates look-ahead onto descendant states of this state"
392 (let ((closure (close-items items)))
393 (dolist (cat (items-right closure))
394 (get-state-name (shift-items closure cat)))))
396 (defun build-table ()
397 "Actually builds the table"
398 (setq stateList '())
399 (setq nextStateNo -1)
400 (get-state-name (list (make-item :rule (make-rule :no 0
401 :mother *TOPCAT*
402 :daughters (list gstart))
403 :pos 0
404 :la *ENDMARKER*)))
405 (setq stateList (nreverse stateList)))
407 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
409 ;;; LALR(1) parsing table printer
412 (defun print-table (stateList)
413 "Prints the state table"
414 (dolist (state stateList)
415 (format t "~%~%~a:" (state-name state))
416 (dolist (citem (state-citems state))
417 (format t "~% ~a -->~{ ~a~} .~{ ~a~}, ~{~a ~}"
418 (rule-mother (item-rule citem))
419 (subseq (rule-daughters (item-rule citem)) 0 (item-pos citem))
420 (subseq (rule-daughters (item-rule citem)) (item-pos citem))
421 (item-la citem)))
422 (dolist (shift (state-shifts state))
423 (format t "~% On ~a shift ~a" (shift-cat shift) (shift-where shift)))
424 (dolist (reduce (compact-items
425 (delete-if #'(lambda (i) (item-right i))
426 (close-items
427 (expand-citems (state-citems state))))))
428 (format t "~% On~{ ~a~} reduce~{ ~a~} --> ~a"
429 (item-la reduce)
430 (rule-daughters (item-rule reduce))
431 (rule-mother (item-rule reduce)))))
432 (format t "~%"))
434 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
436 ;;; LALR(1) parser constructor
439 ;;; next-input performs lexical analysis. It must return two values.
440 ;;; the category and the value.
442 (defmacro define-grammar (name lex-forms &rest grammar)
443 (make-parser grammar lex-forms :eof :name name))
445 (defun build-parser (name)
446 "returns an lalr(1) parser. next-input must return 2 values!"
447 `(DEFUN ,name (NEXT-INPUT PARSE-ERROR)
448 (LET ((CAT-LA '()) ;category lookahead
449 (VAL-LA '()) ;value lookahead
450 (VAL-STACK '()) ;value stack
451 (STATE-STACK '()) ;state stack
452 (CUR-STATE ',(state-name (first stateList)))) ;current state
453 (LABELS ((INPUT-PEEK ()
454 (UNLESS CAT-LA
455 (MULTIPLE-VALUE-SETQ (CAT-LA VAL-LA) (FUNCALL NEXT-INPUT))
456 (SETF CAT-LA (LIST CAT-LA)
457 VAL-LA (LIST VAL-LA)))
458 (FIRST CAT-LA))
459 (SHIFT-FROM (NAME)
460 (PUSH NAME STATE-STACK)
461 (POP CAT-LA)
462 (PUSH (POP VAL-LA) VAL-STACK))
463 (REDUCE-CAT (NAME CAT NDAUGHTERS ACTION)
464 (IF (EQ CAT ',*topcat*)
465 (RETURN-FROM ,name (POP VAL-STACK))
466 (LET ((DAUGHTER-VALUES '())
467 (STATE NAME))
468 (DOTIMES (I NDAUGHTERS)
469 (PUSH (POP VAL-STACK) DAUGHTER-VALUES)
470 (SETQ STATE (POP STATE-STACK)))
471 (PUSH CAT CAT-LA)
472 (PUSH (APPLY ACTION DAUGHTER-VALUES) VAL-LA)
473 (SETQ CUR-STATE STATE)))))
474 (loop
475 (case cur-state
476 ,@(mapcar #'translate-state stateList)) )))))
478 (defun translate-state (state)
479 "translates a state into lisp code that could appear in a TAGBODY form"
480 ;;(format T " ~(~S~)" (state-name state))
481 (princ #\.)
482 (let ((reduces (compact-items
483 (delete-if #'(lambda (i) (item-right i))
484 (close-items
485 (expand-citems (state-citems state))))))
486 (symbols-sofar '())) ; to ensure that a symbol never occurs twice
487 (labels ((translate-shift (shift)
488 (push (shift-cat shift) symbols-sofar)
489 `(,(shift-cat shift) ;case key
490 (SHIFT-FROM ',(state-name state))
491 (SETQ CUR-STATE ',(shift-where shift))))
493 (translate-reduce (item)
494 (when (intersection (item-la item) symbols-sofar)
495 (format t "~&Warning, Not LALR(1)!!: ~a, ~a --> ~{~a ~}~%"
496 (state-name state)
497 (rule-mother (item-rule item))
498 (rule-daughters (item-rule item)))
499 (setf (item-la item) (nset-difference (item-la item) symbols-sofar)))
500 (dolist (la (item-la item))
501 (push la symbols-sofar))
503 `(,(item-la item) ;case key
504 (REDUCE-CAT ',(state-name state)
505 ',(rule-mother (item-rule item))
506 ,(item-pos item)
507 ,(rule-action (item-rule item))))))
508 `(,(state-name state)
509 (case (input-peek)
510 ,@(mapcar #'translate-shift (state-shifts state))
511 ,@(mapcar #'translate-reduce reduces)
512 (otherwise (funcall parse-error)))))))