From c405fb41bb39e497eebf2978fb4d692f2e3d3715 Mon Sep 17 00:00:00 2001 From: Julian Squires Date: Sun, 17 Jul 2005 04:40:30 +0100 Subject: [PATCH] Some cleanups, improved #'MAKE-PARSER. --- lalr-parser-generator.asd | 6 +- macros.lisp | 2 +- parser.lisp | 321 ++++++++++++++++------------------------------ prediction.lisp | 102 +++++++++++++++ 4 files changed, 219 insertions(+), 212 deletions(-) create mode 100644 prediction.lisp diff --git a/lalr-parser-generator.asd b/lalr-parser-generator.asd index c97d460..549f18d 100644 --- a/lalr-parser-generator.asd +++ b/lalr-parser-generator.asd @@ -7,8 +7,10 @@ :depends-on (:anaphora) :components ((:file "package") - (:file "macros" :depends-on ("package")) - (:file "parser" :depends-on ("package" "macros")))) + (:file "macros") + (:file "prediction") + (:file "parser")) + :serial t) (defsystem lalr-parser-generator-test :depends-on (:lalr-parser-generator :rt)) diff --git a/macros.lisp b/macros.lisp index 0147804..d9d5bc5 100644 --- a/macros.lisp +++ b/macros.lisp @@ -69,6 +69,7 @@ bound to the left-hand side and right-hand side (in the form of a list of tokens) of the grammar rule." (let ((value (gensym))) `(maphash (lambda (,lhs ,value) + (declare (ignorable ,lhs ,value)) (dolist (,rhs ,value) ,@body)) ,grammar))) @@ -77,7 +78,6 @@ of tokens) of the grammar rule." "Do BODY for each terminal symbol referenced in GRAMMAR." (let ((list (gensym)) (unused (gensym))) - (declare (ignore unused)) `(do-for-each-production (,unused ,list ,grammar) (dolist (,var ,list) (when (not (non-terminal-p ,var)) diff --git a/parser.lisp b/parser.lisp index 6ff006e..8308662 100644 --- a/parser.lisp +++ b/parser.lisp @@ -1,32 +1,31 @@ ;;; LALR parser generator. ;;; Julian Squires / 2005 -;;; -;;; Currently SLR, will be LALR after rewrite. ;;; notes for rewrite: ;;; ;;; When we preprocess the grammar, give every symbol a unique ;;; integer, and then use bitvectors for all set operations. Keep a ;;; bitvector to track terminal/nonterminal-ness. +;;; (any benefit to doing this?) ;;; ;;; Add a suite of tests using RT. ;;; -;;; write parse tables and functions to a file, so projects don't need -;;; to even depend on this package to use their parser. +;;; Write some usage information. +;;; +;;; Add some operator precedence controls. +;;; +;;; Code to convert yacc file into suitable grammar. (in-package :lalr-parser-generator) ;;;; Special variables. -(defparameter +start-symbol+ 'start) -(defparameter +end-symbol+ '$) +(defparameter *start-symbol* 'start) +(defparameter *end-symbol* '$) (defvar *grammar* nil "The default grammar used by the LALR parser generator; set by PROCESS-GRAMMAR.") -(defvar *states* nil - "A list of states seen by the parser generator. Constructed in -COMPUTE-SHIFTS, used in COMPUTE-REDUCTIONS.") (defvar *first-set* nil) (defvar *follow-set* nil) (defvar *nullable-set* nil) @@ -37,21 +36,11 @@ COMPUTE-SHIFTS, used in COMPUTE-REDUCTIONS.") ;;; probably be much more efficient. For the moment, it doesn't ;;; matter. -(defun make-item (lhs rhs dot lookahead) - (list lhs rhs dot lookahead)) +(defstruct item (lhs) (rhs) (dot) (lookahead)) -(defun item-lhs (item) (first item)) -(defun item-rhs (item) (second item)) -(defun item-dot (item) (third item)) -(defun item-la (item) (fourth item)) -(defun (setf item-la) (value item) - (setf (fourth item) value)) +(defun dot-at-end-p (item) (endp (item-dot item))) -(defun dot-at-end-p (item) - (endp (item-dot item))) - -(defun symbol-at-dot (item) - (car (item-dot item))) +(defun symbol-at-dot (item) (car (item-dot item))) (defun advance-dot (item) "Returns the item dot, advanced by one symbol. Note: @@ -73,9 +62,11 @@ non-destructive." (when (and (equal (item-lhs item) (item-lhs (aref set i))) (equal (item-rhs item) (item-rhs (aref set i))) (equal (item-dot item) (item-dot (aref set i)))) - (unless (equal (item-la item) (item-la (aref set i))) - (setf (item-la (aref set i)) (union (item-la item) - (item-la (aref set i))))) + (unless (equal (item-lookahead item) (item-lookahead + (aref set i))) + (setf (item-lookahead (aref set i)) + (union (item-lookahead item) + (item-lookahead (aref set i))))) (return i))) ;(position item set :test #'equalp) (vector-push-extend item set))) @@ -84,8 +75,8 @@ non-destructive." ;;;; GRAMMAR (defun process-grammar (grammar) - "Processes GRAMMAR, sets *GRAMMAR*. Augments the grammar with a new -start rule." + "Processes GRAMMAR, returns a grammar suitable for binding to +*GRAMMAR. Augments the grammar with a new start rule." ;; split grammar into hash table of non-terminals, terminals. ;; the grammar is a list of non-terminals followed by their @@ -100,59 +91,20 @@ start rule." (cadr list->))) ;; augment grammar with start symbol - (dolist (i (list +start-symbol+ +end-symbol+)) + (dolist (i (list *start-symbol* *end-symbol*)) (assert (null (gethash i grammar-hash)) nil "~A is a reserved non-terminal, unfortunately. Try calling MAKE-PARSER with a different END-SYMBOL or START-SYMBOL specified." i)) - (setf (gethash +start-symbol+ grammar-hash) - (list (list (car grammar) +end-symbol+))) - (setf *grammar* grammar-hash))) + (setf (gethash *start-symbol* grammar-hash) + (list (list (car grammar) *end-symbol*))) + grammar-hash)) (defun non-terminal-p (symbol) (gethash symbol *grammar*)) (defun grammar-productions (symbol) (gethash symbol *grammar*)) -;;;; FIRST and FOLLOW - -(defun compute-prediction-sets () - "Computes the first, follow, and nullable sets for *GRAMMAR*. -Sets *FIRST-SET*, *FOLLOW-SET*, and *NULLABLE-SET*." - (let ((nullable (make-hash-table)) - (follow (make-hash-table)) - (first (make-hash-table))) - (flet ((nullable-p (x) (gethash x nullable))) - (do-for-each-terminal (z *grammar*) - (setf (gethash z first) (list z))) - - (do-until-unchanged (first follow nullable) - (do-for-each-production (x ys *grammar*) - (when (every #'nullable-p ys) - (setf (gethash x nullable) t)) - - (do ((i 0 (1+ i)) - (k (length ys))) - ((>= i k)) - - ;; Note - subseq 0 0 is NIL, the intended effect here. - (when (every #'nullable-p (subseq ys 0 i)) - (setf (gethash x first) - (union (gethash x first) - (gethash (nth i ys) first)))) - - (when (every #'nullable-p (subseq ys (1+ i) k)) - (setf (gethash (nth i ys) follow) - (union (gethash (nth i ys) follow) - (gethash x follow)))) - - (loop for j from (1+ i) to k - when (every #'nullable-p (subseq ys (1+ i) j)) - do (setf (gethash (nth i ys) follow) - (union (gethash (nth i ys) follow) - (gethash (nth j ys) first))))))) - - (setf *first-set* first *follow-set* follow *nullable-set* nullable) - (values first follow nullable)))) +;;;; PARSE TABLE CONSTRUCTION (defun first-sets (symbol-list) (do* ((x-> symbol-list (cdr x->)) @@ -161,78 +113,15 @@ Sets *FIRST-SET*, *FOLLOW-SET*, and *NULLABLE-SET*." ((or (null x->) (not (gethash (car x->) *nullable-set*))) s))) -;;; The following three functions are just for testing. Combined, -;;; they perform the same functions as COMPUTE-PREDICTION-SETS - -(defun list-nullable () - (let ((nullable nil)) - (do-until-unchanged (nullable) - (do-for-each-production (lhs rhs *grammar*) - (when (or (null rhs) - (every #'(lambda (x) (member x nullable)) rhs)) - (pushnew lhs nullable)))) - nullable)) - -(defun list-first-set (nullable) - (let ((first-set (make-hash-table))) - (do-for-each-terminal (x *grammar*) - (setf (gethash x first-set) (list x))) - (do-until-unchanged (first-set) - (do-for-each-production (lhs rhs *grammar*) - (do ((r-> rhs (cdr r->)) - (done-p nil)) - ((or done-p (null r->))) - (when (not (member (car r->) nullable)) - (setf (gethash lhs first-set) - (union (gethash lhs first-set) - (gethash (car r->) first-set))) - (setf done-p t)))) - - (do-for-each-production (lhs rhs *grammar*) - (do ((r-> rhs (cdr r->)) - (done-p nil)) - ((or done-p (null r->))) - (when (not (member (car r->) nullable)) - (setf (gethash lhs first-set) - (union (gethash lhs first-set) - (gethash (car r->) first-set))) - (setf done-p t))))) - first-set)) - -(defun list-follow-set (nullable first-set) - (let ((follow-set (make-hash-table))) - (do-until-unchanged (follow-set) - (do-for-each-production (lhs rhs *grammar*) - (do ((r-> rhs (cdr r->)) - (done-p nil)) - ((or done-p (null r->))) - (when (every (lambda (x) (member x nullable)) (cdr r->)) - (setf (gethash (car r->) follow-set) - (union (gethash (car r->) follow-set) - (gethash lhs follow-set)))) - - (loop for j from 1 to (length r->) - do (progn - (when (every (lambda (x) (member x nullable)) - (and (> j 1) (subseq r-> 1 (1- j)))) - (setf (gethash (car r->) follow-set) - (union (gethash (car r->) follow-set) - (gethash (nth j r->) first-set))))))))) - follow-set)) - - - -;;;; PARSE TABLE CONSTRUCTION - (defun lalr-closure (item-set) "Returns the closure of ITEM-SET." (do-until-unchanged (item-set) (do-for-each-item (i item-set) (when (non-terminal-p (symbol-at-dot i)) (dolist (r (grammar-productions (symbol-at-dot i))) - (add-to-set (make-item (symbol-at-dot i) r r - (union (first-sets (advance-dot i)) - (item-la i))) + (add-to-set (make-item :lhs (symbol-at-dot i) :rhs r :dot r + :lookahead (union (first-sets (advance-dot i)) + (item-lookahead i))) item-set))))) item-set) @@ -241,26 +130,26 @@ Sets *FIRST-SET*, *FOLLOW-SET*, and *NULLABLE-SET*." (let ((j (make-item-set))) (do-for-each-item (i item-set) (when (eql (symbol-at-dot i) grammar-symbol) - (add-to-set (make-item (item-lhs i) (item-rhs i) (advance-dot i) - (item-la i)) + (add-to-set (make-item :lhs (item-lhs i) :rhs (item-rhs i) + :dot (advance-dot i) + :lookahead (item-lookahead i)) j))) (lalr-closure j))) (defun make-start-item () "Makes the item S' -> .S$, as appropriate for the grammar." - (make-item +start-symbol+ - (first (gethash +start-symbol+ *grammar*)) - (first (gethash +start-symbol+ *grammar*)) - nil)) + (make-item :lhs *start-symbol* + :rhs (first (gethash *start-symbol* *grammar*)) + :dot (first (gethash *start-symbol* *grammar*)))) (defun make-almost-done-item () "Makes the item S' -> S.$, as appropriate for the grammar." (let* ((start-item (make-start-item)) (dot (do ((dot (advance-dot start-item) (cdr dot))) - ((or (null dot) (eql (car dot) +end-symbol+)) dot)))) + ((or (null dot) (eql (car dot) *end-symbol*)) dot)))) (assert (not (null dot))) - (make-item (item-lhs start-item) (item-rhs start-item) dot - nil))) + (make-item :lhs (item-lhs start-item) :rhs (item-rhs start-item) + :dot dot))) ;;; The code gets progressively uglier as I refine the data ;;; structures. Shame on me. @@ -291,9 +180,11 @@ Sets *FIRST-SET*, *FOLLOW-SET*, and *NULLABLE-SET*." (defun merge-la-in-sets (dst src) (dotimes (i (length dst)) - (unless (equal (item-la (aref dst i)) (item-la (aref src i))) - (setf (item-la (aref dst i)) (union (item-la (aref dst i)) - (item-la (aref src i))))))) + (unless (equal (item-lookahead (aref dst i)) + (item-lookahead (aref src i))) + (setf (item-lookahead (aref dst i)) + (union (item-lookahead (aref dst i)) + (item-lookahead (aref src i))))))) (defun add-to-states (set states) (or @@ -305,52 +196,51 @@ Sets *FIRST-SET*, *FOLLOW-SET*, and *NULLABLE-SET*." (vector-push-extend set states))) (defun compute-shifts () - "Compute shift actions for the generated parser. Fills the *STATE* -variable and returns a list of shift actions." - (setf *states* (make-array '(1) :adjustable t :fill-pointer 1 + "Compute shift actions and states for the generated parser. Returns +a list of shift actions and the state table." + (let ((shift-table nil) + (states (make-array '(1) :adjustable t :fill-pointer 1 :initial-element - (lalr-closure (make-item-set (make-start-item))))) - - (let ((shift-table nil)) - (do-until-unchanged (*states* shift-table) - (dotimes (i (length *states*)) - (do-for-each-item (item (aref *states* i)) + (lalr-closure + (make-item-set (make-start-item)))))) + (do-until-unchanged (states shift-table) + (dotimes (i (length states)) + (do-for-each-item (item (aref states i)) (when (and (not (dot-at-end-p item)) - (not (eql (symbol-at-dot item) +end-symbol+))) + (not (eql (symbol-at-dot item) *end-symbol*))) (let* ((x (symbol-at-dot item)) - (new-set (lalr-goto (aref *states* i) x)) - (j (add-to-states new-set *states*))) + (new-set (lalr-goto (aref states i) x)) + (j (add-to-states new-set states))) (pushnew (list i x j) shift-table :test #'equalp)))))) - shift-table)) + (values shift-table states))) -(defun compute-reductions () +(defun compute-reductions (states) "Compute reduce actions for the generated parser. Depends on *STATE* already being filled, and returns the reduce actions." (let ((reduce-table nil)) - (dotimes (i (length *states*)) - (do-for-each-item (item (aref *states* i)) + (dotimes (i (length states)) + (do-for-each-item (item (aref states i)) (when (dot-at-end-p item) - (dolist (j (item-la item)) - (pushnew (list i j item) - reduce-table :test #'equalp))))) + (dolist (j (item-lookahead item)) + (pushnew (list i j item) reduce-table :test #'equalp))))) reduce-table)) -(defun add-accept-actions (parse-table) +(defun add-accept-actions (parse-table states) (do* ((i 0 (1+ i)) + (n-states (length states)) (item (make-almost-done-item))) - ((>= i (length *states*))) - (when (find item (aref *states* i) :test #'equalp) - (add-to-parse-table parse-table i +end-symbol+ (list 'accept))))) + ((>= i n-states)) + (when (find item (aref states i) :test #'equalp) + (add-to-parse-table parse-table n-states i *end-symbol* `(accept))))) -(defun add-to-parse-table (parse-table i x action) +(defun add-to-parse-table (parse-table n-states i x action) "Adds ACTION to the parse table at (X,I). Applies braindead conflict resolution rule to any conflicts detected." - (anaphora:sunless (gethash x parse-table) - (setf anaphora:it (make-array (list (length *states*)) - :initial-element nil))) + (sunless (gethash x parse-table) + (setf it (make-array (list n-states) :initial-element nil))) (aif (aref (gethash x parse-table) i) ;; XXX should probably collate the number of conflicts ;; somewhere. @@ -362,41 +252,46 @@ conflict resolution rule to any conflicts detected." (setf (aref (gethash x parse-table) i) action))) -(defun create-parse-table (shifts reductions) +(defun create-parse-table (shifts reductions states) "Constructs a parse table usable by PARSE, from the list of shift -and reduce actions supplied as arguments, and from the set of states -stored in *STATES*, which COMPUTE-SHIFTS fills in." - (let ((parse-table (make-hash-table))) +and reduce actions, and the set of parse states." + (let ((parse-table (make-hash-table)) + (n-states (length states))) (dolist (shift shifts) (destructuring-bind (i x j) shift - (add-to-parse-table parse-table i x + (add-to-parse-table parse-table n-states i x (list (if (non-terminal-p x) 'goto 'shift) j)))) (dolist (reduce reductions) (destructuring-bind (i x j) reduce - (add-to-parse-table parse-table i x (list 'reduce - (item-lhs j) - (length (item-rhs j)))))) - (add-accept-actions parse-table) + (add-to-parse-table parse-table n-states i x + `(reduce ,(item-lhs j) ,(length (item-rhs j)))))) + (add-accept-actions parse-table states) parse-table)) -(defun write-parser-function (table package stream) - (let ((*package* package)) - (pprint `(in-package ,(package-name package)) stream) - (pprint `(labels ((unmash (entries) - (let ((ht (make-hash-table))) - (dolist (e entries) - (setf (gethash (car e) ht) (cdr e))) - ht))) +;;; XXX certainly not the most attractive way to do this, but I've +;;; done worse... +(defun write-parser-function (table package stream fn-name) + (let* ((*package* (find-package "LALR-PARSER-GENERATOR")) + (fn-name (intern (if (stringp fn-name) + fn-name + (symbol-name fn-name))))) + (format stream ";; Automatically generated by LALR-PARSER-GENERATOR.") + (format stream "~&(in-package ~S)~%" (package-name package)) + (pprint `(flet ((unmash (entries) + (let ((ht (make-hash-table))) + (dolist (e entries) + (setf (gethash (car e) ht) (cdr e))) + ht))) (let ((table (unmash ',(let ((untable)) (maphash (lambda (k v) (push (cons k v) untable)) table) untable)))) - (defun parse (next-token) + (defun ,fn-name (next-token) "NEXT-TOKEN is a function which returns a cons of the next token in the input (the CAR being the symbol name, the CDR being any information the lexer would like to preserve), and advances the input @@ -426,9 +321,7 @@ countries." (assert (eql goto 'goto) (state) "Malformed parse table!") (push state stack))) (accept (return (car result-stack))) - (t (error "Parse error at ~A" token)))))))) - stream))) - + (t (error "Parse error at ~A" token))))))))))) (defun parse (table next-token) @@ -469,23 +362,33 @@ might pass for a parse tree in some countries." (defun make-parser (grammar &key end-symbol start-symbol (stream *standard-output*) - (package *package*)) - (awhen end-symbol (setf +end-symbol+ it)) - (awhen start-symbol (setf +start-symbol+ it)) - (process-grammar grammar) - (compute-prediction-sets) - (let ((table (create-parse-table (compute-shifts) (compute-reductions)))) - (write-parser-function table package stream))) + (package *package*) + (fn-name 'parse)) + (awhen end-symbol (setf *end-symbol* it)) + (awhen start-symbol (setf *start-symbol* it)) + (let ((*grammar* (process-grammar grammar))) + (process-grammar grammar) + (multiple-value-bind (*first-set* *follow-set* *nullable-set*) + (compute-prediction-sets *grammar*) + (multiple-value-bind (shifts states) (compute-shifts) + (let ((table (create-parse-table shifts + (compute-reductions states) + states))) + (write-parser-function table package stream fn-name)))))) ;;;; Testing stuff. (defun test-parser (grammar string) - (process-grammar grammar) - (compute-prediction-sets) - (with-input-from-string (*standard-input* string) - (parse (create-parse-table (compute-shifts) (compute-reductions)) - (lambda () (cons (read) nil))))) + (let ((*grammar* (process-grammar grammar))) + (multiple-value-bind (*first-set* *follow-set* *nullable-set*) + (compute-prediction-sets *grammar*) + (with-input-from-string (*standard-input* string) + (multiple-value-bind (shifts states) (compute-shifts) + (parse (create-parse-table shifts + (compute-reductions states) + states) + (lambda () (cons (read) nil)))))))) (defparameter *lr0-test-grammar* '(sentence ((open list close) diff --git a/prediction.lisp b/prediction.lisp new file mode 100644 index 0000000..2dca2a2 --- /dev/null +++ b/prediction.lisp @@ -0,0 +1,102 @@ + +(in-package :lalr-parser-generator) + +;;;; FIRST and FOLLOW + +(defun compute-prediction-sets (grammar) + "Computes and returns the first, follow, and nullable sets for +GRAMMAR." + (let ((nullable (make-hash-table)) + (follow (make-hash-table)) + (first (make-hash-table))) + (flet ((nullable-p (x) (gethash x nullable))) + (do-for-each-terminal (z grammar) + (setf (gethash z first) (list z))) + + (do-until-unchanged (first follow nullable) + (do-for-each-production (x ys grammar) + (when (every #'nullable-p ys) + (setf (gethash x nullable) t)) + + (do ((i 0 (1+ i)) + (k (length ys))) + ((>= i k)) + + ;; Note - subseq 0 0 is NIL, the intended effect here. + (when (every #'nullable-p (subseq ys 0 i)) + (setf (gethash x first) + (union (gethash x first) + (gethash (nth i ys) first)))) + + (when (every #'nullable-p (subseq ys (1+ i) k)) + (setf (gethash (nth i ys) follow) + (union (gethash (nth i ys) follow) + (gethash x follow)))) + + (loop for j from (1+ i) to k + when (every #'nullable-p (subseq ys (1+ i) j)) + do (setf (gethash (nth i ys) follow) + (union (gethash (nth i ys) follow) + (gethash (nth j ys) first))))))) + + (values first follow nullable)))) + +;;; The following three functions are just for testing. Combined, +;;; they perform the same functions as COMPUTE-PREDICTION-SETS + +(defun list-nullable (grammar) + (let ((nullable nil)) + (do-until-unchanged (nullable) + (do-for-each-production (lhs rhs grammar) + (when (or (null rhs) + (every #'(lambda (x) (member x nullable)) rhs)) + (pushnew lhs nullable)))) + nullable)) + +(defun list-first-set (grammar nullable) + (let ((first-set (make-hash-table))) + (do-for-each-terminal (x grammar) + (setf (gethash x first-set) (list x))) + (do-until-unchanged (first-set) + (do-for-each-production (lhs rhs grammar) + (do ((r-> rhs (cdr r->)) + (done-p nil)) + ((or done-p (null r->))) + (when (not (member (car r->) nullable)) + (setf (gethash lhs first-set) + (union (gethash lhs first-set) + (gethash (car r->) first-set))) + (setf done-p t)))) + + (do-for-each-production (lhs rhs grammar) + (do ((r-> rhs (cdr r->)) + (done-p nil)) + ((or done-p (null r->))) + (when (not (member (car r->) nullable)) + (setf (gethash lhs first-set) + (union (gethash lhs first-set) + (gethash (car r->) first-set))) + (setf done-p t))))) + first-set)) + +(defun list-follow-set (grammar nullable first-set) + (let ((follow-set (make-hash-table))) + (do-until-unchanged (follow-set) + (do-for-each-production (lhs rhs grammar) + (do ((r-> rhs (cdr r->)) + (done-p nil)) + ((or done-p (null r->))) + (when (every (lambda (x) (member x nullable)) (cdr r->)) + (setf (gethash (car r->) follow-set) + (union (gethash (car r->) follow-set) + (gethash lhs follow-set)))) + + (loop for j from 1 to (length r->) + do (progn + (when (every (lambda (x) (member x nullable)) + (and (> j 1) (subseq r-> 1 (1- j)))) + (setf (gethash (car r->) follow-set) + (union (gethash (car r->) follow-set) + (gethash (nth j r->) first-set))))))))) + follow-set)) + -- 2.11.4.GIT