From 4d4489739599d67242cecf8f9c716f00c802de01 Mon Sep 17 00:00:00 2001 From: Utz-Uwe Haus Date: Sat, 4 Oct 2008 23:18:54 +0200 Subject: [PATCH] Implement Memoization. Signed-off-by: Utz-Uwe Haus --- TODO | 3 ++- package.lisp | 4 ++-- pegutils.lisp | 44 +++++++++++++++++++++++++++++++------------- 3 files changed, 35 insertions(+), 16 deletions(-) diff --git a/TODO b/TODO index 7360e40..a101b10 100644 --- a/TODO +++ b/TODO @@ -1,10 +1,11 @@ -* Add better tracing/debugging of parser progress and failure * Fix opossum.peg until it - works for example.peg - can bootstrap itself * Implement memoization in generated functions +- partially done (needs to be done for built-ins too; should be done + through macro) * profile diff --git a/package.lisp b/package.lisp index 13b870f..044af6f 100644 --- a/package.lisp +++ b/package.lisp @@ -92,8 +92,8 @@ CL-USER>(eval *t*) @begin[Literature]{section} PEG parsers are a family of recursive descent parsers suitable to parsing context free grammars, - in linear time if memoization is used. OPOSSUM currently does not implement memoization, but this will - be fixed soon. See @a[http://pdos.csail.mit.edu/~baford/packrat/]{Bryan Ford}'s home page for details. + in linear time if memoization is used. + See @a[http://pdos.csail.mit.edu/~baford/packrat/]{Bryan Ford}'s home page for details. @end{section} @begin[Authors]{section} This code is released under the GNU Lesser Public License with Lisp clarifications and diff --git a/pegutils.lisp b/pegutils.lisp index b1be478..9ff4272 100644 --- a/pegutils.lisp +++ b/pegutils.lisp @@ -45,6 +45,9 @@ (dst-package :accessor dst-package :initarg :dst-package :initform nil :type 'package :documentation "The package into which symbols generated during the parse are interned.") + (memotab :accessor memotab :initarg :memotab :initform (make-hash-table :test #'equal) + :type 'hash-table + :documentation "Hash-table used to memoize parsing result. Keyed on (fun . offset) pairs.") ;; these slots are shared by all cloned copies of a context -- use only STORE-ACTION to guarantee consistency (actions :accessor actions :initarg :actions :initform (make-list 1 :initial-element NIL) :type 'list @@ -86,6 +89,7 @@ (make-instance 'context :input (input ctx) :dst-package (dst-package ctx) + :memotab (memotab ctx) :actions (actions ctx) :action-counter (action-counter ctx) :parent ctx @@ -125,6 +129,19 @@ ;; (value ctx) (start-index ctx) (end-index ctx))) ctx)) +(defun find-memoized-value (name offset &optional (ctx *context*)) + "Return a memoized value for FUN at OFFSET, or NIL." + (let ((res (gethash `(,name . ,offset) (memotab ctx)))) + (when *trace* + (format *trace-output* "~vT~A memoized result.~%" (depth ctx) (if res "Found" "No"))) + res)) + +(defun memoizing (name offset result-ctx &optional (ctx *context*)) + (when *trace* + (format *trace-output* "~vT(memoizing for ~A/~D)~%" (depth ctx) name offset)) + (setf (gethash `(,name . ,offset) (memotab ctx)) + result-ctx) + result-ctx) ;; (defun make-name (rule-string) @@ -153,19 +170,20 @@ ,(format nil "Parse a ~A at the given OFFSET." name) (let ((indent (depth *context*))) (when *trace* (format *trace-output* "~vTTrying to parse a ~A at pos ~D~%" indent ,name offset)) - (let* ((*context* (clone-ctx *context* ,name)) - (result (funcall ,parser offset))) - (unless result - (error "Parser function ~A did not return a value" ,parser)) - (if (ctx-failed-p result) - (progn - (when *trace* (format *trace-output* "~vT... no ~A at pos ~D~%" indent ,name offset)) - (fail)) - (progn - (when *trace* (format *trace-output* "~vT... found ~A at ~D:~D~%" - indent - ,name (start-index result) (end-index result))) - (succeed *context* (value result) (start-index result) (end-index result)))))))) + (or (find-memoized-value ,name offset) + (let* ((*context* (clone-ctx *context* ,name)) + (result (funcall ,parser offset))) + (unless result + (error "Parser function ~A did not return a value" ,parser)) + (if (ctx-failed-p result) + (progn + (when *trace* (format *trace-output* "~vT... no ~A at pos ~D~%" indent ,name offset)) + (memoizing ,name offset (fail))) + (progn + (when *trace* (format *trace-output* "~vT... found ~A at ~D:~D~%" + indent + ,name (start-index result) (end-index result))) + (memoizing ,name offset (succeed *context* (value result) (start-index result) (end-index result)))))))))) -- 2.11.4.GIT