First stab at (possibly broken) better lexer based on cl-ppcre.
[m68k-assembler.git] / deflexer.lisp
blob88f914eafa4a23a605dcfc459a5217dd752f9aff
1 ;;; deflexer using cl-ppcre
2 ;;;
3 ;;; Heavily stolen from
4 ;;; http://common-lisp.net/pipermail/cl-ppcre-devel/2004-June/000041.html
5 ;;; by Edi Weitz, combined with other stuff found in the CL-PPCRE code.
6 ;;;
7 ;;; Hacked together by Julian Squires <julian@cipht.net> / 2006.
8 ;;; XXX Needs heavy refactoring!
10 (defpackage :cl-ppcre-lex
11 (:use :cl :cl-ppcre :anaphora)
12 (:import-from :cl-ppcre #:nsubseq)
13 (:export #:deflexer))
15 (in-package :cl-ppcre-lex)
17 (eval-when (:compile-toplevel :load-toplevel :execute)
18 (defmacro with-unique-names ((&rest bindings) &body body)
19 ;; see <http://www.cliki.net/Common%20Lisp%20Utilities>
20 `(let ,(mapcar #'(lambda (binding)
21 (check-type binding (or cons symbol))
22 (if (consp binding)
23 (destructuring-bind (var x) binding
24 (check-type var symbol)
25 `(,var (gensym ,(etypecase x
26 (symbol (symbol-name x))
27 (character (string x))
28 (string x)))))
29 `(,binding (gensym ,(symbol-name binding)))))
30 bindings)
31 ,@body))
33 (defun collect-bindings (var-list string reg-starts reg-ends)
34 (loop for var in var-list
35 for counter from 0
36 when var
37 collect `(,var (awhen (aref ,reg-starts ,counter)
38 (nsubseq ,string it
39 (aref ,reg-ends ,counter))))))
41 (defun gather-fns (list)
42 (with-unique-names (scanner string start match-start match-end
43 reg-starts reg-ends next-pos-fn)
44 (loop for x in list
45 collect
46 (destructuring-bind (regex (&rest var-list) &body body) x
47 (let ((bindings (collect-bindings var-list string reg-starts reg-ends)))
48 `(cons ,regex
49 (lambda (,scanner ,string ,start ,next-pos-fn)
50 (multiple-value-bind
51 (,match-start ,match-end ,reg-starts ,reg-ends)
52 (cl-ppcre:scan ,scanner ,string :start ,start)
53 ,@(unless bindings
54 `((declare (ignore ,reg-starts ,reg-ends))))
55 (when ,match-start
56 (prog1
57 (progn ,@(if bindings
58 `((let (,@bindings) ,@body))
59 body))
60 (funcall ,next-pos-fn ,match-end))))))))))))
62 (defmacro deflexer (name &body body)
63 (with-unique-names (regex-table regex sexpr-regex anchored-regex function)
64 `(let ((,regex-table
65 (loop for (,regex . ,function) in (list ,@(gather-fns body))
66 for ,sexpr-regex =
67 (etypecase ,regex
68 (function
69 (error "Compiled scanners are not allowed here"))
70 (string
71 (cl-ppcre::parse-string ,regex))
72 (list
73 ,regex))
74 for ,anchored-regex =
75 (cl-ppcre:create-scanner `(:sequence
76 :modeless-start-anchor
77 ,,sexpr-regex))
78 collect (cons ,anchored-regex ,function))))
79 (defun ,name (string next-pos-fn &key ((:start start) 0))
80 (loop for (scanner . function) in ,regex-table
81 for value = (funcall function scanner string start next-pos-fn)
82 when value do (return value))))))