Fixed the new lexer.
[m68k-assembler.git] / lexer.lisp
blobb3e0bd316dafffa905829b9aed26e2635adfbb78
2 (in-package :m68k-assembler)
4 ;;;; LEXER BOOKKEEPING
6 (defstruct lexer-state
7 (stream)
8 (filename)
9 (line 0)
10 (column 0)
11 (current-string nil))
13 (defvar *lexer-states* nil)
14 ;;; *l-s-o-w-t-l-p* doesn't need to keep multi-file state.
15 (defvar *lexer-seen-only-whitespace-this-line-p*)
17 (defmacro with-lexer ((filename) &body body)
18 `(unwind-protect (progn
19 (init-lexer ,filename)
20 ,@body)
21 (close-lexer)))
23 (defun init-lexer (filename)
24 (setf *lexer-states* nil)
25 (nested-lexing filename))
27 (defun close-lexer ()
28 (loop for s = (pop *lexer-states*)
29 while s do (close (lexer-state-stream s))))
31 (defun lexer-next-line ()
32 (setf (lexer-state-column (first *lexer-states*)) 0)
33 (incf (lexer-state-line (first *lexer-states*))))
35 (defun lexer-next-column ()
36 (incf (lexer-state-column (first *lexer-states*))))
38 (defun update-lexer-column (c)
39 (assert (<= (lexer-state-column (first *lexer-states*)) c))
40 (setf (lexer-state-column (first *lexer-states*)) c))
42 (defun nested-lexing (filename)
43 (cond ((find filename *lexer-states* :test #'string-equal
44 :key #'lexer-state-filename)
45 (warn "~A is already in the chain of INCLUDES! Ignoring it..."
46 filename))
48 (setf *lexer-seen-only-whitespace-this-line-p* t)
49 (push (make-lexer-state :stream (open filename)
50 :filename filename)
51 *lexer-states*))))
53 ;;;; MAIN LEXER FUNCTION
55 (defun next-token ()
56 (unless *lexer-states*
57 (error 'end-of-file :stream nil))
58 (handler-case
59 (multiple-value-bind (string column)
60 (ensure-lexer-data (first *lexer-states*))
61 (when (and (zerop column) (not *lexer-seen-only-whitespace-this-line-p*))
62 (return-from next-token (maybe-return-$)))
63 (atypecase (devpac-lexer string #'update-lexer-column :start column)
64 (atom (next-token))
65 (t (setf *lexer-seen-only-whitespace-this-line-p* nil)
66 it)))
67 (end-of-file (condition)
68 (pop *lexer-states*)
69 (cond (*lexer-states* (next-token))
70 (*lexer-seen-only-whitespace-this-line-p*
71 (error condition))
72 (t (list '$ nil nil))))))
74 (defun ensure-lexer-data (state)
75 (symbol-macrolet ((string (lexer-state-current-string state)))
76 (unless (and string
77 (< (lexer-state-column state)
78 (length string)))
79 (setf string
80 (read-possibly-escaped-line (lexer-state-stream state))))
81 (values string (lexer-state-column state))))
83 (defun read-possibly-escaped-line (stream)
84 (loop for line = (read-line stream) then
85 (concatenate 'string
86 (make-array (list (1- (length line)))
87 :displaced-to line)
88 (read-line stream))
89 do (lexer-next-line)
90 unless (and (plusp (length line))
91 (char= (schar line (1- (length line))) #\\))
92 return line))
94 (cl-ppcre-lex:deflexer devpac-lexer
95 ("^[ \\t]*\\*.*$" () (when (zerop (lexer-state-column (first *lexer-states*)))
96 'whitespace))
97 ("[ \\t\\r\\n\\f]+|$" () 'whitespace)
98 (";.*$" () 'whitespace)
99 ("([-():,#+/*|&^~])"
100 (single) (assert single)
101 (let ((it (find (char single 0)
102 '((#\( open) (#\) close) (#\: colon) (#\, comma)
103 (#\# hash) (#\+ +) (#\- -) (#\/ /) (#\* *) (#\| or)
104 (#\& &) (#\^ ^) (#\~ ~))
105 :key #'car)))
106 (make-token (second it) (first it))))
107 ("([0-9]+)" (digits) (assert digits)
108 (make-token 'constant (parse-integer digits)))
109 ("\\$([0-9A-Fa-f]+)" (digits) (assert digits)
110 (make-token 'constant (parse-integer digits :radix 16)))
111 ("%([01]+)" (digits) (assert digits)
112 (make-token 'constant (parse-integer digits :radix 2)))
113 ("([A-Za-z0-9_\\\\=@.]+)(\\.[bBwWlL])"
114 (string modifier) (assert (and string modifier))
115 (setf modifier (when modifier (string-to-modifier modifier)))
116 (acond ((register-p (register-substitutions string))
117 (make-token 'register (list (register-substitutions string)
118 modifier)))
119 ((opcode-p string) (make-token 'opcode (list string modifier)))
120 ((pseudo-op-p string) (make-token 'pseudo-op (list string modifier)))
121 (t (make-token 'symbol string))))
122 ("([A-Za-z0-9_\\\\=@.]+)"
123 (string) (assert string)
124 (acond ((register-p (register-substitutions string))
125 (make-token 'register (list (register-substitutions string) nil)))
126 ((opcode-p string) (make-token 'opcode (list string nil)))
127 ((pseudo-op-p string) (make-token 'pseudo-op (list string nil)))
128 (t (make-token 'symbol string))))
129 ("\"([^\"]*)\"" (string) (make-token 'constant string))
130 ("<<" () (make-token '<< nil))
131 (">>" () (make-token '>> nil))
132 ("<([^<>]*)>" (string) (make-token 'symbol string))
133 ("." () (error "Unknown character.")))
135 ;;;; LEXER HELPERS
137 (defun make-token (symbol value)
138 (list symbol value (copy-lexer-state (first *lexer-states*))))
140 (defun terminal-p (symbol)
141 (member symbol '(open close colon comma hash + - / * or & ^ ~ << >>
142 constant symbol register opcode pseudo-op $)))
144 (defun is-position-info-p (x) (lexer-state-p x))
146 (defun string-to-modifier (string)
147 (let ((start (if (char= (char string 0) #\.) 1 0)))
148 (cond ((string-equal string "b" :start1 start) 'byte)
149 ((string-equal string "w" :start1 start) 'word)
150 ((string-equal string "l" :start1 start) 'long))))
152 (defun maybe-return-$ ()
153 (cond (*lexer-seen-only-whitespace-this-line-p* 'whitespace)
154 (t (setf *lexer-seen-only-whitespace-this-line-p* t)
155 (make-token '$ nil))))