A vast number of cleanups, and rearrangement of code to make OpenMCL work.
[m68k-assembler.git] / lexer.lisp
blob7c240c6cb4e91480610cb39a402a7d72b7c01470
2 (in-package :m68k-assembler)
4 ;;;; TOKEN PARAMETERS
6 (defparameter *lexer-terminals*
7 '(open close colon comma hash + - / * or & ^ ~ << >>
8 constant symbol register opcode pseudo-op $))
10 (defparameter *lexer-single-char-tokens*
11 '((#\( open)
12 (#\) close)
13 (#\: colon)
14 (#\, comma)
15 (#\# hash)
16 (#\+ +) (#\- -) (#\/ /) (#\* *)
17 (#\| or) (#\& &) (#\^ ^) (#\~ ~)))
19 (defun list-char-range (start end)
20 (do ((x (char-code start) (1+ x))
21 (l nil))
22 ((> x (char-code end)) l)
23 (push (code-char x) l)))
25 (defparameter *lexer-word-characters*
26 `(,@(list-char-range #\A #\Z)
27 ,@(list-char-range #\a #\z)
28 ,@(list-char-range #\0 #\9)
29 #\_ #\. #\= #\\ #\@)
30 "Characters permitted in a symbol, register, or opcode.")
31 (defparameter *lexer-int-characters* `(,@(list-char-range #\0 #\9))
32 "Characters permitted in an integer.")
33 (defparameter *lexer-whitespace-characters* '(#\Space #\Tab))
36 ;;;; LEXER BOOKKEEPING
38 (defstruct lexer-state
39 (stream)
40 (filename)
41 (line 1)
42 (column 1))
44 (defvar *lexer-states* nil)
45 ;;; *l-s-o-w-t-l-p* doesn't need to keep multi-file state.
46 (defvar *lexer-seen-only-whitespace-this-line-p*)
48 (defmacro with-lexer ((filename) &body body)
49 `(unwind-protect (progn
50 (init-lexer ,filename)
51 ,@body)
52 (close-lexer)))
54 (defun init-lexer (filename)
55 (setf *lexer-states* nil)
56 (nested-lexing filename))
58 (defun close-lexer ()
59 (do ((s #1=(pop *lexer-states*) #1#))
60 ((null s))
61 (close (lexer-state-stream s))))
63 (defun lexer-next-line ()
64 (setf *lexer-seen-only-whitespace-this-line-p* t
65 (lexer-state-column (first *lexer-states*)) 1)
66 (incf (lexer-state-line (first *lexer-states*))))
68 (defun lexer-next-column ()
69 (incf (lexer-state-column (first *lexer-states*))))
71 (defun nested-lexing (filename)
72 (cond ((find filename *lexer-states* :test #'string-equal
73 :key #'lexer-state-filename)
74 (warn "~A is already in the chain of INCLUDES! Ignoring it..."
75 filename))
77 (setf *lexer-seen-only-whitespace-this-line-p* t)
78 (push (make-lexer-state :stream (open filename)
79 :filename filename)
80 *lexer-states*))))
82 ;;;; INTERMEDIARY LEXING FUNCTIONS (EATERS)
84 (defun eat-whitespace (stream)
85 (do ((next-char #1=(peek-char nil stream) #1#))
86 ((not (find next-char *lexer-whitespace-characters*)))
87 (read-char stream)
88 (lexer-next-column)))
90 (defun eat-string (stream &optional (start #\") (end #\"))
91 "Reads a backslash-escaped string from STREAM, delimited by the
92 characters START and END (which default to quotes)."
93 (assert (eql (read-char stream) start))
94 (lexer-next-column)
95 (do ((next-char #1=(progn (lexer-next-column)
96 (read-char stream)) #1#)
97 (string (make-array '(0) :element-type 'character
98 :adjustable t :fill-pointer 0)))
99 ((eql next-char end) string)
100 (acase next-char
101 (#\\ (vector-push-extend (read-char stream) string)
102 (lexer-next-column))
103 (t (vector-push-extend it string)))))
105 (defconstant +int-conversion-table+ "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ")
106 (defun digit-to-int (char &optional (radix 10))
107 (position char +int-conversion-table+ :test #'char-equal :end radix))
109 (defun eat-integer (stream &optional (radix 10))
110 "If the next character is a digit, read digits until the next
111 character is not a digit."
112 (do ((next-char #1=(peek-char nil stream) #1#) int)
113 ((not (digit-to-int next-char radix)) int)
114 (setf int (+ (* (or int 0) radix)
115 (digit-to-int (read-char stream) radix)))
116 (lexer-next-column)))
118 (defun eat-hex (stream)
119 (assert (eql (read-char stream) #\$))
120 (lexer-next-column)
121 (eat-integer stream 16))
123 (defun eat-binary (stream)
124 (assert (eql (read-char stream) #\%))
125 (lexer-next-column)
126 (eat-integer stream 2))
128 (defun eat-symbol (stream)
129 (do ((next-char #1=(peek-char nil stream) #1#)
130 (symbol (make-array '(0) :element-type 'character
131 :adjustable t :fill-pointer 0)))
132 ((not (find next-char *lexer-word-characters*)) symbol)
133 (lexer-next-column)
134 (vector-push-extend (read-char stream) symbol)))
137 ;;;; MAIN LEXER FUNCTION
139 (defun next-token ()
140 (handler-case
141 (if *lexer-states*
142 (next-token-1 (lexer-state-stream (first *lexer-states*)))
143 (signal 'end-of-file))
144 (end-of-file nil
145 (pop *lexer-states*)
146 (if *lexer-states* (next-token) (signal 'end-of-file)))))
148 (defun next-token-1 (stream)
149 (eat-whitespace stream)
150 (let ((lookahead (peek-char nil stream)))
151 ;; XXX ugly first-column asterix hack
152 (unless (member lookahead '(#\* #\Return #\Newline #\;))
153 (setf *lexer-seen-only-whitespace-this-line-p* nil))
154 (acond
155 ((find lookahead *lexer-single-char-tokens* :key #'car)
156 ;; XXX ugly first-column asterix hack
157 (cond (*lexer-seen-only-whitespace-this-line-p*
158 (read-line stream)
159 (lexer-next-line)
160 (next-token-1 stream))
161 (t (read-char stream)
162 (make-token (cadr it) lookahead))))
163 ;; The order of the following few cases is significant.
164 ((member lookahead *lexer-int-characters*)
165 (make-token 'constant (eat-integer stream)))
166 ((eql lookahead #\$) (make-token 'constant (eat-hex stream)))
167 ((eql lookahead #\%) (make-token 'constant (eat-binary stream)))
168 ((member lookahead *lexer-word-characters*)
169 (let ((token (eat-symbol stream)))
170 (multiple-value-bind (string modifier) (munge-modifier token)
171 (setf modifier (string-to-modifier modifier))
172 (cond ((register-p (register-substitutions string))
173 (make-token 'register (list (register-substitutions string)
174 modifier)))
175 ((opcode-p string) (make-token 'opcode
176 (list string modifier)))
177 ((pseudo-op-p string) (make-token 'pseudo-op
178 (list string modifier)))
179 (t (make-token 'symbol token))))))
180 ((eql lookahead #\") ; string
181 (make-token 'constant (eat-string stream)))
183 ;; Little special cases.
184 ((eql lookahead #\<) ; expect < or macro parameter
185 (read-char stream)
186 (cond ((eql (peek-char nil stream) #\<)
187 (read-char stream)
188 (make-token '<< nil))
189 ;; XXX: one problem here is that we don't deal with
190 ;; escaped <'s inside the string, and I'm not too anxious
191 ;; to do so yet, either.
192 (t (unread-char #\< stream)
193 (make-token 'symbol (eat-string stream #\< #\>)))))
194 ((eql lookahead #\>) ; expect >
195 (read-char stream)
196 (assert (eql (read-char stream) #\>))
197 (make-token '>> nil))
198 ((eql lookahead #\\)
199 ;; if it's a macro parameter (\[1-9A-Za-z] or \@) store it as a
200 ;; symbol ... MACRO will know what to do with it.
201 ;; also there's \symbol and \$symbol but I'm not sure if we'll
202 ;; support them yet.
203 ;; otherwise, it might be a line continuation token. XXX
204 (read-char stream)
205 (when (or (char-equal (peek-char nil stream) #\@)
206 (member (peek-char nil stream) *lexer-word-characters*))
207 (make-token 'symbol (concatenate 'string "\\" (eat-symbol stream)))))
208 ((eql lookahead #\;) ; comment
209 (read-line stream)
210 (maybe-return-$ stream))
211 ((eql lookahead #\Return) ; stupid ^M
212 (read-line stream)
213 (maybe-return-$ stream))
214 ((eql lookahead #\Newline)
215 (read-line stream)
216 (maybe-return-$ stream)))))
218 ;;;; LEXER HELPERS
220 (defun maybe-return-$ (stream)
221 (cond (*lexer-seen-only-whitespace-this-line-p*
222 (lexer-next-line) (next-token-1 stream))
223 (t (lexer-next-line) (make-token '$ nil))))
225 (defun make-token (symbol value)
226 (list symbol value (copy-lexer-state (first *lexer-states*))))
228 (defun terminal-p (symbol) (member symbol *lexer-terminals*))
230 (defun is-position-info-p (x) (lexer-state-p x))
232 (defun string-to-modifier (string)
233 (cond ((string-equal string "b") 'byte)
234 ((string-equal string "w") 'word)
235 ((string-equal string "l") 'long)))