2 (in-package :m68k-assembler
)
6 (defparameter *lexer-terminals
*
7 '(open close colon comma hash
+ -
/ * or
& ^ ~
<< >>
8 constant symbol register opcode pseudo-op $
))
10 (defparameter *lexer-single-char-tokens
*
16 (#\
+ +) (#\- -
) (#\
/ /) (#\
* *)
17 (#\| or
) (#\
& &) (#\^ ^
) (#\~ ~
)))
19 (defun list-char-range (start end
)
20 (do ((x (char-code start
) (1+ x
))
22 ((> x
(char-code end
)) l
)
23 (push (code-char x
) l
)))
25 (defparameter *lexer-word-characters
*
26 `(,@(list-char-range #\A
#\Z
)
27 ,@(list-char-range #\a #\z
)
28 ,@(list-char-range #\
0 #\
9)
30 "Characters permitted in a symbol, register, or opcode.")
31 (defparameter *lexer-int-characters
* `(,@(list-char-range #\
0 #\
9))
32 "Characters permitted in an integer.")
33 (defparameter *lexer-whitespace-characters
* '(#\Space
#\Tab
))
36 ;;;; LEXER BOOKKEEPING
38 (defstruct lexer-state
44 (defvar *lexer-states
* nil
)
45 ;;; *l-s-o-w-t-l-p* doesn't need to keep multi-file state.
46 (defvar *lexer-seen-only-whitespace-this-line-p
*)
48 (defmacro with-lexer
((filename) &body body
)
49 `(unwind-protect (progn
50 (init-lexer ,filename
)
54 (defun init-lexer (filename)
55 (setf *lexer-states
* nil
)
56 (nested-lexing filename
))
59 (do ((s #1=(pop *lexer-states
*) #1#))
61 (close (lexer-state-stream s
))))
63 (defun lexer-next-line ()
64 (setf *lexer-seen-only-whitespace-this-line-p
* t
65 (lexer-state-column (first *lexer-states
*)) 1)
66 (incf (lexer-state-line (first *lexer-states
*))))
68 (defun lexer-next-column ()
69 (incf (lexer-state-column (first *lexer-states
*))))
71 (defun nested-lexing (filename)
72 (cond ((find filename
*lexer-states
* :test
#'string-equal
73 :key
#'lexer-state-filename
)
74 (warn "~A is already in the chain of INCLUDES! Ignoring it..."
77 (setf *lexer-seen-only-whitespace-this-line-p
* t
)
78 (push (make-lexer-state :stream
(open filename
)
82 ;;;; INTERMEDIARY LEXING FUNCTIONS (EATERS)
84 (defun eat-whitespace (stream)
85 (do ((next-char #1=(peek-char nil stream
) #1#))
86 ((not (find next-char
*lexer-whitespace-characters
*)))
90 (defun eat-string (stream &optional
(start #\") (end #\"))
91 "Reads a backslash-escaped string from STREAM, delimited by the
92 characters START and END (which default to quotes)."
93 (assert (eql (read-char stream
) start
))
95 (do ((next-char #1=(progn (lexer-next-column)
96 (read-char stream
)) #1#)
97 (string (make-array '(0) :element-type
'character
98 :adjustable t
:fill-pointer
0)))
99 ((eql next-char end
) string
)
101 (#\\ (vector-push-extend (read-char stream
) string
)
103 (t (vector-push-extend it string
)))))
105 (defconstant +int-conversion-table
+ "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ")
106 (defun digit-to-int (char &optional
(radix 10))
107 (position char
+int-conversion-table
+ :test
#'char-equal
:end radix
))
109 (defun eat-integer (stream &optional
(radix 10))
110 "If the next character is a digit, read digits until the next
111 character is not a digit."
112 (do ((next-char #1=(peek-char nil stream
) #1#) int
)
113 ((not (digit-to-int next-char radix
)) int
)
114 (setf int
(+ (* (or int
0) radix
)
115 (digit-to-int (read-char stream
) radix
)))
116 (lexer-next-column)))
118 (defun eat-hex (stream)
119 (assert (eql (read-char stream
) #\$
))
121 (eat-integer stream
16))
123 (defun eat-binary (stream)
124 (assert (eql (read-char stream
) #\%
))
126 (eat-integer stream
2))
128 (defun eat-symbol (stream)
129 (do ((next-char #1=(peek-char nil stream
) #1#)
130 (symbol (make-array '(0) :element-type
'character
131 :adjustable t
:fill-pointer
0)))
132 ((not (find next-char
*lexer-word-characters
*)) symbol
)
134 (vector-push-extend (read-char stream
) symbol
)))
137 ;;;; MAIN LEXER FUNCTION
142 (next-token-1 (lexer-state-stream (first *lexer-states
*)))
143 (signal 'end-of-file
))
146 (if *lexer-states
* (next-token) (signal 'end-of-file
)))))
148 (defun next-token-1 (stream)
149 (eat-whitespace stream
)
150 (let ((lookahead (peek-char nil stream
)))
151 ;; XXX ugly first-column asterix hack
152 (unless (member lookahead
'(#\
* #\Return
#\Newline
#\
;))
153 (setf *lexer-seen-only-whitespace-this-line-p
* nil
))
155 ((find lookahead
*lexer-single-char-tokens
* :key
#'car
)
156 ;; XXX ugly first-column asterix hack
157 (cond (*lexer-seen-only-whitespace-this-line-p
*
160 (next-token-1 stream
))
161 (t (read-char stream
)
162 (make-token (cadr it
) lookahead
))))
163 ;; The order of the following few cases is significant.
164 ((member lookahead
*lexer-int-characters
*)
165 (make-token 'constant
(eat-integer stream
)))
166 ((eql lookahead
#\$
) (make-token 'constant
(eat-hex stream
)))
167 ((eql lookahead
#\%
) (make-token 'constant
(eat-binary stream
)))
168 ((member lookahead
*lexer-word-characters
*)
169 (let ((token (eat-symbol stream
)))
170 (multiple-value-bind (string modifier
) (munge-modifier token
)
171 (setf modifier
(string-to-modifier modifier
))
172 (cond ((register-p (register-substitutions string
))
173 (make-token 'register
(list (register-substitutions string
)
175 ((opcode-p string
) (make-token 'opcode
176 (list string modifier
)))
177 ((pseudo-op-p string
) (make-token 'pseudo-op
178 (list string modifier
)))
179 (t (make-token 'symbol token
))))))
180 ((eql lookahead
#\") ; string
181 (make-token 'constant
(eat-string stream
)))
183 ;; Little special cases.
184 ((eql lookahead
#\
<) ; expect < or macro parameter
186 (cond ((eql (peek-char nil stream
) #\
<)
188 (make-token '<< nil
))
189 ;; XXX: one problem here is that we don't deal with
190 ;; escaped <'s inside the string, and I'm not too anxious
191 ;; to do so yet, either.
192 (t (unread-char #\
< stream
)
193 (make-token 'symbol
(eat-string stream
#\
< #\
>)))))
194 ((eql lookahead
#\
>) ; expect >
196 (assert (eql (read-char stream
) #\
>))
197 (make-token '>> nil
))
199 ;; if it's a macro parameter (\[1-9A-Za-z] or \@) store it as a
200 ;; symbol ... MACRO will know what to do with it.
201 ;; also there's \symbol and \$symbol but I'm not sure if we'll
203 ;; otherwise, it might be a line continuation token. XXX
205 (when (or (char-equal (peek-char nil stream
) #\
@)
206 (member (peek-char nil stream
) *lexer-word-characters
*))
207 (make-token 'symbol
(concatenate 'string
"\\" (eat-symbol stream
)))))
208 ((eql lookahead
#\
;) ; comment
210 (maybe-return-$ stream
))
211 ((eql lookahead
#\Return
) ; stupid ^M
213 (maybe-return-$ stream
))
214 ((eql lookahead
#\Newline
)
216 (maybe-return-$ stream
)))))
220 (defun maybe-return-$
(stream)
221 (cond (*lexer-seen-only-whitespace-this-line-p
*
222 (lexer-next-line) (next-token-1 stream
))
223 (t (lexer-next-line) (make-token '$ nil
))))
225 (defun make-token (symbol value
)
226 (list symbol value
(copy-lexer-state (first *lexer-states
*))))
228 (defun terminal-p (symbol) (member symbol
*lexer-terminals
*))
230 (defun is-position-info-p (x) (lexer-state-p x
))
232 (defun string-to-modifier (string)
233 (cond ((string-equal string
"b") 'byte
)
234 ((string-equal string
"w") 'word
)
235 ((string-equal string
"l") 'long
)))