Fixed the new lexer.
[m68k-assembler.git] / assembler.lisp
blobab6d331bbe78a77292066ea50508276696d65739
2 (in-package :m68k-assembler)
4 ;;;; "MAIN" STUFF
6 (defun assemble (input-name &key (object-name "mr-ed.o")
7 (object-format :aout))
8 "Assembles a source file named by INPUT-NAME, to an object file
9 named by OBJECT-NAME.
11 OBJECT-FORMAT can currently only be :AOUT for relocatable a.out object
12 files."
13 (wipe-symbol-table)
14 (setf ;; create backpatch list
15 *backpatch-list* nil
16 ;; init macro variables
17 *defining-macro-p* nil *defining-rept-p* nil
18 *defining-conditional-compilation-p* nil
19 ;; last label seen
20 *last-label* nil
21 *sections* nil)
22 ;; open file and start processing
23 (with-lexer (input-name)
24 (with-sections ('text 'data 'bss)
25 (setf *current-section* (cdr (assoc 'text *sections*)))
26 (process-current-file)
27 ;; Prior to backpatching, record section lengths, because
28 ;; temporary-files aren't really file streams the way CL would
29 ;; like them to be, so we can't just FILE-LENGTH them.
30 (let ((lengths (mapcar (lambda (x)
31 (cons (car x)
32 (section-length (cdr x))))
33 *sections*)))
34 (backpatch *backpatch-list*)
35 (ecase object-format
36 (:aout (finalize-object-file object-name lengths)))))))
40 (defun process-current-file ()
41 (handler-case
42 (loop
43 ;; we strip individual token position information here, which
44 ;; might suck for superfine debugging/warning precision, but
45 ;; we really don't need all that clutter.
46 (multiple-value-bind (line *source-position*)
47 (unclutter-line (parse #'next-token))
48 (assert (eq (pop line) 'line) ()
49 "Some internal parse tree is messed up.")
51 (using-section (*current-section*)
52 ;; if we're in a macro or repeat, just accumulate this
53 ;; line. otherwise, process it.
54 (cond ((or *defining-macro-p*
55 *defining-rept-p*
56 *defining-conditional-compilation-p*)
57 (if (and (eq (operation-type-of-line line) 'pseudo-op)
58 (string-equal (opcode-of-line line)
59 (cond (*defining-macro-p* "ENDM")
60 (*defining-rept-p* "ENDR")
61 (*defining-conditional-compilation-p* "ENDC")
62 (t (error "Bad block!")))))
63 (process-line line)
64 (push line *macro-buffer*)))
65 (t (process-line line))))))
66 (end-of-file nil)))
68 (defun operation-type-of-line (line)
69 (awhen (cadr (find 'operation line :key #'car)) (car it)))
71 (defun opcode-of-line (line)
72 (awhen (cadr (find 'operation line :key #'car)) (caadr it)))
74 (defun process-line (line)
75 (let ((label (find 'label line :key #'car))
76 (operation (cadr (find 'operation line :key #'car)))
77 (operands (mapcar #'simplify-operand
78 (extract-operands line))))
79 (cond ((eq (operation-type-of-line line) 'opcode)
80 (assemble-opcode label operation operands))
81 ((eq (operation-type-of-line line) 'pseudo-op)
82 (assemble-pseudo-op label operation operands))
84 (handle-label-normally label)))))
86 (defun handle-label-normally (label)
87 (when label
88 (let ((name (extract-string-from-tree label)))
89 (when (not (local-label-name-p name))
90 (setf *last-label* name))
91 ;; If the type is nil, the symbol isn't present.
92 (cond ((null (get-symbol-type name))
93 (add-to-symbol-table name *program-counter*))
94 ;; If the value is nil, it was probably declared as a
95 ;; global before it was actually defined. Patch value,
96 ;; type, debug-info.
97 ((null (get-symbol-value name))
98 (let ((sym (get-asm-symbol name)))
99 (assert (not (eq (asm-symbol-type sym) 'extern))
100 (name) "Trying to set a value for an EXTERN symbol!")
101 (setf (asm-symbol-value sym) *program-counter*
102 (asm-symbol-type sym) (section-name *current-section*)
103 (asm-symbol-debug-info sym) *source-position*)))
104 ;; Otherwise, it's a redefinition and I won't stand for it.
105 (t (warn "~A: tried to redefine label ~A -- ignoring."
106 *source-position* name))))))
108 (defun assemble-opcode (label operation operands)
109 (let ((opcode (caadr operation))
110 (modifier (cadadr operation)))
111 (handle-label-normally label)
112 (aif (find-matching-entry opcode operands modifier)
113 (let ((*defining-relocations-p* t))
114 (multiple-value-bind (data len)
115 (generate-code (second it) operands modifier)
116 (when (consp data)
117 (push (make-backpatch-item data len) *backpatch-list*)
118 ;; XXX m68kism.
119 (setf data #x4E714E71)) ; Output NOP if all else fails.
120 (output-data data len)))
121 (error "~S: no matching opcode for ~A!" *source-position*
122 opcode))))
125 (defun assemble-pseudo-op (label operation operands)
126 (let ((opcode (caadr operation))
127 (modifier (cadadr operation)))
128 (let ((*defining-relocations-p* t))
129 (acond ((get-pseudo-op opcode)
130 (when (functionp it)
131 (funcall it label opcode operands modifier)))
132 ((eql (get-symbol-type opcode) 'macro)
133 (execute-macro opcode operands modifier))
134 (t (error "~&~S: bad pseudo-op ~A!"
135 *source-position* opcode))))))
138 (defun output-data (data length)
139 "Outputs DATA in big-endian order to the currently active temporary
140 object file and updates the program counter. Returns the address to
141 which the data assembled."
142 #+nil (unless (zerop (mod length 8))
143 (setf length (ash (ceiling (/ length 8)) 3)))
144 (when (eq (section-name *current-section*) 'text)
145 (unless (zerop (mod length 16))
146 (format t "~&~A: ~A" *source-position* length)))
147 (incf *program-counter* (ash length -3))
148 (funcall (section-output-fn *current-section*)
149 *object-stream* data length))
152 ;;;; MACROS
154 ;; Devpac macros -- when we see a macro start, collect until the ENDM,
155 ;; so we can expand ourselves.
157 (defstruct asm-macro
158 (count 0)
159 (body))
161 (defun define-equate (label op operands modifier)
162 (declare (ignore op modifier))
163 (unless label
164 (error "~A: EQU always needs a label." *source-position*))
165 (assert (= (length operands) 1))
166 (let ((*defining-relocations-p* nil))
167 (add-to-symbol-table label (absolute-value (first operands))
168 :type 'absolute)))
170 (defun execute-macro (name operands modifier)
171 (labels ((sub (match &rest registers)
172 (declare (ignore registers))
173 (acase (char match 1)
174 (#\@ (format nil "_~D"
175 (asm-macro-count (get-symbol-value name))))
176 (#\0 (case modifier (byte ".B") (long ".L") (t ".W")))
177 ;; XXX ugly hack.
178 (t (extract-string-from-tree (nth (1- (digit-char-p it 36))
179 operands)))))
181 (seek+destroy (tree)
182 (typecase tree
183 (string
184 (cl-ppcre:regex-replace-all "\\\\[0-9A-Za-z@]" tree #'sub
185 :simple-calls t))
186 (cons (mapcar #'seek+destroy tree))
187 (t tree))))
189 (dolist (x (asm-macro-body (get-symbol-value name)))
190 (process-line (seek+destroy x)))
191 (incf (asm-macro-count (get-symbol-value name)))))
193 (defun start-macro (label op operands modifier)
194 (declare (ignore op operands modifier))
195 ;; if *defining-macro-p* is already set cry foul.
196 (assert (not *defining-macro-p*))
197 (assert label)
198 ;; otherwise clear out *macro-buffer*.
199 (setf *macro-buffer* (list (second label))
200 *defining-macro-p* t))
202 (defun end-macro (label op operands modifier)
203 (declare (ignore label op operands modifier))
204 (assert *defining-macro-p*)
205 (setf *macro-buffer* (nreverse *macro-buffer*)
206 *defining-macro-p* nil)
207 (add-to-symbol-table (first *macro-buffer*)
208 (make-asm-macro :body (cdr *macro-buffer*))
209 :type 'macro))
212 ;;;; BACKPATCHING
214 ;; backpatch structure
215 (defstruct backpatch
216 (template)
217 (length)
218 (program-counter)
219 (section)
220 (file-position)
221 (last-label)
222 (source-position))
224 (defun make-backpatch-item (data length)
225 (make-backpatch :template data :length length
226 :program-counter *program-counter*
227 :section *current-section*
228 :file-position (file-position
229 (section-object-stream *current-section*))
230 :last-label *last-label*
231 :source-position *source-position*))
233 (eval-when (:compile-toplevel :load-toplevel :execute)
234 (defmacro with-backpatch ((item) &body body)
235 `(let ((*program-counter* (backpatch-program-counter ,item))
236 (*current-section* (backpatch-section ,item))
237 (*last-label* (backpatch-last-label ,item))
238 (*source-position* (backpatch-source-position ,item))
239 (*object-stream* (section-object-stream (backpatch-section ,item))))
240 ,@body)))
242 (defun backpatch (backpatch-list)
243 ;; go through backpatch list, try to make all patches
244 (dolist (x backpatch-list)
245 (using-section ((backpatch-section x))
246 (with-backpatch (x)
247 (let ((*defining-relocations-p* t))
248 (multiple-value-bind (data len)
249 (generate-code (backpatch-template x) nil nil)
250 (when (consp data)
251 (error "~A: Failed to backpatch @ ~A: ~S."
252 *source-position* *program-counter* data))
253 (assert (= len (backpatch-length x)))
254 (assert (file-position (section-object-stream *current-section*)
255 (backpatch-file-position x)))
256 (output-data data len)))))))
259 ;;;; RELOCATION
261 ;; relocation info
262 (defstruct relocation
263 (address)
264 (size nil)
265 (extern-p nil)
266 (pc-relative-p nil)
267 ;; only one of symbol or segment can be selected at a time, so the
268 ;; symbol field provides both functions.
269 (symbol))
271 (defun relocation-segment (r) (relocation-symbol r))
272 (defun (setf relocation-segment) (v r) (setf (relocation-segment r) v))
275 (defun figure-out-reloc (symbol pc)
276 (let* ((sym (get-asm-symbol symbol))
277 (extern-p (eq (asm-symbol-type sym) 'extern)))
278 (make-relocation :address pc :extern-p extern-p
279 :symbol (if extern-p
280 (asm-symbol-name sym)
281 (asm-symbol-type sym)))))
283 (defun check-reloc-consistency (reloc-a reloc-b)
284 (assert (every (lambda (fn)
285 (eql (funcall fn reloc-a)
286 (funcall fn reloc-b)))
287 (list #'relocation-address
288 ;; XXX relocation-size
289 #'relocation-symbol
290 #'relocation-extern-p))))
292 (defun add-relocation (symbol)
293 (when (and *defining-relocations-p*
294 (not (eq (get-symbol-type symbol) 'absolute)))
295 (let ((reloc (figure-out-reloc symbol *program-counter*)))
296 (sif (gethash *program-counter* *relocation-table*)
297 (check-reloc-consistency reloc it)
298 (setf it reloc)))))
300 (defun pc-relativise-relocation ()
301 "If there's a relocation at this PC, mark it PC-relative."
302 (swhen (gethash *program-counter* *relocation-table*)
303 ;; If the symbol is not extern and segment is same as current
304 ;; section, then delete from relocation table (no need to relocate
305 ;; same-section relative references within the same file).
306 (if (or (relocation-extern-p it)
307 (not (eq (relocation-segment it)
308 (section-name *current-section*))))
309 (setf (relocation-pc-relative-p it) t)
310 (remhash *program-counter* *relocation-table*))))
312 (defun fix-relocation-size (size)
313 (swhen (gethash *program-counter* *relocation-table*)
314 (setf (relocation-size it) size)))
317 ;;;; HELPER FUNCTIONS
319 (defun extract-string-from-tree (tree)
320 "Return the first string we come to in TREE."
321 (tree-find-if #'stringp tree))
323 (defun resolve-expression (expression)
324 "Try and get a numerical value from this expression. Returns the
325 further simplified expression, which will be an atom if everything was
326 resolved."
327 (flet ((bin-op (sym fn)
328 (assert (= (length expression) 3))
329 (let ((a (resolve-expression (second expression)))
330 (b (resolve-expression (third expression))))
331 (if (and (integerp a) (integerp b))
332 (funcall fn a b)
333 (list sym a b)))))
334 (if (atom expression)
335 (typecase expression
336 (integer expression)
337 (string (string->int expression))
338 (t (error "Unknown expression atom: ~A" expression)))
339 (case (car expression)
340 (+ (bin-op '+ #'+))
341 (- (if (= (length expression) 2)
342 (let ((v (resolve-expression (second expression))))
343 (if (integerp v) (- v) expression))
344 (bin-op '- #'-)))
345 (* (bin-op '* #'*))
346 (/ (bin-op '/ #'/))
347 (& (bin-op '& #'logand))
348 (or (bin-op 'or #'logior))
349 (^ (bin-op '^ #'logxor))
350 (<< (bin-op '<< #'ash))
351 (>> (bin-op '>> #'(lambda (n count) (ash n (- count)))))
352 (~ (let ((v (resolve-expression (second expression))))
353 (if (integerp v)
354 (lognot v)
355 expression)))
356 (symbol (acond ((get-symbol-value expression)
357 (add-relocation expression)
359 (t expression)))
360 ;;; XXX if we got a constant, it's a bug. That should have
361 ;;; been picked out at the AST stage.
362 (constant (resolve-expression (second expression)))
363 (t expression)))))
366 ;;;; EOF assembler.lisp