a.out object file output (mostly).
[m68k-assembler.git] / assembler.lisp
blob48cba22ab61a4d2fc37c018f388adaf12970a0a3
2 (in-package :m68k-assembler)
4 ;;;; PSEUDO-OPS
6 (defparameter *asm-pseudo-op-table*
7 `(("SECTION" ,(lambda (label op operands modifier)
8 (declare (ignore op modifier))
9 (handle-label-normally label)
10 (assert (and (eql (caar operands) 'absolute)
11 (eql (caadar operands) 'symbol)))
12 (let ((section (intern (cadar (cdar operands))
13 (find-package "M68K-ASSEMBLER"))))
14 (assert (assoc section *object-streams*))
15 (setf *current-section* section))))
16 ("INCLUDE"
17 ,(lambda (label op operands modifier)
18 (declare (ignore op modifier))
19 (handle-label-normally label)
20 (nested-lexing (extract-string-from-operand (first operands)))))
21 ("INCBIN"
22 ,(lambda (label op operands modifier)
23 (declare (ignore op modifier))
24 (handle-label-normally label)
25 (with-open-file (stream (extract-string-from-operand
26 (first operands))
27 :direction :input
28 :element-type 'unsigned-byte)
29 (copy-stream-contents stream (current-obj-stream))
30 (incf *program-counter* (file-position stream)))))
31 ;; Note: not DevPAC.
32 ("ALIGN" ,(lambda (label op operands modifier)
33 (declare (ignore op modifier))
34 (handle-label-normally label)
35 (let ((align (absolute-value (first operands))))
36 (do ()
37 ((zerop (mod *program-counter* align)))
38 (output-data 0 8)))))
39 ("EVEN" ,(lambda (label op operands modifier)
40 (declare (ignore op modifier operands))
41 (handle-label-normally label)
42 (unless (evenp *program-counter*)
43 (output-data 0 8))))
44 ("CNOP") ; offset,align -- offset+(PC+align-1)&~(align-1)
45 ("MACRO"
46 ,(lambda (label op operands modifier)
47 (declare (ignore op operands modifier))
48 ;; if *defining-macro-p* is already set, cry foul.
49 (assert (not *defining-macro-p*))
50 (assert label)
51 ;; otherwise, clear out *macro-buffer*.
52 (setf *macro-buffer* (list (second label))
53 *defining-macro-p* t)))
54 ("ENDM"
55 ,(lambda (label op operands modifier)
56 (declare (ignore label op operands modifier))
57 (assert *defining-macro-p*)
58 (setf *macro-buffer* (nreverse *macro-buffer*)
59 *defining-macro-p* nil)
60 (add-to-symbol-table (first *macro-buffer*)
61 (list 0 (cdr *macro-buffer*))
62 :type 'macro)))
63 ("REPT"
64 ,(lambda (label op operands modifier)
65 (declare (ignore label op modifier))
66 (assert (not *defining-rept-p*))
67 (setf *macro-buffer* (list (absolute-value (first operands)))
68 *defining-rept-p* t)))
69 ("ENDR"
70 ,(lambda (label op operands modifier)
71 (declare (ignore label op operands modifier))
72 (assert *defining-rept-p*)
73 (setf *macro-buffer* (nreverse *macro-buffer*)
74 *defining-rept-p* nil)
75 (dotimes (i (pop *macro-buffer*))
76 (dolist (x *macro-buffer*)
77 (process-line x)))))
78 ("DC" ,(lambda (label op operands modifier)
79 (declare (ignore op))
80 (handle-label-normally label)
81 (unless modifier (setf modifier 'word))
82 (dolist (x operands)
83 (let ((data (absolute-value x))
84 (length (ecase modifier (byte 8) (word 16) (long 32))))
85 (when (consp data)
86 (push (make-backpatch-item
87 `((,length (absolute-value ,data)))
88 length)
89 *backpatch-list*)
90 (setf data #x4E714E71))
91 (output-data data length)))))
92 ("DS" ,(lambda (label op operands modifier)
93 (declare (ignore op))
94 (handle-label-normally label)
95 (unless modifier (setf modifier 'word))
96 (assert (eql (car (first operands)) 'absolute))
97 (let ((data (absolute-value (first operands)))
98 (length (ecase modifier (byte 8) (word 16) (long 32))))
99 (unless (integerp data)
100 (error "Error at ~A: Need to be able to resolve DS immediately." *source-position*))
101 (output-data 0 (* data length)))))
102 ("DCB") ; constant block -- number,value
103 ("EQU" #'define-equate)
104 ("=" #'define-equate)
105 ;; EQUR ? (register equate)
106 ;; IFEQ etc etc
107 ("XDEF")
108 ("XREF")
109 ("ORG" ,(lambda (label op operands modifier)
110 (declare (ignore label op modifier))
111 (assert (eql (car (first operands)) 'absolute))
112 (setf *program-counter* (absolute-value (first operands)))))
113 ("END")))
115 ;;;; MACROS
117 ;; Devpac macros -- when we see a macro start, collect until the ENDM,
118 ;; so we can expand ourselves. Note that macros have their names
119 ;; stored in a separate symbol table, for devpac compatibility.
121 (defvar *defining-macro-p* nil)
122 (defvar *defining-rept-p* nil)
123 (defvar *macro-buffer*)
125 (defun define-equate (label op operands modifier)
126 (declare (ignore op modifier))
127 (unless label
128 (error "~A: EQU always needs a label." *source-position*))
129 ;; XXX should probably check operands length etc
130 (add-to-symbol-table label (resolve-expression (first operands))
131 :type 'absolute))
133 (defun macro-count (macro)
134 (first macro))
135 (defun macro-body (macro)
136 (second macro))
137 (defun (setf macro-count) (value macro)
138 (setf (first macro) value))
141 (defun execute-macro (name operands modifier)
142 (labels ((sub (match &rest registers)
143 (declare (ignore registers))
144 (acase (char match 1)
145 (#\@ (format nil "_~D"
146 (macro-count (get-symbol-value name))))
147 (#\0 (case modifier
148 (byte ".B")
149 (long ".L")
150 ((word t) ".W")))
151 (t (nth (digit-to-int it 36) operands))))
152 (seek+destroy (tree)
153 (cond ((stringp tree)
154 (cl-ppcre:regex-replace-all "\\\\[0-9A-Za-z@]"
155 tree #'sub
156 :simple-calls t))
157 ((consp tree)
158 (let ((new-tree nil))
159 (dolist (branch tree)
160 (push (seek+destroy branch) new-tree))
161 (nreverse new-tree)))
162 (t tree))))
163 (dolist (x (macro-body (get-symbol-value name)))
164 (process-line (seek+destroy x)))
165 (incf (macro-count (get-symbol-value name)))))
168 ;;;; SYMBOL TABLE
170 (defvar *symbol-table* nil)
172 (defun maybe-local-label (sym)
173 (when (and (plusp (length sym)) (eql (char sym 0) #\.))
174 (concatenate 'string *last-label* sym)))
176 (defun add-to-symbol-table (sym value &key (type 'relative))
177 (cond ((consp sym)
178 (when (eql (first sym) 'label) (setf sym (second sym)))
179 (assert (eql (first sym) 'symbol))
180 (when (eql type 'relative)
181 (aif (maybe-local-label (second sym))
182 (setf (second sym) it)
183 (setf *last-label* (second sym))))
184 (setf (gethash (second sym) *symbol-table*)
185 (list type value *source-position*)))
186 (t (error "Not sure how to handle this symbol: ~A." sym))))
188 (defun get-symbol-value (sym)
189 (when (consp sym)
190 (when (eql (first sym) 'label) (setf sym (second sym)))
191 (setf sym (second sym)))
192 (awhen (or (gethash sym *symbol-table*)
193 (gethash (maybe-local-label sym) *symbol-table*))
194 (second it)))
196 (defun get-symbol-type (sym)
197 (when (consp sym)
198 (when (eql (first sym) 'label) (setf sym (second sym)))
199 (setf sym (second sym)))
200 (awhen (or (gethash sym *symbol-table*)
201 (gethash (maybe-local-label sym) *symbol-table*))
202 (first it)))
204 (defun asm-symbol-text (sym)
205 (if (eql (car sym) 'absolute)
206 (second (second sym))
207 (second sym)))
209 ;;;; BACKPATCHING
211 (defvar *backpatch-list* nil)
213 (defun make-backpatch-item (data length)
214 (list data length *program-counter*
215 *current-section* (file-position (current-obj-stream))
216 *last-label* *source-position*))
218 (defun backpatch ()
219 ;; go through backpatch list, try to make all patches
220 (dolist (x *backpatch-list*)
221 (destructuring-bind (template length *program-counter*
222 *current-section*
223 file-position *last-label*
224 *source-position*) x
225 (multiple-value-bind (data len) (generate-code template nil nil)
226 (when (consp data)
227 (error "Failed to backpatch @ ~A/~A: ~S."
228 *program-counter* *source-position* data))
229 (assert (= len length))
230 (assert (file-position (current-obj-stream) file-position))
231 (output-data data len)))))
234 ;;;; HELPER FUNCTIONS
236 (defun extract-string-from-operand (operand)
237 (dolist (x operand)
238 (cond ((stringp x) (return x))
239 ((consp x)
240 (awhen (extract-string-from-operand operand)
241 (return it))))))
243 (defun resolve-expression (expression)
244 "Try and get a numerical value from this expression. Returns the
245 further simplified expression, which will be an atom if everything was
246 resolved."
247 (flet ((bin-op (sym fn)
248 (assert (= (length expression) 3))
249 (let ((a (resolve-expression (second expression)))
250 (b (resolve-expression (third expression))))
251 (if (and (integerp a) (integerp b))
252 (funcall fn a b)
253 (list sym a b)))))
254 (if (atom expression)
255 (cond ((integerp expression) expression)
256 ((stringp expression) (string->int expression))
257 (t (error "Unknown expression atom: ~A" expression)))
258 (case (car expression)
259 (+ (bin-op '+ #'+))
260 (- (bin-op '- #'-))
261 (* (bin-op '* #'*))
262 (/ (bin-op '/ #'/))
263 (& (bin-op '& #'logand))
264 (or (bin-op 'or #'logior))
265 (^ (bin-op '^ #'logxor))
266 (<< (bin-op '<< #'ash))
267 (>> (bin-op '>> #'(lambda (n count) (ash n (- count)))))
268 (~ (aif (resolve-expression (second expression))
269 (lognot it)
270 expression))
271 (symbol (aif (get-symbol-value expression)
273 expression))
274 (t expression)))))
277 ;;;; "MAIN" STUFF
279 ;;; XXX should this be maintained by the lexer instead?
280 (defvar *file-list* nil
281 "Stack of file names that are actively open; checked by INCLUDE to
282 determine whether recursive nesting is occuring.")
284 (defvar *source-position*)
286 (defvar *object-streams*)
287 (defvar *program-counter*)
288 (defvar *last-label* nil "Last label seen by the assembler. This is
289 used for generating the prefix for local labels.")
290 (defvar *current-section* 'text "Current section we're assembling
291 in.")
293 (defmacro with-object-streams (sections &body body)
294 (if sections
295 (let ((symbol-of-the-day (gensym)))
296 `(osicat:with-temporary-file (,symbol-of-the-day
297 :element-type 'unsigned-byte)
298 (push (cons ,(car sections) ,symbol-of-the-day) *object-streams*)
299 (with-object-streams ,(cdr sections) ,@body)))
300 `(progn ,@body)))
303 (defun current-obj-stream ()
304 (cdr (assoc *current-section* *object-streams*)))
306 (defun assemble (input-name &key (object-name "mr-ed.o"))
307 ;; create symbol table
308 (setf *symbol-table* (make-hash-table :test 'equal)
309 ;; init program counter
310 *program-counter* 0
311 ;; create backpatch list
312 *backpatch-list* nil
313 ;; init macro variables
314 *defining-macro-p* nil *defining-rept-p* nil
315 ;; last label seen
316 *last-label* nil
317 *current-section* 'text
318 *object-streams* nil)
319 ;; open file and start processing
320 (with-lexer (input-name)
321 ;; Note that we create a file for BSS even though it should all be
322 ;; zeros just for my utterly lazy convenience. It's wasteful, but
323 ;; I don't feel like putting in all kinds of hideous special cases
324 ;; just right now.
325 (with-object-streams ('text 'data 'bss)
326 (process-current-file)
327 ;; Prior to backpatching, record section lengths, because
328 ;; temporary-files aren't really file streams the way CL would
329 ;; like them to be, so we can't just FILE-LENGTH them.
330 (let ((lengths (mapcar (lambda (x)
331 (cons (car x) (file-position (cdr x))))
332 *object-streams*)))
333 (backpatch)
334 (finalize-object-file object-name lengths)))))
337 (defun finalize-object-file (name section-lengths)
338 "Finalize object file (collect sections, write symbol table,
339 patch header)."
340 (with-open-file (output-stream name :direction :output
341 :element-type 'unsigned-byte
342 :if-exists :new-version
343 :if-does-not-exist :create)
344 (write-big-endian-data output-stream #x53544F26 32) ; Magic
345 ;; Text, Data, BSS
346 (mapcar (lambda (x)
347 (write-big-endian-data output-stream (cdr x) 32))
348 section-lengths)
349 ;; symbol table
350 (write-big-endian-data output-stream 0 32)
351 ;; entry point
352 (write-big-endian-data output-stream 0 32)
353 ;; text reloc size
354 (write-big-endian-data output-stream 0 32)
355 ;; data reloc size
356 (write-big-endian-data output-stream 0 32)
358 (copy-stream-contents (cdr (assoc 'text *object-streams*))
359 output-stream)
360 (copy-stream-contents (cdr (assoc 'data *object-streams*))
361 output-stream)
362 ;; symbol table
363 ;; relocations
367 (defun process-current-file ()
368 (handler-case
369 (loop
370 ;; we strip individual token position information here, which
371 ;; might suck for superfine debugging/warning precision, but
372 ;; we really don't need all that clutter.
373 (multiple-value-bind (line *source-position*)
374 (unclutter-line (parse #'next-token))
375 (assert (eql (pop line) 'line))
377 ;; if we're in a macro or repeat, accumulate this line.
378 (cond (*defining-macro-p*
379 (if (and (eql (operation-type-of-line line) 'pseudo-op)
380 (string-equal (opcode-of-line line) "ENDM"))
381 (process-line line)
382 (push line *macro-buffer*)))
383 (*defining-rept-p*
384 (if (and (eql (operation-type-of-line line) 'pseudo-op)
385 (string-equal (opcode-of-line line) "ENDR"))
386 (process-line line)
387 (push line *macro-buffer*)))
388 (t (process-line line))))) ; otherwise, process it.
389 (end-of-file nil (format t "~&all done now."))))
391 (defun operation-type-of-line (line)
392 (awhen (cadr (find 'operation line :key #'car))
393 (car it)))
395 (defun opcode-of-line (line)
396 (awhen (cadr (find 'operation line :key #'car))
397 (caadr it)))
399 (defun process-line (line)
400 (let ((label (find 'label line :key #'car))
401 (operation (cadr (find 'operation line :key #'car)))
402 (operands (mapcar #'simplify-operand
403 (extract-operands line))))
404 (cond ((eql (operation-type-of-line line) 'opcode)
405 (assemble-opcode label operation operands))
406 ((eql (operation-type-of-line line) 'pseudo-op)
407 (assemble-pseudo-op label operation operands))
409 (when label
410 ;; might be a macro or something... see if it
411 ;; exists, first. and complain about
412 ;; redefinition, except in the case of locals.
413 (if (get-symbol-type label)
414 (warn "~&~S: tried to redefine ~A (cowardly not allowing this)." *source-position* label)
415 (handle-label-normally label)))))))
417 (defun handle-label-normally (label)
418 (when (and label (null (get-symbol-type label)))
419 (add-to-symbol-table (second label) *program-counter*)))
421 (defun assemble-opcode (label operation operands)
422 (let ((opcode (caadr operation))
423 (modifier (cadadr operation)))
424 (handle-label-normally label)
425 (awhen (find-matching-entry opcode operands modifier)
426 (multiple-value-bind (data len)
427 (generate-code (second it) operands modifier)
428 (when (consp data)
429 (push (make-backpatch-item data len) *backpatch-list*)
430 (setf data #x4E714E71)) ; Output NOP if all else fails.
431 (output-data data len)))))
434 (defun assemble-pseudo-op (label operation operands)
435 (let ((opcode (caadr operation))
436 (modifier (cadadr operation)))
437 (acond ((find opcode *asm-pseudo-op-table*
438 :key #'car :test #'string-equal)
439 (when (functionp (second it))
440 (funcall (second it) label opcode operands modifier)))
441 ((eql (get-symbol-type opcode) 'macro)
442 (execute-macro opcode operands modifier))
443 (t (error "~&~S: bad pseudo-op ~A!" *source-position* opcode)))))
446 (defun output-data (data length)
447 "Outputs DATA in big-endian order to the currently active temporary
448 object file and updates the program counter. Returns the address to
449 which the data assembled."
450 #+nil (unless (zerop (mod length 8))
451 (setf length (ash (ceiling (/ length 8)) 3)))
452 (incf *program-counter* (ash length -3))
453 (write-big-endian-data (current-obj-stream) data length))
456 ;;;; EOF assembler.lisp