Various little a.out output-related bugfixes.
[m68k-assembler.git] / assembler.lisp
blob6e856b853cbc67ba807bcd7870f24f72f1191733
2 (in-package :m68k-assembler)
4 ;;;; PSEUDO-OPS
6 (eval-when (:compile-toplevel :load-toplevel)
7 (defparameter *asm-pseudo-op-table*
8 `(("SECTION" ,(lambda (label op operands modifier)
9 (declare (ignore op modifier))
10 (handle-label-normally label)
11 (assert (and (eql (caar operands) 'absolute)
12 (eql (caadar operands) 'symbol)))
13 (let ((section (intern (cadar (cdar operands))
14 (find-package "M68K-ASSEMBLER"))))
15 (assert (assoc section *object-streams*))
16 (setf *current-section* section))))
17 ("XDEF" #'define-global)
18 ("GLOBAL" #'define-global)
19 ("XREF" #'define-extern)
20 ("EXTERN" #'define-extern)
21 ("ORG" ,(lambda (label op operands modifier)
22 (declare (ignore label op modifier))
23 (assert (eql (car (first operands)) 'absolute))
24 (setf *program-counter* (absolute-value (first operands)))))
26 ("INCLUDE"
27 ,(lambda (label op operands modifier)
28 (declare (ignore op modifier))
29 (handle-label-normally label)
30 (nested-lexing (extract-string-from-tree (first operands)))))
31 ("INCBIN"
32 ,(lambda (label op operands modifier)
33 (declare (ignore op modifier))
34 (handle-label-normally label)
35 (with-open-file (stream (extract-string-from-tree
36 (first operands))
37 :direction :input
38 :element-type 'unsigned-byte)
39 (copy-stream-contents stream (current-obj-stream))
40 (incf *program-counter* (file-position stream)))))
42 ("ALIGN" ,(lambda (label op operands modifier)
43 (declare (ignore op modifier))
44 (handle-label-normally label)
45 (let ((align (absolute-value (first operands))))
46 (do ()
47 ((zerop (mod *program-counter* align)))
48 (output-data 0 8)))))
49 ("EVEN" ,(lambda (label op operands modifier)
50 (declare (ignore op modifier operands))
51 (handle-label-normally label)
52 (unless (evenp *program-counter*)
53 (output-data 0 8))))
54 ;; offset,align -- offset+(PC+align-1)&~(align-1)
55 ;; XXX untested.
56 ("CNOP" ,(lambda (label op operands modifier)
57 (declare (ignore op modifier))
58 (handle-label-normally label)
59 (let ((offset (absolute-value (first operands)))
60 (align (absolute-value (second operands))))
61 (do ()
62 ((zerop (mod (- *program-counter* offset) align)))
63 (output-data 0 8)))))
65 ("MACRO"
66 ,(lambda (label op operands modifier)
67 (declare (ignore op operands modifier))
68 ;; if *defining-macro-p* is already set, cry foul.
69 (assert (not *defining-macro-p*))
70 (assert label)
71 ;; otherwise, clear out *macro-buffer*.
72 (setf *macro-buffer* (list (second label))
73 *defining-macro-p* t)))
74 ("ENDM"
75 ,(lambda (label op operands modifier)
76 (declare (ignore label op operands modifier))
77 (assert *defining-macro-p*)
78 (setf *macro-buffer* (nreverse *macro-buffer*)
79 *defining-macro-p* nil)
80 (add-to-symbol-table (first *macro-buffer*)
81 (make-asm-macro :body (cdr *macro-buffer*))
82 :type 'macro)))
84 ("REPT"
85 ,(lambda (label op operands modifier)
86 (declare (ignore label op modifier))
87 (assert (not *defining-rept-p*))
88 (setf *macro-buffer* (list (absolute-value (first operands)))
89 *defining-rept-p* t)))
90 ("ENDR"
91 ,(lambda (label op operands modifier)
92 (declare (ignore label op operands modifier))
93 (assert *defining-rept-p*)
94 (setf *macro-buffer* (nreverse *macro-buffer*)
95 *defining-rept-p* nil)
96 (dotimes (i (pop *macro-buffer*))
97 (dolist (x *macro-buffer*)
98 (process-line x)))))
100 ("DC"
101 ,(lambda (label op operands modifier)
102 (declare (ignore op))
103 (handle-label-normally label)
104 (unless modifier (setf modifier 'word))
105 (dolist (x operands)
106 (let ((data (absolute-value x modifier))
107 (length (ecase modifier (byte 8) (word 16) (long 32))))
108 (when (consp data)
109 (push (make-backpatch-item
110 `((,length (absolute-value ,data ,modifier)))
111 length)
112 *backpatch-list*)
113 (setf data #x4E714E71))
114 (output-data data length)))))
115 ("DS"
116 ,(lambda (label op operands modifier)
117 (declare (ignore op))
118 (handle-label-normally label)
119 (unless modifier (setf modifier 'word))
120 (assert (eql (car (first operands)) 'absolute))
121 (let ((data (absolute-value (first operands) modifier))
122 (length (ecase modifier (byte 8) (word 16) (long 32))))
123 (unless (integerp data)
124 (error "~A: Need to be able to resolve DS immediately."
125 *source-position*))
126 (output-data 0 (* data length)))))
127 ("DCB") ; constant block -- number,value
129 ("EQU" #'define-equate)
130 ("=" #'define-equate)
131 ;; EQUR (register equate)
132 ;; IFEQ etc etc
134 ("END"))))
136 (defun pseudo-op-p (string)
137 (or (find string *asm-pseudo-op-table* :key #'car :test #'string-equal)
138 (eql (get-symbol-type string) 'macro)))
141 ;;;; MACROS
143 ;; Devpac macros -- when we see a macro start, collect until the ENDM,
144 ;; so we can expand ourselves.
146 (eval-when (:compile-toplevel :load-toplevel)
147 (defvar *defining-macro-p* nil)
148 (defvar *defining-rept-p* nil)
149 (defvar *macro-buffer*))
151 (defstruct asm-macro
152 (count 0)
153 (body))
155 (defun define-equate (label op operands modifier)
156 (declare (ignore op modifier))
157 (unless label
158 (error "~A: EQU always needs a label." *source-position*))
159 ;; XXX should probably check operands length etc
160 (add-to-symbol-table label (resolve-expression (first operands))
161 :type 'absolute))
163 (defun execute-macro (name operands modifier)
164 (labels ((sub (match &rest registers)
165 (declare (ignore registers))
166 (acase (char match 1)
167 (#\@ (format nil "_~D"
168 (asm-macro-count (get-symbol-value name))))
169 (#\0 (case modifier
170 (byte ".B")
171 (long ".L")
172 ((word t) ".W")))
173 (t (nth (digit-to-int it 36) operands))))
174 (seek+destroy (tree)
175 (cond ((stringp tree)
176 (cl-ppcre:regex-replace-all "\\\\[0-9A-Za-z@]"
177 tree #'sub
178 :simple-calls t))
179 ((consp tree)
180 (let ((new-tree nil))
181 (dolist (branch tree)
182 (push (seek+destroy branch) new-tree))
183 (nreverse new-tree)))
184 (t tree))))
185 (dolist (x (asm-macro-body (get-symbol-value name)))
186 (process-line (seek+destroy x)))
187 (incf (asm-macro-count (get-symbol-value name)))))
190 ;;;; SYMBOL TABLE
192 (eval-when (:compile-toplevel :load-toplevel)
193 (defvar *symbol-table* nil))
195 (defstruct asm-symbol
196 (name)
197 (type)
198 (value)
199 (debug-info)
200 (global-p nil))
202 (defun local-label-name-p (name)
203 (and (plusp (length name)) (eql (char name 0) #\.)))
205 (defun maybe-local-label (name)
206 (if (local-label-name-p name)
207 (concatenate 'string *last-label* name)
208 name))
210 (defun add-to-symbol-table (sym value &key (type *current-section*)
211 (global-p nil))
212 (let ((name (extract-sym-name sym)))
213 (setf (gethash name *symbol-table*)
214 (make-asm-symbol :name name
215 :type type
216 :value value
217 :debug-info *source-position*
218 :global-p global-p))))
220 (defun get-symbol-value (sym)
221 (awhen (get-asm-symbol sym) (asm-symbol-value it)))
223 (defun (setf get-symbol-value) (value sym)
224 (setf (asm-symbol-value (get-asm-symbol sym)) value))
226 (defun get-symbol-type (sym)
227 (awhen (get-asm-symbol sym) (asm-symbol-type it)))
229 (defun get-asm-symbol (sym)
230 (setf sym (extract-sym-name sym))
231 (gethash sym *symbol-table*))
233 (defun extract-sym-name (sym)
234 (maybe-local-label (cond ((atom sym) sym)
235 ((or (eql (car sym) 'absolute)
236 (eql (car sym) 'label))
237 (second (second sym)))
238 (t (second sym)))))
240 (defun define-extern (label op operands modifier)
241 (declare (ignore op modifier))
242 ;; add whatever to the symbol table, as an extern
243 (handle-label-normally label)
244 (let ((name (extract-string-from-tree (first operands))))
245 (add-to-symbol-table name nil :type 'extern :global-p t)))
247 (defun define-global (label op operands modifier)
248 (declare (ignore op modifier))
249 ;; add whatever to the symbol table, or just tweak
250 ;; its type if necessary.
251 (handle-label-normally label)
252 (let ((name (extract-string-from-tree (first operands))))
253 (aif (get-asm-symbol name)
254 (setf (asm-symbol-global-p it) t) ; XXX hope this works.
255 (add-to-symbol-table name nil :global-p t))))
257 ;;;; BACKPATCHING
259 (defvar *backpatch-list* nil)
261 ;; backpatch structure
262 (defstruct backpatch
263 (template)
264 (length)
265 (program-counter)
266 (section)
267 (file-position)
268 (last-label)
269 (source-position))
271 (defun make-backpatch-item (data length)
272 (make-backpatch :template data :length length
273 :program-counter *program-counter*
274 :section *current-section*
275 :file-position (file-position (current-obj-stream))
276 :last-label *last-label*
277 :source-position *source-position*))
279 (defmacro with-backpatch ((item) &body body)
280 `(let ((*program-counter* (backpatch-program-counter ,item))
281 (*current-section* (backpatch-section ,item))
282 (*last-label* (backpatch-last-label ,item))
283 (*source-position* (backpatch-source-position ,item)))
284 ,@body))
286 (defun backpatch ()
287 ;; go through backpatch list, try to make all patches
288 (dolist (x *backpatch-list*)
289 (with-backpatch (x)
290 (let ((*defining-relocations-p* t))
291 (multiple-value-bind (data len)
292 (generate-code (backpatch-template x) nil nil)
293 (when (consp data)
294 (error "~A: Failed to backpatch @ ~A: ~S."
295 *source-position* *program-counter* data))
296 (assert (= len (backpatch-length x)))
297 (assert (file-position (current-obj-stream)
298 (backpatch-file-position x)))
299 (output-data data len))))))
302 ;;;; RELOCATION
304 ;; relocation info
305 (defstruct relocation
306 (address)
307 (size nil)
308 (extern-p nil)
309 (pc-relative-p nil)
310 ;; only one of symbol or segment can be selected at a time, so the
311 ;; symbol field provides both functions.
312 (symbol))
314 (defun relocation-segment (r) (relocation-symbol r))
315 (defun (setf relocation-segment) (v r) (setf (relocation-segment r) v))
317 ;; Indexed by PC.
318 (defvar *relocation-table*)
319 (defvar *defining-relocations-p* nil)
322 (defun figure-out-reloc (symbol pc)
323 (let* ((sym (get-asm-symbol symbol))
324 (extern-p (eq (asm-symbol-type sym) 'extern)))
325 (make-relocation :address pc :extern-p extern-p
326 :symbol (if extern-p symbol (asm-symbol-type sym)))))
328 (defun check-reloc-consistency (reloc-a reloc-b)
329 (assert (every (lambda (fn)
330 (eql (funcall fn reloc-a)
331 (funcall fn reloc-b)))
332 (list #'relocation-address
333 ;; XXX relocation-size
334 #'relocation-symbol
335 #'relocation-extern-p))))
337 (defun add-relocation (symbol)
338 (when *defining-relocations-p*
339 ;; figure out what kind of relocation this is
340 (let ((reloc (figure-out-reloc symbol *program-counter*)))
341 (sif (gethash *program-counter* *relocation-table*)
342 (check-reloc-consistency reloc it)
343 (setf it reloc)))))
345 (defun pc-relativise-relocation ()
346 ;; if there's a reloc at this PC, make it pc-relative.
347 ;; XXX if the symbol is not extern, then delete from relocation
348 ;; table (no need to relocate relative references within the same
349 ;; file).
350 (swhen (gethash *program-counter* *relocation-table*)
351 (setf (relocation-pc-relative-p it) t)))
353 (defun fix-relocation-size (size)
354 (swhen (gethash *program-counter* *relocation-table*)
355 (setf (relocation-size it) size)))
358 ;;;; HELPER FUNCTIONS
360 (defun extract-string-from-tree (tree)
361 "Return the first string we come to in TREE."
362 (dolist (x tree)
363 (cond ((stringp x) (return x))
364 ((consp x)
365 (awhen (extract-string-from-tree x)
366 (return it))))))
368 (defun resolve-expression (expression)
369 "Try and get a numerical value from this expression. Returns the
370 further simplified expression, which will be an atom if everything was
371 resolved."
372 (flet ((bin-op (sym fn)
373 (assert (= (length expression) 3))
374 (let ((a (resolve-expression (second expression)))
375 (b (resolve-expression (third expression))))
376 (if (and (integerp a) (integerp b))
377 (funcall fn a b)
378 (list sym a b)))))
379 (if (atom expression)
380 (cond ((integerp expression) expression)
381 ((stringp expression) (string->int expression))
382 (t (error "Unknown expression atom: ~A" expression)))
383 (case (car expression)
384 (+ (bin-op '+ #'+))
385 (- (bin-op '- #'-))
386 (* (bin-op '* #'*))
387 (/ (bin-op '/ #'/))
388 (& (bin-op '& #'logand))
389 (or (bin-op 'or #'logior))
390 (^ (bin-op '^ #'logxor))
391 (<< (bin-op '<< #'ash))
392 (>> (bin-op '>> #'(lambda (n count) (ash n (- count)))))
393 (~ (aif (resolve-expression (second expression))
394 (lognot it)
395 expression))
396 (symbol (aif (get-symbol-value expression)
397 (progn
398 (add-relocation expression)
400 expression))
401 (t expression)))))
404 ;;;; "MAIN" STUFF
406 (defvar *source-position*)
407 (defvar *object-streams*)
408 (defvar *program-counter*)
409 (defvar *last-label* nil "Last label seen by the assembler. This is
410 used for generating the prefix for local labels.")
411 (defvar *current-section* 'text "Current section we're assembling
412 in.")
414 (defmacro with-object-streams (sections &body body)
415 (if sections
416 (let ((symbol-of-the-day (gensym)))
417 `(osicat:with-temporary-file (,symbol-of-the-day
418 :element-type 'unsigned-byte)
419 (push (cons ,(car sections) ,symbol-of-the-day) *object-streams*)
420 (with-object-streams ,(cdr sections) ,@body)))
421 `(progn ,@body)))
424 (defun current-obj-stream ()
425 (cdr (assoc *current-section* *object-streams*)))
427 (defun assemble (input-name &key (object-name "mr-ed.o"))
428 ;; create symbol table
429 (setf *symbol-table* (make-hash-table :test 'equal)
430 ;; init program counter
431 *program-counter* 0
432 ;; create backpatch list
433 *backpatch-list* nil
434 ;; init macro variables
435 *defining-macro-p* nil *defining-rept-p* nil
436 ;; last label seen
437 *last-label* nil
438 *current-section* 'text
439 *object-streams* nil
440 *relocation-table* (make-hash-table))
441 ;; open file and start processing
442 (with-lexer (input-name)
443 ;; Note that we create a file for BSS even though it should all be
444 ;; zeros just for my utterly lazy convenience. It's wasteful, but
445 ;; I don't feel like putting in all kinds of hideous special cases
446 ;; just right now.
447 (with-object-streams ('text 'data 'bss)
448 (process-current-file)
449 ;; Prior to backpatching, record section lengths, because
450 ;; temporary-files aren't really file streams the way CL would
451 ;; like them to be, so we can't just FILE-LENGTH them.
452 (let ((lengths (mapcar (lambda (x)
453 (cons (car x) (file-position (cdr x))))
454 *object-streams*)))
455 (backpatch)
456 (finalize-object-file object-name lengths)))))
458 (defun process-current-file ()
459 (handler-case
460 (loop
461 ;; we strip individual token position information here, which
462 ;; might suck for superfine debugging/warning precision, but
463 ;; we really don't need all that clutter.
464 (multiple-value-bind (line *source-position*)
465 (unclutter-line (parse #'next-token))
466 (assert (eql (pop line) 'line))
468 ;; if we're in a macro or repeat, accumulate this line.
469 (cond (*defining-macro-p*
470 (if (and (eql (operation-type-of-line line) 'pseudo-op)
471 (string-equal (opcode-of-line line) "ENDM"))
472 (process-line line)
473 (push line *macro-buffer*)))
474 (*defining-rept-p*
475 (if (and (eql (operation-type-of-line line) 'pseudo-op)
476 (string-equal (opcode-of-line line) "ENDR"))
477 (process-line line)
478 (push line *macro-buffer*)))
479 (t (process-line line))))) ; otherwise, process it.
480 (end-of-file nil)))
482 (defun operation-type-of-line (line)
483 (awhen (cadr (find 'operation line :key #'car))
484 (car it)))
486 (defun opcode-of-line (line)
487 (awhen (cadr (find 'operation line :key #'car))
488 (caadr it)))
490 (defun process-line (line)
491 (let ((label (find 'label line :key #'car))
492 (operation (cadr (find 'operation line :key #'car)))
493 (operands (mapcar #'simplify-operand
494 (extract-operands line))))
495 (cond ((eql (operation-type-of-line line) 'opcode)
496 (assemble-opcode label operation operands))
497 ((eql (operation-type-of-line line) 'pseudo-op)
498 (assemble-pseudo-op label operation operands))
500 (when label
501 ;; might be a macro or something... see if it
502 ;; exists, first. and complain about
503 ;; redefinition, except in the case of locals.
504 (if (get-symbol-type label)
505 (warn "~&~S: tried to redefine ~A (cowardly not allowing this)." *source-position* label)
506 (handle-label-normally label)))))))
508 (defun handle-label-normally (label)
509 (when label
510 (let ((name (extract-string-from-tree label)))
511 (when (not (local-label-name-p name))
512 (setf *last-label* name))
513 ;; If the type is nil, the symbol isn't present.
514 (cond ((null (get-symbol-type name))
515 (add-to-symbol-table name *program-counter*))
516 ;; If the value is nil, it was probably declared as a global
517 ;; before it was actually defined.
518 ((null (get-symbol-value name))
519 (assert (not (eq (get-symbol-type name) 'extern))
520 (name) "Trying to set a value for an EXTERN symbol!")
521 (setf (get-symbol-value name) *program-counter*))
522 ;; Otherwise, it's a redefinition and I won't stand for it.
523 (t (warn "~A: tried to redefine label ~A"
524 *source-position* name))))))
526 (defun assemble-opcode (label operation operands)
527 (let ((opcode (caadr operation))
528 (modifier (cadadr operation)))
529 (handle-label-normally label)
530 (awhen (find-matching-entry opcode operands modifier)
531 (let ((*defining-relocations-p* t))
532 (multiple-value-bind (data len)
533 (generate-code (second it) operands modifier)
534 (when (consp data)
535 (push (make-backpatch-item data len) *backpatch-list*)
536 (setf data #x4E714E71)) ; Output NOP if all else fails.
537 (output-data data len))))))
540 (defun assemble-pseudo-op (label operation operands)
541 (let ((opcode (caadr operation))
542 (modifier (cadadr operation)))
543 (acond ((find opcode *asm-pseudo-op-table*
544 :key #'car :test #'string-equal)
545 (when (functionp (second it))
546 (funcall (second it) label opcode operands modifier)))
547 ((eql (get-symbol-type opcode) 'macro)
548 (execute-macro opcode operands modifier))
549 (t (error "~&~S: bad pseudo-op ~A!" *source-position* opcode)))))
552 (defun output-data (data length)
553 "Outputs DATA in big-endian order to the currently active temporary
554 object file and updates the program counter. Returns the address to
555 which the data assembled."
556 #+nil (unless (zerop (mod length 8))
557 (setf length (ash (ceiling (/ length 8)) 3)))
558 (incf *program-counter* (ash length -3))
559 (write-big-endian-data (current-obj-stream) data length))
562 ;;;; EOF assembler.lisp