A few x86-64 disassembler cleanups.
[sbcl.git] / src / compiler / x86-64 / target-insts.lisp
blob9b177898b653a84ba77490148547181b408451f7
1 ;;;; target-only stuff from CMU CL's src/compiler/x86/insts.lisp
2 ;;;;
3 ;;;; i.e. stuff which was in CMU CL's insts.lisp file, but which in
4 ;;;; the SBCL build process can't be compiled into code for the
5 ;;;; cross-compilation host
7 ;;;; This software is part of the SBCL system. See the README file for
8 ;;;; more information.
9 ;;;;
10 ;;;; This software is derived from the CMU CL system, which was
11 ;;;; written at Carnegie Mellon University and released into the
12 ;;;; public domain. The software is in the public domain and is
13 ;;;; provided with absolutely no warranty. See the COPYING and CREDITS
14 ;;;; files for more information.
16 (in-package "SB!X86-64-ASM")
18 (defstruct (machine-ea (:include sb!disassem::filtered-arg)
19 (:copier nil)
20 (:predicate nil)
21 (:constructor %make-machine-ea))
22 base disp index scale)
24 ;;; Print to STREAM the name of the general-purpose register encoded by
25 ;;; VALUE and of size WIDTH. For robustness, the high byte registers
26 ;;; (AH, BH, CH, DH) are correctly detected, too, although the compiler
27 ;;; does not use them.
28 (defun print-reg-with-width (value width stream dstate)
29 (declare (type full-reg value)
30 (type stream stream)
31 (type disassem-state dstate))
32 (princ (if (and (eq width :byte)
33 (<= 4 value 7)
34 (not (dstate-get-inst-prop dstate +rex+)))
35 (aref *high-byte-reg-names* (- value 4))
36 (aref (ecase width
37 (:byte *byte-reg-names*)
38 (:word *word-reg-names*)
39 (:dword *dword-reg-names*)
40 (:qword *qword-reg-names*))
41 value))
42 stream)
43 ;; XXX plus should do some source-var notes
46 (defun print-reg (value stream dstate)
47 (print-reg-with-width value
48 (inst-operand-size dstate)
49 stream
50 dstate))
52 (defun print-reg-default-qword (value stream dstate)
53 (print-reg-with-width value
54 (inst-operand-size-default-qword dstate)
55 stream
56 dstate))
58 ;; Print a reg that can only be a :DWORD or :QWORD.
59 ;; Avoid use of INST-OPERAND-SIZE because it's wrong for this type of operand.
60 (defun print-d/q-word-reg (value stream dstate)
61 (print-reg-with-width value
62 (if (dstate-get-inst-prop dstate +rex-w+) :qword :dword)
63 stream
64 dstate))
66 (defun print-byte-reg (value stream dstate)
67 (print-reg-with-width value :byte stream dstate))
69 (defun print-addr-reg (value stream dstate)
70 (print-reg-with-width value +default-address-size+ stream dstate))
72 ;;; Print a register or a memory reference of the given WIDTH.
73 ;;; If SIZED-P is true, add an explicit size indicator for memory
74 ;;; references.
75 (defun print-reg/mem-with-width (value width sized-p stream dstate)
76 (declare (type (or machine-ea full-reg) value)
77 (type (member :byte :word :dword :qword) width)
78 (type boolean sized-p))
79 (if (typep value 'full-reg)
80 (print-reg-with-width value width stream dstate)
81 (print-mem-ref (if sized-p :sized-ref :ref) value width stream dstate)))
83 ;;; Print a register or a memory reference. The width is determined by
84 ;;; calling INST-OPERAND-SIZE.
85 (defun print-reg/mem (value stream dstate)
86 (print-reg/mem-with-width
87 value (inst-operand-size dstate) nil stream dstate))
89 ;; Same as print-reg/mem, but prints an explicit size indicator for
90 ;; memory references.
91 (defun print-sized-reg/mem (value stream dstate)
92 (print-reg/mem-with-width
93 value (inst-operand-size dstate) t stream dstate))
95 ;;; Same as print-sized-reg/mem, but with a default operand size of
96 ;;; :qword.
97 (defun print-sized-reg/mem-default-qword (value stream dstate)
98 (print-reg/mem-with-width
99 value (inst-operand-size-default-qword dstate) t stream dstate))
101 (defun print-sized-byte-reg/mem (value stream dstate)
102 (print-reg/mem-with-width value :byte t stream dstate))
104 (defun print-sized-word-reg/mem (value stream dstate)
105 (print-reg/mem-with-width value :word t stream dstate))
107 (defun print-sized-dword-reg/mem (value stream dstate)
108 (print-reg/mem-with-width value :dword t stream dstate))
110 (defun print-label (value stream dstate)
111 (declare (ignore dstate))
112 (princ16 value stream))
114 (defun print-xmmreg (value stream dstate)
115 (declare (type xmmreg value) (type stream stream) (ignore dstate))
116 (format stream "XMM~d" value))
118 (defun print-xmmreg/mem (value stream dstate)
119 (if (typep value 'xmmreg)
120 (print-xmmreg value stream dstate)
121 (print-mem-ref :ref value nil stream dstate)))
123 (defun print-imm/asm-routine (value stream dstate)
124 (if (or #!+immobile-space (maybe-note-lisp-callee value dstate)
125 (maybe-note-assembler-routine value nil dstate)
126 (maybe-note-static-symbol value dstate))
127 (write value :stream stream :base 16 :radix t)
128 (princ value stream)))
130 ;;; Return either a MACHINE-EA or a register (a fixnum).
131 ;;; VALUE is a list of the mod and r/m fields of the instruction's ModRM byte.
132 ;;; Depending on VALUE, a SIB byte and/or displacement may be read.
133 ;;; The REX.B and REX.X from dstate are appropriately consumed.
134 (defun prefilter-reg/mem (dstate mod r/m)
135 (declare (type disassem-state dstate)
136 (type (unsigned-byte 2) mod)
137 (type (unsigned-byte 3) r/m))
138 (flet ((make-machine-ea (base &optional disp index scale)
139 (let ((ea (the machine-ea
140 (sb!disassem::new-filtered-arg dstate #'%make-machine-ea))))
141 (setf (machine-ea-base ea) base
142 (machine-ea-disp ea) disp
143 (machine-ea-index ea) index
144 (machine-ea-scale ea) scale)
145 ea))
146 (displacement ()
147 (case mod
148 (#b01 (read-signed-suffix 8 dstate))
149 (#b10 (read-signed-suffix 32 dstate))))
150 (extend (bit-name reg)
151 (logior (if (dstate-get-inst-prop dstate bit-name) 8 0)
152 reg)))
153 (declare (inline extend))
154 (let ((full-reg (extend +rex-b+ r/m)))
155 (cond ((= mod #b11) full-reg) ; register direct mode
156 ((= r/m #b100) ; SIB byte - rex.b is "don't care"
157 (let* ((sib (the (unsigned-byte 8) (read-suffix 8 dstate)))
158 (index-reg (extend +rex-x+ (ldb (byte 3 3) sib)))
159 (base-reg (ldb (byte 3 0) sib)))
160 ;; mod=0 and base=RBP means no base reg
161 (make-machine-ea (unless (and (= mod #b00) (= base-reg #b101))
162 (extend +rex-b+ base-reg))
163 (cond ((/= mod #b00) (displacement))
164 ((= base-reg #b101) (read-signed-suffix 32 dstate)))
165 (unless (= index-reg #b100) index-reg) ; index can't be RSP
166 (ash 1 (ldb (byte 2 6) sib)))))
167 ((/= mod #b00) (make-machine-ea full-reg (displacement)))
168 ;; rex.b is not decoded in determining RIP-relative mode
169 ((= r/m #b101) (make-machine-ea :rip (read-signed-suffix 32 dstate)))
170 (t (make-machine-ea full-reg))))))
172 ;;; Prints a memory reference to STREAM. VALUE is a list of
173 ;;; (BASE-REG OFFSET INDEX-REG INDEX-SCALE), where any component may be
174 ;;; missing or nil to indicate that it's not used or has the obvious
175 ;;; default value (e.g., 1 for the index-scale). BASE-REG can be the
176 ;;; symbol RIP or a full register, INDEX-REG a full register. If WIDTH
177 ;;; is non-nil it should be one of the symbols :BYTE, :WORD, :DWORD or
178 ;;; :QWORD; a corresponding size indicator is printed if MODE is :SIZED-REF.
179 ;;; The rationale for supplying WIDTH while eliding a pointer-size qualifier
180 ;;; is that proper dereferencing of RIP-relative constants requires a size,
181 ;;; but in other cases would only add clutter, since a source/destination
182 ;;; register implies a size.
184 (defun print-mem-ref (mode value width stream dstate)
185 ;; :COMPUTE is used for the LEA instruction - it informs this function
186 ;; that we're not loading from the address and that the contents should not
187 ;; be printed. It'll usually be a reference to code within the disasembly
188 ;; segment, as LEA is employed to compute the entry point for local call.
189 (declare (type (member :ref :sized-ref :compute) mode)
190 (type machine-ea value)
191 (type (member nil :byte :word :dword :qword) width)
192 (type stream stream)
193 (type disassem-state dstate))
194 (let ((base-reg (machine-ea-base value))
195 (disp (machine-ea-disp value))
196 (index-reg (machine-ea-index value))
197 (firstp t))
198 (when (and width (eq mode :sized-ref))
199 (princ width stream)
200 (princ '| PTR | stream))
201 (write-char #\[ stream)
202 (when base-reg
203 (if (eql :rip base-reg)
204 (princ base-reg stream)
205 (print-addr-reg base-reg stream dstate))
206 (setq firstp nil))
207 (when index-reg
208 (unless firstp (write-char #\+ stream))
209 (print-addr-reg index-reg stream dstate)
210 (let ((scale (machine-ea-scale value)))
211 (unless (= scale 1)
212 (write-char #\* stream)
213 (princ scale stream)))
214 (setq firstp nil))
215 (when (and disp (or firstp (not (zerop disp))))
216 (unless (or firstp (minusp disp))
217 (write-char #\+ stream))
218 (cond ((eq (machine-ea-base value) :rip)
219 (princ disp stream)
220 (unless (eq mode :compute)
221 (let ((addr (+ disp (dstate-next-addr dstate))))
222 ;; The origin is zero when disassembling into a trace-file.
223 ;; Don't crash on account of it.
224 (when (plusp addr)
225 (or (nth-value
226 1 (note-code-constant-absolute addr dstate width))
227 (maybe-note-assembler-routine addr nil dstate)
228 ;; Show the absolute address and maybe the contents.
229 (note (format nil "[#x~x]~@[ = ~x~]"
230 addr
231 (case width
232 (:qword
233 (unboxed-constant-ref
234 dstate
235 (+ (dstate-next-offs dstate) disp)))))
236 dstate))))))
237 (firstp
238 (princ16 disp stream)
239 (or (minusp disp)
240 (nth-value 1 (note-code-constant-absolute disp dstate))
241 (maybe-note-assembler-routine disp nil dstate)
242 ;; Static symbols coming frorm CELL-REF
243 (maybe-note-static-symbol (+ disp (- other-pointer-lowtag
244 n-word-bytes))
245 dstate)))
247 (princ disp stream))))
248 (write-char #\] stream)
249 #!+sb-thread
250 (when (and (eql base-reg #.(ash (tn-offset sb!vm::thread-base-tn) -1))
251 (not index-reg) ; no index
252 (typep disp '(integer 0 *)) ; positive displacement
253 (seg-code (dstate-segment dstate)))
254 ;; Try to reverse-engineer which thread-local binding this is
255 (let* ((code (seg-code (dstate-segment dstate)))
256 (header-n-words (code-header-words code))
257 (tls-index (ash disp (- n-fixnum-tag-bits))))
258 (loop for word-num from code-constants-offset below header-n-words
259 for obj = (code-header-ref code word-num)
260 when (and (symbolp obj) (= (symbol-tls-index obj) tls-index))
261 do (return-from print-mem-ref
262 (note (lambda (stream) (format stream "tls: ~S" obj))
263 dstate))))
264 ;; Or maybe we're looking at the 'struct thread' itself
265 (when (< disp max-interrupts)
266 (let* ((thread-slots
267 (load-time-value
268 (primitive-object-slots
269 (find 'sb!vm::thread *primitive-objects*
270 :key #'primitive-object-name)) t))
271 (slot (find (ash disp (- word-shift)) thread-slots
272 :key #'slot-offset)))
273 (when slot
274 (return-from print-mem-ref
275 (note (lambda (stream)
276 (format stream "thread.~(~A~)" (slot-name slot)))
277 dstate))))))))
279 (defun lea-compute-label (value dstate)
280 ;; If VALUE should be regarded as a label, return the address.
281 ;; If not, just return VALUE.
282 (if (and (typep value 'machine-ea) (eq (machine-ea-base value) :rip))
283 (+ (dstate-next-addr dstate) (machine-ea-disp value))
284 value))
286 ;; Figure out whether LEA should print its EA with just the stuff in brackets,
287 ;; or additionally show the EA as either a label or a hex literal.
288 (defun lea-print-ea (value stream dstate)
289 (let ((width (inst-operand-size dstate))
290 (addr nil)
291 (fmt "= #x~x"))
292 (etypecase value
293 (machine-ea
294 ;; Indicate to PRINT-MEM-REF that this is not a memory access.
295 (print-mem-ref :compute value width stream dstate)
296 (when (eq (machine-ea-base value) :rip)
297 (setq addr (+ (dstate-next-addr dstate) (machine-ea-disp value)))))
299 ;; We're robust in allowing VALUE to be an integer (a register),
300 ;; though LEA Rx,Ry is an illegal instruction.
301 ;; Test this before INTEGER since the types overlap.
302 (full-reg
303 (print-reg-with-width value width stream dstate))
305 ((or string integer)
306 ;; A label for the EA should not print as itself, but as the decomposed
307 ;; addressing mode so that [ADDR] and [RIP+disp] are unmistakable.
308 ;; We can see an INTEGER here because LEA-COMPUTE-LABEL is always called
309 ;; on the operand to LEA, and it will compute an absolute address based
310 ;; off RIP when possible. If :use-labels NIL was specified, there is
311 ;; no hashtable of address to string, so we get the address.
312 ;; But ordinarily we get the string. Either way, the r/m arg reveals the
313 ;; EA calculation. DCHUNK-ZERO is a meaningless value - any would do -
314 ;; because the EA was computed in a prefilter.
315 (print-mem-ref :compute (reg-r/m-inst-r/m-arg dchunk-zero dstate)
316 width stream dstate)
317 (setq addr value)
318 (when (stringp value) (setq fmt "= ~A"))))
319 (when addr
320 (note (lambda (s) (format s fmt addr)) dstate))))
322 (defun unboxed-constant-ref (dstate segment-offset)
323 (let* ((seg (dstate-segment dstate))
324 (code-offset
325 (sb!disassem::segment-offs-to-code-offs segment-offset seg))
326 (unboxed-range (sb!disassem::seg-unboxed-data-range seg)))
327 (and unboxed-range
328 (<= (car unboxed-range) code-offset (cdr unboxed-range))
329 (sap-ref-int (dstate-segment-sap dstate)
330 segment-offset n-word-bytes
331 (dstate-byte-order dstate)))))
333 ;;;; interrupt instructions
335 (defun break-control (chunk inst stream dstate)
336 (declare (ignore inst))
337 (flet ((nt (x) (if stream (note x dstate))))
338 (case #!-ud2-breakpoints (byte-imm-code chunk dstate)
339 #!+ud2-breakpoints (word-imm-code chunk dstate)
340 (#.error-trap
341 (nt "error trap")
342 (handle-break-args #'snarf-error-junk stream dstate))
343 (#.cerror-trap
344 (nt "cerror trap")
345 (handle-break-args #'snarf-error-junk stream dstate))
346 (#.breakpoint-trap
347 (nt "breakpoint trap"))
348 (#.pending-interrupt-trap
349 (nt "pending interrupt trap"))
350 (#.halt-trap
351 (nt "halt trap"))
352 (#.fun-end-breakpoint-trap
353 (nt "function end breakpoint trap"))
354 (#.single-step-around-trap
355 (nt "single-step trap (around)"))
356 (#.single-step-before-trap
357 (nt "single-step trap (before)"))
358 (#.invalid-arg-count-trap
359 (nt "Invalid argument count trap")))))
361 ;;;;
363 #!+immobile-code
364 (defun sb!vm::collect-immobile-code-relocs ()
365 (let ((code-components
366 (make-array 20000 :element-type '(unsigned-byte 32)
367 :fill-pointer 0 :adjustable t))
368 (relocs
369 (make-array 100000 :element-type '(unsigned-byte 32)
370 :fill-pointer 0 :adjustable t))
371 ;; Look for these two instruction formats.
372 (jmp-inst (find-inst #b11101001 (get-inst-space)))
373 (call-inst (find-inst #b11101000 (get-inst-space)))
374 (seg (sb!disassem::%make-segment
375 :sap-maker #'error :virtual-location 0))
376 (dstate (make-dstate)))
377 (flet ((scan-function (fun-entry-addr fun-end-addr predicate)
378 (setf (seg-virtual-location seg) fun-entry-addr
379 (seg-length seg) (- fun-end-addr fun-entry-addr)
380 (seg-sap-maker seg)
381 (let ((sap (int-sap fun-entry-addr))) (lambda () sap)))
382 (map-segment-instructions
383 (lambda (dchunk inst)
384 (when (and (or (eq inst jmp-inst)
385 (eq inst call-inst))
386 (funcall predicate
387 (+ (near-jump-displacement dchunk dstate)
388 (dstate-next-addr dstate))))
389 (vector-push-extend (dstate-cur-addr dstate) relocs)))
390 seg dstate nil))
391 (finish-component (code start-relocs-index)
392 (when (> (fill-pointer relocs) start-relocs-index)
393 (vector-push-extend (get-lisp-obj-address code) code-components)
394 (vector-push-extend start-relocs-index code-components))))
396 ;; Assembler routines are in read-only space, and they can have
397 ;; relative jumps to immobile space.
398 ;; Since these code components do not contain simple-funs,
399 ;; we have to group the routines by looking at addresses.
400 (let ((asm-routines
401 (mapcar #'cdr (%hash-table-alist sb!fasl:*assembler-routines*)))
402 code-components)
403 (sb!vm::map-allocated-objects (lambda (obj type size)
404 (declare (ignore size))
405 (when (= type sb!vm:code-header-widetag)
406 (push obj code-components)))
407 :read-only)
408 (dolist (code (nreverse code-components))
409 (let* ((text-origin (sap-int (code-instructions code)))
410 (text-end (+ text-origin (%code-code-size code)))
411 (relocs-index (fill-pointer relocs)))
412 (mapl (lambda (list)
413 (scan-function (car list)
414 (if (cdr list) (cadr list) text-end)
415 ;; Look for transfers into immobile code
416 (lambda (jmp-targ-addr)
417 (<= sb!vm:immobile-space-start
418 jmp-targ-addr sb!vm:immobile-space-end))))
419 (sort (remove-if-not (lambda (address)
420 (<= text-origin address text-end))
421 asm-routines) #'<))
422 (finish-component code relocs-index))))
424 ;; Immobile space - code components can jump to immobile space,
425 ;; read-only space, and C runtime routines.
426 (sb!vm::map-allocated-objects
427 (lambda (code type size)
428 (declare (ignore size))
429 (when (= type code-header-widetag)
430 (let* ((text-origin (sap-int (code-instructions code)))
431 (text-end (+ text-origin (%code-code-size code)))
432 (relocs-index (fill-pointer relocs)))
433 (dotimes (i (code-n-entries code) (finish-component code relocs-index))
434 (let ((fun (%code-entry-point code i)))
435 (scan-function
436 (+ (get-lisp-obj-address fun) (- fun-pointer-lowtag)
437 (ash simple-fun-code-offset word-shift))
438 (if (< (1+ i) (code-n-entries code))
439 (- (get-lisp-obj-address (%code-entry-point code (1+ i)))
440 fun-pointer-lowtag)
441 text-end)
442 ;; Exclude transfers within this code component
443 (lambda (jmp-targ-addr)
444 (not (<= text-origin jmp-targ-addr text-end)))))))))
445 :immobile))
447 ;; Write a delimiter into the array passed to C
448 (vector-push-extend 0 code-components)
449 (vector-push-extend (fill-pointer relocs) code-components)
450 (values code-components relocs)))