1 ;;;; target-only stuff from CMU CL's src/compiler/x86/insts.lisp
3 ;;;; i.e. stuff which was in CMU CL's insts.lisp file, but which in
4 ;;;; the SBCL build process can't be compiled into code for the
5 ;;;; cross-compilation host
7 ;;;; This software is part of the SBCL system. See the README file for
10 ;;;; This software is derived from the CMU CL system, which was
11 ;;;; written at Carnegie Mellon University and released into the
12 ;;;; public domain. The software is in the public domain and is
13 ;;;; provided with absolutely no warranty. See the COPYING and CREDITS
14 ;;;; files for more information.
16 (in-package "SB-X86-64-ASM")
18 (defun sb-disassem::pre-decode
(chunk dstate
)
19 (let ((byte (ldb (byte 8 0) chunk
)))
23 #x66
; operand size modifier
24 #x67
; address size modifier
26 #xf2
; REPNE or SSE inst
27 #xf3
) ; REP or SSE inst
28 ;; If the next byte is a REX prefix, then strip it out, recording the 'wrxb'
29 ;; bits in the dstate, and return the chunk as if the REX byte were absent.
30 (let ((next (ldb (byte 8 8) chunk
)))
31 (when (= (logand next
#xf0
) #x40
)
32 (dstate-setprop dstate
(logior +rex
+ (logand next
#b1111
)))
33 (let ((new (logior byte
(ash (ldb (byte 48 16) chunk
) 8))))
34 (return-from sb-disassem
::pre-decode
(values new
1))))))))
37 (defmethod print-object ((reg reg
) stream
)
38 (if (or *print-escape
* *print-readably
*)
39 ;; cross-compiled DEFMETHOD can't use call-next-method
40 (default-structure-print reg stream
*current-level-in-print
*)
41 (write-string (reg-name reg
) stream
)))
43 ;;; Return the operand size depending on the prefixes and width bit as
45 (defun inst-operand-size (dstate)
46 (declare (type disassem-state dstate
))
47 (cond ((dstate-getprop dstate
+operand-size-8
+) :byte
)
48 ((dstate-getprop dstate
+rex-w
+) :qword
)
49 ((dstate-getprop dstate
+operand-size-16
+) :word
)
50 (t +default-operand-size
+)))
52 ;;; The same as INST-OPERAND-SIZE, but for those instructions (e.g.
53 ;;; PUSH, JMP) that have a default operand size of :qword. It can only
54 ;;; be overwritten to :word.
55 (defun inst-operand-size-default-qword (dstate)
56 (declare (type disassem-state dstate
))
57 (if (dstate-getprop dstate
+operand-size-16
+) :word
:qword
))
59 ;;; This prefilter is used solely for its side effect, namely to put
60 ;;; the property OPERAND-SIZE-8 into the DSTATE if VALUE is 0.
61 (defun prefilter-width (dstate value
)
62 (declare (type bit value
) (type disassem-state dstate
))
64 (dstate-setprop dstate
+operand-size-8
+))
67 ;;; A register field that can be extended by REX.R.
68 (defun prefilter-reg-r (dstate value
)
69 (declare (type (mod 8) value
) (type disassem-state dstate
))
70 ;; size is arbitrary here since the printer determines it
71 (get-gpr :qword
(if (dstate-getprop dstate
+rex-r
+) (+ value
8) value
)))
73 ;;; A register field that can be extended by REX.B.
74 (defun prefilter-reg-b (dstate value
)
75 (declare (type (mod 8) value
) (type disassem-state dstate
))
76 ;; size is arbitrary here since the printer determines it
77 (get-gpr :qword
(if (dstate-getprop dstate
+rex-b
+) (+ value
8) value
)))
79 ;; This reader extracts the 'imm' operand in "MOV reg,imm" format.
80 ;; KLUDGE: the REG instruction format can not define a reader
81 ;; because it has no field specification and no prefilter.
82 ;; (It's specified directly in the MOV instruction definition)
83 (defun reg-imm-data (dchunk dstate
) dchunk
84 (aref (sb-disassem::dstate-filtered-values dstate
) 4))
86 ;;; This structure is logically immutable, except for one problem:
87 ;;; the disassembler recycles instances of it (re-uses the same
88 ;;; one for each successive instruction). See DECODE-MOD-R/M.
89 (defstruct (machine-ea (:copier nil
)
90 (:constructor %make-machine-ea
))
91 ;; possible TODO: base,index,scale could be packed thusly in 13 bits:
93 ;; 1 bit for base register non-NULL
94 ;; 4 bits for base register number
95 ;; 1 bit for base-register-is-RIP
96 ;; 1 bit for index register non-NULL
97 ;; 4 bits for index register number
98 disp base index scale
)
99 (declaim (freeze-type machine-ea
))
101 (defun reg-num (reg) (reg-id-num (reg-id reg
)))
103 ;;; Print to STREAM the name of the general-purpose register encoded by
104 ;;; VALUE and of size WIDTH.
105 (defun print-reg-with-width (value width stream dstate
)
106 (declare (type (or null stream
) stream
)
107 (type disassem-state dstate
))
108 (let* ((num (etypecase value
109 ((unsigned-byte 4) value
)
110 ;; Decode and re-encode, because the size is always
112 (reg (reg-num value
))))
114 (if (and (eq width
:byte
)
115 (not (dstate-getprop dstate
+rex
+))
117 (+ 16 -
4 num
) ; legacy high-byte register
120 (princ (reg-name reg
) stream
)
121 (operand reg dstate
)))
122 ;; XXX plus should do some source-var notes
125 (defun print-reg (value stream dstate
)
126 (print-reg-with-width value
127 (inst-operand-size dstate
)
131 (defun print-reg-default-qword (value stream dstate
)
132 (print-reg-with-width value
133 (inst-operand-size-default-qword dstate
)
137 ;; Print a reg that can only be a :DWORD or :QWORD.
138 ;; Avoid use of INST-OPERAND-SIZE because it's wrong for this type of operand.
139 (defun print-d/q-word-reg
(value stream dstate
)
140 (print-reg-with-width value
141 (if (dstate-getprop dstate
+rex-w
+) :qword
:dword
)
145 (defun print-byte-reg (value stream dstate
)
146 (print-reg-with-width value
:byte stream dstate
))
148 (defun print-addr-reg (value stream dstate
)
149 (print-reg-with-width value
+default-address-size
+ stream dstate
))
151 ;;; Print a register or a memory reference of the given WIDTH.
152 ;;; If SIZED-P is true, add an explicit size indicator for memory
154 (defun print-reg/mem-with-width
(value width sized-p stream dstate
)
155 (declare (type (member :byte
:word
:dword
:qword
) width
)
156 (type boolean sized-p
))
157 (if (machine-ea-p value
)
158 (print-mem-ref (if sized-p
:sized-ref
:ref
) value width stream dstate
)
159 (print-reg-with-width value width stream dstate
)))
161 ;;; Print a register or a memory reference. The width is determined by
162 ;;; calling INST-OPERAND-SIZE.
163 (defun print-reg/mem
(value stream dstate
)
164 (print-reg/mem-with-width
165 value
(inst-operand-size dstate
) nil stream dstate
))
167 ;; Same as print-reg/mem, but prints an explicit size indicator for
168 ;; memory references.
169 (defun print-sized-reg/mem
(value stream dstate
)
170 (print-reg/mem-with-width
171 value
(inst-operand-size dstate
) t stream dstate
))
173 ;;; Same as print-sized-reg/mem, but with a default operand size of
175 (defun print-sized-reg/mem-default-qword
(value stream dstate
)
176 (print-reg/mem-with-width
177 value
(inst-operand-size-default-qword dstate
) t stream dstate
))
179 (defun print-rel32-disp (value stream dstate
)
180 (cond ((not stream
) (operand value dstate
))
182 (or (when (and (typep value
'word
)
183 (not (logtest value lowtag-mask
))
184 (< text-space-start value
(sap-int *text-space-free-pointer
*)))
185 (multiple-value-bind (fun ok
)
186 (make-lisp-obj (+ value -
16 fun-pointer-lowtag
) nil
)
188 (let ((name (%fun-name fun
)))
189 (note (if (and (symbolp name
) (eq (fboundp name
) fun
))
190 (lambda (stream) (format stream
"#'~A" name
))
191 (lambda (stream) (princ fun stream
)))
193 (maybe-note-assembler-routine value nil dstate
))
194 (print-label value stream dstate
))))
196 (defun print-jmp-ea (value stream dstate
)
197 (cond ((null stream
) (operand value dstate
))
198 ((typep value
'machine-ea
)
199 (when (or (eq (machine-ea-base value
) :rip
)
200 (and (eql (machine-ea-base value
)
201 (car (sb-disassem::dstate-known-register-contents dstate
)))
202 (eq (cdr (sb-disassem::dstate-known-register-contents dstate
))
203 'sb-vm
::linkage-table
)
204 (integerp (machine-ea-disp value
))
205 (not (machine-ea-index value
))))
206 (setf (sb-disassem::dstate-known-register-contents dstate
) nil
)
207 (let ((name (if (eq (machine-ea-base value
) :rip
)
208 (linkage-addr->name
(+ (dstate-next-addr dstate
)
209 (machine-ea-disp value
)) :abs
)
210 (linkage-addr->name
(machine-ea-disp value
) :rel
))))
212 ;; :COMPUTE won't show the contents of the word
213 (print-mem-ref :compute value
:qword stream dstate
)
214 (return-from print-jmp-ea
(note (lambda (s) (prin1 name s
)) dstate
)))))
215 (print-mem-ref :ref value
:qword stream dstate
)
217 (when (and (null (machine-ea-base value
)) (null (machine-ea-index value
)))
218 (let* ((v sb-fasl
::*asm-routine-vector
*)
219 (a (logandc2 (get-lisp-obj-address v
) sb-vm
:lowtag-mask
)))
220 (when (<= a
(machine-ea-disp value
) (1- (+ a
(primitive-object-size v
))))
221 (let ((target (sap-ref-word (int-sap (machine-ea-disp value
)) 0)))
222 (maybe-note-assembler-routine target t dstate
))))))
223 (t (write value
:stream stream
:escape nil
))))
225 (defun print-sized-byte-reg/mem
(value stream dstate
)
226 (print-reg/mem-with-width value
:byte t stream dstate
))
228 (defun print-sized-word-reg/mem
(value stream dstate
)
229 (print-reg/mem-with-width value
:word t stream dstate
))
231 (defun print-sized-dword-reg/mem
(value stream dstate
)
232 (print-reg/mem-with-width value
:dword t stream dstate
))
234 (defun print-label (value stream dstate
)
235 (declare (ignore dstate
))
236 (princ16 value stream
))
238 (defun print-xmmreg (value stream dstate
)
239 (let* ((reg (get-fpr :xmm
240 ;; FIXME: why are we seeing a value from the GPR
241 ;; prefilter instead of XMM prefilter here sometimes?
243 ((unsigned-byte 4) value
)
244 (reg (reg-num value
)))))
245 (name (reg-name reg
)))
247 (write-string name stream
)
248 (operand name dstate
))))
250 (defun print-xmmreg/mem
(value stream dstate
)
251 (if (machine-ea-p value
)
252 (print-mem-ref :ref value nil stream dstate
)
253 (print-xmmreg value stream dstate
)))
255 ;;; Guess whether VALUE is an immobile-space symbol or code blob by looking
256 ;;; at all code header constants. If it matches any constant, assume that it
257 ;;; is a use of the constant. This has false positives of course,
258 ;;; as does MAYBE-NOTE-STATIC-SYMBOL and friends. Any random immediate value
259 ;;; used in an unboxed context, such as an ADD instruction,
260 ;;; can be wrongly construed as an address.
261 ;;; Note that for symbols we can match either the tagged pointer to it
262 ;;; OR the untagged address of the SYMBOL-VALUE slot.
264 ;;; "static" in this usage implies "at a fixed address" - it could be
265 ;;; in static space or immobile space.
267 ;;; TODO: probably should take an &OPTIONAL for ALLOW-INTERIOR-PTR to
268 ;;; reject false positives from instructions that don't access an object
269 ;;; except through a tagged pointer.
270 (defun maybe-note-static-lispobj (value dstate
&optional quote
)
271 (when (maybe-note-static-symbol value dstate
)
272 ;; Returning T prints VALUE using base 16
273 ;; (see the SIGNED-IMM-DATA printer, PRINT-IMM/ASM-ROUTINE)
274 ;; This should probably pass through the QUOTE option but it's not critical.
275 (return-from maybe-note-static-lispobj t
))
276 (let ((code (seg-code (dstate-segment dstate
)))
277 (adjusted-val (logior (- value
(ash sb-vm
:symbol-value-slot sb-vm
:word-shift
))
278 sb-vm
:other-pointer-lowtag
))
282 (loop for i downfrom
(1- (code-header-words code
)) to sb-vm
:code-constants-offset
283 for const
= (code-header-ref code i
)
284 do
(when (symbolp const
)
285 (let ((addr (get-lisp-obj-address const
)))
286 (cond ((eql addr value
)
287 (return (setq found-const const
)))
288 ((eql addr adjusted-val
)
289 (return (setq found-const const
290 slot sb-vm
:symbol-value-slot
)))))))
291 (unless found-const
; try static symbol's value slots
292 (dovector (symbol sb-vm
:+static-symbols
+)
293 (when (= (get-lisp-obj-address symbol
) adjusted-val
)
294 (return (setq found-const symbol
295 slot sb-vm
:symbol-value-slot
)))))
298 (lambda (s) (format s
"(SYMBOL-VALUE '~S)" found-const
)))
299 ((and (symbolp found-const
) quote
)
300 (lambda (s) (write-char #\' s
) (prin1 found-const s
)))
302 (lambda (s) (prin1 found-const s
))))
304 ;; Returning T prints in base 16 (see PRINT-IMM/ASM-ROUTINE)
305 (return-from maybe-note-static-lispobj t
))))
306 #| This mysterious code seems to have no regression tests.
307 Comenting it out until I can figure out why it was in target-disassem
308 ;; Kludge: layout of STREAM, FILE-STREAM, and STRING-STREAM can be used
309 ;; as immediate operands without a corresponding boxed header constant.
310 ;; I think we always elide the boxed constant for builtin layouts,
311 ;; but these three have some slightly unusual codegen that causes a PUSH
312 ;; instruction to need some help to show its operand as a lisp object.
313 (dolist (thing (load-time-value (list (find-layout 'stream
)
314 (find-layout 'file-stream
)
315 (find-layout 'string-stream
))
317 (when (eql (get-lisp-obj-address thing
) address
)
318 (return-from found thing
))))) |
#
319 (awhen (and (typep value
'word
)
320 (sb-disassem::find-code-constant-from-interior-pointer value dstate
))
321 (note (lambda (stream) (princ it stream
)) dstate
)))
323 ;;; Return an instance of REG or MACHINE-EA.
324 ;;; MOD and R/M are the extracted bits from the instruction's ModRM byte.
325 ;;; Depending on MOD and R/M, a SIB byte and/or displacement may be read.
326 ;;; The REX.B and REX.X from dstate are appropriately consumed.
327 (defun decode-mod-r/m
(dstate mod r
/m regclass
)
328 (declare (type disassem-state dstate
)
329 (type (unsigned-byte 2) mod
)
330 (type (unsigned-byte 3) r
/m
))
331 (flet ((make-machine-ea (base &optional disp index scale
)
332 (let ((ea (the machine-ea
333 (sb-disassem::new-filtered-arg dstate
#'%make-machine-ea
))))
334 (setf (machine-ea-base ea
) base
335 (machine-ea-disp ea
) disp
336 (machine-ea-index ea
) index
337 (machine-ea-scale ea
) scale
)
341 (#b01
(read-signed-suffix 8 dstate
))
342 (#b10
(read-signed-suffix 32 dstate
))))
343 (extend (bit-name reg
)
344 (logior (if (dstate-getprop dstate bit-name
) 8 0) reg
)))
345 (declare (inline extend
))
346 (let ((full-reg (extend +rex-b
+ r
/m
)))
347 (cond ((= mod
#b11
) ; register direct mode
349 (gpr (get-gpr :qword full-reg
)) ; size is not really known here
350 (fpr (get-fpr :xmm full-reg
))))
351 ((= r
/m
#b100
) ; SIB byte - rex.b is "don't care"
352 (let* ((sib (the (unsigned-byte 8) (read-suffix 8 dstate
)))
353 (index-reg (extend +rex-x
+ (ldb (byte 3 3) sib
)))
354 (base-reg (ldb (byte 3 0) sib
)))
355 ;; mod=0 and base=RBP means no base reg
356 (make-machine-ea (unless (and (= mod
#b00
) (= base-reg
#b101
))
357 (extend +rex-b
+ base-reg
))
358 (cond ((/= mod
#b00
) (displacement))
359 ((= base-reg
#b101
) (read-signed-suffix 32 dstate
)))
360 (unless (= index-reg
#b100
) index-reg
) ; index can't be RSP
361 (ash 1 (ldb (byte 2 6) sib
)))))
362 ((/= mod
#b00
) (make-machine-ea full-reg
(displacement)))
363 ;; rex.b is not decoded in determining RIP-relative mode
364 ((= r
/m
#b101
) (make-machine-ea :rip
(read-signed-suffix 32 dstate
)))
365 (t (make-machine-ea full-reg
))))))
367 (defun prefilter-reg/mem
(dstate mod r
/m
)
368 (decode-mod-r/m dstate mod r
/m
'gpr
))
369 (defun prefilter-xmmreg/mem
(dstate mod r
/m
)
370 (decode-mod-r/m dstate mod r
/m
'fpr
))
372 ;;; Return contents of memory if either it refers to an unboxed code constant
373 ;;; or is RIP-relative with a displacement of 0.
374 (defun unboxed-constant-ref (dstate addr disp
)
375 (when (and (minusp disp
)
376 (awhen (seg-code (dstate-segment dstate
))
377 (sb-disassem::points-to-code-constant-p addr it
)))
378 (sap-ref-word (int-sap addr
) 0)))
380 (define-load-time-global thread-slot-names
381 (let* ((slots (coerce (primitive-object-slots
382 (sb-vm::primitive-object
'sb-vm
::thread
))
384 (a (make-array (1+ (slot-offset (car (last slots
))))
385 :initial-element nil
)))
386 (dolist (slot slots a
)
387 (setf (aref a
(slot-offset slot
)) (slot-name slot
)))))
389 ;;; Prints a memory reference to STREAM. VALUE is a list of
390 ;;; (BASE-REG OFFSET INDEX-REG INDEX-SCALE), where any component may be
391 ;;; missing or nil to indicate that it's not used or has the obvious
392 ;;; default value (e.g., 1 for the index-scale). BASE-REG can be the
393 ;;; symbol RIP or a full register, INDEX-REG a full register. If WIDTH
394 ;;; is non-nil it should be one of the symbols :BYTE, :WORD, :DWORD or
395 ;;; :QWORD; a corresponding size indicator is printed if MODE is :SIZED-REF.
396 ;;; The rationale for supplying WIDTH while eliding a pointer-size qualifier
397 ;;; is that proper dereferencing of RIP-relative constants requires a size,
398 ;;; but in other cases would only add clutter, since a source/destination
399 ;;; register implies a size.
401 (defun print-mem-ref (mode value width stream dstate
&key
(index-reg-printer #'print-addr-reg
))
402 ;; :COMPUTE is used for the LEA instruction - it informs this function
403 ;; that we're not loading from the address and that the contents should not
404 ;; be printed. It'll usually be a reference to code within the disasembly
405 ;; segment, as LEA is employed to compute the entry point for local call.
406 (declare (type (member :ref
:sized-ref
:compute
) mode
)
407 (type machine-ea value
)
408 (type (member nil
:byte
:word
:dword
:qword
) width
)
409 (type (or null stream
) stream
)
410 (type disassem-state dstate
))
411 ;; If disassembling into the dstate, print nothing; just stash the operand.
413 (return-from print-mem-ref
(operand (cons value width
) dstate
)))
415 ;; Unpack and print the pieces of the machine EA.
416 (let ((base-reg (machine-ea-base value
))
417 (disp (machine-ea-disp value
))
418 (index-reg (machine-ea-index value
))
420 (when (and width
(eq mode
:sized-ref
))
422 (princ '| PTR | stream
))
423 (when (dstate-getprop dstate
+fs-segment
+)
424 (princ "FS:" stream
))
425 (when (dstate-getprop dstate
+gs-segment
+)
426 (princ "GS:" stream
))
427 (write-char #\
[ stream
)
429 (if (eql :rip base-reg
)
430 (princ base-reg stream
)
431 (print-addr-reg base-reg stream dstate
))
434 (unless firstp
(write-char #\
+ stream
))
435 (funcall index-reg-printer index-reg stream dstate
)
436 (let ((scale (machine-ea-scale value
)))
438 (write-char #\
* stream
)
439 (princ scale stream
)))
441 (when (and disp
(or firstp
(not (zerop disp
))))
442 (unless (or firstp
(minusp disp
))
443 (write-char #\
+ stream
))
444 (cond ((eq (machine-ea-base value
) :rip
)
447 (princ16 disp stream
)
448 ;; Avoid the MAYBE-NOTE call if we can. A negative offset is never an
449 ;; absolute address as would be used for asm routines and static symbols.
450 ;; FIRSTP implies lack of a base and index register.
451 (unless (minusp disp
)
452 (or (maybe-note-static-symbol (+ disp
(- other-pointer-lowtag
453 (* n-word-bytes sb-vm
:symbol-value-slot
)))
455 (maybe-note-assembler-routine disp nil dstate
))))
457 (princ disp stream
))))
458 (write-char #\
] stream
)
460 ;; Always try to add an end-of-line comment about the EA.
461 ;; Assembler routines were already handled above (not really sure why)
462 ;; so now we have to figure out everything else.
464 (when (and (eql (machine-ea-base value
) sb-vm
::card-table-reg
)
465 (eql (machine-ea-disp value
) -
8))
466 (return-from print-mem-ref
(note "safepoint" dstate
)))
468 (when (and (eq (machine-ea-base value
) :rip
) (neq mode
:compute
))
470 (binding* ((seg (dstate-segment dstate
))
471 (code (seg-code seg
) :exit-if-null
)
472 (offs (sb-disassem::segment-offs-to-code-offs
473 (+ (dstate-next-offs dstate
) disp
) seg
)))
474 (when (note-code-constant offs dstate
) (return)))
475 (let ((addr (+ disp
(dstate-next-addr dstate
))))
476 ;; The origin is zero when disassembling into a trace-file.
477 ;; Don't crash on account of it.
478 ;; Also, don't try to look up C symbols in immobile space.
479 ;; In an elfinated core, the range that is reserved for
480 ;; compilation to memory says it is all associated with
481 ;; the symbol "lisp_jit_code" which is not useful.
483 (or (when (<= sb-vm
:alien-linkage-space-start addr
484 (+ sb-vm
:alien-linkage-space-start
485 (1- sb-vm
:alien-linkage-space-size
)))
486 (let* ((index (sb-vm::alien-linkage-table-index-from-address addr
))
487 (name (sb-impl::alien-linkage-index-to-name index
)))
488 (note (lambda (s) (format s
"&~A" name
)) dstate
)))
489 (unless (sb-kernel:immobile-space-addr-p addr
)
490 (maybe-note-assembler-routine addr nil dstate
))
491 ;; Show the absolute address and maybe the contents.
492 (note (format nil
"[#x~x]~@[ = #x~x~]"
495 (:qword
(unboxed-constant-ref dstate addr disp
))))
498 ;; Recognize "[Rbase+disp]" as an alien linkage table reference if Rbase was
499 ;; just loaded with the base address in the prior instruction.
500 (when (and (eql (machine-ea-base value
)
501 (car (sb-disassem::dstate-known-register-contents dstate
)))
502 (eq (cdr (sb-disassem::dstate-known-register-contents dstate
))
503 'sb-vm
::alien-linkage-table-base
)
504 (not (machine-ea-index value
))
505 (integerp (machine-ea-disp value
)))
506 (let ((name (sb-impl::alien-linkage-index-to-name
507 (floor (machine-ea-disp value
) sb-vm
:alien-linkage-table-entry-size
))))
508 (note (lambda (s) (format s
"&~A" name
)) dstate
)))
509 (setf (sb-disassem::dstate-known-register-contents dstate
) nil
)
511 (flet ((guess-symbol (predicate)
512 (binding* ((code-header (seg-code (dstate-segment dstate
)) :exit-if-null
)
513 (header-n-words (code-header-words code-header
)))
514 (loop for word-num from code-constants-offset below header-n-words
515 for obj
= (code-header-ref code-header word-num
)
516 when
(and (symbolp obj
) (funcall predicate obj
))
518 (when (and (not base-reg
) (not index-reg
) disp
)
519 (let ((addr (+ disp
; guess that DISP points to a symbol-value slot
520 (- (ash sb-vm
:symbol-value-slot sb-vm
:word-shift
))
521 sb-vm
:other-pointer-lowtag
)))
522 (awhen (guess-symbol (lambda (s) (= (get-lisp-obj-address s
) addr
)))
523 (note (lambda (stream) (prin1 it stream
)) dstate
)
524 (return-from print-mem-ref
))))
525 ;; Try to reverse-engineer which thread-local binding this is
526 (cond ((and disp
; Test whether disp looks aligned to an object header
527 (not (logtest (- disp
4) sb-vm
:lowtag-mask
))
528 (not base-reg
) (not index-reg
))
529 (let* ((addr (+ disp
(- 4) sb-vm
:other-pointer-lowtag
))
531 (guess-symbol (lambda (s) (= (get-lisp-obj-address s
) addr
)))))
533 ;; "tls_index:" is access to the half-sized slot within the
534 ;; symbol header that provides an offset into TLS.
535 (note (lambda (stream) (format stream
"tls_index: ~S" symbol
))
538 ((and (eql base-reg sb-vm
::thread-reg
)
539 #+gs-seg
(dstate-getprop dstate
+gs-segment
+)
540 #-gs-seg
(not (dstate-getprop dstate
+fs-segment
+)) ; not system TLS
541 (not index-reg
) ; no index
542 (typep disp
'(integer -
128 *)) ; valid displacement
543 (zerop (logand disp
7))) ; lispword-aligned
544 (let* ((index (ash disp -
3))
545 (symbol (cond ((minusp index
)
546 (let ((index (1- (- index
))))
547 (when (array-in-bounds-p sb-vm
::+thread-header-slot-names
+ index
)
548 (aref sb-vm
::+thread-header-slot-names
+ index
))))
549 ((< index
(length thread-slot-names
))
550 (aref thread-slot-names index
)))))
552 (when (and (member index
`(,sb-vm
::thread-alien-linkage-table-base-slot
553 ,sb-vm
::thread-linkage-table-slot
))
554 (eql (logandc2 (sb-disassem::dstate-inst-properties dstate
) +rex-r
+)
555 (logior +rex
+ +rex-w
+ +rex-b
+)))
556 (setf (sb-disassem::dstate-known-register-contents dstate
)
557 `(,(reg-num (regrm-inst-reg dchunk-zero dstate
)) .
,symbol
)))
558 (return-from print-mem-ref
559 (note (lambda (stream) (format stream
"thread.~(~A~)" symbol
))
562 (let ((symbol (or (guess-symbol
563 (lambda (s) (= (symbol-tls-index s
) disp
)))
564 ;; static symbols aren't in the code header
565 (find disp
+static-symbols
+
566 :key
#'symbol-tls-index
))))
568 (return-from print-mem-ref
569 ;; "tls:" refers to the current value of the symbol in TLS
570 (note (lambda (stream) (format stream
"tls: ~S" symbol
))
573 (defun lea-compute-label (value dstate
)
574 ;; If VALUE should be regarded as a label, return the address.
575 ;; If not, just return VALUE. Don't try to use a label if there is no CODE.
576 (if (and (seg-code (dstate-segment dstate
))
577 (typep value
'machine-ea
)
578 (eq (machine-ea-base value
) :rip
))
579 (let ((addr (+ (dstate-next-addr dstate
) (machine-ea-disp value
))))
580 (if (= (logand addr lowtag-mask
) fun-pointer-lowtag
)
581 (- addr fun-pointer-lowtag
)
585 ;; Figure out whether LEA should print its EA with just the stuff in brackets,
586 ;; or additionally show the EA as either a label or a hex literal.
587 (defun lea-print-ea (value stream dstate
&aux
(width (inst-operand-size dstate
)))
588 ;; If disassembling into the dstate, print nothing; just stash the operand.
590 (return-from lea-print-ea
(operand (cons value width
) dstate
)))
596 (let ((linkage ; gets cleared by PRINT-MEM-REF
597 (and (eql (machine-ea-base value
)
598 (car (sb-disassem::dstate-known-register-contents dstate
)))
599 (eq (cdr (sb-disassem::dstate-known-register-contents dstate
))
600 'sb-vm
::linkage-table
)
601 (integerp (machine-ea-disp value
))
602 (not (machine-ea-index value
)))))
603 (declare (ignorable linkage
))
604 ;; Indicate to PRINT-MEM-REF that this is not a memory access.
605 (print-mem-ref :compute value width stream dstate
)
606 (cond ((eq (machine-ea-base value
) :rip
)
607 (+ (dstate-next-addr dstate
) (machine-ea-disp value
)))
609 (sap-int (sap+ sb-vm
::*linkage-table
* (machine-ea-disp value
)))))))
611 ;; A label for the EA should not print as itself, but as the decomposed
612 ;; addressing mode so that [ADDR] and [RIP+disp] are unmistakable.
613 ;; We can see an INTEGER here because LEA-COMPUTE-LABEL is always called
614 ;; on the operand to LEA, and it will compute an absolute address based
615 ;; off RIP when possible. If :use-labels NIL was specified, there is
616 ;; no hashtable of address to string, so we get the address.
617 ;; But ordinarily we get the string. Either way, the r/m arg reveals the
618 ;; EA calculation. DCHUNK-ZERO is a meaningless value - any would do -
619 ;; because the EA was computed in a prefilter.
620 ;; (the instruction format is known because LEA has exactly one format)
621 (print-mem-ref :compute
(setf ea
(regrm-inst-r/m dchunk-zero dstate
))
625 ;; LEA Rx,Ry is an illegal encoding, but we'll show it as-is.
626 ;; When we used integers instead of REG to represent registers, this case
627 ;; overlapped with the preceding. It's nice that it no longer does.
629 (print-reg-with-width value width stream dstate
)
631 (cond ((stringp addr
) ; label
632 (note (lambda (s) (format s
"= ~A" addr
)) dstate
))
635 (= (logand (+ (dstate-next-addr dstate
) (machine-ea-disp ea
))
638 (let* ((seg (dstate-segment dstate
))
639 (code (seg-code seg
))
640 (offset (+ (sb-disassem::seg-initial-offset seg
)
641 (dstate-next-offs dstate
)
642 (- (machine-ea-disp ea
)
643 fun-pointer-lowtag
))))
644 (loop for n below
(code-n-entries code
)
645 do
(when (= (%code-fun-offset code n
) offset
)
646 (let ((fun (%code-entry-point code n
)))
647 (note (lambda (stream) (prin1-quoted-short fun stream
)) dstate
))
650 (acond ((linkage-addr->name addr
:abs
)
651 (note (lambda (s) (format s
"#'~S" it
)) dstate
))
653 (note (lambda (s) (format s
"= #x~x" addr
)) dstate
)))))))
655 ;;;; interrupt instructions
657 (defun break-control (chunk inst stream dstate
)
658 ;; Do not parse bytes following a trap instruction unless it belongs to lisp code.
659 ;; C++ compilers will emit ud2 for various reasons.
660 (when (sb-disassem::dstate-foreign-code-p dstate
)
661 (return-from break-control
))
662 (flet ((nt (x) (if stream
(note x dstate
))))
663 (let ((trap (if (eq (sb-disassem::inst-print-name inst
) 'ud2
)
664 (word-imm-code chunk dstate
)
665 (byte-imm-code chunk dstate
))))
668 (nt "breakpoint trap"))
669 (#.pending-interrupt-trap
670 (nt "pending interrupt trap"))
673 (#.fun-end-breakpoint-trap
674 (nt "function end breakpoint trap"))
675 (#.single-step-around-trap
676 (nt "single-step trap (around)"))
677 (#.single-step-before-trap
678 (nt "single-step trap (before)"))
679 (#.invalid-arg-count-trap
680 (nt "Invalid argument count trap"))
682 (when (or (and (= trap cerror-trap
) (progn (nt "cerror trap") t
))
683 (>= trap uninitialized-load-trap
))
685 (lambda (sap offset trap-number length-only
)
686 (if (= trap-number uninitialized-load-trap
)
687 (let ((reg (ash (sap-ref-8 sap offset
) -
2)))
688 ;; decode a single byte arg, not an SC+OFFSET
689 (values (error-number-or-lose 'uninitialized-memory-error
)
690 1 ; total number of bytes consumed after the trap
691 (list (make-sc+offset unsigned-reg-sc-number reg
))
692 '(1) ; display 1 byte for the register encoding
693 nil
)) ; no error number after the trap instruction
694 (snarf-error-junk sap offset trap-number length-only
)))
695 trap stream dstate
)))))))
697 ;;; Disassemble memory of CODE from START-ADDRESS for LENGTH bytes
698 ;;; calling FUNCTION on each instruction that has a PC-relative operand.
699 ;;; If supplied, PREDICATE is used to filter out some function invocations.
700 (defun scan-relative-operands
701 (code start-address length dstate segment function
702 &optional
(predicate #'constantly-t
))
703 (declare (type function function
))
704 (let* ((inst-space (get-inst-space))
705 ;; Look for these instruction formats.
706 (call-inst (find-inst #xE8 inst-space
))
707 (jmp-inst (find-inst #xE9 inst-space
))
708 (call*-inst
(find-inst #x15ff inst-space
))
709 (jmp*-inst
(find-inst #x25ff inst-space
))
710 (cond-jmp-inst (find-inst #x800f inst-space
))
711 (lea-inst (find-inst #x8D inst-space
))
712 (mov-inst (find-inst #x8B inst-space
))
713 (address (get-lisp-obj-address code
))
714 (text-start (sap-int (code-instructions code
)))
715 (text-end (+ text-start
(%code-text-size code
)))
716 (sap (int-sap start-address
)))
717 (setf (seg-virtual-location segment
) start-address
718 (seg-length segment
) length
719 (seg-sap-maker segment
) (lambda () sap
))
720 (map-segment-instructions
721 (lambda (dchunk inst
&aux
(opcode (sap-ref-8 sap
(dstate-cur-offs dstate
))))
722 (flet ((includep (target)
723 ;; Self-relative (to the code object) operands are ignored.
724 (and (or (< target address
) (>= target text-end
))
725 (funcall predicate target
))))
726 (cond ((or (eq inst jmp-inst
) (eq inst call-inst
))
727 (let ((operand (+ (near-jump-displacement dchunk dstate
)
728 (dstate-next-addr dstate
))))
729 (when (includep operand
)
730 (funcall function
(+ (dstate-cur-offs dstate
) 1)
732 ((eq inst cond-jmp-inst
)
733 ;; jmp CALL-SYMBOL is invoked with a conditional jump
734 ;; (but not call CALL-SYMBOL because only JMP can be conditional)
735 (let ((operand (+ (near-cond-jump-displacement dchunk dstate
)
736 (dstate-next-addr dstate
))))
737 (when (includep operand
)
738 (funcall function
(+ (dstate-cur-offs dstate
) 2)
740 ((or (eq inst lea-inst
) (eq inst jmp
*-inst
) (eq inst call
*-inst
)
741 (and (eq inst mov-inst
) (eql opcode
#x8B
)))
742 ;; Computing the address of UNDEFINED-FDEFN is done with LEA.
743 ;; Load from the alien linkage table can be done with MOV Rnn,[RIP-k].
744 (let ((modrm (sap-ref-8 sap
(1+ (dstate-cur-offs dstate
)))))
745 (when (= (logand modrm
#b11000111
) #b00000101
) ; RIP-relative mode
746 (let ((operand (+ (signed-sap-ref-32 sap
(+ (dstate-cur-offs dstate
) 2))
747 (dstate-next-addr dstate
))))
748 (when (includep operand
)
749 (when (or (eq inst lea-inst
) (eq inst mov-inst
))
750 (aver (eql (logand (sap-ref-8 sap
(1- (dstate-cur-offs dstate
))) #xF0
)
751 #x40
))) ; expect a REX prefix
752 (funcall function
(+ (dstate-cur-offs dstate
) 2) operand inst
)))))))))
753 segment dstate nil
)))
755 ;;; A code signature (for purposes of the ICF pass) is a list of function
756 ;;; signatures, each of which is cons of a vector of instruction bytes with some
757 ;;; replaced by 0, plus an opaque set of integers that need to be compared for
758 ;;; equality to test whether the blobs of code are functionally equivalent.
759 (defun sb-vm::compute-code-signature
(code dstate
&aux result
)
760 (dotimes (i (code-n-entries code
) result
)
761 (let* ((f (%code-entry-point code i
))
762 (len (%simple-fun-text-len f i
))
763 (buffer (make-array (ceiling len n-word-bytes
) :element-type
'word
))
765 (with-pinned-objects (code buffer
)
766 (let ((fun-sap (simple-fun-entry-sap f
)))
767 (%byte-blt fun-sap
0 buffer
0 len
)
768 ;; Smash each PC-relative operand, and collect the effective value of
769 ;; the operand and its offset in the buffer. We needn't compute the
770 ;; lisp object referred to by the operand because it can't change.
771 ;; (PC-relative values are used only when the EA is not subject to
772 ;; movement due to GC, except during defrag). More than 32 bits might
773 ;; be needed for the absolute EA, so we don't simply write it back
774 ;; to the buffer, though at present 32 bits would in fact suffice.
775 (scan-relative-operands
776 code
(sap-int fun-sap
) len dstate
777 (make-memory-segment nil
0 0) ; will get start/length reassigned anyway
778 (lambda (offset operand inst
)
779 (declare (ignore inst
))
780 (setf operand-values
(list* operand offset operand-values
)
781 (sap-ref-32 (vector-sap buffer
) offset
) 0)))))
782 (push (cons buffer
(coerce operand-values
'simple-vector
)) result
))))