Avoid freeing literal memory.
[sbcl.git] / src / compiler / x86-64 / target-insts.lisp
blobd2f091304b6b3322b015fc956435e0a706d1dc80
1 ;;;; target-only stuff from CMU CL's src/compiler/x86/insts.lisp
2 ;;;;
3 ;;;; i.e. stuff which was in CMU CL's insts.lisp file, but which in
4 ;;;; the SBCL build process can't be compiled into code for the
5 ;;;; cross-compilation host
7 ;;;; This software is part of the SBCL system. See the README file for
8 ;;;; more information.
9 ;;;;
10 ;;;; This software is derived from the CMU CL system, which was
11 ;;;; written at Carnegie Mellon University and released into the
12 ;;;; public domain. The software is in the public domain and is
13 ;;;; provided with absolutely no warranty. See the COPYING and CREDITS
14 ;;;; files for more information.
16 (in-package "SB-X86-64-ASM")
18 (defun sb-disassem::pre-decode (chunk dstate)
19 (let ((byte (ldb (byte 8 0) chunk)))
20 (case byte
21 ((#x64 ; FS:
22 #x65 ; GS:
23 #x66 ; operand size modifier
24 #x67 ; address size modifier
25 #xf0 ; LOCK
26 #xf2 ; REPNE or SSE inst
27 #xf3) ; REP or SSE inst
28 ;; If the next byte is a REX prefix, then strip it out, recording the 'wrxb'
29 ;; bits in the dstate, and return the chunk as if the REX byte were absent.
30 (let ((next (ldb (byte 8 8) chunk)))
31 (when (= (logand next #xf0) #x40)
32 (dstate-setprop dstate (logior +rex+ (logand next #b1111)))
33 (let ((new (logior byte (ash (ldb (byte 48 16) chunk) 8))))
34 (return-from sb-disassem::pre-decode (values new 1))))))))
35 (values chunk 0))
37 (defmethod print-object ((reg reg) stream)
38 (if (or *print-escape* *print-readably*)
39 ;; cross-compiled DEFMETHOD can't use call-next-method
40 (default-structure-print reg stream *current-level-in-print*)
41 (write-string (reg-name reg) stream)))
43 ;;; Return the operand size depending on the prefixes and width bit as
44 ;;; stored in DSTATE.
45 (defun inst-operand-size (dstate)
46 (declare (type disassem-state dstate))
47 (cond ((dstate-getprop dstate +operand-size-8+) :byte)
48 ((dstate-getprop dstate +rex-w+) :qword)
49 ((dstate-getprop dstate +operand-size-16+) :word)
50 (t +default-operand-size+)))
52 ;;; The same as INST-OPERAND-SIZE, but for those instructions (e.g.
53 ;;; PUSH, JMP) that have a default operand size of :qword. It can only
54 ;;; be overwritten to :word.
55 (defun inst-operand-size-default-qword (dstate)
56 (declare (type disassem-state dstate))
57 (if (dstate-getprop dstate +operand-size-16+) :word :qword))
59 ;;; This prefilter is used solely for its side effect, namely to put
60 ;;; the property OPERAND-SIZE-8 into the DSTATE if VALUE is 0.
61 (defun prefilter-width (dstate value)
62 (declare (type bit value) (type disassem-state dstate))
63 (when (zerop value)
64 (dstate-setprop dstate +operand-size-8+))
65 value)
67 ;;; A register field that can be extended by REX.R.
68 (defun prefilter-reg-r (dstate value)
69 (declare (type (mod 8) value) (type disassem-state dstate))
70 ;; size is arbitrary here since the printer determines it
71 (get-gpr :qword (if (dstate-getprop dstate +rex-r+) (+ value 8) value)))
73 ;;; A register field that can be extended by REX.B.
74 (defun prefilter-reg-b (dstate value)
75 (declare (type (mod 8) value) (type disassem-state dstate))
76 ;; size is arbitrary here since the printer determines it
77 (get-gpr :qword (if (dstate-getprop dstate +rex-b+) (+ value 8) value)))
79 ;; This reader extracts the 'imm' operand in "MOV reg,imm" format.
80 ;; KLUDGE: the REG instruction format can not define a reader
81 ;; because it has no field specification and no prefilter.
82 ;; (It's specified directly in the MOV instruction definition)
83 (defun reg-imm-data (dchunk dstate) dchunk
84 (aref (sb-disassem::dstate-filtered-values dstate) 4))
86 ;;; This structure is logically immutable, except for one problem:
87 ;;; the disassembler recycles instances of it (re-uses the same
88 ;;; one for each successive instruction). See DECODE-MOD-R/M.
89 (defstruct (machine-ea (:copier nil)
90 (:constructor %make-machine-ea))
91 ;; possible TODO: base,index,scale could be packed thusly in 13 bits:
92 ;; 2 bits for scale
93 ;; 1 bit for base register non-NULL
94 ;; 4 bits for base register number
95 ;; 1 bit for base-register-is-RIP
96 ;; 1 bit for index register non-NULL
97 ;; 4 bits for index register number
98 disp base index scale)
99 (declaim (freeze-type machine-ea))
101 (defun reg-num (reg) (reg-id-num (reg-id reg)))
103 ;;; Print to STREAM the name of the general-purpose register encoded by
104 ;;; VALUE and of size WIDTH.
105 (defun print-reg-with-width (value width stream dstate)
106 (declare (type (or null stream) stream)
107 (type disassem-state dstate))
108 (let* ((num (etypecase value
109 ((unsigned-byte 4) value)
110 ;; Decode and re-encode, because the size is always
111 ;; initially :qword.
112 (reg (reg-num value))))
113 (reg (get-gpr width
114 (if (and (eq width :byte)
115 (not (dstate-getprop dstate +rex+))
116 (<= 4 num 7))
117 (+ 16 -4 num) ; legacy high-byte register
118 num))))
119 (if stream
120 (princ (reg-name reg) stream)
121 (operand reg dstate)))
122 ;; XXX plus should do some source-var notes
125 (defun print-reg (value stream dstate)
126 (print-reg-with-width value
127 (inst-operand-size dstate)
128 stream
129 dstate))
131 (defun print-reg-default-qword (value stream dstate)
132 (print-reg-with-width value
133 (inst-operand-size-default-qword dstate)
134 stream
135 dstate))
137 ;; Print a reg that can only be a :DWORD or :QWORD.
138 ;; Avoid use of INST-OPERAND-SIZE because it's wrong for this type of operand.
139 (defun print-d/q-word-reg (value stream dstate)
140 (print-reg-with-width value
141 (if (dstate-getprop dstate +rex-w+) :qword :dword)
142 stream
143 dstate))
145 (defun print-byte-reg (value stream dstate)
146 (print-reg-with-width value :byte stream dstate))
148 (defun print-addr-reg (value stream dstate)
149 (print-reg-with-width value +default-address-size+ stream dstate))
151 ;;; Print a register or a memory reference of the given WIDTH.
152 ;;; If SIZED-P is true, add an explicit size indicator for memory
153 ;;; references.
154 (defun print-reg/mem-with-width (value width sized-p stream dstate)
155 (declare (type (member :byte :word :dword :qword) width)
156 (type boolean sized-p))
157 (if (machine-ea-p value)
158 (print-mem-ref (if sized-p :sized-ref :ref) value width stream dstate)
159 (print-reg-with-width value width stream dstate)))
161 ;;; Print a register or a memory reference. The width is determined by
162 ;;; calling INST-OPERAND-SIZE.
163 (defun print-reg/mem (value stream dstate)
164 (print-reg/mem-with-width
165 value (inst-operand-size dstate) nil stream dstate))
167 ;; Same as print-reg/mem, but prints an explicit size indicator for
168 ;; memory references.
169 (defun print-sized-reg/mem (value stream dstate)
170 (print-reg/mem-with-width
171 value (inst-operand-size dstate) t stream dstate))
173 ;;; Same as print-sized-reg/mem, but with a default operand size of
174 ;;; :qword.
175 (defun print-sized-reg/mem-default-qword (value stream dstate)
176 (print-reg/mem-with-width
177 value (inst-operand-size-default-qword dstate) t stream dstate))
179 (defun print-rel32-disp (value stream dstate)
180 (cond ((not stream) (operand value dstate))
182 (or (when (and (typep value 'word)
183 (not (logtest value lowtag-mask))
184 (< text-space-start value (sap-int *text-space-free-pointer*)))
185 (multiple-value-bind (fun ok)
186 (make-lisp-obj (+ value -16 fun-pointer-lowtag) nil)
187 (when ok
188 (let ((name (%fun-name fun)))
189 (note (if (and (symbolp name) (eq (fboundp name) fun))
190 (lambda (stream) (format stream "#'~A" name))
191 (lambda (stream) (princ fun stream)))
192 dstate)))))
193 (maybe-note-assembler-routine value nil dstate))
194 (print-label value stream dstate))))
196 (defun print-jmp-ea (value stream dstate)
197 (cond ((null stream) (operand value dstate))
198 ((typep value 'machine-ea)
199 (when (or (eq (machine-ea-base value) :rip)
200 (and (eql (machine-ea-base value)
201 (car (sb-disassem::dstate-known-register-contents dstate)))
202 (eq (cdr (sb-disassem::dstate-known-register-contents dstate))
203 'sb-vm::linkage-table)
204 (integerp (machine-ea-disp value))
205 (not (machine-ea-index value))))
206 (setf (sb-disassem::dstate-known-register-contents dstate) nil)
207 (let ((name (if (eq (machine-ea-base value) :rip)
208 (linkage-addr->name (+ (dstate-next-addr dstate)
209 (machine-ea-disp value)) :abs)
210 (linkage-addr->name (machine-ea-disp value) :rel))))
211 (when name
212 ;; :COMPUTE won't show the contents of the word
213 (print-mem-ref :compute value :qword stream dstate)
214 (return-from print-jmp-ea (note (lambda (s) (prin1 name s)) dstate)))))
215 (print-mem-ref :ref value :qword stream dstate)
216 #+immobile-space
217 (when (and (null (machine-ea-base value)) (null (machine-ea-index value)))
218 (let* ((v sb-fasl::*asm-routine-vector*)
219 (a (logandc2 (get-lisp-obj-address v) sb-vm:lowtag-mask)))
220 (when (<= a (machine-ea-disp value) (1- (+ a (primitive-object-size v))))
221 (let ((target (sap-ref-word (int-sap (machine-ea-disp value)) 0)))
222 (maybe-note-assembler-routine target t dstate))))))
223 (t (write value :stream stream :escape nil))))
225 (defun print-sized-byte-reg/mem (value stream dstate)
226 (print-reg/mem-with-width value :byte t stream dstate))
228 (defun print-sized-word-reg/mem (value stream dstate)
229 (print-reg/mem-with-width value :word t stream dstate))
231 (defun print-sized-dword-reg/mem (value stream dstate)
232 (print-reg/mem-with-width value :dword t stream dstate))
234 (defun print-label (value stream dstate)
235 (declare (ignore dstate))
236 (princ16 value stream))
238 (defun print-xmmreg (value stream dstate)
239 (let* ((reg (get-fpr :xmm
240 ;; FIXME: why are we seeing a value from the GPR
241 ;; prefilter instead of XMM prefilter here sometimes?
242 (etypecase value
243 ((unsigned-byte 4) value)
244 (reg (reg-num value)))))
245 (name (reg-name reg)))
246 (if stream
247 (write-string name stream)
248 (operand name dstate))))
250 (defun print-xmmreg/mem (value stream dstate)
251 (if (machine-ea-p value)
252 (print-mem-ref :ref value nil stream dstate)
253 (print-xmmreg value stream dstate)))
255 ;;; Guess whether VALUE is an immobile-space symbol or code blob by looking
256 ;;; at all code header constants. If it matches any constant, assume that it
257 ;;; is a use of the constant. This has false positives of course,
258 ;;; as does MAYBE-NOTE-STATIC-SYMBOL and friends. Any random immediate value
259 ;;; used in an unboxed context, such as an ADD instruction,
260 ;;; can be wrongly construed as an address.
261 ;;; Note that for symbols we can match either the tagged pointer to it
262 ;;; OR the untagged address of the SYMBOL-VALUE slot.
264 ;;; "static" in this usage implies "at a fixed address" - it could be
265 ;;; in static space or immobile space.
267 ;;; TODO: probably should take an &OPTIONAL for ALLOW-INTERIOR-PTR to
268 ;;; reject false positives from instructions that don't access an object
269 ;;; except through a tagged pointer.
270 (defun maybe-note-static-lispobj (value dstate &optional quote)
271 (when (maybe-note-static-symbol value dstate)
272 ;; Returning T prints VALUE using base 16
273 ;; (see the SIGNED-IMM-DATA printer, PRINT-IMM/ASM-ROUTINE)
274 ;; This should probably pass through the QUOTE option but it's not critical.
275 (return-from maybe-note-static-lispobj t))
276 (let ((code (seg-code (dstate-segment dstate)))
277 (adjusted-val (logior (- value (ash sb-vm:symbol-value-slot sb-vm:word-shift))
278 sb-vm:other-pointer-lowtag))
279 (found-const)
280 (slot))
281 (when code
282 (loop for i downfrom (1- (code-header-words code)) to sb-vm:code-constants-offset
283 for const = (code-header-ref code i)
284 do (when (symbolp const)
285 (let ((addr (get-lisp-obj-address const)))
286 (cond ((eql addr value)
287 (return (setq found-const const)))
288 ((eql addr adjusted-val)
289 (return (setq found-const const
290 slot sb-vm:symbol-value-slot)))))))
291 (unless found-const ; try static symbol's value slots
292 (dovector (symbol sb-vm:+static-symbols+)
293 (when (= (get-lisp-obj-address symbol) adjusted-val)
294 (return (setq found-const symbol
295 slot sb-vm:symbol-value-slot)))))
296 (when found-const
297 (note (cond (slot
298 (lambda (s) (format s "(SYMBOL-VALUE '~S)" found-const)))
299 ((and (symbolp found-const) quote)
300 (lambda (s) (write-char #\' s) (prin1 found-const s)))
302 (lambda (s) (prin1 found-const s))))
303 dstate)
304 ;; Returning T prints in base 16 (see PRINT-IMM/ASM-ROUTINE)
305 (return-from maybe-note-static-lispobj t))))
306 #| This mysterious code seems to have no regression tests.
307 Comenting it out until I can figure out why it was in target-disassem
308 ;; Kludge: layout of STREAM, FILE-STREAM, and STRING-STREAM can be used
309 ;; as immediate operands without a corresponding boxed header constant.
310 ;; I think we always elide the boxed constant for builtin layouts,
311 ;; but these three have some slightly unusual codegen that causes a PUSH
312 ;; instruction to need some help to show its operand as a lisp object.
313 (dolist (thing (load-time-value (list (find-layout 'stream)
314 (find-layout 'file-stream)
315 (find-layout 'string-stream))
317 (when (eql (get-lisp-obj-address thing) address)
318 (return-from found thing))))) |#
319 (awhen (and (typep value 'word)
320 (sb-disassem::find-code-constant-from-interior-pointer value dstate))
321 (note (lambda (stream) (princ it stream)) dstate)))
323 ;;; Return an instance of REG or MACHINE-EA.
324 ;;; MOD and R/M are the extracted bits from the instruction's ModRM byte.
325 ;;; Depending on MOD and R/M, a SIB byte and/or displacement may be read.
326 ;;; The REX.B and REX.X from dstate are appropriately consumed.
327 (defun decode-mod-r/m (dstate mod r/m regclass)
328 (declare (type disassem-state dstate)
329 (type (unsigned-byte 2) mod)
330 (type (unsigned-byte 3) r/m))
331 (flet ((make-machine-ea (base &optional disp index scale)
332 (let ((ea (the machine-ea
333 (sb-disassem::new-filtered-arg dstate #'%make-machine-ea))))
334 (setf (machine-ea-base ea) base
335 (machine-ea-disp ea) disp
336 (machine-ea-index ea) index
337 (machine-ea-scale ea) scale)
338 ea))
339 (displacement ()
340 (case mod
341 (#b01 (read-signed-suffix 8 dstate))
342 (#b10 (read-signed-suffix 32 dstate))))
343 (extend (bit-name reg)
344 (logior (if (dstate-getprop dstate bit-name) 8 0) reg)))
345 (declare (inline extend))
346 (let ((full-reg (extend +rex-b+ r/m)))
347 (cond ((= mod #b11) ; register direct mode
348 (case regclass
349 (gpr (get-gpr :qword full-reg)) ; size is not really known here
350 (fpr (get-fpr :xmm full-reg))))
351 ((= r/m #b100) ; SIB byte - rex.b is "don't care"
352 (let* ((sib (the (unsigned-byte 8) (read-suffix 8 dstate)))
353 (index-reg (extend +rex-x+ (ldb (byte 3 3) sib)))
354 (base-reg (ldb (byte 3 0) sib)))
355 ;; mod=0 and base=RBP means no base reg
356 (make-machine-ea (unless (and (= mod #b00) (= base-reg #b101))
357 (extend +rex-b+ base-reg))
358 (cond ((/= mod #b00) (displacement))
359 ((= base-reg #b101) (read-signed-suffix 32 dstate)))
360 (unless (= index-reg #b100) index-reg) ; index can't be RSP
361 (ash 1 (ldb (byte 2 6) sib)))))
362 ((/= mod #b00) (make-machine-ea full-reg (displacement)))
363 ;; rex.b is not decoded in determining RIP-relative mode
364 ((= r/m #b101) (make-machine-ea :rip (read-signed-suffix 32 dstate)))
365 (t (make-machine-ea full-reg))))))
367 (defun prefilter-reg/mem (dstate mod r/m)
368 (decode-mod-r/m dstate mod r/m 'gpr))
369 (defun prefilter-xmmreg/mem (dstate mod r/m)
370 (decode-mod-r/m dstate mod r/m 'fpr))
372 ;;; Return contents of memory if either it refers to an unboxed code constant
373 ;;; or is RIP-relative with a displacement of 0.
374 (defun unboxed-constant-ref (dstate addr disp)
375 (when (and (minusp disp)
376 (awhen (seg-code (dstate-segment dstate))
377 (sb-disassem::points-to-code-constant-p addr it)))
378 (sap-ref-word (int-sap addr) 0)))
380 (define-load-time-global thread-slot-names
381 (let* ((slots (coerce (primitive-object-slots
382 (sb-vm::primitive-object 'sb-vm::thread))
383 'list))
384 (a (make-array (1+ (slot-offset (car (last slots))))
385 :initial-element nil)))
386 (dolist (slot slots a)
387 (setf (aref a (slot-offset slot)) (slot-name slot)))))
389 ;;; Prints a memory reference to STREAM. VALUE is a list of
390 ;;; (BASE-REG OFFSET INDEX-REG INDEX-SCALE), where any component may be
391 ;;; missing or nil to indicate that it's not used or has the obvious
392 ;;; default value (e.g., 1 for the index-scale). BASE-REG can be the
393 ;;; symbol RIP or a full register, INDEX-REG a full register. If WIDTH
394 ;;; is non-nil it should be one of the symbols :BYTE, :WORD, :DWORD or
395 ;;; :QWORD; a corresponding size indicator is printed if MODE is :SIZED-REF.
396 ;;; The rationale for supplying WIDTH while eliding a pointer-size qualifier
397 ;;; is that proper dereferencing of RIP-relative constants requires a size,
398 ;;; but in other cases would only add clutter, since a source/destination
399 ;;; register implies a size.
401 (defun print-mem-ref (mode value width stream dstate &key (index-reg-printer #'print-addr-reg))
402 ;; :COMPUTE is used for the LEA instruction - it informs this function
403 ;; that we're not loading from the address and that the contents should not
404 ;; be printed. It'll usually be a reference to code within the disasembly
405 ;; segment, as LEA is employed to compute the entry point for local call.
406 (declare (type (member :ref :sized-ref :compute) mode)
407 (type machine-ea value)
408 (type (member nil :byte :word :dword :qword) width)
409 (type (or null stream) stream)
410 (type disassem-state dstate))
411 ;; If disassembling into the dstate, print nothing; just stash the operand.
412 (unless stream
413 (return-from print-mem-ref (operand (cons value width) dstate)))
415 ;; Unpack and print the pieces of the machine EA.
416 (let ((base-reg (machine-ea-base value))
417 (disp (machine-ea-disp value))
418 (index-reg (machine-ea-index value))
419 (firstp t))
420 (when (and width (eq mode :sized-ref))
421 (princ width stream)
422 (princ '| PTR | stream))
423 (when (dstate-getprop dstate +fs-segment+)
424 (princ "FS:" stream))
425 (when (dstate-getprop dstate +gs-segment+)
426 (princ "GS:" stream))
427 (write-char #\[ stream)
428 (when base-reg
429 (if (eql :rip base-reg)
430 (princ base-reg stream)
431 (print-addr-reg base-reg stream dstate))
432 (setq firstp nil))
433 (when index-reg
434 (unless firstp (write-char #\+ stream))
435 (funcall index-reg-printer index-reg stream dstate)
436 (let ((scale (machine-ea-scale value)))
437 (unless (= scale 1)
438 (write-char #\* stream)
439 (princ scale stream)))
440 (setq firstp nil))
441 (when (and disp (or firstp (not (zerop disp))))
442 (unless (or firstp (minusp disp))
443 (write-char #\+ stream))
444 (cond ((eq (machine-ea-base value) :rip)
445 (princ disp stream))
446 (firstp
447 (princ16 disp stream)
448 ;; Avoid the MAYBE-NOTE call if we can. A negative offset is never an
449 ;; absolute address as would be used for asm routines and static symbols.
450 ;; FIRSTP implies lack of a base and index register.
451 (unless (minusp disp)
452 (or (maybe-note-static-symbol (+ disp (- other-pointer-lowtag
453 (* n-word-bytes sb-vm:symbol-value-slot)))
454 dstate)
455 (maybe-note-assembler-routine disp nil dstate))))
457 (princ disp stream))))
458 (write-char #\] stream)
460 ;; Always try to add an end-of-line comment about the EA.
461 ;; Assembler routines were already handled above (not really sure why)
462 ;; so now we have to figure out everything else.
463 #+sb-safepoint
464 (when (and (eql (machine-ea-base value) sb-vm::card-table-reg)
465 (eql (machine-ea-disp value) -8))
466 (return-from print-mem-ref (note "safepoint" dstate)))
468 (when (and (eq (machine-ea-base value) :rip) (neq mode :compute))
469 (block nil
470 (binding* ((seg (dstate-segment dstate))
471 (code (seg-code seg) :exit-if-null)
472 (offs (sb-disassem::segment-offs-to-code-offs
473 (+ (dstate-next-offs dstate) disp) seg)))
474 (when (note-code-constant offs dstate) (return)))
475 (let ((addr (+ disp (dstate-next-addr dstate))))
476 ;; The origin is zero when disassembling into a trace-file.
477 ;; Don't crash on account of it.
478 ;; Also, don't try to look up C symbols in immobile space.
479 ;; In an elfinated core, the range that is reserved for
480 ;; compilation to memory says it is all associated with
481 ;; the symbol "lisp_jit_code" which is not useful.
482 (when (plusp addr)
483 (or (when (<= sb-vm:alien-linkage-space-start addr
484 (+ sb-vm:alien-linkage-space-start
485 (1- sb-vm:alien-linkage-space-size)))
486 (let* ((index (sb-vm::alien-linkage-table-index-from-address addr))
487 (name (sb-impl::alien-linkage-index-to-name index)))
488 (note (lambda (s) (format s "&~A" name)) dstate)))
489 (unless (sb-kernel:immobile-space-addr-p addr)
490 (maybe-note-assembler-routine addr nil dstate))
491 ;; Show the absolute address and maybe the contents.
492 (note (format nil "[#x~x]~@[ = #x~x~]"
493 addr
494 (case width
495 (:qword (unboxed-constant-ref dstate addr disp))))
496 dstate))))))
498 ;; Recognize "[Rbase+disp]" as an alien linkage table reference if Rbase was
499 ;; just loaded with the base address in the prior instruction.
500 (when (and (eql (machine-ea-base value)
501 (car (sb-disassem::dstate-known-register-contents dstate)))
502 (eq (cdr (sb-disassem::dstate-known-register-contents dstate))
503 'sb-vm::alien-linkage-table-base)
504 (not (machine-ea-index value))
505 (integerp (machine-ea-disp value)))
506 (let ((name (sb-impl::alien-linkage-index-to-name
507 (floor (machine-ea-disp value) sb-vm:alien-linkage-table-entry-size))))
508 (note (lambda (s) (format s "&~A" name)) dstate)))
509 (setf (sb-disassem::dstate-known-register-contents dstate) nil)
511 (flet ((guess-symbol (predicate)
512 (binding* ((code-header (seg-code (dstate-segment dstate)) :exit-if-null)
513 (header-n-words (code-header-words code-header)))
514 (loop for word-num from code-constants-offset below header-n-words
515 for obj = (code-header-ref code-header word-num)
516 when (and (symbolp obj) (funcall predicate obj))
517 do (return obj)))))
518 (when (and (not base-reg) (not index-reg) disp)
519 (let ((addr (+ disp ; guess that DISP points to a symbol-value slot
520 (- (ash sb-vm:symbol-value-slot sb-vm:word-shift))
521 sb-vm:other-pointer-lowtag)))
522 (awhen (guess-symbol (lambda (s) (= (get-lisp-obj-address s) addr)))
523 (note (lambda (stream) (prin1 it stream)) dstate)
524 (return-from print-mem-ref))))
525 ;; Try to reverse-engineer which thread-local binding this is
526 (cond ((and disp ; Test whether disp looks aligned to an object header
527 (not (logtest (- disp 4) sb-vm:lowtag-mask))
528 (not base-reg) (not index-reg))
529 (let* ((addr (+ disp (- 4) sb-vm:other-pointer-lowtag))
530 (symbol
531 (guess-symbol (lambda (s) (= (get-lisp-obj-address s) addr)))))
532 (when symbol
533 ;; "tls_index:" is access to the half-sized slot within the
534 ;; symbol header that provides an offset into TLS.
535 (note (lambda (stream) (format stream "tls_index: ~S" symbol))
536 dstate))))
537 ;; thread slots
538 ((and (eql base-reg sb-vm::thread-reg)
539 #+gs-seg (dstate-getprop dstate +gs-segment+)
540 #-gs-seg (not (dstate-getprop dstate +fs-segment+)) ; not system TLS
541 (not index-reg) ; no index
542 (typep disp '(integer -128 *)) ; valid displacement
543 (zerop (logand disp 7))) ; lispword-aligned
544 (let* ((index (ash disp -3))
545 (symbol (cond ((minusp index)
546 (let ((index (1- (- index))))
547 (when (array-in-bounds-p sb-vm::+thread-header-slot-names+ index)
548 (aref sb-vm::+thread-header-slot-names+ index))))
549 ((< index (length thread-slot-names))
550 (aref thread-slot-names index)))))
551 (when symbol
552 (when (and (member index `(,sb-vm::thread-alien-linkage-table-base-slot
553 ,sb-vm::thread-linkage-table-slot))
554 (eql (logandc2 (sb-disassem::dstate-inst-properties dstate) +rex-r+)
555 (logior +rex+ +rex-w+ +rex-b+)))
556 (setf (sb-disassem::dstate-known-register-contents dstate)
557 `(,(reg-num (regrm-inst-reg dchunk-zero dstate)) . ,symbol)))
558 (return-from print-mem-ref
559 (note (lambda (stream) (format stream "thread.~(~A~)" symbol))
560 dstate))))
561 #+sb-thread
562 (let ((symbol (or (guess-symbol
563 (lambda (s) (= (symbol-tls-index s) disp)))
564 ;; static symbols aren't in the code header
565 (find disp +static-symbols+
566 :key #'symbol-tls-index))))
567 (when symbol
568 (return-from print-mem-ref
569 ;; "tls:" refers to the current value of the symbol in TLS
570 (note (lambda (stream) (format stream "tls: ~S" symbol))
571 dstate)))))))))
573 (defun lea-compute-label (value dstate)
574 ;; If VALUE should be regarded as a label, return the address.
575 ;; If not, just return VALUE. Don't try to use a label if there is no CODE.
576 (if (and (seg-code (dstate-segment dstate))
577 (typep value 'machine-ea)
578 (eq (machine-ea-base value) :rip))
579 (let ((addr (+ (dstate-next-addr dstate) (machine-ea-disp value))))
580 (if (= (logand addr lowtag-mask) fun-pointer-lowtag)
581 (- addr fun-pointer-lowtag)
582 addr))
583 value))
585 ;; Figure out whether LEA should print its EA with just the stuff in brackets,
586 ;; or additionally show the EA as either a label or a hex literal.
587 (defun lea-print-ea (value stream dstate &aux (width (inst-operand-size dstate)))
588 ;; If disassembling into the dstate, print nothing; just stash the operand.
589 (unless stream
590 (return-from lea-print-ea (operand (cons value width) dstate)))
591 (let*
592 ((ea)
593 (addr
594 (etypecase value
595 (machine-ea
596 (let ((linkage ; gets cleared by PRINT-MEM-REF
597 (and (eql (machine-ea-base value)
598 (car (sb-disassem::dstate-known-register-contents dstate)))
599 (eq (cdr (sb-disassem::dstate-known-register-contents dstate))
600 'sb-vm::linkage-table)
601 (integerp (machine-ea-disp value))
602 (not (machine-ea-index value)))))
603 (declare (ignorable linkage))
604 ;; Indicate to PRINT-MEM-REF that this is not a memory access.
605 (print-mem-ref :compute value width stream dstate)
606 (cond ((eq (machine-ea-base value) :rip)
607 (+ (dstate-next-addr dstate) (machine-ea-disp value)))
608 (linkage
609 (sap-int (sap+ sb-vm::*linkage-table* (machine-ea-disp value)))))))
610 ((or string integer)
611 ;; A label for the EA should not print as itself, but as the decomposed
612 ;; addressing mode so that [ADDR] and [RIP+disp] are unmistakable.
613 ;; We can see an INTEGER here because LEA-COMPUTE-LABEL is always called
614 ;; on the operand to LEA, and it will compute an absolute address based
615 ;; off RIP when possible. If :use-labels NIL was specified, there is
616 ;; no hashtable of address to string, so we get the address.
617 ;; But ordinarily we get the string. Either way, the r/m arg reveals the
618 ;; EA calculation. DCHUNK-ZERO is a meaningless value - any would do -
619 ;; because the EA was computed in a prefilter.
620 ;; (the instruction format is known because LEA has exactly one format)
621 (print-mem-ref :compute (setf ea (regrm-inst-r/m dchunk-zero dstate))
622 width stream dstate)
623 value)
625 ;; LEA Rx,Ry is an illegal encoding, but we'll show it as-is.
626 ;; When we used integers instead of REG to represent registers, this case
627 ;; overlapped with the preceding. It's nice that it no longer does.
628 (reg
629 (print-reg-with-width value width stream dstate)
630 nil))))
631 (cond ((stringp addr) ; label
632 (note (lambda (s) (format s "= ~A" addr)) dstate))
633 ;; Local function
634 ((and ea
635 (= (logand (+ (dstate-next-addr dstate) (machine-ea-disp ea))
636 lowtag-mask)
637 fun-pointer-lowtag)
638 (let* ((seg (dstate-segment dstate))
639 (code (seg-code seg))
640 (offset (+ (sb-disassem::seg-initial-offset seg)
641 (dstate-next-offs dstate)
642 (- (machine-ea-disp ea)
643 fun-pointer-lowtag))))
644 (loop for n below (code-n-entries code)
645 do (when (= (%code-fun-offset code n) offset)
646 (let ((fun (%code-entry-point code n)))
647 (note (lambda (stream) (prin1-quoted-short fun stream)) dstate))
648 (return t))))))
649 (addr
650 (acond ((linkage-addr->name addr :abs)
651 (note (lambda (s) (format s "#'~S" it)) dstate))
653 (note (lambda (s) (format s "= #x~x" addr)) dstate)))))))
655 ;;;; interrupt instructions
657 (defun break-control (chunk inst stream dstate)
658 ;; Do not parse bytes following a trap instruction unless it belongs to lisp code.
659 ;; C++ compilers will emit ud2 for various reasons.
660 (when (sb-disassem::dstate-foreign-code-p dstate)
661 (return-from break-control))
662 (flet ((nt (x) (if stream (note x dstate))))
663 (let ((trap (if (eq (sb-disassem::inst-print-name inst) 'ud2)
664 (word-imm-code chunk dstate)
665 (byte-imm-code chunk dstate))))
666 (case trap
667 (#.breakpoint-trap
668 (nt "breakpoint trap"))
669 (#.pending-interrupt-trap
670 (nt "pending interrupt trap"))
671 (#.halt-trap
672 (nt "halt trap"))
673 (#.fun-end-breakpoint-trap
674 (nt "function end breakpoint trap"))
675 (#.single-step-around-trap
676 (nt "single-step trap (around)"))
677 (#.single-step-before-trap
678 (nt "single-step trap (before)"))
679 (#.invalid-arg-count-trap
680 (nt "Invalid argument count trap"))
682 (when (or (and (= trap cerror-trap) (progn (nt "cerror trap") t))
683 (>= trap uninitialized-load-trap))
684 (handle-break-args
685 (lambda (sap offset trap-number length-only)
686 (if (= trap-number uninitialized-load-trap)
687 (let ((reg (ash (sap-ref-8 sap offset) -2)))
688 ;; decode a single byte arg, not an SC+OFFSET
689 (values (error-number-or-lose 'uninitialized-memory-error)
690 1 ; total number of bytes consumed after the trap
691 (list (make-sc+offset unsigned-reg-sc-number reg))
692 '(1) ; display 1 byte for the register encoding
693 nil)) ; no error number after the trap instruction
694 (snarf-error-junk sap offset trap-number length-only)))
695 trap stream dstate)))))))
697 ;;; Disassemble memory of CODE from START-ADDRESS for LENGTH bytes
698 ;;; calling FUNCTION on each instruction that has a PC-relative operand.
699 ;;; If supplied, PREDICATE is used to filter out some function invocations.
700 (defun scan-relative-operands
701 (code start-address length dstate segment function
702 &optional (predicate #'constantly-t))
703 (declare (type function function))
704 (let* ((inst-space (get-inst-space))
705 ;; Look for these instruction formats.
706 (call-inst (find-inst #xE8 inst-space))
707 (jmp-inst (find-inst #xE9 inst-space))
708 (call*-inst (find-inst #x15ff inst-space))
709 (jmp*-inst (find-inst #x25ff inst-space))
710 (cond-jmp-inst (find-inst #x800f inst-space))
711 (lea-inst (find-inst #x8D inst-space))
712 (mov-inst (find-inst #x8B inst-space))
713 (address (get-lisp-obj-address code))
714 (text-start (sap-int (code-instructions code)))
715 (text-end (+ text-start (%code-text-size code)))
716 (sap (int-sap start-address)))
717 (setf (seg-virtual-location segment) start-address
718 (seg-length segment) length
719 (seg-sap-maker segment) (lambda () sap))
720 (map-segment-instructions
721 (lambda (dchunk inst &aux (opcode (sap-ref-8 sap (dstate-cur-offs dstate))))
722 (flet ((includep (target)
723 ;; Self-relative (to the code object) operands are ignored.
724 (and (or (< target address) (>= target text-end))
725 (funcall predicate target))))
726 (cond ((or (eq inst jmp-inst) (eq inst call-inst))
727 (let ((operand (+ (near-jump-displacement dchunk dstate)
728 (dstate-next-addr dstate))))
729 (when (includep operand)
730 (funcall function (+ (dstate-cur-offs dstate) 1)
731 operand inst))))
732 ((eq inst cond-jmp-inst)
733 ;; jmp CALL-SYMBOL is invoked with a conditional jump
734 ;; (but not call CALL-SYMBOL because only JMP can be conditional)
735 (let ((operand (+ (near-cond-jump-displacement dchunk dstate)
736 (dstate-next-addr dstate))))
737 (when (includep operand)
738 (funcall function (+ (dstate-cur-offs dstate) 2)
739 operand inst))))
740 ((or (eq inst lea-inst) (eq inst jmp*-inst) (eq inst call*-inst)
741 (and (eq inst mov-inst) (eql opcode #x8B)))
742 ;; Computing the address of UNDEFINED-FDEFN is done with LEA.
743 ;; Load from the alien linkage table can be done with MOV Rnn,[RIP-k].
744 (let ((modrm (sap-ref-8 sap (1+ (dstate-cur-offs dstate)))))
745 (when (= (logand modrm #b11000111) #b00000101) ; RIP-relative mode
746 (let ((operand (+ (signed-sap-ref-32 sap (+ (dstate-cur-offs dstate) 2))
747 (dstate-next-addr dstate))))
748 (when (includep operand)
749 (when (or (eq inst lea-inst) (eq inst mov-inst))
750 (aver (eql (logand (sap-ref-8 sap (1- (dstate-cur-offs dstate))) #xF0)
751 #x40))) ; expect a REX prefix
752 (funcall function (+ (dstate-cur-offs dstate) 2) operand inst)))))))))
753 segment dstate nil)))
755 ;;; A code signature (for purposes of the ICF pass) is a list of function
756 ;;; signatures, each of which is cons of a vector of instruction bytes with some
757 ;;; replaced by 0, plus an opaque set of integers that need to be compared for
758 ;;; equality to test whether the blobs of code are functionally equivalent.
759 (defun sb-vm::compute-code-signature (code dstate &aux result)
760 (dotimes (i (code-n-entries code) result)
761 (let* ((f (%code-entry-point code i))
762 (len (%simple-fun-text-len f i))
763 (buffer (make-array (ceiling len n-word-bytes) :element-type 'word))
764 (operand-values))
765 (with-pinned-objects (code buffer)
766 (let ((fun-sap (simple-fun-entry-sap f)))
767 (%byte-blt fun-sap 0 buffer 0 len)
768 ;; Smash each PC-relative operand, and collect the effective value of
769 ;; the operand and its offset in the buffer. We needn't compute the
770 ;; lisp object referred to by the operand because it can't change.
771 ;; (PC-relative values are used only when the EA is not subject to
772 ;; movement due to GC, except during defrag). More than 32 bits might
773 ;; be needed for the absolute EA, so we don't simply write it back
774 ;; to the buffer, though at present 32 bits would in fact suffice.
775 (scan-relative-operands
776 code (sap-int fun-sap) len dstate
777 (make-memory-segment nil 0 0) ; will get start/length reassigned anyway
778 (lambda (offset operand inst)
779 (declare (ignore inst))
780 (setf operand-values (list* operand offset operand-values)
781 (sap-ref-32 (vector-sap buffer) offset) 0)))))
782 (push (cons buffer (coerce operand-values 'simple-vector)) result))))