1 ;;;; machine-independent disassembler
3 ;;;; This software is part of the SBCL system. See the README file for
6 ;;;; This software is derived from the CMU CL system, which was
7 ;;;; written at Carnegie Mellon University and released into the
8 ;;;; public domain. The software is in the public domain and is
9 ;;;; provided with absolutely no warranty. See the COPYING and CREDITS
10 ;;;; files for more information.
12 (in-package "SB!DISASSEM")
14 ;;; types and defaults
16 (def!constant label-column-width
7)
18 (deftype text-width
() '(integer 0 1000))
19 (deftype alignment
() '(integer 0 64))
20 (deftype offset
() '(signed-byte 24))
21 (deftype address
() '(unsigned-byte #.sb
!vm
:n-word-bits
))
22 (deftype disassem-length
() '(unsigned-byte 24))
23 (deftype column
() '(integer 0 1000))
25 (def!constant max-filtered-value-index
32)
26 (deftype filtered-value-index
()
27 `(integer 0 (,max-filtered-value-index
)))
28 (deftype filtered-value-vector
()
29 `(simple-array t
(,max-filtered-value-index
)))
31 ;;;; disassembly parameters
33 ;; With a few tweaks, you can use a running SBCL as a cross-assembler
34 ;; and disassembler for other supported backends,
35 ;; if that backend has been converted to use a distinct ASM package.
36 (eval-when (:compile-toplevel
:load-toplevel
:execute
)
37 (defparameter sb
!assem
::*backend-instruction-set-package
*
38 (find-package #.
(sb-cold::backend-asm-package-name
))))
40 (defvar *disassem-inst-space
* nil
)
42 ;;; minimum alignment of instructions, in bytes
43 (defvar *disassem-inst-alignment-bytes
* sb
!vm
:n-word-bytes
)
44 (declaim (type alignment
*disassem-inst-alignment-bytes
*))
46 ;; How many columns of output to allow for the address preceding each line.
47 ;; If NIL, use the minimum possible width for the disassembly range.
48 ;; If 0, do not print addresses.
49 (defvar *disassem-location-column-width
* nil
)
50 (declaim (type (or null text-width
) *disassem-location-column-width
*))
52 ;;; the width of the column in which instruction-names are printed. A
53 ;;; value of zero gives the effect of not aligning the arguments at
55 (defvar *disassem-opcode-column-width
* 0)
56 (declaim (type text-width
*disassem-opcode-column-width
*))
58 ;;; the width of the column in which instruction-bytes are printed. A
59 ;;; value of zero disables the printing of instruction bytes.
60 (defvar *disassem-inst-column-width
* 16
62 "The width of instruction bytes.")
63 (declaim (type text-width
*disassem-inst-column-width
*))
65 (defvar *disassem-note-column
* (+ 45 *disassem-inst-column-width
*)
67 "The column in which end-of-line comments for notes are started.")
69 ;;;; A DCHUNK contains the bits we look at to decode an
71 ;;;; I tried to keep this abstract so that if using integers > the machine
72 ;;;; word size conses too much, it can be changed to use bit-vectors or
75 ;;;; KLUDGE: It's not clear that using bit-vectors would be any more efficient.
76 ;;;; Perhaps the abstraction could go away. -- WHN 19991124
79 (declaim (inline dchunk-or dchunk-and dchunk-clear dchunk-not
80 dchunk-make-mask dchunk-make-field
86 (def!constant dchunk-bits
#.sb
!vm
:n-word-bits
)
89 `(unsigned-byte ,dchunk-bits
))
90 (deftype dchunk-index
()
91 `(integer 0 ,dchunk-bits
))
93 (def!constant dchunk-zero
0)
94 (def!constant dchunk-one
#.
(1- (expt 2 sb
!vm
:n-word-bits
)))
96 (defun dchunk-extract (chunk byte-spec
)
97 (declare (type dchunk chunk
))
98 (the dchunk
(ldb byte-spec
(the dchunk chunk
))))
100 (defmacro dchunk-copy
(x)
103 (defun dchunk-or (to from
)
104 (declare (type dchunk to from
))
105 (the dchunk
(logior to from
)))
106 (defun dchunk-and (to from
)
107 (declare (type dchunk to from
))
108 (the dchunk
(logand to from
)))
109 (defun dchunk-clear (to from
)
110 (declare (type dchunk to from
))
111 (the dchunk
(logandc2 to from
)))
112 (defun dchunk-not (from)
113 (declare (type dchunk from
))
114 (the dchunk
(logand dchunk-one
(lognot from
))))
116 (defmacro dchunk-andf
(to from
)
117 `(setf ,to
(dchunk-and ,to
,from
)))
118 (defmacro dchunk-orf
(to from
)
119 `(setf ,to
(dchunk-or ,to
,from
)))
120 (defmacro dchunk-clearf
(to from
)
121 `(setf ,to
(dchunk-clear ,to
,from
)))
123 (defun dchunk-make-mask (pos)
124 (the dchunk
(mask-field pos -
1)))
125 (defun dchunk-make-field (pos value
)
126 (the dchunk
(dpb value pos
0)))
128 (defmacro make-dchunk
(value)
129 `(the dchunk
,value
))
131 #-sb-xc-host
;; FIXME: function belongs in 'target-disassem'
132 (defun sap-ref-dchunk (sap byte-offset byte-order
)
133 (declare (type sb
!sys
:system-area-pointer sap
)
134 (type offset byte-offset
)
135 (muffle-conditions compiler-note
) ; returns possible bignum
136 (optimize (speed 3) (safety 0)))
139 (32 (if (eq byte-order
:big-endian
)
140 (+ (ash (sb!sys
:sap-ref-8 sap byte-offset
) 24)
141 (ash (sb!sys
:sap-ref-8 sap
(+ 1 byte-offset
)) 16)
142 (ash (sb!sys
:sap-ref-8 sap
(+ 2 byte-offset
)) 8)
143 (sb!sys
:sap-ref-8 sap
(+ 3 byte-offset
)))
144 (+ (sb!sys
:sap-ref-8 sap byte-offset
)
145 (ash (sb!sys
:sap-ref-8 sap
(+ 1 byte-offset
)) 8)
146 (ash (sb!sys
:sap-ref-8 sap
(+ 2 byte-offset
)) 16)
147 (ash (sb!sys
:sap-ref-8 sap
(+ 3 byte-offset
)) 24))))
148 (64 (if (eq byte-order
:big-endian
)
149 (+ (ash (sb!sys
:sap-ref-8 sap byte-offset
) 56)
150 (ash (sb!sys
:sap-ref-8 sap
(+ 1 byte-offset
)) 48)
151 (ash (sb!sys
:sap-ref-8 sap
(+ 2 byte-offset
)) 40)
152 (ash (sb!sys
:sap-ref-8 sap
(+ 3 byte-offset
)) 32)
153 (ash (sb!sys
:sap-ref-8 sap
(+ 4 byte-offset
)) 24)
154 (ash (sb!sys
:sap-ref-8 sap
(+ 5 byte-offset
)) 16)
155 (ash (sb!sys
:sap-ref-8 sap
(+ 6 byte-offset
)) 8)
156 (sb!sys
:sap-ref-8 sap
(+ 7 byte-offset
)))
157 (+ (sb!sys
:sap-ref-8 sap byte-offset
)
158 (ash (sb!sys
:sap-ref-8 sap
(+ 1 byte-offset
)) 8)
159 (ash (sb!sys
:sap-ref-8 sap
(+ 2 byte-offset
)) 16)
160 (ash (sb!sys
:sap-ref-8 sap
(+ 3 byte-offset
)) 24)
161 (ash (sb!sys
:sap-ref-8 sap
(+ 4 byte-offset
)) 32)
162 (ash (sb!sys
:sap-ref-8 sap
(+ 5 byte-offset
)) 40)
163 (ash (sb!sys
:sap-ref-8 sap
(+ 6 byte-offset
)) 48)
164 (ash (sb!sys
:sap-ref-8 sap
(+ 7 byte-offset
)) 56)))))))
166 (defun dchunk-corrected-extract (from pos unit-bits byte-order
)
167 (declare (type dchunk from
))
168 (if (eq byte-order
:big-endian
)
169 (ldb (byte (byte-size pos
)
170 (+ (byte-position pos
) (- dchunk-bits unit-bits
)))
172 (ldb pos
(the dchunk from
))))
174 (defmacro dchunk-insertf
(place pos value
)
175 `(setf ,place
(the dchunk
(dpb ,value
,pos
(the dchunk
,place
)))))
178 (declare (type dchunk x y
))
180 (defmacro dchunk-zerop
(x)
181 `(dchunk= ,x dchunk-zero
))
183 (defun dchunk-strict-superset-p (sup sub
)
184 (and (zerop (logandc2 sub sup
))
185 (not (zerop (logandc2 sup sub
)))))
187 (defun dchunk-count-bits (x)
188 (declare (type dchunk x
))
191 (defstruct (instruction (:conc-name inst-
)
193 make-instruction
(name
199 labeller prefilters control
))
201 (name nil
:type
(or symbol string
) :read-only t
)
202 (format-name nil
:type
(or symbol string
) :read-only t
)
204 (mask dchunk-zero
:type dchunk
:read-only t
) ; bits in the inst that are constant
205 (id dchunk-zero
:type dchunk
:read-only t
) ; value of those constant bits
207 (length 0 :type disassem-length
:read-only t
) ; in bytes
209 (print-name nil
:type symbol
:read-only t
)
211 ;; disassembly "functions"
212 (prefilters nil
:type list
)
213 (labeller nil
:type
(or list vector
))
214 (printer (missing-arg) :type
(or null function
))
215 (control nil
:type
(or null function
) :read-only t
)
217 ;; instructions that are the same as this instruction but with more
219 (specializers nil
:type list
))
220 (def!method print-object
((inst instruction
) stream
)
221 (print-unreadable-object (inst stream
:type t
:identity t
)
222 (format stream
"~A(~A)" (inst-name inst
) (inst-format-name inst
))))
224 ;;;; an instruction space holds all known machine instructions in a
225 ;;;; form that can be easily searched
227 (defstruct (inst-space (:conc-name ispace-
)
229 (valid-mask dchunk-zero
:type dchunk
) ; applies to *children*
230 (choices nil
:type list
))
231 (def!method print-object
((ispace inst-space
) stream
)
232 (print-unreadable-object (ispace stream
:type t
:identity t
)))
234 ;;; now that we've defined the structure, we can declaim the type of
236 (declaim (type (or null inst-space
) *disassem-inst-space
*))
238 (defstruct (inst-space-choice (:conc-name ischoice-
)
240 (common-id dchunk-zero
:type dchunk
) ; applies to *parent's* mask
241 (subspace (missing-arg) :type
(or inst-space instruction
)))
243 (defmacro !begin-instruction-definitions
() nil
) ; FIXME: remove
245 ;;; FIXME: If we we interned the temp vars,
246 ;;; and wouldn't use symbols qua strings, then this would reduce to EQUAL.
247 (defun equal-mod-gensyms (a b
)
248 (named-let recurse
((a a
) (b b
))
251 (list (and (listp b
) (recurse (car a
) (car b
)) (recurse (cdr a
) (cdr b
))))
254 (not (symbol-package a
))
255 (not (symbol-package b
))
256 ;; If "strings", then comparison by STRING= is right,
257 ;; and if lexical vars, it's also right because
258 ;; we never rebind a given temp within a function.
260 ((or number character function
) (eql a b
))
261 (vector (and (vectorp b
) (every #'recurse a b
))))))
263 ;;; Previously there were complicated checker functions which tried to attempt to
264 ;;; decide, given two FUNSTATEs, whether all their args were similarly used,
265 ;;; where "similarity" required that the prefilter and such be identical.
266 ;;; Instead we can just look at two sexprs and decide whether they act the same,
267 ;;; which is of course impossible in general; however, for this purpose,
268 ;;; if sexprs are EQUAL disregarding variations in gensyms, then their code
269 ;;; can be folded. If we miss (don't fold) things that act the same, it's ok.
270 ;;; N.B.: This definition of equivalence is admissible because there can be
271 ;;; no "interesting" non-null lexical environment. While it could be non-null,
272 ;;; it can't matter, because our auto-generated code can't depend on the lexenv.
273 (defvar *current-instruction-flavor
* nil
)
274 (defun generate-function (kind forms funstate code-folding-cache skeleton
)
275 (let* ((sub-table (assq kind code-folding-cache
))
276 (bindings (make-arg-temp-bindings funstate
))
277 (guts `(let* ,bindings
,@forms
)))
278 (or (cdr (assoc guts
(cdr sub-table
) :test
#'equal-mod-gensyms
))
279 (let* ((name (concatenate 'string
"INST-" (string kind
) "-"
280 (write-to-string (length sub-table
))))
283 `(named-lambda ,name
,@(subst guts
:body
(cdr skeleton
))))))
284 (push (cons guts definition
) (cdr sub-table
))
287 (defstruct (arg (:copier nil
)
289 (:constructor %make-arg
(name))
290 (:constructor standard-make-arg
) ; only so #S readmacro works
292 (lambda (self stream
)
295 (print-unreadable-object (self stream
:type t
)
297 "~A ~:[~;+~]~:S~@[=~S~]~@[ filt=~S~]~
298 ~@[ lbl=~S~]~@[ prt=~S~]"
300 (arg-sign-extend-p self
)
305 (arg-printer self
)))))))
306 (name nil
:type symbol
)
307 (fields nil
:type list
)
309 (value nil
:type
(or list integer
))
310 (sign-extend-p nil
:type boolean
)
313 (printer nil
:type
(or null function vector
))
314 (prefilter nil
:type
(or null function
))
315 (use-label nil
:type
(or boolean function
)))
317 (defstruct (instruction-format (:conc-name format-
)
318 (:constructor make-inst-format
319 (name length default-printer args
))
322 (args nil
:type list
)
324 (length 0 :type disassem-length
) ; in bytes
326 (default-printer nil
:type list
))
328 ;;; A FUNSTATE holds the state of any arguments used in a disassembly
329 ;;; function. It is a 2-level alist. The outer list maps each ARG to
330 ;;; a list of styles in which that arg can be rendered.
331 ;;; Each rendering is named by a keyword (the key to the inner alist),
332 ;;; and is represented as a list of temp vars and values for them.
333 (defun make-funstate (args) (mapcar #'list args
))
335 (defun arg-position (arg funstate
)
336 ;;; The THE form is to assert that ARG is found.
337 (the filtered-value-index
(position arg funstate
:key
#'car
)))
339 (defun arg-or-lose (name funstate
)
340 (or (car (assoc name funstate
:key
#'arg-name
:test
#'eq
))
341 (pd-error "unknown argument ~S" name
)))
343 ;;;; Since we can't include some values in compiled output as they are
344 ;;;; (notably functions), we sometimes use a VALSRC structure to keep
345 ;;;; track of the source from which they were derived.
347 ;;; machinery to provide more meaningful error messages during compilation
348 (defun pd-error (fmt &rest args
)
349 (if *current-instruction-flavor
*
350 (error "~@<in printer-definition for ~S(~S): ~3I~:_~?~:>"
351 (car *current-instruction-flavor
*)
352 (cdr *current-instruction-flavor
*)
354 (apply #'error fmt args
)))
356 (defun format-or-lose (name)
357 (or (get name
'inst-format
)
358 (pd-error "unknown instruction format ~S" name
)))
360 ;;; Return a modified copy of ARG that has property values changed
361 ;;; depending on whether it is being used at compile-time or load-time.
362 ;;; This is to avoid evaluating #'FOO references at compile-time
363 ;;; while allowing compile-time manipulation of byte specifiers.
364 (defun massage-arg (spec when
)
367 ;; At compile-time we get a restricted view of the DEFINE-ARG-TYPE args,
368 ;; just enough to macroexpand :READER definitions. :TYPE and ::SIGN-EXTEND
369 ;; are as specified, but :PREFILTER, :LABELLER, and :PRINTER are not
370 ;; compile-time evaluated.
371 (loop for
(indicator val
) on
(cdr spec
) by
#'cddr
372 nconc
(case indicator
373 (:sign-extend
; Only a literal T or NIL is allowed
374 (list indicator
(the boolean val
)))
376 ;; #'ERROR is a placeholder for any compile-time non-nil
377 ;; value. If nil, it must be literally nil, not 'NIL.
378 (list indicator
(if val
#'error nil
)))
379 ((:field
:fields
:type
)
380 (list indicator val
)))))
382 (loop for
(indicator raw-val
) on
(cdr spec
) by
#'cddr
383 ;; Use NAMED-LAMBDAs to enhance debuggability,
384 for val
= (if (typep raw-val
'(cons (eql lambda
)))
385 `(named-lambda ,(format nil
"~A.~A" (car spec
) indicator
)
388 nconc
(case indicator
389 (:reader nil
) ; drop it
390 (:prefilter
; Enforce compile-time-determined not-nullness.
391 (list indicator
(if val
`(the (not null
) ,val
) nil
)))
392 (t (list indicator val
)))))))
394 (defmacro define-instruction-format
((format-name length-in-bits
395 &key default-printer include
)
397 #+sb-xc-host
(declare (ignore default-printer
))
399 "DEFINE-INSTRUCTION-FORMAT (Name Length {Format-Key Value}*) Arg-Def*
400 Define an instruction format NAME for the disassembler's use. LENGTH is
401 the length of the format in bits.
402 Possible FORMAT-KEYs:
404 :INCLUDE other-format-name
405 Inherit all arguments and properties of the given format. Any
406 arguments defined in the current format definition will either modify
407 the copy of an existing argument (keeping in the same order with
408 respect to when prefilters are called), if it has the same name as
409 one, or be added to the end.
410 :DEFAULT-PRINTER printer-list
411 Use the given PRINTER-LIST as a format to print any instructions of
412 this format when they don't specify something else.
414 Each ARG-DEF defines one argument in the format, and is of the form
415 (Arg-Name {Arg-Key Value}*)
417 Possible ARG-KEYs (the values are evaluated unless otherwise specified):
419 :FIELDS byte-spec-list
420 The argument takes values from these fields in the instruction. If
421 the list is of length one, then the corresponding value is supplied by
422 itself; otherwise it is a list of the values. The list may be NIL.
424 The same as :FIELDS (list byte-spec).
427 If the argument only has one field, this is the value it should have,
428 otherwise it's a list of the values of the individual fields. This can
429 be overridden in an instruction-definition or a format definition
430 including this one by specifying another, or NIL to indicate that it's
434 If non-NIL, the raw value of this argument is sign-extended,
435 immediately after being extracted from the instruction (before any
436 prefilters are run, for instance). If the argument has multiple
437 fields, they are all sign-extended.
440 Inherit any properties of the given argument type.
443 A function which is called (along with all other prefilters, in the
444 order that their arguments appear in the instruction-format) before
445 any printing is done, to filter the raw value. Any uses of READ-SUFFIX
446 must be done inside a prefilter.
448 :PRINTER function-string-or-vector
449 A function, string, or vector which is used to print this argument.
452 If non-NIL, the value of this argument is used as an address, and if
453 that address occurs inside the disassembled code, it is replaced by a
454 label. If this is a function, it is called to filter the value."
456 (eval-when (:compile-toplevel
)
458 ',format-name
',include
,length-in-bits nil
459 ,@(mapcar (lambda (arg) `(list ',(car arg
) ,@(massage-arg arg
:compile
)))
463 (awhen (getf (cdr arg-spec
) :reader
)
464 `((defun ,it
(dchunk dstate
)
465 (declare (ignorable dchunk dstate
))
466 (flet ((local-filtered-value (offset)
467 (declare (type filtered-value-index offset
))
468 (aref (dstate-filtered-values dstate
) offset
))
469 (local-extract (bytespec)
470 (dchunk-extract dchunk bytespec
)))
471 (declare (ignorable #'local-filtered-value
#'local-extract
)
472 (inline local-filtered-value local-extract
))
473 ;; Delay ARG-FORM-VALUE call until after compile-time-too
474 ;; processing of !%DEF-INSTRUCTION-FORMAT has happened.
478 (format-args (format-or-lose ',format-name
)))
479 (arg (find ',(car arg-spec
) format-args
481 (funstate (make-funstate format-args
))
482 (expr (arg-value-form arg funstate
:numeric
)))
483 `(let* ,(make-arg-temp-bindings funstate
) ,expr
))))
486 #-sb-xc-host
; Host doesn't need the real definition.
488 ',format-name
',include
,length-in-bits
,default-printer
489 ,@(mapcar (lambda (arg) `(list ',(car arg
) ,@(massage-arg arg
:eval
)))
492 (defun %def-inst-format
(name inherit length printer
&rest arg-specs
)
493 (let ((args (if inherit
(copy-list (format-args (format-or-lose inherit
)))))
495 (dolist (arg-spec arg-specs
)
496 (let* ((arg-name (car arg-spec
))
497 (properties (cdr arg-spec
))
498 (cell (member arg-name args
:key
#'arg-name
)))
499 (aver (not (memq arg-name seen
)))
502 (setq args
(nconc args
(list (apply #'modify-arg
(%make-arg arg-name
)
503 length properties
)))))
505 (rplaca cell
(apply #'modify-arg
(copy-structure (car cell
))
506 length properties
))))))
507 (setf (get name
'inst-format
)
508 (make-inst-format name
(bits-to-bytes length
) printer args
))))
510 (defun modify-arg (arg format-length
511 &key
(value nil value-p
)
513 (prefilter nil prefilter-p
)
514 (printer nil printer-p
)
515 (sign-extend nil sign-extend-p
)
516 (use-label nil use-label-p
)
518 (fields nil fields-p
))
521 (error ":FIELD and :FIELDS are mutually exclusive")
522 (setf fields
(list field
) fields-p t
)))
524 (let ((type-arg (or (get type
'arg-type
)
525 (pd-error "unknown argument type: ~S" type
))))
526 (setf (arg-printer arg
) (arg-printer type-arg
))
527 (setf (arg-prefilter arg
) (arg-prefilter type-arg
))
528 (setf (arg-sign-extend-p arg
) (arg-sign-extend-p type-arg
))
529 (setf (arg-use-label arg
) (arg-use-label type-arg
))))
531 (setf (arg-value arg
) value
))
533 (setf (arg-prefilter arg
) prefilter
))
535 (setf (arg-sign-extend-p arg
) sign-extend
))
537 (setf (arg-printer arg
) printer
))
539 (setf (arg-use-label arg
) use-label
))
541 (setf (arg-fields arg
)
542 (mapcar (lambda (bytespec)
543 (when (> (+ (byte-position bytespec
) (byte-size bytespec
))
545 (error "~@<in arg ~S: ~3I~:_~
546 The field ~S doesn't fit in an ~
547 instruction-format ~W bits wide.~:>"
548 (arg-name arg
) bytespec format-length
))
549 (correct-dchunk-bytespec-for-endianness
550 bytespec format-length sb
!c
:*backend-byte-order
*))
554 (defun arg-value-form (arg funstate
557 (allow-multiple-p (neq rendering
:numeric
)))
558 (let ((forms (gen-arg-forms arg rendering funstate
)))
559 (when (and (not allow-multiple-p
)
561 (/= (length forms
) 1))
562 (pd-error "~S must not have multiple values." arg
))
563 (maybe-listify forms
)))
565 (defun correct-dchunk-bytespec-for-endianness (bs unit-bits byte-order
)
566 (if (eq byte-order
:big-endian
)
567 (byte (byte-size bs
) (+ (byte-position bs
) (- dchunk-bits unit-bits
)))
570 (defun make-arg-temp-bindings (funstate)
571 (let ((bindings nil
))
572 ;; Prefilters have to be called in the correct order, so reverse FUNSTATE
573 ;; because we're using PUSH in the inner loop.
574 (dolist (arg-cell (reverse funstate
) bindings
)
575 ;; These sublists are "backwards", so PUSH ends up being correct.
576 (dolist (rendering (cdr arg-cell
))
577 (let* ((binding (cdr rendering
))
579 (vals (cdr binding
)))
581 (mapc (lambda (var val
) (push `(,var
,val
) bindings
)) vars vals
)
582 (push `(,vars
,vals
) bindings
)))))))
584 ;;; Return the form(s) that should be evaluated to render ARG in the chosen
585 ;;; RENDERING style, which is one of :RAW, :SIGN-EXTENDED,
586 ;;; :FILTERED, :NUMERIC, and :FINAL. Each rendering depends on the preceding
587 ;;; one, so asking for :FINAL will implicitly compute all renderings.
588 (defun gen-arg-forms (arg rendering funstate
)
589 (let* ((arg-cell (assq arg funstate
))
590 (rendering-temps (cdr (assq rendering
(cdr arg-cell
))))
591 (vars (car rendering-temps
))
592 (forms (cdr rendering-temps
)))
594 (multiple-value-bind (new-forms single-value-p
)
595 (%gen-arg-forms arg rendering funstate
)
596 (setq forms new-forms
597 vars
(cond ((or single-value-p
(atom forms
))
598 (if (symbolp forms
) vars
(sb!xc
:gensym
"_")))
599 ((every #'symbolp forms
)
600 ;; just use the same as the forms
603 (make-gensym-list (length forms
) "_"))))
604 (push (list* rendering vars forms
) (cdr arg-cell
))))
607 (defun maybe-listify (forms)
610 ((/= (length forms
) 1)
615 ;;; DEFINE-ARG-TYPE Name {Key Value}*
617 ;;; Define a disassembler argument type NAME (which can then be referenced in
618 ;;; another argument definition using the :TYPE argument). &KEY args are:
620 ;;; :SIGN-EXTEND boolean
621 ;;; If non-NIL, the raw value of this argument is sign-extended.
623 ;;; :TYPE arg-type-name
624 ;;; Inherit any properties of given arg-type.
626 ;;; :PREFILTER function
627 ;;; A function which is called (along with all other prefilters,
628 ;;; in the order that their arguments appear in the instruction-
629 ;;; format) before any printing is done, to filter the raw value.
630 ;;; Any uses of READ-SUFFIX must be done inside a prefilter.
632 ;;; :PRINTER function-string-or-vector
633 ;;; A function, string, or vector which is used to print an argument of
637 ;;; If non-NIL, the value of an argument of this type is used as
638 ;;; an address, and if that address occurs inside the disassembled
639 ;;; code, it is replaced by a label. If this is a function, it is
640 ;;; called to filter the value.
641 (defmacro define-arg-type
(name &rest args
642 &key
((:type inherit
))
643 sign-extend prefilter printer use-label
)
644 (declare (ignore sign-extend prefilter printer use-label
))
645 ;; FIXME: this should be an *unevaluated* macro arg (named :INHERIT)
646 (aver (typep inherit
'(or null
(cons (eql quote
) (cons symbol null
)))))
647 (let ((pair (cons name
(loop for
(ind val
) on args by
#'cddr
648 unless
(eq ind
:type
)
649 nconc
(list ind val
)))))
651 (eval-when (:compile-toplevel
)
652 (%def-arg-type
',name
,inherit
,@(massage-arg pair
:compile
)))
653 #-sb-xc-host
; Host doesn't need the real definition.
654 (%def-arg-type
',name
,inherit
,@(massage-arg pair
:eval
)))))
656 (defun %def-arg-type
(name inherit
&rest properties
)
657 (setf (get name
'arg-type
)
658 (apply 'modify-arg
(%make-arg name
) nil
659 (nconc (when inherit
(list :type inherit
)) properties
))))
661 (defun %gen-arg-forms
(arg rendering funstate
)
662 (declare (type arg arg
) (type list funstate
))
664 (:raw
; just extract the bits
665 (mapcar (lambda (bytespec)
666 `(the (unsigned-byte ,(byte-size bytespec
))
667 (local-extract ',bytespec
)))
669 (:sign-extended
; sign-extend, or not
670 (let ((raw-forms (gen-arg-forms arg
:raw funstate
)))
671 (if (and (arg-sign-extend-p arg
) (listp raw-forms
))
672 (mapcar (lambda (form field
)
673 `(the (signed-byte ,(byte-size field
))
674 (sign-extend ,form
,(byte-size field
))))
678 (:filtered
; extract from the prefiltered value vector
679 (let ((pf (arg-prefilter arg
)))
681 (values `(local-filtered-value ,(arg-position arg funstate
)) t
)
682 (gen-arg-forms arg
:sign-extended funstate
))))
683 (:numeric
; pass the filtered value to the label adjuster, or not
684 (let ((filtered-forms (gen-arg-forms arg
:filtered funstate
))
685 (use-label (arg-use-label arg
)))
686 ;; use-label = T means that the prefiltered value is already an address,
687 ;; otherwise non-nil means a function to call, and NIL means not a label.
688 ;; So only the middle case needs to call ADJUST-LABEL.
689 (if (and use-label
(neq use-label t
))
690 `((adjust-label ,(maybe-listify filtered-forms
) ,use-label
))
692 (:final
; if arg is not a label, return numeric value, otherwise a string
693 (let ((numeric-forms (gen-arg-forms arg
:numeric funstate
)))
694 (if (arg-use-label arg
)
695 `((lookup-label ,(maybe-listify numeric-forms
)))
698 (defun find-printer-fun (printer-source args cache
)
699 (let ((source (preprocess-printer printer-source args
))
700 (funstate (make-funstate args
)))
703 (let ((sb!xc
:*gensym-counter
* 0)) (compile-printer-list source funstate
))
706 '(lambda (chunk inst stream dstate
)
707 (declare (type dchunk chunk
)
708 (type instruction inst
)
710 (type disassem-state dstate
))
711 (macrolet ((local-format-arg (arg fmt
)
712 `(funcall (formatter ,fmt
) stream
,arg
)))
713 (flet ((local-tab-to-arg-column ()
714 (tab (dstate-argument-column dstate
) stream
))
716 (princ (inst-print-name inst
) stream
))
717 (local-write-char (ch)
718 (write-char ch stream
))
720 (princ thing stream
))
721 (local-princ16 (thing)
722 (princ16 thing stream
))
723 (local-call-arg-printer (arg printer
)
724 (funcall printer arg stream dstate
))
725 (local-call-global-printer (fun)
726 (funcall fun chunk inst stream dstate
))
727 (local-filtered-value (offset)
728 (declare (type filtered-value-index offset
))
729 (aref (dstate-filtered-values dstate
) offset
))
730 (local-extract (bytespec)
731 (dchunk-extract chunk bytespec
))
733 (or (gethash lab
(dstate-label-hash dstate
))
735 (adjust-label (val adjust-fun
)
736 (funcall adjust-fun val dstate
)))
737 (declare (ignorable #'local-tab-to-arg-column
739 #'local-princ
#'local-princ16
741 #'local-call-arg-printer
742 #'local-call-global-printer
744 #'local-filtered-value
745 #'lookup-label
#'adjust-label
)
746 (inline local-tab-to-arg-column
747 local-princ local-princ16
748 local-call-arg-printer local-call-global-printer
749 local-filtered-value local-extract
750 lookup-label adjust-label
))
753 (defun preprocess-test (subj form args
)
754 (multiple-value-bind (subj test
)
755 (if (and (consp form
) (symbolp (car form
)) (not (keywordp (car form
))))
756 (values (car form
) (cdr form
))
758 (let ((key (if (consp test
) (car test
) test
))
759 (body (if (consp test
) (cdr test
) nil
)))
763 ;; If no supplied constant values, just any constant is ok,
764 ;; just see whether there's some constant value in the arg.
768 (or (find subj args
:key
#'arg-name
)
769 (pd-error "unknown argument ~S" subj
)))))
770 ;; Otherwise, defer to run-time.
781 (preprocess-test subj sub-test args
))
785 (defun preprocess-conditionals (printer args
)
790 (preprocess-conditionals
791 `(:cond
((:not
,(nth 1 printer
)) ,@(nthcdr 2 printer
)))
794 (preprocess-conditionals `(:cond
(,(cdr printer
))) args
))
796 (preprocess-conditionals
797 `(:cond
(,(nth 1 printer
) ,(nth 2 printer
))
798 (t ,(nth 3 printer
)))
808 (lambda (sub-printer)
809 (preprocess-conditionals sub-printer args
))
813 (preprocess-test (find-first-field-name filtered-body
)
821 (lambda (sub-printer)
822 (preprocess-conditionals sub-printer args
))
825 ;;; Return a version of the disassembly-template PRINTER with
826 ;;; compile-time tests (e.g. :constant without a value), and any
827 ;;; :CHOOSE operators resolved properly for the args ARGS.
829 ;;; (:CHOOSE Sub*) simply returns the first Sub in which every field
830 ;;; reference refers to a valid arg.
831 (defun preprocess-printer (printer args
)
832 (preprocess-conditionals (preprocess-chooses printer args
) args
))
834 ;;; Return the first non-keyword symbol in a depth-first search of TREE.
835 (defun find-first-field-name (tree)
838 ((and (symbolp tree
) (not (keywordp tree
)))
842 ((eq (car tree
) 'quote
)
845 (or (find-first-field-name (car tree
))
846 (find-first-field-name (cdr tree
))))))
848 (defun preprocess-chooses (printer args
)
849 (cond ((atom printer
)
851 ((eq (car printer
) :choose
)
852 (pick-printer-choice (cdr printer
) args
))
854 (sharing-mapcar (lambda (sub) (preprocess-chooses sub args
))
857 ;;;; some simple functions that help avoid consing when we're just
858 ;;;; recursively filtering things that usually don't change
860 (defun sharing-cons (old-cons car cdr
)
862 "If CAR is eq to the car of OLD-CONS and CDR is eq to the CDR, return
863 OLD-CONS, otherwise return (cons CAR CDR)."
864 (if (and (eq car
(car old-cons
)) (eq cdr
(cdr old-cons
)))
868 (defun sharing-mapcar (fun list
)
869 (declare (type function fun
))
871 "A simple (one list arg) mapcar that avoids consing up a new list
872 as long as the results of calling FUN on the elements of LIST are
876 (funcall fun
(car list
))
877 (sharing-mapcar fun
(cdr list
)))))
879 (defun all-arg-refs-relevant-p (printer args
)
880 (cond ((or (null printer
) (keywordp printer
) (eq printer t
))
883 (find printer args
:key
#'arg-name
))
885 (every (lambda (x) (all-arg-refs-relevant-p x args
))
889 (defun pick-printer-choice (choices args
)
890 (dolist (choice choices
891 (pd-error "no suitable choice found in ~S" choices
))
892 (when (all-arg-refs-relevant-p choice args
)
895 (defun compile-printer-list (sources funstate
)
896 (unless (null sources
)
897 ;; Coalesce adjacent symbols/strings, and convert to strings if possible,
898 ;; since they require less consing to write.
899 (do ((el (car sources
) (car sources
))
900 (names nil
(cons (strip-quote el
) names
)))
901 ((not (string-or-qsym-p el
))
903 ;; concatenate adjacent strings and symbols
907 (mapcar #'string
(nreverse names
)))))
908 ;; WTF? Everything else using INST-PRINT-NAME writes a string.
909 (push (if (some #'alpha-char-p string
)
910 `',(make-symbol string
) ; Preserve casifying output.
914 (cons (compile-printer-body (car sources
) funstate
)
915 (compile-printer-list (cdr sources
) funstate
))))
917 (defun compile-printer-body (source funstate
)
923 `(local-tab-to-arg-column))
925 (pd-error "unknown printer element: ~S" source
))
927 (compile-print source funstate
))
929 `(local-princ ',source
))
930 ((eq (car source
) :using
)
931 (unless (or (stringp (cadr source
))
932 (and (listp (cadr source
))
933 (eq (caadr source
) 'function
)))
934 (pd-error "The first arg to :USING must be a string or #'function."))
935 ;; For (:using #'F) to be stuffed in properly, the printer as expressed
936 ;; in its DSL would have to compile-time expand into a thing that
937 ;; reconstructs it such that #'F forms don't appear inside quoted list
938 ;; structure. Lacking the ability to do that, we treat #'F as a bit of
939 ;; syntax to be evaluated manually.
940 (compile-print (caddr source
) funstate
941 (let ((f (cadr source
)))
942 (if (typep f
'(cons (eql function
) (cons symbol null
)))
943 (symbol-function (second f
))
945 ((eq (car source
) :plus-integer
)
946 ;; prints the given field proceed with a + or a -
948 (arg-value-form (arg-or-lose (cadr source
) funstate
)
953 (local-write-char #\
+))
954 (local-princ ,form
))))
955 ((eq (car source
) 'quote
)
956 `(local-princ ,source
))
957 ((eq (car source
) 'function
)
958 `(local-call-global-printer ,source
))
959 ((eq (car source
) :cond
)
960 `(cond ,@(mapcar (lambda (clause)
961 `(,(compile-test (find-first-field-name
965 ,@(compile-printer-list (cdr clause
)
968 ;; :IF, :UNLESS, and :WHEN are replaced by :COND during preprocessing
970 `(progn ,@(compile-printer-list source funstate
)))))
972 (defun compile-print (arg-name funstate
&optional printer
)
973 (let* ((arg (arg-or-lose arg-name funstate
))
974 (printer (or printer
(arg-printer arg
))))
977 `(local-format-arg ,(arg-value-form arg funstate
) ,printer
))
979 `(local-princ (aref ,printer
,(arg-value-form arg funstate
:numeric
))))
980 ((or function
(cons (eql function
)))
981 `(local-call-arg-printer ,(arg-value-form arg funstate
) ,printer
))
983 `(,(if (arg-use-label arg
) 'local-princ16
'local-princ
)
984 ,(arg-value-form arg funstate
))))))
986 (defun string-or-qsym-p (thing)
989 (eq (car thing
) 'quote
)
990 (or (stringp (cadr thing
))
991 (symbolp (cadr thing
))))))
993 (defun strip-quote (thing)
994 (if (and (consp thing
) (eq (car thing
) 'quote
))
998 (defun compare-fields-form (val-form-1 val-form-2
)
999 (flet ((listify-fields (fields)
1000 (cond ((symbolp fields
) fields
)
1001 ((every #'constantp fields
) `',fields
)
1002 (t `(list ,@fields
)))))
1003 (cond ((or (symbolp val-form-1
) (symbolp val-form-2
))
1004 `(equal ,(listify-fields val-form-1
)
1005 ,(listify-fields val-form-2
)))
1007 `(and ,@(mapcar (lambda (v1 v2
) `(= ,v1
,v2
))
1008 val-form-1 val-form-2
))))))
1010 (defun compile-test (subj test funstate
)
1011 (when (and (consp test
) (symbolp (car test
)) (not (keywordp (car test
))))
1012 (setf subj
(car test
)
1014 (let ((key (if (consp test
) (car test
) test
))
1015 (body (if (consp test
) (cdr test
) nil
)))
1021 (let* ((arg (arg-or-lose subj funstate
))
1022 (fields (arg-fields arg
))
1024 (when (not (= (length fields
) (length consts
)))
1025 (pd-error "The number of constants doesn't match number of ~
1026 fields in: (~S :constant~{ ~S~})"
1028 (compare-fields-form (gen-arg-forms arg
:numeric funstate
)
1031 `(> ,(arg-value-form (arg-or-lose subj funstate
) funstate
:numeric
)
1034 `(< ,(arg-value-form (arg-or-lose subj funstate
) funstate
:numeric
)
1037 (let ((arg1 (arg-or-lose subj funstate
))
1038 (arg2 (arg-or-lose (car body
) funstate
)))
1039 (unless (and (= (length (arg-fields arg1
))
1040 (length (arg-fields arg2
)))
1041 (every (lambda (bs1 bs2
)
1042 (= (byte-size bs1
) (byte-size bs2
)))
1045 (pd-error "can't compare differently sized fields: ~
1046 (~S :same-as ~S)" subj
(car body
)))
1047 (compare-fields-form (gen-arg-forms arg1
:numeric funstate
)
1048 (gen-arg-forms arg2
:numeric funstate
))))
1050 `(or ,@(mapcar (lambda (sub) (compile-test subj sub funstate
))
1053 `(and ,@(mapcar (lambda (sub) (compile-test subj sub funstate
))
1056 `(not ,(compile-test subj
(car body
) funstate
)))
1057 ((and (consp key
) (null body
))
1058 (compile-test subj key funstate
))
1060 (pd-error "bogus test-form: ~S" test
)))))
1062 (defun compute-mask-id (args)
1063 (let ((mask dchunk-zero
)
1065 (dolist (arg args
(values mask id
))
1066 (let ((av (arg-value arg
)))
1068 (do ((fields (arg-fields arg
) (cdr fields
))
1069 (values (if (atom av
) (list av
) av
) (cdr values
)))
1071 (let ((field-mask (dchunk-make-mask (car fields
))))
1072 (when (/= (dchunk-and mask field-mask
) dchunk-zero
)
1073 (pd-error "The field ~S in arg ~S overlaps some other field."
1076 (dchunk-insertf id
(car fields
) (car values
))
1077 (dchunk-orf mask field-mask
))))))))
1079 #!-sb-fluid
(declaim (inline bytes-to-bits
))
1080 (declaim (maybe-inline sign-extend aligned-p align tab tab0
))
1082 (defun bytes-to-bits (bytes)
1083 (declare (type disassem-length bytes
))
1084 (* bytes sb
!vm
:n-byte-bits
))
1086 (defun bits-to-bytes (bits)
1087 (declare (type disassem-length bits
))
1088 (multiple-value-bind (bytes rbits
)
1089 (truncate bits sb
!vm
:n-byte-bits
)
1090 (when (not (zerop rbits
))
1091 (error "~W bits is not a byte-multiple." bits
))
1094 (defun sign-extend (int size
)
1095 (declare (type integer int
)
1096 (type (integer 0 128) size
))
1097 (if (logbitp (1- size
) int
)
1098 (dpb int
(byte size
0) -
1)
1101 ;;; Is ADDRESS aligned on a SIZE byte boundary?
1102 (defun aligned-p (address size
)
1103 (declare (type address address
)
1104 (type alignment size
))
1105 (zerop (logand (1- size
) address
)))
1107 ;;; Return ADDRESS aligned *upward* to a SIZE byte boundary.
1108 (defun align (address size
)
1109 (declare (type address address
)
1110 (type alignment size
))
1111 (logandc1 (1- size
) (+ (1- size
) address
)))
1113 (defun tab (column stream
)
1114 (funcall (formatter "~V,1t") stream column
)
1116 (defun tab0 (column stream
)
1117 (funcall (formatter "~V,0t") stream column
)
1120 (defun princ16 (value stream
)
1121 (write value
:stream stream
:radix t
:base
16 :escape nil
))
1123 (declaim (ftype function read-suffix
))
1124 (defun read-signed-suffix (length dstate
)
1125 (declare (type (member 8 16 32 64) length
)
1126 (type disassem-state dstate
)
1127 (optimize (speed 3) (safety 0)))
1128 (sign-extend (read-suffix length dstate
) length
))
1130 (defstruct (storage-info (:copier nil
))
1131 (groups nil
:type list
) ; alist of (name . location-group)
1132 (debug-vars #() :type vector
))
1134 (defstruct (segment (:conc-name seg-
)
1135 (:constructor %make-segment
)
1137 (sap-maker (missing-arg)
1138 :type
(function () sb
!sys
:system-area-pointer
))
1139 ;; Length in bytes of the range of memory covered by this segment.
1140 (length 0 :type disassem-length
)
1141 ;; Length of the memory range excluding any trailing untagged data.
1142 ;; Defaults to 'length' but could be shorter.
1143 ;; FIXME: can opcodes-length really be shorter? Nothing ever alters it.
1144 (opcodes-length 0 :type disassem-length
)
1145 (virtual-location 0 :type address
)
1146 (storage-info nil
:type
(or null storage-info
))
1147 ;; KLUDGE: CODE-COMPONENT is not a type the host understands
1148 #-sb-xc-host
(code nil
:type
(or null sb
!kernel
:code-component
))
1149 (unboxed-data-range nil
:type
(or null
(cons fixnum fixnum
)))
1150 (hooks nil
:type list
))
1152 ;;; All state during disassembly. We store some seemingly redundant
1153 ;;; information so that we can allow garbage collect during disassembly and
1154 ;;; not get tripped up by a code block being moved...
1155 (defstruct (disassem-state (:conc-name dstate-
)
1156 (:constructor %make-dstate
)
1158 ;; offset of current pos in segment
1159 (cur-offs 0 :type offset
)
1160 ;; offset of next position
1161 (next-offs 0 :type offset
)
1162 ;; a sap pointing to our segment
1163 (segment-sap nil
:type
(or null sb
!sys
:system-area-pointer
))
1164 ;; the current segment
1165 (segment nil
:type
(or null segment
))
1166 ;; what to align to in most cases
1167 (alignment sb
!vm
:n-word-bytes
:type alignment
)
1168 (byte-order :little-endian
1169 :type
(member :big-endian
:little-endian
))
1170 ;; for user code to hang stuff off of
1171 (properties nil
:type list
)
1172 ;; for user code to hang stuff off of, cleared each time after a
1173 ;; non-prefix instruction is processed
1174 (inst-properties nil
:type list
)
1175 (filtered-values (make-array max-filtered-value-index
)
1176 :type filtered-value-vector
)
1177 ;; used for prettifying printing
1178 (addr-print-len nil
:type
(or null
(integer 0 20)))
1179 (argument-column 0 :type column
)
1180 ;; to make output look nicer
1181 (output-state :beginning
1182 :type
(member :beginning
1186 ;; alist of (address . label-number)
1187 (labels nil
:type list
)
1188 ;; same as LABELS slot data, but in a different form
1189 (label-hash (make-hash-table) :type hash-table
)
1191 (fun-hooks nil
:type list
)
1193 ;; alist of (address . label-number), popped as it's used
1194 (cur-labels nil
:type list
)
1195 ;; OFFS-HOOKs, popped as they're used
1196 (cur-offs-hooks nil
:type list
)
1198 ;; for the current location
1199 (notes nil
:type list
)
1201 ;; currently active source variables
1202 (current-valid-locations nil
:type
(or null
(vector bit
))))
1203 (def!method print-object
((dstate disassem-state
) stream
)
1204 (print-unreadable-object (dstate stream
:type t
)
1207 (dstate-cur-offs dstate
)
1208 (dstate-segment dstate
))))
1210 ;;; Return the absolute address of the current instruction in DSTATE.
1211 (defun dstate-cur-addr (dstate)
1212 (the address
(+ (seg-virtual-location (dstate-segment dstate
))
1213 (dstate-cur-offs dstate
))))
1215 ;;; Return the absolute address of the next instruction in DSTATE.
1216 (defun dstate-next-addr (dstate)
1217 (the address
(+ (seg-virtual-location (dstate-segment dstate
))
1218 (dstate-next-offs dstate
))))
1220 ;;; Get the value of the property called NAME in DSTATE. Also SETF'able.
1222 ;;; KLUDGE: The associated run-time machinery for this is in
1223 ;;; target-disassem.lisp (much later). This is here just to make sure
1224 ;;; it's defined before it's used. -- WHN ca. 19990701
1225 (defmacro dstate-get-prop
(dstate name
)
1226 `(getf (dstate-properties ,dstate
) ,name
))
1228 ;;; Push NAME on the list of instruction properties in DSTATE.
1229 (defun dstate-put-inst-prop (dstate name
)
1230 (push name
(dstate-inst-properties dstate
)))
1232 ;;; Return non-NIL if NAME is on the list of instruction properties in
1234 (defun dstate-get-inst-prop (dstate name
)
1235 (member name
(dstate-inst-properties dstate
) :test
#'eq
))