Allocate symbols into permgen if enabled
[sbcl.git] / src / compiler / disassem.lisp
blob82344b03e490088c74b3f2f37168b336fb410d47
1 ;;;; machine-independent disassembler
3 ;;;; This software is part of the SBCL system. See the README file for
4 ;;;; more information.
5 ;;;;
6 ;;;; This software is derived from the CMU CL system, which was
7 ;;;; written at Carnegie Mellon University and released into the
8 ;;;; public domain. The software is in the public domain and is
9 ;;;; provided with absolutely no warranty. See the COPYING and CREDITS
10 ;;;; files for more information.
12 (in-package "SB-DISASSEM")
14 ;;; types and defaults
16 (deftype text-width () '(integer 0 1000))
17 (deftype alignment () '(integer 0 64))
18 (deftype offset () 'fixnum)
19 (deftype address () 'word)
20 (deftype disassem-length () '(and unsigned-byte fixnum))
21 (deftype column () '(integer 0 1000))
23 (defconstant max-filtered-value-index 32)
24 (deftype filtered-value-index ()
25 `(integer 0 (,max-filtered-value-index)))
26 (deftype filtered-value-vector ()
27 `(simple-array t (,max-filtered-value-index)))
29 ;;;; disassembly parameters
31 ;; With a few tweaks, you can use a running SBCL as a cross-assembler
32 ;; and disassembler for other supported backends,
33 ;; if that backend has been converted to use a distinct ASM package.
34 (eval-when (:compile-toplevel :load-toplevel :execute)
35 (defparameter sb-assem::*backend-instruction-set-package*
36 (find-package #.(sb-cold::backend-asm-package-name))))
38 ;; How many columns of output to allow for the address preceding each line.
39 ;; If NIL, use the minimum possible width for the disassembly range.
40 ;; If 0, do not print addresses.
41 (defvar *disassem-location-column-width* nil)
42 (declaim (type (or null text-width) *disassem-location-column-width*))
44 ;;; the width of the column in which instruction-names are printed. A
45 ;;; value of zero gives the effect of not aligning the arguments at
46 ;;; all.
47 (defvar *disassem-opcode-column-width* 0)
48 (declaim (type text-width *disassem-opcode-column-width*))
50 ;;; the width of the column in which instruction-bytes are printed. A
51 ;;; value of zero disables the printing of instruction bytes.
52 (defvar *disassem-inst-column-width* 16
53 "The width of instruction bytes.")
54 (declaim (type text-width *disassem-inst-column-width*))
56 (defvar *disassem-note-column* (+ 45 *disassem-inst-column-width*)
57 "The column in which end-of-line comments for notes are started.")
59 ;;;; A DCHUNK contains the bits we look at to decode an
60 ;;;; instruction.
61 ;;;; I tried to keep this abstract so that if using integers > the machine
62 ;;;; word size conses too much, it can be changed to use bit-vectors or
63 ;;;; something.
64 ;;;;
65 ;;;; KLUDGE: It's not clear that using bit-vectors would be any more efficient.
66 ;;;; Perhaps the abstraction could go away. -- WHN 19991124
68 (declaim (inline dchunk-or dchunk-and dchunk-clear dchunk-not
69 dchunk-make-mask dchunk-make-field
70 dchunk-extract
71 dchunk=
72 dchunk-count-bits))
74 ;;; For variable-length instruction sets, such as x86, it is better to
75 ;;; define the dchunk size to be the smallest number of bits necessary
76 ;;; and sufficient to decode any instruction format, if that quantity
77 ;;; of bits is small enough to avoid bignum consing.
78 ;;; Ideally this constant would go in the 'insts' file for the architecture,
79 ;;; but there's really no easy way to do that at present.
80 (defconstant dchunk-bits
81 #+x86-64 56
82 #+ppc64 32
83 #-(or x86-64 ppc64) sb-vm:n-word-bits)
85 (deftype dchunk ()
86 `(unsigned-byte ,dchunk-bits))
87 (deftype dchunk-index ()
88 `(integer 0 ,dchunk-bits))
90 (defconstant dchunk-zero 0)
91 (defconstant dchunk-one (ldb (byte dchunk-bits 0) -1))
93 (defun dchunk-extract (chunk byte-spec)
94 (declare (type dchunk chunk))
95 (the dchunk (ldb byte-spec (the dchunk chunk))))
97 (defmacro dchunk-copy (x)
98 `(the dchunk ,x))
100 (defun dchunk-or (to from)
101 (declare (type dchunk to from))
102 (the dchunk (logior to from)))
103 (defun dchunk-and (to from)
104 (declare (type dchunk to from))
105 (the dchunk (logand to from)))
106 (defun dchunk-clear (to from)
107 (declare (type dchunk to from))
108 (the dchunk (logandc2 to from)))
109 (defun dchunk-not (from)
110 (declare (type dchunk from))
111 (the dchunk (logand dchunk-one (lognot from))))
113 (defmacro dchunk-andf (to from)
114 `(setf ,to (dchunk-and ,to ,from)))
115 (defmacro dchunk-orf (to from)
116 `(setf ,to (dchunk-or ,to ,from)))
117 (defmacro dchunk-clearf (to from)
118 `(setf ,to (dchunk-clear ,to ,from)))
120 (defun dchunk-make-mask (pos)
121 (the dchunk (mask-field pos -1)))
122 (defun dchunk-make-field (pos value)
123 (the dchunk (dpb value pos 0)))
125 (defmacro make-dchunk (value)
126 `(the dchunk ,value))
128 (defun dchunk-corrected-extract (from pos unit-bits byte-order)
129 (declare (type dchunk from))
130 (if (eq byte-order :big-endian)
131 (ldb (byte (byte-size pos)
132 (+ (byte-position pos) (- dchunk-bits unit-bits)))
133 (the dchunk from))
134 (ldb pos (the dchunk from))))
136 (defmacro dchunk-insertf (place pos value)
137 `(setf ,place (the dchunk (dpb ,value ,pos (the dchunk,place)))))
139 (defun dchunk= (x y)
140 (declare (type dchunk x y))
141 (= x y))
142 (defmacro dchunk-zerop (x)
143 `(dchunk= ,x dchunk-zero))
145 (defun dchunk-strict-superset-p (sup sub)
146 (and (zerop (logandc2 sub sup))
147 (not (zerop (logandc2 sup sub)))))
149 (defun dchunk-count-bits (x)
150 (declare (type dchunk x))
151 (logcount x))
153 (defstruct (arg (:constructor %make-arg (name))
154 (:copier nil)
155 (:predicate nil))
156 (name nil :type symbol)
157 (fields nil :type list)
159 (value nil :type (or list integer))
160 (sign-extend-p nil :type boolean)
162 ;; functions to use
163 (printer nil :type (or null function vector))
164 (prefilter nil :type (or null function))
165 (use-label nil :type (or boolean function)))
167 (defstruct (instruction-format (:conc-name format-)
168 (:constructor make-inst-format
169 (name length default-printer args))
170 (:copier nil))
171 (name nil)
172 (args nil :type list)
174 (length 0 :type disassem-length) ; in bytes
176 (default-printer nil :type list))
177 (declaim (freeze-type arg instruction-format))
179 ;;; A FUNSTATE holds the state of any arguments used in a disassembly
180 ;;; function. It is a 2-level alist. The outer list maps each ARG to
181 ;;; a list of styles in which that arg can be rendered.
182 ;;; Each rendering is named by a keyword (the key to the inner alist),
183 ;;; and is represented as a list of temp vars and values for them.
184 (defun make-funstate (args) (mapcar #'list args))
186 (defun arg-position (arg funstate)
187 ;;; The THE form is to assert that ARG is found.
188 (the filtered-value-index (position arg funstate :key #'car)))
190 (defun arg-or-lose (name funstate)
191 (or (car (assoc name funstate :key #'arg-name :test #'eq))
192 (pd-error "unknown argument ~S" name)))
194 ;;; machinery to provide more meaningful error messages during compilation
195 (defvar *current-instruction-flavor*)
196 (defun pd-error (fmt &rest args)
197 (if (boundp '*current-instruction-flavor*)
198 (error "~{A printer ~D~}: ~?" *current-instruction-flavor* fmt args)
199 (apply #'error fmt args)))
201 (defun format-or-lose (name)
202 (or (get name 'inst-format)
203 (pd-error "unknown instruction format ~S" name)))
205 ;;; Return a modified copy of ARG that has property values changed
206 ;;; depending on whether it is being used at compile-time or load-time.
207 ;;; This is to avoid evaluating #'FOO references at compile-time
208 ;;; while allowing compile-time manipulation of byte specifiers.
209 (defun massage-arg (spec when)
210 (ecase when
211 (:compile
212 ;; At compile-time we get a restricted view of the DEFINE-ARG-TYPE args,
213 ;; just enough to macroexpand :READER definitions. :TYPE and :SIGN-EXTEND
214 ;; are as specified, but :PREFILTER, :LABELLER, and :PRINTER are not
215 ;; compile-time evaluated.
216 (loop for (indicator val) on (cdr spec) by #'cddr
217 nconc (case indicator
218 (:sign-extend ; Only a literal T or NIL is allowed
219 (list indicator (the boolean val)))
220 (:prefilter
221 ;; #'ERROR is a placeholder for any compile-time non-nil
222 ;; value. If nil, it must be literally nil, not 'NIL.
223 (list indicator (if val #'error nil)))
224 ((:field :fields :type)
225 (list indicator val)))))
226 (:eval
227 (loop for (indicator raw-val) on (cdr spec) by #'cddr
228 ;; Use NAMED-LAMBDAs to enhance debuggability,
229 for val = (if (typep raw-val '(cons (eql lambda)))
230 `(named-lambda ,(format nil "~A.~A" (car spec) indicator)
231 ,@(cdr raw-val))
232 raw-val)
233 nconc (case indicator
234 (:reader nil) ; drop it
235 (:prefilter ; Enforce compile-time-determined not-nullness.
236 (list indicator (if val `(the (not null) ,val) nil)))
237 (t (list indicator val)))))))
239 (defmacro define-instruction-format ((format-name length-in-bits
240 &key default-printer include)
241 &body arg-specs)
242 #+sb-xc-host (declare (ignore default-printer))
243 "DEFINE-INSTRUCTION-FORMAT (Name Length {Format-Key Value}*) Arg-Def*
244 Define an instruction format NAME for the disassembler's use. LENGTH is
245 the length of the format in bits.
246 Possible FORMAT-KEYs:
248 :INCLUDE other-format-name
249 Inherit all arguments and properties of the given format. Any
250 arguments defined in the current format definition will either modify
251 the copy of an existing argument (keeping in the same order with
252 respect to when prefilters are called), if it has the same name as
253 one, or be added to the end.
254 :DEFAULT-PRINTER printer-list
255 Use the given PRINTER-LIST as a format to print any instructions of
256 this format when they don't specify something else.
258 Each ARG-DEF defines one argument in the format, and is of the form
259 (Arg-Name {Arg-Key Value}*)
261 If ARG-NAME is an integer it is the same as (#.(gensym) :value arg-name ...).
263 Possible ARG-KEYs (the values are evaluated unless otherwise specified):
265 :FIELDS byte-spec-list
266 The argument takes values from these fields in the instruction. If
267 the list is of length one, then the corresponding value is supplied by
268 itself; otherwise it is a list of the values. The list may be NIL.
269 :FIELD byte-spec
270 The same as :FIELDS (list byte-spec).
272 :VALUE value
273 If the argument only has one field, this is the value it should have,
274 otherwise it's a list of the values of the individual fields. This can
275 be overridden in an instruction-definition or a format definition
276 including this one by specifying another, or NIL to indicate that it's
277 variable.
279 :SIGN-EXTEND boolean
280 If non-NIL, the raw value of this argument is sign-extended,
281 immediately after being extracted from the instruction (before any
282 prefilters are run, for instance). If the argument has multiple
283 fields, they are all sign-extended.
285 :TYPE arg-type-name
286 Inherit any properties of the given argument type.
288 :PREFILTER function
289 A function which is called (along with all other prefilters, in the
290 order that their arguments appear in the instruction-format) before
291 any printing is done, to filter the raw value. Any uses of READ-SUFFIX
292 must be done inside a prefilter.
294 :PRINTER function-string-or-vector
295 A function, string, or vector which is used to print this argument.
297 :USE-LABEL
298 If non-NIL, the value of this argument is used as an address, and if
299 that address occurs inside the disassembled code, it is replaced by a
300 label. If this is a function, it is called to filter the value."
301 `(progn
302 (eval-when (:compile-toplevel)
303 (%def-inst-format
304 ',format-name ',include ,length-in-bits nil
305 ,@(mapcar (lambda (arg) `(list ',(car arg) ,@(massage-arg arg :compile)))
306 arg-specs)))
307 #-sb-xc-host ; Host doesn't execute any stuff that comes with
308 (progn ; format definitions, including dchunk readers
309 ,@(mapcan
310 (lambda (arg-spec)
311 (awhen (getf (cdr arg-spec) :reader)
312 `((defun ,it (dchunk dstate)
313 (declare (ignorable dchunk dstate))
314 (flet ((local-filtered-value (offset)
315 (declare (type filtered-value-index offset))
316 (aref (dstate-filtered-values dstate) offset))
317 (local-extract (bytespec)
318 (dchunk-extract dchunk bytespec)))
319 (declare (ignorable #'local-filtered-value #'local-extract)
320 (inline local-filtered-value local-extract))
321 ;; Delay ARG-FORM-VALUE call until after compile-time-too
322 ;; processing of !%DEF-INSTRUCTION-FORMAT has happened.
323 (macrolet
324 ((reader ()
325 (let* ((format-args
326 (format-args (format-or-lose ',format-name)))
327 (arg (find ',(car arg-spec) format-args
328 :key #'arg-name))
329 (funstate (make-funstate format-args))
330 (*!temp-var-counter* 0)
331 (expr (arg-value-form arg funstate :numeric)))
332 `(let* ,(make-arg-temp-bindings funstate) ,expr))))
333 (reader)))))))
334 arg-specs)
335 (%def-inst-format
336 ',format-name ',include ,length-in-bits ,default-printer
337 ,@(mapcar (lambda (arg) `(list ',(car arg) ,@(massage-arg arg :eval)))
338 arg-specs)))))
340 (defun %def-inst-format (name inherit length printer &rest arg-specs)
341 (let ((args (if inherit (copy-list (format-args (format-or-lose inherit)))))
342 (seen))
343 (dolist (arg-spec arg-specs)
344 (let* ((arg-name (car arg-spec))
345 (properties (cdr arg-spec)))
346 (when (integerp arg-name)
347 (setf properties (list* :value arg-name
348 properties)
349 arg-name (gensym)))
350 (let ((cell (member arg-name args :key #'arg-name)))
351 (aver (not (memq arg-name seen)))
352 (push arg-name seen)
353 (cond ((not cell)
354 (setq args (nconc args (list (apply #'modify-arg (%make-arg arg-name)
355 length properties)))))
356 (properties
357 (rplaca cell (apply #'modify-arg (copy-structure (car cell))
358 length properties)))))))
359 (setf (get name 'inst-format)
360 (make-inst-format name (bits-to-bytes length) printer args))))
362 (defun modify-arg (arg format-length
363 &key (value nil value-p)
364 (type nil type-p)
365 (prefilter nil prefilter-p)
366 (printer nil printer-p)
367 (sign-extend nil sign-extend-p)
368 (use-label nil use-label-p)
369 (field nil field-p)
370 (fields nil fields-p))
371 (when field-p
372 (if fields-p
373 (error ":FIELD and :FIELDS are mutually exclusive")
374 (setf fields (list field) fields-p t)))
375 (when type-p
376 (let ((type-arg (or (get type 'arg-type)
377 (pd-error "unknown argument type: ~S" type))))
378 (setf (arg-printer arg) (arg-printer type-arg))
379 (setf (arg-prefilter arg) (arg-prefilter type-arg))
380 (setf (arg-sign-extend-p arg) (arg-sign-extend-p type-arg))
381 (setf (arg-use-label arg) (arg-use-label type-arg))))
382 (when value-p
383 (setf (arg-value arg) value))
384 (when prefilter-p
385 (setf (arg-prefilter arg) prefilter))
386 (when sign-extend-p
387 (setf (arg-sign-extend-p arg) sign-extend))
388 (when printer-p
389 (setf (arg-printer arg) printer))
390 (when use-label-p
391 (setf (arg-use-label arg) use-label))
392 (when fields-p
393 (setf (arg-fields arg)
394 (mapcar (lambda (bytespec)
395 (when (> (+ (byte-position bytespec) (byte-size bytespec))
396 format-length)
397 (error "~@<in arg ~S: ~3I~:_~
398 The field ~S doesn't fit in an ~
399 instruction-format ~W bits wide.~:>"
400 (arg-name arg) bytespec format-length))
401 (correct-dchunk-bytespec-for-endianness
402 bytespec format-length sb-c:*backend-byte-order*))
403 fields)))
404 arg)
406 (defun arg-value-form (arg funstate
407 &optional
408 (rendering :final)
409 (allow-multiple-p (neq rendering :numeric)))
410 (let ((forms (gen-arg-forms arg rendering funstate)))
411 (when (and (not allow-multiple-p)
412 (listp forms)
413 (/= (length forms) 1))
414 (pd-error "~S must not have multiple values." arg))
415 (maybe-listify forms)))
417 (defun correct-dchunk-bytespec-for-endianness (bs unit-bits byte-order)
418 (if (eq byte-order :big-endian)
419 (byte (byte-size bs) (+ (byte-position bs) (- dchunk-bits unit-bits)))
420 bs))
422 (defun make-arg-temp-bindings (funstate)
423 (let ((bindings nil))
424 ;; Prefilters have to be called in the correct order, so reverse FUNSTATE
425 ;; because we're using PUSH in the inner loop.
426 (dolist (arg-cell (reverse funstate) bindings)
427 ;; These sublists are "backwards", so PUSH ends up being correct.
428 (dolist (rendering (cdr arg-cell))
429 (let* ((binding (cdr rendering))
430 (vars (car binding))
431 (vals (cdr binding)))
432 ;; We can end up here with VARS = NIL, and VALS = an atom.
433 ;; As the spec says, MAPC "should be prepared to signal an error
434 ;; ... if any list is not a proper list"
435 ;; We don't err in that situation because we check for ENDP of the
436 ;; lists from left to right. However, at least one implementation
437 ;; does rigorously use ENDP on both lists on each iteration.
438 (cond ((not vars))
439 ((listp vars)
440 (mapc (lambda (var val) (push `(,var ,val) bindings)) vars vals))
442 (push `(,vars ,vals) bindings))))))))
444 ;;; Return the form(s) that should be evaluated to render ARG in the chosen
445 ;;; RENDERING style, which is one of :RAW, :SIGN-EXTENDED,
446 ;;; :FILTERED, :NUMERIC, and :FINAL. Each rendering depends on the preceding
447 ;;; one, so asking for :FINAL will implicitly compute all renderings.
448 (defvar *!temp-var-counter*)
449 (defun gen-arg-forms (arg rendering funstate)
450 (labels ((tempvars (n)
451 (if (plusp n)
452 (cons (package-symbolicate
453 #.(find-package "SB-DISASSEM") ".T" (incf *!temp-var-counter*))
454 (tempvars (1- n))))))
455 (let* ((arg-cell (assq arg funstate))
456 (rendering-temps (cdr (assq rendering (cdr arg-cell))))
457 (vars (car rendering-temps))
458 (forms (cdr rendering-temps)))
459 (unless forms
460 (multiple-value-bind (new-forms single-value-p)
461 (%gen-arg-forms arg rendering funstate)
462 (setq forms new-forms
463 vars (cond ((or single-value-p (atom forms))
464 (if (symbolp forms) vars (car (tempvars 1))))
465 ((every #'symbolp forms)
466 ;; just use the same as the forms
467 nil)
469 (tempvars (length forms)))))
470 (push (list* rendering vars forms) (cdr arg-cell))))
471 (or vars forms))))
473 (defun maybe-listify (forms)
474 (cond ((atom forms)
475 forms)
476 ((/= (length forms) 1)
477 `(list ,@forms))
479 (car forms))))
481 ;;; DEFINE-ARG-TYPE Name {Key Value}*
483 ;;; Define a disassembler argument type NAME (which can then be referenced in
484 ;;; another argument definition using the :TYPE argument). &KEY args are:
486 ;;; :SIGN-EXTEND boolean
487 ;;; If non-NIL, the raw value of this argument is sign-extended.
489 ;;; :TYPE arg-type-name
490 ;;; Inherit any properties of given arg-type.
492 ;;; :PREFILTER function
493 ;;; A function which is called (along with all other prefilters,
494 ;;; in the order that their arguments appear in the instruction-
495 ;;; format) before any printing is done, to filter the raw value.
496 ;;; Any uses of READ-SUFFIX must be done inside a prefilter.
498 ;;; :PRINTER function-string-or-vector
499 ;;; A function, string, or vector which is used to print an argument of
500 ;;; this type.
502 ;;; :USE-LABEL
503 ;;; If non-NIL, the value of an argument of this type is used as
504 ;;; an address, and if that address occurs inside the disassembled
505 ;;; code, it is replaced by a label. If this is a function, it is
506 ;;; called to filter the value.
507 (defmacro define-arg-type (name &rest args
508 &key ((:type inherit))
509 sign-extend prefilter printer use-label)
510 (declare (ignore sign-extend prefilter printer use-label))
511 ;; FIXME: this should be an *unevaluated* macro arg (named :INHERIT)
512 (aver (typep inherit '(or null (cons (eql quote) (cons symbol null)))))
513 (let ((pair (cons name (loop for (ind val) on args by #'cddr
514 unless (eq ind :type)
515 nconc (list ind val)))))
516 `(progn
517 (eval-when (:compile-toplevel :execute)
518 (%def-arg-type ',name ,inherit ,@(massage-arg pair :compile)))
519 #-sb-xc-host ; Host doesn't need the real definition.
520 (%def-arg-type ',name ,inherit ,@(massage-arg pair :eval)))))
522 (defun %def-arg-type (name inherit &rest properties)
523 (setf (get name 'arg-type)
524 (apply 'modify-arg (%make-arg name) nil
525 (nconc (when inherit (list :type inherit)) properties))))
527 (defun %gen-arg-forms (arg rendering funstate)
528 (declare (type arg arg) (type list funstate))
529 (ecase rendering
530 (:raw ; just extract the bits
531 (mapcar (lambda (bytespec)
532 `(the (unsigned-byte ,(byte-size bytespec))
533 (local-extract ',bytespec)))
534 (arg-fields arg)))
535 (:sign-extended ; sign-extend, or not
536 (let ((raw-forms (gen-arg-forms arg :raw funstate)))
537 (if (and (arg-sign-extend-p arg) (listp raw-forms))
538 (mapcar (lambda (form field)
539 `(the (signed-byte ,(byte-size field))
540 (sign-extend ,form ,(byte-size field))))
541 raw-forms
542 (arg-fields arg))
543 raw-forms)))
544 (:filtered ; extract from the prefiltered value vector
545 (let ((pf (arg-prefilter arg)))
546 (if pf
547 (values `(local-filtered-value ,(arg-position arg funstate)) t)
548 (gen-arg-forms arg :sign-extended funstate))))
549 (:numeric ; pass the filtered value to the label adjuster, or not
550 (let ((filtered-forms (gen-arg-forms arg :filtered funstate))
551 (use-label (arg-use-label arg)))
552 ;; use-label = T means that the prefiltered value is already an address,
553 ;; otherwise non-nil means a function to call, and NIL means not a label.
554 ;; So only the middle case needs to call ADJUST-LABEL.
555 (if (and use-label (neq use-label t))
556 `((adjust-label ,(maybe-listify filtered-forms) ,use-label))
557 filtered-forms)))
558 (:final ; if arg is not a label, return numeric value, otherwise a string
559 (let ((numeric-forms (gen-arg-forms arg :numeric funstate)))
560 (if (arg-use-label arg)
561 `((lookup-label ,(maybe-listify numeric-forms)))
562 numeric-forms)))))
564 (declaim (inline bytes-to-bits))
566 (defun bytes-to-bits (bytes)
567 (declare (type disassem-length bytes))
568 (* bytes sb-vm:n-byte-bits))
570 (defun bits-to-bytes (bits)
571 (declare (type disassem-length bits))
572 (multiple-value-bind (bytes rbits)
573 (truncate bits sb-vm:n-byte-bits)
574 (when (not (zerop rbits))
575 (error "~W bits is not a byte-multiple." bits))
576 bytes))
578 (defun sign-extend (int size)
579 (declare (type integer int)
580 (type (integer 0 128) size))
581 (if (logbitp (1- size) int)
582 (dpb int (byte size 0) -1)
583 int))