Have sign-extend-complex deal correctly with bytes of size 0.
[movitz-ia-x86.git] / codec.lisp
blobf9f19bf5c4a654b0f03f3e2478d14c2678266e75
1 ;;;;------------------------------------------------------------------
2 ;;;;
3 ;;;; Copyright (C) 2000, 2001, 2002, 2004-2005,
4 ;;;; Department of Computer Science, University of Tromso, Norway
5 ;;;;
6 ;;;; Filename: codec.lisp
7 ;;;; Description: Encoding and decoding of instructions to/from binary.
8 ;;;; Author: Frode Vatvedt Fjeld <frodef@acm.org>
9 ;;;; Created at: Thu May 4 15:16:45 2000
10 ;;;; Distribution: See the accompanying file COPYING.
11 ;;;;
12 ;;;; $Id: codec.lisp,v 1.8 2007/02/26 22:14:00 ffjeld Exp $
13 ;;;;
14 ;;;;------------------------------------------------------------------
16 (in-package #:ia-x86)
18 (defparameter *error-on-eof* nil)
19 (defparameter *error-on-unknown-instruction* nil)
21 ;;; ----------------------------------------------------------------
22 ;;; Instruction template match
23 ;;; ----------------------------------------------------------------
25 ;;; A "template" is an object that is used to map op-codes to
26 ;;; instructions. The basic problem it solves is that ia-x86 uses
27 ;;; variable-length op-codes. Consequently, figuring out if the
28 ;;; octets you are currently decoding represents a complete op-code
29 ;;; is non-trivial.
30 ;;;
31 ;;; If you regard the X86 IA as huffman-comression-codes, the templates
32 ;;; would be something like the nodes in the huffman tree.
33 ;;;
34 ;;; Templates are specialized to:
35 ;;; * instr-template: represents actual instructions.
36 ;;; * prefix-template: represents instruction-prefixes.
38 (defconstant +template-default-priority+ 0)
41 (defclass template ()
42 ((value
43 :type unsigned-byte
44 :initarg value
45 :accessor template-value)
46 (numo
47 :type (integer 1 12)
48 :initarg numo
49 :accessor template-numo)
50 (priority
51 :type integer
52 :initarg priority
53 :initform +template-default-priority+
54 :accessor template-priority)))
56 (defclass instr-template (template)
57 ((mask
58 :type signed-byte
59 :initarg mask
60 :accessor template-mask)
61 (not-list
62 :type list ; list of value/masks that must *not* match.
63 :initform '()
64 :initarg not-list
65 :accessor template-not-list)
66 (cpu-mode
67 :type (member :32-bit :16-bit :any-mode)
68 :initform :any-mode
69 :initarg cpu-mode
70 :accessor template-cpu-mode)
71 (operand-mode
72 :type (member :32-bit :16-bit :any-mode)
73 :initform :any-mode
74 :initarg operand-mode
75 :accessor template-operand-mode)
76 (addressing-mode
77 :type (member :32-bit :16-bit :any-mode)
78 :initform :any-mode
79 :initarg addressing-mode
80 :accessor template-addressing-mode)
81 (req-prefixes
82 :type list ; list of prefixes required for a match
83 :initform '()
84 :initarg req-prefixes
85 :accessor template-req-prefixes)
86 (not-prefixes
87 :type list ; list of prefixes disqualifying any match
88 :initform '()
89 :initarg not-prefixes
90 :accessor template-not-prefixes)
91 (instr-classname
92 :initarg instr-classname
93 :accessor template-instr-classname)
94 (instr-numo
95 :type (integer 1 16)
96 :initarg instr-numo
97 :accessor template-instr-numo)
98 (instr-operand-types
99 :type list
100 :initarg instr-operand-types
101 :accessor template-instr-operand-types)
102 (instr-operand-classes
103 :type list ; this template generates operands of these classes (in this order)
104 :initarg instr-operand-classes
105 :accessor template-instr-operand-classes)
106 (instr-operand-base-classes
107 :type list ; caching the base-classes of instr-operand-classes
108 :initarg instr-operand-base-classes
109 :accessor template-instr-operand-base-classes)
110 (instr-modr/m-p
111 :type boolean ; does instruction have modr/m?
112 :initarg modr/m-p
113 :accessor template-instr-modr/m-p)
114 (instr-sib-p
115 :type boolean ; does instruction have SIB?
116 :initarg sib-p
117 :accessor template-instr-sib-p)
118 (instr-displacement-numo
119 :type (integer 0 4) ; size of "displacement" field.
120 :initarg displacement-numo
121 :accessor template-instr-displacement-numo)
122 (instr-immediate-numo
123 :type (integer 0 4) ; size of "immediate" field.
124 :initarg immediate-numo
125 :accessor template-instr-immediate-numo)
126 (instr-opcode-numo
127 :type (integer 1 16) ; size of "opcode" field.
128 :initarg opcode-numo
129 :accessor template-instr-opcode-numo)))
131 (defmethod print-object ((obj instr-template) stream)
132 (if *print-pretty*
133 (progn
134 (format stream "#IT(~A)"
135 (loop for slot in '(instr-classname cpu-mode operand-mode addressing-mode
136 req-prefixes not-prefixes instr-opcode-numo
137 instr-immediate-numo instr-displacement-numo
138 instr-sib-p instr-modr/m-p
139 instr-operand-classes instr-operand-base-classes
140 instr-operand-types)
141 when (slot-boundp obj slot)
142 collect (list slot (slot-value obj slot))))
143 obj)
144 (call-next-method obj stream)))
146 (defclass prefix-template (template)
147 ((value
148 :type (unsigned-byte 8))))
150 (defmethod template-numo ((template prefix-template))
151 (values 1))
153 (defmethod template-match-datum-and-prefixes ((template prefix-template) datum datum-numo prefixes)
154 (declare (ignore prefixes))
155 (and (= 1 datum-numo)
156 (= (template-value template)
157 datum)))
159 (defgeneric template-equal (t1 t2)
160 (:documentation "Returns true when two templates are considered to be the same."))
162 (defmethod template-equal (t1 t2)
163 (declare (ignore t1 t2))
164 nil)
166 (defmethod template-equal ((t1 prefix-template) (t2 prefix-template))
167 (= (template-value t1)
168 (template-value t2)))
170 (defmethod template-equal ((t1 instr-template) (t2 instr-template))
171 (and (= (template-value t1)
172 (template-value t2))
173 (= (template-numo t1)
174 (template-numo t2))
175 (= (template-mask t1)
176 (template-mask t2))
177 (eq (template-operand-mode t1)
178 (template-operand-mode t2))
179 (eq (template-addressing-mode t1)
180 (template-addressing-mode t2))
181 (equalp (template-not-list t1)
182 (template-not-list t2))
183 (equal (template-req-prefixes t1)
184 (template-req-prefixes t2))
185 (equal (template-not-prefixes t1)
186 (template-not-prefixes t2))))
188 (defmethod template-match-opcode ((template prefix-template) datum datum-numo)
189 (declare (ignore datum-numo))
190 (= (template-value template)
191 datum))
193 (defmethod template-match-opcode ((template instr-template) datum datum-numo)
194 (assert (<= datum-numo
195 (template-numo template))
196 (datum template)
197 "Trying to match a template size ~A with ~A of size ~A"
198 (template-numo template)
199 datum
200 datum-numo)
201 (and (= (logand datum ; match value
202 (ldb #0=(byte (* 8 datum-numo)
203 (* 8 (- (template-numo template)
204 datum-numo)))
205 (template-mask template)))
206 (ldb #0#
207 (template-value template)))
208 (every #'(lambda (not-spec) ; check against every element of not-list
209 (destructuring-bind (not-value not-mask)
210 not-spec
211 (let ((effective-not-mask (ldb #1=(byte (* 8 datum-numo)
212 (* 8 (- (template-numo template)
213 datum-numo)))
214 not-mask)))
215 (or (zerop effective-not-mask) ; not match not-value
216 (/= (logand datum
217 effective-not-mask)
218 (ldb #1#
219 not-value))))))
220 (template-not-list template))))
222 (defvar *cpu-mode* :32-bit
223 "The assumed processor mode. One of :32-bit or :16-bit.")
225 (defun effective-operand-mode (prefixes)
226 "Calcuclate the operand mode (:32-bit or :16-bit) of and instruction,
227 based on the cpu state (i.e. *cpu-mode*) and the active set of prefixes."
228 (ecase *cpu-mode*
229 ((:32-bit) (if (member '16-bit-operand prefixes) :16-bit :32-bit))
230 ((:16-bit) (if (member '16-bit-operand prefixes) :32-bit :16-bit))))
232 (defun effective-addressing-mode (prefixes)
233 "Calcuclate the addressing mode (:32-bit or :16-bit) of and instruction,
234 based on the cpu state (i.e. *cpu-mode*) and the active set of prefixes."
235 (ecase *cpu-mode*
236 ((:32-bit) (if (member '16-bit-address prefixes) :16-bit :32-bit))
237 ((:16-bit) (if (member '16-bit-address prefixes) :32-bit :16-bit))))
239 (defmethod template-match-datum-and-prefixes ((template instr-template) datum datum-numo prefixes)
240 (assert (< datum (ash 1 (* 8 datum-numo)))
241 (datum datum-numo)
242 "Datum ~A is too big for datum-numo ~A."
243 datum datum-numo)
244 (and (template-match-opcode template datum datum-numo)
245 (or (eq :any-mode
246 (template-cpu-mode template))
247 (eq *cpu-mode*
248 (template-cpu-mode template)))
249 (or (eq :any-mode
250 (template-operand-mode template))
251 (eq (effective-operand-mode prefixes)
252 (template-operand-mode template)))
253 (or (eq :any-mode
254 (template-addressing-mode template))
255 (eq (effective-addressing-mode prefixes)
256 (template-addressing-mode template)))
257 (every #'(lambda (required-prefix) ; match req-prefixes
258 (member required-prefix prefixes))
259 (template-req-prefixes template))
260 (null (intersection prefixes ; not match not-prefixes
261 (template-not-prefixes template)))))
263 (defvar *template-table* (make-array #x100
264 :element-type 'list
265 :initial-element ())
266 "For each 8-bit value (matching the first octet of an one-byte op-code),
267 this array holds a list of candidate template objects.")
269 (defvar *template-table-0f* (make-array #x100
270 :element-type 'list
271 :initial-element ())
272 "For each 8-bit value (matching the second octet of a two-byte op-code),
273 this array holds a list of candidate template objects.")
275 (defvar *template-by-class-name* (make-hash-table :test #'eq)
276 "For every instruction-class, we have a slot in this hash-table
277 where the key is the class' name and the value is a list of
278 that class' templates.")
280 (defun template-forget-all ()
281 (setf *template-table* (make-array #x100
282 :element-type 'list
283 :initial-element nil)
284 *template-table-0f* (make-array #x100
285 :element-type 'list
286 :initial-element nil)
287 *template-by-class-name* (make-hash-table :test #'eq))
288 (mapcar #'(lambda (pp)
289 (templates-remember nil (list (make-instance 'prefix-template
290 'value (car pp)))))
291 +opcode-prefix-map+))
293 (defun templates-remember (name template-list)
294 (when name
295 (setf (gethash name *template-by-class-name*) template-list)
296 (loop for i from 0 to #xff
297 when (typep (aref *template-table* i) 'instr-template)
298 do (setf (aref *template-table* i)
299 (delete-if #'(lambda (template-to-delete)
300 (eq name (template-instr-classname template-to-delete)))
301 (aref *template-table* i)))
302 when (typep (aref *template-table-0f* i) 'instr-template)
303 do (setf (aref *template-table-0f* i)
304 (delete-if #'(lambda (template-to-delete)
305 (eq name (template-instr-classname template-to-delete)))
306 (aref *template-table-0f* i)))))
307 (loop for template in template-list
308 if (= #x0f (ldb (byte 8 (* 8 (1- (template-numo template))))
309 (realpart (template-value template))))
310 do (loop for o from #x0f00 to #x0fff
311 and i from 0 to #xff
312 when (template-match-opcode template o 2)
313 do (pushnew template (aref *template-table-0f* i) :test #'template-equal))
314 else do (loop for i from #x00 to #xff
315 when (template-match-opcode template i 1)
316 do (pushnew template (aref *template-table* i) :test #'template-equal))))
318 ;;; (cond
319 ;;; ((= #x0f (ldb (byte 8 (* 8 (1- (template-numo template))))
320 ;;; (realpart (template-value template))))
321 ;;; (loop for o from #x0f00 to #x0fff
322 ;;; and i from 0 to #xff
323 ;;; when (template-match-opcode template o 2)
324 ;;; do (pushnew template (aref *template-table-0f i) #'template-equal)))
326 ;;; do (setf (aref *template-table-0f* i)
327 ;;; (cons template
328 ;;; (delete-if #'(lambda (tt) ; remove existing equal templates
329 ;;; (and (or (typep template 'prefix-template)
330 ;;; (eq (template-instr-classname template)
331 ;;; (template-instr-classname tt)))
332 ;;; (template-equal template tt)))
333 ;;; (aref *template-table-0f* i))))))
335 ;;; (t
336 ;;; (dotimes (i #x100)
337 ;;; (when (template-match-opcode template i 1)
338 ;;; (setf (aref *template-table* i)
339 ;;; (cons template
340 ;;; (delete-if #'(lambda (tt) ; remove existing equal templates
341 ;;; (and (or (typep template 'prefix-template)
342 ;;; (eq (template-instr-class template)
343 ;;; (template-instr-class tt)))
344 ;;; (template-equal template tt)))
345 ;;; (aref *template-table* i)))))))))
347 (defun template-lookup-next (read-octet-fn)
348 (let ((new-octet (funcall read-octet-fn)))
349 (case new-octet
350 ((nil) (when *error-on-eof*
351 (error "Unexpected EOF")))
352 ((#x0f) (let ((newer-octet (funcall read-octet-fn)))
353 (when (and (null newer-octet)
354 *error-on-eof*)
355 (error "Unexpected EOF 2"))
356 (values (aref *template-table-0f* newer-octet)
357 (complex (logior #x0f00 newer-octet) 2))))
358 (t (values (aref *template-table* new-octet)
359 (complex new-octet 1))))))
361 (defun templates-lookup-by-class-name (class-name)
362 (gethash class-name *template-by-class-name*))
364 (defun operand-type-p (operand class)
365 (if (typep operand 'abstract-operand)
366 (member (class-name class) '(operand-immediate operand-direct operand-rel-pointer))
367 (typep operand class)))
369 ;;;(defun template-match-by-operand-classes (template operand-list)
370 ;;; "This predicate returns true if the template matches the classes
371 ;;;of the list of operands."
372 ;;; (and (= (length (template-instr-operand-base-classes template))
373 ;;; (length operand-list))
374 ;;; (loop
375 ;;; for top-class in (template-instr-operand-base-classes template)
376 ;;; and operand in operand-list
377 ;;; always (operand-type-p operand top-class))))
379 (defun template-match-by-operand-classes (template operand-list)
380 "This predicate returns true if the template matches the classes
381 of the list of operands."
382 (let ((class-list (template-instr-operand-base-classes template)))
383 (or (and (null class-list)
384 (null operand-list))
385 (and class-list
386 operand-list
387 (loop
388 for top-classes on class-list
389 and operands on operand-list
390 always (and (operand-type-p (car operands)
391 (car top-classes))
392 (if (null (cdr operands))
393 (null (cdr top-classes))
394 (cdr top-classes))))))))
396 (defun template-forget-class (class-name operand-types)
397 (when (and (boundp '*template-by-class-name*)
398 (templates-lookup-by-class-name class-name))
399 (loop for dead-template in (templates-lookup-by-class-name class-name)
400 ;;; with have-warned = nil
401 when (equalp operand-types
402 (template-instr-operand-types dead-template))
403 ;;; do (unless have-warned
404 ;;; (warn "Removing old templates for: ~A~{ ~A~}" class-name operand-types))
405 ;;; do (setf have-warned t)
406 do (loop for i from 0 to #xff
407 do (setf (aref *template-table* i)
408 (delete dead-template (aref *template-table* i)))))
409 (setf (gethash class-name *template-by-class-name*) nil)))
411 (defun template-filter (cdatum prefixes template-list)
412 "Given a op-code byte, a list of active prefixes and a list of
413 templates, return the subset of the templates that are still eligible matches."
414 (mapcan #'(lambda (template)
415 (when (template-match-datum-and-prefixes template
416 (realpart cdatum)
417 (imagpart cdatum)
418 prefixes)
419 (list template)))
420 template-list))
422 (unless +opcode-prefix-map+
423 (warn "+opcode-prefix-map+ is empty."))
424 (mapcar #'(lambda (pp)
425 (templates-remember nil (list (make-instance 'prefix-template
426 'value (car pp)))))
427 +opcode-prefix-map+)
431 (defun template-instr-and-prefix-length (template instr env)
432 (+ (template-instr-numo template)
433 (length (calculate-prefixes instr template))
434 (length (compute-instruction-extra-prefixes instr template env))))
437 ;;; ----------------------------------------------------------------
438 ;;; Instruction decode
439 ;;; ----------------------------------------------------------------
441 (defgeneric instruction-decode (instr template)
442 (:documentation
443 "Decode <instr> according to its datum."))
445 (defmethod instruction-decode ((instr instruction) template)
446 (when (slot-boundp instr 'original-datum)
447 (let ((instr-symbolic (decode-instr-symbolic template
448 (realpart (instruction-original-datum instr)))))
449 (setf (instruction-operands instr)
450 (loop for op-class in (template-instr-operand-classes template)
451 collecting
452 (operand-decode (make-instance op-class)
454 instr-symbolic)))))
455 instr)
457 (defun make-decode-instruction (datum prefixes template)
458 (instruction-decode (make-instance (template-instr-classname template)
459 :datum datum
460 :prefixes (set-difference prefixes
461 (template-req-prefixes template)))
462 template))
464 (defun calculate-prefixes (instr template)
465 (remove-duplicates
466 (append (if (or (eq :any-mode (template-operand-mode template))
467 (eq *cpu-mode* (template-operand-mode template)))
469 '(16-bit-operand))
470 (if (or (eq :any-mode (template-addressing-mode template))
471 (eq *cpu-mode* (template-addressing-mode template)))
473 '(16-bit-address))
474 (template-req-prefixes template)
475 (set-difference (instruction-prefixes instr)
476 (template-not-prefixes template)))))
478 (defun prefix-encode (cdatum prefix-list &optional extra-prefixes)
479 "Given an instruction encoded into <cdatum> by <template>,
480 append the necessary prefix-bytes to cdatum."
481 (let ((new-byte (realpart cdatum))
482 (byte-pos (imagpart cdatum)))
483 (loop for prefix in prefix-list
484 do (setf (ldb (byte 8 (* 8 byte-pos))
485 new-byte)
486 (decode-set +prefix-opcode-map+
487 prefix))
488 (incf byte-pos))
489 (loop for prefix in extra-prefixes
490 do (setf (ldb (byte 8 (* 8 byte-pos))
491 new-byte)
492 prefix)
493 (incf byte-pos))
494 (complex new-byte byte-pos)))
496 (defun make-instr-symbolic-from-template (template)
497 (check-type template template)
498 (let ((instr-symbolic (make-instr-symbolic
499 :opcode (ldb (byte (* 8 (template-instr-opcode-numo template))
500 (* 8 (- (template-numo template)
501 (template-instr-opcode-numo template))))
502 (template-value template)))))
503 (when (and (template-instr-modr/m-p template)
504 (> (template-numo template)
505 (template-instr-opcode-numo template)))
506 ;; initialize modr/m from template
507 (let ((modrm-octet (ldb (byte 8 (* 8 (- (template-numo template)
508 (template-instr-opcode-numo template)
509 1)))
510 (template-value template))))
511 (with-slots (mod reg r/m)
512 instr-symbolic
513 (setf mod (ldb (byte 2 6) modrm-octet)
514 reg (ldb (byte 3 3) modrm-octet)
515 r/m (ldb (byte 3 0) modrm-octet)))))
516 instr-symbolic))
518 (defstruct assemble-env
519 symtab
520 current-pc)
522 (defstruct teo
523 template
524 encoding-list
525 resolved-operand-list)
527 (defun template-unify-operands (template instr operand-list env)
528 "Given a template and a list of operands, determine if the operands
529 can be encoded in this template. If not, return NIL. Otherwise, return
530 a teo for encoding later."
531 (make-teo
532 :resolved-operand-list (loop
533 for operand in operand-list
534 and top-class in (template-instr-operand-classes template)
535 and top-type in (template-instr-operand-types template)
536 if (operand-and-encoding-unify operand
537 (operand-class-encoding top-class)
538 top-type
539 template
540 instr
541 env)
542 collect it
543 else
544 do (return-from template-unify-operands nil)) ; no unify
545 :encoding-list (mapcar #'operand-class-encoding
546 (template-instr-operand-classes template))
547 :template template))
549 (defvar *instruction-compute-extra-prefix-map* nil)
551 (defun instruction-encode-from-teo (instr teo env)
552 (check-type instr instruction)
553 (let ((template (teo-template teo))
554 (resolved-operand-list (teo-resolved-operand-list teo))
555 (operand-encoding-list (teo-encoding-list teo)))
556 (let ((is (make-instr-symbolic-from-template template)))
557 (loop for operand in resolved-operand-list
558 and operand-encoding in operand-encoding-list
559 and operand-type in (template-instr-operand-types template)
560 do (operand-encode operand operand-encoding operand-type is))
561 (prefix-encode (encode-instr-symbolic template is)
562 (calculate-prefixes instr template)
563 (compute-instruction-extra-prefixes instr template env)))))
565 (defun compute-instruction-extra-prefixes (instr template env)
566 (funcall (or (instruction-finalizer instr)
567 (cdr (assoc (class-name (class-of instr)) *instruction-compute-extra-prefix-map*
568 :test #'string=))
569 (constantly nil))
570 instr env (+ (template-instr-numo template)
571 (length (calculate-prefixes instr template)))))
573 (defun template-match-by-cpu-mode (template cpu-mode)
574 (or (eq :any-mode (template-cpu-mode template))
575 (eq cpu-mode (template-cpu-mode template))))
577 (defun instruction-encode-to-teo (instr &optional env
578 (operand-list
579 (instruction-operands instr)))
580 (let ((templates (templates-lookup-by-class-name (type-of instr))))
581 (when (null templates)
582 (error "No templates for instruction ~A." instr))
583 (or (loop for template in templates
584 when (and (template-match-by-cpu-mode template *cpu-mode*)
585 (template-match-by-operand-classes template operand-list)
586 (template-unify-operands template instr operand-list env))
587 collect it)
588 (error "Can't unify operands for ~A ~A." instr
589 (mapcar #'type-of operand-list)))))
591 (defun pairwise-teopt (teo-list instr optimize-teo-fn)
592 (loop
593 with chosen-teo = (first teo-list)
594 for teo in (rest teo-list)
595 do (when (funcall optimize-teo-fn teo chosen-teo instr)
596 (setf chosen-teo teo))
597 finally (return chosen-teo)))
599 (defun optimize-teo-smallest (teo-list instr env)
600 "Prefer the smallest (as in fewest octets) encodings."
601 (declare (ignore env))
602 (pairwise-teopt teo-list
603 instr
604 #'(lambda (teo1 teo2 instr)
605 (< (+ (template-instr-numo (teo-template teo1))
606 (length (calculate-prefixes instr (teo-template teo1))))
607 (+ (template-instr-numo (teo-template teo2))
608 (length (calculate-prefixes instr (teo-template teo2))))))))
610 (defun template-is-16-bit-p (template)
611 (or (eq :16-bit (template-addressing-mode template))
612 (eq :16-bit (template-operand-mode template))
613 (member '16-bit-operand (template-req-prefixes template))
614 (member '16-bit-address (template-req-prefixes template))))
616 (defun optimize-teo-smallest-no16 (teo-list instr env)
617 "Prefer the smallest 32-bit encoding."
618 (declare (ignore env))
619 (pairwise-teopt teo-list
620 instr
621 #'(lambda (teo1 teo2 instr)
622 (let ((t1 (teo-template teo1))
623 (t2 (teo-template teo2)))
624 (or (and (not (template-is-16-bit-p t1))
625 (template-is-16-bit-p t2))
626 (< (+ (template-instr-numo t1)
627 (length (calculate-prefixes instr t1)))
628 (+ (template-instr-numo t2)
629 (length (calculate-prefixes instr t2)))))))))
631 (defun optimize-teo-original-size (teo-list instr env)
632 "Find an encoding that matches the size of the instruction's
633 original size (its instruction-original-datum)."
634 (let ((original-size (imagpart (instruction-original-datum instr))))
635 (find-if #'(lambda (teo)
636 (= original-size
637 (template-instr-and-prefix-length (teo-template teo) instr env)))
638 teo-list)))
640 (defun optimize-teo-user-size (teo-list instr env)
641 "Find an encoding that matches the user-specified size."
642 (find-if #'(lambda (teo)
643 (= (instruction-user-size instr)
644 (template-instr-and-prefix-length (teo-template teo) instr env)))
645 teo-list))
647 (defun instruction-encode (instr env &optional (optimize-teo-fn #'optimize-teo-smallest))
648 (let ((teo-list (instruction-encode-to-teo instr env)))
649 (if (null teo-list)
650 (error "Unable to encode ~A." instr)
651 (let ((teo (if (instruction-user-size instr)
652 (optimize-teo-user-size teo-list instr env)
653 (funcall optimize-teo-fn teo-list instr env))))
654 (if (not (teo-p teo))
655 (error "Optimization with ~S of instruction ~S failed for teo-list ~S"
656 optimize-teo-fn instr teo-list)
657 (instruction-encode-from-teo instr teo env))))))
661 (defun change-endian (old-byte numo)
662 "Returns the alternative endian version of a byte."
663 (let ((result 0))
664 (dotimes (i numo)
665 (setf (ldb (byte 8 (* 8 (- numo i 1))) result)
666 (ldb (byte 8 (* 8 i)) old-byte)))
667 result))
669 (defun sign-extend-complex (cdatum)
670 "Given a two's complement signed byte (where the most significant
671 byte represents the sign), return the natural representation of
672 that byte (i.e. #c(255 1) => -1)."
673 (let ((old-byte (realpart cdatum))
674 (numo (imagpart cdatum)))
675 (cond
676 ((= 0 numo)
678 ((zerop (ldb (byte 1 (1- (* 8 numo))) old-byte))
679 cdatum)
680 (t (complex (- old-byte (dpb 1 (byte 1 (* 8 numo)) 0))
681 numo)))))
683 (defun sign-extend (old-byte numo)
684 "Given a two's complement signed byte (where the most significant
685 byte represents the sign), return the natural representation of
686 that byte (i.e. (255 1) => -1)."
687 (if (zerop (ldb (byte 1 (1- (* 8 numo))) old-byte))
688 old-byte
689 (- old-byte (dpb 1 (byte 1 (* 8 numo)) 0))))
692 (defun decode-instr-symbolic (template datum)
693 "Given an instruction-template (a description of an instructions layout),
694 decode a datum into a symbolic instr-symbolic object."
695 (let ((is (make-instr-symbolic))
696 (byte-pos 0))
697 ;; immediate
698 (setf (instr-symbolic-immediate is)
699 (complex (change-endian (ldb (byte (* 8 (template-instr-immediate-numo template))
700 (* 8 byte-pos))
701 datum)
702 (template-instr-immediate-numo template))
703 (template-instr-immediate-numo template)))
704 (incf byte-pos (template-instr-immediate-numo template))
705 ;; displacement
706 (setf (instr-symbolic-displacement is)
707 (sign-extend-complex
708 (complex (change-endian (ldb (byte (* 8 (template-instr-displacement-numo template))
709 (* 8 byte-pos))
710 datum)
711 (template-instr-displacement-numo template))
712 (template-instr-displacement-numo template))))
713 (incf byte-pos (template-instr-displacement-numo template))
714 ;; SIB
715 (when (template-instr-sib-p template)
716 (let ((sib (ldb (byte 8 (* 8 byte-pos)) datum)))
717 (setf (instr-symbolic-scale is) (ldb (byte 2 6) sib)
718 (instr-symbolic-index is) (ldb (byte 3 3) sib)
719 (instr-symbolic-base is) (ldb (byte 3 0) sib))
720 (incf byte-pos)))
721 ;; ModR/M
722 (when (template-instr-modr/m-p template)
723 (let ((modr/m (ldb (byte 8 (* 8 byte-pos)) datum)))
724 (setf (instr-symbolic-mod is) (ldb (byte 2 6) modr/m)
725 (instr-symbolic-reg is) (ldb (byte 3 3) modr/m)
726 (instr-symbolic-r/m is) (ldb (byte 3 0) modr/m))
727 (incf byte-pos)))
728 ;; Opcode
729 (setf (instr-symbolic-opcode is) (ldb (byte (* 8 (template-instr-opcode-numo template))
730 (* 8 byte-pos))
731 datum))
732 ;;; (assert (= (realpart datum)
733 ;;; (realpart (encode-instr-symbolic template is)))
734 ;;; (datum)
735 ;;; (error "~&instr-symbolic codec inconsistency: ~A / ~A / ~A / ~A~%"
736 ;;; datum
737 ;;; (realpart (encode-instr-symbolic template is))
738 ;;; is template))
739 (values is)))
742 (defun encode-instr-symbolic (template is)
743 (let ((byte-datum 0)
744 (byte-pos 0))
745 ;; immediate
746 (unless (zerop (template-instr-immediate-numo template))
747 (setf (ldb (byte (* 8 (template-instr-immediate-numo template))
748 (* 8 byte-pos))
749 byte-datum)
750 (change-endian (realpart (instr-symbolic-immediate is))
751 (template-instr-immediate-numo template)))
752 (incf byte-pos (template-instr-immediate-numo template)))
753 ;; displacement
754 (unless (zerop (template-instr-displacement-numo template))
755 (setf (ldb (byte (* 8 (template-instr-displacement-numo template))
756 (* 8 byte-pos))
757 byte-datum)
758 (change-endian (realpart (instr-symbolic-displacement is))
759 (template-instr-displacement-numo template)))
760 (incf byte-pos (template-instr-displacement-numo template)))
761 ;; SIB
762 (when (template-instr-sib-p template)
763 (let ((sib 0))
764 (setf (ldb (byte 2 6) sib) (instr-symbolic-scale is)
765 (ldb (byte 3 3) sib) (instr-symbolic-index is)
766 (ldb (byte 3 0) sib) (instr-symbolic-base is))
767 (setf (ldb (byte 8 (* 8 byte-pos)) byte-datum)
768 sib)
769 (incf byte-pos 1)))
770 ;; ModR/M
771 (when (template-instr-modr/m-p template)
772 (let ((modr/m 0))
773 (setf (ldb (byte 2 6) modr/m) (instr-symbolic-mod is)
774 (ldb (byte 3 3) modr/m) (instr-symbolic-reg is)
775 (ldb (byte 3 0) modr/m) (instr-symbolic-r/m is))
776 (setf (ldb (byte 8 (* 8 byte-pos)) byte-datum)
777 modr/m)
778 (incf byte-pos 1)))
779 ;; Opcode
780 (setf (ldb (byte (* 8 (template-instr-opcode-numo template))
781 (* 8 byte-pos))
782 byte-datum)
783 (instr-symbolic-opcode is))
784 (incf byte-pos (template-instr-opcode-numo template))
785 (values (complex byte-datum byte-pos))))
787 ;;; ----------------------------------------------------------------
788 ;;; Disassemble functions
789 ;;; ----------------------------------------------------------------
791 (defun decode-by-template (read-octet-fn cdatum prefixes template)
792 "Given the current datum and list of prefixes and the template which
793 identifies the type of instruction, finish the job by reading the remaining
794 octets and creating the instruction (or prefix) object."
795 (ecase (type-of template)
796 ((instr-template)
797 ;; read remaining octets
798 (dotimes (i (- (template-instr-numo template)
799 (imagpart cdatum)))
800 (let ((fill-octet (funcall read-octet-fn)))
801 (unless fill-octet
802 (error "EOF in middle of an instruction (~W)." cdatum))
803 (setf cdatum (complex (logior (ash (realpart cdatum) 8)
804 fill-octet)
805 (1+ (imagpart cdatum))))))
806 (assert (template-match-datum-and-prefixes template
807 (ldb (byte (* 8 (template-numo template))
808 (* 8 (- (imagpart cdatum)
809 (template-numo template))))
810 (realpart cdatum))
811 (template-numo template)
812 prefixes)
813 (cdatum template)
814 "Decoding instruction, but ~A-template ~A doesn't match full datum ~A."
815 (template-instr-classname template)
816 (complex (template-value template)
817 (template-numo template))
818 cdatum)
819 ;; decode
820 (make-decode-instruction cdatum prefixes template))
821 ((prefix-template)
822 (multiple-value-bind (templates cdatum)
823 (template-lookup-next read-octet-fn)
824 (decode-filter read-octet-fn
825 cdatum
826 (cons (prefix-symbol (template-value template))
827 prefixes)
828 templates)))))
830 (defun decode-filter (read-octet-fn cdatum prefixes templates)
831 "Given an octet-reading function, the currently read datum, the list
832 of current prefixes and a list of eligible templates, attempt to
833 decode the instruction (or prefix) and return it.
835 This function works by reading the next octet, filter out which templates
836 are still eligible and recurse over the reduced list of templates. When
837 there is just one template left, we are home free."
838 (let ((filtered-templates (template-filter cdatum
839 prefixes
840 templates)))
841 (cond
842 ((= 0 (length filtered-templates)) ; no match
843 (when *error-on-unknown-instruction*
844 (error "No matching instruction for datum ~A." cdatum)))
845 ((= 1 (length filtered-templates)) ; found single match
846 (decode-by-template read-octet-fn
847 cdatum
848 prefixes
849 (first filtered-templates)))
850 ((< (imagpart cdatum) ; must read more octets
851 (apply #'min (mapcar #'template-numo
852 filtered-templates)))
853 (let ((new-octet (funcall read-octet-fn)))
854 (if (not new-octet)
855 (when *error-on-eof*
856 (error "EOF in instruction"))
857 (let ((new-cdatum (complex (dpb new-octet
858 (byte 8 0)
859 (ash (realpart cdatum) 8))
860 (1+ (imagpart cdatum)))))
861 (decode-filter read-octet-fn
862 new-cdatum
863 prefixes
864 filtered-templates)))))
865 (t ; many matches, try to prioritize
866 (let ((prioritized-templates (sort filtered-templates
868 :key #'template-priority)))
869 (assert (> (template-priority (first prioritized-templates))
870 (template-priority (second prioritized-templates)))
871 (cdatum)
872 "Inconsistent template set for datum ~A and prefixes ~A: [~A] v:~A/m:~A"
873 cdatum
874 prefixes
875 (mapcar #'(lambda (tt)
876 (etypecase tt
877 (instr-template (template-instr-classname tt))
878 (prefix-template 'prefix)))
879 filtered-templates)
880 (mapcar #'template-value
881 filtered-templates)
882 (mapcar #'template-mask
883 filtered-templates))
884 ;; We found a match by prioritization.
885 (decode-by-template read-octet-fn
886 cdatum
887 prefixes
888 (first prioritized-templates)))))))
891 (defun decode-read-octet (read-octet-fn)
892 "Disassemble the octets as returned by the (repeated) calling of
893 the READ-OCTET-FN funarg. The READ-OCTET-FN represents a stream of
894 octets which ends when the function returns nil."
895 (multiple-value-bind (templates cdatum)
896 (template-lookup-next read-octet-fn)
897 (decode-filter read-octet-fn
898 cdatum
899 nil ; prefixes
900 templates)))
902 (defun decode-datum (datum datum-numo)
903 "Disassemble a datum (i.e. a byte)."
904 (decode-read-octet #'(lambda () ; return consecutive octets of datum
905 (when (plusp datum-numo)
906 (decf datum-numo)
907 (ldb (byte 8 (* 8 datum-numo)) datum)))))
909 (defun decode-octet-list (&rest octet-list)
910 (let ((ol (copy-list octet-list)))
911 (decode-read-octet #'(lambda ()
912 (pop ol)))))
914 (defun decode (&rest octet-list)
915 (loop while octet-list
916 collect (decode-read-octet #'(lambda ()
917 (pop octet-list)))))
919 (defun gg (&rest octet-list)
920 (let ((i (apply #'ff octet-list)))
921 (cons (type-of i)
922 (mapcar #'operand-listform
923 (instruction-operands i)))))
925 (defun decode-sub-stream (stream length)
926 "Disassemble the contents of <stream> from its current position and
927 <length> octets on."
928 (loop
929 for counter from 0 by 1
930 with loop-length = length
931 while (plusp loop-length)
932 collecting (handler-bind
933 ((error #'(lambda (condition)
934 (declare (ignore condition))
935 (format t "~&;; file-position: ~A, icnt: ~A~%"
936 (file-position stream)
937 counter))))
938 (decode-read-octet #'(lambda ()
939 (when (plusp length)
940 (decf loop-length)
941 (read-byte stream)))))))