From 8775b797d8546448deab512e79de5cfde4facae6 Mon Sep 17 00:00:00 2001 From: rlaakso Date: Fri, 5 Aug 2005 13:13:29 +0000 Subject: [PATCH 01/16] Initial revision --- TODO | 20 + cpuid-vop.lisp | 96 + cpuid.lisp | 139 ++ example-test.lisp | 23 + generate-sse-instructions.lisp | 79 + sbcl-src/makepatch.sh | 4 + sbcl-src/patch_against_sbcl_0_9_3 | 229 +++ sbcl-src/src-093/compiler/x86/insts.lisp | 2670 ++++++++++++++++++++++++++++ sbcl-src/src-093/compiler/x86/vm.lisp | 451 +++++ sbcl-src/src/compiler/x86/insts.lisp | 2794 ++++++++++++++++++++++++++++++ sbcl-src/src/compiler/x86/vm.lisp | 469 +++++ scratch/.emacs.desktop | 82 + scratch/README | 2 + scratch/asm-t1.asm | 10 + scratch/foo.lisp | 159 ++ scratch/sse.lisp | 125 ++ scratch/sse2.lisp | 33 + sse-vop.lisp | 207 +++ 18 files changed, 7592 insertions(+) create mode 100644 TODO create mode 100644 cpuid-vop.lisp create mode 100644 cpuid.lisp create mode 100644 example-test.lisp create mode 100644 generate-sse-instructions.lisp create mode 100755 sbcl-src/makepatch.sh create mode 100644 sbcl-src/patch_against_sbcl_0_9_3 create mode 100644 sbcl-src/src-093/compiler/x86/insts.lisp create mode 100644 sbcl-src/src-093/compiler/x86/vm.lisp create mode 100644 sbcl-src/src/compiler/x86/insts.lisp create mode 100644 sbcl-src/src/compiler/x86/vm.lisp create mode 100644 scratch/.emacs.desktop create mode 100644 scratch/README create mode 100644 scratch/asm-t1.asm create mode 100644 scratch/foo.lisp create mode 100644 scratch/sse.lisp create mode 100644 scratch/sse2.lisp create mode 100644 sse-vop.lisp diff --git a/TODO b/TODO new file mode 100644 index 0000000..0260311 --- /dev/null +++ b/TODO @@ -0,0 +1,20 @@ + +operations from AMD manual: + + +sign: + +andps xmm0, #x8000 0000 8000 0000 .. + +neg: + +xorps xmm0, #x8000 0000 8000 0000 .. + +abs: + +andps xmm0, #x7FFF FFFF 7FFF FFFF .. + +clear: + +xorps xmm0, xmm0 + diff --git a/cpuid-vop.lisp b/cpuid-vop.lisp new file mode 100644 index 0000000..2f79c70 --- /dev/null +++ b/cpuid-vop.lisp @@ -0,0 +1,96 @@ +(in-package :sb-c) +(ignore-errors (defknown cl-user::%read-cpu (unsigned-byte-32 simple-array-unsigned-byte-16) nil)) + +(in-package :sb-vm) + +(defmacro vect-ea (vect idx) + `(make-ea :dword :base ,vect :index ,idx + :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag))) + +(define-vop (%read-cpu/x86) + (:translate cl-user::%read-cpu) + (:policy :fast) + + (:args (n :scs (unsigned-reg) :target eax) + (result :scs (descriptor-reg))) + (:arg-types unsigned-byte-32 simple-array-unsigned-byte-16) + + (:temporary (:sc unsigned-reg :offset eax-offset :from (:argument 0)) eax) + (:temporary (:sc unsigned-reg :offset ebx-offset) ebx) + (:temporary (:sc unsigned-reg :offset ecx-offset) ecx) + (:temporary (:sc unsigned-reg :offset edx-offset) edx) + (:temporary (:sc unsigned-reg) index) + + (:generator 10 + + (inst xor index index) + + (inst mov eax n) + + ;; zero regs + (inst xor ebx ebx) + (inst xor ecx ecx) + (inst xor edx edx) + + ;; cpuid + (inst cpuid) + + (inst push edx) + + ;; EAX + (inst mov edx eax) + (inst shr edx 16) + (inst and edx #xFFFF) + (inst mov (vect-ea result index) edx) + (inst add index 2) + + (inst mov edx eax) + (inst and edx #xFFFF) + (inst mov (vect-ea result index) edx) + (inst add index 2) + + (inst pop edx) + + ;; EBX + + (inst mov eax ebx) + (inst shr eax 16) + (inst and eax #xFFFF) + (inst mov (vect-ea result index) eax) + (inst add index 2) + + (inst mov eax ebx) + (inst and eax #xFFFF) + (inst mov (vect-ea result index) eax) + (inst add index 2) + + ;; ECX + + (inst mov eax ecx) + (inst shr eax 16) + (inst and eax #xFFFF) + (inst mov (vect-ea result index) eax) + (inst add index 2) + + (inst mov eax ecx) + (inst and eax #xFFFF) + (inst mov (vect-ea result index) eax) + (inst add index 2) + + ;; EDX + (inst mov eax edx) + (inst shr eax 16) + (inst and eax #xFFFF) + (inst mov (vect-ea result index) eax) + (inst add index 2) + + (inst mov eax edx) + (inst and eax #xFFFF) + (inst mov (vect-ea result index) eax) + (inst add index 2) + )) + + + + + diff --git a/cpuid.lisp b/cpuid.lisp new file mode 100644 index 0000000..55c4f9c --- /dev/null +++ b/cpuid.lisp @@ -0,0 +1,139 @@ +(defpackage :cpuid (:use :cl)) +(in-package :cpuid) + + +(defvar +cpu-std-feat-fields+ + #(:fpu :vme :de :pse :tsc :k86-msr :pae :mce :cmpxchg8b :apic :res10 :sysenter/exit :mtrr :gpe :mca :cmov :pat :pse-36 :psn :clflush :res20 :ds :acpi :mmx :fxsave/rstor :sse :sse2 :self-snoop :htt :tm :res30 :pbe)) + +(defvar +cpu-std-feat-fields-ecx+ + #(:sse3 :res1 :res2 :monitor :ds-cpl :res5 :res6 :eist :tm2 :res9 :cid :res11 :res12 :cmpxchg16b :stpm :res14 :res15 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0)) + +;; low bits are mostly same as in std-feat, so skip them.. +(defvar +cpu-ext-feat-fields+ + #(0 1 2 3 4 5 6 7 8 9 10 :syscall/ret 12 13 14 15 16 17 18 19 :nx 21 :mmx-ext :mmx 24 25 26 27 28 :longmode/em64t :3dnow-ext :3dnow)) + + +(defvar *cpu-features* nil) +(defvar *cpu-vendor* nil) ;; eg authenticamd +(defvar *cpu-name* nil) ;; eg AMD Athlon XP +(defvar *cpu-cache* nil) + + +(defun inspect-cpu () + + (let ((array (make-array 8 :element-type '(unsigned-byte 16) :initial-element 0)) c d) + + ;; vendor + + (sb-sys:%primitive sb-vm::%read-cpu/x86 #x00000000 array) + + (let ((temp (make-array 12 :element-type '(unsigned-byte 8) :initial-element 0))) + (flet ((load-word (i u v) (setf (aref temp (+ i 3)) (ash (aref array u) -8) + (aref temp (+ i 2)) (logand (aref array u) 255) + (aref temp (+ i 1)) (ash (aref array v) -8) + (aref temp (+ i 0)) (logand (aref array v) 255)))) + (loop for i from 0 below 12 by 4 + for (u v) in '((2 3) (6 7) (4 5)) + do (load-word i u v)) + + (setq *cpu-vendor* (map 'string #'code-char temp)))) + + + ;; std features + (setq *cpu-features* nil) + + (sb-sys:%primitive sb-vm::%read-cpu/x86 #x00000001 array) + + (setq +;; a (logior (ash (aref array 0) 16) (aref array 1)) +;; b (logior (ash (aref array 2) 16) (aref array 3)) + c (logior (ash (aref array 4) 16) (aref array 5)) + d (logior (ash (aref array 6) 16) (aref array 7))) + + (loop for i from 0 below 32 do + (if (= (ldb (byte 1 i) d) 1) + (push (aref +cpu-std-feat-fields+ i) *cpu-features*))) + + (loop for i from 0 below 32 do + (if (= (ldb (byte 1 i) c) 1) + (push (aref +cpu-std-feat-fields-ecx+ i) *cpu-features*)))) + ) + +(defun inspect-cpu-ext () + (let ((array (make-array 8 :element-type '(unsigned-byte 16) :initial-element 0)) + c d (max-ext-func 0)) + + ;; determine max ext func + (sb-sys:%primitive sb-vm::%read-cpu/x86 #x80000000 array) + (setq max-ext-func (logior (ash (aref array 0) 16) (aref array 1))) + + ;; ext features (AMD/Intel) + (if (>= max-ext-func #x80000001) + (progn + (sb-sys:%primitive sb-vm::%read-cpu/x86 #x80000001 array) + (setq d (logior (ash (aref array 6) 16) (aref array 7))) + + (loop for i from 0 below 32 do + (let ((flag (ldb (byte 1 i) d)) + (feat (aref +cpu-ext-feat-fields+ i))) + (if (and (= flag 1) (keywordp feat)) + (pushnew feat *cpu-features*)))))) + + ;; cpu name (AMD/Intel) + (setq *cpu-name* nil) + (if (>= max-ext-func #x80000004) + (let ((cpuname #()) + (temp (make-array 16 :element-type '(unsigned-byte 8) :initial-element 0))) + (flet ((load-word (i u v) (setf (aref temp (+ i 3)) (ash (aref array u) -8) + (aref temp (+ i 2)) (logand (aref array u) 255) + (aref temp (+ i 1)) (ash (aref array v) -8) + (aref temp (+ i 0)) (logand (aref array v) 255)))) + (flet ((conc-word () + (loop for i from 0 below 16 by 4 + for (u v) in '((0 1) (2 3) (4 5) (6 7)) + do + (load-word i u v)) + (setq cpuname (concatenate '(simple-array (unsigned-byte 8) (*)) cpuname temp)))) + + (sb-sys:%primitive sb-vm::%read-cpu/x86 #x80000002 array) (conc-word) + (sb-sys:%primitive sb-vm::%read-cpu/x86 #x80000003 array) (conc-word) + (sb-sys:%primitive sb-vm::%read-cpu/x86 #x80000004 array) (conc-word) + )) + + ;; cut to null + (if (position 0 cpuname) + (setq cpuname (subseq cpuname 0 (position 0 cpuname)))) + ;; coerce to string + (setq cpuname (map 'string #'code-char cpuname)) + (setq *cpu-name* cpuname))) + + ;; cache (AMD) + (setq *cpu-cache* nil) + (if (>= max-ext-func #x80000005) + (progn + (sb-sys:%primitive sb-vm::%read-cpu/x86 #x80000005 array) + (setq c (logior (ash (aref array 4) 16) (aref array 5)) + d (logior (ash (aref array 6) 16) (aref array 7))) + + (push (list :l1-data :size (* 1024 (ldb (byte 8 24) c))) *cpu-cache*) + (push (list :l1-inst :size (* 1024 (ldb (byte 8 24) d))) *cpu-cache*))) + + + (if (>= max-ext-func #x80000006) + (progn + (sb-sys:%primitive sb-vm::%read-cpu/x86 #x80000006 array) + (setq c (logior (ash (aref array 4) 16) (aref array 5))) + (push (list :l2 :size (* 1024 (ldb (byte 16 16) c))) *cpu-cache*))) + + t)) + +(defun dump-cpu () + + ;; dump + + (format t "~&") + (format t "Vendor: ~A.~%" *cpu-vendor*) + (format t "Features: ~S.~%" *cpu-features*) + (format t "cpu: ~A, cache: ~S.~%" *cpu-name* *cpu-cache*) + ) diff --git a/example-test.lisp b/example-test.lisp new file mode 100644 index 0000000..222f522 --- /dev/null +++ b/example-test.lisp @@ -0,0 +1,23 @@ +(in-package :cl-user) + +(defun test-foo () + (let ((arr1 (make-array 10 :element-type 'single-float :initial-element 0f0)) + (arr2 (make-array 10 :element-type 'single-float :initial-element 0f0))) + + (loop for i from 0 below 10 + do (setf + (aref arr1 i) (float (* i 100)) + (aref arr2 i) (float i))) + + (format t "Before: ~S~%~S~%" arr1 arr2) + + (sb-sys:%primitive sb-vm::%sse-sqrt/simple-array-single-float-1 arr2 arr1 4) + + (format t "After: ~S~%~S~%" arr1 arr2) + + (sb-sys:%primitive sb-vm::%sse-recip/simple-array-single-float-1 arr1 arr2 4) + + (format t "After: ~S~%~S~%" arr1 arr2) + + )) + diff --git a/generate-sse-instructions.lisp b/generate-sse-instructions.lisp new file mode 100644 index 0000000..6717ebc --- /dev/null +++ b/generate-sse-instructions.lisp @@ -0,0 +1,79 @@ +#| + +instruction reference: + +http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/26568.pdf + +|# + +(declaim (optimize (debug 3))) + +(defun emit-ops (ops) + (loop for op in ops + collect `(emit-byte segment ,op) into result + finally (return result))) + + +(defun gen-ops (&optional (stream t)) + + ;; single prec packed sse + ;;; like : + ;;; ADDPS xmm1, xmm2/mem128 0F 58 /r + (loop for (inst . ops) in + '( + ;; single precision float + (addps #x0F #x58) + (addsubps #xF2 #x0F #xD0) + (andnps #x0F #x55) + (andps #x0F #x54) + (divps #x0F #x5E) + (maxps #x0F #x5F) + (minps #x0F #x5D) + (mulps #x0F #x59) + (orps #x0F #x56) + (rcpps #x0F #x53) + (rsqrtps #x0F #x52) + (sqrtps #x0F #x51) + (subps #x0F #x5C) + (xorps #x0F #x57) + ;; double precision float + (addpd #x66 #x0F #x58) + (addsubpd #x66 #x0F #xD0) + (andnpd #x66 #x0F #x55) + (andpd #x66 #x0F #x54) + (divpd #x66 #x0F #x5E) + (maxpd #x66 #x0F #x5F) + (minpd #x66 #x0F #x5D) + (mulpd #x66 #x0F #x59) + (orps #x66 #x0F #x56) + (rcppd #x66 #x0F #x53) + (rsqrtpd #x66 #x0F #x52) + (sqrtpd #x66 #x0F #x51) + (subpd #x66 #x0F #x5C) + (xorpd #x66 #x0F #x57) + ) + do + (format stream "~S~%~%" + `(define-instruction ,(intern (symbol-name inst)) (segment dst src) + (:emitter + ,@(emit-ops ops) + (emit-ea segment src (reg-tn-encoding dst)))))) + + ;; MOVUPS + (loop for (inst ops-m2r ops-r2m) in + '( + (movups (#x0F #x10) (#x0F #x11)) + (movupd (#x66 #x0F #x10) (#x66 #x0F #x11))) + do + (format stream "~S~%~%" + `(define-instruction ,(intern (symbol-name inst)) (segment dst src) + (:emitter + (cond ((sse-register-p dst) + ,@(emit-ops ops-m2r) + (emit-ea segment src (reg-tn-encoding dst))) + (t ,@(emit-ops ops-r2m) + (emit-ea segment dst (reg-tn-encoding src))))))))) + +(defun gen-ops-to-file (filename) + (with-open-file (stream filename :direction :output :if-exists :supersede) + (gen-ops stream))) \ No newline at end of file diff --git a/sbcl-src/makepatch.sh b/sbcl-src/makepatch.sh new file mode 100755 index 0000000..641b804 --- /dev/null +++ b/sbcl-src/makepatch.sh @@ -0,0 +1,4 @@ +#!/bin/sh +find . -name '.emacs*' |xargs rm 2>/dev/null +find . -name '*~' |xargs rm 2>/dev/null +diff -Naur src-093 src > patch_against_sbcl_0_9_3 diff --git a/sbcl-src/patch_against_sbcl_0_9_3 b/sbcl-src/patch_against_sbcl_0_9_3 new file mode 100644 index 0000000..234ef9d --- /dev/null +++ b/sbcl-src/patch_against_sbcl_0_9_3 @@ -0,0 +1,229 @@ +diff -Naur src-093/compiler/x86/insts.lisp src/compiler/x86/insts.lisp +--- src-093/compiler/x86/insts.lisp 2005-08-05 15:31:17.723664255 +0300 ++++ src/compiler/x86/insts.lisp 2005-08-05 15:42:36.536109257 +0300 +@@ -192,6 +192,7 @@ + (:byte 8) + (:word 16) + (:dword 32) ++ (:dqword 128) + (:float 32) + (:double 64))) + +@@ -671,7 +672,7 @@ + + (defun reg-tn-encoding (tn) + (declare (type tn tn)) +- (aver (eq (sb-name (sc-sb (tn-sc tn))) 'registers)) ++; (aver (eq (sb-name (sc-sb (tn-sc tn))) 'registers)) + (let ((offset (tn-offset tn))) + (logior (ash (logand offset 1) 2) + (ash offset -1)))) +@@ -718,6 +719,8 @@ + (ecase (sb-name (sc-sb (tn-sc thing))) + (registers + (emit-mod-reg-r/m-byte segment #b11 reg (reg-tn-encoding thing))) ++ (sse-registers ++ (emit-mod-reg-r/m-byte segment #b11 reg (reg-tn-encoding thing))) + (stack + ;; Convert stack tns into an index off of EBP. + (let ((disp (- (* (1+ (tn-offset thing)) n-word-bytes)))) +@@ -830,6 +833,10 @@ + (and (tn-p thing) + (eq (sb-name (sc-sb (tn-sc thing))) 'registers))) + ++(defun sse-register-p (thing) ++ (and (tn-p thing) ++ (eq (sb-name (sc-sb (tn-sc thing))) 'sse-registers))) ++ + (defun accumulator-p (thing) + (and (register-p thing) + (= (tn-offset thing) 0))) +@@ -2042,6 +2049,123 @@ + (:emitter + (emit-header-data segment return-pc-header-widetag))) + ++ ++;;;; SSE instructions ++;;;; ++;;;; Automatically generated ++ ++ ++(DEFINE-INSTRUCTION ADDPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 88) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION ADDSUBPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 242) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 208) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION ANDNPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 85) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION ANDPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 84) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION DIVPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 94) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MAXPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 95) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MINPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 93) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MULPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 89) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION ORPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 86) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION RCPPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 83) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION RSQRTPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 82) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION SQRTPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 81) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION SUBPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 92) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION XORPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 87) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++;;; SSE MOVE ++ ++(DEFINE-INSTRUCTION MOVUPS (SEGMENT DST SRC) ++ (:EMITTER ++ (COND ++ ((SSE-REGISTER-P DST) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 16) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) ++ (T (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 17) ++ (EMIT-EA SEGMENT DST (REG-TN-ENCODING SRC)))))) ++ ++ ++;;; CPUID ++ ++ ++(define-instruction cpuid (segment) ++ (:emitter ++ (emit-byte segment #x0F) ++ (emit-byte segment #xA2))) ++ ++ ++ ++ ++ + ;;;; fp instructions + ;;;; + ;;;; FIXME: This section said "added by jrd", which should end up in CREDITS. +diff -Naur src-093/compiler/x86/vm.lisp src/compiler/x86/vm.lisp +--- src-093/compiler/x86/vm.lisp 2005-08-05 15:32:19.810183044 +0300 ++++ src/compiler/x86/vm.lisp 2005-08-05 15:38:26.784310770 +0300 +@@ -21,7 +21,8 @@ + (defvar *byte-register-names* (make-array 8 :initial-element nil)) + (defvar *word-register-names* (make-array 16 :initial-element nil)) + (defvar *dword-register-names* (make-array 16 :initial-element nil)) +- (defvar *float-register-names* (make-array 8 :initial-element nil))) ++ (defvar *float-register-names* (make-array 8 :initial-element nil)) ++ (defvar *dqword-register-names* (make-array 8 :initial-element nil))) + + (macrolet ((defreg (name offset size) + (let ((offset-sym (symbolicate name "-OFFSET")) +@@ -91,6 +92,17 @@ + (defreg fr7 7 :float) + (defregset *float-regs* fr0 fr1 fr2 fr3 fr4 fr5 fr6 fr7) + ++ ;; sse registers ++ (defreg xmm0 0 :dqword) ++ (defreg xmm1 1 :dqword) ++ (defreg xmm2 2 :dqword) ++ (defreg xmm3 3 :dqword) ++ (defreg xmm4 4 :dqword) ++ (defreg xmm5 5 :dqword) ++ (defreg xmm6 6 :dqword) ++ (defreg xmm7 7 :dqword) ++ (defregset *sse-regs* xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7) ++ + ;; registers used to pass arguments + ;; + ;; the number of arguments/return values passed in registers +@@ -118,6 +130,8 @@ + ;;; the new way: + (define-storage-base float-registers :finite :size 8) + ++(define-storage-base sse-registers :finite :size 8) ++ + (define-storage-base stack :unbounded :size 8) + (define-storage-base constant :non-packed) + (define-storage-base immediate-constant :non-packed) +@@ -320,6 +334,8 @@ + :save-p t + :alternate-scs (complex-long-stack)) + ++ (sse-reg sse-registers ++ :locations #.*sse-regs*) + ;; a catch or unwind block + (catch-block stack :element-size kludge-nondeterministic-catch-block-size)) + +@@ -337,6 +353,7 @@ + ;;; These are used to (at least) determine operand size. + (defparameter *float-sc-names* '(single-reg)) + (defparameter *double-sc-names* '(double-reg double-stack)) ++(defparameter *dqword-sc-names* '(sse-reg)) + ) ; EVAL-WHEN + + ;;;; miscellaneous TNs for the various registers +@@ -444,6 +461,7 @@ + ;; FIXME: Shouldn't this be an ERROR? + (format nil "" offset sc-name)))) + (float-registers (format nil "FR~D" offset)) ++ (sse-registers (format nil "XMM~D" offset)) + (stack (format nil "S~D" offset)) + (constant (format nil "Const~D" offset)) + (immediate-constant "Immed") diff --git a/sbcl-src/src-093/compiler/x86/insts.lisp b/sbcl-src/src-093/compiler/x86/insts.lisp new file mode 100644 index 0000000..9eed6f8 --- /dev/null +++ b/sbcl-src/src-093/compiler/x86/insts.lisp @@ -0,0 +1,2670 @@ +;;;; that part of the description of the x86 instruction set (for +;;;; 80386 and above) which can live on the cross-compilation host + +;;;; This software is part of the SBCL system. See the README file for +;;;; more information. +;;;; +;;;; This software is derived from the CMU CL system, which was +;;;; written at Carnegie Mellon University and released into the +;;;; public domain. The software is in the public domain and is +;;;; provided with absolutely no warranty. See the COPYING and CREDITS +;;;; files for more information. + +(in-package "SB!VM") +;;; FIXME: SB!DISASSEM: prefixes are used so widely in this file that +;;; I wonder whether the separation of the disassembler from the +;;; virtual machine is valid or adds value. + +;;; Note: In CMU CL, this used to be a call to SET-DISASSEM-PARAMS. +(setf sb!disassem:*disassem-inst-alignment-bytes* 1) + +(deftype reg () '(unsigned-byte 3)) + +(def!constant +default-operand-size+ :dword) + +(eval-when (#-sb-xc :compile-toplevel :load-toplevel :execute) + +(defun offset-next (value dstate) + (declare (type integer value) + (type sb!disassem:disassem-state dstate)) + (+ (sb!disassem:dstate-next-addr dstate) value)) + +(defparameter *default-address-size* + ;; Actually, :DWORD is the only one really supported. + :dword) + +(defparameter *byte-reg-names* + #(al cl dl bl ah ch dh bh)) +(defparameter *word-reg-names* + #(ax cx dx bx sp bp si di)) +(defparameter *dword-reg-names* + #(eax ecx edx ebx esp ebp esi edi)) + +(defun print-reg-with-width (value width stream dstate) + (declare (ignore dstate)) + (princ (aref (ecase width + (:byte *byte-reg-names*) + (:word *word-reg-names*) + (:dword *dword-reg-names*)) + value) + stream) + ;; XXX plus should do some source-var notes + ) + +(defun print-reg (value stream dstate) + (declare (type reg value) + (type stream stream) + (type sb!disassem:disassem-state dstate)) + (print-reg-with-width value + (sb!disassem:dstate-get-prop dstate 'width) + stream + dstate)) + +(defun print-word-reg (value stream dstate) + (declare (type reg value) + (type stream stream) + (type sb!disassem:disassem-state dstate)) + (print-reg-with-width value + (or (sb!disassem:dstate-get-prop dstate 'word-width) + +default-operand-size+) + stream + dstate)) + +(defun print-byte-reg (value stream dstate) + (declare (type reg value) + (type stream stream) + (type sb!disassem:disassem-state dstate)) + (print-reg-with-width value :byte stream dstate)) + +(defun print-addr-reg (value stream dstate) + (declare (type reg value) + (type stream stream) + (type sb!disassem:disassem-state dstate)) + (print-reg-with-width value *default-address-size* stream dstate)) + +(defun print-reg/mem (value stream dstate) + (declare (type (or list reg) value) + (type stream stream) + (type sb!disassem:disassem-state dstate)) + (if (typep value 'reg) + (print-reg value stream dstate) + (print-mem-access value stream nil dstate))) + +;; Same as print-reg/mem, but prints an explicit size indicator for +;; memory references. +(defun print-sized-reg/mem (value stream dstate) + (declare (type (or list reg) value) + (type stream stream) + (type sb!disassem:disassem-state dstate)) + (if (typep value 'reg) + (print-reg value stream dstate) + (print-mem-access value stream t dstate))) + +(defun print-byte-reg/mem (value stream dstate) + (declare (type (or list reg) value) + (type stream stream) + (type sb!disassem:disassem-state dstate)) + (if (typep value 'reg) + (print-byte-reg value stream dstate) + (print-mem-access value stream t dstate))) + +(defun print-word-reg/mem (value stream dstate) + (declare (type (or list reg) value) + (type stream stream) + (type sb!disassem:disassem-state dstate)) + (if (typep value 'reg) + (print-word-reg value stream dstate) + (print-mem-access value stream nil dstate))) + +(defun print-label (value stream dstate) + (declare (ignore dstate)) + (sb!disassem:princ16 value stream)) + +;;; Returns either an integer, meaning a register, or a list of +;;; (BASE-REG OFFSET INDEX-REG INDEX-SCALE), where any component +;;; may be missing or nil to indicate that it's not used or has the +;;; obvious default value (e.g., 1 for the index-scale). +(defun prefilter-reg/mem (value dstate) + (declare (type list value) + (type sb!disassem:disassem-state dstate)) + (let ((mod (car value)) + (r/m (cadr value))) + (declare (type (unsigned-byte 2) mod) + (type (unsigned-byte 3) r/m)) + (cond ((= mod #b11) + ;; registers + r/m) + ((= r/m #b100) + ;; sib byte + (let ((sib (sb!disassem:read-suffix 8 dstate))) + (declare (type (unsigned-byte 8) sib)) + (let ((base-reg (ldb (byte 3 0) sib)) + (index-reg (ldb (byte 3 3) sib)) + (index-scale (ldb (byte 2 6) sib))) + (declare (type (unsigned-byte 3) base-reg index-reg) + (type (unsigned-byte 2) index-scale)) + (let* ((offset + (case mod + (#b00 + (if (= base-reg #b101) + (sb!disassem:read-signed-suffix 32 dstate) + nil)) + (#b01 + (sb!disassem:read-signed-suffix 8 dstate)) + (#b10 + (sb!disassem:read-signed-suffix 32 dstate))))) + (list (if (and (= mod #b00) (= base-reg #b101)) nil base-reg) + offset + (if (= index-reg #b100) nil index-reg) + (ash 1 index-scale)))))) + ((and (= mod #b00) (= r/m #b101)) + (list nil (sb!disassem:read-signed-suffix 32 dstate)) ) + ((= mod #b00) + (list r/m)) + ((= mod #b01) + (list r/m (sb!disassem:read-signed-suffix 8 dstate))) + (t ; (= mod #b10) + (list r/m (sb!disassem:read-signed-suffix 32 dstate)))))) + + +;;; This is a sort of bogus prefilter that just stores the info globally for +;;; other people to use; it probably never gets printed. +(defun prefilter-width (value dstate) + (setf (sb!disassem:dstate-get-prop dstate 'width) + (if (zerop value) + :byte + (let ((word-width + ;; set by a prefix instruction + (or (sb!disassem:dstate-get-prop dstate 'word-width) + +default-operand-size+))) + (when (not (eql word-width +default-operand-size+)) + ;; Reset it. + (setf (sb!disassem:dstate-get-prop dstate 'word-width) + +default-operand-size+)) + word-width)))) + +(defun read-address (value dstate) + (declare (ignore value)) ; always nil anyway + (sb!disassem:read-suffix (width-bits *default-address-size*) dstate)) + +(defun width-bits (width) + (ecase width + (:byte 8) + (:word 16) + (:dword 32) + (:float 32) + (:double 64))) + +) ; EVAL-WHEN + +;;;; disassembler argument types + +(sb!disassem:define-arg-type displacement + :sign-extend t + :use-label #'offset-next + :printer (lambda (value stream dstate) + (sb!disassem:maybe-note-assembler-routine value nil dstate) + (print-label value stream dstate))) + +(sb!disassem:define-arg-type accum + :printer (lambda (value stream dstate) + (declare (ignore value) + (type stream stream) + (type sb!disassem:disassem-state dstate)) + (print-reg 0 stream dstate))) + +(sb!disassem:define-arg-type word-accum + :printer (lambda (value stream dstate) + (declare (ignore value) + (type stream stream) + (type sb!disassem:disassem-state dstate)) + (print-word-reg 0 stream dstate))) + +(sb!disassem:define-arg-type reg + :printer #'print-reg) + +(sb!disassem:define-arg-type addr-reg + :printer #'print-addr-reg) + +(sb!disassem:define-arg-type word-reg + :printer #'print-word-reg) + +(sb!disassem:define-arg-type imm-addr + :prefilter #'read-address + :printer #'print-label) + +(sb!disassem:define-arg-type imm-data + :prefilter (lambda (value dstate) + (declare (ignore value)) ; always nil anyway + (sb!disassem:read-suffix + (width-bits (sb!disassem:dstate-get-prop dstate 'width)) + dstate))) + +(sb!disassem:define-arg-type signed-imm-data + :prefilter (lambda (value dstate) + (declare (ignore value)) ; always nil anyway + (let ((width (sb!disassem:dstate-get-prop dstate 'width))) + (sb!disassem:read-signed-suffix (width-bits width) dstate)))) + +(sb!disassem:define-arg-type signed-imm-byte + :prefilter (lambda (value dstate) + (declare (ignore value)) ; always nil anyway + (sb!disassem:read-signed-suffix 8 dstate))) + +(sb!disassem:define-arg-type signed-imm-dword + :prefilter (lambda (value dstate) + (declare (ignore value)) ; always nil anyway + (sb!disassem:read-signed-suffix 32 dstate))) + +(sb!disassem:define-arg-type imm-word + :prefilter (lambda (value dstate) + (declare (ignore value)) ; always nil anyway + (let ((width + (or (sb!disassem:dstate-get-prop dstate 'word-width) + +default-operand-size+))) + (sb!disassem:read-suffix (width-bits width) dstate)))) + +(sb!disassem:define-arg-type signed-imm-word + :prefilter (lambda (value dstate) + (declare (ignore value)) ; always nil anyway + (let ((width + (or (sb!disassem:dstate-get-prop dstate 'word-width) + +default-operand-size+))) + (sb!disassem:read-signed-suffix (width-bits width) dstate)))) + +;;; needed for the ret imm16 instruction +(sb!disassem:define-arg-type imm-word-16 + :prefilter (lambda (value dstate) + (declare (ignore value)) ; always nil anyway + (sb!disassem:read-suffix 16 dstate))) + +(sb!disassem:define-arg-type reg/mem + :prefilter #'prefilter-reg/mem + :printer #'print-reg/mem) +(sb!disassem:define-arg-type sized-reg/mem + ;; Same as reg/mem, but prints an explicit size indicator for + ;; memory references. + :prefilter #'prefilter-reg/mem + :printer #'print-sized-reg/mem) +(sb!disassem:define-arg-type byte-reg/mem + :prefilter #'prefilter-reg/mem + :printer #'print-byte-reg/mem) +(sb!disassem:define-arg-type word-reg/mem + :prefilter #'prefilter-reg/mem + :printer #'print-word-reg/mem) + +;;; added by jrd +(eval-when (#-sb-xc :compile-toplevel :load-toplevel :execute) +(defun print-fp-reg (value stream dstate) + (declare (ignore dstate)) + (format stream "FR~D" value)) +(defun prefilter-fp-reg (value dstate) + ;; just return it + (declare (ignore dstate)) + value) +) ; EVAL-WHEN +(sb!disassem:define-arg-type fp-reg + :prefilter #'prefilter-fp-reg + :printer #'print-fp-reg) + +(sb!disassem:define-arg-type width + :prefilter #'prefilter-width + :printer (lambda (value stream dstate) + (if;; (zerop value) + (or (null value) + (and (numberp value) (zerop value))) ; zzz jrd + (princ 'b stream) + (let ((word-width + ;; set by a prefix instruction + (or (sb!disassem:dstate-get-prop dstate 'word-width) + +default-operand-size+))) + (princ (schar (symbol-name word-width) 0) stream))))) + +(eval-when (:compile-toplevel :load-toplevel :execute) +(defparameter *conditions* + '((:o . 0) + (:no . 1) + (:b . 2) (:nae . 2) (:c . 2) + (:nb . 3) (:ae . 3) (:nc . 3) + (:eq . 4) (:e . 4) (:z . 4) + (:ne . 5) (:nz . 5) + (:be . 6) (:na . 6) + (:nbe . 7) (:a . 7) + (:s . 8) + (:ns . 9) + (:p . 10) (:pe . 10) + (:np . 11) (:po . 11) + (:l . 12) (:nge . 12) + (:nl . 13) (:ge . 13) + (:le . 14) (:ng . 14) + (:nle . 15) (:g . 15))) +(defparameter *condition-name-vec* + (let ((vec (make-array 16 :initial-element nil))) + (dolist (cond *conditions*) + (when (null (aref vec (cdr cond))) + (setf (aref vec (cdr cond)) (car cond)))) + vec)) +) ; EVAL-WHEN + +;;; Set assembler parameters. (In CMU CL, this was done with +;;; a call to a macro DEF-ASSEMBLER-PARAMS.) +(eval-when (:compile-toplevel :load-toplevel :execute) + (setf sb!assem:*assem-scheduler-p* nil)) + +(sb!disassem:define-arg-type condition-code + :printer *condition-name-vec*) + +(defun conditional-opcode (condition) + (cdr (assoc condition *conditions* :test #'eq))) + +;;;; disassembler instruction formats + +(eval-when (:compile-toplevel :execute) + (defun swap-if (direction field1 separator field2) + `(:if (,direction :constant 0) + (,field1 ,separator ,field2) + (,field2 ,separator ,field1)))) + +(sb!disassem:define-instruction-format (byte 8 :default-printer '(:name)) + (op :field (byte 8 0)) + ;; optional fields + (accum :type 'accum) + (imm)) + +(sb!disassem:define-instruction-format (simple 8) + (op :field (byte 7 1)) + (width :field (byte 1 0) :type 'width) + ;; optional fields + (accum :type 'accum) + (imm)) + +;;; Same as simple, but with direction bit +(sb!disassem:define-instruction-format (simple-dir 8 :include 'simple) + (op :field (byte 6 2)) + (dir :field (byte 1 1))) + +;;; Same as simple, but with the immediate value occurring by default, +;;; and with an appropiate printer. +(sb!disassem:define-instruction-format (accum-imm 8 + :include 'simple + :default-printer '(:name + :tab accum ", " imm)) + (imm :type 'imm-data)) + +(sb!disassem:define-instruction-format (reg-no-width 8 + :default-printer '(:name :tab reg)) + (op :field (byte 5 3)) + (reg :field (byte 3 0) :type 'word-reg) + ;; optional fields + (accum :type 'word-accum) + (imm)) + +;;; adds a width field to reg-no-width +(sb!disassem:define-instruction-format (reg 8 + :default-printer '(:name :tab reg)) + (op :field (byte 4 4)) + (width :field (byte 1 3) :type 'width) + (reg :field (byte 3 0) :type 'reg) + ;; optional fields + (accum :type 'accum) + (imm) + ) + +;;; Same as reg, but with direction bit +(sb!disassem:define-instruction-format (reg-dir 8 :include 'reg) + (op :field (byte 3 5)) + (dir :field (byte 1 4))) + +(sb!disassem:define-instruction-format (two-bytes 16 + :default-printer '(:name)) + (op :fields (list (byte 8 0) (byte 8 8)))) + +(sb!disassem:define-instruction-format (reg-reg/mem 16 + :default-printer + `(:name :tab reg ", " reg/mem)) + (op :field (byte 7 1)) + (width :field (byte 1 0) :type 'width) + (reg/mem :fields (list (byte 2 14) (byte 3 8)) + :type 'reg/mem) + (reg :field (byte 3 11) :type 'reg) + ;; optional fields + (imm)) + +;;; same as reg-reg/mem, but with direction bit +(sb!disassem:define-instruction-format (reg-reg/mem-dir 16 + :include 'reg-reg/mem + :default-printer + `(:name + :tab + ,(swap-if 'dir 'reg/mem ", " 'reg))) + (op :field (byte 6 2)) + (dir :field (byte 1 1))) + +;;; Same as reg-rem/mem, but uses the reg field as a second op code. +(sb!disassem:define-instruction-format (reg/mem 16 + :default-printer '(:name :tab reg/mem)) + (op :fields (list (byte 7 1) (byte 3 11))) + (width :field (byte 1 0) :type 'width) + (reg/mem :fields (list (byte 2 14) (byte 3 8)) + :type 'sized-reg/mem) + ;; optional fields + (imm)) + +;;; Same as reg/mem, but with the immediate value occurring by default, +;;; and with an appropiate printer. +(sb!disassem:define-instruction-format (reg/mem-imm 16 + :include 'reg/mem + :default-printer + '(:name :tab reg/mem ", " imm)) + (reg/mem :type 'sized-reg/mem) + (imm :type 'imm-data)) + +;;; Same as reg/mem, but with using the accumulator in the default printer +(sb!disassem:define-instruction-format + (accum-reg/mem 16 + :include 'reg/mem :default-printer '(:name :tab accum ", " reg/mem)) + (reg/mem :type 'reg/mem) ; don't need a size + (accum :type 'accum)) + +;;; Same as reg-reg/mem, but with a prefix of #b00001111 +(sb!disassem:define-instruction-format (ext-reg-reg/mem 24 + :default-printer + `(:name :tab reg ", " reg/mem)) + (prefix :field (byte 8 0) :value #b00001111) + (op :field (byte 7 9)) + (width :field (byte 1 8) :type 'width) + (reg/mem :fields (list (byte 2 22) (byte 3 16)) + :type 'reg/mem) + (reg :field (byte 3 19) :type 'reg) + ;; optional fields + (imm)) + +;;; Same as reg/mem, but with a prefix of #b00001111 +(sb!disassem:define-instruction-format (ext-reg/mem 24 + :default-printer '(:name :tab reg/mem)) + (prefix :field (byte 8 0) :value #b00001111) + (op :fields (list (byte 7 9) (byte 3 19))) + (width :field (byte 1 8) :type 'width) + (reg/mem :fields (list (byte 2 22) (byte 3 16)) + :type 'sized-reg/mem) + ;; optional fields + (imm)) + +(sb!disassem:define-instruction-format (ext-reg/mem-imm 24 + :include 'ext-reg/mem + :default-printer + '(:name :tab reg/mem ", " imm)) + (imm :type 'imm-data)) + +;;;; This section was added by jrd, for fp instructions. + +;;; regular fp inst to/from registers/memory +(sb!disassem:define-instruction-format (floating-point 16 + :default-printer + `(:name :tab reg/mem)) + (prefix :field (byte 5 3) :value #b11011) + (op :fields (list (byte 3 0) (byte 3 11))) + (reg/mem :fields (list (byte 2 14) (byte 3 8)) :type 'reg/mem)) + +;;; fp insn to/from fp reg +(sb!disassem:define-instruction-format (floating-point-fp 16 + :default-printer `(:name :tab fp-reg)) + (prefix :field (byte 5 3) :value #b11011) + (suffix :field (byte 2 14) :value #b11) + (op :fields (list (byte 3 0) (byte 3 11))) + (fp-reg :field (byte 3 8) :type 'fp-reg)) + +;;; fp insn to/from fp reg, with the reversed source/destination flag. +(sb!disassem:define-instruction-format + (floating-point-fp-d 16 + :default-printer `(:name :tab ,(swap-if 'd "ST0" ", " 'fp-reg))) + (prefix :field (byte 5 3) :value #b11011) + (suffix :field (byte 2 14) :value #b11) + (op :fields (list (byte 2 0) (byte 3 11))) + (d :field (byte 1 2)) + (fp-reg :field (byte 3 8) :type 'fp-reg)) + + +;;; (added by (?) pfw) +;;; fp no operand isns +(sb!disassem:define-instruction-format (floating-point-no 16 + :default-printer '(:name)) + (prefix :field (byte 8 0) :value #b11011001) + (suffix :field (byte 3 13) :value #b111) + (op :field (byte 5 8))) + +(sb!disassem:define-instruction-format (floating-point-3 16 + :default-printer '(:name)) + (prefix :field (byte 5 3) :value #b11011) + (suffix :field (byte 2 14) :value #b11) + (op :fields (list (byte 3 0) (byte 6 8)))) + +(sb!disassem:define-instruction-format (floating-point-5 16 + :default-printer '(:name)) + (prefix :field (byte 8 0) :value #b11011011) + (suffix :field (byte 3 13) :value #b111) + (op :field (byte 5 8))) + +(sb!disassem:define-instruction-format (floating-point-st 16 + :default-printer '(:name)) + (prefix :field (byte 8 0) :value #b11011111) + (suffix :field (byte 3 13) :value #b111) + (op :field (byte 5 8))) + +(sb!disassem:define-instruction-format (string-op 8 + :include 'simple + :default-printer '(:name width))) + +(sb!disassem:define-instruction-format (short-cond-jump 16) + (op :field (byte 4 4)) + (cc :field (byte 4 0) :type 'condition-code) + (label :field (byte 8 8) :type 'displacement)) + +(sb!disassem:define-instruction-format (short-jump 16 + :default-printer '(:name :tab label)) + (const :field (byte 4 4) :value #b1110) + (op :field (byte 4 0)) + (label :field (byte 8 8) :type 'displacement)) + +(sb!disassem:define-instruction-format (near-cond-jump 16) + (op :fields (list (byte 8 0) (byte 4 12)) :value '(#b00001111 #b1000)) + (cc :field (byte 4 8) :type 'condition-code) + ;; The disassembler currently doesn't let you have an instruction > 32 bits + ;; long, so we fake it by using a prefilter to read the offset. + (label :type 'displacement + :prefilter (lambda (value dstate) + (declare (ignore value)) ; always nil anyway + (sb!disassem:read-signed-suffix 32 dstate)))) + +(sb!disassem:define-instruction-format (near-jump 8 + :default-printer '(:name :tab label)) + (op :field (byte 8 0)) + ;; The disassembler currently doesn't let you have an instruction > 32 bits + ;; long, so we fake it by using a prefilter to read the address. + (label :type 'displacement + :prefilter (lambda (value dstate) + (declare (ignore value)) ; always nil anyway + (sb!disassem:read-signed-suffix 32 dstate)))) + + +(sb!disassem:define-instruction-format (cond-set 24 + :default-printer '('set cc :tab reg/mem)) + (prefix :field (byte 8 0) :value #b00001111) + (op :field (byte 4 12) :value #b1001) + (cc :field (byte 4 8) :type 'condition-code) + (reg/mem :fields (list (byte 2 22) (byte 3 16)) + :type 'byte-reg/mem) + (reg :field (byte 3 19) :value #b000)) + +(sb!disassem:define-instruction-format (cond-move 24 + :default-printer + '('cmov cc :tab reg ", " reg/mem)) + (prefix :field (byte 8 0) :value #b00001111) + (op :field (byte 4 12) :value #b0100) + (cc :field (byte 4 8) :type 'condition-code) + (reg/mem :fields (list (byte 2 22) (byte 3 16)) + :type 'reg/mem) + (reg :field (byte 3 19) :type 'reg)) + +(sb!disassem:define-instruction-format (enter-format 32 + :default-printer '(:name + :tab disp + (:unless (:constant 0) + ", " level))) + (op :field (byte 8 0)) + (disp :field (byte 16 8)) + (level :field (byte 8 24))) + +(sb!disassem:define-instruction-format (prefetch 24 + :default-printer + '(:name ", " reg/mem)) + (prefix :field (byte 8 0) :value #b00001111) + (op :field (byte 8 8) :value #b00011000) + (reg/mem :fields (list (byte 2 22) (byte 3 16)) :type 'byte-reg/mem) + (reg :field (byte 3 19) :type 'reg)) + +;;; Single byte instruction with an immediate byte argument. +(sb!disassem:define-instruction-format (byte-imm 16 + :default-printer '(:name :tab code)) + (op :field (byte 8 0)) + (code :field (byte 8 8))) + +;;;; primitive emitters + +(define-bitfield-emitter emit-word 16 + (byte 16 0)) + +(define-bitfield-emitter emit-dword 32 + (byte 32 0)) + +(define-bitfield-emitter emit-byte-with-reg 8 + (byte 5 3) (byte 3 0)) + +(define-bitfield-emitter emit-mod-reg-r/m-byte 8 + (byte 2 6) (byte 3 3) (byte 3 0)) + +(define-bitfield-emitter emit-sib-byte 8 + (byte 2 6) (byte 3 3) (byte 3 0)) + +;;;; fixup emitters + +(defun emit-absolute-fixup (segment fixup) + (note-fixup segment :absolute fixup) + (let ((offset (fixup-offset fixup))) + (if (label-p offset) + (emit-back-patch segment + 4 ; FIXME: n-word-bytes + (lambda (segment posn) + (declare (ignore posn)) + (emit-dword segment + (- (+ (component-header-length) + (or (label-position offset) + 0)) + other-pointer-lowtag)))) + (emit-dword segment (or offset 0))))) + +(defun emit-relative-fixup (segment fixup) + (note-fixup segment :relative fixup) + (emit-dword segment (or (fixup-offset fixup) 0))) + +;;;; the effective-address (ea) structure + +(defun reg-tn-encoding (tn) + (declare (type tn tn)) + (aver (eq (sb-name (sc-sb (tn-sc tn))) 'registers)) + (let ((offset (tn-offset tn))) + (logior (ash (logand offset 1) 2) + (ash offset -1)))) + +(defstruct (ea (:constructor make-ea (size &key base index scale disp)) + (:copier nil)) + (size nil :type (member :byte :word :dword)) + (base nil :type (or tn null)) + (index nil :type (or tn null)) + (scale 1 :type (member 1 2 4 8)) + (disp 0 :type (or (unsigned-byte 32) (signed-byte 32) fixup))) +(def!method print-object ((ea ea) stream) + (cond ((or *print-escape* *print-readably*) + (print-unreadable-object (ea stream :type t) + (format stream + "~S~@[ base=~S~]~@[ index=~S~]~@[ scale=~S~]~@[ disp=~S~]" + (ea-size ea) + (ea-base ea) + (ea-index ea) + (let ((scale (ea-scale ea))) + (if (= scale 1) nil scale)) + (ea-disp ea)))) + (t + (format stream "~A PTR [" (symbol-name (ea-size ea))) + (when (ea-base ea) + (write-string (sb!c::location-print-name (ea-base ea)) stream) + (when (ea-index ea) + (write-string "+" stream))) + (when (ea-index ea) + (write-string (sb!c::location-print-name (ea-index ea)) stream)) + (unless (= (ea-scale ea) 1) + (format stream "*~A" (ea-scale ea))) + (typecase (ea-disp ea) + (null) + (integer + (format stream "~@D" (ea-disp ea))) + (t + (format stream "+~A" (ea-disp ea)))) + (write-char #\] stream)))) + +(defun emit-ea (segment thing reg &optional allow-constants) + (etypecase thing + (tn + (ecase (sb-name (sc-sb (tn-sc thing))) + (registers + (emit-mod-reg-r/m-byte segment #b11 reg (reg-tn-encoding thing))) + (stack + ;; Convert stack tns into an index off of EBP. + (let ((disp (- (* (1+ (tn-offset thing)) n-word-bytes)))) + (cond ((< -128 disp 127) + (emit-mod-reg-r/m-byte segment #b01 reg #b101) + (emit-byte segment disp)) + (t + (emit-mod-reg-r/m-byte segment #b10 reg #b101) + (emit-dword segment disp))))) + (constant + (unless allow-constants + (error + "Constant TNs can only be directly used in MOV, PUSH, and CMP.")) + (emit-mod-reg-r/m-byte segment #b00 reg #b101) + (emit-absolute-fixup segment + (make-fixup nil + :code-object + (- (* (tn-offset thing) n-word-bytes) + other-pointer-lowtag)))))) + (ea + (let* ((base (ea-base thing)) + (index (ea-index thing)) + (scale (ea-scale thing)) + (disp (ea-disp thing)) + (mod (cond ((or (null base) + (and (eql disp 0) + (not (= (reg-tn-encoding base) #b101)))) + #b00) + ((and (fixnump disp) (<= -128 disp 127)) + #b01) + (t + #b10))) + (r/m (cond (index #b100) + ((null base) #b101) + (t (reg-tn-encoding base))))) + (emit-mod-reg-r/m-byte segment mod reg r/m) + (when (= r/m #b100) + (let ((ss (1- (integer-length scale))) + (index (if (null index) + #b100 + (let ((index (reg-tn-encoding index))) + (if (= index #b100) + (error "can't index off of ESP") + index)))) + (base (if (null base) + #b101 + (reg-tn-encoding base)))) + (emit-sib-byte segment ss index base))) + (cond ((= mod #b01) + (emit-byte segment disp)) + ((or (= mod #b10) (null base)) + (if (fixup-p disp) + (emit-absolute-fixup segment disp) + (emit-dword segment disp)))))) + (fixup + (emit-mod-reg-r/m-byte segment #b00 reg #b101) + (emit-absolute-fixup segment thing)))) + +(defun fp-reg-tn-p (thing) + (and (tn-p thing) + (eq (sb-name (sc-sb (tn-sc thing))) 'float-registers))) + +;;; like the above, but for fp-instructions--jrd +(defun emit-fp-op (segment thing op) + (if (fp-reg-tn-p thing) + (emit-byte segment (dpb op (byte 3 3) (dpb (tn-offset thing) + (byte 3 0) + #b11000000))) + (emit-ea segment thing op))) + +(defun byte-reg-p (thing) + (and (tn-p thing) + (eq (sb-name (sc-sb (tn-sc thing))) 'registers) + (member (sc-name (tn-sc thing)) *byte-sc-names*) + t)) + +(defun byte-ea-p (thing) + (typecase thing + (ea (eq (ea-size thing) :byte)) + (tn + (and (member (sc-name (tn-sc thing)) *byte-sc-names*) t)) + (t nil))) + +(defun word-reg-p (thing) + (and (tn-p thing) + (eq (sb-name (sc-sb (tn-sc thing))) 'registers) + (member (sc-name (tn-sc thing)) *word-sc-names*) + t)) + +(defun word-ea-p (thing) + (typecase thing + (ea (eq (ea-size thing) :word)) + (tn (and (member (sc-name (tn-sc thing)) *word-sc-names*) t)) + (t nil))) + +(defun dword-reg-p (thing) + (and (tn-p thing) + (eq (sb-name (sc-sb (tn-sc thing))) 'registers) + (member (sc-name (tn-sc thing)) *dword-sc-names*) + t)) + +(defun dword-ea-p (thing) + (typecase thing + (ea (eq (ea-size thing) :dword)) + (tn + (and (member (sc-name (tn-sc thing)) *dword-sc-names*) t)) + (t nil))) + +(defun register-p (thing) + (and (tn-p thing) + (eq (sb-name (sc-sb (tn-sc thing))) 'registers))) + +(defun accumulator-p (thing) + (and (register-p thing) + (= (tn-offset thing) 0))) + +;;;; utilities + +(def!constant +operand-size-prefix-byte+ #b01100110) + +(defun maybe-emit-operand-size-prefix (segment size) + (unless (or (eq size :byte) (eq size +default-operand-size+)) + (emit-byte segment +operand-size-prefix-byte+))) + +(defun operand-size (thing) + (typecase thing + (tn + ;; FIXME: might as well be COND instead of having to use #. readmacro + ;; to hack up the code + (case (sc-name (tn-sc thing)) + (#.*dword-sc-names* + :dword) + (#.*word-sc-names* + :word) + (#.*byte-sc-names* + :byte) + ;; added by jrd: float-registers is a separate size (?) + (#.*float-sc-names* + :float) + (#.*double-sc-names* + :double) + (t + (error "can't tell the size of ~S ~S" thing (sc-name (tn-sc thing)))))) + (ea + (ea-size thing)) + (t + nil))) + +(defun matching-operand-size (dst src) + (let ((dst-size (operand-size dst)) + (src-size (operand-size src))) + (if dst-size + (if src-size + (if (eq dst-size src-size) + dst-size + (error "size mismatch: ~S is a ~S and ~S is a ~S." + dst dst-size src src-size)) + dst-size) + (if src-size + src-size + (error "can't tell the size of either ~S or ~S" dst src))))) + +(defun emit-sized-immediate (segment size value) + (ecase size + (:byte + (emit-byte segment value)) + (:word + (emit-word segment value)) + (:dword + (emit-dword segment value)))) + +;;;; general data transfer + +(define-instruction mov (segment dst src) + ;; immediate to register + (:printer reg ((op #b1011) (imm nil :type 'imm-data)) + '(:name :tab reg ", " imm)) + ;; absolute mem to/from accumulator + (:printer simple-dir ((op #b101000) (imm nil :type 'imm-addr)) + `(:name :tab ,(swap-if 'dir 'accum ", " '("[" imm "]")))) + ;; register to/from register/memory + (:printer reg-reg/mem-dir ((op #b100010))) + ;; immediate to register/memory + (:printer reg/mem-imm ((op '(#b1100011 #b000)))) + + (:emitter + (let ((size (matching-operand-size dst src))) + (maybe-emit-operand-size-prefix segment size) + (cond ((register-p dst) + (cond ((integerp src) + (emit-byte-with-reg segment + (if (eq size :byte) + #b10110 + #b10111) + (reg-tn-encoding dst)) + (emit-sized-immediate segment size src)) + ((and (fixup-p src) (accumulator-p dst)) + (emit-byte segment + (if (eq size :byte) + #b10100000 + #b10100001)) + (emit-absolute-fixup segment src)) + (t + (emit-byte segment + (if (eq size :byte) + #b10001010 + #b10001011)) + (emit-ea segment src (reg-tn-encoding dst) t)))) + ((and (fixup-p dst) (accumulator-p src)) + (emit-byte segment (if (eq size :byte) #b10100010 #b10100011)) + (emit-absolute-fixup segment dst)) + ((integerp src) + (emit-byte segment (if (eq size :byte) #b11000110 #b11000111)) + (emit-ea segment dst #b000) + (emit-sized-immediate segment size src)) + ((register-p src) + (emit-byte segment (if (eq size :byte) #b10001000 #b10001001)) + (emit-ea segment dst (reg-tn-encoding src))) + ((fixup-p src) + (aver (eq size :dword)) + (emit-byte segment #b11000111) + (emit-ea segment dst #b000) + (emit-absolute-fixup segment src)) + (t + (error "bogus arguments to MOV: ~S ~S" dst src)))))) + +(defun emit-move-with-extension (segment dst src opcode) + (aver (register-p dst)) + (let ((dst-size (operand-size dst)) + (src-size (operand-size src))) + (ecase dst-size + (:word + (aver (eq src-size :byte)) + (maybe-emit-operand-size-prefix segment :word) + (emit-byte segment #b00001111) + (emit-byte segment opcode) + (emit-ea segment src (reg-tn-encoding dst))) + (:dword + (ecase src-size + (:byte + (maybe-emit-operand-size-prefix segment :dword) + (emit-byte segment #b00001111) + (emit-byte segment opcode) + (emit-ea segment src (reg-tn-encoding dst))) + (:word + (emit-byte segment #b00001111) + (emit-byte segment (logior opcode 1)) + (emit-ea segment src (reg-tn-encoding dst)))))))) + +(define-instruction movsx (segment dst src) + (:printer ext-reg-reg/mem ((op #b1011111) (reg nil :type 'word-reg))) + (:emitter (emit-move-with-extension segment dst src #b10111110))) + +(define-instruction movzx (segment dst src) + (:printer ext-reg-reg/mem ((op #b1011011) (reg nil :type 'word-reg))) + (:emitter (emit-move-with-extension segment dst src #b10110110))) + +(define-instruction push (segment src) + ;; register + (:printer reg-no-width ((op #b01010))) + ;; register/memory + (:printer reg/mem ((op '(#b1111111 #b110)) (width 1))) + ;; immediate + (:printer byte ((op #b01101010) (imm nil :type 'signed-imm-byte)) + '(:name :tab imm)) + (:printer byte ((op #b01101000) (imm nil :type 'imm-word)) + '(:name :tab imm)) + ;; ### segment registers? + + (:emitter + (cond ((integerp src) + (cond ((<= -128 src 127) + (emit-byte segment #b01101010) + (emit-byte segment src)) + (t + (emit-byte segment #b01101000) + (emit-dword segment src)))) + ((fixup-p src) + ;; Interpret the fixup as an immediate dword to push. + (emit-byte segment #b01101000) + (emit-absolute-fixup segment src)) + (t + (let ((size (operand-size src))) + (aver (not (eq size :byte))) + (maybe-emit-operand-size-prefix segment size) + (cond ((register-p src) + (emit-byte-with-reg segment #b01010 (reg-tn-encoding src))) + (t + (emit-byte segment #b11111111) + (emit-ea segment src #b110 t)))))))) + +(define-instruction pusha (segment) + (:printer byte ((op #b01100000))) + (:emitter + (emit-byte segment #b01100000))) + +(define-instruction pop (segment dst) + (:printer reg-no-width ((op #b01011))) + (:printer reg/mem ((op '(#b1000111 #b000)) (width 1))) + (:emitter + (let ((size (operand-size dst))) + (aver (not (eq size :byte))) + (maybe-emit-operand-size-prefix segment size) + (cond ((register-p dst) + (emit-byte-with-reg segment #b01011 (reg-tn-encoding dst))) + (t + (emit-byte segment #b10001111) + (emit-ea segment dst #b000)))))) + +(define-instruction popa (segment) + (:printer byte ((op #b01100001))) + (:emitter + (emit-byte segment #b01100001))) + +(define-instruction xchg (segment operand1 operand2) + ;; Register with accumulator. + (:printer reg-no-width ((op #b10010)) '(:name :tab accum ", " reg)) + ;; Register/Memory with Register. + (:printer reg-reg/mem ((op #b1000011))) + (:emitter + (let ((size (matching-operand-size operand1 operand2))) + (maybe-emit-operand-size-prefix segment size) + (labels ((xchg-acc-with-something (acc something) + (if (and (not (eq size :byte)) (register-p something)) + (emit-byte-with-reg segment + #b10010 + (reg-tn-encoding something)) + (xchg-reg-with-something acc something))) + (xchg-reg-with-something (reg something) + (emit-byte segment (if (eq size :byte) #b10000110 #b10000111)) + (emit-ea segment something (reg-tn-encoding reg)))) + (cond ((accumulator-p operand1) + (xchg-acc-with-something operand1 operand2)) + ((accumulator-p operand2) + (xchg-acc-with-something operand2 operand1)) + ((register-p operand1) + (xchg-reg-with-something operand1 operand2)) + ((register-p operand2) + (xchg-reg-with-something operand2 operand1)) + (t + (error "bogus args to XCHG: ~S ~S" operand1 operand2))))))) + +(define-instruction lea (segment dst src) + (:printer reg-reg/mem ((op #b1000110) (width 1))) + (:emitter + (aver (dword-reg-p dst)) + (emit-byte segment #b10001101) + (emit-ea segment src (reg-tn-encoding dst)))) + +(define-instruction cmpxchg (segment dst src) + ;; Register/Memory with Register. + (:printer ext-reg-reg/mem ((op #b1011000)) '(:name :tab reg/mem ", " reg)) + (:emitter + (aver (register-p src)) + (let ((size (matching-operand-size src dst))) + (maybe-emit-operand-size-prefix segment size) + (emit-byte segment #b00001111) + (emit-byte segment (if (eq size :byte) #b10110000 #b10110001)) + (emit-ea segment dst (reg-tn-encoding src))))) + + + +(define-instruction fs-segment-prefix (segment) + (:emitter + (emit-byte segment #x64))) + +;;;; flag control instructions + +;;; CLC -- Clear Carry Flag. +(define-instruction clc (segment) + (:printer byte ((op #b11111000))) + (:emitter + (emit-byte segment #b11111000))) + +;;; CLD -- Clear Direction Flag. +(define-instruction cld (segment) + (:printer byte ((op #b11111100))) + (:emitter + (emit-byte segment #b11111100))) + +;;; CLI -- Clear Iterrupt Enable Flag. +(define-instruction cli (segment) + (:printer byte ((op #b11111010))) + (:emitter + (emit-byte segment #b11111010))) + +;;; CMC -- Complement Carry Flag. +(define-instruction cmc (segment) + (:printer byte ((op #b11110101))) + (:emitter + (emit-byte segment #b11110101))) + +;;; LAHF -- Load AH into flags. +(define-instruction lahf (segment) + (:printer byte ((op #b10011111))) + (:emitter + (emit-byte segment #b10011111))) + +;;; POPF -- Pop flags. +(define-instruction popf (segment) + (:printer byte ((op #b10011101))) + (:emitter + (emit-byte segment #b10011101))) + +;;; PUSHF -- push flags. +(define-instruction pushf (segment) + (:printer byte ((op #b10011100))) + (:emitter + (emit-byte segment #b10011100))) + +;;; SAHF -- Store AH into flags. +(define-instruction sahf (segment) + (:printer byte ((op #b10011110))) + (:emitter + (emit-byte segment #b10011110))) + +;;; STC -- Set Carry Flag. +(define-instruction stc (segment) + (:printer byte ((op #b11111001))) + (:emitter + (emit-byte segment #b11111001))) + +;;; STD -- Set Direction Flag. +(define-instruction std (segment) + (:printer byte ((op #b11111101))) + (:emitter + (emit-byte segment #b11111101))) + +;;; STI -- Set Interrupt Enable Flag. +(define-instruction sti (segment) + (:printer byte ((op #b11111011))) + (:emitter + (emit-byte segment #b11111011))) + +;;;; arithmetic + +(defun emit-random-arith-inst (name segment dst src opcode + &optional allow-constants) + (let ((size (matching-operand-size dst src))) + (maybe-emit-operand-size-prefix segment size) + (cond + ((integerp src) + (cond ((and (not (eq size :byte)) (<= -128 src 127)) + (emit-byte segment #b10000011) + (emit-ea segment dst opcode allow-constants) + (emit-byte segment src)) + ((accumulator-p dst) + (emit-byte segment + (dpb opcode + (byte 3 3) + (if (eq size :byte) + #b00000100 + #b00000101))) + (emit-sized-immediate segment size src)) + (t + (emit-byte segment (if (eq size :byte) #b10000000 #b10000001)) + (emit-ea segment dst opcode allow-constants) + (emit-sized-immediate segment size src)))) + ((register-p src) + (emit-byte segment + (dpb opcode + (byte 3 3) + (if (eq size :byte) #b00000000 #b00000001))) + (emit-ea segment dst (reg-tn-encoding src) allow-constants)) + ((register-p dst) + (emit-byte segment + (dpb opcode + (byte 3 3) + (if (eq size :byte) #b00000010 #b00000011))) + (emit-ea segment src (reg-tn-encoding dst) allow-constants)) + (t + (error "bogus operands to ~A" name))))) + +(eval-when (:compile-toplevel :execute) + (defun arith-inst-printer-list (subop) + `((accum-imm ((op ,(dpb subop (byte 3 2) #b0000010)))) + (reg/mem-imm ((op (#b1000000 ,subop)))) + (reg/mem-imm ((op (#b1000001 ,subop)) + (imm nil :type signed-imm-byte))) + (reg-reg/mem-dir ((op ,(dpb subop (byte 3 1) #b000000)))))) + ) + +(define-instruction add (segment dst src) + (:printer-list (arith-inst-printer-list #b000)) + (:emitter (emit-random-arith-inst "ADD" segment dst src #b000))) + +(define-instruction adc (segment dst src) + (:printer-list (arith-inst-printer-list #b010)) + (:emitter (emit-random-arith-inst "ADC" segment dst src #b010))) + +(define-instruction sub (segment dst src) + (:printer-list (arith-inst-printer-list #b101)) + (:emitter (emit-random-arith-inst "SUB" segment dst src #b101))) + +(define-instruction sbb (segment dst src) + (:printer-list (arith-inst-printer-list #b011)) + (:emitter (emit-random-arith-inst "SBB" segment dst src #b011))) + +(define-instruction cmp (segment dst src) + (:printer-list (arith-inst-printer-list #b111)) + (:emitter (emit-random-arith-inst "CMP" segment dst src #b111 t))) + +(define-instruction inc (segment dst) + ;; Register. + (:printer reg-no-width ((op #b01000))) + ;; Register/Memory + (:printer reg/mem ((op '(#b1111111 #b000)))) + (:emitter + (let ((size (operand-size dst))) + (maybe-emit-operand-size-prefix segment size) + (cond ((and (not (eq size :byte)) (register-p dst)) + (emit-byte-with-reg segment #b01000 (reg-tn-encoding dst))) + (t + (emit-byte segment (if (eq size :byte) #b11111110 #b11111111)) + (emit-ea segment dst #b000)))))) + +(define-instruction dec (segment dst) + ;; Register. + (:printer reg-no-width ((op #b01001))) + ;; Register/Memory + (:printer reg/mem ((op '(#b1111111 #b001)))) + (:emitter + (let ((size (operand-size dst))) + (maybe-emit-operand-size-prefix segment size) + (cond ((and (not (eq size :byte)) (register-p dst)) + (emit-byte-with-reg segment #b01001 (reg-tn-encoding dst))) + (t + (emit-byte segment (if (eq size :byte) #b11111110 #b11111111)) + (emit-ea segment dst #b001)))))) + +(define-instruction neg (segment dst) + (:printer reg/mem ((op '(#b1111011 #b011)))) + (:emitter + (let ((size (operand-size dst))) + (maybe-emit-operand-size-prefix segment size) + (emit-byte segment (if (eq size :byte) #b11110110 #b11110111)) + (emit-ea segment dst #b011)))) + +(define-instruction aaa (segment) + (:printer byte ((op #b00110111))) + (:emitter + (emit-byte segment #b00110111))) + +(define-instruction aas (segment) + (:printer byte ((op #b00111111))) + (:emitter + (emit-byte segment #b00111111))) + +(define-instruction daa (segment) + (:printer byte ((op #b00100111))) + (:emitter + (emit-byte segment #b00100111))) + +(define-instruction das (segment) + (:printer byte ((op #b00101111))) + (:emitter + (emit-byte segment #b00101111))) + +(define-instruction mul (segment dst src) + (:printer accum-reg/mem ((op '(#b1111011 #b100)))) + (:emitter + (let ((size (matching-operand-size dst src))) + (aver (accumulator-p dst)) + (maybe-emit-operand-size-prefix segment size) + (emit-byte segment (if (eq size :byte) #b11110110 #b11110111)) + (emit-ea segment src #b100)))) + +(define-instruction imul (segment dst &optional src1 src2) + (:printer accum-reg/mem ((op '(#b1111011 #b101)))) + (:printer ext-reg-reg/mem ((op #b1010111))) + (:printer reg-reg/mem ((op #b0110100) (width 1) + (imm nil :type 'signed-imm-word)) + '(:name :tab reg ", " reg/mem ", " imm)) + (:printer reg-reg/mem ((op #b0110101) (width 1) + (imm nil :type 'signed-imm-byte)) + '(:name :tab reg ", " reg/mem ", " imm)) + (:emitter + (flet ((r/m-with-immed-to-reg (reg r/m immed) + (let* ((size (matching-operand-size reg r/m)) + (sx (and (not (eq size :byte)) (<= -128 immed 127)))) + (maybe-emit-operand-size-prefix segment size) + (emit-byte segment (if sx #b01101011 #b01101001)) + (emit-ea segment r/m (reg-tn-encoding reg)) + (if sx + (emit-byte segment immed) + (emit-sized-immediate segment size immed))))) + (cond (src2 + (r/m-with-immed-to-reg dst src1 src2)) + (src1 + (if (integerp src1) + (r/m-with-immed-to-reg dst dst src1) + (let ((size (matching-operand-size dst src1))) + (maybe-emit-operand-size-prefix segment size) + (emit-byte segment #b00001111) + (emit-byte segment #b10101111) + (emit-ea segment src1 (reg-tn-encoding dst))))) + (t + (let ((size (operand-size dst))) + (maybe-emit-operand-size-prefix segment size) + (emit-byte segment (if (eq size :byte) #b11110110 #b11110111)) + (emit-ea segment dst #b101))))))) + +(define-instruction div (segment dst src) + (:printer accum-reg/mem ((op '(#b1111011 #b110)))) + (:emitter + (let ((size (matching-operand-size dst src))) + (aver (accumulator-p dst)) + (maybe-emit-operand-size-prefix segment size) + (emit-byte segment (if (eq size :byte) #b11110110 #b11110111)) + (emit-ea segment src #b110)))) + +(define-instruction idiv (segment dst src) + (:printer accum-reg/mem ((op '(#b1111011 #b111)))) + (:emitter + (let ((size (matching-operand-size dst src))) + (aver (accumulator-p dst)) + (maybe-emit-operand-size-prefix segment size) + (emit-byte segment (if (eq size :byte) #b11110110 #b11110111)) + (emit-ea segment src #b111)))) + +(define-instruction aad (segment) + (:printer two-bytes ((op '(#b11010101 #b00001010)))) + (:emitter + (emit-byte segment #b11010101) + (emit-byte segment #b00001010))) + +(define-instruction aam (segment) + (:printer two-bytes ((op '(#b11010100 #b00001010)))) + (:emitter + (emit-byte segment #b11010100) + (emit-byte segment #b00001010))) + +;;; CBW -- Convert Byte to Word. AX <- sign_xtnd(AL) +(define-instruction cbw (segment) + (:emitter + (maybe-emit-operand-size-prefix segment :word) + (emit-byte segment #b10011000))) + +;;; CWDE -- Convert Word To Double Word Extened. EAX <- sign_xtnd(AX) +(define-instruction cwde (segment) + (:emitter + (maybe-emit-operand-size-prefix segment :dword) + (emit-byte segment #b10011000))) + +;;; CWD -- Convert Word to Double Word. DX:AX <- sign_xtnd(AX) +(define-instruction cwd (segment) + (:emitter + (maybe-emit-operand-size-prefix segment :word) + (emit-byte segment #b10011001))) + +;;; CDQ -- Convert Double Word to Quad Word. EDX:EAX <- sign_xtnd(EAX) +(define-instruction cdq (segment) + (:printer byte ((op #b10011001))) + (:emitter + (maybe-emit-operand-size-prefix segment :dword) + (emit-byte segment #b10011001))) + +(define-instruction xadd (segment dst src) + ;; Register/Memory with Register. + (:printer ext-reg-reg/mem ((op #b1100000)) '(:name :tab reg/mem ", " reg)) + (:emitter + (aver (register-p src)) + (let ((size (matching-operand-size src dst))) + (maybe-emit-operand-size-prefix segment size) + (emit-byte segment #b00001111) + (emit-byte segment (if (eq size :byte) #b11000000 #b11000001)) + (emit-ea segment dst (reg-tn-encoding src))))) + + +;;;; logic + +(defun emit-shift-inst (segment dst amount opcode) + (let ((size (operand-size dst))) + (maybe-emit-operand-size-prefix segment size) + (multiple-value-bind (major-opcode immed) + (case amount + (:cl (values #b11010010 nil)) + (1 (values #b11010000 nil)) + (t (values #b11000000 t))) + (emit-byte segment + (if (eq size :byte) major-opcode (logior major-opcode 1))) + (emit-ea segment dst opcode) + (when immed + (emit-byte segment amount))))) + +(eval-when (:compile-toplevel :execute) + (defun shift-inst-printer-list (subop) + `((reg/mem ((op (#b1101000 ,subop))) + (:name :tab reg/mem ", 1")) + (reg/mem ((op (#b1101001 ,subop))) + (:name :tab reg/mem ", " 'cl)) + (reg/mem-imm ((op (#b1100000 ,subop)) + (imm nil :type signed-imm-byte)))))) + +(define-instruction rol (segment dst amount) + (:printer-list + (shift-inst-printer-list #b000)) + (:emitter + (emit-shift-inst segment dst amount #b000))) + +(define-instruction ror (segment dst amount) + (:printer-list + (shift-inst-printer-list #b001)) + (:emitter + (emit-shift-inst segment dst amount #b001))) + +(define-instruction rcl (segment dst amount) + (:printer-list + (shift-inst-printer-list #b010)) + (:emitter + (emit-shift-inst segment dst amount #b010))) + +(define-instruction rcr (segment dst amount) + (:printer-list + (shift-inst-printer-list #b011)) + (:emitter + (emit-shift-inst segment dst amount #b011))) + +(define-instruction shl (segment dst amount) + (:printer-list + (shift-inst-printer-list #b100)) + (:emitter + (emit-shift-inst segment dst amount #b100))) + +(define-instruction shr (segment dst amount) + (:printer-list + (shift-inst-printer-list #b101)) + (:emitter + (emit-shift-inst segment dst amount #b101))) + +(define-instruction sar (segment dst amount) + (:printer-list + (shift-inst-printer-list #b111)) + (:emitter + (emit-shift-inst segment dst amount #b111))) + +(defun emit-double-shift (segment opcode dst src amt) + (let ((size (matching-operand-size dst src))) + (when (eq size :byte) + (error "Double shifts can only be used with words.")) + (maybe-emit-operand-size-prefix segment size) + (emit-byte segment #b00001111) + (emit-byte segment (dpb opcode (byte 1 3) + (if (eq amt :cl) #b10100101 #b10100100))) + #+nil + (emit-ea segment dst src) + (emit-ea segment dst (reg-tn-encoding src)) ; pw tries this + (unless (eq amt :cl) + (emit-byte segment amt)))) + +(eval-when (:compile-toplevel :execute) + (defun double-shift-inst-printer-list (op) + `(#+nil + (ext-reg-reg/mem-imm ((op ,(logior op #b10)) + (imm nil :type signed-imm-byte))) + (ext-reg-reg/mem ((op ,(logior op #b10))) + (:name :tab reg/mem ", " reg ", " 'cl))))) + +(define-instruction shld (segment dst src amt) + (:declare (type (or (member :cl) (mod 32)) amt)) + (:printer-list (double-shift-inst-printer-list #b1010000)) + (:emitter + (emit-double-shift segment #b0 dst src amt))) + +(define-instruction shrd (segment dst src amt) + (:declare (type (or (member :cl) (mod 32)) amt)) + (:printer-list (double-shift-inst-printer-list #b1010100)) + (:emitter + (emit-double-shift segment #b1 dst src amt))) + +(define-instruction and (segment dst src) + (:printer-list + (arith-inst-printer-list #b100)) + (:emitter + (emit-random-arith-inst "AND" segment dst src #b100))) + +(define-instruction test (segment this that) + (:printer accum-imm ((op #b1010100))) + (:printer reg/mem-imm ((op '(#b1111011 #b000)))) + (:printer reg-reg/mem ((op #b1000010))) + (:emitter + (let ((size (matching-operand-size this that))) + (maybe-emit-operand-size-prefix segment size) + (flet ((test-immed-and-something (immed something) + (cond ((accumulator-p something) + (emit-byte segment + (if (eq size :byte) #b10101000 #b10101001)) + (emit-sized-immediate segment size immed)) + (t + (emit-byte segment + (if (eq size :byte) #b11110110 #b11110111)) + (emit-ea segment something #b000) + (emit-sized-immediate segment size immed)))) + (test-reg-and-something (reg something) + (emit-byte segment (if (eq size :byte) #b10000100 #b10000101)) + (emit-ea segment something (reg-tn-encoding reg)))) + (cond ((integerp that) + (test-immed-and-something that this)) + ((integerp this) + (test-immed-and-something this that)) + ((register-p this) + (test-reg-and-something this that)) + ((register-p that) + (test-reg-and-something that this)) + (t + (error "bogus operands for TEST: ~S and ~S" this that))))))) + +(define-instruction or (segment dst src) + (:printer-list + (arith-inst-printer-list #b001)) + (:emitter + (emit-random-arith-inst "OR" segment dst src #b001))) + +(define-instruction xor (segment dst src) + (:printer-list + (arith-inst-printer-list #b110)) + (:emitter + (emit-random-arith-inst "XOR" segment dst src #b110))) + +(define-instruction not (segment dst) + (:printer reg/mem ((op '(#b1111011 #b010)))) + (:emitter + (let ((size (operand-size dst))) + (maybe-emit-operand-size-prefix segment size) + (emit-byte segment (if (eq size :byte) #b11110110 #b11110111)) + (emit-ea segment dst #b010)))) + +;;;; string manipulation + +(define-instruction cmps (segment size) + (:printer string-op ((op #b1010011))) + (:emitter + (maybe-emit-operand-size-prefix segment size) + (emit-byte segment (if (eq size :byte) #b10100110 #b10100111)))) + +(define-instruction ins (segment acc) + (:printer string-op ((op #b0110110))) + (:emitter + (let ((size (operand-size acc))) + (aver (accumulator-p acc)) + (maybe-emit-operand-size-prefix segment size) + (emit-byte segment (if (eq size :byte) #b01101100 #b01101101))))) + +(define-instruction lods (segment acc) + (:printer string-op ((op #b1010110))) + (:emitter + (let ((size (operand-size acc))) + (aver (accumulator-p acc)) + (maybe-emit-operand-size-prefix segment size) + (emit-byte segment (if (eq size :byte) #b10101100 #b10101101))))) + +(define-instruction movs (segment size) + (:printer string-op ((op #b1010010))) + (:emitter + (maybe-emit-operand-size-prefix segment size) + (emit-byte segment (if (eq size :byte) #b10100100 #b10100101)))) + +(define-instruction outs (segment acc) + (:printer string-op ((op #b0110111))) + (:emitter + (let ((size (operand-size acc))) + (aver (accumulator-p acc)) + (maybe-emit-operand-size-prefix segment size) + (emit-byte segment (if (eq size :byte) #b01101110 #b01101111))))) + +(define-instruction scas (segment acc) + (:printer string-op ((op #b1010111))) + (:emitter + (let ((size (operand-size acc))) + (aver (accumulator-p acc)) + (maybe-emit-operand-size-prefix segment size) + (emit-byte segment (if (eq size :byte) #b10101110 #b10101111))))) + +(define-instruction stos (segment acc) + (:printer string-op ((op #b1010101))) + (:emitter + (let ((size (operand-size acc))) + (aver (accumulator-p acc)) + (maybe-emit-operand-size-prefix segment size) + (emit-byte segment (if (eq size :byte) #b10101010 #b10101011))))) + +(define-instruction xlat (segment) + (:printer byte ((op #b11010111))) + (:emitter + (emit-byte segment #b11010111))) + +(define-instruction rep (segment) + (:emitter + (emit-byte segment #b11110010))) + +(define-instruction repe (segment) + (:printer byte ((op #b11110011))) + (:emitter + (emit-byte segment #b11110011))) + +(define-instruction repne (segment) + (:printer byte ((op #b11110010))) + (:emitter + (emit-byte segment #b11110010))) + + +;;;; bit manipulation + +(define-instruction bsf (segment dst src) + (:printer ext-reg-reg/mem ((op #b1011110) (width 0))) + (:emitter + (let ((size (matching-operand-size dst src))) + (when (eq size :byte) + (error "can't scan bytes: ~S" src)) + (maybe-emit-operand-size-prefix segment size) + (emit-byte segment #b00001111) + (emit-byte segment #b10111100) + (emit-ea segment src (reg-tn-encoding dst))))) + +(define-instruction bsr (segment dst src) + (:printer ext-reg-reg/mem ((op #b1011110) (width 1))) + (:emitter + (let ((size (matching-operand-size dst src))) + (when (eq size :byte) + (error "can't scan bytes: ~S" src)) + (maybe-emit-operand-size-prefix segment size) + (emit-byte segment #b00001111) + (emit-byte segment #b10111101) + (emit-ea segment src (reg-tn-encoding dst))))) + +(defun emit-bit-test-and-mumble (segment src index opcode) + (let ((size (operand-size src))) + (when (eq size :byte) + (error "can't scan bytes: ~S" src)) + (maybe-emit-operand-size-prefix segment size) + (emit-byte segment #b00001111) + (cond ((integerp index) + (emit-byte segment #b10111010) + (emit-ea segment src opcode) + (emit-byte segment index)) + (t + (emit-byte segment (dpb opcode (byte 3 3) #b10000011)) + (emit-ea segment src (reg-tn-encoding index)))))) + +(eval-when (:compile-toplevel :execute) + (defun bit-test-inst-printer-list (subop) + `((ext-reg/mem-imm ((op (#b1011101 ,subop)) + (reg/mem nil :type word-reg/mem) + (imm nil :type imm-data) + (width 0))) + (ext-reg-reg/mem ((op ,(dpb subop (byte 3 2) #b1000001)) + (width 1)) + (:name :tab reg/mem ", " reg))))) + +(define-instruction bt (segment src index) + (:printer-list (bit-test-inst-printer-list #b100)) + (:emitter + (emit-bit-test-and-mumble segment src index #b100))) + +(define-instruction btc (segment src index) + (:printer-list (bit-test-inst-printer-list #b111)) + (:emitter + (emit-bit-test-and-mumble segment src index #b111))) + +(define-instruction btr (segment src index) + (:printer-list (bit-test-inst-printer-list #b110)) + (:emitter + (emit-bit-test-and-mumble segment src index #b110))) + +(define-instruction bts (segment src index) + (:printer-list (bit-test-inst-printer-list #b101)) + (:emitter + (emit-bit-test-and-mumble segment src index #b101))) + + +;;;; control transfer + +(define-instruction call (segment where) + (:printer near-jump ((op #b11101000))) + (:printer reg/mem ((op '(#b1111111 #b010)) (width 1))) + (:emitter + (typecase where + (label + (emit-byte segment #b11101000) + (emit-back-patch segment + 4 + (lambda (segment posn) + (emit-dword segment + (- (label-position where) + (+ posn 4)))))) + (fixup + (emit-byte segment #b11101000) + (emit-relative-fixup segment where)) + (t + (emit-byte segment #b11111111) + (emit-ea segment where #b010))))) + +(defun emit-byte-displacement-backpatch (segment target) + (emit-back-patch segment + 1 + (lambda (segment posn) + (let ((disp (- (label-position target) (1+ posn)))) + (aver (<= -128 disp 127)) + (emit-byte segment disp))))) + +(define-instruction jmp (segment cond &optional where) + ;; conditional jumps + (:printer short-cond-jump ((op #b0111)) '('j cc :tab label)) + (:printer near-cond-jump () '('j cc :tab label)) + ;; unconditional jumps + (:printer short-jump ((op #b1011))) + (:printer near-jump ((op #b11101001)) ) + (:printer reg/mem ((op '(#b1111111 #b100)) (width 1))) + (:emitter + (cond (where + (emit-chooser + segment 6 2 + (lambda (segment posn delta-if-after) + (let ((disp (- (label-position where posn delta-if-after) + (+ posn 2)))) + (when (<= -128 disp 127) + (emit-byte segment + (dpb (conditional-opcode cond) + (byte 4 0) + #b01110000)) + (emit-byte-displacement-backpatch segment where) + t))) + (lambda (segment posn) + (let ((disp (- (label-position where) (+ posn 6)))) + (emit-byte segment #b00001111) + (emit-byte segment + (dpb (conditional-opcode cond) + (byte 4 0) + #b10000000)) + (emit-dword segment disp))))) + ((label-p (setq where cond)) + (emit-chooser + segment 5 0 + (lambda (segment posn delta-if-after) + (let ((disp (- (label-position where posn delta-if-after) + (+ posn 2)))) + (when (<= -128 disp 127) + (emit-byte segment #b11101011) + (emit-byte-displacement-backpatch segment where) + t))) + (lambda (segment posn) + (let ((disp (- (label-position where) (+ posn 5)))) + (emit-byte segment #b11101001) + (emit-dword segment disp))))) + ((fixup-p where) + (emit-byte segment #b11101001) + (emit-relative-fixup segment where)) + (t + (unless (or (ea-p where) (tn-p where)) + (error "don't know what to do with ~A" where)) + (emit-byte segment #b11111111) + (emit-ea segment where #b100))))) + +(define-instruction jmp-short (segment label) + (:emitter + (emit-byte segment #b11101011) + (emit-byte-displacement-backpatch segment label))) + +(define-instruction ret (segment &optional stack-delta) + (:printer byte ((op #b11000011))) + (:printer byte ((op #b11000010) (imm nil :type 'imm-word-16)) + '(:name :tab imm)) + (:emitter + (cond (stack-delta + (emit-byte segment #b11000010) + (emit-word segment stack-delta)) + (t + (emit-byte segment #b11000011))))) + +(define-instruction jecxz (segment target) + (:printer short-jump ((op #b0011))) + (:emitter + (emit-byte segment #b11100011) + (emit-byte-displacement-backpatch segment target))) + +(define-instruction loop (segment target) + (:printer short-jump ((op #b0010))) + (:emitter + (emit-byte segment #b11100010) ; pfw this was 11100011, or jecxz!!!! + (emit-byte-displacement-backpatch segment target))) + +(define-instruction loopz (segment target) + (:printer short-jump ((op #b0001))) + (:emitter + (emit-byte segment #b11100001) + (emit-byte-displacement-backpatch segment target))) + +(define-instruction loopnz (segment target) + (:printer short-jump ((op #b0000))) + (:emitter + (emit-byte segment #b11100000) + (emit-byte-displacement-backpatch segment target))) + +;;;; conditional move +(define-instruction cmov (segment cond dst src) + (:printer cond-move ()) + (:emitter + (aver (register-p dst)) + (let ((size (matching-operand-size dst src))) + (aver (or (eq size :word) (eq size :dword))) + (maybe-emit-operand-size-prefix segment size)) + (emit-byte segment #b00001111) + (emit-byte segment (dpb (conditional-opcode cond) (byte 4 0) #b01000000)) + (emit-ea segment src (reg-tn-encoding dst)))) + +;;;; conditional byte set + +(define-instruction set (segment dst cond) + (:printer cond-set ()) + (:emitter + (emit-byte segment #b00001111) + (emit-byte segment (dpb (conditional-opcode cond) (byte 4 0) #b10010000)) + (emit-ea segment dst #b000))) + +;;;; enter/leave + +(define-instruction enter (segment disp &optional (level 0)) + (:declare (type (unsigned-byte 16) disp) + (type (unsigned-byte 8) level)) + (:printer enter-format ((op #b11001000))) + (:emitter + (emit-byte segment #b11001000) + (emit-word segment disp) + (emit-byte segment level))) + +(define-instruction leave (segment) + (:printer byte ((op #b11001001))) + (:emitter + (emit-byte segment #b11001001))) + +;;;; prefetch +(define-instruction prefetchnta (segment ea) + (:printer prefetch ((op #b00011000) (reg #b000))) + (:emitter + (aver (typep ea 'ea)) + (aver (eq :byte (ea-size ea))) + (emit-byte segment #b00001111) + (emit-byte segment #b00011000) + (emit-ea segment ea #b000))) + +(define-instruction prefetcht0 (segment ea) + (:printer prefetch ((op #b00011000) (reg #b001))) + (:emitter + (aver (typep ea 'ea)) + (aver (eq :byte (ea-size ea))) + (emit-byte segment #b00001111) + (emit-byte segment #b00011000) + (emit-ea segment ea #b001))) + +(define-instruction prefetcht1 (segment ea) + (:printer prefetch ((op #b00011000) (reg #b010))) + (:emitter + (aver (typep ea 'ea)) + (aver (eq :byte (ea-size ea))) + (emit-byte segment #b00001111) + (emit-byte segment #b00011000) + (emit-ea segment ea #b010))) + +(define-instruction prefetcht2 (segment ea) + (:printer prefetch ((op #b00011000) (reg #b011))) + (:emitter + (aver (typep ea 'ea)) + (aver (eq :byte (ea-size ea))) + (emit-byte segment #b00001111) + (emit-byte segment #b00011000) + (emit-ea segment ea #b011))) + +;;;; interrupt instructions + +(defun snarf-error-junk (sap offset &optional length-only) + (let* ((length (sb!sys:sap-ref-8 sap offset)) + (vector (make-array length :element-type '(unsigned-byte 8)))) + (declare (type sb!sys:system-area-pointer sap) + (type (unsigned-byte 8) length) + (type (simple-array (unsigned-byte 8) (*)) vector)) + (cond (length-only + (values 0 (1+ length) nil nil)) + (t + (sb!kernel:copy-ub8-from-system-area sap (1+ offset) + vector 0 length) + (collect ((sc-offsets) + (lengths)) + (lengths 1) ; the length byte + (let* ((index 0) + (error-number (sb!c:read-var-integer vector index))) + (lengths index) + (loop + (when (>= index length) + (return)) + (let ((old-index index)) + (sc-offsets (sb!c:read-var-integer vector index)) + (lengths (- index old-index)))) + (values error-number + (1+ length) + (sc-offsets) + (lengths)))))))) + +#| +(defmacro break-cases (breaknum &body cases) + (let ((bn-temp (gensym))) + (collect ((clauses)) + (dolist (case cases) + (clauses `((= ,bn-temp ,(car case)) ,@(cdr case)))) + `(let ((,bn-temp ,breaknum)) + (cond ,@(clauses)))))) +|# + +(defun break-control (chunk inst stream dstate) + (declare (ignore inst)) + (flet ((nt (x) (if stream (sb!disassem:note x dstate)))) + ;; FIXME: Make sure that BYTE-IMM-CODE is defined. The genesis + ;; map has it undefined; and it should be easier to look in the target + ;; Lisp (with (DESCRIBE 'BYTE-IMM-CODE)) than to definitively deduce + ;; from first principles whether it's defined in some way that genesis + ;; can't grok. + (case (byte-imm-code chunk dstate) + (#.error-trap + (nt "error trap") + (sb!disassem:handle-break-args #'snarf-error-junk stream dstate)) + (#.cerror-trap + (nt "cerror trap") + (sb!disassem:handle-break-args #'snarf-error-junk stream dstate)) + (#.breakpoint-trap + (nt "breakpoint trap")) + (#.pending-interrupt-trap + (nt "pending interrupt trap")) + (#.halt-trap + (nt "halt trap")) + (#.fun-end-breakpoint-trap + (nt "function end breakpoint trap"))))) + +(define-instruction break (segment code) + (:declare (type (unsigned-byte 8) code)) + (:printer byte-imm ((op #b11001100)) '(:name :tab code) + :control #'break-control) + (:emitter + (emit-byte segment #b11001100) + (emit-byte segment code))) + +(define-instruction int (segment number) + (:declare (type (unsigned-byte 8) number)) + (:printer byte-imm ((op #b11001101))) + (:emitter + (etypecase number + ((member 3) + (emit-byte segment #b11001100)) + ((unsigned-byte 8) + (emit-byte segment #b11001101) + (emit-byte segment number))))) + +(define-instruction into (segment) + (:printer byte ((op #b11001110))) + (:emitter + (emit-byte segment #b11001110))) + +(define-instruction bound (segment reg bounds) + (:emitter + (let ((size (matching-operand-size reg bounds))) + (when (eq size :byte) + (error "can't bounds-test bytes: ~S" reg)) + (maybe-emit-operand-size-prefix segment size) + (emit-byte segment #b01100010) + (emit-ea segment bounds (reg-tn-encoding reg))))) + +(define-instruction iret (segment) + (:printer byte ((op #b11001111))) + (:emitter + (emit-byte segment #b11001111))) + +;;;; processor control + +(define-instruction hlt (segment) + (:printer byte ((op #b11110100))) + (:emitter + (emit-byte segment #b11110100))) + +(define-instruction nop (segment) + (:printer byte ((op #b10010000))) + (:emitter + (emit-byte segment #b10010000))) + +(define-instruction wait (segment) + (:printer byte ((op #b10011011))) + (:emitter + (emit-byte segment #b10011011))) + +(define-instruction lock (segment) + (:printer byte ((op #b11110000))) + (:emitter + (emit-byte segment #b11110000))) + +;;;; miscellaneous hackery + +(define-instruction byte (segment byte) + (:emitter + (emit-byte segment byte))) + +(define-instruction word (segment word) + (:emitter + (emit-word segment word))) + +(define-instruction dword (segment dword) + (:emitter + (emit-dword segment dword))) + +(defun emit-header-data (segment type) + (emit-back-patch segment + 4 + (lambda (segment posn) + (emit-dword segment + (logior type + (ash (+ posn + (component-header-length)) + (- n-widetag-bits + word-shift))))))) + +(define-instruction simple-fun-header-word (segment) + (:emitter + (emit-header-data segment simple-fun-header-widetag))) + +(define-instruction lra-header-word (segment) + (:emitter + (emit-header-data segment return-pc-header-widetag))) + +;;;; fp instructions +;;;; +;;;; FIXME: This section said "added by jrd", which should end up in CREDITS. +;;;; +;;;; Note: We treat the single-precision and double-precision variants +;;;; as separate instructions. + +;;; Load single to st(0). +(define-instruction fld (segment source) + (:printer floating-point ((op '(#b001 #b000)))) + (:emitter + (emit-byte segment #b11011001) + (emit-fp-op segment source #b000))) + +;;; Load double to st(0). +(define-instruction fldd (segment source) + (:printer floating-point ((op '(#b101 #b000)))) + (:printer floating-point-fp ((op '(#b001 #b000)))) + (:emitter + (if (fp-reg-tn-p source) + (emit-byte segment #b11011001) + (emit-byte segment #b11011101)) + (emit-fp-op segment source #b000))) + +;;; Load long to st(0). +(define-instruction fldl (segment source) + (:printer floating-point ((op '(#b011 #b101)))) + (:emitter + (emit-byte segment #b11011011) + (emit-fp-op segment source #b101))) + +;;; Store single from st(0). +(define-instruction fst (segment dest) + (:printer floating-point ((op '(#b001 #b010)))) + (:emitter + (cond ((fp-reg-tn-p dest) + (emit-byte segment #b11011101) + (emit-fp-op segment dest #b010)) + (t + (emit-byte segment #b11011001) + (emit-fp-op segment dest #b010))))) + +;;; Store double from st(0). +(define-instruction fstd (segment dest) + (:printer floating-point ((op '(#b101 #b010)))) + (:printer floating-point-fp ((op '(#b101 #b010)))) + (:emitter + (cond ((fp-reg-tn-p dest) + (emit-byte segment #b11011101) + (emit-fp-op segment dest #b010)) + (t + (emit-byte segment #b11011101) + (emit-fp-op segment dest #b010))))) + +;;; Arithmetic ops are all done with at least one operand at top of +;;; stack. The other operand is is another register or a 32/64 bit +;;; memory loc. + +;;; dtc: I've tried to follow the Intel ASM386 conventions, but note +;;; that these conflict with the Gdb conventions for binops. To reduce +;;; the confusion I've added comments showing the mathamatical +;;; operation and the two syntaxes. By the ASM386 convention the +;;; instruction syntax is: +;;; +;;; Fop Source +;;; or Fop Destination, Source +;;; +;;; If only one operand is given then it is the source and the +;;; destination is ST(0). There are reversed forms of the fsub and +;;; fdiv instructions inducated by an 'R' suffix. +;;; +;;; The mathematical operation for the non-reverse form is always: +;;; destination = destination op source +;;; +;;; For the reversed form it is: +;;; destination = source op destination +;;; +;;; The instructions below only accept one operand at present which is +;;; usually the source. I've hack in extra instructions to implement +;;; the fops with a ST(i) destination, these have a -sti suffix and +;;; the operand is the destination with the source being ST(0). + +;;; Add single: +;;; st(0) = st(0) + memory or st(i). +(define-instruction fadd (segment source) + (:printer floating-point ((op '(#b000 #b000)))) + (:emitter + (emit-byte segment #b11011000) + (emit-fp-op segment source #b000))) + +;;; Add double: +;;; st(0) = st(0) + memory or st(i). +(define-instruction faddd (segment source) + (:printer floating-point ((op '(#b100 #b000)))) + (:printer floating-point-fp ((op '(#b000 #b000)))) + (:emitter + (if (fp-reg-tn-p source) + (emit-byte segment #b11011000) + (emit-byte segment #b11011100)) + (emit-fp-op segment source #b000))) + +;;; Add double destination st(i): +;;; st(i) = st(0) + st(i). +(define-instruction fadd-sti (segment destination) + (:printer floating-point-fp ((op '(#b100 #b000)))) + (:emitter + (aver (fp-reg-tn-p destination)) + (emit-byte segment #b11011100) + (emit-fp-op segment destination #b000))) +;;; with pop +(define-instruction faddp-sti (segment destination) + (:printer floating-point-fp ((op '(#b110 #b000)))) + (:emitter + (aver (fp-reg-tn-p destination)) + (emit-byte segment #b11011110) + (emit-fp-op segment destination #b000))) + +;;; Subtract single: +;;; st(0) = st(0) - memory or st(i). +(define-instruction fsub (segment source) + (:printer floating-point ((op '(#b000 #b100)))) + (:emitter + (emit-byte segment #b11011000) + (emit-fp-op segment source #b100))) + +;;; Subtract single, reverse: +;;; st(0) = memory or st(i) - st(0). +(define-instruction fsubr (segment source) + (:printer floating-point ((op '(#b000 #b101)))) + (:emitter + (emit-byte segment #b11011000) + (emit-fp-op segment source #b101))) + +;;; Subtract double: +;;; st(0) = st(0) - memory or st(i). +(define-instruction fsubd (segment source) + (:printer floating-point ((op '(#b100 #b100)))) + (:printer floating-point-fp ((op '(#b000 #b100)))) + (:emitter + (if (fp-reg-tn-p source) + (emit-byte segment #b11011000) + (emit-byte segment #b11011100)) + (emit-fp-op segment source #b100))) + +;;; Subtract double, reverse: +;;; st(0) = memory or st(i) - st(0). +(define-instruction fsubrd (segment source) + (:printer floating-point ((op '(#b100 #b101)))) + (:printer floating-point-fp ((op '(#b000 #b101)))) + (:emitter + (if (fp-reg-tn-p source) + (emit-byte segment #b11011000) + (emit-byte segment #b11011100)) + (emit-fp-op segment source #b101))) + +;;; Subtract double, destination st(i): +;;; st(i) = st(i) - st(0). +;;; +;;; ASM386 syntax: FSUB ST(i), ST +;;; Gdb syntax: fsubr %st,%st(i) +(define-instruction fsub-sti (segment destination) + (:printer floating-point-fp ((op '(#b100 #b101)))) + (:emitter + (aver (fp-reg-tn-p destination)) + (emit-byte segment #b11011100) + (emit-fp-op segment destination #b101))) +;;; with a pop +(define-instruction fsubp-sti (segment destination) + (:printer floating-point-fp ((op '(#b110 #b101)))) + (:emitter + (aver (fp-reg-tn-p destination)) + (emit-byte segment #b11011110) + (emit-fp-op segment destination #b101))) + +;;; Subtract double, reverse, destination st(i): +;;; st(i) = st(0) - st(i). +;;; +;;; ASM386 syntax: FSUBR ST(i), ST +;;; Gdb syntax: fsub %st,%st(i) +(define-instruction fsubr-sti (segment destination) + (:printer floating-point-fp ((op '(#b100 #b100)))) + (:emitter + (aver (fp-reg-tn-p destination)) + (emit-byte segment #b11011100) + (emit-fp-op segment destination #b100))) +;;; with a pop +(define-instruction fsubrp-sti (segment destination) + (:printer floating-point-fp ((op '(#b110 #b100)))) + (:emitter + (aver (fp-reg-tn-p destination)) + (emit-byte segment #b11011110) + (emit-fp-op segment destination #b100))) + +;;; Multiply single: +;;; st(0) = st(0) * memory or st(i). +(define-instruction fmul (segment source) + (:printer floating-point ((op '(#b000 #b001)))) + (:emitter + (emit-byte segment #b11011000) + (emit-fp-op segment source #b001))) + +;;; Multiply double: +;;; st(0) = st(0) * memory or st(i). +(define-instruction fmuld (segment source) + (:printer floating-point ((op '(#b100 #b001)))) + (:printer floating-point-fp ((op '(#b000 #b001)))) + (:emitter + (if (fp-reg-tn-p source) + (emit-byte segment #b11011000) + (emit-byte segment #b11011100)) + (emit-fp-op segment source #b001))) + +;;; Multiply double, destination st(i): +;;; st(i) = st(i) * st(0). +(define-instruction fmul-sti (segment destination) + (:printer floating-point-fp ((op '(#b100 #b001)))) + (:emitter + (aver (fp-reg-tn-p destination)) + (emit-byte segment #b11011100) + (emit-fp-op segment destination #b001))) + +;;; Divide single: +;;; st(0) = st(0) / memory or st(i). +(define-instruction fdiv (segment source) + (:printer floating-point ((op '(#b000 #b110)))) + (:emitter + (emit-byte segment #b11011000) + (emit-fp-op segment source #b110))) + +;;; Divide single, reverse: +;;; st(0) = memory or st(i) / st(0). +(define-instruction fdivr (segment source) + (:printer floating-point ((op '(#b000 #b111)))) + (:emitter + (emit-byte segment #b11011000) + (emit-fp-op segment source #b111))) + +;;; Divide double: +;;; st(0) = st(0) / memory or st(i). +(define-instruction fdivd (segment source) + (:printer floating-point ((op '(#b100 #b110)))) + (:printer floating-point-fp ((op '(#b000 #b110)))) + (:emitter + (if (fp-reg-tn-p source) + (emit-byte segment #b11011000) + (emit-byte segment #b11011100)) + (emit-fp-op segment source #b110))) + +;;; Divide double, reverse: +;;; st(0) = memory or st(i) / st(0). +(define-instruction fdivrd (segment source) + (:printer floating-point ((op '(#b100 #b111)))) + (:printer floating-point-fp ((op '(#b000 #b111)))) + (:emitter + (if (fp-reg-tn-p source) + (emit-byte segment #b11011000) + (emit-byte segment #b11011100)) + (emit-fp-op segment source #b111))) + +;;; Divide double, destination st(i): +;;; st(i) = st(i) / st(0). +;;; +;;; ASM386 syntax: FDIV ST(i), ST +;;; Gdb syntax: fdivr %st,%st(i) +(define-instruction fdiv-sti (segment destination) + (:printer floating-point-fp ((op '(#b100 #b111)))) + (:emitter + (aver (fp-reg-tn-p destination)) + (emit-byte segment #b11011100) + (emit-fp-op segment destination #b111))) + +;;; Divide double, reverse, destination st(i): +;;; st(i) = st(0) / st(i). +;;; +;;; ASM386 syntax: FDIVR ST(i), ST +;;; Gdb syntax: fdiv %st,%st(i) +(define-instruction fdivr-sti (segment destination) + (:printer floating-point-fp ((op '(#b100 #b110)))) + (:emitter + (aver (fp-reg-tn-p destination)) + (emit-byte segment #b11011100) + (emit-fp-op segment destination #b110))) + +;;; Exchange fr0 with fr(n). (There is no double precision variant.) +(define-instruction fxch (segment source) + (:printer floating-point-fp ((op '(#b001 #b001)))) + (:emitter + (unless (and (tn-p source) + (eq (sb-name (sc-sb (tn-sc source))) 'float-registers)) + (cl:break)) + (emit-byte segment #b11011001) + (emit-fp-op segment source #b001))) + +;;; Push 32-bit integer to st0. +(define-instruction fild (segment source) + (:printer floating-point ((op '(#b011 #b000)))) + (:emitter + (emit-byte segment #b11011011) + (emit-fp-op segment source #b000))) + +;;; Push 64-bit integer to st0. +(define-instruction fildl (segment source) + (:printer floating-point ((op '(#b111 #b101)))) + (:emitter + (emit-byte segment #b11011111) + (emit-fp-op segment source #b101))) + +;;; Store 32-bit integer. +(define-instruction fist (segment dest) + (:printer floating-point ((op '(#b011 #b010)))) + (:emitter + (emit-byte segment #b11011011) + (emit-fp-op segment dest #b010))) + +;;; Store and pop 32-bit integer. +(define-instruction fistp (segment dest) + (:printer floating-point ((op '(#b011 #b011)))) + (:emitter + (emit-byte segment #b11011011) + (emit-fp-op segment dest #b011))) + +;;; Store and pop 64-bit integer. +(define-instruction fistpl (segment dest) + (:printer floating-point ((op '(#b111 #b111)))) + (:emitter + (emit-byte segment #b11011111) + (emit-fp-op segment dest #b111))) + +;;; Store single from st(0) and pop. +(define-instruction fstp (segment dest) + (:printer floating-point ((op '(#b001 #b011)))) + (:emitter + (cond ((fp-reg-tn-p dest) + (emit-byte segment #b11011101) + (emit-fp-op segment dest #b011)) + (t + (emit-byte segment #b11011001) + (emit-fp-op segment dest #b011))))) + +;;; Store double from st(0) and pop. +(define-instruction fstpd (segment dest) + (:printer floating-point ((op '(#b101 #b011)))) + (:printer floating-point-fp ((op '(#b101 #b011)))) + (:emitter + (cond ((fp-reg-tn-p dest) + (emit-byte segment #b11011101) + (emit-fp-op segment dest #b011)) + (t + (emit-byte segment #b11011101) + (emit-fp-op segment dest #b011))))) + +;;; Store long from st(0) and pop. +(define-instruction fstpl (segment dest) + (:printer floating-point ((op '(#b011 #b111)))) + (:emitter + (emit-byte segment #b11011011) + (emit-fp-op segment dest #b111))) + +;;; Decrement stack-top pointer. +(define-instruction fdecstp (segment) + (:printer floating-point-no ((op #b10110))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11110110))) + +;;; Increment stack-top pointer. +(define-instruction fincstp (segment) + (:printer floating-point-no ((op #b10111))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11110111))) + +;;; Free fp register. +(define-instruction ffree (segment dest) + (:printer floating-point-fp ((op '(#b101 #b000)))) + (:emitter + (emit-byte segment #b11011101) + (emit-fp-op segment dest #b000))) + +(define-instruction fabs (segment) + (:printer floating-point-no ((op #b00001))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11100001))) + +(define-instruction fchs (segment) + (:printer floating-point-no ((op #b00000))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11100000))) + +(define-instruction frndint(segment) + (:printer floating-point-no ((op #b11100))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11111100))) + +;;; Initialize NPX. +(define-instruction fninit(segment) + (:printer floating-point-5 ((op #b00011))) + (:emitter + (emit-byte segment #b11011011) + (emit-byte segment #b11100011))) + +;;; Store Status Word to AX. +(define-instruction fnstsw(segment) + (:printer floating-point-st ((op #b00000))) + (:emitter + (emit-byte segment #b11011111) + (emit-byte segment #b11100000))) + +;;; Load Control Word. +;;; +;;; src must be a memory location +(define-instruction fldcw(segment src) + (:printer floating-point ((op '(#b001 #b101)))) + (:emitter + (emit-byte segment #b11011001) + (emit-fp-op segment src #b101))) + +;;; Store Control Word. +(define-instruction fnstcw(segment dst) + (:printer floating-point ((op '(#b001 #b111)))) + (:emitter + (emit-byte segment #b11011001) + (emit-fp-op segment dst #b111))) + +;;; Store FP Environment. +(define-instruction fstenv(segment dst) + (:printer floating-point ((op '(#b001 #b110)))) + (:emitter + (emit-byte segment #b11011001) + (emit-fp-op segment dst #b110))) + +;;; Restore FP Environment. +(define-instruction fldenv(segment src) + (:printer floating-point ((op '(#b001 #b100)))) + (:emitter + (emit-byte segment #b11011001) + (emit-fp-op segment src #b100))) + +;;; Save FP State. +(define-instruction fsave(segment dst) + (:printer floating-point ((op '(#b101 #b110)))) + (:emitter + (emit-byte segment #b11011101) + (emit-fp-op segment dst #b110))) + +;;; Restore FP State. +(define-instruction frstor(segment src) + (:printer floating-point ((op '(#b101 #b100)))) + (:emitter + (emit-byte segment #b11011101) + (emit-fp-op segment src #b100))) + +;;; Clear exceptions. +(define-instruction fnclex(segment) + (:printer floating-point-5 ((op #b00010))) + (:emitter + (emit-byte segment #b11011011) + (emit-byte segment #b11100010))) + +;;; comparison +(define-instruction fcom (segment src) + (:printer floating-point ((op '(#b000 #b010)))) + (:emitter + (emit-byte segment #b11011000) + (emit-fp-op segment src #b010))) + +(define-instruction fcomd (segment src) + (:printer floating-point ((op '(#b100 #b010)))) + (:printer floating-point-fp ((op '(#b000 #b010)))) + (:emitter + (if (fp-reg-tn-p src) + (emit-byte segment #b11011000) + (emit-byte segment #b11011100)) + (emit-fp-op segment src #b010))) + +;;; Compare ST1 to ST0, popping the stack twice. +(define-instruction fcompp (segment) + (:printer floating-point-3 ((op '(#b110 #b011001)))) + (:emitter + (emit-byte segment #b11011110) + (emit-byte segment #b11011001))) + +;;; unordered comparison +(define-instruction fucom (segment src) + (:printer floating-point-fp ((op '(#b101 #b100)))) + (:emitter + (aver (fp-reg-tn-p src)) + (emit-byte segment #b11011101) + (emit-fp-op segment src #b100))) + +(define-instruction ftst (segment) + (:printer floating-point-no ((op #b00100))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11100100))) + +;;;; 80387 specials + +(define-instruction fsqrt(segment) + (:printer floating-point-no ((op #b11010))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11111010))) + +(define-instruction fscale(segment) + (:printer floating-point-no ((op #b11101))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11111101))) + +(define-instruction fxtract(segment) + (:printer floating-point-no ((op #b10100))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11110100))) + +(define-instruction fsin(segment) + (:printer floating-point-no ((op #b11110))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11111110))) + +(define-instruction fcos(segment) + (:printer floating-point-no ((op #b11111))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11111111))) + +(define-instruction fprem1(segment) + (:printer floating-point-no ((op #b10101))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11110101))) + +(define-instruction fprem(segment) + (:printer floating-point-no ((op #b11000))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11111000))) + +(define-instruction fxam (segment) + (:printer floating-point-no ((op #b00101))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11100101))) + +;;; These do push/pop to stack and need special handling +;;; in any VOPs that use them. See the book. + +;;; st0 <- st1*log2(st0) +(define-instruction fyl2x(segment) ; pops stack + (:printer floating-point-no ((op #b10001))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11110001))) + +(define-instruction fyl2xp1(segment) + (:printer floating-point-no ((op #b11001))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11111001))) + +(define-instruction f2xm1(segment) + (:printer floating-point-no ((op #b10000))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11110000))) + +(define-instruction fptan(segment) ; st(0) <- 1; st(1) <- tan + (:printer floating-point-no ((op #b10010))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11110010))) + +(define-instruction fpatan(segment) ; POPS STACK + (:printer floating-point-no ((op #b10011))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11110011))) + +;;;; loading constants + +(define-instruction fldz(segment) + (:printer floating-point-no ((op #b01110))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11101110))) + +(define-instruction fld1(segment) + (:printer floating-point-no ((op #b01000))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11101000))) + +(define-instruction fldpi(segment) + (:printer floating-point-no ((op #b01011))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11101011))) + +(define-instruction fldl2t(segment) + (:printer floating-point-no ((op #b01001))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11101001))) + +(define-instruction fldl2e(segment) + (:printer floating-point-no ((op #b01010))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11101010))) + +(define-instruction fldlg2(segment) + (:printer floating-point-no ((op #b01100))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11101100))) + +(define-instruction fldln2(segment) + (:printer floating-point-no ((op #b01101))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11101101))) diff --git a/sbcl-src/src-093/compiler/x86/vm.lisp b/sbcl-src/src-093/compiler/x86/vm.lisp new file mode 100644 index 0000000..833dd73 --- /dev/null +++ b/sbcl-src/src-093/compiler/x86/vm.lisp @@ -0,0 +1,451 @@ +;;;; miscellaneous VM definition noise for the x86 + +;;;; This software is part of the SBCL system. See the README file for +;;;; more information. +;;;; +;;;; This software is derived from the CMU CL system, which was +;;;; written at Carnegie Mellon University and released into the +;;;; public domain. The software is in the public domain and is +;;;; provided with absolutely no warranty. See the COPYING and CREDITS +;;;; files for more information. + +(in-package "SB!VM") + +;;; the size of an INTEGER representation of a SYSTEM-AREA-POINTER, i.e. +;;; size of a native memory address +(deftype sap-int () '(unsigned-byte 32)) + +;;;; register specs + +(eval-when (:compile-toplevel :load-toplevel :execute) + (defvar *byte-register-names* (make-array 8 :initial-element nil)) + (defvar *word-register-names* (make-array 16 :initial-element nil)) + (defvar *dword-register-names* (make-array 16 :initial-element nil)) + (defvar *float-register-names* (make-array 8 :initial-element nil))) + +(macrolet ((defreg (name offset size) + (let ((offset-sym (symbolicate name "-OFFSET")) + (names-vector (symbolicate "*" size "-REGISTER-NAMES*"))) + `(progn + (eval-when (:compile-toplevel :load-toplevel :execute) + ;; EVAL-WHEN is necessary because stuff like #.EAX-OFFSET + ;; (in the same file) depends on compile-time evaluation + ;; of the DEFCONSTANT. -- AL 20010224 + (def!constant ,offset-sym ,offset)) + (setf (svref ,names-vector ,offset-sym) + ,(symbol-name name))))) + ;; FIXME: It looks to me as though DEFREGSET should also + ;; define the related *FOO-REGISTER-NAMES* variable. + (defregset (name &rest regs) + `(eval-when (:compile-toplevel :load-toplevel :execute) + (defparameter ,name + (list ,@(mapcar (lambda (name) + (symbolicate name "-OFFSET")) + regs)))))) + + ;; byte registers + ;; + ;; Note: the encoding here is different than that used by the chip. + ;; We use this encoding so that the compiler thinks that AX (and + ;; EAX) overlap AL and AH instead of AL and CL. + (defreg al 0 :byte) + (defreg ah 1 :byte) + (defreg cl 2 :byte) + (defreg ch 3 :byte) + (defreg dl 4 :byte) + (defreg dh 5 :byte) + (defreg bl 6 :byte) + (defreg bh 7 :byte) + (defregset *byte-regs* al ah cl ch dl dh bl bh) + + ;; word registers + (defreg ax 0 :word) + (defreg cx 2 :word) + (defreg dx 4 :word) + (defreg bx 6 :word) + (defreg sp 8 :word) + (defreg bp 10 :word) + (defreg si 12 :word) + (defreg di 14 :word) + (defregset *word-regs* ax cx dx bx si di) + + ;; double word registers + (defreg eax 0 :dword) + (defreg ecx 2 :dword) + (defreg edx 4 :dword) + (defreg ebx 6 :dword) + (defreg esp 8 :dword) + (defreg ebp 10 :dword) + (defreg esi 12 :dword) + (defreg edi 14 :dword) + (defregset *dword-regs* eax ecx edx ebx esi edi) + + ;; floating point registers + (defreg fr0 0 :float) + (defreg fr1 1 :float) + (defreg fr2 2 :float) + (defreg fr3 3 :float) + (defreg fr4 4 :float) + (defreg fr5 5 :float) + (defreg fr6 6 :float) + (defreg fr7 7 :float) + (defregset *float-regs* fr0 fr1 fr2 fr3 fr4 fr5 fr6 fr7) + + ;; registers used to pass arguments + ;; + ;; the number of arguments/return values passed in registers + (def!constant register-arg-count 3) + ;; names and offsets for registers used to pass arguments + (eval-when (:compile-toplevel :load-toplevel :execute) + (defparameter *register-arg-names* '(edx edi esi))) + (defregset *register-arg-offsets* edx edi esi)) + +;;;; SB definitions + +;;; Despite the fact that there are only 8 different registers, we consider +;;; them 16 in order to describe the overlap of byte registers. The only +;;; thing we need to represent is what registers overlap. Therefore, we +;;; consider bytes to take one unit, and words or dwords to take two. We +;;; don't need to tell the difference between words and dwords, because +;;; you can't put two words in a dword register. +(define-storage-base registers :finite :size 16) + +;;; jrd changed this from size 1 to size 8. It doesn't seem to make much +;;; sense to use the 387's idea of a stack; 8 separate registers is easier +;;; to deal with. +;;; the old way: +;;; (define-storage-base float-registers :finite :size 1) +;;; the new way: +(define-storage-base float-registers :finite :size 8) + +(define-storage-base stack :unbounded :size 8) +(define-storage-base constant :non-packed) +(define-storage-base immediate-constant :non-packed) +(define-storage-base noise :unbounded :size 2) + +;;;; SC definitions + +;;; a handy macro so we don't have to keep changing all the numbers whenever +;;; we insert a new storage class +;;; +(defmacro !define-storage-classes (&rest classes) + (collect ((forms)) + (let ((index 0)) + (dolist (class classes) + (let* ((sc-name (car class)) + (constant-name (symbolicate sc-name "-SC-NUMBER"))) + (forms `(define-storage-class ,sc-name ,index + ,@(cdr class))) + (forms `(def!constant ,constant-name ,index)) + (incf index)))) + `(progn + ,@(forms)))) + +;;; The DEFINE-STORAGE-CLASS call for CATCH-BLOCK refers to the size +;;; of CATCH-BLOCK. The size of CATCH-BLOCK isn't calculated until +;;; later in the build process, and the calculation is entangled with +;;; code which has lots of predependencies, including dependencies on +;;; the prior call of DEFINE-STORAGE-CLASS. The proper way to +;;; unscramble this would be to untangle the code, so that the code +;;; which calculates the size of CATCH-BLOCK can be separated from the +;;; other lots-of-dependencies code, so that the code which calculates +;;; the size of CATCH-BLOCK can be executed early, so that this value +;;; is known properly at this point in compilation. However, that +;;; would be a lot of editing of code that I (WHN 19990131) can't test +;;; until the project is complete. So instead, I set the correct value +;;; by hand here (a sort of nondeterministic guess of the right +;;; answer:-) and add an assertion later, after the value is +;;; calculated, that the original guess was correct. +;;; +;;; (What a KLUDGE! Anyone who wants to come in and clean up this mess +;;; has my gratitude.) (FIXME: Maybe this should be me..) +(eval-when (:compile-toplevel :load-toplevel :execute) + (def!constant kludge-nondeterministic-catch-block-size 6)) + +(!define-storage-classes + + ;; non-immediate constants in the constant pool + (constant constant) + + ;; some FP constants can be generated in the i387 silicon + (fp-constant immediate-constant) + + (immediate immediate-constant) + + ;; + ;; the stacks + ;; + + ;; the control stack + (control-stack stack) ; may be pointers, scanned by GC + + ;; the non-descriptor stacks + (signed-stack stack) ; (signed-byte 32) + (unsigned-stack stack) ; (unsigned-byte 32) + (character-stack stack) ; non-descriptor characters. + (sap-stack stack) ; System area pointers. + (single-stack stack) ; single-floats + (double-stack stack :element-size 2) ; double-floats. + #!+long-float + (long-stack stack :element-size 3) ; long-floats. + (complex-single-stack stack :element-size 2) ; complex-single-floats + (complex-double-stack stack :element-size 4) ; complex-double-floats + #!+long-float + (complex-long-stack stack :element-size 6) ; complex-long-floats + + ;; + ;; magic SCs + ;; + + (ignore-me noise) + + ;; + ;; things that can go in the integer registers + ;; + + ;; On the X86, we don't have to distinguish between descriptor and + ;; non-descriptor registers, because of the conservative GC. + ;; Therefore, we use different scs only to distinguish between + ;; descriptor and non-descriptor values and to specify size. + + ;; immediate descriptor objects. Don't have to be seen by GC, but nothing + ;; bad will happen if they are. (fixnums, characters, header values, etc). + (any-reg registers + :locations #.*dword-regs* + :element-size 2 +; :reserve-locations (#.eax-offset) + :constant-scs (immediate) + :save-p t + :alternate-scs (control-stack)) + + ;; pointer descriptor objects -- must be seen by GC + (descriptor-reg registers + :locations #.*dword-regs* + :element-size 2 +; :reserve-locations (#.eax-offset) + :constant-scs (constant immediate) + :save-p t + :alternate-scs (control-stack)) + + ;; non-descriptor characters + (character-reg registers + :locations #!-sb-unicode #.*byte-regs* + #!+sb-unicode #.*dword-regs* + #!-sb-unicode #!-sb-unicode + :reserve-locations (#.ah-offset #.al-offset) + :constant-scs (immediate) + :save-p t + :alternate-scs (character-stack)) + + ;; non-descriptor SAPs (arbitrary pointers into address space) + (sap-reg registers + :locations #.*dword-regs* + :element-size 2 +; :reserve-locations (#.eax-offset) + :constant-scs (immediate) + :save-p t + :alternate-scs (sap-stack)) + + ;; non-descriptor (signed or unsigned) numbers + (signed-reg registers + :locations #.*dword-regs* + :element-size 2 +; :reserve-locations (#.eax-offset) + :constant-scs (immediate) + :save-p t + :alternate-scs (signed-stack)) + (unsigned-reg registers + :locations #.*dword-regs* + :element-size 2 +; :reserve-locations (#.eax-offset) + :constant-scs (immediate) + :save-p t + :alternate-scs (unsigned-stack)) + + ;; miscellaneous objects that must not be seen by GC. Used only as + ;; temporaries. + (word-reg registers + :locations #.*word-regs* + :element-size 2 +; :reserve-locations (#.ax-offset) + ) + (byte-reg registers + :locations #.*byte-regs* +; :reserve-locations (#.al-offset #.ah-offset) + ) + + ;; that can go in the floating point registers + + ;; non-descriptor SINGLE-FLOATs + (single-reg float-registers + :locations (0 1 2 3 4 5 6 7) + :constant-scs (fp-constant) + :save-p t + :alternate-scs (single-stack)) + + ;; non-descriptor DOUBLE-FLOATs + (double-reg float-registers + :locations (0 1 2 3 4 5 6 7) + :constant-scs (fp-constant) + :save-p t + :alternate-scs (double-stack)) + + ;; non-descriptor LONG-FLOATs + #!+long-float + (long-reg float-registers + :locations (0 1 2 3 4 5 6 7) + :constant-scs (fp-constant) + :save-p t + :alternate-scs (long-stack)) + + (complex-single-reg float-registers + :locations (0 2 4 6) + :element-size 2 + :constant-scs () + :save-p t + :alternate-scs (complex-single-stack)) + + (complex-double-reg float-registers + :locations (0 2 4 6) + :element-size 2 + :constant-scs () + :save-p t + :alternate-scs (complex-double-stack)) + + #!+long-float + (complex-long-reg float-registers + :locations (0 2 4 6) + :element-size 2 + :constant-scs () + :save-p t + :alternate-scs (complex-long-stack)) + + ;; a catch or unwind block + (catch-block stack :element-size kludge-nondeterministic-catch-block-size)) + +(eval-when (:compile-toplevel :load-toplevel :execute) +(defparameter *byte-sc-names* + '(#!-sb-unicode character-reg byte-reg #!-sb-unicode character-stack)) +(defparameter *word-sc-names* '(word-reg)) +(defparameter *dword-sc-names* + '(any-reg descriptor-reg sap-reg signed-reg unsigned-reg control-stack + signed-stack unsigned-stack sap-stack single-stack + #!+sb-unicode character-reg #!+sb-unicode character-stack constant)) +;;; added by jrd. I guess the right thing to do is to treat floats +;;; as a separate size... +;;; +;;; These are used to (at least) determine operand size. +(defparameter *float-sc-names* '(single-reg)) +(defparameter *double-sc-names* '(double-reg double-stack)) +) ; EVAL-WHEN + +;;;; miscellaneous TNs for the various registers + +(macrolet ((def-misc-reg-tns (sc-name &rest reg-names) + (collect ((forms)) + (dolist (reg-name reg-names) + (let ((tn-name (symbolicate reg-name "-TN")) + (offset-name (symbolicate reg-name "-OFFSET"))) + ;; FIXME: It'd be good to have the special + ;; variables here be named with the *FOO* + ;; convention. + (forms `(defparameter ,tn-name + (make-random-tn :kind :normal + :sc (sc-or-lose ',sc-name) + :offset + ,offset-name))))) + `(progn ,@(forms))))) + + (def-misc-reg-tns unsigned-reg eax ebx ecx edx ebp esp edi esi) + (def-misc-reg-tns word-reg ax bx cx dx bp sp di si) + (def-misc-reg-tns byte-reg al ah bl bh cl ch dl dh) + (def-misc-reg-tns single-reg fr0 fr1 fr2 fr3 fr4 fr5 fr6 fr7)) + +;;; TNs for registers used to pass arguments +(defparameter *register-arg-tns* + (mapcar (lambda (register-arg-name) + (symbol-value (symbolicate register-arg-name "-TN"))) + *register-arg-names*)) + +;;; FIXME: doesn't seem to be used in SBCL +#| +;;; added by pw +(defparameter fp-constant-tn + (make-random-tn :kind :normal + :sc (sc-or-lose 'fp-constant) + :offset 31)) ; Offset doesn't get used. +|# + +;;; If value can be represented as an immediate constant, then return +;;; the appropriate SC number, otherwise return NIL. +(!def-vm-support-routine immediate-constant-sc (value) + (typecase value + ((or (integer #.sb!xc:most-negative-fixnum #.sb!xc:most-positive-fixnum) + #-sb-xc-host system-area-pointer character) + (sc-number-or-lose 'immediate)) + (symbol + (when (static-symbol-p value) + (sc-number-or-lose 'immediate))) + (single-float + (when (or (eql value 0f0) (eql value 1f0)) + (sc-number-or-lose 'fp-constant))) + (double-float + (when (or (eql value 0d0) (eql value 1d0)) + (sc-number-or-lose 'fp-constant))) + #!+long-float + (long-float + (when (or (eql value 0l0) (eql value 1l0) + (eql value pi) + (eql value (log 10l0 2l0)) + (eql value (log 2.718281828459045235360287471352662L0 2l0)) + (eql value (log 2l0 10l0)) + (eql value (log 2l0 2.718281828459045235360287471352662L0))) + (sc-number-or-lose 'fp-constant))))) + +;;;; miscellaneous function call parameters + +;;; offsets of special stack frame locations +(def!constant ocfp-save-offset 0) +(def!constant return-pc-save-offset 1) +(def!constant code-save-offset 2) + +;;; FIXME: This is a bad comment (changed since when?) and there are others +;;; like it in this file. It'd be nice to clarify them. Failing that deleting +;;; them or flagging them with KLUDGE might be better than nothing. +;;; +;;; names of these things seem to have changed. these aliases by jrd +(def!constant lra-save-offset return-pc-save-offset) + +(def!constant cfp-offset ebp-offset) ; pfw - needed by stuff in /code + ; related to signal context stuff + +;;; This is used by the debugger. +(def!constant single-value-return-byte-offset 2) + +;;; This function is called by debug output routines that want a pretty name +;;; for a TN's location. It returns a thing that can be printed with PRINC. +(!def-vm-support-routine location-print-name (tn) + (declare (type tn tn)) + (let* ((sc (tn-sc tn)) + (sb (sb-name (sc-sb sc))) + (offset (tn-offset tn))) + (ecase sb + (registers + (let* ((sc-name (sc-name sc)) + (name-vec (cond ((member sc-name *byte-sc-names*) + *byte-register-names*) + ((member sc-name *word-sc-names*) + *word-register-names*) + ((member sc-name *dword-sc-names*) + *dword-register-names*)))) + (or (and name-vec + (< -1 offset (length name-vec)) + (svref name-vec offset)) + ;; FIXME: Shouldn't this be an ERROR? + (format nil "" offset sc-name)))) + (float-registers (format nil "FR~D" offset)) + (stack (format nil "S~D" offset)) + (constant (format nil "Const~D" offset)) + (immediate-constant "Immed") + (noise (symbol-name (sc-name sc)))))) +;;; FIXME: Could this, and everything that uses it, be made #!+SB-SHOW? diff --git a/sbcl-src/src/compiler/x86/insts.lisp b/sbcl-src/src/compiler/x86/insts.lisp new file mode 100644 index 0000000..da683b8 --- /dev/null +++ b/sbcl-src/src/compiler/x86/insts.lisp @@ -0,0 +1,2794 @@ +;;;; that part of the description of the x86 instruction set (for +;;;; 80386 and above) which can live on the cross-compilation host + +;;;; This software is part of the SBCL system. See the README file for +;;;; more information. +;;;; +;;;; This software is derived from the CMU CL system, which was +;;;; written at Carnegie Mellon University and released into the +;;;; public domain. The software is in the public domain and is +;;;; provided with absolutely no warranty. See the COPYING and CREDITS +;;;; files for more information. + +(in-package "SB!VM") +;;; FIXME: SB!DISASSEM: prefixes are used so widely in this file that +;;; I wonder whether the separation of the disassembler from the +;;; virtual machine is valid or adds value. + +;;; Note: In CMU CL, this used to be a call to SET-DISASSEM-PARAMS. +(setf sb!disassem:*disassem-inst-alignment-bytes* 1) + +(deftype reg () '(unsigned-byte 3)) + +(def!constant +default-operand-size+ :dword) + +(eval-when (#-sb-xc :compile-toplevel :load-toplevel :execute) + +(defun offset-next (value dstate) + (declare (type integer value) + (type sb!disassem:disassem-state dstate)) + (+ (sb!disassem:dstate-next-addr dstate) value)) + +(defparameter *default-address-size* + ;; Actually, :DWORD is the only one really supported. + :dword) + +(defparameter *byte-reg-names* + #(al cl dl bl ah ch dh bh)) +(defparameter *word-reg-names* + #(ax cx dx bx sp bp si di)) +(defparameter *dword-reg-names* + #(eax ecx edx ebx esp ebp esi edi)) + +(defun print-reg-with-width (value width stream dstate) + (declare (ignore dstate)) + (princ (aref (ecase width + (:byte *byte-reg-names*) + (:word *word-reg-names*) + (:dword *dword-reg-names*)) + value) + stream) + ;; XXX plus should do some source-var notes + ) + +(defun print-reg (value stream dstate) + (declare (type reg value) + (type stream stream) + (type sb!disassem:disassem-state dstate)) + (print-reg-with-width value + (sb!disassem:dstate-get-prop dstate 'width) + stream + dstate)) + +(defun print-word-reg (value stream dstate) + (declare (type reg value) + (type stream stream) + (type sb!disassem:disassem-state dstate)) + (print-reg-with-width value + (or (sb!disassem:dstate-get-prop dstate 'word-width) + +default-operand-size+) + stream + dstate)) + +(defun print-byte-reg (value stream dstate) + (declare (type reg value) + (type stream stream) + (type sb!disassem:disassem-state dstate)) + (print-reg-with-width value :byte stream dstate)) + +(defun print-addr-reg (value stream dstate) + (declare (type reg value) + (type stream stream) + (type sb!disassem:disassem-state dstate)) + (print-reg-with-width value *default-address-size* stream dstate)) + +(defun print-reg/mem (value stream dstate) + (declare (type (or list reg) value) + (type stream stream) + (type sb!disassem:disassem-state dstate)) + (if (typep value 'reg) + (print-reg value stream dstate) + (print-mem-access value stream nil dstate))) + +;; Same as print-reg/mem, but prints an explicit size indicator for +;; memory references. +(defun print-sized-reg/mem (value stream dstate) + (declare (type (or list reg) value) + (type stream stream) + (type sb!disassem:disassem-state dstate)) + (if (typep value 'reg) + (print-reg value stream dstate) + (print-mem-access value stream t dstate))) + +(defun print-byte-reg/mem (value stream dstate) + (declare (type (or list reg) value) + (type stream stream) + (type sb!disassem:disassem-state dstate)) + (if (typep value 'reg) + (print-byte-reg value stream dstate) + (print-mem-access value stream t dstate))) + +(defun print-word-reg/mem (value stream dstate) + (declare (type (or list reg) value) + (type stream stream) + (type sb!disassem:disassem-state dstate)) + (if (typep value 'reg) + (print-word-reg value stream dstate) + (print-mem-access value stream nil dstate))) + +(defun print-label (value stream dstate) + (declare (ignore dstate)) + (sb!disassem:princ16 value stream)) + +;;; Returns either an integer, meaning a register, or a list of +;;; (BASE-REG OFFSET INDEX-REG INDEX-SCALE), where any component +;;; may be missing or nil to indicate that it's not used or has the +;;; obvious default value (e.g., 1 for the index-scale). +(defun prefilter-reg/mem (value dstate) + (declare (type list value) + (type sb!disassem:disassem-state dstate)) + (let ((mod (car value)) + (r/m (cadr value))) + (declare (type (unsigned-byte 2) mod) + (type (unsigned-byte 3) r/m)) + (cond ((= mod #b11) + ;; registers + r/m) + ((= r/m #b100) + ;; sib byte + (let ((sib (sb!disassem:read-suffix 8 dstate))) + (declare (type (unsigned-byte 8) sib)) + (let ((base-reg (ldb (byte 3 0) sib)) + (index-reg (ldb (byte 3 3) sib)) + (index-scale (ldb (byte 2 6) sib))) + (declare (type (unsigned-byte 3) base-reg index-reg) + (type (unsigned-byte 2) index-scale)) + (let* ((offset + (case mod + (#b00 + (if (= base-reg #b101) + (sb!disassem:read-signed-suffix 32 dstate) + nil)) + (#b01 + (sb!disassem:read-signed-suffix 8 dstate)) + (#b10 + (sb!disassem:read-signed-suffix 32 dstate))))) + (list (if (and (= mod #b00) (= base-reg #b101)) nil base-reg) + offset + (if (= index-reg #b100) nil index-reg) + (ash 1 index-scale)))))) + ((and (= mod #b00) (= r/m #b101)) + (list nil (sb!disassem:read-signed-suffix 32 dstate)) ) + ((= mod #b00) + (list r/m)) + ((= mod #b01) + (list r/m (sb!disassem:read-signed-suffix 8 dstate))) + (t ; (= mod #b10) + (list r/m (sb!disassem:read-signed-suffix 32 dstate)))))) + + +;;; This is a sort of bogus prefilter that just stores the info globally for +;;; other people to use; it probably never gets printed. +(defun prefilter-width (value dstate) + (setf (sb!disassem:dstate-get-prop dstate 'width) + (if (zerop value) + :byte + (let ((word-width + ;; set by a prefix instruction + (or (sb!disassem:dstate-get-prop dstate 'word-width) + +default-operand-size+))) + (when (not (eql word-width +default-operand-size+)) + ;; Reset it. + (setf (sb!disassem:dstate-get-prop dstate 'word-width) + +default-operand-size+)) + word-width)))) + +(defun read-address (value dstate) + (declare (ignore value)) ; always nil anyway + (sb!disassem:read-suffix (width-bits *default-address-size*) dstate)) + +(defun width-bits (width) + (ecase width + (:byte 8) + (:word 16) + (:dword 32) + (:dqword 128) + (:float 32) + (:double 64))) + +) ; EVAL-WHEN + +;;;; disassembler argument types + +(sb!disassem:define-arg-type displacement + :sign-extend t + :use-label #'offset-next + :printer (lambda (value stream dstate) + (sb!disassem:maybe-note-assembler-routine value nil dstate) + (print-label value stream dstate))) + +(sb!disassem:define-arg-type accum + :printer (lambda (value stream dstate) + (declare (ignore value) + (type stream stream) + (type sb!disassem:disassem-state dstate)) + (print-reg 0 stream dstate))) + +(sb!disassem:define-arg-type word-accum + :printer (lambda (value stream dstate) + (declare (ignore value) + (type stream stream) + (type sb!disassem:disassem-state dstate)) + (print-word-reg 0 stream dstate))) + +(sb!disassem:define-arg-type reg + :printer #'print-reg) + +(sb!disassem:define-arg-type addr-reg + :printer #'print-addr-reg) + +(sb!disassem:define-arg-type word-reg + :printer #'print-word-reg) + +(sb!disassem:define-arg-type imm-addr + :prefilter #'read-address + :printer #'print-label) + +(sb!disassem:define-arg-type imm-data + :prefilter (lambda (value dstate) + (declare (ignore value)) ; always nil anyway + (sb!disassem:read-suffix + (width-bits (sb!disassem:dstate-get-prop dstate 'width)) + dstate))) + +(sb!disassem:define-arg-type signed-imm-data + :prefilter (lambda (value dstate) + (declare (ignore value)) ; always nil anyway + (let ((width (sb!disassem:dstate-get-prop dstate 'width))) + (sb!disassem:read-signed-suffix (width-bits width) dstate)))) + +(sb!disassem:define-arg-type signed-imm-byte + :prefilter (lambda (value dstate) + (declare (ignore value)) ; always nil anyway + (sb!disassem:read-signed-suffix 8 dstate))) + +(sb!disassem:define-arg-type signed-imm-dword + :prefilter (lambda (value dstate) + (declare (ignore value)) ; always nil anyway + (sb!disassem:read-signed-suffix 32 dstate))) + +(sb!disassem:define-arg-type imm-word + :prefilter (lambda (value dstate) + (declare (ignore value)) ; always nil anyway + (let ((width + (or (sb!disassem:dstate-get-prop dstate 'word-width) + +default-operand-size+))) + (sb!disassem:read-suffix (width-bits width) dstate)))) + +(sb!disassem:define-arg-type signed-imm-word + :prefilter (lambda (value dstate) + (declare (ignore value)) ; always nil anyway + (let ((width + (or (sb!disassem:dstate-get-prop dstate 'word-width) + +default-operand-size+))) + (sb!disassem:read-signed-suffix (width-bits width) dstate)))) + +;;; needed for the ret imm16 instruction +(sb!disassem:define-arg-type imm-word-16 + :prefilter (lambda (value dstate) + (declare (ignore value)) ; always nil anyway + (sb!disassem:read-suffix 16 dstate))) + +(sb!disassem:define-arg-type reg/mem + :prefilter #'prefilter-reg/mem + :printer #'print-reg/mem) +(sb!disassem:define-arg-type sized-reg/mem + ;; Same as reg/mem, but prints an explicit size indicator for + ;; memory references. + :prefilter #'prefilter-reg/mem + :printer #'print-sized-reg/mem) +(sb!disassem:define-arg-type byte-reg/mem + :prefilter #'prefilter-reg/mem + :printer #'print-byte-reg/mem) +(sb!disassem:define-arg-type word-reg/mem + :prefilter #'prefilter-reg/mem + :printer #'print-word-reg/mem) + +;;; added by jrd +(eval-when (#-sb-xc :compile-toplevel :load-toplevel :execute) +(defun print-fp-reg (value stream dstate) + (declare (ignore dstate)) + (format stream "FR~D" value)) +(defun prefilter-fp-reg (value dstate) + ;; just return it + (declare (ignore dstate)) + value) +) ; EVAL-WHEN +(sb!disassem:define-arg-type fp-reg + :prefilter #'prefilter-fp-reg + :printer #'print-fp-reg) + +(sb!disassem:define-arg-type width + :prefilter #'prefilter-width + :printer (lambda (value stream dstate) + (if;; (zerop value) + (or (null value) + (and (numberp value) (zerop value))) ; zzz jrd + (princ 'b stream) + (let ((word-width + ;; set by a prefix instruction + (or (sb!disassem:dstate-get-prop dstate 'word-width) + +default-operand-size+))) + (princ (schar (symbol-name word-width) 0) stream))))) + +(eval-when (:compile-toplevel :load-toplevel :execute) +(defparameter *conditions* + '((:o . 0) + (:no . 1) + (:b . 2) (:nae . 2) (:c . 2) + (:nb . 3) (:ae . 3) (:nc . 3) + (:eq . 4) (:e . 4) (:z . 4) + (:ne . 5) (:nz . 5) + (:be . 6) (:na . 6) + (:nbe . 7) (:a . 7) + (:s . 8) + (:ns . 9) + (:p . 10) (:pe . 10) + (:np . 11) (:po . 11) + (:l . 12) (:nge . 12) + (:nl . 13) (:ge . 13) + (:le . 14) (:ng . 14) + (:nle . 15) (:g . 15))) +(defparameter *condition-name-vec* + (let ((vec (make-array 16 :initial-element nil))) + (dolist (cond *conditions*) + (when (null (aref vec (cdr cond))) + (setf (aref vec (cdr cond)) (car cond)))) + vec)) +) ; EVAL-WHEN + +;;; Set assembler parameters. (In CMU CL, this was done with +;;; a call to a macro DEF-ASSEMBLER-PARAMS.) +(eval-when (:compile-toplevel :load-toplevel :execute) + (setf sb!assem:*assem-scheduler-p* nil)) + +(sb!disassem:define-arg-type condition-code + :printer *condition-name-vec*) + +(defun conditional-opcode (condition) + (cdr (assoc condition *conditions* :test #'eq))) + +;;;; disassembler instruction formats + +(eval-when (:compile-toplevel :execute) + (defun swap-if (direction field1 separator field2) + `(:if (,direction :constant 0) + (,field1 ,separator ,field2) + (,field2 ,separator ,field1)))) + +(sb!disassem:define-instruction-format (byte 8 :default-printer '(:name)) + (op :field (byte 8 0)) + ;; optional fields + (accum :type 'accum) + (imm)) + +(sb!disassem:define-instruction-format (simple 8) + (op :field (byte 7 1)) + (width :field (byte 1 0) :type 'width) + ;; optional fields + (accum :type 'accum) + (imm)) + +;;; Same as simple, but with direction bit +(sb!disassem:define-instruction-format (simple-dir 8 :include 'simple) + (op :field (byte 6 2)) + (dir :field (byte 1 1))) + +;;; Same as simple, but with the immediate value occurring by default, +;;; and with an appropiate printer. +(sb!disassem:define-instruction-format (accum-imm 8 + :include 'simple + :default-printer '(:name + :tab accum ", " imm)) + (imm :type 'imm-data)) + +(sb!disassem:define-instruction-format (reg-no-width 8 + :default-printer '(:name :tab reg)) + (op :field (byte 5 3)) + (reg :field (byte 3 0) :type 'word-reg) + ;; optional fields + (accum :type 'word-accum) + (imm)) + +;;; adds a width field to reg-no-width +(sb!disassem:define-instruction-format (reg 8 + :default-printer '(:name :tab reg)) + (op :field (byte 4 4)) + (width :field (byte 1 3) :type 'width) + (reg :field (byte 3 0) :type 'reg) + ;; optional fields + (accum :type 'accum) + (imm) + ) + +;;; Same as reg, but with direction bit +(sb!disassem:define-instruction-format (reg-dir 8 :include 'reg) + (op :field (byte 3 5)) + (dir :field (byte 1 4))) + +(sb!disassem:define-instruction-format (two-bytes 16 + :default-printer '(:name)) + (op :fields (list (byte 8 0) (byte 8 8)))) + +(sb!disassem:define-instruction-format (reg-reg/mem 16 + :default-printer + `(:name :tab reg ", " reg/mem)) + (op :field (byte 7 1)) + (width :field (byte 1 0) :type 'width) + (reg/mem :fields (list (byte 2 14) (byte 3 8)) + :type 'reg/mem) + (reg :field (byte 3 11) :type 'reg) + ;; optional fields + (imm)) + +;;; same as reg-reg/mem, but with direction bit +(sb!disassem:define-instruction-format (reg-reg/mem-dir 16 + :include 'reg-reg/mem + :default-printer + `(:name + :tab + ,(swap-if 'dir 'reg/mem ", " 'reg))) + (op :field (byte 6 2)) + (dir :field (byte 1 1))) + +;;; Same as reg-rem/mem, but uses the reg field as a second op code. +(sb!disassem:define-instruction-format (reg/mem 16 + :default-printer '(:name :tab reg/mem)) + (op :fields (list (byte 7 1) (byte 3 11))) + (width :field (byte 1 0) :type 'width) + (reg/mem :fields (list (byte 2 14) (byte 3 8)) + :type 'sized-reg/mem) + ;; optional fields + (imm)) + +;;; Same as reg/mem, but with the immediate value occurring by default, +;;; and with an appropiate printer. +(sb!disassem:define-instruction-format (reg/mem-imm 16 + :include 'reg/mem + :default-printer + '(:name :tab reg/mem ", " imm)) + (reg/mem :type 'sized-reg/mem) + (imm :type 'imm-data)) + +;;; Same as reg/mem, but with using the accumulator in the default printer +(sb!disassem:define-instruction-format + (accum-reg/mem 16 + :include 'reg/mem :default-printer '(:name :tab accum ", " reg/mem)) + (reg/mem :type 'reg/mem) ; don't need a size + (accum :type 'accum)) + +;;; Same as reg-reg/mem, but with a prefix of #b00001111 +(sb!disassem:define-instruction-format (ext-reg-reg/mem 24 + :default-printer + `(:name :tab reg ", " reg/mem)) + (prefix :field (byte 8 0) :value #b00001111) + (op :field (byte 7 9)) + (width :field (byte 1 8) :type 'width) + (reg/mem :fields (list (byte 2 22) (byte 3 16)) + :type 'reg/mem) + (reg :field (byte 3 19) :type 'reg) + ;; optional fields + (imm)) + +;;; Same as reg/mem, but with a prefix of #b00001111 +(sb!disassem:define-instruction-format (ext-reg/mem 24 + :default-printer '(:name :tab reg/mem)) + (prefix :field (byte 8 0) :value #b00001111) + (op :fields (list (byte 7 9) (byte 3 19))) + (width :field (byte 1 8) :type 'width) + (reg/mem :fields (list (byte 2 22) (byte 3 16)) + :type 'sized-reg/mem) + ;; optional fields + (imm)) + +(sb!disassem:define-instruction-format (ext-reg/mem-imm 24 + :include 'ext-reg/mem + :default-printer + '(:name :tab reg/mem ", " imm)) + (imm :type 'imm-data)) + +;;;; This section was added by jrd, for fp instructions. + +;;; regular fp inst to/from registers/memory +(sb!disassem:define-instruction-format (floating-point 16 + :default-printer + `(:name :tab reg/mem)) + (prefix :field (byte 5 3) :value #b11011) + (op :fields (list (byte 3 0) (byte 3 11))) + (reg/mem :fields (list (byte 2 14) (byte 3 8)) :type 'reg/mem)) + +;;; fp insn to/from fp reg +(sb!disassem:define-instruction-format (floating-point-fp 16 + :default-printer `(:name :tab fp-reg)) + (prefix :field (byte 5 3) :value #b11011) + (suffix :field (byte 2 14) :value #b11) + (op :fields (list (byte 3 0) (byte 3 11))) + (fp-reg :field (byte 3 8) :type 'fp-reg)) + +;;; fp insn to/from fp reg, with the reversed source/destination flag. +(sb!disassem:define-instruction-format + (floating-point-fp-d 16 + :default-printer `(:name :tab ,(swap-if 'd "ST0" ", " 'fp-reg))) + (prefix :field (byte 5 3) :value #b11011) + (suffix :field (byte 2 14) :value #b11) + (op :fields (list (byte 2 0) (byte 3 11))) + (d :field (byte 1 2)) + (fp-reg :field (byte 3 8) :type 'fp-reg)) + + +;;; (added by (?) pfw) +;;; fp no operand isns +(sb!disassem:define-instruction-format (floating-point-no 16 + :default-printer '(:name)) + (prefix :field (byte 8 0) :value #b11011001) + (suffix :field (byte 3 13) :value #b111) + (op :field (byte 5 8))) + +(sb!disassem:define-instruction-format (floating-point-3 16 + :default-printer '(:name)) + (prefix :field (byte 5 3) :value #b11011) + (suffix :field (byte 2 14) :value #b11) + (op :fields (list (byte 3 0) (byte 6 8)))) + +(sb!disassem:define-instruction-format (floating-point-5 16 + :default-printer '(:name)) + (prefix :field (byte 8 0) :value #b11011011) + (suffix :field (byte 3 13) :value #b111) + (op :field (byte 5 8))) + +(sb!disassem:define-instruction-format (floating-point-st 16 + :default-printer '(:name)) + (prefix :field (byte 8 0) :value #b11011111) + (suffix :field (byte 3 13) :value #b111) + (op :field (byte 5 8))) + +(sb!disassem:define-instruction-format (string-op 8 + :include 'simple + :default-printer '(:name width))) + +(sb!disassem:define-instruction-format (short-cond-jump 16) + (op :field (byte 4 4)) + (cc :field (byte 4 0) :type 'condition-code) + (label :field (byte 8 8) :type 'displacement)) + +(sb!disassem:define-instruction-format (short-jump 16 + :default-printer '(:name :tab label)) + (const :field (byte 4 4) :value #b1110) + (op :field (byte 4 0)) + (label :field (byte 8 8) :type 'displacement)) + +(sb!disassem:define-instruction-format (near-cond-jump 16) + (op :fields (list (byte 8 0) (byte 4 12)) :value '(#b00001111 #b1000)) + (cc :field (byte 4 8) :type 'condition-code) + ;; The disassembler currently doesn't let you have an instruction > 32 bits + ;; long, so we fake it by using a prefilter to read the offset. + (label :type 'displacement + :prefilter (lambda (value dstate) + (declare (ignore value)) ; always nil anyway + (sb!disassem:read-signed-suffix 32 dstate)))) + +(sb!disassem:define-instruction-format (near-jump 8 + :default-printer '(:name :tab label)) + (op :field (byte 8 0)) + ;; The disassembler currently doesn't let you have an instruction > 32 bits + ;; long, so we fake it by using a prefilter to read the address. + (label :type 'displacement + :prefilter (lambda (value dstate) + (declare (ignore value)) ; always nil anyway + (sb!disassem:read-signed-suffix 32 dstate)))) + + +(sb!disassem:define-instruction-format (cond-set 24 + :default-printer '('set cc :tab reg/mem)) + (prefix :field (byte 8 0) :value #b00001111) + (op :field (byte 4 12) :value #b1001) + (cc :field (byte 4 8) :type 'condition-code) + (reg/mem :fields (list (byte 2 22) (byte 3 16)) + :type 'byte-reg/mem) + (reg :field (byte 3 19) :value #b000)) + +(sb!disassem:define-instruction-format (cond-move 24 + :default-printer + '('cmov cc :tab reg ", " reg/mem)) + (prefix :field (byte 8 0) :value #b00001111) + (op :field (byte 4 12) :value #b0100) + (cc :field (byte 4 8) :type 'condition-code) + (reg/mem :fields (list (byte 2 22) (byte 3 16)) + :type 'reg/mem) + (reg :field (byte 3 19) :type 'reg)) + +(sb!disassem:define-instruction-format (enter-format 32 + :default-printer '(:name + :tab disp + (:unless (:constant 0) + ", " level))) + (op :field (byte 8 0)) + (disp :field (byte 16 8)) + (level :field (byte 8 24))) + +(sb!disassem:define-instruction-format (prefetch 24 + :default-printer + '(:name ", " reg/mem)) + (prefix :field (byte 8 0) :value #b00001111) + (op :field (byte 8 8) :value #b00011000) + (reg/mem :fields (list (byte 2 22) (byte 3 16)) :type 'byte-reg/mem) + (reg :field (byte 3 19) :type 'reg)) + +;;; Single byte instruction with an immediate byte argument. +(sb!disassem:define-instruction-format (byte-imm 16 + :default-printer '(:name :tab code)) + (op :field (byte 8 0)) + (code :field (byte 8 8))) + +;;;; primitive emitters + +(define-bitfield-emitter emit-word 16 + (byte 16 0)) + +(define-bitfield-emitter emit-dword 32 + (byte 32 0)) + +(define-bitfield-emitter emit-byte-with-reg 8 + (byte 5 3) (byte 3 0)) + +(define-bitfield-emitter emit-mod-reg-r/m-byte 8 + (byte 2 6) (byte 3 3) (byte 3 0)) + +(define-bitfield-emitter emit-sib-byte 8 + (byte 2 6) (byte 3 3) (byte 3 0)) + +;;;; fixup emitters + +(defun emit-absolute-fixup (segment fixup) + (note-fixup segment :absolute fixup) + (let ((offset (fixup-offset fixup))) + (if (label-p offset) + (emit-back-patch segment + 4 ; FIXME: n-word-bytes + (lambda (segment posn) + (declare (ignore posn)) + (emit-dword segment + (- (+ (component-header-length) + (or (label-position offset) + 0)) + other-pointer-lowtag)))) + (emit-dword segment (or offset 0))))) + +(defun emit-relative-fixup (segment fixup) + (note-fixup segment :relative fixup) + (emit-dword segment (or (fixup-offset fixup) 0))) + +;;;; the effective-address (ea) structure + +(defun reg-tn-encoding (tn) + (declare (type tn tn)) +; (aver (eq (sb-name (sc-sb (tn-sc tn))) 'registers)) + (let ((offset (tn-offset tn))) + (logior (ash (logand offset 1) 2) + (ash offset -1)))) + +(defstruct (ea (:constructor make-ea (size &key base index scale disp)) + (:copier nil)) + (size nil :type (member :byte :word :dword)) + (base nil :type (or tn null)) + (index nil :type (or tn null)) + (scale 1 :type (member 1 2 4 8)) + (disp 0 :type (or (unsigned-byte 32) (signed-byte 32) fixup))) +(def!method print-object ((ea ea) stream) + (cond ((or *print-escape* *print-readably*) + (print-unreadable-object (ea stream :type t) + (format stream + "~S~@[ base=~S~]~@[ index=~S~]~@[ scale=~S~]~@[ disp=~S~]" + (ea-size ea) + (ea-base ea) + (ea-index ea) + (let ((scale (ea-scale ea))) + (if (= scale 1) nil scale)) + (ea-disp ea)))) + (t + (format stream "~A PTR [" (symbol-name (ea-size ea))) + (when (ea-base ea) + (write-string (sb!c::location-print-name (ea-base ea)) stream) + (when (ea-index ea) + (write-string "+" stream))) + (when (ea-index ea) + (write-string (sb!c::location-print-name (ea-index ea)) stream)) + (unless (= (ea-scale ea) 1) + (format stream "*~A" (ea-scale ea))) + (typecase (ea-disp ea) + (null) + (integer + (format stream "~@D" (ea-disp ea))) + (t + (format stream "+~A" (ea-disp ea)))) + (write-char #\] stream)))) + +(defun emit-ea (segment thing reg &optional allow-constants) + (etypecase thing + (tn + (ecase (sb-name (sc-sb (tn-sc thing))) + (registers + (emit-mod-reg-r/m-byte segment #b11 reg (reg-tn-encoding thing))) + (sse-registers + (emit-mod-reg-r/m-byte segment #b11 reg (reg-tn-encoding thing))) + (stack + ;; Convert stack tns into an index off of EBP. + (let ((disp (- (* (1+ (tn-offset thing)) n-word-bytes)))) + (cond ((< -128 disp 127) + (emit-mod-reg-r/m-byte segment #b01 reg #b101) + (emit-byte segment disp)) + (t + (emit-mod-reg-r/m-byte segment #b10 reg #b101) + (emit-dword segment disp))))) + (constant + (unless allow-constants + (error + "Constant TNs can only be directly used in MOV, PUSH, and CMP.")) + (emit-mod-reg-r/m-byte segment #b00 reg #b101) + (emit-absolute-fixup segment + (make-fixup nil + :code-object + (- (* (tn-offset thing) n-word-bytes) + other-pointer-lowtag)))))) + (ea + (let* ((base (ea-base thing)) + (index (ea-index thing)) + (scale (ea-scale thing)) + (disp (ea-disp thing)) + (mod (cond ((or (null base) + (and (eql disp 0) + (not (= (reg-tn-encoding base) #b101)))) + #b00) + ((and (fixnump disp) (<= -128 disp 127)) + #b01) + (t + #b10))) + (r/m (cond (index #b100) + ((null base) #b101) + (t (reg-tn-encoding base))))) + (emit-mod-reg-r/m-byte segment mod reg r/m) + (when (= r/m #b100) + (let ((ss (1- (integer-length scale))) + (index (if (null index) + #b100 + (let ((index (reg-tn-encoding index))) + (if (= index #b100) + (error "can't index off of ESP") + index)))) + (base (if (null base) + #b101 + (reg-tn-encoding base)))) + (emit-sib-byte segment ss index base))) + (cond ((= mod #b01) + (emit-byte segment disp)) + ((or (= mod #b10) (null base)) + (if (fixup-p disp) + (emit-absolute-fixup segment disp) + (emit-dword segment disp)))))) + (fixup + (emit-mod-reg-r/m-byte segment #b00 reg #b101) + (emit-absolute-fixup segment thing)))) + +(defun fp-reg-tn-p (thing) + (and (tn-p thing) + (eq (sb-name (sc-sb (tn-sc thing))) 'float-registers))) + +;;; like the above, but for fp-instructions--jrd +(defun emit-fp-op (segment thing op) + (if (fp-reg-tn-p thing) + (emit-byte segment (dpb op (byte 3 3) (dpb (tn-offset thing) + (byte 3 0) + #b11000000))) + (emit-ea segment thing op))) + +(defun byte-reg-p (thing) + (and (tn-p thing) + (eq (sb-name (sc-sb (tn-sc thing))) 'registers) + (member (sc-name (tn-sc thing)) *byte-sc-names*) + t)) + +(defun byte-ea-p (thing) + (typecase thing + (ea (eq (ea-size thing) :byte)) + (tn + (and (member (sc-name (tn-sc thing)) *byte-sc-names*) t)) + (t nil))) + +(defun word-reg-p (thing) + (and (tn-p thing) + (eq (sb-name (sc-sb (tn-sc thing))) 'registers) + (member (sc-name (tn-sc thing)) *word-sc-names*) + t)) + +(defun word-ea-p (thing) + (typecase thing + (ea (eq (ea-size thing) :word)) + (tn (and (member (sc-name (tn-sc thing)) *word-sc-names*) t)) + (t nil))) + +(defun dword-reg-p (thing) + (and (tn-p thing) + (eq (sb-name (sc-sb (tn-sc thing))) 'registers) + (member (sc-name (tn-sc thing)) *dword-sc-names*) + t)) + +(defun dword-ea-p (thing) + (typecase thing + (ea (eq (ea-size thing) :dword)) + (tn + (and (member (sc-name (tn-sc thing)) *dword-sc-names*) t)) + (t nil))) + +(defun register-p (thing) + (and (tn-p thing) + (eq (sb-name (sc-sb (tn-sc thing))) 'registers))) + +(defun sse-register-p (thing) + (and (tn-p thing) + (eq (sb-name (sc-sb (tn-sc thing))) 'sse-registers))) + +(defun accumulator-p (thing) + (and (register-p thing) + (= (tn-offset thing) 0))) + +;;;; utilities + +(def!constant +operand-size-prefix-byte+ #b01100110) + +(defun maybe-emit-operand-size-prefix (segment size) + (unless (or (eq size :byte) (eq size +default-operand-size+)) + (emit-byte segment +operand-size-prefix-byte+))) + +(defun operand-size (thing) + (typecase thing + (tn + ;; FIXME: might as well be COND instead of having to use #. readmacro + ;; to hack up the code + (case (sc-name (tn-sc thing)) + (#.*dword-sc-names* + :dword) + (#.*word-sc-names* + :word) + (#.*byte-sc-names* + :byte) + ;; added by jrd: float-registers is a separate size (?) + (#.*float-sc-names* + :float) + (#.*double-sc-names* + :double) + (t + (error "can't tell the size of ~S ~S" thing (sc-name (tn-sc thing)))))) + (ea + (ea-size thing)) + (t + nil))) + +(defun matching-operand-size (dst src) + (let ((dst-size (operand-size dst)) + (src-size (operand-size src))) + (if dst-size + (if src-size + (if (eq dst-size src-size) + dst-size + (error "size mismatch: ~S is a ~S and ~S is a ~S." + dst dst-size src src-size)) + dst-size) + (if src-size + src-size + (error "can't tell the size of either ~S or ~S" dst src))))) + +(defun emit-sized-immediate (segment size value) + (ecase size + (:byte + (emit-byte segment value)) + (:word + (emit-word segment value)) + (:dword + (emit-dword segment value)))) + +;;;; general data transfer + +(define-instruction mov (segment dst src) + ;; immediate to register + (:printer reg ((op #b1011) (imm nil :type 'imm-data)) + '(:name :tab reg ", " imm)) + ;; absolute mem to/from accumulator + (:printer simple-dir ((op #b101000) (imm nil :type 'imm-addr)) + `(:name :tab ,(swap-if 'dir 'accum ", " '("[" imm "]")))) + ;; register to/from register/memory + (:printer reg-reg/mem-dir ((op #b100010))) + ;; immediate to register/memory + (:printer reg/mem-imm ((op '(#b1100011 #b000)))) + + (:emitter + (let ((size (matching-operand-size dst src))) + (maybe-emit-operand-size-prefix segment size) + (cond ((register-p dst) + (cond ((integerp src) + (emit-byte-with-reg segment + (if (eq size :byte) + #b10110 + #b10111) + (reg-tn-encoding dst)) + (emit-sized-immediate segment size src)) + ((and (fixup-p src) (accumulator-p dst)) + (emit-byte segment + (if (eq size :byte) + #b10100000 + #b10100001)) + (emit-absolute-fixup segment src)) + (t + (emit-byte segment + (if (eq size :byte) + #b10001010 + #b10001011)) + (emit-ea segment src (reg-tn-encoding dst) t)))) + ((and (fixup-p dst) (accumulator-p src)) + (emit-byte segment (if (eq size :byte) #b10100010 #b10100011)) + (emit-absolute-fixup segment dst)) + ((integerp src) + (emit-byte segment (if (eq size :byte) #b11000110 #b11000111)) + (emit-ea segment dst #b000) + (emit-sized-immediate segment size src)) + ((register-p src) + (emit-byte segment (if (eq size :byte) #b10001000 #b10001001)) + (emit-ea segment dst (reg-tn-encoding src))) + ((fixup-p src) + (aver (eq size :dword)) + (emit-byte segment #b11000111) + (emit-ea segment dst #b000) + (emit-absolute-fixup segment src)) + (t + (error "bogus arguments to MOV: ~S ~S" dst src)))))) + +(defun emit-move-with-extension (segment dst src opcode) + (aver (register-p dst)) + (let ((dst-size (operand-size dst)) + (src-size (operand-size src))) + (ecase dst-size + (:word + (aver (eq src-size :byte)) + (maybe-emit-operand-size-prefix segment :word) + (emit-byte segment #b00001111) + (emit-byte segment opcode) + (emit-ea segment src (reg-tn-encoding dst))) + (:dword + (ecase src-size + (:byte + (maybe-emit-operand-size-prefix segment :dword) + (emit-byte segment #b00001111) + (emit-byte segment opcode) + (emit-ea segment src (reg-tn-encoding dst))) + (:word + (emit-byte segment #b00001111) + (emit-byte segment (logior opcode 1)) + (emit-ea segment src (reg-tn-encoding dst)))))))) + +(define-instruction movsx (segment dst src) + (:printer ext-reg-reg/mem ((op #b1011111) (reg nil :type 'word-reg))) + (:emitter (emit-move-with-extension segment dst src #b10111110))) + +(define-instruction movzx (segment dst src) + (:printer ext-reg-reg/mem ((op #b1011011) (reg nil :type 'word-reg))) + (:emitter (emit-move-with-extension segment dst src #b10110110))) + +(define-instruction push (segment src) + ;; register + (:printer reg-no-width ((op #b01010))) + ;; register/memory + (:printer reg/mem ((op '(#b1111111 #b110)) (width 1))) + ;; immediate + (:printer byte ((op #b01101010) (imm nil :type 'signed-imm-byte)) + '(:name :tab imm)) + (:printer byte ((op #b01101000) (imm nil :type 'imm-word)) + '(:name :tab imm)) + ;; ### segment registers? + + (:emitter + (cond ((integerp src) + (cond ((<= -128 src 127) + (emit-byte segment #b01101010) + (emit-byte segment src)) + (t + (emit-byte segment #b01101000) + (emit-dword segment src)))) + ((fixup-p src) + ;; Interpret the fixup as an immediate dword to push. + (emit-byte segment #b01101000) + (emit-absolute-fixup segment src)) + (t + (let ((size (operand-size src))) + (aver (not (eq size :byte))) + (maybe-emit-operand-size-prefix segment size) + (cond ((register-p src) + (emit-byte-with-reg segment #b01010 (reg-tn-encoding src))) + (t + (emit-byte segment #b11111111) + (emit-ea segment src #b110 t)))))))) + +(define-instruction pusha (segment) + (:printer byte ((op #b01100000))) + (:emitter + (emit-byte segment #b01100000))) + +(define-instruction pop (segment dst) + (:printer reg-no-width ((op #b01011))) + (:printer reg/mem ((op '(#b1000111 #b000)) (width 1))) + (:emitter + (let ((size (operand-size dst))) + (aver (not (eq size :byte))) + (maybe-emit-operand-size-prefix segment size) + (cond ((register-p dst) + (emit-byte-with-reg segment #b01011 (reg-tn-encoding dst))) + (t + (emit-byte segment #b10001111) + (emit-ea segment dst #b000)))))) + +(define-instruction popa (segment) + (:printer byte ((op #b01100001))) + (:emitter + (emit-byte segment #b01100001))) + +(define-instruction xchg (segment operand1 operand2) + ;; Register with accumulator. + (:printer reg-no-width ((op #b10010)) '(:name :tab accum ", " reg)) + ;; Register/Memory with Register. + (:printer reg-reg/mem ((op #b1000011))) + (:emitter + (let ((size (matching-operand-size operand1 operand2))) + (maybe-emit-operand-size-prefix segment size) + (labels ((xchg-acc-with-something (acc something) + (if (and (not (eq size :byte)) (register-p something)) + (emit-byte-with-reg segment + #b10010 + (reg-tn-encoding something)) + (xchg-reg-with-something acc something))) + (xchg-reg-with-something (reg something) + (emit-byte segment (if (eq size :byte) #b10000110 #b10000111)) + (emit-ea segment something (reg-tn-encoding reg)))) + (cond ((accumulator-p operand1) + (xchg-acc-with-something operand1 operand2)) + ((accumulator-p operand2) + (xchg-acc-with-something operand2 operand1)) + ((register-p operand1) + (xchg-reg-with-something operand1 operand2)) + ((register-p operand2) + (xchg-reg-with-something operand2 operand1)) + (t + (error "bogus args to XCHG: ~S ~S" operand1 operand2))))))) + +(define-instruction lea (segment dst src) + (:printer reg-reg/mem ((op #b1000110) (width 1))) + (:emitter + (aver (dword-reg-p dst)) + (emit-byte segment #b10001101) + (emit-ea segment src (reg-tn-encoding dst)))) + +(define-instruction cmpxchg (segment dst src) + ;; Register/Memory with Register. + (:printer ext-reg-reg/mem ((op #b1011000)) '(:name :tab reg/mem ", " reg)) + (:emitter + (aver (register-p src)) + (let ((size (matching-operand-size src dst))) + (maybe-emit-operand-size-prefix segment size) + (emit-byte segment #b00001111) + (emit-byte segment (if (eq size :byte) #b10110000 #b10110001)) + (emit-ea segment dst (reg-tn-encoding src))))) + + + +(define-instruction fs-segment-prefix (segment) + (:emitter + (emit-byte segment #x64))) + +;;;; flag control instructions + +;;; CLC -- Clear Carry Flag. +(define-instruction clc (segment) + (:printer byte ((op #b11111000))) + (:emitter + (emit-byte segment #b11111000))) + +;;; CLD -- Clear Direction Flag. +(define-instruction cld (segment) + (:printer byte ((op #b11111100))) + (:emitter + (emit-byte segment #b11111100))) + +;;; CLI -- Clear Iterrupt Enable Flag. +(define-instruction cli (segment) + (:printer byte ((op #b11111010))) + (:emitter + (emit-byte segment #b11111010))) + +;;; CMC -- Complement Carry Flag. +(define-instruction cmc (segment) + (:printer byte ((op #b11110101))) + (:emitter + (emit-byte segment #b11110101))) + +;;; LAHF -- Load AH into flags. +(define-instruction lahf (segment) + (:printer byte ((op #b10011111))) + (:emitter + (emit-byte segment #b10011111))) + +;;; POPF -- Pop flags. +(define-instruction popf (segment) + (:printer byte ((op #b10011101))) + (:emitter + (emit-byte segment #b10011101))) + +;;; PUSHF -- push flags. +(define-instruction pushf (segment) + (:printer byte ((op #b10011100))) + (:emitter + (emit-byte segment #b10011100))) + +;;; SAHF -- Store AH into flags. +(define-instruction sahf (segment) + (:printer byte ((op #b10011110))) + (:emitter + (emit-byte segment #b10011110))) + +;;; STC -- Set Carry Flag. +(define-instruction stc (segment) + (:printer byte ((op #b11111001))) + (:emitter + (emit-byte segment #b11111001))) + +;;; STD -- Set Direction Flag. +(define-instruction std (segment) + (:printer byte ((op #b11111101))) + (:emitter + (emit-byte segment #b11111101))) + +;;; STI -- Set Interrupt Enable Flag. +(define-instruction sti (segment) + (:printer byte ((op #b11111011))) + (:emitter + (emit-byte segment #b11111011))) + +;;;; arithmetic + +(defun emit-random-arith-inst (name segment dst src opcode + &optional allow-constants) + (let ((size (matching-operand-size dst src))) + (maybe-emit-operand-size-prefix segment size) + (cond + ((integerp src) + (cond ((and (not (eq size :byte)) (<= -128 src 127)) + (emit-byte segment #b10000011) + (emit-ea segment dst opcode allow-constants) + (emit-byte segment src)) + ((accumulator-p dst) + (emit-byte segment + (dpb opcode + (byte 3 3) + (if (eq size :byte) + #b00000100 + #b00000101))) + (emit-sized-immediate segment size src)) + (t + (emit-byte segment (if (eq size :byte) #b10000000 #b10000001)) + (emit-ea segment dst opcode allow-constants) + (emit-sized-immediate segment size src)))) + ((register-p src) + (emit-byte segment + (dpb opcode + (byte 3 3) + (if (eq size :byte) #b00000000 #b00000001))) + (emit-ea segment dst (reg-tn-encoding src) allow-constants)) + ((register-p dst) + (emit-byte segment + (dpb opcode + (byte 3 3) + (if (eq size :byte) #b00000010 #b00000011))) + (emit-ea segment src (reg-tn-encoding dst) allow-constants)) + (t + (error "bogus operands to ~A" name))))) + +(eval-when (:compile-toplevel :execute) + (defun arith-inst-printer-list (subop) + `((accum-imm ((op ,(dpb subop (byte 3 2) #b0000010)))) + (reg/mem-imm ((op (#b1000000 ,subop)))) + (reg/mem-imm ((op (#b1000001 ,subop)) + (imm nil :type signed-imm-byte))) + (reg-reg/mem-dir ((op ,(dpb subop (byte 3 1) #b000000)))))) + ) + +(define-instruction add (segment dst src) + (:printer-list (arith-inst-printer-list #b000)) + (:emitter (emit-random-arith-inst "ADD" segment dst src #b000))) + +(define-instruction adc (segment dst src) + (:printer-list (arith-inst-printer-list #b010)) + (:emitter (emit-random-arith-inst "ADC" segment dst src #b010))) + +(define-instruction sub (segment dst src) + (:printer-list (arith-inst-printer-list #b101)) + (:emitter (emit-random-arith-inst "SUB" segment dst src #b101))) + +(define-instruction sbb (segment dst src) + (:printer-list (arith-inst-printer-list #b011)) + (:emitter (emit-random-arith-inst "SBB" segment dst src #b011))) + +(define-instruction cmp (segment dst src) + (:printer-list (arith-inst-printer-list #b111)) + (:emitter (emit-random-arith-inst "CMP" segment dst src #b111 t))) + +(define-instruction inc (segment dst) + ;; Register. + (:printer reg-no-width ((op #b01000))) + ;; Register/Memory + (:printer reg/mem ((op '(#b1111111 #b000)))) + (:emitter + (let ((size (operand-size dst))) + (maybe-emit-operand-size-prefix segment size) + (cond ((and (not (eq size :byte)) (register-p dst)) + (emit-byte-with-reg segment #b01000 (reg-tn-encoding dst))) + (t + (emit-byte segment (if (eq size :byte) #b11111110 #b11111111)) + (emit-ea segment dst #b000)))))) + +(define-instruction dec (segment dst) + ;; Register. + (:printer reg-no-width ((op #b01001))) + ;; Register/Memory + (:printer reg/mem ((op '(#b1111111 #b001)))) + (:emitter + (let ((size (operand-size dst))) + (maybe-emit-operand-size-prefix segment size) + (cond ((and (not (eq size :byte)) (register-p dst)) + (emit-byte-with-reg segment #b01001 (reg-tn-encoding dst))) + (t + (emit-byte segment (if (eq size :byte) #b11111110 #b11111111)) + (emit-ea segment dst #b001)))))) + +(define-instruction neg (segment dst) + (:printer reg/mem ((op '(#b1111011 #b011)))) + (:emitter + (let ((size (operand-size dst))) + (maybe-emit-operand-size-prefix segment size) + (emit-byte segment (if (eq size :byte) #b11110110 #b11110111)) + (emit-ea segment dst #b011)))) + +(define-instruction aaa (segment) + (:printer byte ((op #b00110111))) + (:emitter + (emit-byte segment #b00110111))) + +(define-instruction aas (segment) + (:printer byte ((op #b00111111))) + (:emitter + (emit-byte segment #b00111111))) + +(define-instruction daa (segment) + (:printer byte ((op #b00100111))) + (:emitter + (emit-byte segment #b00100111))) + +(define-instruction das (segment) + (:printer byte ((op #b00101111))) + (:emitter + (emit-byte segment #b00101111))) + +(define-instruction mul (segment dst src) + (:printer accum-reg/mem ((op '(#b1111011 #b100)))) + (:emitter + (let ((size (matching-operand-size dst src))) + (aver (accumulator-p dst)) + (maybe-emit-operand-size-prefix segment size) + (emit-byte segment (if (eq size :byte) #b11110110 #b11110111)) + (emit-ea segment src #b100)))) + +(define-instruction imul (segment dst &optional src1 src2) + (:printer accum-reg/mem ((op '(#b1111011 #b101)))) + (:printer ext-reg-reg/mem ((op #b1010111))) + (:printer reg-reg/mem ((op #b0110100) (width 1) + (imm nil :type 'signed-imm-word)) + '(:name :tab reg ", " reg/mem ", " imm)) + (:printer reg-reg/mem ((op #b0110101) (width 1) + (imm nil :type 'signed-imm-byte)) + '(:name :tab reg ", " reg/mem ", " imm)) + (:emitter + (flet ((r/m-with-immed-to-reg (reg r/m immed) + (let* ((size (matching-operand-size reg r/m)) + (sx (and (not (eq size :byte)) (<= -128 immed 127)))) + (maybe-emit-operand-size-prefix segment size) + (emit-byte segment (if sx #b01101011 #b01101001)) + (emit-ea segment r/m (reg-tn-encoding reg)) + (if sx + (emit-byte segment immed) + (emit-sized-immediate segment size immed))))) + (cond (src2 + (r/m-with-immed-to-reg dst src1 src2)) + (src1 + (if (integerp src1) + (r/m-with-immed-to-reg dst dst src1) + (let ((size (matching-operand-size dst src1))) + (maybe-emit-operand-size-prefix segment size) + (emit-byte segment #b00001111) + (emit-byte segment #b10101111) + (emit-ea segment src1 (reg-tn-encoding dst))))) + (t + (let ((size (operand-size dst))) + (maybe-emit-operand-size-prefix segment size) + (emit-byte segment (if (eq size :byte) #b11110110 #b11110111)) + (emit-ea segment dst #b101))))))) + +(define-instruction div (segment dst src) + (:printer accum-reg/mem ((op '(#b1111011 #b110)))) + (:emitter + (let ((size (matching-operand-size dst src))) + (aver (accumulator-p dst)) + (maybe-emit-operand-size-prefix segment size) + (emit-byte segment (if (eq size :byte) #b11110110 #b11110111)) + (emit-ea segment src #b110)))) + +(define-instruction idiv (segment dst src) + (:printer accum-reg/mem ((op '(#b1111011 #b111)))) + (:emitter + (let ((size (matching-operand-size dst src))) + (aver (accumulator-p dst)) + (maybe-emit-operand-size-prefix segment size) + (emit-byte segment (if (eq size :byte) #b11110110 #b11110111)) + (emit-ea segment src #b111)))) + +(define-instruction aad (segment) + (:printer two-bytes ((op '(#b11010101 #b00001010)))) + (:emitter + (emit-byte segment #b11010101) + (emit-byte segment #b00001010))) + +(define-instruction aam (segment) + (:printer two-bytes ((op '(#b11010100 #b00001010)))) + (:emitter + (emit-byte segment #b11010100) + (emit-byte segment #b00001010))) + +;;; CBW -- Convert Byte to Word. AX <- sign_xtnd(AL) +(define-instruction cbw (segment) + (:emitter + (maybe-emit-operand-size-prefix segment :word) + (emit-byte segment #b10011000))) + +;;; CWDE -- Convert Word To Double Word Extened. EAX <- sign_xtnd(AX) +(define-instruction cwde (segment) + (:emitter + (maybe-emit-operand-size-prefix segment :dword) + (emit-byte segment #b10011000))) + +;;; CWD -- Convert Word to Double Word. DX:AX <- sign_xtnd(AX) +(define-instruction cwd (segment) + (:emitter + (maybe-emit-operand-size-prefix segment :word) + (emit-byte segment #b10011001))) + +;;; CDQ -- Convert Double Word to Quad Word. EDX:EAX <- sign_xtnd(EAX) +(define-instruction cdq (segment) + (:printer byte ((op #b10011001))) + (:emitter + (maybe-emit-operand-size-prefix segment :dword) + (emit-byte segment #b10011001))) + +(define-instruction xadd (segment dst src) + ;; Register/Memory with Register. + (:printer ext-reg-reg/mem ((op #b1100000)) '(:name :tab reg/mem ", " reg)) + (:emitter + (aver (register-p src)) + (let ((size (matching-operand-size src dst))) + (maybe-emit-operand-size-prefix segment size) + (emit-byte segment #b00001111) + (emit-byte segment (if (eq size :byte) #b11000000 #b11000001)) + (emit-ea segment dst (reg-tn-encoding src))))) + + +;;;; logic + +(defun emit-shift-inst (segment dst amount opcode) + (let ((size (operand-size dst))) + (maybe-emit-operand-size-prefix segment size) + (multiple-value-bind (major-opcode immed) + (case amount + (:cl (values #b11010010 nil)) + (1 (values #b11010000 nil)) + (t (values #b11000000 t))) + (emit-byte segment + (if (eq size :byte) major-opcode (logior major-opcode 1))) + (emit-ea segment dst opcode) + (when immed + (emit-byte segment amount))))) + +(eval-when (:compile-toplevel :execute) + (defun shift-inst-printer-list (subop) + `((reg/mem ((op (#b1101000 ,subop))) + (:name :tab reg/mem ", 1")) + (reg/mem ((op (#b1101001 ,subop))) + (:name :tab reg/mem ", " 'cl)) + (reg/mem-imm ((op (#b1100000 ,subop)) + (imm nil :type signed-imm-byte)))))) + +(define-instruction rol (segment dst amount) + (:printer-list + (shift-inst-printer-list #b000)) + (:emitter + (emit-shift-inst segment dst amount #b000))) + +(define-instruction ror (segment dst amount) + (:printer-list + (shift-inst-printer-list #b001)) + (:emitter + (emit-shift-inst segment dst amount #b001))) + +(define-instruction rcl (segment dst amount) + (:printer-list + (shift-inst-printer-list #b010)) + (:emitter + (emit-shift-inst segment dst amount #b010))) + +(define-instruction rcr (segment dst amount) + (:printer-list + (shift-inst-printer-list #b011)) + (:emitter + (emit-shift-inst segment dst amount #b011))) + +(define-instruction shl (segment dst amount) + (:printer-list + (shift-inst-printer-list #b100)) + (:emitter + (emit-shift-inst segment dst amount #b100))) + +(define-instruction shr (segment dst amount) + (:printer-list + (shift-inst-printer-list #b101)) + (:emitter + (emit-shift-inst segment dst amount #b101))) + +(define-instruction sar (segment dst amount) + (:printer-list + (shift-inst-printer-list #b111)) + (:emitter + (emit-shift-inst segment dst amount #b111))) + +(defun emit-double-shift (segment opcode dst src amt) + (let ((size (matching-operand-size dst src))) + (when (eq size :byte) + (error "Double shifts can only be used with words.")) + (maybe-emit-operand-size-prefix segment size) + (emit-byte segment #b00001111) + (emit-byte segment (dpb opcode (byte 1 3) + (if (eq amt :cl) #b10100101 #b10100100))) + #+nil + (emit-ea segment dst src) + (emit-ea segment dst (reg-tn-encoding src)) ; pw tries this + (unless (eq amt :cl) + (emit-byte segment amt)))) + +(eval-when (:compile-toplevel :execute) + (defun double-shift-inst-printer-list (op) + `(#+nil + (ext-reg-reg/mem-imm ((op ,(logior op #b10)) + (imm nil :type signed-imm-byte))) + (ext-reg-reg/mem ((op ,(logior op #b10))) + (:name :tab reg/mem ", " reg ", " 'cl))))) + +(define-instruction shld (segment dst src amt) + (:declare (type (or (member :cl) (mod 32)) amt)) + (:printer-list (double-shift-inst-printer-list #b1010000)) + (:emitter + (emit-double-shift segment #b0 dst src amt))) + +(define-instruction shrd (segment dst src amt) + (:declare (type (or (member :cl) (mod 32)) amt)) + (:printer-list (double-shift-inst-printer-list #b1010100)) + (:emitter + (emit-double-shift segment #b1 dst src amt))) + +(define-instruction and (segment dst src) + (:printer-list + (arith-inst-printer-list #b100)) + (:emitter + (emit-random-arith-inst "AND" segment dst src #b100))) + +(define-instruction test (segment this that) + (:printer accum-imm ((op #b1010100))) + (:printer reg/mem-imm ((op '(#b1111011 #b000)))) + (:printer reg-reg/mem ((op #b1000010))) + (:emitter + (let ((size (matching-operand-size this that))) + (maybe-emit-operand-size-prefix segment size) + (flet ((test-immed-and-something (immed something) + (cond ((accumulator-p something) + (emit-byte segment + (if (eq size :byte) #b10101000 #b10101001)) + (emit-sized-immediate segment size immed)) + (t + (emit-byte segment + (if (eq size :byte) #b11110110 #b11110111)) + (emit-ea segment something #b000) + (emit-sized-immediate segment size immed)))) + (test-reg-and-something (reg something) + (emit-byte segment (if (eq size :byte) #b10000100 #b10000101)) + (emit-ea segment something (reg-tn-encoding reg)))) + (cond ((integerp that) + (test-immed-and-something that this)) + ((integerp this) + (test-immed-and-something this that)) + ((register-p this) + (test-reg-and-something this that)) + ((register-p that) + (test-reg-and-something that this)) + (t + (error "bogus operands for TEST: ~S and ~S" this that))))))) + +(define-instruction or (segment dst src) + (:printer-list + (arith-inst-printer-list #b001)) + (:emitter + (emit-random-arith-inst "OR" segment dst src #b001))) + +(define-instruction xor (segment dst src) + (:printer-list + (arith-inst-printer-list #b110)) + (:emitter + (emit-random-arith-inst "XOR" segment dst src #b110))) + +(define-instruction not (segment dst) + (:printer reg/mem ((op '(#b1111011 #b010)))) + (:emitter + (let ((size (operand-size dst))) + (maybe-emit-operand-size-prefix segment size) + (emit-byte segment (if (eq size :byte) #b11110110 #b11110111)) + (emit-ea segment dst #b010)))) + +;;;; string manipulation + +(define-instruction cmps (segment size) + (:printer string-op ((op #b1010011))) + (:emitter + (maybe-emit-operand-size-prefix segment size) + (emit-byte segment (if (eq size :byte) #b10100110 #b10100111)))) + +(define-instruction ins (segment acc) + (:printer string-op ((op #b0110110))) + (:emitter + (let ((size (operand-size acc))) + (aver (accumulator-p acc)) + (maybe-emit-operand-size-prefix segment size) + (emit-byte segment (if (eq size :byte) #b01101100 #b01101101))))) + +(define-instruction lods (segment acc) + (:printer string-op ((op #b1010110))) + (:emitter + (let ((size (operand-size acc))) + (aver (accumulator-p acc)) + (maybe-emit-operand-size-prefix segment size) + (emit-byte segment (if (eq size :byte) #b10101100 #b10101101))))) + +(define-instruction movs (segment size) + (:printer string-op ((op #b1010010))) + (:emitter + (maybe-emit-operand-size-prefix segment size) + (emit-byte segment (if (eq size :byte) #b10100100 #b10100101)))) + +(define-instruction outs (segment acc) + (:printer string-op ((op #b0110111))) + (:emitter + (let ((size (operand-size acc))) + (aver (accumulator-p acc)) + (maybe-emit-operand-size-prefix segment size) + (emit-byte segment (if (eq size :byte) #b01101110 #b01101111))))) + +(define-instruction scas (segment acc) + (:printer string-op ((op #b1010111))) + (:emitter + (let ((size (operand-size acc))) + (aver (accumulator-p acc)) + (maybe-emit-operand-size-prefix segment size) + (emit-byte segment (if (eq size :byte) #b10101110 #b10101111))))) + +(define-instruction stos (segment acc) + (:printer string-op ((op #b1010101))) + (:emitter + (let ((size (operand-size acc))) + (aver (accumulator-p acc)) + (maybe-emit-operand-size-prefix segment size) + (emit-byte segment (if (eq size :byte) #b10101010 #b10101011))))) + +(define-instruction xlat (segment) + (:printer byte ((op #b11010111))) + (:emitter + (emit-byte segment #b11010111))) + +(define-instruction rep (segment) + (:emitter + (emit-byte segment #b11110010))) + +(define-instruction repe (segment) + (:printer byte ((op #b11110011))) + (:emitter + (emit-byte segment #b11110011))) + +(define-instruction repne (segment) + (:printer byte ((op #b11110010))) + (:emitter + (emit-byte segment #b11110010))) + + +;;;; bit manipulation + +(define-instruction bsf (segment dst src) + (:printer ext-reg-reg/mem ((op #b1011110) (width 0))) + (:emitter + (let ((size (matching-operand-size dst src))) + (when (eq size :byte) + (error "can't scan bytes: ~S" src)) + (maybe-emit-operand-size-prefix segment size) + (emit-byte segment #b00001111) + (emit-byte segment #b10111100) + (emit-ea segment src (reg-tn-encoding dst))))) + +(define-instruction bsr (segment dst src) + (:printer ext-reg-reg/mem ((op #b1011110) (width 1))) + (:emitter + (let ((size (matching-operand-size dst src))) + (when (eq size :byte) + (error "can't scan bytes: ~S" src)) + (maybe-emit-operand-size-prefix segment size) + (emit-byte segment #b00001111) + (emit-byte segment #b10111101) + (emit-ea segment src (reg-tn-encoding dst))))) + +(defun emit-bit-test-and-mumble (segment src index opcode) + (let ((size (operand-size src))) + (when (eq size :byte) + (error "can't scan bytes: ~S" src)) + (maybe-emit-operand-size-prefix segment size) + (emit-byte segment #b00001111) + (cond ((integerp index) + (emit-byte segment #b10111010) + (emit-ea segment src opcode) + (emit-byte segment index)) + (t + (emit-byte segment (dpb opcode (byte 3 3) #b10000011)) + (emit-ea segment src (reg-tn-encoding index)))))) + +(eval-when (:compile-toplevel :execute) + (defun bit-test-inst-printer-list (subop) + `((ext-reg/mem-imm ((op (#b1011101 ,subop)) + (reg/mem nil :type word-reg/mem) + (imm nil :type imm-data) + (width 0))) + (ext-reg-reg/mem ((op ,(dpb subop (byte 3 2) #b1000001)) + (width 1)) + (:name :tab reg/mem ", " reg))))) + +(define-instruction bt (segment src index) + (:printer-list (bit-test-inst-printer-list #b100)) + (:emitter + (emit-bit-test-and-mumble segment src index #b100))) + +(define-instruction btc (segment src index) + (:printer-list (bit-test-inst-printer-list #b111)) + (:emitter + (emit-bit-test-and-mumble segment src index #b111))) + +(define-instruction btr (segment src index) + (:printer-list (bit-test-inst-printer-list #b110)) + (:emitter + (emit-bit-test-and-mumble segment src index #b110))) + +(define-instruction bts (segment src index) + (:printer-list (bit-test-inst-printer-list #b101)) + (:emitter + (emit-bit-test-and-mumble segment src index #b101))) + + +;;;; control transfer + +(define-instruction call (segment where) + (:printer near-jump ((op #b11101000))) + (:printer reg/mem ((op '(#b1111111 #b010)) (width 1))) + (:emitter + (typecase where + (label + (emit-byte segment #b11101000) + (emit-back-patch segment + 4 + (lambda (segment posn) + (emit-dword segment + (- (label-position where) + (+ posn 4)))))) + (fixup + (emit-byte segment #b11101000) + (emit-relative-fixup segment where)) + (t + (emit-byte segment #b11111111) + (emit-ea segment where #b010))))) + +(defun emit-byte-displacement-backpatch (segment target) + (emit-back-patch segment + 1 + (lambda (segment posn) + (let ((disp (- (label-position target) (1+ posn)))) + (aver (<= -128 disp 127)) + (emit-byte segment disp))))) + +(define-instruction jmp (segment cond &optional where) + ;; conditional jumps + (:printer short-cond-jump ((op #b0111)) '('j cc :tab label)) + (:printer near-cond-jump () '('j cc :tab label)) + ;; unconditional jumps + (:printer short-jump ((op #b1011))) + (:printer near-jump ((op #b11101001)) ) + (:printer reg/mem ((op '(#b1111111 #b100)) (width 1))) + (:emitter + (cond (where + (emit-chooser + segment 6 2 + (lambda (segment posn delta-if-after) + (let ((disp (- (label-position where posn delta-if-after) + (+ posn 2)))) + (when (<= -128 disp 127) + (emit-byte segment + (dpb (conditional-opcode cond) + (byte 4 0) + #b01110000)) + (emit-byte-displacement-backpatch segment where) + t))) + (lambda (segment posn) + (let ((disp (- (label-position where) (+ posn 6)))) + (emit-byte segment #b00001111) + (emit-byte segment + (dpb (conditional-opcode cond) + (byte 4 0) + #b10000000)) + (emit-dword segment disp))))) + ((label-p (setq where cond)) + (emit-chooser + segment 5 0 + (lambda (segment posn delta-if-after) + (let ((disp (- (label-position where posn delta-if-after) + (+ posn 2)))) + (when (<= -128 disp 127) + (emit-byte segment #b11101011) + (emit-byte-displacement-backpatch segment where) + t))) + (lambda (segment posn) + (let ((disp (- (label-position where) (+ posn 5)))) + (emit-byte segment #b11101001) + (emit-dword segment disp))))) + ((fixup-p where) + (emit-byte segment #b11101001) + (emit-relative-fixup segment where)) + (t + (unless (or (ea-p where) (tn-p where)) + (error "don't know what to do with ~A" where)) + (emit-byte segment #b11111111) + (emit-ea segment where #b100))))) + +(define-instruction jmp-short (segment label) + (:emitter + (emit-byte segment #b11101011) + (emit-byte-displacement-backpatch segment label))) + +(define-instruction ret (segment &optional stack-delta) + (:printer byte ((op #b11000011))) + (:printer byte ((op #b11000010) (imm nil :type 'imm-word-16)) + '(:name :tab imm)) + (:emitter + (cond (stack-delta + (emit-byte segment #b11000010) + (emit-word segment stack-delta)) + (t + (emit-byte segment #b11000011))))) + +(define-instruction jecxz (segment target) + (:printer short-jump ((op #b0011))) + (:emitter + (emit-byte segment #b11100011) + (emit-byte-displacement-backpatch segment target))) + +(define-instruction loop (segment target) + (:printer short-jump ((op #b0010))) + (:emitter + (emit-byte segment #b11100010) ; pfw this was 11100011, or jecxz!!!! + (emit-byte-displacement-backpatch segment target))) + +(define-instruction loopz (segment target) + (:printer short-jump ((op #b0001))) + (:emitter + (emit-byte segment #b11100001) + (emit-byte-displacement-backpatch segment target))) + +(define-instruction loopnz (segment target) + (:printer short-jump ((op #b0000))) + (:emitter + (emit-byte segment #b11100000) + (emit-byte-displacement-backpatch segment target))) + +;;;; conditional move +(define-instruction cmov (segment cond dst src) + (:printer cond-move ()) + (:emitter + (aver (register-p dst)) + (let ((size (matching-operand-size dst src))) + (aver (or (eq size :word) (eq size :dword))) + (maybe-emit-operand-size-prefix segment size)) + (emit-byte segment #b00001111) + (emit-byte segment (dpb (conditional-opcode cond) (byte 4 0) #b01000000)) + (emit-ea segment src (reg-tn-encoding dst)))) + +;;;; conditional byte set + +(define-instruction set (segment dst cond) + (:printer cond-set ()) + (:emitter + (emit-byte segment #b00001111) + (emit-byte segment (dpb (conditional-opcode cond) (byte 4 0) #b10010000)) + (emit-ea segment dst #b000))) + +;;;; enter/leave + +(define-instruction enter (segment disp &optional (level 0)) + (:declare (type (unsigned-byte 16) disp) + (type (unsigned-byte 8) level)) + (:printer enter-format ((op #b11001000))) + (:emitter + (emit-byte segment #b11001000) + (emit-word segment disp) + (emit-byte segment level))) + +(define-instruction leave (segment) + (:printer byte ((op #b11001001))) + (:emitter + (emit-byte segment #b11001001))) + +;;;; prefetch +(define-instruction prefetchnta (segment ea) + (:printer prefetch ((op #b00011000) (reg #b000))) + (:emitter + (aver (typep ea 'ea)) + (aver (eq :byte (ea-size ea))) + (emit-byte segment #b00001111) + (emit-byte segment #b00011000) + (emit-ea segment ea #b000))) + +(define-instruction prefetcht0 (segment ea) + (:printer prefetch ((op #b00011000) (reg #b001))) + (:emitter + (aver (typep ea 'ea)) + (aver (eq :byte (ea-size ea))) + (emit-byte segment #b00001111) + (emit-byte segment #b00011000) + (emit-ea segment ea #b001))) + +(define-instruction prefetcht1 (segment ea) + (:printer prefetch ((op #b00011000) (reg #b010))) + (:emitter + (aver (typep ea 'ea)) + (aver (eq :byte (ea-size ea))) + (emit-byte segment #b00001111) + (emit-byte segment #b00011000) + (emit-ea segment ea #b010))) + +(define-instruction prefetcht2 (segment ea) + (:printer prefetch ((op #b00011000) (reg #b011))) + (:emitter + (aver (typep ea 'ea)) + (aver (eq :byte (ea-size ea))) + (emit-byte segment #b00001111) + (emit-byte segment #b00011000) + (emit-ea segment ea #b011))) + +;;;; interrupt instructions + +(defun snarf-error-junk (sap offset &optional length-only) + (let* ((length (sb!sys:sap-ref-8 sap offset)) + (vector (make-array length :element-type '(unsigned-byte 8)))) + (declare (type sb!sys:system-area-pointer sap) + (type (unsigned-byte 8) length) + (type (simple-array (unsigned-byte 8) (*)) vector)) + (cond (length-only + (values 0 (1+ length) nil nil)) + (t + (sb!kernel:copy-ub8-from-system-area sap (1+ offset) + vector 0 length) + (collect ((sc-offsets) + (lengths)) + (lengths 1) ; the length byte + (let* ((index 0) + (error-number (sb!c:read-var-integer vector index))) + (lengths index) + (loop + (when (>= index length) + (return)) + (let ((old-index index)) + (sc-offsets (sb!c:read-var-integer vector index)) + (lengths (- index old-index)))) + (values error-number + (1+ length) + (sc-offsets) + (lengths)))))))) + +#| +(defmacro break-cases (breaknum &body cases) + (let ((bn-temp (gensym))) + (collect ((clauses)) + (dolist (case cases) + (clauses `((= ,bn-temp ,(car case)) ,@(cdr case)))) + `(let ((,bn-temp ,breaknum)) + (cond ,@(clauses)))))) +|# + +(defun break-control (chunk inst stream dstate) + (declare (ignore inst)) + (flet ((nt (x) (if stream (sb!disassem:note x dstate)))) + ;; FIXME: Make sure that BYTE-IMM-CODE is defined. The genesis + ;; map has it undefined; and it should be easier to look in the target + ;; Lisp (with (DESCRIBE 'BYTE-IMM-CODE)) than to definitively deduce + ;; from first principles whether it's defined in some way that genesis + ;; can't grok. + (case (byte-imm-code chunk dstate) + (#.error-trap + (nt "error trap") + (sb!disassem:handle-break-args #'snarf-error-junk stream dstate)) + (#.cerror-trap + (nt "cerror trap") + (sb!disassem:handle-break-args #'snarf-error-junk stream dstate)) + (#.breakpoint-trap + (nt "breakpoint trap")) + (#.pending-interrupt-trap + (nt "pending interrupt trap")) + (#.halt-trap + (nt "halt trap")) + (#.fun-end-breakpoint-trap + (nt "function end breakpoint trap"))))) + +(define-instruction break (segment code) + (:declare (type (unsigned-byte 8) code)) + (:printer byte-imm ((op #b11001100)) '(:name :tab code) + :control #'break-control) + (:emitter + (emit-byte segment #b11001100) + (emit-byte segment code))) + +(define-instruction int (segment number) + (:declare (type (unsigned-byte 8) number)) + (:printer byte-imm ((op #b11001101))) + (:emitter + (etypecase number + ((member 3) + (emit-byte segment #b11001100)) + ((unsigned-byte 8) + (emit-byte segment #b11001101) + (emit-byte segment number))))) + +(define-instruction into (segment) + (:printer byte ((op #b11001110))) + (:emitter + (emit-byte segment #b11001110))) + +(define-instruction bound (segment reg bounds) + (:emitter + (let ((size (matching-operand-size reg bounds))) + (when (eq size :byte) + (error "can't bounds-test bytes: ~S" reg)) + (maybe-emit-operand-size-prefix segment size) + (emit-byte segment #b01100010) + (emit-ea segment bounds (reg-tn-encoding reg))))) + +(define-instruction iret (segment) + (:printer byte ((op #b11001111))) + (:emitter + (emit-byte segment #b11001111))) + +;;;; processor control + +(define-instruction hlt (segment) + (:printer byte ((op #b11110100))) + (:emitter + (emit-byte segment #b11110100))) + +(define-instruction nop (segment) + (:printer byte ((op #b10010000))) + (:emitter + (emit-byte segment #b10010000))) + +(define-instruction wait (segment) + (:printer byte ((op #b10011011))) + (:emitter + (emit-byte segment #b10011011))) + +(define-instruction lock (segment) + (:printer byte ((op #b11110000))) + (:emitter + (emit-byte segment #b11110000))) + +;;;; miscellaneous hackery + +(define-instruction byte (segment byte) + (:emitter + (emit-byte segment byte))) + +(define-instruction word (segment word) + (:emitter + (emit-word segment word))) + +(define-instruction dword (segment dword) + (:emitter + (emit-dword segment dword))) + +(defun emit-header-data (segment type) + (emit-back-patch segment + 4 + (lambda (segment posn) + (emit-dword segment + (logior type + (ash (+ posn + (component-header-length)) + (- n-widetag-bits + word-shift))))))) + +(define-instruction simple-fun-header-word (segment) + (:emitter + (emit-header-data segment simple-fun-header-widetag))) + +(define-instruction lra-header-word (segment) + (:emitter + (emit-header-data segment return-pc-header-widetag))) + + +;;;; SSE instructions +;;;; +;;;; Automatically generated + + +(DEFINE-INSTRUCTION ADDPS + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 88) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION ADDSUBPS + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 242) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 208) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION ANDNPS + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 85) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION ANDPS + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 84) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION DIVPS + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 94) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION MAXPS + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 95) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION MINPS + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 93) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION MULPS + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 89) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION ORPS + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 86) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION RCPPS + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 83) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION RSQRTPS + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 82) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION SQRTPS + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 81) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION SUBPS + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 92) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION XORPS + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 87) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +;;; SSE MOVE + +(DEFINE-INSTRUCTION MOVUPS (SEGMENT DST SRC) + (:EMITTER + (COND + ((SSE-REGISTER-P DST) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 16) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) + (T (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 17) + (EMIT-EA SEGMENT DST (REG-TN-ENCODING SRC)))))) + + +;;; CPUID + + +(define-instruction cpuid (segment) + (:emitter + (emit-byte segment #x0F) + (emit-byte segment #xA2))) + + + + + +;;;; fp instructions +;;;; +;;;; FIXME: This section said "added by jrd", which should end up in CREDITS. +;;;; +;;;; Note: We treat the single-precision and double-precision variants +;;;; as separate instructions. + +;;; Load single to st(0). +(define-instruction fld (segment source) + (:printer floating-point ((op '(#b001 #b000)))) + (:emitter + (emit-byte segment #b11011001) + (emit-fp-op segment source #b000))) + +;;; Load double to st(0). +(define-instruction fldd (segment source) + (:printer floating-point ((op '(#b101 #b000)))) + (:printer floating-point-fp ((op '(#b001 #b000)))) + (:emitter + (if (fp-reg-tn-p source) + (emit-byte segment #b11011001) + (emit-byte segment #b11011101)) + (emit-fp-op segment source #b000))) + +;;; Load long to st(0). +(define-instruction fldl (segment source) + (:printer floating-point ((op '(#b011 #b101)))) + (:emitter + (emit-byte segment #b11011011) + (emit-fp-op segment source #b101))) + +;;; Store single from st(0). +(define-instruction fst (segment dest) + (:printer floating-point ((op '(#b001 #b010)))) + (:emitter + (cond ((fp-reg-tn-p dest) + (emit-byte segment #b11011101) + (emit-fp-op segment dest #b010)) + (t + (emit-byte segment #b11011001) + (emit-fp-op segment dest #b010))))) + +;;; Store double from st(0). +(define-instruction fstd (segment dest) + (:printer floating-point ((op '(#b101 #b010)))) + (:printer floating-point-fp ((op '(#b101 #b010)))) + (:emitter + (cond ((fp-reg-tn-p dest) + (emit-byte segment #b11011101) + (emit-fp-op segment dest #b010)) + (t + (emit-byte segment #b11011101) + (emit-fp-op segment dest #b010))))) + +;;; Arithmetic ops are all done with at least one operand at top of +;;; stack. The other operand is is another register or a 32/64 bit +;;; memory loc. + +;;; dtc: I've tried to follow the Intel ASM386 conventions, but note +;;; that these conflict with the Gdb conventions for binops. To reduce +;;; the confusion I've added comments showing the mathamatical +;;; operation and the two syntaxes. By the ASM386 convention the +;;; instruction syntax is: +;;; +;;; Fop Source +;;; or Fop Destination, Source +;;; +;;; If only one operand is given then it is the source and the +;;; destination is ST(0). There are reversed forms of the fsub and +;;; fdiv instructions inducated by an 'R' suffix. +;;; +;;; The mathematical operation for the non-reverse form is always: +;;; destination = destination op source +;;; +;;; For the reversed form it is: +;;; destination = source op destination +;;; +;;; The instructions below only accept one operand at present which is +;;; usually the source. I've hack in extra instructions to implement +;;; the fops with a ST(i) destination, these have a -sti suffix and +;;; the operand is the destination with the source being ST(0). + +;;; Add single: +;;; st(0) = st(0) + memory or st(i). +(define-instruction fadd (segment source) + (:printer floating-point ((op '(#b000 #b000)))) + (:emitter + (emit-byte segment #b11011000) + (emit-fp-op segment source #b000))) + +;;; Add double: +;;; st(0) = st(0) + memory or st(i). +(define-instruction faddd (segment source) + (:printer floating-point ((op '(#b100 #b000)))) + (:printer floating-point-fp ((op '(#b000 #b000)))) + (:emitter + (if (fp-reg-tn-p source) + (emit-byte segment #b11011000) + (emit-byte segment #b11011100)) + (emit-fp-op segment source #b000))) + +;;; Add double destination st(i): +;;; st(i) = st(0) + st(i). +(define-instruction fadd-sti (segment destination) + (:printer floating-point-fp ((op '(#b100 #b000)))) + (:emitter + (aver (fp-reg-tn-p destination)) + (emit-byte segment #b11011100) + (emit-fp-op segment destination #b000))) +;;; with pop +(define-instruction faddp-sti (segment destination) + (:printer floating-point-fp ((op '(#b110 #b000)))) + (:emitter + (aver (fp-reg-tn-p destination)) + (emit-byte segment #b11011110) + (emit-fp-op segment destination #b000))) + +;;; Subtract single: +;;; st(0) = st(0) - memory or st(i). +(define-instruction fsub (segment source) + (:printer floating-point ((op '(#b000 #b100)))) + (:emitter + (emit-byte segment #b11011000) + (emit-fp-op segment source #b100))) + +;;; Subtract single, reverse: +;;; st(0) = memory or st(i) - st(0). +(define-instruction fsubr (segment source) + (:printer floating-point ((op '(#b000 #b101)))) + (:emitter + (emit-byte segment #b11011000) + (emit-fp-op segment source #b101))) + +;;; Subtract double: +;;; st(0) = st(0) - memory or st(i). +(define-instruction fsubd (segment source) + (:printer floating-point ((op '(#b100 #b100)))) + (:printer floating-point-fp ((op '(#b000 #b100)))) + (:emitter + (if (fp-reg-tn-p source) + (emit-byte segment #b11011000) + (emit-byte segment #b11011100)) + (emit-fp-op segment source #b100))) + +;;; Subtract double, reverse: +;;; st(0) = memory or st(i) - st(0). +(define-instruction fsubrd (segment source) + (:printer floating-point ((op '(#b100 #b101)))) + (:printer floating-point-fp ((op '(#b000 #b101)))) + (:emitter + (if (fp-reg-tn-p source) + (emit-byte segment #b11011000) + (emit-byte segment #b11011100)) + (emit-fp-op segment source #b101))) + +;;; Subtract double, destination st(i): +;;; st(i) = st(i) - st(0). +;;; +;;; ASM386 syntax: FSUB ST(i), ST +;;; Gdb syntax: fsubr %st,%st(i) +(define-instruction fsub-sti (segment destination) + (:printer floating-point-fp ((op '(#b100 #b101)))) + (:emitter + (aver (fp-reg-tn-p destination)) + (emit-byte segment #b11011100) + (emit-fp-op segment destination #b101))) +;;; with a pop +(define-instruction fsubp-sti (segment destination) + (:printer floating-point-fp ((op '(#b110 #b101)))) + (:emitter + (aver (fp-reg-tn-p destination)) + (emit-byte segment #b11011110) + (emit-fp-op segment destination #b101))) + +;;; Subtract double, reverse, destination st(i): +;;; st(i) = st(0) - st(i). +;;; +;;; ASM386 syntax: FSUBR ST(i), ST +;;; Gdb syntax: fsub %st,%st(i) +(define-instruction fsubr-sti (segment destination) + (:printer floating-point-fp ((op '(#b100 #b100)))) + (:emitter + (aver (fp-reg-tn-p destination)) + (emit-byte segment #b11011100) + (emit-fp-op segment destination #b100))) +;;; with a pop +(define-instruction fsubrp-sti (segment destination) + (:printer floating-point-fp ((op '(#b110 #b100)))) + (:emitter + (aver (fp-reg-tn-p destination)) + (emit-byte segment #b11011110) + (emit-fp-op segment destination #b100))) + +;;; Multiply single: +;;; st(0) = st(0) * memory or st(i). +(define-instruction fmul (segment source) + (:printer floating-point ((op '(#b000 #b001)))) + (:emitter + (emit-byte segment #b11011000) + (emit-fp-op segment source #b001))) + +;;; Multiply double: +;;; st(0) = st(0) * memory or st(i). +(define-instruction fmuld (segment source) + (:printer floating-point ((op '(#b100 #b001)))) + (:printer floating-point-fp ((op '(#b000 #b001)))) + (:emitter + (if (fp-reg-tn-p source) + (emit-byte segment #b11011000) + (emit-byte segment #b11011100)) + (emit-fp-op segment source #b001))) + +;;; Multiply double, destination st(i): +;;; st(i) = st(i) * st(0). +(define-instruction fmul-sti (segment destination) + (:printer floating-point-fp ((op '(#b100 #b001)))) + (:emitter + (aver (fp-reg-tn-p destination)) + (emit-byte segment #b11011100) + (emit-fp-op segment destination #b001))) + +;;; Divide single: +;;; st(0) = st(0) / memory or st(i). +(define-instruction fdiv (segment source) + (:printer floating-point ((op '(#b000 #b110)))) + (:emitter + (emit-byte segment #b11011000) + (emit-fp-op segment source #b110))) + +;;; Divide single, reverse: +;;; st(0) = memory or st(i) / st(0). +(define-instruction fdivr (segment source) + (:printer floating-point ((op '(#b000 #b111)))) + (:emitter + (emit-byte segment #b11011000) + (emit-fp-op segment source #b111))) + +;;; Divide double: +;;; st(0) = st(0) / memory or st(i). +(define-instruction fdivd (segment source) + (:printer floating-point ((op '(#b100 #b110)))) + (:printer floating-point-fp ((op '(#b000 #b110)))) + (:emitter + (if (fp-reg-tn-p source) + (emit-byte segment #b11011000) + (emit-byte segment #b11011100)) + (emit-fp-op segment source #b110))) + +;;; Divide double, reverse: +;;; st(0) = memory or st(i) / st(0). +(define-instruction fdivrd (segment source) + (:printer floating-point ((op '(#b100 #b111)))) + (:printer floating-point-fp ((op '(#b000 #b111)))) + (:emitter + (if (fp-reg-tn-p source) + (emit-byte segment #b11011000) + (emit-byte segment #b11011100)) + (emit-fp-op segment source #b111))) + +;;; Divide double, destination st(i): +;;; st(i) = st(i) / st(0). +;;; +;;; ASM386 syntax: FDIV ST(i), ST +;;; Gdb syntax: fdivr %st,%st(i) +(define-instruction fdiv-sti (segment destination) + (:printer floating-point-fp ((op '(#b100 #b111)))) + (:emitter + (aver (fp-reg-tn-p destination)) + (emit-byte segment #b11011100) + (emit-fp-op segment destination #b111))) + +;;; Divide double, reverse, destination st(i): +;;; st(i) = st(0) / st(i). +;;; +;;; ASM386 syntax: FDIVR ST(i), ST +;;; Gdb syntax: fdiv %st,%st(i) +(define-instruction fdivr-sti (segment destination) + (:printer floating-point-fp ((op '(#b100 #b110)))) + (:emitter + (aver (fp-reg-tn-p destination)) + (emit-byte segment #b11011100) + (emit-fp-op segment destination #b110))) + +;;; Exchange fr0 with fr(n). (There is no double precision variant.) +(define-instruction fxch (segment source) + (:printer floating-point-fp ((op '(#b001 #b001)))) + (:emitter + (unless (and (tn-p source) + (eq (sb-name (sc-sb (tn-sc source))) 'float-registers)) + (cl:break)) + (emit-byte segment #b11011001) + (emit-fp-op segment source #b001))) + +;;; Push 32-bit integer to st0. +(define-instruction fild (segment source) + (:printer floating-point ((op '(#b011 #b000)))) + (:emitter + (emit-byte segment #b11011011) + (emit-fp-op segment source #b000))) + +;;; Push 64-bit integer to st0. +(define-instruction fildl (segment source) + (:printer floating-point ((op '(#b111 #b101)))) + (:emitter + (emit-byte segment #b11011111) + (emit-fp-op segment source #b101))) + +;;; Store 32-bit integer. +(define-instruction fist (segment dest) + (:printer floating-point ((op '(#b011 #b010)))) + (:emitter + (emit-byte segment #b11011011) + (emit-fp-op segment dest #b010))) + +;;; Store and pop 32-bit integer. +(define-instruction fistp (segment dest) + (:printer floating-point ((op '(#b011 #b011)))) + (:emitter + (emit-byte segment #b11011011) + (emit-fp-op segment dest #b011))) + +;;; Store and pop 64-bit integer. +(define-instruction fistpl (segment dest) + (:printer floating-point ((op '(#b111 #b111)))) + (:emitter + (emit-byte segment #b11011111) + (emit-fp-op segment dest #b111))) + +;;; Store single from st(0) and pop. +(define-instruction fstp (segment dest) + (:printer floating-point ((op '(#b001 #b011)))) + (:emitter + (cond ((fp-reg-tn-p dest) + (emit-byte segment #b11011101) + (emit-fp-op segment dest #b011)) + (t + (emit-byte segment #b11011001) + (emit-fp-op segment dest #b011))))) + +;;; Store double from st(0) and pop. +(define-instruction fstpd (segment dest) + (:printer floating-point ((op '(#b101 #b011)))) + (:printer floating-point-fp ((op '(#b101 #b011)))) + (:emitter + (cond ((fp-reg-tn-p dest) + (emit-byte segment #b11011101) + (emit-fp-op segment dest #b011)) + (t + (emit-byte segment #b11011101) + (emit-fp-op segment dest #b011))))) + +;;; Store long from st(0) and pop. +(define-instruction fstpl (segment dest) + (:printer floating-point ((op '(#b011 #b111)))) + (:emitter + (emit-byte segment #b11011011) + (emit-fp-op segment dest #b111))) + +;;; Decrement stack-top pointer. +(define-instruction fdecstp (segment) + (:printer floating-point-no ((op #b10110))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11110110))) + +;;; Increment stack-top pointer. +(define-instruction fincstp (segment) + (:printer floating-point-no ((op #b10111))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11110111))) + +;;; Free fp register. +(define-instruction ffree (segment dest) + (:printer floating-point-fp ((op '(#b101 #b000)))) + (:emitter + (emit-byte segment #b11011101) + (emit-fp-op segment dest #b000))) + +(define-instruction fabs (segment) + (:printer floating-point-no ((op #b00001))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11100001))) + +(define-instruction fchs (segment) + (:printer floating-point-no ((op #b00000))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11100000))) + +(define-instruction frndint(segment) + (:printer floating-point-no ((op #b11100))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11111100))) + +;;; Initialize NPX. +(define-instruction fninit(segment) + (:printer floating-point-5 ((op #b00011))) + (:emitter + (emit-byte segment #b11011011) + (emit-byte segment #b11100011))) + +;;; Store Status Word to AX. +(define-instruction fnstsw(segment) + (:printer floating-point-st ((op #b00000))) + (:emitter + (emit-byte segment #b11011111) + (emit-byte segment #b11100000))) + +;;; Load Control Word. +;;; +;;; src must be a memory location +(define-instruction fldcw(segment src) + (:printer floating-point ((op '(#b001 #b101)))) + (:emitter + (emit-byte segment #b11011001) + (emit-fp-op segment src #b101))) + +;;; Store Control Word. +(define-instruction fnstcw(segment dst) + (:printer floating-point ((op '(#b001 #b111)))) + (:emitter + (emit-byte segment #b11011001) + (emit-fp-op segment dst #b111))) + +;;; Store FP Environment. +(define-instruction fstenv(segment dst) + (:printer floating-point ((op '(#b001 #b110)))) + (:emitter + (emit-byte segment #b11011001) + (emit-fp-op segment dst #b110))) + +;;; Restore FP Environment. +(define-instruction fldenv(segment src) + (:printer floating-point ((op '(#b001 #b100)))) + (:emitter + (emit-byte segment #b11011001) + (emit-fp-op segment src #b100))) + +;;; Save FP State. +(define-instruction fsave(segment dst) + (:printer floating-point ((op '(#b101 #b110)))) + (:emitter + (emit-byte segment #b11011101) + (emit-fp-op segment dst #b110))) + +;;; Restore FP State. +(define-instruction frstor(segment src) + (:printer floating-point ((op '(#b101 #b100)))) + (:emitter + (emit-byte segment #b11011101) + (emit-fp-op segment src #b100))) + +;;; Clear exceptions. +(define-instruction fnclex(segment) + (:printer floating-point-5 ((op #b00010))) + (:emitter + (emit-byte segment #b11011011) + (emit-byte segment #b11100010))) + +;;; comparison +(define-instruction fcom (segment src) + (:printer floating-point ((op '(#b000 #b010)))) + (:emitter + (emit-byte segment #b11011000) + (emit-fp-op segment src #b010))) + +(define-instruction fcomd (segment src) + (:printer floating-point ((op '(#b100 #b010)))) + (:printer floating-point-fp ((op '(#b000 #b010)))) + (:emitter + (if (fp-reg-tn-p src) + (emit-byte segment #b11011000) + (emit-byte segment #b11011100)) + (emit-fp-op segment src #b010))) + +;;; Compare ST1 to ST0, popping the stack twice. +(define-instruction fcompp (segment) + (:printer floating-point-3 ((op '(#b110 #b011001)))) + (:emitter + (emit-byte segment #b11011110) + (emit-byte segment #b11011001))) + +;;; unordered comparison +(define-instruction fucom (segment src) + (:printer floating-point-fp ((op '(#b101 #b100)))) + (:emitter + (aver (fp-reg-tn-p src)) + (emit-byte segment #b11011101) + (emit-fp-op segment src #b100))) + +(define-instruction ftst (segment) + (:printer floating-point-no ((op #b00100))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11100100))) + +;;;; 80387 specials + +(define-instruction fsqrt(segment) + (:printer floating-point-no ((op #b11010))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11111010))) + +(define-instruction fscale(segment) + (:printer floating-point-no ((op #b11101))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11111101))) + +(define-instruction fxtract(segment) + (:printer floating-point-no ((op #b10100))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11110100))) + +(define-instruction fsin(segment) + (:printer floating-point-no ((op #b11110))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11111110))) + +(define-instruction fcos(segment) + (:printer floating-point-no ((op #b11111))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11111111))) + +(define-instruction fprem1(segment) + (:printer floating-point-no ((op #b10101))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11110101))) + +(define-instruction fprem(segment) + (:printer floating-point-no ((op #b11000))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11111000))) + +(define-instruction fxam (segment) + (:printer floating-point-no ((op #b00101))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11100101))) + +;;; These do push/pop to stack and need special handling +;;; in any VOPs that use them. See the book. + +;;; st0 <- st1*log2(st0) +(define-instruction fyl2x(segment) ; pops stack + (:printer floating-point-no ((op #b10001))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11110001))) + +(define-instruction fyl2xp1(segment) + (:printer floating-point-no ((op #b11001))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11111001))) + +(define-instruction f2xm1(segment) + (:printer floating-point-no ((op #b10000))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11110000))) + +(define-instruction fptan(segment) ; st(0) <- 1; st(1) <- tan + (:printer floating-point-no ((op #b10010))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11110010))) + +(define-instruction fpatan(segment) ; POPS STACK + (:printer floating-point-no ((op #b10011))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11110011))) + +;;;; loading constants + +(define-instruction fldz(segment) + (:printer floating-point-no ((op #b01110))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11101110))) + +(define-instruction fld1(segment) + (:printer floating-point-no ((op #b01000))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11101000))) + +(define-instruction fldpi(segment) + (:printer floating-point-no ((op #b01011))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11101011))) + +(define-instruction fldl2t(segment) + (:printer floating-point-no ((op #b01001))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11101001))) + +(define-instruction fldl2e(segment) + (:printer floating-point-no ((op #b01010))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11101010))) + +(define-instruction fldlg2(segment) + (:printer floating-point-no ((op #b01100))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11101100))) + +(define-instruction fldln2(segment) + (:printer floating-point-no ((op #b01101))) + (:emitter + (emit-byte segment #b11011001) + (emit-byte segment #b11101101))) diff --git a/sbcl-src/src/compiler/x86/vm.lisp b/sbcl-src/src/compiler/x86/vm.lisp new file mode 100644 index 0000000..1bab6a3 --- /dev/null +++ b/sbcl-src/src/compiler/x86/vm.lisp @@ -0,0 +1,469 @@ +;;;; miscellaneous VM definition noise for the x86 + +;;;; This software is part of the SBCL system. See the README file for +;;;; more information. +;;;; +;;;; This software is derived from the CMU CL system, which was +;;;; written at Carnegie Mellon University and released into the +;;;; public domain. The software is in the public domain and is +;;;; provided with absolutely no warranty. See the COPYING and CREDITS +;;;; files for more information. + +(in-package "SB!VM") + +;;; the size of an INTEGER representation of a SYSTEM-AREA-POINTER, i.e. +;;; size of a native memory address +(deftype sap-int () '(unsigned-byte 32)) + +;;;; register specs + +(eval-when (:compile-toplevel :load-toplevel :execute) + (defvar *byte-register-names* (make-array 8 :initial-element nil)) + (defvar *word-register-names* (make-array 16 :initial-element nil)) + (defvar *dword-register-names* (make-array 16 :initial-element nil)) + (defvar *float-register-names* (make-array 8 :initial-element nil)) + (defvar *dqword-register-names* (make-array 8 :initial-element nil))) + +(macrolet ((defreg (name offset size) + (let ((offset-sym (symbolicate name "-OFFSET")) + (names-vector (symbolicate "*" size "-REGISTER-NAMES*"))) + `(progn + (eval-when (:compile-toplevel :load-toplevel :execute) + ;; EVAL-WHEN is necessary because stuff like #.EAX-OFFSET + ;; (in the same file) depends on compile-time evaluation + ;; of the DEFCONSTANT. -- AL 20010224 + (def!constant ,offset-sym ,offset)) + (setf (svref ,names-vector ,offset-sym) + ,(symbol-name name))))) + ;; FIXME: It looks to me as though DEFREGSET should also + ;; define the related *FOO-REGISTER-NAMES* variable. + (defregset (name &rest regs) + `(eval-when (:compile-toplevel :load-toplevel :execute) + (defparameter ,name + (list ,@(mapcar (lambda (name) + (symbolicate name "-OFFSET")) + regs)))))) + + ;; byte registers + ;; + ;; Note: the encoding here is different than that used by the chip. + ;; We use this encoding so that the compiler thinks that AX (and + ;; EAX) overlap AL and AH instead of AL and CL. + (defreg al 0 :byte) + (defreg ah 1 :byte) + (defreg cl 2 :byte) + (defreg ch 3 :byte) + (defreg dl 4 :byte) + (defreg dh 5 :byte) + (defreg bl 6 :byte) + (defreg bh 7 :byte) + (defregset *byte-regs* al ah cl ch dl dh bl bh) + + ;; word registers + (defreg ax 0 :word) + (defreg cx 2 :word) + (defreg dx 4 :word) + (defreg bx 6 :word) + (defreg sp 8 :word) + (defreg bp 10 :word) + (defreg si 12 :word) + (defreg di 14 :word) + (defregset *word-regs* ax cx dx bx si di) + + ;; double word registers + (defreg eax 0 :dword) + (defreg ecx 2 :dword) + (defreg edx 4 :dword) + (defreg ebx 6 :dword) + (defreg esp 8 :dword) + (defreg ebp 10 :dword) + (defreg esi 12 :dword) + (defreg edi 14 :dword) + (defregset *dword-regs* eax ecx edx ebx esi edi) + + ;; floating point registers + (defreg fr0 0 :float) + (defreg fr1 1 :float) + (defreg fr2 2 :float) + (defreg fr3 3 :float) + (defreg fr4 4 :float) + (defreg fr5 5 :float) + (defreg fr6 6 :float) + (defreg fr7 7 :float) + (defregset *float-regs* fr0 fr1 fr2 fr3 fr4 fr5 fr6 fr7) + + ;; sse registers + (defreg xmm0 0 :dqword) + (defreg xmm1 1 :dqword) + (defreg xmm2 2 :dqword) + (defreg xmm3 3 :dqword) + (defreg xmm4 4 :dqword) + (defreg xmm5 5 :dqword) + (defreg xmm6 6 :dqword) + (defreg xmm7 7 :dqword) + (defregset *sse-regs* xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7) + + ;; registers used to pass arguments + ;; + ;; the number of arguments/return values passed in registers + (def!constant register-arg-count 3) + ;; names and offsets for registers used to pass arguments + (eval-when (:compile-toplevel :load-toplevel :execute) + (defparameter *register-arg-names* '(edx edi esi))) + (defregset *register-arg-offsets* edx edi esi)) + +;;;; SB definitions + +;;; Despite the fact that there are only 8 different registers, we consider +;;; them 16 in order to describe the overlap of byte registers. The only +;;; thing we need to represent is what registers overlap. Therefore, we +;;; consider bytes to take one unit, and words or dwords to take two. We +;;; don't need to tell the difference between words and dwords, because +;;; you can't put two words in a dword register. +(define-storage-base registers :finite :size 16) + +;;; jrd changed this from size 1 to size 8. It doesn't seem to make much +;;; sense to use the 387's idea of a stack; 8 separate registers is easier +;;; to deal with. +;;; the old way: +;;; (define-storage-base float-registers :finite :size 1) +;;; the new way: +(define-storage-base float-registers :finite :size 8) + +(define-storage-base sse-registers :finite :size 8) + +(define-storage-base stack :unbounded :size 8) +(define-storage-base constant :non-packed) +(define-storage-base immediate-constant :non-packed) +(define-storage-base noise :unbounded :size 2) + +;;;; SC definitions + +;;; a handy macro so we don't have to keep changing all the numbers whenever +;;; we insert a new storage class +;;; +(defmacro !define-storage-classes (&rest classes) + (collect ((forms)) + (let ((index 0)) + (dolist (class classes) + (let* ((sc-name (car class)) + (constant-name (symbolicate sc-name "-SC-NUMBER"))) + (forms `(define-storage-class ,sc-name ,index + ,@(cdr class))) + (forms `(def!constant ,constant-name ,index)) + (incf index)))) + `(progn + ,@(forms)))) + +;;; The DEFINE-STORAGE-CLASS call for CATCH-BLOCK refers to the size +;;; of CATCH-BLOCK. The size of CATCH-BLOCK isn't calculated until +;;; later in the build process, and the calculation is entangled with +;;; code which has lots of predependencies, including dependencies on +;;; the prior call of DEFINE-STORAGE-CLASS. The proper way to +;;; unscramble this would be to untangle the code, so that the code +;;; which calculates the size of CATCH-BLOCK can be separated from the +;;; other lots-of-dependencies code, so that the code which calculates +;;; the size of CATCH-BLOCK can be executed early, so that this value +;;; is known properly at this point in compilation. However, that +;;; would be a lot of editing of code that I (WHN 19990131) can't test +;;; until the project is complete. So instead, I set the correct value +;;; by hand here (a sort of nondeterministic guess of the right +;;; answer:-) and add an assertion later, after the value is +;;; calculated, that the original guess was correct. +;;; +;;; (What a KLUDGE! Anyone who wants to come in and clean up this mess +;;; has my gratitude.) (FIXME: Maybe this should be me..) +(eval-when (:compile-toplevel :load-toplevel :execute) + (def!constant kludge-nondeterministic-catch-block-size 6)) + +(!define-storage-classes + + ;; non-immediate constants in the constant pool + (constant constant) + + ;; some FP constants can be generated in the i387 silicon + (fp-constant immediate-constant) + + (immediate immediate-constant) + + ;; + ;; the stacks + ;; + + ;; the control stack + (control-stack stack) ; may be pointers, scanned by GC + + ;; the non-descriptor stacks + (signed-stack stack) ; (signed-byte 32) + (unsigned-stack stack) ; (unsigned-byte 32) + (character-stack stack) ; non-descriptor characters. + (sap-stack stack) ; System area pointers. + (single-stack stack) ; single-floats + (double-stack stack :element-size 2) ; double-floats. + #!+long-float + (long-stack stack :element-size 3) ; long-floats. + (complex-single-stack stack :element-size 2) ; complex-single-floats + (complex-double-stack stack :element-size 4) ; complex-double-floats + #!+long-float + (complex-long-stack stack :element-size 6) ; complex-long-floats + + ;; + ;; magic SCs + ;; + + (ignore-me noise) + + ;; + ;; things that can go in the integer registers + ;; + + ;; On the X86, we don't have to distinguish between descriptor and + ;; non-descriptor registers, because of the conservative GC. + ;; Therefore, we use different scs only to distinguish between + ;; descriptor and non-descriptor values and to specify size. + + ;; immediate descriptor objects. Don't have to be seen by GC, but nothing + ;; bad will happen if they are. (fixnums, characters, header values, etc). + (any-reg registers + :locations #.*dword-regs* + :element-size 2 +; :reserve-locations (#.eax-offset) + :constant-scs (immediate) + :save-p t + :alternate-scs (control-stack)) + + ;; pointer descriptor objects -- must be seen by GC + (descriptor-reg registers + :locations #.*dword-regs* + :element-size 2 +; :reserve-locations (#.eax-offset) + :constant-scs (constant immediate) + :save-p t + :alternate-scs (control-stack)) + + ;; non-descriptor characters + (character-reg registers + :locations #!-sb-unicode #.*byte-regs* + #!+sb-unicode #.*dword-regs* + #!-sb-unicode #!-sb-unicode + :reserve-locations (#.ah-offset #.al-offset) + :constant-scs (immediate) + :save-p t + :alternate-scs (character-stack)) + + ;; non-descriptor SAPs (arbitrary pointers into address space) + (sap-reg registers + :locations #.*dword-regs* + :element-size 2 +; :reserve-locations (#.eax-offset) + :constant-scs (immediate) + :save-p t + :alternate-scs (sap-stack)) + + ;; non-descriptor (signed or unsigned) numbers + (signed-reg registers + :locations #.*dword-regs* + :element-size 2 +; :reserve-locations (#.eax-offset) + :constant-scs (immediate) + :save-p t + :alternate-scs (signed-stack)) + (unsigned-reg registers + :locations #.*dword-regs* + :element-size 2 +; :reserve-locations (#.eax-offset) + :constant-scs (immediate) + :save-p t + :alternate-scs (unsigned-stack)) + + ;; miscellaneous objects that must not be seen by GC. Used only as + ;; temporaries. + (word-reg registers + :locations #.*word-regs* + :element-size 2 +; :reserve-locations (#.ax-offset) + ) + (byte-reg registers + :locations #.*byte-regs* +; :reserve-locations (#.al-offset #.ah-offset) + ) + + ;; that can go in the floating point registers + + ;; non-descriptor SINGLE-FLOATs + (single-reg float-registers + :locations (0 1 2 3 4 5 6 7) + :constant-scs (fp-constant) + :save-p t + :alternate-scs (single-stack)) + + ;; non-descriptor DOUBLE-FLOATs + (double-reg float-registers + :locations (0 1 2 3 4 5 6 7) + :constant-scs (fp-constant) + :save-p t + :alternate-scs (double-stack)) + + ;; non-descriptor LONG-FLOATs + #!+long-float + (long-reg float-registers + :locations (0 1 2 3 4 5 6 7) + :constant-scs (fp-constant) + :save-p t + :alternate-scs (long-stack)) + + (complex-single-reg float-registers + :locations (0 2 4 6) + :element-size 2 + :constant-scs () + :save-p t + :alternate-scs (complex-single-stack)) + + (complex-double-reg float-registers + :locations (0 2 4 6) + :element-size 2 + :constant-scs () + :save-p t + :alternate-scs (complex-double-stack)) + + #!+long-float + (complex-long-reg float-registers + :locations (0 2 4 6) + :element-size 2 + :constant-scs () + :save-p t + :alternate-scs (complex-long-stack)) + + (sse-reg sse-registers + :locations #.*sse-regs*) + ;; a catch or unwind block + (catch-block stack :element-size kludge-nondeterministic-catch-block-size)) + +(eval-when (:compile-toplevel :load-toplevel :execute) +(defparameter *byte-sc-names* + '(#!-sb-unicode character-reg byte-reg #!-sb-unicode character-stack)) +(defparameter *word-sc-names* '(word-reg)) +(defparameter *dword-sc-names* + '(any-reg descriptor-reg sap-reg signed-reg unsigned-reg control-stack + signed-stack unsigned-stack sap-stack single-stack + #!+sb-unicode character-reg #!+sb-unicode character-stack constant)) +;;; added by jrd. I guess the right thing to do is to treat floats +;;; as a separate size... +;;; +;;; These are used to (at least) determine operand size. +(defparameter *float-sc-names* '(single-reg)) +(defparameter *double-sc-names* '(double-reg double-stack)) +(defparameter *dqword-sc-names* '(sse-reg)) +) ; EVAL-WHEN + +;;;; miscellaneous TNs for the various registers + +(macrolet ((def-misc-reg-tns (sc-name &rest reg-names) + (collect ((forms)) + (dolist (reg-name reg-names) + (let ((tn-name (symbolicate reg-name "-TN")) + (offset-name (symbolicate reg-name "-OFFSET"))) + ;; FIXME: It'd be good to have the special + ;; variables here be named with the *FOO* + ;; convention. + (forms `(defparameter ,tn-name + (make-random-tn :kind :normal + :sc (sc-or-lose ',sc-name) + :offset + ,offset-name))))) + `(progn ,@(forms))))) + + (def-misc-reg-tns unsigned-reg eax ebx ecx edx ebp esp edi esi) + (def-misc-reg-tns word-reg ax bx cx dx bp sp di si) + (def-misc-reg-tns byte-reg al ah bl bh cl ch dl dh) + (def-misc-reg-tns single-reg fr0 fr1 fr2 fr3 fr4 fr5 fr6 fr7)) + +;;; TNs for registers used to pass arguments +(defparameter *register-arg-tns* + (mapcar (lambda (register-arg-name) + (symbol-value (symbolicate register-arg-name "-TN"))) + *register-arg-names*)) + +;;; FIXME: doesn't seem to be used in SBCL +#| +;;; added by pw +(defparameter fp-constant-tn + (make-random-tn :kind :normal + :sc (sc-or-lose 'fp-constant) + :offset 31)) ; Offset doesn't get used. +|# + +;;; If value can be represented as an immediate constant, then return +;;; the appropriate SC number, otherwise return NIL. +(!def-vm-support-routine immediate-constant-sc (value) + (typecase value + ((or (integer #.sb!xc:most-negative-fixnum #.sb!xc:most-positive-fixnum) + #-sb-xc-host system-area-pointer character) + (sc-number-or-lose 'immediate)) + (symbol + (when (static-symbol-p value) + (sc-number-or-lose 'immediate))) + (single-float + (when (or (eql value 0f0) (eql value 1f0)) + (sc-number-or-lose 'fp-constant))) + (double-float + (when (or (eql value 0d0) (eql value 1d0)) + (sc-number-or-lose 'fp-constant))) + #!+long-float + (long-float + (when (or (eql value 0l0) (eql value 1l0) + (eql value pi) + (eql value (log 10l0 2l0)) + (eql value (log 2.718281828459045235360287471352662L0 2l0)) + (eql value (log 2l0 10l0)) + (eql value (log 2l0 2.718281828459045235360287471352662L0))) + (sc-number-or-lose 'fp-constant))))) + +;;;; miscellaneous function call parameters + +;;; offsets of special stack frame locations +(def!constant ocfp-save-offset 0) +(def!constant return-pc-save-offset 1) +(def!constant code-save-offset 2) + +;;; FIXME: This is a bad comment (changed since when?) and there are others +;;; like it in this file. It'd be nice to clarify them. Failing that deleting +;;; them or flagging them with KLUDGE might be better than nothing. +;;; +;;; names of these things seem to have changed. these aliases by jrd +(def!constant lra-save-offset return-pc-save-offset) + +(def!constant cfp-offset ebp-offset) ; pfw - needed by stuff in /code + ; related to signal context stuff + +;;; This is used by the debugger. +(def!constant single-value-return-byte-offset 2) + +;;; This function is called by debug output routines that want a pretty name +;;; for a TN's location. It returns a thing that can be printed with PRINC. +(!def-vm-support-routine location-print-name (tn) + (declare (type tn tn)) + (let* ((sc (tn-sc tn)) + (sb (sb-name (sc-sb sc))) + (offset (tn-offset tn))) + (ecase sb + (registers + (let* ((sc-name (sc-name sc)) + (name-vec (cond ((member sc-name *byte-sc-names*) + *byte-register-names*) + ((member sc-name *word-sc-names*) + *word-register-names*) + ((member sc-name *dword-sc-names*) + *dword-register-names*)))) + (or (and name-vec + (< -1 offset (length name-vec)) + (svref name-vec offset)) + ;; FIXME: Shouldn't this be an ERROR? + (format nil "" offset sc-name)))) + (float-registers (format nil "FR~D" offset)) + (sse-registers (format nil "XMM~D" offset)) + (stack (format nil "S~D" offset)) + (constant (format nil "Const~D" offset)) + (immediate-constant "Immed") + (noise (symbol-name (sc-name sc)))))) +;;; FIXME: Could this, and everything that uses it, be made #!+SB-SHOW? diff --git a/scratch/.emacs.desktop b/scratch/.emacs.desktop new file mode 100644 index 0000000..4fe6149 --- /dev/null +++ b/scratch/.emacs.desktop @@ -0,0 +1,82 @@ +;; -*- coding: emacs-mule; -*- +;; -------------------------------------------------------------------------- +;; Desktop File for Emacs +;; -------------------------------------------------------------------------- +;; Created Fri Aug 5 15:49:29 2005 +;; Emacs version 21.3.1 + +;; Global section: +(setq desktop-missing-file-warning nil) +(setq tags-file-name nil) +(setq tags-table-list nil) +(setq search-ring nil) +(setq regexp-search-ring nil) +(setq register-alist nil) + +;; Buffer section: +(desktop-create-buffer 205 + "/home/rlaakso/projects/sbcl-sse/sse2.lisp" + "sse2.lisp" + 'lisp-mode + '(slime-mode) + 1 + '(nil nil) + nil + nil + nil) + +(desktop-create-buffer 205 + "/home/rlaakso/projects/sbcl-sse/OPTIMIZATIONS" + "OPTIMIZATIONS" + 'fundamental-mode + nil + 161 + '(nil nil) + nil + nil + nil) + +(desktop-create-buffer 205 + "/home/rlaakso/projects/sbcl-sse/sse.lisp" + "sse.lisp" + 'lisp-mode + '(slime-mode) + 1630 + '(nil nil) + nil + nil + nil) + +(desktop-create-buffer 205 + "/home/rlaakso/projects/sbcl-sse/cpuid.lisp" + "cpuid.lisp" + 'lisp-mode + '(slime-mode) + 68 + '(1 nil) + nil + nil + nil) + +(desktop-create-buffer 205 + "/home/rlaakso/.emacs" + ".emacs" + 'emacs-lisp-mode + nil + 848 + '(nil nil) + nil + nil + nil) + +(desktop-create-buffer 205 + "/home/rlaakso/projects/sb-simd-tmp/sb-simd/scratch/cpuid.lisp" + "cpuid.lisp<2>" + 'lisp-mode + '(slime-mode) + 1442 + '(nil nil) + nil + nil + nil) + diff --git a/scratch/README b/scratch/README new file mode 100644 index 0000000..caad35e --- /dev/null +++ b/scratch/README @@ -0,0 +1,2 @@ +scratch space. do not look ;-) + diff --git a/scratch/asm-t1.asm b/scratch/asm-t1.asm new file mode 100644 index 0000000..599e6a6 --- /dev/null +++ b/scratch/asm-t1.asm @@ -0,0 +1,10 @@ + +my_func: + xor eax, eax + movups xmm0, [edx+ebx+1] + movups xmm1, [esi+ecx+1] + addps xmm0, xmm4 + addps xmm4, xmm0 + movups [ecx+1], xmm0 + + ret \ No newline at end of file diff --git a/scratch/foo.lisp b/scratch/foo.lisp new file mode 100644 index 0000000..0f54018 --- /dev/null +++ b/scratch/foo.lisp @@ -0,0 +1,159 @@ +(in-package :sb-vm) + +(define-vop (my-vop) + (:policy :fast-safe) + + (:args (vector1 :scs (descriptor-reg)) + (vector2 :scs (descriptor-reg))) + (:arg-types simple-array-single-float simple-array-single-float) + + (:temporary (:sc unsigned-reg) index) + +;; (:temporary (:sc unsigned-reg) temp1) +;; (:temporary (:sc unsigned-reg) temp2) + + (:temporary (:sc sse-reg) sse-temp1) + (:temporary (:sc sse-reg) sse-temp2) + + (:generator 10 + + (inst xor index index) + + + (inst movups sse-temp1 + (make-ea :dword :base vector1 :index index + :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag))) + (inst movups sse-temp2 + (make-ea :dword :base vector2 :index index + :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag))) + + (inst addps sse-temp1 sse-temp2) + +;; (inst add index 4) + + (inst movups + (make-ea :dword :base vector1 :index index + :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag)) + sse-temp1) +#| + (inst add index 4) + (inst mov + (make-ea :dword :base vector1 :index index + :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag)) + index) + + (inst add index 4) + (inst movups + (make-ea :dword :base vector1 :index index + :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag)) + sse-temp2) +|# + )) + +#| +00000000 : + 0: 31 c0 xor %eax,%eax 2: 0f 10 04 c6 movups (%esi,%eax,8),%xmm0 + 6: 0f 10 0c c7 movups (%edi,%eax,8),%xmm1 + a: 0f 58 c1 addps %xmm1,%xmm0 + d: 0f 11 44 c5 00 movups %xmm0,0x0(%ebp,%eax,8) + +--- v2: + 0: 31 c0 xor %eax,%eax + 2: 0f 10 44 03 01 movups 0x1(%ebx,%eax,1),%xmm0 + 7: 0f 10 4c 01 01 movups 0x1(%ecx,%eax,1),%xmm1 + c: 0f 58 c1 addps %xmm1,%xmm0 + f: 0f 11 44 01 01 movups %xmm0,0x1(%ecx,%eax,1) + +--- v3: + 2: 0f 10 43 01 movups 0x1(%ebx),%xmm0 + 6: 0f 10 49 01 movups 0x1(%ecx),%xmm1 + a: 0f 58 c1 addps %xmm1,%xmm0 + d: 0f 11 41 01 movups %xmm0,0x1(%ecx) + +--- v4: + 2: 0f 10 44 1a 01 movups 0x1(%edx,%ebx,1),%xmm0 + 7: 0f 10 4c 0e 01 movups 0x1(%esi,%ecx,1),%xmm1 + c: 0f 58 c1 addps %xmm1,%xmm0 + f: 0f 11 41 01 movups %xmm0,0x1(%ecx) + +10h = MOVUPS Vps, Wps +11h = MOVUPS Wps, Vps + +V = 128bit xmm reg specified by the modrm reg field. +W = 128bit xmm register or mem op specified by the modrm byte. +ps = 128bit single-precision float operand + +movups xmm0, [ebx + 01] +movups md reg r/m sc idx bse disp8 +0f 10 01 000 100 00 000 011 01 + +d8 xm0 sib *0 +0 ebx +01 +|# +#| +; 43E: L4: 31C0 XOR EAX, EAX + + 7 6 5 4 3 2 1 0 + m d r e g r / m + +44h = b 0 1 0 0 0 1 0 0 +4Ch = b 0 1 0 0 1 1 0 0 +64h = b 0 1 1 0 0 1 0 0 +E0h = b 1 1 1 0 0 0 0 0 +C1h = b 1 1 0 0 0 0 0 1 +43h = b 0 1 0 0 0 0 1 1 +49h = b 0 1 0 0 1 0 0 1 + +r/m b100 => has sib byte + +modrm md+r/m field: + r/m= 000, 001, 010, 011, 100, 101, 110 , 111 +md +00 = ax, cx, dx, bx, sib, rip+d32, si, di +01 = --||-- + disp8 , bp+disp8, .. +10 = --||-- + disp32 +11 = al/ax/eax/mmx0/xmm0, 1, 2, 3, 4, 5, 6, 7 + +modrm reg: + 000 001 010 011 100 101 110 111 +reg32 eax ecx edx ebx esp ebp esi edi +xmm xm0 xm1 xm2 xm3 xm4 xm5 xm6 xm7 ;; actually xmm0..xmm7 + + +44h = md 01, r/m 100, reg 000, => xmm0, [sib + disp8] => 44 03 01 : xmm0, [ebx + 01], 44 01 01 : xmm0, [ecx + 01] +64h = md 01, r/m 100, reg 100, => xmm4, [sib + disp8] => xmm4, [ebx + 01] +04 C6 = md 00, r/m 100, reg 000, => xmm0, [sib] => xmm0, [esi*8] +4C 01 01 = md 01, reg 001, r/m 100 => xmm1, [sib + disp8] => [ecx + 01] +43h = md 01, reg 0, r/m 011 => xmm0, [ebx + 01] +49h = md 01, reg 1, r/m 001 => xmm1, [ecx + 01] + + + 7 6 5 4 3 2 1 0 + s c i d x b a s + +03h = b 0 0 0 0 0 0 1 1 = eax + ebx*1 +01h = b 0 0 0 0 0 0 0 1 = eax + ecx*1 +C6h = b 1 1 0 0 0 1 1 0 = eax + esi*8 +1Ah = b 0 0 0 1 1 0 1 0 = ebx + edx*1 +0Eh = b 0 0 0 0 1 1 1 0 = ecx + esi*1 + + +;; movups xmm0, ea 0F 10 44 03 01 +; 440: 0F 10 44 01 01 movups xmm0, [eax + ecx + 01] + +;; movups xmm1, ea 0F 10 4C 01 01 +; 445: 0F 10 64 03 01 movups xmm4, [eax + ebx + 01] + +;; addps xmm0, xmm1 0F 58 C1 +; 44A: 0F 58 E0 addps xmm0, xmm4 + +;; movups ea, xmm0 0f 11 44 01 01 + 0F 11 44 01 01 movups [eax + ecx + 01], xmm0 + +; 452: 83C004 ADD EAX, 4 + + +-- + + c: 0f 58 c4 addps %xmm4,%xmm0 + f: 0f 58 e0 addps %xmm0,%xmm4 + +|# diff --git a/scratch/sse.lisp b/scratch/sse.lisp new file mode 100644 index 0000000..db99a8a --- /dev/null +++ b/scratch/sse.lisp @@ -0,0 +1,125 @@ +;;; From sb-devel post by Christophe Rhodes +(in-package :cl-user) + +(defun %vector+ (result vector1 vector2) + (loop for x across vector1 + for y across vector2 + for i from 0 + do (setf (aref result i) (+ x y)))) + +(in-package :sb-c) + +;; kludge +(ignore-errors (defknown cl-user::%vector+ (vector vector vector) vector)) + +(in-package :sb-vm) + +(pushnew :sse2 *backend-subfeatures*) + +(define-vop (vector+/simple-array-signed-byte-30) + (:translate cl-user::%vector+) + (:policy :fast) + (:args (result :scs (descriptor-reg)) + (vector1 :scs (descriptor-reg)) + (vector2 :scs (descriptor-reg))) + (:arg-types simple-array-signed-byte-30 simple-array-signed-byte-30 simple-array-signed-byte-30) + (:temporary (:sc any-reg) temp) + (:temporary (:sc unsigned-reg) index) + (:temporary (:sc unsigned-reg) length) + (:generator 30 + (let ((top (gen-label)) + (end (gen-label))) + (loadw length result vector-length-slot other-pointer-lowtag) + ;; check that the result vector doesn't have length 0 + (inst cmp length 0) + (inst jmp :e end) + ;; zero the index + (inst xor index index) + (emit-label top) + ;; this is wasteful if one of the arguments is the same as the + ;; result; we can save a mov + (inst mov temp (make-ea :dword :base vector1 :index index + :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag))) + (inst add temp (make-ea :dword :base vector2 :index index + :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag))) + (inst mov (make-ea :dword :base result :index index + :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag)) temp) + (inst add index (fixnumize 1)) + (inst cmp index length) + (inst jmp :ne top) + (emit-label end)))) + +(define-vop (vector+/simple-array-signed-byte-30-sse) + (:translate cl-user::%vector+) + (:policy :fast) + (:args (result :scs (descriptor-reg)) + (vector1 :scs (descriptor-reg)) + (vector2 :scs (descriptor-reg))) + (:arg-types simple-array-signed-byte-30 simple-array-signed-byte-30 simple-array-signed-byte-30) + (:temporary (:sc any-reg) temp) + (:temporary (:sc sse-reg) sse-temp1) + (:temporary (:sc sse-reg) sse-temp2) + (:temporary (:sc unsigned-reg) index) + (:temporary (:sc unsigned-reg) length) + (:guard (member :sse2 *backend-subfeatures*)) + (:generator 25 + (let ((top (gen-label)) + (two (gen-label)) + (four (gen-label)) + (end (gen-label))) + (loadw length result vector-length-slot other-pointer-lowtag) + ;; check that the result vector doesn't have length 0 + (inst cmp length 0) + (inst jmp :e end) + ;; zero the index + (inst xor index index) + (emit-label top) + (inst test length 1) + (inst jmp :z two) + (inst mov temp (make-ea :dword :base vector1 :index index + :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag))) + (inst add temp (make-ea :dword :base vector2 :index index + :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag))) + (inst mov (make-ea :dword :base result :index index + :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag)) temp) + (inst add index (fixnumize 1)) + (inst cmp index length) + (inst jmp :e end) + (emit-label two) + ;; eventually at this point we put in a quadword add, but that + ;; would be one more instruction to write. + (inst test length 2) + (inst mov temp (make-ea :dword :base vector1 :index index + :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag))) + (inst add temp (make-ea :dword :base vector2 :index index + :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag))) + (inst mov (make-ea :dword :base result :index index + :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag)) temp) + (inst add index (fixnumize 1)) + (inst cmp index length) + (inst jmp :e end) + (inst mov temp (make-ea :dword :base vector1 :index index + :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag))) + (inst add temp (make-ea :dword :base vector2 :index index + :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag))) + (inst mov (make-ea :dword :base result :index index + :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag)) temp) + (inst add index (fixnumize 1)) + (inst cmp index length) + (inst jmp :e end) + (emit-label four) + ;; here, we do double quadword additions until we hit the end of + ;; the computation. No guarantees about alignment, so we have to + ;; use movdqu. + (inst movdqu sse-temp1 (make-ea :dword :base vector1 :index index + :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag))) + ;; KLUDGE: We're using :dword EAs here. This is possibly non-optimal. + (inst movdqu sse-temp2 (make-ea :dword :base vector2 :index index + :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag))) + (inst paddd sse-temp1 sse-temp2) + (inst movdqu sse-temp1 (make-ea :dword :base result :index index + :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag))) + (inst add index (fixnumize 4)) + (inst cmp index length) + (inst jmp :ne four) + (emit-label end)))) diff --git a/scratch/sse2.lisp b/scratch/sse2.lisp new file mode 100644 index 0000000..78b95ac --- /dev/null +++ b/scratch/sse2.lisp @@ -0,0 +1,33 @@ +;;; From sb-devel post by Christophe Rhodes +(in-package :cl-user) + +(declaim (inline fixnum/vector+)) +(defun fixnum/vector+ (vector1 vector2) + (let ((result (make-array (length vector1) :element-type 'fixnum))) + (dotimes (i 1000000) + (declare (fixnum i)) + (%vector+ result vector1 vector2)) + result)) + +(defun foo () + (declare (optimize (speed 3) (safety 0))) + (let ((x (make-array 1000 :element-type 'fixnum + :initial-contents (loop for x fixnum from 0 to 999 collect x))) + (y (make-array 1000 :element-type 'fixnum + :initial-contents (loop for x fixnum from 0 to 999 collect x)))) + (fixnum/vector+ x y))) + +(defun bar () + (declare (optimize (speed 3) (safety 0))) + (let ((x (make-array 1000 :element-type 'fixnum + :initial-contents (loop for x fixnum from 0 to 999 collect x))) + (y (make-array 1000 :element-type 'fixnum + :initial-contents (loop for x fixnum from 0 to 999 collect x)))) + (let ((result (make-array 1000 :element-type 'fixnum))) + (dotimes (j 1000000) + (declare (fixnum j)) + (loop for tx across x + for ty across y + for i fixnum upfrom 0 + do (setf (aref result i) (+ tx ty)))) + result))) diff --git a/sse-vop.lisp b/sse-vop.lisp new file mode 100644 index 0000000..bb1e880 --- /dev/null +++ b/sse-vop.lisp @@ -0,0 +1,207 @@ +(in-package :sb-vm) + +(defmacro vect-ea (vect idx) + `(make-ea :dword :base ,vect :index ,idx + :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag))) + + +(define-vop (%sse-add/simple-array-single-float-1) + (:policy :fast-safe) + + (:args (result :scs (descriptor-reg)) + (vect1 :scs (descriptor-reg)) + (vect2 :scs (descriptor-reg)) + (index :scs (unsigned-reg))) + + (:arg-types simple-array-single-float simple-array-single-float simple-array-single-float fixnum) + + (:temporary (:sc sse-reg) sse-temp1) + (:temporary (:sc sse-reg) sse-temp2) + + (:generator 10 + + ;; scale index by 4 (size-of single-float) + (inst shl index 2) + + ;; load + (inst movups sse-temp1 (vect-ea vect1 index)) + (inst movups sse-temp2 (vect-ea vect2 index)) + + ;; operate + (inst addps sse-temp1 sse-temp2) + + ;; store + (inst movups (vect-ea result index) sse-temp1) + )) + +(define-vop (%sse-sub/simple-array-single-float-1) + (:policy :fast-safe) + + (:args (result :scs (descriptor-reg)) + (vect1 :scs (descriptor-reg)) + (vect2 :scs (descriptor-reg)) + (index :scs (unsigned-reg))) + + (:arg-types simple-array-single-float simple-array-single-float simple-array-single-float fixnum) + + (:temporary (:sc sse-reg) sse-temp1) + (:temporary (:sc sse-reg) sse-temp2) + + (:generator 10 + + ;; scale index by 4 (size-of single-float) + (inst shl index 2) + + ;; load + (inst movups sse-temp1 (vect-ea vect1 index)) + (inst movups sse-temp2 (vect-ea vect2 index)) + + ;; operate + (inst subps sse-temp1 sse-temp2) + + ;; store + (inst movups (vect-ea result index) sse-temp1) + )) + +(define-vop (%sse-mul/simple-array-single-float-1) + (:policy :fast-safe) + + (:args (result :scs (descriptor-reg)) + (vect1 :scs (descriptor-reg)) + (vect2 :scs (descriptor-reg)) + (index :scs (unsigned-reg))) + + (:arg-types simple-array-single-float simple-array-single-float simple-array-single-float fixnum) + + (:temporary (:sc sse-reg) sse-temp1) + (:temporary (:sc sse-reg) sse-temp2) + + (:generator 10 + + ;; scale index by 4 (size-of single-float) + (inst shl index 2) + + ;; load + (inst movups sse-temp1 (vect-ea vect1 index)) + (inst movups sse-temp2 (vect-ea vect2 index)) + + ;; operate + (inst mulps sse-temp1 sse-temp2) + + ;; store + (inst movups (vect-ea result index) sse-temp1) + )) + +(define-vop (%sse-div/simple-array-single-float-1) + (:policy :fast-safe) + + (:args (result :scs (descriptor-reg)) + (vect1 :scs (descriptor-reg)) + (vect2 :scs (descriptor-reg)) + (index :scs (unsigned-reg))) + + (:arg-types simple-array-single-float simple-array-single-float simple-array-single-float fixnum) + + (:temporary (:sc sse-reg) sse-temp1) + (:temporary (:sc sse-reg) sse-temp2) + + (:generator 10 + + ;; scale index by 4 (size-of single-float) + (inst shl index 2) + + ;; load + (inst movups sse-temp1 (vect-ea vect1 index)) + (inst movups sse-temp2 (vect-ea vect2 index)) + + ;; operate + (inst divps sse-temp1 sse-temp2) + + ;; store + (inst movups (vect-ea result index) sse-temp1) + )) + +(define-vop (%sse-sqrt/simple-array-single-float-1) + (:policy :fast-safe) + + (:args (result :scs (descriptor-reg)) + (vect1 :scs (descriptor-reg)) + (index :scs (unsigned-reg))) + + (:arg-types simple-array-single-float simple-array-single-float fixnum) + + (:temporary (:sc sse-reg) sse-temp1) + (:temporary (:sc sse-reg) sse-temp2) + + (:generator 10 + + ;; scale index by 4 (size-of single-float) + (inst shl index 2) + + ;; load + (inst movups sse-temp1 (vect-ea vect1 index)) + + ;; operate + (inst sqrtps sse-temp2 sse-temp1) + + ;; store + (inst movups (vect-ea result index) sse-temp2) + )) + + +(define-vop (%sse-recip/simple-array-single-float-1) + (:policy :fast-safe) + + (:args (result :scs (descriptor-reg)) + (vect1 :scs (descriptor-reg)) + (index :scs (unsigned-reg))) + + (:arg-types simple-array-single-float simple-array-single-float fixnum) + + (:temporary (:sc sse-reg) sse-temp1) + (:temporary (:sc sse-reg) sse-temp2) + + (:generator 10 + + ;; scale index by 4 (size-of single-float) + (inst shl index 2) + + ;; load + (inst movups sse-temp1 (vect-ea vect1 index)) + + ;; operate + (inst rcpps sse-temp2 sse-temp1) + + ;; store + (inst movups (vect-ea result index) sse-temp2) + )) + + +(define-vop (%sse-recip-sqrt/simple-array-single-float-1) + (:policy :fast-safe) + + (:args (result :scs (descriptor-reg)) + (vect1 :scs (descriptor-reg)) + (index :scs (unsigned-reg))) + + (:arg-types simple-array-single-float simple-array-single-float fixnum) + + (:temporary (:sc sse-reg) sse-temp1) + (:temporary (:sc sse-reg) sse-temp2) + + (:generator 10 + + ;; scale index by 4 (size-of single-float) + (inst shl index 2) + + ;; load + (inst movups sse-temp1 (vect-ea vect1 index)) + + ;; operate + (inst rsqrtps sse-temp2 sse-temp1) + + ;; store + (inst movups (vect-ea result index) sse-temp2) + )) + + -- 2.11.4.GIT From 7e5ad1e04ddef031c64509db6ab46dcfab418546 Mon Sep 17 00:00:00 2001 From: rlaakso Date: Mon, 8 Aug 2005 09:57:48 +0000 Subject: [PATCH 02/16] .. --- .cvsignore | 1 + 1 file changed, 1 insertion(+) create mode 100644 .cvsignore diff --git a/.cvsignore b/.cvsignore new file mode 100644 index 0000000..c07c3b9 --- /dev/null +++ b/.cvsignore @@ -0,0 +1 @@ +.emacs.desktop -- 2.11.4.GIT From 6b704a35788e2352ffae578ddd5e85458a962809 Mon Sep 17 00:00:00 2001 From: rlaakso Date: Mon, 8 Aug 2005 10:35:37 +0000 Subject: [PATCH 03/16] .. --- .cvsignore | 2 + generate-sse-instructions.lisp | 193 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 191 insertions(+), 4 deletions(-) diff --git a/.cvsignore b/.cvsignore index c07c3b9..a15c332 100644 --- a/.cvsignore +++ b/.cvsignore @@ -1 +1,3 @@ .emacs.desktop +*.fasl +sse-insts.lisp diff --git a/generate-sse-instructions.lisp b/generate-sse-instructions.lisp index 6717ebc..d634f45 100644 --- a/generate-sse-instructions.lisp +++ b/generate-sse-instructions.lisp @@ -4,6 +4,98 @@ instruction reference: http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/26568.pdf + +TODO: + +CMPPD . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 30 +CMPPS . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 34 +CMPSD . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 37 +CMPSS . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 40 +COMISD . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 43 +COMISS . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 46 +CVTDQ2PD . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 49 +CVTDQ2PS. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 51 +CVTPD2DQ . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 53 +CVTPD2PI . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 56 +CVTPD2PS . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 59 +CVTPI2PD . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 62 +CVTPI2PS. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 64 +CVTPS2DQ. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 66 +CVTPS2PD . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 69 +CVTPS2PI. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 71 +CVTSD2SI . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 74 +CVTSD2SS . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 77 +CVTSI2SD . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 80 +CVTSI2SS. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 83 +CVTSS2SD . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 86 +CVTSS2SI. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 88 +CVTTPD2DQ . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 91 +CVTTPD2PI . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 94 +CVTTPS2DQ . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 97 +CVTTPS2PI . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 100 +CVTTSD2SI . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 103 +CVTTSS2SI. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 106 +FXRSTOR. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 121 +FXSAVE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 124 +HADDPD . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 126 +HADDPS. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 129 +HSUBPD. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 132 +HSUBPS . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 135 +LDDQU. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 138 +LDMXCSR . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 140 +MASKMOVDQU . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 142 +MOVAPD. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 168 +MOVAPS . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 170 +MOVD . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 173 +MOVDDUP. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 176 +MOVDQ2Q . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 178 +MOVDQA . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 180 +MOVDQU . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 182 +MOVHLPS . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 184 +MOVHPD . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 186 +MOVHPS. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 188 +MOVLHPS . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 190 +MOVLPD. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 192 +MOVLPS . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 194 +MOVMSKPD. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 196 +MOVMSKPS. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 198 +MOVNTDQ . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 200 +MOVNTPD . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 202 +MOVNTPS . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 204 +MOVQ . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 206 +MOVQ2DQ . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 208 +MOVSD. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 210 +MOVSHDUP. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 213 +MOVSLDUP . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 215 +MOVSS . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 217 + +PEXTRW. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 284 +PINSRW . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 286 + +PSHUFD. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 314 +PSHUFHW . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 317 +PSHUFLW . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 320 +PSLLD. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 323 +PSLLDQ . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 326 +PSLLQ. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 328 +PSLLW . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 330 +PSRAD . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 333 +PSRAW . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 336 +PSRLD . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 339 +PSRLDQ . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 342 +PSRLQ . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 344 +PSRLW . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 347 + +SHUFPD. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 392 +SHUFPS . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 395 +STMXCSR . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 410 +UCOMISD. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 424 +UCOMISS . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 427 +UNPCKHPD. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 430 +UNPCKHPS . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 432 +UNPCKLPD . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 434 +UNPCKLPS . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 436 + |# (declaim (optimize (debug 3))) @@ -16,8 +108,7 @@ http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/26568.pd (defun gen-ops (&optional (stream t)) - ;; single prec packed sse - ;;; like : + ;;; instructions like: ;;; ADDPS xmm1, xmm2/mem128 0F 58 /r (loop for (inst . ops) in '( @@ -36,6 +127,7 @@ http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/26568.pd (sqrtps #x0F #x51) (subps #x0F #x5C) (xorps #x0F #x57) + ;; double precision float (addpd #x66 #x0F #x58) (addsubpd #x66 #x0F #xD0) @@ -46,11 +138,104 @@ http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/26568.pd (minpd #x66 #x0F #x5D) (mulpd #x66 #x0F #x59) (orps #x66 #x0F #x56) - (rcppd #x66 #x0F #x53) - (rsqrtpd #x66 #x0F #x52) (sqrtpd #x66 #x0F #x51) (subpd #x66 #x0F #x5C) (xorpd #x66 #x0F #x57) + + ;; scalar double precision float + (addsd #xF2 #x0F #x58) + (divsd #xF2 #x0F #x5E) + (maxsd #xF2 #x0F #x5F) + (minsd #xF2 #x0F #x5D) + (mulsd #xF2 #x0F #x59) + (sqrtsd #xF2 #x0F #x51) + (subsd #xF2 #x0F #x5C) + + ;; scalar single precision float + (addss #xF3 #x0F #x58) + (divss #xF3 #x0F #x5E) + (maxss #xF3 #x0F #x5F) + (minss #xF3 #x0F #x5D) + (mulss #xF3 #x0F #x59) + (rcpss #xF3 #x0F #x53) + (rsqrtss #xF3 #x0F #x52) + (sqrtss #xF3 #x0F #x51) + (subss #xF3 #x0F #x5C) + + ;; packed integer + (packssdw #x66 #x0F #x6B) + (packsswb #x66 #x0F #x63) + (packuswb #x66 #x0F #x67) + + (paddb #x66 #x0F #xFC) + (paddd #x66 #x0F #xFE) + (paddq #x66 #x0F #xD4) + (paddsb #x66 #x0F #xEC) + (paddsw #x66 #x0F #xED) + (paddusb #x66 #x0F #xDC) + (paddusw #x66 #x0F #xDD) + (paddw #x66 #x0F #xFD) + + (pand #x66 #x0F #xDB) + (pandn #x66 #x0F #xDF) + + (pavgb #x66 #x0F #xE0) + (pavgw #x66 #x0F #xE3) + + (pcmpeqb #x66 #x0F #x74) + (pcmpeqd #x66 #x0F #x76) + (pcmpeqw #x66 #x0F #x75) + (pcmpgtb #x66 #x0F #x64) + (pcmpgtd #x66 #x0F #x66) + (pcmpgtw #x66 #x0F #x65) + + (pmaddwd #x66 #x0F #xF5) + + (pmaxsw #x66 #x0F #xEE) + (pmaxub #x66 #x0F #xDE) + + (pminsw #x66 #x0F #xEA) + (pminub #x66 #x0F #xDA) + + (pmovmskb #x66 #x0F #xD7) + + (pmulhuw #x66 #x0F #xE4) + (pmulhw #x66 #x0F #xE5) + (pmullw #x66 #x0F #xD5) + (pmuludq #x66 #x0F #xF4) + + (por #x66 #x0F #xEB) + + (psadbw #x66 #x0F #xF6) + (pssld #x66 #x0F #xF2) + (psllq #x66 #x0F #xF3) + (psllw #x66 #x0F #xF1) + (psrad #x66 #x0F #xE2) + (psraw #x66 #x0F #xE2) + (psrld #x66 #x0F #xD2) + (psrlq #x66 #x0F #xD3) + (psrlw #x66 #x0F #xD1) + + (psubb #x66 #x0F #xF8) + (psubd #x66 #x0F #xFA) + (psubq #x66 #x0F #xFB) + (psubsb #x66 #x0F #xE8) + (psubsw #x66 #x0F #xE9) + (psubusb #x66 #x0F #xD8) + (psubusw #x66 #x0F #xD9) + (psubw #x66 #x0F #xF9) + + (punpckhbw #x66 #x0F #x68) + (punpckhdq #x66 #x0F #x6A) + (punpckhqdq #x66 #x0F #x6D) + (punpckhwd #x66 #x0F #x69) + (punpcklbw #x66 #x0F #x60) + (punpckldq #x66 #x0F #x62) + (punpcklqdq #x66 #x0F #x6C) + (punpcklwd #x66 #x0F #x61) + + (pxor #x66 #x0F #xEF) + ) do (format stream "~S~%~%" -- 2.11.4.GIT From 785e38dbe3d4e0cb9e99eecbcc2bb962122b5597 Mon Sep 17 00:00:00 2001 From: rlaakso Date: Mon, 8 Aug 2005 10:59:52 +0000 Subject: [PATCH 04/16] .. --- generate-sse-instructions.lisp | 108 +++++++++++++++++++++++++---------------- 1 file changed, 65 insertions(+), 43 deletions(-) diff --git a/generate-sse-instructions.lisp b/generate-sse-instructions.lisp index d634f45..e3254cf 100644 --- a/generate-sse-instructions.lisp +++ b/generate-sse-instructions.lisp @@ -11,63 +11,38 @@ CMPPD . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . CMPPS . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 34 CMPSD . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 37 CMPSS . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 40 -COMISD . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 43 -COMISS . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 46 -CVTDQ2PD . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 49 -CVTDQ2PS. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 51 -CVTPD2DQ . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 53 -CVTPD2PI . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 56 -CVTPD2PS . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 59 -CVTPI2PD . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 62 -CVTPI2PS. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 64 -CVTPS2DQ. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 66 -CVTPS2PD . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 69 -CVTPS2PI. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 71 -CVTSD2SI . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 74 -CVTSD2SS . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 77 -CVTSI2SD . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 80 -CVTSI2SS. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 83 -CVTSS2SD . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 86 -CVTSS2SI. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 88 -CVTTPD2DQ . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 91 -CVTTPD2PI . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 94 -CVTTPS2DQ . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 97 -CVTTPS2PI . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 100 -CVTTSD2SI . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 103 -CVTTSS2SI. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 106 + FXRSTOR. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 121 FXSAVE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 124 + HADDPD . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 126 HADDPS. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 129 HSUBPD. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 132 HSUBPS . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 135 + LDDQU. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 138 LDMXCSR . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 140 + MASKMOVDQU . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 142 -MOVAPD. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 168 -MOVAPS . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 170 + MOVD . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 173 MOVDDUP. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 176 MOVDQ2Q . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 178 -MOVDQA . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 180 -MOVDQU . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 182 + MOVHLPS . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 184 -MOVHPD . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 186 -MOVHPS. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 188 + MOVLHPS . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 190 -MOVLPD. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 192 -MOVLPS . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 194 + MOVMSKPD. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 196 MOVMSKPS. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 198 MOVNTDQ . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 200 MOVNTPD . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 202 MOVNTPS . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 204 -MOVQ . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 206 + MOVQ2DQ . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 208 -MOVSD. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 210 + MOVSHDUP. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 213 MOVSLDUP . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 215 -MOVSS . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 217 PEXTRW. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 284 PINSRW . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 286 @@ -89,12 +64,7 @@ PSRLW . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . SHUFPD. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 392 SHUFPS . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 395 STMXCSR . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 410 -UCOMISD. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 424 -UCOMISS . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 427 -UNPCKHPD. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 430 -UNPCKHPS . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 432 -UNPCKLPD . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 434 -UNPCKLPS . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 436 + |# @@ -126,6 +96,8 @@ UNPCKLPS . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . (rsqrtps #x0F #x52) (sqrtps #x0F #x51) (subps #x0F #x5C) + (unpckhps #x0F #x15) + (unpcklps #x0F #x14) (xorps #x0F #x57) ;; double precision float @@ -140,19 +112,24 @@ UNPCKLPS . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . (orps #x66 #x0F #x56) (sqrtpd #x66 #x0F #x51) (subpd #x66 #x0F #x5C) + (unpckhpd #x66 #x0F #x15) + (unpcklpd #x66 #x0F #x14) (xorpd #x66 #x0F #x57) ;; scalar double precision float (addsd #xF2 #x0F #x58) + (comisd #x66 #x0F #x2F) (divsd #xF2 #x0F #x5E) (maxsd #xF2 #x0F #x5F) (minsd #xF2 #x0F #x5D) (mulsd #xF2 #x0F #x59) (sqrtsd #xF2 #x0F #x51) (subsd #xF2 #x0F #x5C) + (ucomisd #x66 #x0F #x2E) ;; scalar single precision float (addss #xF3 #x0F #x58) + (comiss #x0F #x2F) (divss #xF3 #x0F #x5E) (maxss #xF3 #x0F #x5F) (minss #xF3 #x0F #x5D) @@ -161,6 +138,8 @@ UNPCKLPS . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . (rsqrtss #xF3 #x0F #x52) (sqrtss #xF3 #x0F #x51) (subss #xF3 #x0F #x5C) + (ucomiss #x0F #x2E) + ;; packed integer (packssdw #x66 #x0F #x6B) @@ -235,6 +214,30 @@ UNPCKLPS . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . (punpcklwd #x66 #x0F #x61) (pxor #x66 #x0F #xEF) + + ;; convert + (cvtdq2pd #xF3 #x0F #xE6) + (cvtdq2ps #x0F #x5B) + (cvtpd2dq #xF2 #x0F #xE6) + (cvtpd2pi #x66 #x0F #x2D) + (cvtpd2ps #x66 #x0F #x5A) + (cvtpi2pd #x66 #x0F #x2A) + (cvtpi2ps #x0F #x2A) + (cvtps2dq #x66 #x0F #x5B) + (cvtps2pd #x0F #x5A) + (cvtps2pi #x0F #x2D) + (cvtsd2si #xF2 #x0F #x2D) + (cvtsd2ss #xF2 #x0F #x5A) + (cvtsi2sd #xF2 #x0F #x2A) + (cvtsi2ss #xF3 #x0F #x2A) + (cvtss2sd #xF3 #x0F #x5A) + (cvtss2si #xF3 #x0F #x2D) + (cvttpd2dq #x66 #x0F #xE6) + (cvttpd2pi #x66 #x0F #x2C) + (cvttps2dq #xF3 #x0F #x5B) + (cvttps2pi #x0F #x2C) + (cvttsd2si #xF2 #x0F #x2C) + (cvttss2si #xF3 #x0F #x2C) ) do @@ -244,11 +247,30 @@ UNPCKLPS . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ,@(emit-ops ops) (emit-ea segment src (reg-tn-encoding dst)))))) - ;; MOVUPS + ;; MOVES (loop for (inst ops-m2r ops-r2m) in '( + (movapd (#x66 #x0F #x28) (#x66 #x0F #x29)) + (movaps (#x0F #x28) (#x0F #x29)) + + (movdqa (#x66 #x0F #x6F) (#x66 #x0F #x7F)) + (movdqu (#xF3 #x0F #x6F) (#xF3 #x0F #x7F)) + + (movhpd (#x66 #x0F #x16) (#x66 #x0F #x17)) + (movhps (#x0F #x16) (#x0F #x17)) + + (movlpd (#x66 #x0F #x12) (#x66 #x0F #x13)) + (movlps (#x0F #x12) (#x0F #x13)) + + (movq (#xF3 #x0F #x7E) (#x66 #x0F #xD6)) + + (movsd (#xF2 #x0F #x10) (#xF2 #x0F #x11)) + + (movss (#xF3 #x0F #x10) (#xF3 #x0F #x11)) + + (movupd (#x66 #x0F #x10) (#x66 #x0F #x11)) (movups (#x0F #x10) (#x0F #x11)) - (movupd (#x66 #x0F #x10) (#x66 #x0F #x11))) + ) do (format stream "~S~%~%" `(define-instruction ,(intern (symbol-name inst)) (segment dst src) -- 2.11.4.GIT From 161504ba194baaa3f8f9b54d470843783cd9a9bf Mon Sep 17 00:00:00 2001 From: rlaakso Date: Mon, 8 Aug 2005 13:33:23 +0000 Subject: [PATCH 05/16] .. --- generate-sse-instructions.lisp | 110 ++- sbcl-src/makepatch.sh | 2 +- sbcl-src/patch_against_sbcl_0_9_3 | 1263 +++++++++++++++++++++++++++++++++- sbcl-src/src/compiler/x86/insts.lisp | 1241 ++++++++++++++++++++++++++++++++- sse-vop.lisp | 323 ++++----- 5 files changed, 2658 insertions(+), 281 deletions(-) rewrite sse-vop.lisp (93%) diff --git a/generate-sse-instructions.lisp b/generate-sse-instructions.lisp index e3254cf..7619416 100644 --- a/generate-sse-instructions.lisp +++ b/generate-sse-instructions.lisp @@ -7,49 +7,16 @@ http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/26568.pd TODO: -CMPPD . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 30 -CMPPS . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 34 -CMPSD . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 37 -CMPSS . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 40 - FXRSTOR. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 121 FXSAVE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 124 -HADDPD . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 126 -HADDPS. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 129 -HSUBPD. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 132 -HSUBPS . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 135 - -LDDQU. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 138 LDMXCSR . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 140 -MASKMOVDQU . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 142 - -MOVD . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 173 -MOVDDUP. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 176 MOVDQ2Q . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 178 -MOVHLPS . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 184 - -MOVLHPS . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 190 - -MOVMSKPD. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 196 -MOVMSKPS. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 198 -MOVNTDQ . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 200 -MOVNTPD . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 202 -MOVNTPS . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 204 - MOVQ2DQ . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 208 -MOVSHDUP. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 213 -MOVSLDUP . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 215 - -PEXTRW. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 284 -PINSRW . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 286 - -PSHUFD. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 314 -PSHUFHW . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 317 -PSHUFLW . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 320 +(ib-forms:) PSLLD. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 323 PSLLDQ . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 326 PSLLQ. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 328 @@ -61,8 +28,6 @@ PSRLDQ . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . PSRLQ . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 344 PSRLW . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 347 -SHUFPD. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 392 -SHUFPS . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 395 STMXCSR . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 410 @@ -88,6 +53,8 @@ STMXCSR . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . (andnps #x0F #x55) (andps #x0F #x54) (divps #x0F #x5E) + (haddps #xF2 #x0F #x7C) + (hsubps #xF2 #x0F #x7D) (maxps #x0F #x5F) (minps #x0F #x5D) (mulps #x0F #x59) @@ -106,10 +73,12 @@ STMXCSR . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . (andnpd #x66 #x0F #x55) (andpd #x66 #x0F #x54) (divpd #x66 #x0F #x5E) + (haddpd #x66 #x0F #x7C) + (hsubpd #x66 #x0F #x7D) (maxpd #x66 #x0F #x5F) (minpd #x66 #x0F #x5D) (mulpd #x66 #x0F #x59) - (orps #x66 #x0F #x56) + (orpd #x66 #x0F #x56) (sqrtpd #x66 #x0F #x51) (subpd #x66 #x0F #x5C) (unpckhpd #x66 #x0F #x15) @@ -238,7 +207,20 @@ STMXCSR . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . (cvttps2pi #x0F #x2C) (cvttsd2si #xF2 #x0F #x2C) (cvttss2si #xF3 #x0F #x2C) - + + ;; misc + (lddqu #xF2 #x0F #xF0) + (maskmovdqu #x66 #x0F #xF7) + (movddup #xF2 #x0F #x12) + (movhlps #x0F #x12) + (movlhps #x0F #x16) + (movmskpd #x66 #x0F #x50) + (movmskps #x0F #x50) + (movntdq #x66 #x0F #XE7) + (movntpd #x66 #x0F #x2B) + (movntps #x0F #x2B) + (movshdup #xF3 #x0F #x16) + (movsldup #xF3 #x0F #x12) ) do (format stream "~S~%~%" @@ -247,12 +229,64 @@ STMXCSR . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ,@(emit-ops ops) (emit-ea segment src (reg-tn-encoding dst)))))) + + ;; INSTRUCTIONS WITH /r IB8 + (loop for (inst . ops) in + '( + (pextrw #X66 #x0F #xC5) + (pinsrw #x66 #x0F #xC4) + + (pshufd #x66 #x0F #x70) + (pshufhw #xF3 #x0F #x70) + (pshuflw #xF2 #x0F #x70) + + (shufpd #x66 #x0F #xC6) + (shufps #x0F #xC6) + + ) + do + (format stream "~S~%~%" + `(define-instruction ,(intern (symbol-name inst)) (segment dst src byte) + (:emitter + ,@(emit-ops ops) + (emit-ea segment src (reg-tn-encoding dst)) + (emit-sized-immediate segment :byte byte) + )))) + + ;; COMPARE + (loop for (inst . ops) in + '( + (cmppd #x66 #x0F #xC2) + (cmpps #x0F #xC2) + (cmpsd #xF2 #x0F #xC2) + (cmpss #xF3 #x0F #xC2) + ) + do + (format stream "~S~%~%" + `(define-instruction ,(intern (symbol-name inst)) (segment dst src cond) + (:emitter + ,@(emit-ops ops) + (emit-ea segment src (reg-tn-encoding dst)) + (emit-sized-immediate segment :byte (cdr (assoc cond + '((:eq . #b000) (:e . #b000) (:z . #b000) + (:l . #b001) (:nge . #b001) + (:le . #b010) (:ng . #b010) + (:unord . #b011) + (:ne . #b100) (:nz . #b100) + (:nl . #b101) (:ge . #b101) + (:nle . #b110) (:g . #b110) + (:ord . #b111) + )))) + )))) + ;; MOVES (loop for (inst ops-m2r ops-r2m) in '( (movapd (#x66 #x0F #x28) (#x66 #x0F #x29)) (movaps (#x0F #x28) (#x0F #x29)) + (movd (#x66 #x0F #x6E) (#x66 #x0F #x7E)) + (movdqa (#x66 #x0F #x6F) (#x66 #x0F #x7F)) (movdqu (#xF3 #x0F #x6F) (#xF3 #x0F #x7F)) diff --git a/sbcl-src/makepatch.sh b/sbcl-src/makepatch.sh index 641b804..1927890 100755 --- a/sbcl-src/makepatch.sh +++ b/sbcl-src/makepatch.sh @@ -1,4 +1,4 @@ #!/bin/sh find . -name '.emacs*' |xargs rm 2>/dev/null find . -name '*~' |xargs rm 2>/dev/null -diff -Naur src-093 src > patch_against_sbcl_0_9_3 +diff -x "CVS*" -Naur src-093 src > patch_against_sbcl_0_9_3 diff --git a/sbcl-src/patch_against_sbcl_0_9_3 b/sbcl-src/patch_against_sbcl_0_9_3 index 234ef9d..9a04742 100644 --- a/sbcl-src/patch_against_sbcl_0_9_3 +++ b/sbcl-src/patch_against_sbcl_0_9_3 @@ -1,15 +1,16 @@ -diff -Naur src-093/compiler/x86/insts.lisp src/compiler/x86/insts.lisp ---- src-093/compiler/x86/insts.lisp 2005-08-05 15:31:17.723664255 +0300 -+++ src/compiler/x86/insts.lisp 2005-08-05 15:42:36.536109257 +0300 -@@ -192,6 +192,7 @@ +diff -x 'CVS*' -Naur src-093/compiler/x86/insts.lisp src/compiler/x86/insts.lisp +--- src-093/compiler/x86/insts.lisp 2005-08-05 16:13:29.000000000 +0300 ++++ src/compiler/x86/insts.lisp 2005-08-08 16:30:23.352842152 +0300 +@@ -192,6 +192,8 @@ (:byte 8) (:word 16) (:dword 32) ++ (:qword 64) + (:dqword 128) (:float 32) (:double 64))) -@@ -671,7 +672,7 @@ +@@ -671,7 +673,7 @@ (defun reg-tn-encoding (tn) (declare (type tn tn)) @@ -18,7 +19,7 @@ diff -Naur src-093/compiler/x86/insts.lisp src/compiler/x86/insts.lisp (let ((offset (tn-offset tn))) (logior (ash (logand offset 1) 2) (ash offset -1)))) -@@ -718,6 +719,8 @@ +@@ -718,6 +720,8 @@ (ecase (sb-name (sc-sb (tn-sc thing))) (registers (emit-mod-reg-r/m-byte segment #b11 reg (reg-tn-encoding thing))) @@ -27,7 +28,7 @@ diff -Naur src-093/compiler/x86/insts.lisp src/compiler/x86/insts.lisp (stack ;; Convert stack tns into an index off of EBP. (let ((disp (- (* (1+ (tn-offset thing)) n-word-bytes)))) -@@ -830,6 +833,10 @@ +@@ -830,6 +834,10 @@ (and (tn-p thing) (eq (sb-name (sc-sb (tn-sc thing))) 'registers))) @@ -38,7 +39,7 @@ diff -Naur src-093/compiler/x86/insts.lisp src/compiler/x86/insts.lisp (defun accumulator-p (thing) (and (register-p thing) (= (tn-offset thing) 0))) -@@ -2042,6 +2049,123 @@ +@@ -2042,6 +2050,1339 @@ (:emitter (emit-header-data segment return-pc-header-widetag))) @@ -79,6 +80,20 @@ diff -Naur src-093/compiler/x86/insts.lisp src/compiler/x86/insts.lisp + (EMIT-BYTE SEGMENT 94) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + ++(DEFINE-INSTRUCTION HADDPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 242) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 124) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION HSUBPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 242) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 125) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ +(DEFINE-INSTRUCTION MAXPS + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 15) @@ -127,24 +142,1227 @@ diff -Naur src-093/compiler/x86/insts.lisp src/compiler/x86/insts.lisp + (EMIT-BYTE SEGMENT 92) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + ++(DEFINE-INSTRUCTION UNPCKHPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 21) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION UNPCKLPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 20) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ +(DEFINE-INSTRUCTION XORPS + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 87) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + -+;;; SSE MOVE ++(DEFINE-INSTRUCTION ADDPD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 88) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION ADDSUBPD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 208) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION ANDNPD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 85) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION ANDPD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 84) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION DIVPD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 94) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION HADDPD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 124) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION HSUBPD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 125) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MAXPD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 95) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MINPD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 93) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MULPD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 89) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION ORPD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 86) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION SQRTPD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 81) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION SUBPD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 92) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION UNPCKHPD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 21) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION UNPCKLPD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 20) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION XORPD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 87) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION ADDSD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 242) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 88) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION COMISD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 47) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION DIVSD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 242) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 94) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MAXSD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 242) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 95) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MINSD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 242) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 93) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MULSD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 242) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 89) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION SQRTSD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 242) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 81) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION SUBSD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 242) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 92) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION UCOMISD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 46) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION ADDSS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 88) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION COMISS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 47) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION DIVSS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 94) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MAXSS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 95) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MINSS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 93) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MULSS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 89) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION RCPSS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 83) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION RSQRTSS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 82) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION SQRTSS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 81) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION SUBSS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 92) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION UCOMISS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 46) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PACKSSDW ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 107) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PACKSSWB ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 99) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PACKUSWB ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 103) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PADDB ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 252) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PADDD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 254) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PADDQ ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 212) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PADDSB ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 236) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PADDSW ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 237) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PADDUSB ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 220) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PADDUSW ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 221) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PADDW ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 253) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PAND ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 219) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PANDN ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 223) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PAVGB ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 224) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PAVGW ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 227) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PCMPEQB ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 116) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PCMPEQD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 118) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PCMPEQW ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 117) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PCMPGTB ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 100) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PCMPGTD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 102) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PCMPGTW ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 101) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PMADDWD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 245) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PMAXSW ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 238) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PMAXUB ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 222) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PMINSW ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 234) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PMINUB ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 218) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PMOVMSKB ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 215) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PMULHUW ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 228) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PMULHW ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 229) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PMULLW ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 213) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PMULUDQ ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 244) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION POR ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 235) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PSADBW ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 246) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PSSLD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 242) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PSLLQ ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 243) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PSLLW ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 241) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PSRAD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 226) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PSRAW ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 226) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PSRLD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 210) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PSRLQ ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 211) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PSRLW ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 209) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PSUBB ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 248) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PSUBD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 250) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PSUBQ ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 251) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PSUBSB ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 232) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PSUBSW ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 233) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PSUBUSB ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 216) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PSUBUSW ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 217) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PSUBW ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 249) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PUNPCKHBW ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 104) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PUNPCKHDQ ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 106) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PUNPCKHQDQ ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 109) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PUNPCKHWD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 105) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PUNPCKLBW ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 96) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PUNPCKLDQ ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 98) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PUNPCKLQDQ ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 108) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PUNPCKLWD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 97) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PXOR ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 239) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION CVTDQ2PD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 230) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION CVTDQ2PS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 91) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION CVTPD2DQ ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 242) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 230) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION CVTPD2PI ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 45) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION CVTPD2PS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 90) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION CVTPI2PD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 42) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION CVTPI2PS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 42) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION CVTPS2DQ ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 91) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION CVTPS2PD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 90) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION CVTPS2PI ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 45) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION CVTSD2SI ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 242) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 45) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION CVTSD2SS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 242) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 90) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION CVTSI2SD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 242) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 42) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION CVTSI2SS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 42) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION CVTSS2SD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 90) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION CVTSS2SI ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 45) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION CVTTPD2DQ ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 230) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION CVTTPD2PI ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 44) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION CVTTPS2DQ ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 91) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION CVTTPS2PI ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 44) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION CVTTSD2SI ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 242) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 44) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION CVTTSS2SI ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 44) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION LDDQU ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 242) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 240) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MASKMOVDQU ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 247) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MOVDDUP ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 242) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 18) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MOVHLPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 18) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MOVLHPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 22) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MOVMSKPD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 80) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MOVMSKPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 80) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MOVNTDQ ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 231) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MOVNTPD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 43) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MOVNTPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 43) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MOVSHDUP ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 22) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MOVSLDUP ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 18) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PEXTRW ++ (SEGMENT DST SRC BYTE) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 197) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)) ++ (EMIT-SIZED-IMMEDIATE SEGMENT :BYTE BYTE))) ++ ++(DEFINE-INSTRUCTION PINSRW ++ (SEGMENT DST SRC BYTE) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 196) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)) ++ (EMIT-SIZED-IMMEDIATE SEGMENT :BYTE BYTE))) ++ ++(DEFINE-INSTRUCTION PSHUFD ++ (SEGMENT DST SRC BYTE) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 112) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)) ++ (EMIT-SIZED-IMMEDIATE SEGMENT :BYTE BYTE))) ++ ++(DEFINE-INSTRUCTION PSHUFHW ++ (SEGMENT DST SRC BYTE) ++ (:EMITTER (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 112) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)) ++ (EMIT-SIZED-IMMEDIATE SEGMENT :BYTE BYTE))) ++ ++(DEFINE-INSTRUCTION PSHUFLW ++ (SEGMENT DST SRC BYTE) ++ (:EMITTER (EMIT-BYTE SEGMENT 242) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 112) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)) ++ (EMIT-SIZED-IMMEDIATE SEGMENT :BYTE BYTE))) ++ ++(DEFINE-INSTRUCTION SHUFPD ++ (SEGMENT DST SRC BYTE) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 198) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)) ++ (EMIT-SIZED-IMMEDIATE SEGMENT :BYTE BYTE))) ++ ++(DEFINE-INSTRUCTION SHUFPS ++ (SEGMENT DST SRC BYTE) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 198) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)) ++ (EMIT-SIZED-IMMEDIATE SEGMENT :BYTE BYTE))) ++ ++(DEFINE-INSTRUCTION CMPPD ++ (SEGMENT DST SRC COND) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 194) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)) ++ (EMIT-SIZED-IMMEDIATE SEGMENT ++ :BYTE ++ (CDR ++ (ASSOC COND ++ '((:EQ . 0) ++ (:E . 0) (:Z . 0) ++ (:L . 1) ++ (:NGE . 1) ++ (:LE . 2) ++ (:NG . 2) ++ (:UNORD . 3) ++ (:NE . 4) ++ (:NZ . 4) ++ (:NL . 5) ++ (:GE . 5) ++ (:NLE . 6) ++ (:G . 6) ++ (:ORD . 7))))))) ++ ++(DEFINE-INSTRUCTION CMPPS ++ (SEGMENT DST SRC COND) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 194) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)) ++ (EMIT-SIZED-IMMEDIATE SEGMENT ++ :BYTE ++ (CDR ++ (ASSOC COND ++ '((:EQ . 0) ++ (:E . 0) (:Z . 0) ++ (:L . 1) ++ (:NGE . 1) ++ (:LE . 2) ++ (:NG . 2) ++ (:UNORD . 3) ++ (:NE . 4) ++ (:NZ . 4) ++ (:NL . 5) ++ (:GE . 5) ++ (:NLE . 6) ++ (:G . 6) ++ (:ORD . 7))))))) ++ ++(DEFINE-INSTRUCTION CMPSD ++ (SEGMENT DST SRC COND) ++ (:EMITTER (EMIT-BYTE SEGMENT 242) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 194) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)) ++ (EMIT-SIZED-IMMEDIATE SEGMENT ++ :BYTE ++ (CDR ++ (ASSOC COND ++ '((:EQ . 0) ++ (:E . 0) (:Z . 0) ++ (:L . 1) ++ (:NGE . 1) ++ (:LE . 2) ++ (:NG . 2) ++ (:UNORD . 3) ++ (:NE . 4) ++ (:NZ . 4) ++ (:NL . 5) ++ (:GE . 5) ++ (:NLE . 6) ++ (:G . 6) ++ (:ORD . 7))))))) ++ ++(DEFINE-INSTRUCTION CMPSS ++ (SEGMENT DST SRC COND) ++ (:EMITTER (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 194) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)) ++ (EMIT-SIZED-IMMEDIATE SEGMENT ++ :BYTE ++ (CDR ++ (ASSOC COND ++ '((:EQ . 0) ++ (:E . 0) (:Z . 0) ++ (:L . 1) ++ (:NGE . 1) ++ (:LE . 2) ++ (:NG . 2) ++ (:UNORD . 3) ++ (:NE . 4) ++ (:NZ . 4) ++ (:NL . 5) ++ (:GE . 5) ++ (:NLE . 6) ++ (:G . 6) ++ (:ORD . 7))))))) ++ ++(DEFINE-INSTRUCTION MOVAPD ++ (SEGMENT DST SRC) ++ (:EMITTER ++ (COND ++ ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 40) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) ++ (T (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 41) ++ (EMIT-EA SEGMENT DST (REG-TN-ENCODING SRC)))))) ++ ++(DEFINE-INSTRUCTION MOVAPS ++ (SEGMENT DST SRC) ++ (:EMITTER ++ (COND ++ ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 40) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) ++ (T (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 41) ++ (EMIT-EA SEGMENT DST (REG-TN-ENCODING SRC)))))) ++ ++(DEFINE-INSTRUCTION MOVD ++ (SEGMENT DST SRC) ++ (:EMITTER ++ (COND ++ ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 110) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) ++ (T (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 126) ++ (EMIT-EA SEGMENT DST (REG-TN-ENCODING SRC)))))) ++ ++(DEFINE-INSTRUCTION MOVDQA ++ (SEGMENT DST SRC) ++ (:EMITTER ++ (COND ++ ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 111) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) ++ (T (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 127) ++ (EMIT-EA SEGMENT DST (REG-TN-ENCODING SRC)))))) ++ ++(DEFINE-INSTRUCTION MOVDQU ++ (SEGMENT DST SRC) ++ (:EMITTER ++ (COND ++ ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 111) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) ++ (T (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 127) ++ (EMIT-EA SEGMENT DST (REG-TN-ENCODING SRC)))))) ++ ++(DEFINE-INSTRUCTION MOVHPD ++ (SEGMENT DST SRC) ++ (:EMITTER ++ (COND ++ ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 22) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) ++ (T (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 23) ++ (EMIT-EA SEGMENT DST (REG-TN-ENCODING SRC)))))) ++ ++(DEFINE-INSTRUCTION MOVHPS ++ (SEGMENT DST SRC) ++ (:EMITTER ++ (COND ++ ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 22) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) ++ (T (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 23) ++ (EMIT-EA SEGMENT DST (REG-TN-ENCODING SRC)))))) ++ ++(DEFINE-INSTRUCTION MOVLPD ++ (SEGMENT DST SRC) ++ (:EMITTER ++ (COND ++ ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 18) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) ++ (T (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 19) ++ (EMIT-EA SEGMENT DST (REG-TN-ENCODING SRC)))))) ++ ++(DEFINE-INSTRUCTION MOVLPS ++ (SEGMENT DST SRC) ++ (:EMITTER ++ (COND ++ ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 18) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) ++ (T (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 19) ++ (EMIT-EA SEGMENT DST (REG-TN-ENCODING SRC)))))) ++ ++(DEFINE-INSTRUCTION MOVQ ++ (SEGMENT DST SRC) ++ (:EMITTER ++ (COND ++ ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 126) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) ++ (T (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 214) ++ (EMIT-EA SEGMENT DST (REG-TN-ENCODING SRC)))))) ++ ++(DEFINE-INSTRUCTION MOVSD ++ (SEGMENT DST SRC) ++ (:EMITTER ++ (COND ++ ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 242) ++ (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 16) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) ++ (T (EMIT-BYTE SEGMENT 242) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 17) ++ (EMIT-EA SEGMENT DST (REG-TN-ENCODING SRC)))))) ++ ++(DEFINE-INSTRUCTION MOVSS ++ (SEGMENT DST SRC) ++ (:EMITTER ++ (COND ++ ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 16) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) ++ (T (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 17) ++ (EMIT-EA SEGMENT DST (REG-TN-ENCODING SRC)))))) ++ ++(DEFINE-INSTRUCTION MOVUPD ++ (SEGMENT DST SRC) ++ (:EMITTER ++ (COND ++ ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 16) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) ++ (T (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 17) ++ (EMIT-EA SEGMENT DST (REG-TN-ENCODING SRC)))))) ++ ++(DEFINE-INSTRUCTION MOVUPS ++ (SEGMENT DST SRC) ++ (:EMITTER ++ (COND ++ ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 16) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) ++ (T (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 17) ++ (EMIT-EA SEGMENT DST (REG-TN-ENCODING SRC)))))) + -+(DEFINE-INSTRUCTION MOVUPS (SEGMENT DST SRC) -+ (:EMITTER -+ (COND -+ ((SSE-REGISTER-P DST) -+ (EMIT-BYTE SEGMENT 15) -+ (EMIT-BYTE SEGMENT 16) -+ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) -+ (T (EMIT-BYTE SEGMENT 15) -+ (EMIT-BYTE SEGMENT 17) -+ (EMIT-EA SEGMENT DST (REG-TN-ENCODING SRC)))))) + + +;;; CPUID @@ -155,16 +1373,15 @@ diff -Naur src-093/compiler/x86/insts.lisp src/compiler/x86/insts.lisp + (emit-byte segment #x0F) + (emit-byte segment #xA2))) + -+ + + + ;;;; fp instructions ;;;; ;;;; FIXME: This section said "added by jrd", which should end up in CREDITS. -diff -Naur src-093/compiler/x86/vm.lisp src/compiler/x86/vm.lisp ---- src-093/compiler/x86/vm.lisp 2005-08-05 15:32:19.810183044 +0300 -+++ src/compiler/x86/vm.lisp 2005-08-05 15:38:26.784310770 +0300 +diff -x 'CVS*' -Naur src-093/compiler/x86/vm.lisp src/compiler/x86/vm.lisp +--- src-093/compiler/x86/vm.lisp 2005-08-05 16:13:29.000000000 +0300 ++++ src/compiler/x86/vm.lisp 2005-08-08 16:32:19.609588299 +0300 @@ -21,7 +21,8 @@ (defvar *byte-register-names* (make-array 8 :initial-element nil)) (defvar *word-register-names* (make-array 16 :initial-element nil)) diff --git a/sbcl-src/src/compiler/x86/insts.lisp b/sbcl-src/src/compiler/x86/insts.lisp index da683b8..b64678c 100644 --- a/sbcl-src/src/compiler/x86/insts.lisp +++ b/sbcl-src/src/compiler/x86/insts.lisp @@ -192,6 +192,7 @@ (:byte 8) (:word 16) (:dword 32) + (:qword 64) (:dqword 128) (:float 32) (:double 64))) @@ -2086,6 +2087,20 @@ (EMIT-BYTE SEGMENT 94) (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) +(DEFINE-INSTRUCTION HADDPS + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 242) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 124) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION HSUBPS + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 242) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 125) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + (DEFINE-INSTRUCTION MAXPS (SEGMENT DST SRC) (:EMITTER (EMIT-BYTE SEGMENT 15) @@ -2134,24 +2149,1227 @@ (EMIT-BYTE SEGMENT 92) (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) +(DEFINE-INSTRUCTION UNPCKHPS + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 21) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION UNPCKLPS + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 20) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + (DEFINE-INSTRUCTION XORPS (SEGMENT DST SRC) (:EMITTER (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 87) (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) -;;; SSE MOVE +(DEFINE-INSTRUCTION ADDPD + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 88) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION ADDSUBPD + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 208) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION ANDNPD + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 85) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION ANDPD + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 84) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION DIVPD + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 94) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION HADDPD + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 124) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION HSUBPD + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 125) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION MAXPD + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 95) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION MINPD + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 93) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION MULPD + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 89) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION ORPD + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 86) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION SQRTPD + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 81) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION SUBPD + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 92) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION UNPCKHPD + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 21) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION UNPCKLPD + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 20) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION XORPD + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 87) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION ADDSD + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 242) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 88) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION COMISD + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 47) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION DIVSD + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 242) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 94) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION MAXSD + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 242) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 95) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION MINSD + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 242) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 93) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION MULSD + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 242) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 89) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION SQRTSD + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 242) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 81) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION SUBSD + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 242) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 92) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION UCOMISD + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 46) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION ADDSS + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 243) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 88) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION COMISS + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 47) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION DIVSS + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 243) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 94) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION MAXSS + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 243) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 95) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION MINSS + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 243) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 93) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION MULSS + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 243) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 89) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION RCPSS + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 243) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 83) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION RSQRTSS + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 243) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 82) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION SQRTSS + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 243) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 81) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION SUBSS + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 243) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 92) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION UCOMISS + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 46) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION PACKSSDW + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 107) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION PACKSSWB + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 99) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION PACKUSWB + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 103) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION PADDB + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 252) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION PADDD + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 254) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION PADDQ + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 212) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION PADDSB + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 236) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION PADDSW + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 237) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION PADDUSB + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 220) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION PADDUSW + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 221) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION PADDW + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 253) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION PAND + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 219) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION PANDN + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 223) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION PAVGB + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 224) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION PAVGW + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 227) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION PCMPEQB + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 116) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION PCMPEQD + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 118) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION PCMPEQW + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 117) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION PCMPGTB + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 100) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION PCMPGTD + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 102) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION PCMPGTW + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 101) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION PMADDWD + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 245) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION PMAXSW + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 238) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION PMAXUB + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 222) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION PMINSW + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 234) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION PMINUB + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 218) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION PMOVMSKB + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 215) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION PMULHUW + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 228) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION PMULHW + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 229) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION PMULLW + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 213) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION PMULUDQ + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 244) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION POR + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 235) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION PSADBW + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 246) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION PSSLD + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 242) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION PSLLQ + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 243) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION PSLLW + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 241) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION PSRAD + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 226) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION PSRAW + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 226) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION PSRLD + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 210) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION PSRLQ + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 211) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION PSRLW + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 209) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION PSUBB + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 248) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION PSUBD + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 250) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION PSUBQ + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 251) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION PSUBSB + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 232) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION PSUBSW + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 233) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION PSUBUSB + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 216) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION PSUBUSW + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 217) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION PSUBW + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 249) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION PUNPCKHBW + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 104) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION PUNPCKHDQ + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 106) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION PUNPCKHQDQ + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 109) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION PUNPCKHWD + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 105) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION PUNPCKLBW + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 96) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION PUNPCKLDQ + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 98) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION PUNPCKLQDQ + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 108) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION PUNPCKLWD + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 97) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION PXOR + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 239) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION CVTDQ2PD + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 243) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 230) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION CVTDQ2PS + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 91) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION CVTPD2DQ + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 242) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 230) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION CVTPD2PI + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 45) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION CVTPD2PS + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 90) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION CVTPI2PD + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 42) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION CVTPI2PS + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 42) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION CVTPS2DQ + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 91) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION CVTPS2PD + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 90) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION CVTPS2PI + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 45) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION CVTSD2SI + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 242) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 45) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION CVTSD2SS + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 242) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 90) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION CVTSI2SD + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 242) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 42) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION CVTSI2SS + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 243) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 42) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION CVTSS2SD + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 243) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 90) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION CVTSS2SI + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 243) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 45) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION CVTTPD2DQ + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 230) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION CVTTPD2PI + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 44) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION CVTTPS2DQ + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 243) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 91) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION CVTTPS2PI + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 44) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION CVTTSD2SI + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 242) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 44) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION CVTTSS2SI + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 243) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 44) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION LDDQU + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 242) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 240) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION MASKMOVDQU + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 247) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION MOVDDUP + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 242) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 18) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION MOVHLPS + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 18) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION MOVLHPS + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 22) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION MOVMSKPD + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 80) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION MOVMSKPS + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 80) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION MOVNTDQ + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 231) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION MOVNTPD + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 43) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION MOVNTPS + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 43) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION MOVSHDUP + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 243) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 22) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION MOVSLDUP + (SEGMENT DST SRC) + (:EMITTER (EMIT-BYTE SEGMENT 243) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 18) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) + +(DEFINE-INSTRUCTION PEXTRW + (SEGMENT DST SRC BYTE) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 197) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)) + (EMIT-SIZED-IMMEDIATE SEGMENT :BYTE BYTE))) + +(DEFINE-INSTRUCTION PINSRW + (SEGMENT DST SRC BYTE) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 196) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)) + (EMIT-SIZED-IMMEDIATE SEGMENT :BYTE BYTE))) + +(DEFINE-INSTRUCTION PSHUFD + (SEGMENT DST SRC BYTE) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 112) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)) + (EMIT-SIZED-IMMEDIATE SEGMENT :BYTE BYTE))) + +(DEFINE-INSTRUCTION PSHUFHW + (SEGMENT DST SRC BYTE) + (:EMITTER (EMIT-BYTE SEGMENT 243) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 112) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)) + (EMIT-SIZED-IMMEDIATE SEGMENT :BYTE BYTE))) + +(DEFINE-INSTRUCTION PSHUFLW + (SEGMENT DST SRC BYTE) + (:EMITTER (EMIT-BYTE SEGMENT 242) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 112) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)) + (EMIT-SIZED-IMMEDIATE SEGMENT :BYTE BYTE))) + +(DEFINE-INSTRUCTION SHUFPD + (SEGMENT DST SRC BYTE) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 198) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)) + (EMIT-SIZED-IMMEDIATE SEGMENT :BYTE BYTE))) + +(DEFINE-INSTRUCTION SHUFPS + (SEGMENT DST SRC BYTE) + (:EMITTER (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 198) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)) + (EMIT-SIZED-IMMEDIATE SEGMENT :BYTE BYTE))) + +(DEFINE-INSTRUCTION CMPPD + (SEGMENT DST SRC COND) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 194) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)) + (EMIT-SIZED-IMMEDIATE SEGMENT + :BYTE + (CDR + (ASSOC COND + '((:EQ . 0) + (:E . 0) (:Z . 0) + (:L . 1) + (:NGE . 1) + (:LE . 2) + (:NG . 2) + (:UNORD . 3) + (:NE . 4) + (:NZ . 4) + (:NL . 5) + (:GE . 5) + (:NLE . 6) + (:G . 6) + (:ORD . 7))))))) + +(DEFINE-INSTRUCTION CMPPS + (SEGMENT DST SRC COND) + (:EMITTER (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 194) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)) + (EMIT-SIZED-IMMEDIATE SEGMENT + :BYTE + (CDR + (ASSOC COND + '((:EQ . 0) + (:E . 0) (:Z . 0) + (:L . 1) + (:NGE . 1) + (:LE . 2) + (:NG . 2) + (:UNORD . 3) + (:NE . 4) + (:NZ . 4) + (:NL . 5) + (:GE . 5) + (:NLE . 6) + (:G . 6) + (:ORD . 7))))))) + +(DEFINE-INSTRUCTION CMPSD + (SEGMENT DST SRC COND) + (:EMITTER (EMIT-BYTE SEGMENT 242) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 194) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)) + (EMIT-SIZED-IMMEDIATE SEGMENT + :BYTE + (CDR + (ASSOC COND + '((:EQ . 0) + (:E . 0) (:Z . 0) + (:L . 1) + (:NGE . 1) + (:LE . 2) + (:NG . 2) + (:UNORD . 3) + (:NE . 4) + (:NZ . 4) + (:NL . 5) + (:GE . 5) + (:NLE . 6) + (:G . 6) + (:ORD . 7))))))) + +(DEFINE-INSTRUCTION CMPSS + (SEGMENT DST SRC COND) + (:EMITTER (EMIT-BYTE SEGMENT 243) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 194) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)) + (EMIT-SIZED-IMMEDIATE SEGMENT + :BYTE + (CDR + (ASSOC COND + '((:EQ . 0) + (:E . 0) (:Z . 0) + (:L . 1) + (:NGE . 1) + (:LE . 2) + (:NG . 2) + (:UNORD . 3) + (:NE . 4) + (:NZ . 4) + (:NL . 5) + (:GE . 5) + (:NLE . 6) + (:G . 6) + (:ORD . 7))))))) + +(DEFINE-INSTRUCTION MOVAPD + (SEGMENT DST SRC) + (:EMITTER + (COND + ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 40) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) + (T (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 41) + (EMIT-EA SEGMENT DST (REG-TN-ENCODING SRC)))))) + +(DEFINE-INSTRUCTION MOVAPS + (SEGMENT DST SRC) + (:EMITTER + (COND + ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 40) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) + (T (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 41) + (EMIT-EA SEGMENT DST (REG-TN-ENCODING SRC)))))) + +(DEFINE-INSTRUCTION MOVD + (SEGMENT DST SRC) + (:EMITTER + (COND + ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 110) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) + (T (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 126) + (EMIT-EA SEGMENT DST (REG-TN-ENCODING SRC)))))) + +(DEFINE-INSTRUCTION MOVDQA + (SEGMENT DST SRC) + (:EMITTER + (COND + ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 111) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) + (T (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 127) + (EMIT-EA SEGMENT DST (REG-TN-ENCODING SRC)))))) + +(DEFINE-INSTRUCTION MOVDQU + (SEGMENT DST SRC) + (:EMITTER + (COND + ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 243) + (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 111) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) + (T (EMIT-BYTE SEGMENT 243) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 127) + (EMIT-EA SEGMENT DST (REG-TN-ENCODING SRC)))))) + +(DEFINE-INSTRUCTION MOVHPD + (SEGMENT DST SRC) + (:EMITTER + (COND + ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 22) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) + (T (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 23) + (EMIT-EA SEGMENT DST (REG-TN-ENCODING SRC)))))) + +(DEFINE-INSTRUCTION MOVHPS + (SEGMENT DST SRC) + (:EMITTER + (COND + ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 22) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) + (T (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 23) + (EMIT-EA SEGMENT DST (REG-TN-ENCODING SRC)))))) + +(DEFINE-INSTRUCTION MOVLPD + (SEGMENT DST SRC) + (:EMITTER + (COND + ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 18) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) + (T (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 19) + (EMIT-EA SEGMENT DST (REG-TN-ENCODING SRC)))))) + +(DEFINE-INSTRUCTION MOVLPS + (SEGMENT DST SRC) + (:EMITTER + (COND + ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 18) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) + (T (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 19) + (EMIT-EA SEGMENT DST (REG-TN-ENCODING SRC)))))) + +(DEFINE-INSTRUCTION MOVQ + (SEGMENT DST SRC) + (:EMITTER + (COND + ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 243) + (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 126) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) + (T (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 214) + (EMIT-EA SEGMENT DST (REG-TN-ENCODING SRC)))))) + +(DEFINE-INSTRUCTION MOVSD + (SEGMENT DST SRC) + (:EMITTER + (COND + ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 242) + (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 16) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) + (T (EMIT-BYTE SEGMENT 242) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 17) + (EMIT-EA SEGMENT DST (REG-TN-ENCODING SRC)))))) + +(DEFINE-INSTRUCTION MOVSS + (SEGMENT DST SRC) + (:EMITTER + (COND + ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 243) + (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 16) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) + (T (EMIT-BYTE SEGMENT 243) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 17) + (EMIT-EA SEGMENT DST (REG-TN-ENCODING SRC)))))) + +(DEFINE-INSTRUCTION MOVUPD + (SEGMENT DST SRC) + (:EMITTER + (COND + ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 16) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) + (T (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 17) + (EMIT-EA SEGMENT DST (REG-TN-ENCODING SRC)))))) + +(DEFINE-INSTRUCTION MOVUPS + (SEGMENT DST SRC) + (:EMITTER + (COND + ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 16) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) + (T (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 17) + (EMIT-EA SEGMENT DST (REG-TN-ENCODING SRC)))))) -(DEFINE-INSTRUCTION MOVUPS (SEGMENT DST SRC) - (:EMITTER - (COND - ((SSE-REGISTER-P DST) - (EMIT-BYTE SEGMENT 15) - (EMIT-BYTE SEGMENT 16) - (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) - (T (EMIT-BYTE SEGMENT 15) - (EMIT-BYTE SEGMENT 17) - (EMIT-EA SEGMENT DST (REG-TN-ENCODING SRC)))))) ;;; CPUID @@ -2162,7 +3380,6 @@ (emit-byte segment #x0F) (emit-byte segment #xA2))) - diff --git a/sse-vop.lisp b/sse-vop.lisp dissimilarity index 93% index bb1e880..07550d0 100644 --- a/sse-vop.lisp +++ b/sse-vop.lisp @@ -1,207 +1,116 @@ -(in-package :sb-vm) - -(defmacro vect-ea (vect idx) - `(make-ea :dword :base ,vect :index ,idx - :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag))) - - -(define-vop (%sse-add/simple-array-single-float-1) - (:policy :fast-safe) - - (:args (result :scs (descriptor-reg)) - (vect1 :scs (descriptor-reg)) - (vect2 :scs (descriptor-reg)) - (index :scs (unsigned-reg))) - - (:arg-types simple-array-single-float simple-array-single-float simple-array-single-float fixnum) - - (:temporary (:sc sse-reg) sse-temp1) - (:temporary (:sc sse-reg) sse-temp2) - - (:generator 10 - - ;; scale index by 4 (size-of single-float) - (inst shl index 2) - - ;; load - (inst movups sse-temp1 (vect-ea vect1 index)) - (inst movups sse-temp2 (vect-ea vect2 index)) - - ;; operate - (inst addps sse-temp1 sse-temp2) - - ;; store - (inst movups (vect-ea result index) sse-temp1) - )) - -(define-vop (%sse-sub/simple-array-single-float-1) - (:policy :fast-safe) - - (:args (result :scs (descriptor-reg)) - (vect1 :scs (descriptor-reg)) - (vect2 :scs (descriptor-reg)) - (index :scs (unsigned-reg))) - - (:arg-types simple-array-single-float simple-array-single-float simple-array-single-float fixnum) - - (:temporary (:sc sse-reg) sse-temp1) - (:temporary (:sc sse-reg) sse-temp2) - - (:generator 10 - - ;; scale index by 4 (size-of single-float) - (inst shl index 2) - - ;; load - (inst movups sse-temp1 (vect-ea vect1 index)) - (inst movups sse-temp2 (vect-ea vect2 index)) - - ;; operate - (inst subps sse-temp1 sse-temp2) - - ;; store - (inst movups (vect-ea result index) sse-temp1) - )) - -(define-vop (%sse-mul/simple-array-single-float-1) - (:policy :fast-safe) - - (:args (result :scs (descriptor-reg)) - (vect1 :scs (descriptor-reg)) - (vect2 :scs (descriptor-reg)) - (index :scs (unsigned-reg))) - - (:arg-types simple-array-single-float simple-array-single-float simple-array-single-float fixnum) - - (:temporary (:sc sse-reg) sse-temp1) - (:temporary (:sc sse-reg) sse-temp2) - - (:generator 10 - - ;; scale index by 4 (size-of single-float) - (inst shl index 2) - - ;; load - (inst movups sse-temp1 (vect-ea vect1 index)) - (inst movups sse-temp2 (vect-ea vect2 index)) - - ;; operate - (inst mulps sse-temp1 sse-temp2) - - ;; store - (inst movups (vect-ea result index) sse-temp1) - )) - -(define-vop (%sse-div/simple-array-single-float-1) - (:policy :fast-safe) - - (:args (result :scs (descriptor-reg)) - (vect1 :scs (descriptor-reg)) - (vect2 :scs (descriptor-reg)) - (index :scs (unsigned-reg))) - - (:arg-types simple-array-single-float simple-array-single-float simple-array-single-float fixnum) - - (:temporary (:sc sse-reg) sse-temp1) - (:temporary (:sc sse-reg) sse-temp2) - - (:generator 10 - - ;; scale index by 4 (size-of single-float) - (inst shl index 2) - - ;; load - (inst movups sse-temp1 (vect-ea vect1 index)) - (inst movups sse-temp2 (vect-ea vect2 index)) - - ;; operate - (inst divps sse-temp1 sse-temp2) - - ;; store - (inst movups (vect-ea result index) sse-temp1) - )) - -(define-vop (%sse-sqrt/simple-array-single-float-1) - (:policy :fast-safe) - - (:args (result :scs (descriptor-reg)) - (vect1 :scs (descriptor-reg)) - (index :scs (unsigned-reg))) - - (:arg-types simple-array-single-float simple-array-single-float fixnum) - - (:temporary (:sc sse-reg) sse-temp1) - (:temporary (:sc sse-reg) sse-temp2) - - (:generator 10 - - ;; scale index by 4 (size-of single-float) - (inst shl index 2) - - ;; load - (inst movups sse-temp1 (vect-ea vect1 index)) - - ;; operate - (inst sqrtps sse-temp2 sse-temp1) - - ;; store - (inst movups (vect-ea result index) sse-temp2) - )) - - -(define-vop (%sse-recip/simple-array-single-float-1) - (:policy :fast-safe) - - (:args (result :scs (descriptor-reg)) - (vect1 :scs (descriptor-reg)) - (index :scs (unsigned-reg))) - - (:arg-types simple-array-single-float simple-array-single-float fixnum) - - (:temporary (:sc sse-reg) sse-temp1) - (:temporary (:sc sse-reg) sse-temp2) - - (:generator 10 - - ;; scale index by 4 (size-of single-float) - (inst shl index 2) - - ;; load - (inst movups sse-temp1 (vect-ea vect1 index)) - - ;; operate - (inst rcpps sse-temp2 sse-temp1) - - ;; store - (inst movups (vect-ea result index) sse-temp2) - )) - - -(define-vop (%sse-recip-sqrt/simple-array-single-float-1) - (:policy :fast-safe) - - (:args (result :scs (descriptor-reg)) - (vect1 :scs (descriptor-reg)) - (index :scs (unsigned-reg))) - - (:arg-types simple-array-single-float simple-array-single-float fixnum) - - (:temporary (:sc sse-reg) sse-temp1) - (:temporary (:sc sse-reg) sse-temp2) - - (:generator 10 - - ;; scale index by 4 (size-of single-float) - (inst shl index 2) - - ;; load - (inst movups sse-temp1 (vect-ea vect1 index)) - - ;; operate - (inst rsqrtps sse-temp2 sse-temp1) - - ;; store - (inst movups (vect-ea result index) sse-temp2) - )) - - +(in-package :sb-vm) + +(defmacro vect-ea (vect idx) + `(make-ea :dword :base ,vect :index ,idx + :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag))) + +;; TWO-ARG SSE VOPs +(loop for (op-name type mov-inst op-inst) in + '( + (add single-float movups addps) + (addsub single-float movups addsubps) + (andnot single-float movups andnps) + (and single-float movups andps) + (div single-float movups divps) + (hadd single-float movups haddps) + (hsub single-float movups hsubps) + (max single-float movups maxps) + (min single-float movups minps) + (mul single-float movups mulps) + (or single-float movups orps) + (sub single-float movups subps) + (xor single-float movups xorps) + + (add double-float movupd addpd) + (addsub double-float movupd addsubpd) + (andnot double-float movupd andnpd) + (and double-float movupd andpd) + (div double-float movupd divpd) + (hadd double-float movupd haddpd) + (hsub double-float movupd hsubpd) + (max double-float movupd maxpd) + (min double-float movupd minpd) + (mul double-float movupd mulpd) + (or double-float movupd orpd) + (sub double-float movupd subpd) + (xor double-float movupd xorpd) + ) + do + + `(define-vop (,(intern (format nil "%SSE-~A/SIMPLE-ARRAY-~A-1" op-name type))) + (:policy :fast-safe) + + ;;(:guard (member :sse2 *backend-subfeatures*)) + + (:args + (result :scs (descriptor-reg)) + (vect1 :scs (descriptor-reg)) + (vect2 :scs (descriptor-reg)) + (index :scs (unsigned-reg))) + + (:arg-types + ,(intern (format nil "SIMPLE-ARRAY-~A" type)) + ,(intern (format nil "SIMPLE-ARRAY-~A" type)) + ,(intern (format nil "SIMPLE-ARRAY-~A" type)) + fixnum) + + (:temporary (:sc sse-reg) sse-temp1) + (:temporary (:sc sse-reg) sse-temp2) + + (:generator 10 + + ;; scale index by 4 (size-of single-float) + (inst shl index 2) + + ;; load + (inst ,mov-inst sse-temp1 (vect-ea vect1 index)) + (inst ,mov-inst sse-temp2 (vect-ea vect2 index)) + + ;; operate + (inst ,op-inst sse-temp1 sse-temp2) + + ;; store + (inst ,mov-inst (vect-ea result index) sse-temp1) + ))) + +;; SINGLE-ARG SSE VOPs +(loop for (op-name type mov-inst op-inst) in + '( + (recip single-float movups rcpps) + (rsqrt single-float movups rsqrtps) + (sqrt single-float movups sqrtps) + (sqrt double-float movupd sqrtpd) + ) + do + + `(define-vop (,(intern (format nil "%SSE-~A/SIMPLE-ARRAY-~A-1" op-name type))) + (:policy :fast-safe) + + ;;(:guard (member :sse2 *backend-subfeatures*)) + + (:args + (result :scs (descriptor-reg)) + (vect1 :scs (descriptor-reg)) + (index :scs (unsigned-reg))) + + (:arg-types + ,(intern (format nil "SIMPLE-ARRAY-~A" type)) + ,(intern (format nil "SIMPLE-ARRAY-~A" type)) + fixnum) + + (:temporary (:sc sse-reg) sse-temp1) + + (:generator 10 + + ;; scale index by 4 (size-of single-float) + (inst shl index 2) + + ;; load + (inst ,mov-inst sse-temp1 (vect-ea vect1 index)) + + ;; operate + (inst ,op-inst sse-temp1) + + ;; store + (inst ,mov-inst (vect-ea result index) sse-temp1) + ))) -- 2.11.4.GIT From 76ada89c55e4ffe0b16bf278b2c9e333ec852a02 Mon Sep 17 00:00:00 2001 From: rlaakso Date: Mon, 8 Aug 2005 15:56:01 +0000 Subject: [PATCH 06/16] .. --- .cvsignore | 1 + example-test.lisp | 22 ++++- generate-sse-vops.lisp | 223 +++++++++++++++++++++++++++++++++++++++++++++++++ load.lisp | 6 ++ sse-vop.lisp | 116 ------------------------- 5 files changed, 250 insertions(+), 118 deletions(-) create mode 100644 generate-sse-vops.lisp create mode 100644 load.lisp delete mode 100644 sse-vop.lisp diff --git a/.cvsignore b/.cvsignore index a15c332..f503226 100644 --- a/.cvsignore +++ b/.cvsignore @@ -1,3 +1,4 @@ .emacs.desktop *.fasl sse-insts.lisp +sse-vops.lisp diff --git a/example-test.lisp b/example-test.lisp index 222f522..6be574d 100644 --- a/example-test.lisp +++ b/example-test.lisp @@ -10,14 +10,32 @@ (aref arr2 i) (float i))) (format t "Before: ~S~%~S~%" arr1 arr2) + (format t "b <- a + b, idx 0~%") - (sb-sys:%primitive sb-vm::%sse-sqrt/simple-array-single-float-1 arr2 arr1 4) + (sb-sys:%primitive sb-vm::%sse-add/simple-array-single-float-1 arr2 arr2 arr1 0) (format t "After: ~S~%~S~%" arr1 arr2) - (sb-sys:%primitive sb-vm::%sse-recip/simple-array-single-float-1 arr1 arr2 4) + (format t "a <- sqrt(b), idx 4~%") + + (sb-sys:%primitive sb-vm::%sse-sqrt/simple-array-single-float-1 arr1 arr2 4) (format t "After: ~S~%~S~%" arr1 arr2) )) +(defun test-2 () + (let ((arr1 (make-array 16 :element-type '(unsigned-byte 8) :initial-element 0)) + (arr2 (make-array 16 :element-type '(unsigned-byte 8) :initial-element 0))) + + (loop for i from 0 below 16 do (setf (aref arr1 i) (* (1+ i) 10) + (aref arr2 i) (1+ i))) + + (format t "Before: ~S~%~S~%" arr1 arr2) + (format t "b <- a+b, idx 4~%") + + (sb-sys:%primitive sb-vm::%sse-add/simple-array-unsigned-byte-8-1 arr2 arr1 arr2 4) + + (format t "After: ~S~%~S~%" arr1 arr2) + + )) diff --git a/generate-sse-vops.lisp b/generate-sse-vops.lisp new file mode 100644 index 0000000..4823b1b --- /dev/null +++ b/generate-sse-vops.lisp @@ -0,0 +1,223 @@ +(defun vect-ea (vect idx) + `(make-ea :dword :base ,vect :index ,idx + :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag))) + +(defun gen-vops-to-file (filename) + (with-open-file (stream filename :direction :output :if-exists :supersede) + (gen-vops stream))) + +(defun gen-vops (&optional (stream t)) + + (format stream "(in-package :sb-vm)~%~%") + + ;; TWO-ARG SSE VOPs + (loop for (op-name type mov-inst op-inst elem-width) in + '( + ;; single float + (add single-float movups addps 4) + (addsub single-float movups addsubps 4) + (andnot single-float movups andnps 4) + (and single-float movups andps 4) + (div single-float movups divps 4) + (hadd single-float movups haddps 4) + (hsub single-float movups hsubps 4) + (max single-float movups maxps 4) + (min single-float movups minps 4) + (mul single-float movups mulps 4) + (or single-float movups orps 4) + (sub single-float movups subps 4) + (xor single-float movups xorps 4) + + ;; double float + (add double-float movupd addpd 8) + (addsub double-float movupd addsubpd 8) + (andnot double-float movupd andnpd 8) + (and double-float movupd andpd 8) + (div double-float movupd divpd 8) + (hadd double-float movupd haddpd 8) + (hsub double-float movupd hsubpd 8) + (max double-float movupd maxpd 8) + (min double-float movupd minpd 8) + (mul double-float movupd mulpd 8) + (or double-float movupd orpd 8) + (sub double-float movupd subpd 8) + (xor double-float movupd xorpd 8) + + ;; unsigned byte 8 + (add unsigned-byte-8 movdqu paddb 1) + (avg unsigned-byte-8 movdqu pavgb 1) + (max unsigned-byte-8 movdqu pmaxub 1) + (min unsigned-byte-8 movdqu pminub 1) + (sub unsigned-byte-8 movdqu psubb 1) + + (and unsigned-byte-8 movdqu pand 1) + (andn unsigned-byte-8 movdqu pandn 1) + (or unsigned-byte-8 movdqu por 1) + (xor unsigned-byte-8 movdqu pxor 1) + + ;; unsigned byte 16 + (add unsigned-byte-16 movdqu paddw 2) + (avg unsigned-byte-16 movdqu pavgw 2) + (sub unsigned-byte-16 movdqu psubw 2) + + (and unsigned-byte-16 movdqu pand 2) + (andn unsigned-byte-16 movdqu pandn 2) + (or unsigned-byte-16 movdqu por 2) + (xor unsigned-byte-16 movdqu pxor 2) + + (shl unsigned-byte-16 movdqu psllw 2) + (shr unsigned-byte-16 movdqu psrlw 2) + + ;; signed byte 16 + (add signed-byte-16 movdqu paddw 2) + (max signed-byte-16 movdqu pmaxsw 2) + (min signed-byte-16 movdqu pminsw 2) + (sub signed-byte-16 movdqu psubw 2) + + (and signed-byte-16 movdqu pand 2) + (andn signed-byte-16 movdqu pandn 2) + (or signed-byte-16 movdqu por 2) + (xor signed-byte-16 movdqu pxor 2) + + (shl signed-byte-16 movdqu psllw 2) + (shr signed-byte-16 movdqu psraw 2) + ) + do + + (format stream "~S~%~%" + `(define-vop (,(intern (let ((name (format nil "%SSE-~A/SIMPLE-ARRAY-~A-1" op-name type))) + (format t "; defining VOP ~A..~%" name) + name))) + + (:policy :fast-safe) + + ;;(:guard (member :sse2 *backend-subfeatures*)) + + (:args + (result :scs (descriptor-reg)) + (vect1 :scs (descriptor-reg)) + (vect2 :scs (descriptor-reg)) + (index :scs (unsigned-reg))) + + (:arg-types + ,(intern (format nil "SIMPLE-ARRAY-~A" type)) + ,(intern (format nil "SIMPLE-ARRAY-~A" type)) + ,(intern (format nil "SIMPLE-ARRAY-~A" type)) + fixnum) + + (:temporary (:sc sse-reg) sse-temp1) + (:temporary (:sc sse-reg) sse-temp2) + + (:generator 10 + + ;; scale index by 4 (size-of single-float) + (inst shl index ,(floor (log elem-width 2))) + + ;; load + (inst ,mov-inst sse-temp1 ,(vect-ea 'vect1 'index)) + (inst ,mov-inst sse-temp2 ,(vect-ea 'vect2 'index)) + + ;; operate + (inst ,op-inst sse-temp1 sse-temp2) + + ;; store + (inst ,mov-inst ,(vect-ea 'result 'index) sse-temp1) + )))) + + ;; SINGLE-ARG SSE VOPs + (loop for (op-name type mov-inst op-inst elem-width) in + '( + (recip single-float movups rcpps 4) + (rsqrt single-float movups rsqrtps 4) + (sqrt single-float movups sqrtps 4) + (sqrt double-float movupd sqrtpd 8) + ) + do + (format stream "~S~%~%" + `(define-vop (,(intern (let ((name (format nil "%SSE-~A/SIMPLE-ARRAY-~A-1" op-name type))) + (format t "; defining VOP ~A..~%" name) + name))) + (:policy :fast-safe) + + ;;(:guard (member :sse2 *backend-subfeatures*)) + + (:args + (result :scs (descriptor-reg)) + (vect1 :scs (descriptor-reg)) + (index :scs (unsigned-reg))) + + (:arg-types + ,(intern (format nil "SIMPLE-ARRAY-~A" type)) + ,(intern (format nil "SIMPLE-ARRAY-~A" type)) + fixnum) + + (:temporary (:sc sse-reg) sse-temp1) + (:temporary (:sc sse-reg) sse-temp2) + + (:generator 10 + + ;; scale index by 4 (size-of single-float) + (inst shl index ,(floor (log elem-width 2))) + + ;; load + (inst ,mov-inst sse-temp1 ,(vect-ea 'vect1 'index)) + + ;; operate + (inst ,op-inst sse-temp2 sse-temp1) + + ;; store + (inst ,mov-inst ,(vect-ea 'result 'index) sse-temp2) + )))) + + ;; COMPARE + (loop for (op-name type mov-inst op-inst elem-width) in + '( + (cmp single-float movups cmpps 4) + (cmp double-float movupd cmppd 8) + ) + do + (format stream "~S~%~%" + `(define-vop (,(intern (let ((name (format nil "%SSE-~A/SIMPLE-ARRAY-~A-1" op-name type))) + (format t "; defining VOP ~A..~%" name) + name))) + + (:policy :fast-safe) + + ;;(:guard (member :sse2 *backend-subfeatures*)) + + (:args + (result :scs (descriptor-reg)) + (vect1 :scs (descriptor-reg)) + (vect2 :scs (descriptor-reg)) + (index :scs (unsigned-reg))) + + (:info cond) + + (:arg-types + ,(intern (format nil "SIMPLE-ARRAY-~A" type)) + ,(intern (format nil "SIMPLE-ARRAY-~A" type)) + ,(intern (format nil "SIMPLE-ARRAY-~A" type)) + fixnum + (:constant keyword) + ) + + (:temporary (:sc sse-reg) sse-temp1) + (:temporary (:sc sse-reg) sse-temp2) + + (:generator 10 + + ;; scale index by 4 (size-of single-float) + (inst shl index ,(floor (log elem-width 2))) + + ;; load + (inst ,mov-inst sse-temp1 ,(vect-ea 'vect1 'index)) + (inst ,mov-inst sse-temp2 ,(vect-ea 'vect2 'index)) + + ;; operate + (inst ,op-inst sse-temp1 sse-temp2 cond) + + ;; store + (inst ,mov-inst ,(vect-ea 'result 'index) sse-temp1) + )))) + + ) diff --git a/load.lisp b/load.lisp new file mode 100644 index 0000000..d85b1d2 --- /dev/null +++ b/load.lisp @@ -0,0 +1,6 @@ +(if t + (progn + (load (compile-file "sse-vops.lisp")) + (load (compile-file "example-test.lisp")) + )) + \ No newline at end of file diff --git a/sse-vop.lisp b/sse-vop.lisp deleted file mode 100644 index 07550d0..0000000 --- a/sse-vop.lisp +++ /dev/null @@ -1,116 +0,0 @@ -(in-package :sb-vm) - -(defmacro vect-ea (vect idx) - `(make-ea :dword :base ,vect :index ,idx - :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag))) - -;; TWO-ARG SSE VOPs -(loop for (op-name type mov-inst op-inst) in - '( - (add single-float movups addps) - (addsub single-float movups addsubps) - (andnot single-float movups andnps) - (and single-float movups andps) - (div single-float movups divps) - (hadd single-float movups haddps) - (hsub single-float movups hsubps) - (max single-float movups maxps) - (min single-float movups minps) - (mul single-float movups mulps) - (or single-float movups orps) - (sub single-float movups subps) - (xor single-float movups xorps) - - (add double-float movupd addpd) - (addsub double-float movupd addsubpd) - (andnot double-float movupd andnpd) - (and double-float movupd andpd) - (div double-float movupd divpd) - (hadd double-float movupd haddpd) - (hsub double-float movupd hsubpd) - (max double-float movupd maxpd) - (min double-float movupd minpd) - (mul double-float movupd mulpd) - (or double-float movupd orpd) - (sub double-float movupd subpd) - (xor double-float movupd xorpd) - ) - do - - `(define-vop (,(intern (format nil "%SSE-~A/SIMPLE-ARRAY-~A-1" op-name type))) - (:policy :fast-safe) - - ;;(:guard (member :sse2 *backend-subfeatures*)) - - (:args - (result :scs (descriptor-reg)) - (vect1 :scs (descriptor-reg)) - (vect2 :scs (descriptor-reg)) - (index :scs (unsigned-reg))) - - (:arg-types - ,(intern (format nil "SIMPLE-ARRAY-~A" type)) - ,(intern (format nil "SIMPLE-ARRAY-~A" type)) - ,(intern (format nil "SIMPLE-ARRAY-~A" type)) - fixnum) - - (:temporary (:sc sse-reg) sse-temp1) - (:temporary (:sc sse-reg) sse-temp2) - - (:generator 10 - - ;; scale index by 4 (size-of single-float) - (inst shl index 2) - - ;; load - (inst ,mov-inst sse-temp1 (vect-ea vect1 index)) - (inst ,mov-inst sse-temp2 (vect-ea vect2 index)) - - ;; operate - (inst ,op-inst sse-temp1 sse-temp2) - - ;; store - (inst ,mov-inst (vect-ea result index) sse-temp1) - ))) - -;; SINGLE-ARG SSE VOPs -(loop for (op-name type mov-inst op-inst) in - '( - (recip single-float movups rcpps) - (rsqrt single-float movups rsqrtps) - (sqrt single-float movups sqrtps) - (sqrt double-float movupd sqrtpd) - ) - do - - `(define-vop (,(intern (format nil "%SSE-~A/SIMPLE-ARRAY-~A-1" op-name type))) - (:policy :fast-safe) - - ;;(:guard (member :sse2 *backend-subfeatures*)) - - (:args - (result :scs (descriptor-reg)) - (vect1 :scs (descriptor-reg)) - (index :scs (unsigned-reg))) - - (:arg-types - ,(intern (format nil "SIMPLE-ARRAY-~A" type)) - ,(intern (format nil "SIMPLE-ARRAY-~A" type)) - fixnum) - - (:temporary (:sc sse-reg) sse-temp1) - - (:generator 10 - - ;; scale index by 4 (size-of single-float) - (inst shl index 2) - - ;; load - (inst ,mov-inst sse-temp1 (vect-ea vect1 index)) - - ;; operate - (inst ,op-inst sse-temp1) - - ;; store - (inst ,mov-inst (vect-ea result index) sse-temp1) - ))) -- 2.11.4.GIT From a91a1570532a48411fa88bbcf1b6d8a868ce1d32 Mon Sep 17 00:00:00 2001 From: rlaakso Date: Mon, 8 Aug 2005 16:23:22 +0000 Subject: [PATCH 07/16] .. --- cpuid-vop.lisp | 26 +++++++ cpuid.lisp | 26 +++++++ generate-sse-instructions.lisp | 26 +++++++ generate-sse-vops.lisp | 27 +++++++ scratch/.emacs.desktop | 82 --------------------- scratch/README | 2 - scratch/asm-t1.asm | 10 --- scratch/foo.lisp | 159 ----------------------------------------- scratch/sse.lisp | 125 -------------------------------- scratch/sse2.lisp | 33 --------- 10 files changed, 105 insertions(+), 411 deletions(-) delete mode 100644 scratch/.emacs.desktop delete mode 100644 scratch/README delete mode 100644 scratch/asm-t1.asm delete mode 100644 scratch/foo.lisp delete mode 100644 scratch/sse.lisp delete mode 100644 scratch/sse2.lisp diff --git a/cpuid-vop.lisp b/cpuid-vop.lisp index 2f79c70..41a20dd 100644 --- a/cpuid-vop.lisp +++ b/cpuid-vop.lisp @@ -1,3 +1,29 @@ +#| +Copyright (c) 2005 Risto Laakso +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. +3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +|# (in-package :sb-c) (ignore-errors (defknown cl-user::%read-cpu (unsigned-byte-32 simple-array-unsigned-byte-16) nil)) diff --git a/cpuid.lisp b/cpuid.lisp index 55c4f9c..708d3c9 100644 --- a/cpuid.lisp +++ b/cpuid.lisp @@ -1,3 +1,29 @@ +#| +Copyright (c) 2005 Risto Laakso +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. +3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +|# (defpackage :cpuid (:use :cl)) (in-package :cpuid) diff --git a/generate-sse-instructions.lisp b/generate-sse-instructions.lisp index 7619416..5de9881 100644 --- a/generate-sse-instructions.lisp +++ b/generate-sse-instructions.lisp @@ -1,4 +1,30 @@ #| +Copyright (c) 2005 Risto Laakso +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. +3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +|# +#| instruction reference: diff --git a/generate-sse-vops.lisp b/generate-sse-vops.lisp index 4823b1b..e091688 100644 --- a/generate-sse-vops.lisp +++ b/generate-sse-vops.lisp @@ -1,3 +1,30 @@ +#| +Copyright (c) 2005 Risto Laakso +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. +3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +|# + (defun vect-ea (vect idx) `(make-ea :dword :base ,vect :index ,idx :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag))) diff --git a/scratch/.emacs.desktop b/scratch/.emacs.desktop deleted file mode 100644 index 4fe6149..0000000 --- a/scratch/.emacs.desktop +++ /dev/null @@ -1,82 +0,0 @@ -;; -*- coding: emacs-mule; -*- -;; -------------------------------------------------------------------------- -;; Desktop File for Emacs -;; -------------------------------------------------------------------------- -;; Created Fri Aug 5 15:49:29 2005 -;; Emacs version 21.3.1 - -;; Global section: -(setq desktop-missing-file-warning nil) -(setq tags-file-name nil) -(setq tags-table-list nil) -(setq search-ring nil) -(setq regexp-search-ring nil) -(setq register-alist nil) - -;; Buffer section: -(desktop-create-buffer 205 - "/home/rlaakso/projects/sbcl-sse/sse2.lisp" - "sse2.lisp" - 'lisp-mode - '(slime-mode) - 1 - '(nil nil) - nil - nil - nil) - -(desktop-create-buffer 205 - "/home/rlaakso/projects/sbcl-sse/OPTIMIZATIONS" - "OPTIMIZATIONS" - 'fundamental-mode - nil - 161 - '(nil nil) - nil - nil - nil) - -(desktop-create-buffer 205 - "/home/rlaakso/projects/sbcl-sse/sse.lisp" - "sse.lisp" - 'lisp-mode - '(slime-mode) - 1630 - '(nil nil) - nil - nil - nil) - -(desktop-create-buffer 205 - "/home/rlaakso/projects/sbcl-sse/cpuid.lisp" - "cpuid.lisp" - 'lisp-mode - '(slime-mode) - 68 - '(1 nil) - nil - nil - nil) - -(desktop-create-buffer 205 - "/home/rlaakso/.emacs" - ".emacs" - 'emacs-lisp-mode - nil - 848 - '(nil nil) - nil - nil - nil) - -(desktop-create-buffer 205 - "/home/rlaakso/projects/sb-simd-tmp/sb-simd/scratch/cpuid.lisp" - "cpuid.lisp<2>" - 'lisp-mode - '(slime-mode) - 1442 - '(nil nil) - nil - nil - nil) - diff --git a/scratch/README b/scratch/README deleted file mode 100644 index caad35e..0000000 --- a/scratch/README +++ /dev/null @@ -1,2 +0,0 @@ -scratch space. do not look ;-) - diff --git a/scratch/asm-t1.asm b/scratch/asm-t1.asm deleted file mode 100644 index 599e6a6..0000000 --- a/scratch/asm-t1.asm +++ /dev/null @@ -1,10 +0,0 @@ - -my_func: - xor eax, eax - movups xmm0, [edx+ebx+1] - movups xmm1, [esi+ecx+1] - addps xmm0, xmm4 - addps xmm4, xmm0 - movups [ecx+1], xmm0 - - ret \ No newline at end of file diff --git a/scratch/foo.lisp b/scratch/foo.lisp deleted file mode 100644 index 0f54018..0000000 --- a/scratch/foo.lisp +++ /dev/null @@ -1,159 +0,0 @@ -(in-package :sb-vm) - -(define-vop (my-vop) - (:policy :fast-safe) - - (:args (vector1 :scs (descriptor-reg)) - (vector2 :scs (descriptor-reg))) - (:arg-types simple-array-single-float simple-array-single-float) - - (:temporary (:sc unsigned-reg) index) - -;; (:temporary (:sc unsigned-reg) temp1) -;; (:temporary (:sc unsigned-reg) temp2) - - (:temporary (:sc sse-reg) sse-temp1) - (:temporary (:sc sse-reg) sse-temp2) - - (:generator 10 - - (inst xor index index) - - - (inst movups sse-temp1 - (make-ea :dword :base vector1 :index index - :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag))) - (inst movups sse-temp2 - (make-ea :dword :base vector2 :index index - :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag))) - - (inst addps sse-temp1 sse-temp2) - -;; (inst add index 4) - - (inst movups - (make-ea :dword :base vector1 :index index - :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag)) - sse-temp1) -#| - (inst add index 4) - (inst mov - (make-ea :dword :base vector1 :index index - :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag)) - index) - - (inst add index 4) - (inst movups - (make-ea :dword :base vector1 :index index - :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag)) - sse-temp2) -|# - )) - -#| -00000000 : - 0: 31 c0 xor %eax,%eax 2: 0f 10 04 c6 movups (%esi,%eax,8),%xmm0 - 6: 0f 10 0c c7 movups (%edi,%eax,8),%xmm1 - a: 0f 58 c1 addps %xmm1,%xmm0 - d: 0f 11 44 c5 00 movups %xmm0,0x0(%ebp,%eax,8) - ---- v2: - 0: 31 c0 xor %eax,%eax - 2: 0f 10 44 03 01 movups 0x1(%ebx,%eax,1),%xmm0 - 7: 0f 10 4c 01 01 movups 0x1(%ecx,%eax,1),%xmm1 - c: 0f 58 c1 addps %xmm1,%xmm0 - f: 0f 11 44 01 01 movups %xmm0,0x1(%ecx,%eax,1) - ---- v3: - 2: 0f 10 43 01 movups 0x1(%ebx),%xmm0 - 6: 0f 10 49 01 movups 0x1(%ecx),%xmm1 - a: 0f 58 c1 addps %xmm1,%xmm0 - d: 0f 11 41 01 movups %xmm0,0x1(%ecx) - ---- v4: - 2: 0f 10 44 1a 01 movups 0x1(%edx,%ebx,1),%xmm0 - 7: 0f 10 4c 0e 01 movups 0x1(%esi,%ecx,1),%xmm1 - c: 0f 58 c1 addps %xmm1,%xmm0 - f: 0f 11 41 01 movups %xmm0,0x1(%ecx) - -10h = MOVUPS Vps, Wps -11h = MOVUPS Wps, Vps - -V = 128bit xmm reg specified by the modrm reg field. -W = 128bit xmm register or mem op specified by the modrm byte. -ps = 128bit single-precision float operand - -movups xmm0, [ebx + 01] -movups md reg r/m sc idx bse disp8 -0f 10 01 000 100 00 000 011 01 - +d8 xm0 sib *0 +0 ebx +01 -|# -#| -; 43E: L4: 31C0 XOR EAX, EAX - - 7 6 5 4 3 2 1 0 - m d r e g r / m - -44h = b 0 1 0 0 0 1 0 0 -4Ch = b 0 1 0 0 1 1 0 0 -64h = b 0 1 1 0 0 1 0 0 -E0h = b 1 1 1 0 0 0 0 0 -C1h = b 1 1 0 0 0 0 0 1 -43h = b 0 1 0 0 0 0 1 1 -49h = b 0 1 0 0 1 0 0 1 - -r/m b100 => has sib byte - -modrm md+r/m field: - r/m= 000, 001, 010, 011, 100, 101, 110 , 111 -md -00 = ax, cx, dx, bx, sib, rip+d32, si, di -01 = --||-- + disp8 , bp+disp8, .. -10 = --||-- + disp32 -11 = al/ax/eax/mmx0/xmm0, 1, 2, 3, 4, 5, 6, 7 - -modrm reg: - 000 001 010 011 100 101 110 111 -reg32 eax ecx edx ebx esp ebp esi edi -xmm xm0 xm1 xm2 xm3 xm4 xm5 xm6 xm7 ;; actually xmm0..xmm7 - - -44h = md 01, r/m 100, reg 000, => xmm0, [sib + disp8] => 44 03 01 : xmm0, [ebx + 01], 44 01 01 : xmm0, [ecx + 01] -64h = md 01, r/m 100, reg 100, => xmm4, [sib + disp8] => xmm4, [ebx + 01] -04 C6 = md 00, r/m 100, reg 000, => xmm0, [sib] => xmm0, [esi*8] -4C 01 01 = md 01, reg 001, r/m 100 => xmm1, [sib + disp8] => [ecx + 01] -43h = md 01, reg 0, r/m 011 => xmm0, [ebx + 01] -49h = md 01, reg 1, r/m 001 => xmm1, [ecx + 01] - - - 7 6 5 4 3 2 1 0 - s c i d x b a s - -03h = b 0 0 0 0 0 0 1 1 = eax + ebx*1 -01h = b 0 0 0 0 0 0 0 1 = eax + ecx*1 -C6h = b 1 1 0 0 0 1 1 0 = eax + esi*8 -1Ah = b 0 0 0 1 1 0 1 0 = ebx + edx*1 -0Eh = b 0 0 0 0 1 1 1 0 = ecx + esi*1 - - -;; movups xmm0, ea 0F 10 44 03 01 -; 440: 0F 10 44 01 01 movups xmm0, [eax + ecx + 01] - -;; movups xmm1, ea 0F 10 4C 01 01 -; 445: 0F 10 64 03 01 movups xmm4, [eax + ebx + 01] - -;; addps xmm0, xmm1 0F 58 C1 -; 44A: 0F 58 E0 addps xmm0, xmm4 - -;; movups ea, xmm0 0f 11 44 01 01 - 0F 11 44 01 01 movups [eax + ecx + 01], xmm0 - -; 452: 83C004 ADD EAX, 4 - - --- - - c: 0f 58 c4 addps %xmm4,%xmm0 - f: 0f 58 e0 addps %xmm0,%xmm4 - -|# diff --git a/scratch/sse.lisp b/scratch/sse.lisp deleted file mode 100644 index db99a8a..0000000 --- a/scratch/sse.lisp +++ /dev/null @@ -1,125 +0,0 @@ -;;; From sb-devel post by Christophe Rhodes -(in-package :cl-user) - -(defun %vector+ (result vector1 vector2) - (loop for x across vector1 - for y across vector2 - for i from 0 - do (setf (aref result i) (+ x y)))) - -(in-package :sb-c) - -;; kludge -(ignore-errors (defknown cl-user::%vector+ (vector vector vector) vector)) - -(in-package :sb-vm) - -(pushnew :sse2 *backend-subfeatures*) - -(define-vop (vector+/simple-array-signed-byte-30) - (:translate cl-user::%vector+) - (:policy :fast) - (:args (result :scs (descriptor-reg)) - (vector1 :scs (descriptor-reg)) - (vector2 :scs (descriptor-reg))) - (:arg-types simple-array-signed-byte-30 simple-array-signed-byte-30 simple-array-signed-byte-30) - (:temporary (:sc any-reg) temp) - (:temporary (:sc unsigned-reg) index) - (:temporary (:sc unsigned-reg) length) - (:generator 30 - (let ((top (gen-label)) - (end (gen-label))) - (loadw length result vector-length-slot other-pointer-lowtag) - ;; check that the result vector doesn't have length 0 - (inst cmp length 0) - (inst jmp :e end) - ;; zero the index - (inst xor index index) - (emit-label top) - ;; this is wasteful if one of the arguments is the same as the - ;; result; we can save a mov - (inst mov temp (make-ea :dword :base vector1 :index index - :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag))) - (inst add temp (make-ea :dword :base vector2 :index index - :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag))) - (inst mov (make-ea :dword :base result :index index - :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag)) temp) - (inst add index (fixnumize 1)) - (inst cmp index length) - (inst jmp :ne top) - (emit-label end)))) - -(define-vop (vector+/simple-array-signed-byte-30-sse) - (:translate cl-user::%vector+) - (:policy :fast) - (:args (result :scs (descriptor-reg)) - (vector1 :scs (descriptor-reg)) - (vector2 :scs (descriptor-reg))) - (:arg-types simple-array-signed-byte-30 simple-array-signed-byte-30 simple-array-signed-byte-30) - (:temporary (:sc any-reg) temp) - (:temporary (:sc sse-reg) sse-temp1) - (:temporary (:sc sse-reg) sse-temp2) - (:temporary (:sc unsigned-reg) index) - (:temporary (:sc unsigned-reg) length) - (:guard (member :sse2 *backend-subfeatures*)) - (:generator 25 - (let ((top (gen-label)) - (two (gen-label)) - (four (gen-label)) - (end (gen-label))) - (loadw length result vector-length-slot other-pointer-lowtag) - ;; check that the result vector doesn't have length 0 - (inst cmp length 0) - (inst jmp :e end) - ;; zero the index - (inst xor index index) - (emit-label top) - (inst test length 1) - (inst jmp :z two) - (inst mov temp (make-ea :dword :base vector1 :index index - :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag))) - (inst add temp (make-ea :dword :base vector2 :index index - :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag))) - (inst mov (make-ea :dword :base result :index index - :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag)) temp) - (inst add index (fixnumize 1)) - (inst cmp index length) - (inst jmp :e end) - (emit-label two) - ;; eventually at this point we put in a quadword add, but that - ;; would be one more instruction to write. - (inst test length 2) - (inst mov temp (make-ea :dword :base vector1 :index index - :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag))) - (inst add temp (make-ea :dword :base vector2 :index index - :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag))) - (inst mov (make-ea :dword :base result :index index - :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag)) temp) - (inst add index (fixnumize 1)) - (inst cmp index length) - (inst jmp :e end) - (inst mov temp (make-ea :dword :base vector1 :index index - :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag))) - (inst add temp (make-ea :dword :base vector2 :index index - :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag))) - (inst mov (make-ea :dword :base result :index index - :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag)) temp) - (inst add index (fixnumize 1)) - (inst cmp index length) - (inst jmp :e end) - (emit-label four) - ;; here, we do double quadword additions until we hit the end of - ;; the computation. No guarantees about alignment, so we have to - ;; use movdqu. - (inst movdqu sse-temp1 (make-ea :dword :base vector1 :index index - :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag))) - ;; KLUDGE: We're using :dword EAs here. This is possibly non-optimal. - (inst movdqu sse-temp2 (make-ea :dword :base vector2 :index index - :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag))) - (inst paddd sse-temp1 sse-temp2) - (inst movdqu sse-temp1 (make-ea :dword :base result :index index - :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag))) - (inst add index (fixnumize 4)) - (inst cmp index length) - (inst jmp :ne four) - (emit-label end)))) diff --git a/scratch/sse2.lisp b/scratch/sse2.lisp deleted file mode 100644 index 78b95ac..0000000 --- a/scratch/sse2.lisp +++ /dev/null @@ -1,33 +0,0 @@ -;;; From sb-devel post by Christophe Rhodes -(in-package :cl-user) - -(declaim (inline fixnum/vector+)) -(defun fixnum/vector+ (vector1 vector2) - (let ((result (make-array (length vector1) :element-type 'fixnum))) - (dotimes (i 1000000) - (declare (fixnum i)) - (%vector+ result vector1 vector2)) - result)) - -(defun foo () - (declare (optimize (speed 3) (safety 0))) - (let ((x (make-array 1000 :element-type 'fixnum - :initial-contents (loop for x fixnum from 0 to 999 collect x))) - (y (make-array 1000 :element-type 'fixnum - :initial-contents (loop for x fixnum from 0 to 999 collect x)))) - (fixnum/vector+ x y))) - -(defun bar () - (declare (optimize (speed 3) (safety 0))) - (let ((x (make-array 1000 :element-type 'fixnum - :initial-contents (loop for x fixnum from 0 to 999 collect x))) - (y (make-array 1000 :element-type 'fixnum - :initial-contents (loop for x fixnum from 0 to 999 collect x)))) - (let ((result (make-array 1000 :element-type 'fixnum))) - (dotimes (j 1000000) - (declare (fixnum j)) - (loop for tx across x - for ty across y - for i fixnum upfrom 0 - do (setf (aref result i) (+ tx ty)))) - result))) -- 2.11.4.GIT From c62d652c91997d112d2a6fff053ba86949ad90c6 Mon Sep 17 00:00:00 2001 From: rlaakso Date: Mon, 8 Aug 2005 17:26:08 +0000 Subject: [PATCH 08/16] .. --- example-test.lisp | 52 ++++++++++++++++++++++++++++++++++ generate-sse-vops.lisp | 75 +++++++++++++++++++++++++++++++++++++++++++------- 2 files changed, 117 insertions(+), 10 deletions(-) diff --git a/example-test.lisp b/example-test.lisp index 6be574d..32ddd56 100644 --- a/example-test.lisp +++ b/example-test.lisp @@ -39,3 +39,55 @@ (format t "After: ~S~%~S~%" arr1 arr2) )) + +(defparameter +sse-highbit-single-float-mask+ (make-array 16 :element-type '(unsigned-byte 8) + :initial-contents '(0 0 0 128 + 0 0 0 128 + 0 0 0 128 + 0 0 0 128))) +(defparameter +sse-lowbits-single-float-mask+ (make-array 16 :element-type '(unsigned-byte 8) + :initial-contents '(255 255 255 127 + 255 255 255 127 + 255 255 255 127 + 255 255 255 127))) + +(defun sign (float-array) + (let ((res (make-array 16 :element-type '(unsigned-byte 8) :initial-element 0))) + + (sb-sys:%primitive sb-vm::%SSE-AND/SIMPLE-ARRAY-SINGLE-FLOAT/SIMPLE-ARRAY-UNSIGNED-BYTE-8-1 + res + float-array + +sse-highbit-single-float-mask+ + 0) + (values-list (mapcar #'(lambda (x) (/= x 0)) (list (aref res 3) (aref res 7) (aref res 11) (aref res 15)))))) + +(defun %neg (float-array) + (let ((res (make-array 4 :element-type 'single-float :initial-element 0f0))) + + (sb-sys:%primitive sb-vm::%SSE-XOR/SIMPLE-ARRAY-SINGLE-FLOAT/SIMPLE-ARRAY-UNSIGNED-BYTE-8-1 + res + float-array + +sse-highbit-single-float-mask+ + 0) + res)) + +(defun %abs (float-array) + (let ((res (make-array 4 :element-type 'single-float :initial-element 0f0))) + + (sb-sys:%primitive sb-vm::%SSE-AND/SIMPLE-ARRAY-SINGLE-FLOAT/SIMPLE-ARRAY-UNSIGNED-BYTE-8-1 + res + float-array + +sse-lowbits-single-float-mask+ + 0) + res)) + +(defun test-sign () + (let ((arr1 (make-array 10 :element-type 'single-float :initial-element 0f0))) + (loop for i from 0 below 10 do (setf (aref arr1 i) + (float (* (expt -1 i) (- (* (1+ i) 10) (* 2 i i)))))) + (format t "array: ~S~%" arr1) + (multiple-value-bind (s1 s2 s3 s4) (sign arr1) + (format t "sign0->3: ~A ~A ~A ~A~%" s1 s2 s3 s4)) + (format t "neg: ~S~%" (%neg arr1)) + (format t "abs: ~S~%" (%abs arr1)) + t)) \ No newline at end of file diff --git a/generate-sse-vops.lisp b/generate-sse-vops.lisp index e091688..eb1a36e 100644 --- a/generate-sse-vops.lisp +++ b/generate-sse-vops.lisp @@ -25,8 +25,8 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |# -(defun vect-ea (vect idx) - `(make-ea :dword :base ,vect :index ,idx +(defun vect-ea (vect &optional (idx nil)) + `(make-ea :dword :base ,vect ,@(if idx `(:index ,idx)) :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag))) (defun gen-vops-to-file (filename) @@ -43,32 +43,32 @@ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ;; single float (add single-float movups addps 4) (addsub single-float movups addsubps 4) - (andnot single-float movups andnps 4) - (and single-float movups andps 4) +;; (andnot single-float movups andnps 4) +;; (and single-float movups andps 4) (div single-float movups divps 4) (hadd single-float movups haddps 4) (hsub single-float movups hsubps 4) (max single-float movups maxps 4) (min single-float movups minps 4) (mul single-float movups mulps 4) - (or single-float movups orps 4) +;; (or single-float movups orps 4) (sub single-float movups subps 4) - (xor single-float movups xorps 4) +;; (xor single-float movups xorps 4) ;; double float (add double-float movupd addpd 8) (addsub double-float movupd addsubpd 8) - (andnot double-float movupd andnpd 8) - (and double-float movupd andpd 8) +;; (andnot double-float movupd andnpd 8) +;; (and double-float movupd andpd 8) (div double-float movupd divpd 8) (hadd double-float movupd haddpd 8) (hsub double-float movupd hsubpd 8) (max double-float movupd maxpd 8) (min double-float movupd minpd 8) (mul double-float movupd mulpd 8) - (or double-float movupd orpd 8) +;; (or double-float movupd orpd 8) (sub double-float movupd subpd 8) - (xor double-float movupd xorpd 8) +;; (xor double-float movupd xorpd 8) ;; unsigned byte 8 (add unsigned-byte-8 movdqu paddb 1) @@ -151,6 +151,61 @@ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. (inst ,mov-inst ,(vect-ea 'result 'index) sse-temp1) )))) + ;; TWO-ARG SSE VOPs w/ DIFFERENT ARG TYPES + (loop for (op-name type1 type2 mov-inst1 mov-inst2 op-inst elem-width) in + '( + (andnot single-float unsigned-byte-8 movups movdqu andnps 4) + (and single-float unsigned-byte-8 movups movdqu andps 4) + (or single-float unsigned-byte-8 movups movdqu orps 4) + (xor single-float unsigned-byte-8 movups movdqu xorps 4) + + (andnot double-float unsigned-byte-8 movupd movdqu andnpd 4) + (and double-float unsigned-byte-8 movupd movdqu andpd 4) + (or double-float unsigned-byte-8 movupd movdqu orpd 4) + (xor double-float unsigned-byte-8 movupd movdqu xorpd 4) + ) + do + (format stream "~S~%~%" + `(define-vop (,(intern (let ((name (format nil "%SSE-~A/SIMPLE-ARRAY-~A/SIMPLE-ARRAY-~A-1" op-name type1 type2))) + (format t "; defining VOP ~A..~%" name) + name))) + + (:policy :fast-safe) + + ;;(:guard (member :sse2 *backend-subfeatures*)) + + (:args + (result :scs (descriptor-reg)) + (vect1 :scs (descriptor-reg)) + (vect2 :scs (descriptor-reg)) + (index :scs (unsigned-reg))) + + (:arg-types + ,(intern (format nil "SIMPLE-ARRAY-~A" type2)) + ,(intern (format nil "SIMPLE-ARRAY-~A" type1)) + ,(intern (format nil "SIMPLE-ARRAY-~A" type2)) + fixnum) + + (:temporary (:sc sse-reg) sse-temp1) + (:temporary (:sc sse-reg) sse-temp2) + + (:generator 10 + + ;; scale index by 4 (size-of single-float) + (inst shl index ,(floor (log elem-width 2))) + + ;; load + (inst ,mov-inst1 sse-temp1 ,(vect-ea 'vect1 'index)) + (inst ,mov-inst2 sse-temp2 ,(vect-ea 'vect2)) + + ;; operate + (inst ,op-inst sse-temp1 sse-temp2) + + ;; store + (inst ,mov-inst2 ,(vect-ea 'result 'index) sse-temp1) + )))) + + ;; SINGLE-ARG SSE VOPs (loop for (op-name type mov-inst op-inst elem-width) in '( -- 2.11.4.GIT From 816a067ef3caab29eda189e48061f26254a704f0 Mon Sep 17 00:00:00 2001 From: rlaakso Date: Tue, 9 Aug 2005 09:45:36 +0000 Subject: [PATCH 09/16] .. --- load.lisp | 10 ++++- sse-matrix.lisp | 129 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ test-matrix.lisp | 83 +++++++++++++++++++++++++++++++++++ timing.lisp | 49 +++++++++++++++++++++ 4 files changed, 270 insertions(+), 1 deletion(-) create mode 100644 sse-matrix.lisp create mode 100644 test-matrix.lisp create mode 100644 timing.lisp diff --git a/load.lisp b/load.lisp index d85b1d2..56dd962 100644 --- a/load.lisp +++ b/load.lisp @@ -1,6 +1,14 @@ -(if t +(if nil (progn (load (compile-file "sse-vops.lisp")) (load (compile-file "example-test.lisp")) )) + +(if t + (progn + (load (compile-file "sse-matrix.lisp")) + (load (compile-file "timing.lisp")) + (load (compile-file "test-matrix.lisp")) + )) + \ No newline at end of file diff --git a/sse-matrix.lisp b/sse-matrix.lisp new file mode 100644 index 0000000..c38deb9 --- /dev/null +++ b/sse-matrix.lisp @@ -0,0 +1,129 @@ +#| +Copyright (c) 2005 Risto Laakso +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. +3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +|# +#| + +http://developer.intel.com/design/pentiumiii/sml/24504501.pdf + +|# +(in-package :sb-vm) + +(defmacro vect-ea (base &optional idx) + (let ((disp + (if (and idx (numberp idx)) + `(+ (- (* VECTOR-DATA-OFFSET N-WORD-BYTES) OTHER-POINTER-LOWTAG) ,idx) + `(- (* VECTOR-DATA-OFFSET N-WORD-BYTES) OTHER-POINTER-LOWTAG)))) + +;; (format t "ea ~A ~A ~A~%" base idx (and idx (symbolp idx))) + (if (and idx (symbolp idx)) + `(make-ea :dword :base ,base :index ,idx :disp ,disp) + `(make-ea :dword :base ,base :disp ,disp)))) + +(DEFINE-VOP (%sse-matrix-mul-3x3/single-float) + (:POLICY :FAST-SAFE) + (:ARGS (RESULT :SCS (DESCRIPTOR-REG)) + (MAT1 :SCS (DESCRIPTOR-REG)) + (MAT2 :SCS (DESCRIPTOR-REG))) + (:ARG-TYPES SIMPLE-ARRAY-SINGLE-FLOAT + SIMPLE-ARRAY-SINGLE-FLOAT + SIMPLE-ARRAY-SINGLE-FLOAT) + + (:TEMPORARY (:SC SSE-REG) X0) + (:TEMPORARY (:SC SSE-REG) X1) + (:TEMPORARY (:SC SSE-REG) X2) + (:TEMPORARY (:SC SSE-REG) X3) + (:TEMPORARY (:SC SSE-REG) X4) + (:TEMPORARY (:SC SSE-REG) X5) + (:TEMPORARY (:SC SSE-REG) X6) + (:TEMPORARY (:SC SSE-REG) X7) + + (:GENERATOR 10 + (inst movss x2 (vect-ea mat2 32)) + (inst movhps x2 (vect-ea mat2 24)) + + (inst movss x3 (vect-ea mat1)) + (inst movss x4 (vect-ea mat1 4)) + + (inst movss x0 (vect-ea mat2)) + (inst movhps x0 (vect-ea mat2 4)) + (inst shufps x2 x2 #X36) + (inst shufps x3 x3 0) + + (inst movss x1 (vect-ea mat2 12)) + (inst movhps x1 (vect-ea mat2 16)) + + (inst shufps x4 x4 0) + (inst mulps x3 x0) + (inst movss x5 (vect-ea mat1 8)) + (inst movss x6 (vect-ea mat1 12)) + (inst mulps x4 x1) + (inst shufps x5 x5 0) + (inst mulps x5 x2) + (inst shufps x6 x6 0) + (inst mulps x6 x0) + (inst addps x3 x4) + + (inst movss x7 (vect-ea mat1 16)) + (inst movss x4 (vect-ea mat1 28)) + + (inst shufps x7 x7 0) + (inst addps x3 x5) + (inst mulps x7 x1) + + (inst shufps x4 x4 0) + + (inst movss x5 (vect-ea mat1 20)) + (inst shufps x5 x5 0) + (inst mulps x4 x1) + + (inst mulps x5 x2) + (inst addps x6 x7) + + (inst movss x1 (vect-ea mat1 24)) + + (inst movss (Vect-ea result) x3) + (inst movhpd (vect-ea result 4) x3) + + (inst addps x6 x5) + (inst shufps x1 x1 0) + + (inst movss x5 (vect-ea mat1 32)) + (inst mulps x1 x0) + (inst shufps x5 x5 0) + + (inst movss (vect-ea result 12) x6) + (inst mulps x5 x2) + (inst addps x1 x4) + (inst movhps (vect-ea result 16) x6) + (inst addps x1 x5) + (inst shufps x1 x1 #x8F) + + (inst movhps (vect-ea result 24) x1) + (inst movss (vect-ea result 32) x1) + )) + + + diff --git a/test-matrix.lisp b/test-matrix.lisp new file mode 100644 index 0000000..c404ccb --- /dev/null +++ b/test-matrix.lisp @@ -0,0 +1,83 @@ +#| +Copyright (c) 2005 Risto Laakso +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. +3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +|# +(in-package :cl-user) + +(declaim (optimize (speed 3) (space 0) (safety 0) (debug 0))) + + +(defun test-matrix (&optional (test-count 10000000)) + (let ((mat1 (make-array 9 :element-type 'single-float :initial-element 0f0)) + (mat2 (make-array 9 :element-type 'single-float :initial-element 0f0)) + (naive #()) + (sse #()) + ) + (declare (type (simple-vector) naive sse) (type fixnum test-count)) + + (loop for i of-type fixnum from 0 below 9 do (setf (aref mat1 i) (float (random 1f6)) + (aref mat2 i) (float (random 1f6)))) + + (format t "Data: ~S~%~S~%" mat1 mat2) + + (setf naive (naive-mul33 mat1 mat2) + sse (sse-mul33 mat1 mat2)) + + (format t "naive mul: ~S~%" naive) + (format t "sse mul: ~S~%" sse) + (format t "EQUALP? ~A~%" (loop for equal = t + for n of-type single-float across naive + for s of-type single-float across sse + when (/= n s) do (setq equal nil) + finally (return equal))) + (format t "naive, ~D ops ~%" test-count) + (time-sample-form #'(lambda () + (dotimes (i test-count) + (setf naive (naive-mul33 mat1 mat2))))) + + (format t "sse, ~D ops ~%" test-count) + (time-sample-form #'(lambda () + (dotimes (i test-count) + (setf sse (sse-mul33 mat1 mat2))))) + + )) + +(defun sse-mul33 (mat1 mat2) + (let ((res (make-array 9 :element-type 'single-float :initial-element 0f0))) + (declare (type (simple-array single-float (9)) mat1 mat2 res)) + (sb-sys:%primitive sb-vm::%sse-matrix-mul-3x3/single-float res mat1 mat2) + res)) + +(defun naive-mul33 (mat1 mat2) + (let ((res (make-array 9 :element-type 'single-float :initial-element 0f0))) + (declare (type (simple-array single-float (9)) mat1 mat2 res)) + (loop for row of-type fixnum from 0 to 2 do + (loop for col of-type fixnum from 0 to 2 do + (loop for elt of-type fixnum from 0 to 2 do + (incf (aref res (+ (* row 3) col)) + (* (aref mat1 (+ (* row 3) elt)) (aref mat2 (+ (* elt 3) col))))))) + res)) + + diff --git a/timing.lisp b/timing.lisp new file mode 100644 index 0000000..1316633 --- /dev/null +++ b/timing.lisp @@ -0,0 +1,49 @@ +#| +Copyright (c) 2005 Risto Laakso +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. +3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +|# +(in-package :cl-user) + +(defun time-sample-form (form &optional &key (samples 10)) + (let (start end times) + (dotimes (i samples) + (setq start (get-internal-real-time)) + (funcall form) + (setq end (get-internal-real-time)) + (push (- end start) times)) + + (flet ((calc-avg (list) (float (/ (apply #'+ list) (length list)))) + (sq (x) (* x x))) + + (let* ((avg (calc-avg times)) + (sq-times (mapcar #'sq times)) + (stddev (sqrt (- (calc-avg sq-times) (sq (calc-avg times))))) + ) +;; (format t "; times ~S, sqtimes ~S~%" times sq-times) + (format t "; ~D samples, avg ~5F sec, stddev ~5F sec~%" + samples + (/ avg internal-time-units-per-second) + (/ stddev internal-time-units-per-second)))))) + -- 2.11.4.GIT From be51dd71f74fcf71e7307719de0bb7b48ab6df74 Mon Sep 17 00:00:00 2001 From: rlaakso Date: Tue, 9 Aug 2005 17:18:15 +0000 Subject: [PATCH 10/16] *** empty log message *** --- detect-simd.lisp | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ load.lisp | 7 +++++- sb-simd.asd | 16 ++++++++++++++ 3 files changed, 89 insertions(+), 1 deletion(-) create mode 100644 detect-simd.lisp create mode 100644 sb-simd.asd diff --git a/detect-simd.lisp b/detect-simd.lisp new file mode 100644 index 0000000..cb4bd58 --- /dev/null +++ b/detect-simd.lisp @@ -0,0 +1,67 @@ +#| +Copyright (c) 2005 Risto Laakso +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. +3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +|# +(in-package :sb-vm) + +;; simpler version + +(define-vop (%detect-simd/x86) + (:policy :fast) + + (:results (res :scs (descriptor-reg))) + + (:temporary (:sc unsigned-reg :offset eax-offset :from (:argument 0)) eax) + (:temporary (:sc unsigned-reg :offset ebx-offset) ebx) + (:temporary (:sc unsigned-reg :offset ecx-offset) ecx) + (:temporary (:sc unsigned-reg :offset edx-offset) edx) + + (:generator 10 + (inst mov eax 1) + (inst cpuid) + (inst mov ebx edx) + + ;; sse3 + (inst and ecx 1) + (inst shl ecx 2) + + ;; sse2/3 + (inst shr edx 25) + (inst and edx #b11) + (inst or edx ecx) + + ;; fixnumize + (inst shl edx 2) + + (inst mov res edx))) + + +(eval-when (:load-toplevel) + (let ((res (sb-sys:%primitive sb-vm::%detect-simd/x86))) +;; (format t "res is ~A~%" res) + (if (/= (logand res #b001) 0) (pushnew :sse sb-vm::*backend-subfeatures*)) + (if (/= (logand res #b010) 0) (pushnew :sse2 sb-vm::*backend-subfeatures*)) + (if (/= (logand res #b100) 0) (pushnew :sse3 sb-vm::*backend-subfeatures*)))) + \ No newline at end of file diff --git a/load.lisp b/load.lisp index 56dd962..d17b614 100644 --- a/load.lisp +++ b/load.lisp @@ -4,11 +4,16 @@ (load (compile-file "example-test.lisp")) )) -(if t +(if nil (progn (load (compile-file "sse-matrix.lisp")) (load (compile-file "timing.lisp")) (load (compile-file "test-matrix.lisp")) )) +(if t + (progn + (load (compile-file "detect-simd.lisp")) + (load (compile-file "push-simd-features.lisp")) + )) \ No newline at end of file diff --git a/sb-simd.asd b/sb-simd.asd new file mode 100644 index 0000000..3707c3d --- /dev/null +++ b/sb-simd.asd @@ -0,0 +1,16 @@ +;;; -*- Lisp -*- + +(in-package #:asdf) + +(defsystem sb-simd + :version "cvs" + :components ((:file "detect-simd") + (:file "sse-vops" :depends-on ("detect-simd")) + (:file "sse-matrix" :depends-on ("sse-vops"))) + ) + +(defmethod perform :after ((o load-op (c (eql (find-system :sb-simd))))) + (provide 'sb-simd)) + + + -- 2.11.4.GIT From 20585b1158ba6f82925157f5413ea07ff98cac6d Mon Sep 17 00:00:00 2001 From: rlaakso Date: Fri, 12 Aug 2005 11:55:38 +0000 Subject: [PATCH 11/16] *** empty log message *** --- TODO | 19 ----- VOP.txt | 10 +++ detect-simd.lisp | 8 --- generate-sse-instructions.lisp | 38 ++++++---- load.lisp | 4 +- sbcl-src/src/compiler/x86/insts.lisp | 134 ++++++++++++++++++++++++++++------ sbcl-src/src/compiler/x86/vm.lisp | 30 ++++---- sse-seq.lisp | 136 +++++++++++++++++++++++++++++++++++ test-seq.lisp | 64 +++++++++++++++++ 9 files changed, 367 insertions(+), 76 deletions(-) create mode 100644 VOP.txt create mode 100644 sse-seq.lisp create mode 100644 test-seq.lisp diff --git a/TODO b/TODO index 0260311..8b13789 100644 --- a/TODO +++ b/TODO @@ -1,20 +1 @@ -operations from AMD manual: - - -sign: - -andps xmm0, #x8000 0000 8000 0000 .. - -neg: - -xorps xmm0, #x8000 0000 8000 0000 .. - -abs: - -andps xmm0, #x7FFF FFFF 7FFF FFFF .. - -clear: - -xorps xmm0, xmm0 - diff --git a/VOP.txt b/VOP.txt new file mode 100644 index 0000000..d950d13 --- /dev/null +++ b/VOP.txt @@ -0,0 +1,10 @@ + +(loadw length seq1 vector-length-slot other-pointer-lowtag) +=> length will be length of seq1. This is fixnum, so (inst shr length 2) to get real length + +(psrldq xmm0 32) +=> this doesn't work ? because immediate is number of bytes, not bits (like shr). + +(inst mov result 1) +=> page fault ? results should be fixnumized, (inst shl result 2) + diff --git a/detect-simd.lisp b/detect-simd.lisp index cb4bd58..92161d7 100644 --- a/detect-simd.lisp +++ b/detect-simd.lisp @@ -57,11 +57,3 @@ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. (inst mov res edx))) - -(eval-when (:load-toplevel) - (let ((res (sb-sys:%primitive sb-vm::%detect-simd/x86))) -;; (format t "res is ~A~%" res) - (if (/= (logand res #b001) 0) (pushnew :sse sb-vm::*backend-subfeatures*)) - (if (/= (logand res #b010) 0) (pushnew :sse2 sb-vm::*backend-subfeatures*)) - (if (/= (logand res #b100) 0) (pushnew :sse3 sb-vm::*backend-subfeatures*)))) - \ No newline at end of file diff --git a/generate-sse-instructions.lisp b/generate-sse-instructions.lisp index 5de9881..2bea781 100644 --- a/generate-sse-instructions.lisp +++ b/generate-sse-instructions.lisp @@ -42,18 +42,6 @@ MOVDQ2Q . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . MOVQ2DQ . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 208 -(ib-forms:) -PSLLD. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 323 -PSLLDQ . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 326 -PSLLQ. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 328 -PSLLW . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 330 -PSRAD . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 333 -PSRAW . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 336 -PSRLD . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 339 -PSRLDQ . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 342 -PSRLQ . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 344 -PSRLW . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 347 - STMXCSR . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 410 @@ -339,7 +327,31 @@ STMXCSR . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ,@(emit-ops ops-m2r) (emit-ea segment src (reg-tn-encoding dst))) (t ,@(emit-ops ops-r2m) - (emit-ea segment dst (reg-tn-encoding src))))))))) + (emit-ea segment dst (reg-tn-encoding src)))))))) + + ;; misc + (loop for (name mode . opcodes) in + '( + (pslld-ib 6 #x66 #x0F #x72) + (pslldq-ib 7 #x66 #x0F #x73) + (psllq-ib 6 #x66 #x0F #x73) + (psllw-ib 6 #x66 #x0F #x71) + (psrad-ib 4 #x66 #x0F #x72) + (psraw-ib 4 #x66 #x0F #x71) + (psrld-ib 2 #x66 #x0F #x72) + (psrldq-ib 3 #x66 #x0F #x73) + (psrlq-ib 2 #x66 #x0F #x73) + (psrlw-ib 2 #x66 #x0F #x71) + ) + do + (format stream "~S~%~%" + `(define-instruction ,(intern (symbol-name name)) (segment dst amount) + (:emitter + ,@(emit-ops opcodes) + (emit-ea segment dst ,mode) + (emit-byte segment amount))))) + + ) (defun gen-ops-to-file (filename) (with-open-file (stream filename :direction :output :if-exists :supersede) diff --git a/load.lisp b/load.lisp index d17b614..e71fcf9 100644 --- a/load.lisp +++ b/load.lisp @@ -14,6 +14,8 @@ (if t (progn (load (compile-file "detect-simd.lisp")) - (load (compile-file "push-simd-features.lisp")) + (load (compile-file "timing.lisp")) + (load (compile-file "sse-seq.lisp")) + (load (compile-file "test-seq.lisp")) )) \ No newline at end of file diff --git a/sbcl-src/src/compiler/x86/insts.lisp b/sbcl-src/src/compiler/x86/insts.lisp index b64678c..3acaa01 100644 --- a/sbcl-src/src/compiler/x86/insts.lisp +++ b/sbcl-src/src/compiler/x86/insts.lisp @@ -39,13 +39,16 @@ #(ax cx dx bx sp bp si di)) (defparameter *dword-reg-names* #(eax ecx edx ebx esp ebp esi edi)) +(defparameter *xmmword-reg-names* + #(xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7)) (defun print-reg-with-width (value width stream dstate) (declare (ignore dstate)) (princ (aref (ecase width (:byte *byte-reg-names*) (:word *word-reg-names*) - (:dword *dword-reg-names*)) + (:dword *dword-reg-names*) + (:xmmword *xmmword-reg-names*)) value) stream) ;; XXX plus should do some source-var notes @@ -193,7 +196,7 @@ (:word 16) (:dword 32) (:qword 64) - (:dqword 128) + (:xmmword 128) (:float 32) (:double 64))) @@ -680,7 +683,7 @@ (defstruct (ea (:constructor make-ea (size &key base index scale disp)) (:copier nil)) - (size nil :type (member :byte :word :dword)) + (size nil :type (member :byte :word :dword :xmmword)) (base nil :type (or tn null)) (index nil :type (or tn null)) (scale 1 :type (member 1 2 4 8)) @@ -720,7 +723,7 @@ (ecase (sb-name (sc-sb (tn-sc thing))) (registers (emit-mod-reg-r/m-byte segment #b11 reg (reg-tn-encoding thing))) - (sse-registers + (xmm-registers (emit-mod-reg-r/m-byte segment #b11 reg (reg-tn-encoding thing))) (stack ;; Convert stack tns into an index off of EBP. @@ -834,9 +837,18 @@ (and (tn-p thing) (eq (sb-name (sc-sb (tn-sc thing))) 'registers))) -(defun sse-register-p (thing) +(defun xmm-register-p (thing) (and (tn-p thing) - (eq (sb-name (sc-sb (tn-sc thing))) 'sse-registers))) + (eq (sb-name (sc-sb (tn-sc thing))) 'xmm-registers) + (member (sc-name (tn-sc thing)) *xmmword-sc-names*) + t)) + +(defun xmm-ea-p (thing) + (typecase thing + (ea (eq (ea-size thing) :xmmword)) + (tn + (and (member (sc-name (tn-sc thing)) *xmmword-sc-names*) t)) + (t nil))) (defun accumulator-p (thing) (and (register-p thing) @@ -867,6 +879,8 @@ :float) (#.*double-sc-names* :double) + (#.*xmmword-sc-names* + :xmmword) (t (error "can't tell the size of ~S ~S" thing (sc-name (tn-sc thing)))))) (ea @@ -3210,7 +3224,7 @@ (SEGMENT DST SRC) (:EMITTER (COND - ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) + ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 40) (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) (T (EMIT-BYTE SEGMENT 102) @@ -3222,7 +3236,7 @@ (SEGMENT DST SRC) (:EMITTER (COND - ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 15) + ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 40) (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) (T (EMIT-BYTE SEGMENT 15) @@ -3233,7 +3247,7 @@ (SEGMENT DST SRC) (:EMITTER (COND - ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) + ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 110) (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) (T (EMIT-BYTE SEGMENT 102) @@ -3245,7 +3259,7 @@ (SEGMENT DST SRC) (:EMITTER (COND - ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) + ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 111) (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) (T (EMIT-BYTE SEGMENT 102) @@ -3257,7 +3271,7 @@ (SEGMENT DST SRC) (:EMITTER (COND - ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 243) + ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 243) (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 111) (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) (T (EMIT-BYTE SEGMENT 243) @@ -3269,7 +3283,7 @@ (SEGMENT DST SRC) (:EMITTER (COND - ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) + ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 22) (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) (T (EMIT-BYTE SEGMENT 102) @@ -3281,7 +3295,7 @@ (SEGMENT DST SRC) (:EMITTER (COND - ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 15) + ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 22) (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) (T (EMIT-BYTE SEGMENT 15) @@ -3292,7 +3306,7 @@ (SEGMENT DST SRC) (:EMITTER (COND - ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) + ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 18) (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) (T (EMIT-BYTE SEGMENT 102) @@ -3304,7 +3318,7 @@ (SEGMENT DST SRC) (:EMITTER (COND - ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 15) + ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 18) (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) (T (EMIT-BYTE SEGMENT 15) @@ -3315,7 +3329,7 @@ (SEGMENT DST SRC) (:EMITTER (COND - ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 243) + ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 243) (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 126) (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) (T (EMIT-BYTE SEGMENT 102) @@ -3327,7 +3341,7 @@ (SEGMENT DST SRC) (:EMITTER (COND - ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 242) + ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 242) (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 16) (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) (T (EMIT-BYTE SEGMENT 242) @@ -3339,7 +3353,7 @@ (SEGMENT DST SRC) (:EMITTER (COND - ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 243) + ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 243) (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 16) (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) (T (EMIT-BYTE SEGMENT 243) @@ -3351,7 +3365,7 @@ (SEGMENT DST SRC) (:EMITTER (COND - ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) + ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 16) (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) (T (EMIT-BYTE SEGMENT 102) @@ -3363,13 +3377,93 @@ (SEGMENT DST SRC) (:EMITTER (COND - ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 15) + ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 16) (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) (T (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 17) (EMIT-EA SEGMENT DST (REG-TN-ENCODING SRC)))))) +(DEFINE-INSTRUCTION PSLLD-IB + (SEGMENT DST AMOUNT) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 114) + (EMIT-EA SEGMENT DST 6) + (EMIT-BYTE SEGMENT AMOUNT))) + +(DEFINE-INSTRUCTION PSLLDQ-IB + (SEGMENT DST AMOUNT) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 115) + (EMIT-EA SEGMENT DST 7) + (EMIT-BYTE SEGMENT AMOUNT))) + +(DEFINE-INSTRUCTION PSLLQ-IB + (SEGMENT DST AMOUNT) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 115) + (EMIT-EA SEGMENT DST 6) + (EMIT-BYTE SEGMENT AMOUNT))) + +(DEFINE-INSTRUCTION PSLLW-IB + (SEGMENT DST AMOUNT) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 113) + (EMIT-EA SEGMENT DST 6) + (EMIT-BYTE SEGMENT AMOUNT))) + +(DEFINE-INSTRUCTION PSRAD-IB + (SEGMENT DST AMOUNT) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 114) + (EMIT-EA SEGMENT DST 4) + (EMIT-BYTE SEGMENT AMOUNT))) + +(DEFINE-INSTRUCTION PSRAW-IB + (SEGMENT DST AMOUNT) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 113) + (EMIT-EA SEGMENT DST 4) + (EMIT-BYTE SEGMENT AMOUNT))) + +(DEFINE-INSTRUCTION PSRLD-IB + (SEGMENT DST AMOUNT) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 114) + (EMIT-EA SEGMENT DST 2) + (EMIT-BYTE SEGMENT AMOUNT))) + +(DEFINE-INSTRUCTION PSRLDQ-IB + (SEGMENT DST AMOUNT) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 115) + (EMIT-EA SEGMENT DST 3) + (EMIT-BYTE SEGMENT AMOUNT))) + +(DEFINE-INSTRUCTION PSRLQ-IB + (SEGMENT DST AMOUNT) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 115) + (EMIT-EA SEGMENT DST 2) + (EMIT-BYTE SEGMENT AMOUNT))) + +(DEFINE-INSTRUCTION PSRLW-IB + (SEGMENT DST AMOUNT) + (:EMITTER (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 113) + (EMIT-EA SEGMENT DST 2) + (EMIT-BYTE SEGMENT AMOUNT))) + ;;; CPUID diff --git a/sbcl-src/src/compiler/x86/vm.lisp b/sbcl-src/src/compiler/x86/vm.lisp index 1bab6a3..d3fde75 100644 --- a/sbcl-src/src/compiler/x86/vm.lisp +++ b/sbcl-src/src/compiler/x86/vm.lisp @@ -22,7 +22,7 @@ (defvar *word-register-names* (make-array 16 :initial-element nil)) (defvar *dword-register-names* (make-array 16 :initial-element nil)) (defvar *float-register-names* (make-array 8 :initial-element nil)) - (defvar *dqword-register-names* (make-array 8 :initial-element nil))) + (defvar *xmmword-register-names* (make-array 8 :initial-element nil))) (macrolet ((defreg (name offset size) (let ((offset-sym (symbolicate name "-OFFSET")) @@ -93,15 +93,15 @@ (defregset *float-regs* fr0 fr1 fr2 fr3 fr4 fr5 fr6 fr7) ;; sse registers - (defreg xmm0 0 :dqword) - (defreg xmm1 1 :dqword) - (defreg xmm2 2 :dqword) - (defreg xmm3 3 :dqword) - (defreg xmm4 4 :dqword) - (defreg xmm5 5 :dqword) - (defreg xmm6 6 :dqword) - (defreg xmm7 7 :dqword) - (defregset *sse-regs* xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7) + (defreg xmm0 0 :xmmword) + (defreg xmm1 1 :xmmword) + (defreg xmm2 2 :xmmword) + (defreg xmm3 3 :xmmword) + (defreg xmm4 4 :xmmword) + (defreg xmm5 5 :xmmword) + (defreg xmm6 6 :xmmword) + (defreg xmm7 7 :xmmword) + (defregset *xmm-regs* xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7) ;; registers used to pass arguments ;; @@ -130,7 +130,7 @@ ;;; the new way: (define-storage-base float-registers :finite :size 8) -(define-storage-base sse-registers :finite :size 8) +(define-storage-base xmm-registers :finite :size 8) (define-storage-base stack :unbounded :size 8) (define-storage-base constant :non-packed) @@ -334,8 +334,8 @@ :save-p t :alternate-scs (complex-long-stack)) - (sse-reg sse-registers - :locations #.*sse-regs*) + (xmm-reg xmm-registers + :locations #.*xmm-regs*) ;; a catch or unwind block (catch-block stack :element-size kludge-nondeterministic-catch-block-size)) @@ -353,7 +353,7 @@ ;;; These are used to (at least) determine operand size. (defparameter *float-sc-names* '(single-reg)) (defparameter *double-sc-names* '(double-reg double-stack)) -(defparameter *dqword-sc-names* '(sse-reg)) +(defparameter *xmmword-sc-names* '(xmm-reg)) ) ; EVAL-WHEN ;;;; miscellaneous TNs for the various registers @@ -461,7 +461,7 @@ ;; FIXME: Shouldn't this be an ERROR? (format nil "" offset sc-name)))) (float-registers (format nil "FR~D" offset)) - (sse-registers (format nil "XMM~D" offset)) + (xmm-registers (format nil "XMM~D" offset)) (stack (format nil "S~D" offset)) (constant (format nil "Const~D" offset)) (immediate-constant "Immed") diff --git a/sse-seq.lisp b/sse-seq.lisp new file mode 100644 index 0000000..aa43257 --- /dev/null +++ b/sse-seq.lisp @@ -0,0 +1,136 @@ +(in-package :sb-vm) + +(defmacro vect-ea (base &optional idx (width :dword)) + (let ((disp + (if (and idx (numberp idx)) + `(+ (- (* VECTOR-DATA-OFFSET N-WORD-BYTES) OTHER-POINTER-LOWTAG) ,idx) + `(- (* VECTOR-DATA-OFFSET N-WORD-BYTES) OTHER-POINTER-LOWTAG)))) + +;; (format t "ea ~A ~A ~A~%" base idx (and idx (symbolp idx))) + (if (and idx (symbolp idx)) + `(make-ea ,width :base ,base :index ,idx :disp ,disp) + `(make-ea ,width :base ,base :disp ,disp)))) + + +(DEFINE-VOP (%sse-seq=) + (:POLICY :FAST-SAFE) + (:ARGS (seq1 :SCS (DESCRIPTOR-REG)) + (seq2 :SCS (DESCRIPTOR-REG))) + (:ARG-TYPES simple-array-unsigned-byte-8 simple-array-unsigned-byte-8 ) + + (:results (RESULT :SCS (DESCRIPTOR-REG))) + + (:result-types fixnum) + + (:TEMPORARY (:SC XMM-REG) X0) + (:TEMPORARY (:SC XMM-REG) X1) + (:TEMPORARY (:SC XMM-REG) X2) + (:TEMPORARY (:SC XMM-REG) X3) + (:TEMPORARY (:SC XMM-REG) X4) + (:TEMPORARY (:SC XMM-REG) X5) + +;; (:TEMPORARY (:SC unsigned-reg :offset edx-offset) edx) + + (:TEMPORARY (:SC unsigned-reg :offset ebx-offset) index) + (:TEMPORARY (:SC unsigned-reg :offset ecx-offset) length) + + (:GENERATOR 10 + + (let ((top (gen-label)) +;; (top2 (gen-label)) + (length-ok (gen-label)) + (fail (gen-label)) + (the-end (gen-label)) + (end (gen-label))) + + (loadw index seq1 vector-length-slot other-pointer-lowtag) + (loadw length seq2 vector-length-slot other-pointer-lowtag) + + ;; same length ? + (inst cmp index length) + (inst jmp :eq length-ok) + + ;; not same length, fail + (inst mov result -1) + (inst jmp end) + + (emit-label length-ok) + + ;; un-fixnumize length + (inst shr length 2) + + ;; calc number of 256bit blocks + (inst shr length (floor (log (/ 256 8) 2))) + + ;; init indices + (inst xor index index) + + ;; zero eq-regs + (inst pxor x4 x4) + (inst pxor x5 x5) + + (emit-label top) + + ;; load first blocks + (inst movdqu x0 (vect-ea seq1 index :xmmword)) + (inst movdqu x1 (vect-ea seq2 index :xmmword)) + + ;; load second blocks + (inst movdqu x2 + (make-ea :xmmword :base seq1 :index index + :disp (+ (- (* VECTOR-DATA-OFFSET N-WORD-BYTES) OTHER-POINTER-LOWTAG) 16))) + (inst movdqu x3 + (make-ea :xmmword :base seq2 :index index + :disp (+ (- (* VECTOR-DATA-OFFSET N-WORD-BYTES) OTHER-POINTER-LOWTAG) 16))) + + + ;; xor first/second blocks (i.e. if equal, xor will be zero) + (inst pxor x0 x1) + (inst pxor x2 x3) + + ;; add index + (inst add index 32) + + ;; or bits to eq-regs (if not eq, some bits will be nonzero) + (inst por x4 x0) + (inst por x5 x2) + + ;; loop + (inst dec length) + (inst jmp :nz top) + + + ;; all 256bit blocks done + + ;; or each 32bit word from x4 to x5 + (inst por x4 x5) + (inst movdqa x0 x4) + + (inst psrldq-ib x4 4) ;; this is number of bytes, not bits + (inst por x0 x4) + + (inst psrldq-ib x4 4) + (inst por x0 x4) + + (inst psrldq-ib x4 4) + (inst por x0 x4) + + ;; now low 32bits of x0 will be non-zero if seq's not equal + + (inst movd result x0) + + ;; end + (emit-label end) + + (inst test result result) + (inst jmp :nz fail) + + (inst mov result (fixnumize 0)) + (inst jmp the-end) + + (emit-label fail) + (inst mov result (fixnumize 1)) + + (emit-label the-end) + + ))) diff --git a/test-seq.lisp b/test-seq.lisp new file mode 100644 index 0000000..3f15be2 --- /dev/null +++ b/test-seq.lisp @@ -0,0 +1,64 @@ +(in-package :cl-user) + +(declaim (optimize (speed 3) (safety 0) (space 0) (debug 0))) + +(defun sse-seq= (seq1 seq2) + (declare (type (simple-array (unsigned-byte 8) (*)) seq1 seq2)) + (multiple-value-bind (256blocks rest) (truncate (length seq1) (floor (log (/ 256 8) 2))) + (declare (ignore rest)) + (and (= (sb-sys:%primitive sb-vm::%sse-seq= seq1 seq2) 0) + (loop for equal = t + for i from (* 256blocks 32) below (length seq1) + when (/= (aref seq1 i) (aref seq2 i)) do (setq equal nil) + finally (return equal))))) + +(defun seq= (seq1 seq2) + (declare (type (simple-array (unsigned-byte 8) (*)) seq1 seq2)) + (and (= (length seq1) (length seq2)) + (loop for equal = t + for s1 of-type unsigned-byte across seq1 + for s2 of-type unsigned-byte across seq2 + when (/= s1 s2) do (setq equal nil) (return nil) + finally (return equal)))) + + +(defun test-seq (&optional (test-count 100000)) + (let ((arr1 (make-array #.(* 256 1024) :element-type '(unsigned-byte 8) :initial-element 0)) + (arr2 (make-array #.(* 256 1024) :element-type '(unsigned-byte 8) :initial-element 0)) + (arr3 (make-array #.(* 256 1024) :element-type '(unsigned-byte 8) :initial-element 0)) + (arr4 (make-array #.(* 256 1024) :element-type '(unsigned-byte 8) :initial-element 0)) + res) + + (loop for i from 0 below (length arr1) + do (setf (aref arr1 i) (mod (* (1+ i) 10) 256) + (aref arr2 i) (aref arr1 i) + (aref arr3 i) (aref arr1 i) + (aref arr4 i) (aref arr1 i) + )) + + (setf (aref arr3 1200) (mod (1+ (aref arr3 1200)) 256) + (aref arr4 256000) (mod (1+ (aref arr4 256000)) 256)) + +;; (time (dotimes (i 100000) (sse-seq= arr1 arr2))) +;; (time (dotimes (i #.(/ 100000 30)) (seq= arr1 arr2))) + + (format t "; seq= a1 a2~%") + (time-sample-form #'(lambda () (dotimes (i (truncate test-count 30)) (setf res (seq= arr1 arr2))))) + + (format t "; seq= a1 a3~%") + (time-sample-form #'(lambda () (dotimes (i (truncate test-count 30)) (setf res (seq= arr1 arr3))))) + + (format t "; seq= a2 a4~%") + (time-sample-form #'(lambda () (dotimes (i (truncate test-count 30)) (setf res (seq= arr2 arr4))))) + + + (format t "; sse-seq= a1 a2~%") + (time-sample-form #'(lambda () (dotimes (i test-count) (setf res (sse-seq= arr1 arr2))))) + + (format t "; sse-seq= a1 a3~%") + (time-sample-form #'(lambda () (dotimes (i test-count) (setf res (sse-seq= arr1 arr3))))) + + (format t "; sse-seq= a2 a4~%") + (time-sample-form #'(lambda () (dotimes (i test-count) (setf res (sse-seq= arr2 arr4))))) + + )) -- 2.11.4.GIT From e3bd32ef9364fddd4afebd711aa527366a00882b Mon Sep 17 00:00:00 2001 From: rlaakso Date: Fri, 12 Aug 2005 14:09:53 +0000 Subject: [PATCH 12/16] *** empty log message *** --- expand-parse-operand-temp-count.lisp | 8 ++++ load.lisp | 1 + push-simd-features.lisp | 9 +++++ sse-seq.lisp | 72 +++++++++++++++++++++++------------- test-seq.lisp | 27 +++++--------- 5 files changed, 75 insertions(+), 42 deletions(-) create mode 100644 expand-parse-operand-temp-count.lisp create mode 100644 push-simd-features.lisp diff --git a/expand-parse-operand-temp-count.lisp b/expand-parse-operand-temp-count.lisp new file mode 100644 index 0000000..4bfaba1 --- /dev/null +++ b/expand-parse-operand-temp-count.lisp @@ -0,0 +1,8 @@ +(in-package :sb-c) + +(setf *parse-vop-operand-count* 1) +(dotimes (i 20) + (make-operand-parse-temp) + (make-operand-parse-load-tn) + (incf *parse-vop-operand-count*)) + diff --git a/load.lisp b/load.lisp index e71fcf9..673a676 100644 --- a/load.lisp +++ b/load.lisp @@ -14,6 +14,7 @@ (if t (progn (load (compile-file "detect-simd.lisp")) + (load (compile-file "expand-parse-operand-temp-count.lisp")) (load (compile-file "timing.lisp")) (load (compile-file "sse-seq.lisp")) (load (compile-file "test-seq.lisp")) diff --git a/push-simd-features.lisp b/push-simd-features.lisp new file mode 100644 index 0000000..c9cbbb7 --- /dev/null +++ b/push-simd-features.lisp @@ -0,0 +1,9 @@ +(in-package :sb-vm) + +(eval-when (:load-toplevel) + (let ((res (sb-sys:%primitive sb-vm::%detect-simd/x86))) +;; (format t "res is ~A~%" res) + (if (/= (logand res #b001) 0) (pushnew :sse sb-vm::*backend-subfeatures*)) + (if (/= (logand res #b010) 0) (pushnew :sse2 sb-vm::*backend-subfeatures*)) + (if (/= (logand res #b100) 0) (pushnew :sse3 sb-vm::*backend-subfeatures*)))) + \ No newline at end of file diff --git a/sse-seq.lisp b/sse-seq.lisp index aa43257..7303a60 100644 --- a/sse-seq.lisp +++ b/sse-seq.lisp @@ -29,15 +29,15 @@ (:TEMPORARY (:SC XMM-REG) X4) (:TEMPORARY (:SC XMM-REG) X5) -;; (:TEMPORARY (:SC unsigned-reg :offset edx-offset) edx) - + (:TEMPORARY (:SC unsigned-reg :offset eax-offset :to (:result 0)) temp1) + (:TEMPORARY (:SC unsigned-reg :offset edx-offset) temp2) (:TEMPORARY (:SC unsigned-reg :offset ebx-offset) index) (:TEMPORARY (:SC unsigned-reg :offset ecx-offset) length) (:GENERATOR 10 (let ((top (gen-label)) -;; (top2 (gen-label)) + (top2 (gen-label)) (length-ok (gen-label)) (fail (gen-label)) (the-end (gen-label)) @@ -51,8 +51,7 @@ (inst jmp :eq length-ok) ;; not same length, fail - (inst mov result -1) - (inst jmp end) + (inst jmp fail) (emit-label length-ok) @@ -66,8 +65,8 @@ (inst xor index index) ;; zero eq-regs - (inst pxor x4 x4) - (inst pxor x5 x5) +;; (inst pxor x4 x4) +;; (inst pxor x5 x5) (emit-label top) @@ -75,6 +74,9 @@ (inst movdqu x0 (vect-ea seq1 index :xmmword)) (inst movdqu x1 (vect-ea seq2 index :xmmword)) + (inst pxor x4 x4) + (inst pxor x5 x5) + ;; load second blocks (inst movdqu x2 (make-ea :xmmword :base seq1 :index index @@ -91,9 +93,18 @@ ;; add index (inst add index 32) - ;; or bits to eq-regs (if not eq, some bits will be nonzero) - (inst por x4 x0) - (inst por x5 x2) + ;; check for non-equality + (inst pcmpeqd x4 x0) + (inst pcmpeqd x5 x2) + + (inst pmovmskb temp1 x4) + (inst pmovmskb temp2 x5) + + (inst cmp temp1 #x0000FFFF) + (inst jmp :ne fail) + + (inst cmp temp2 #x0000FFFF) + (inst jmp :ne fail) ;; loop (inst dec length) @@ -102,35 +113,46 @@ ;; all 256bit blocks done - ;; or each 32bit word from x4 to x5 - (inst por x4 x5) - (inst movdqa x0 x4) - (inst psrldq-ib x4 4) ;; this is number of bytes, not bits - (inst por x0 x4) + ;; check remaining bytes + (loadw length seq1 vector-length-slot other-pointer-lowtag) + (inst shr length 2) + (inst and length (1- (/ 256 8))) + + ;; no bytes left ? + (inst test length length) + (inst jmp :z end) - (inst psrldq-ib x4 4) - (inst por x0 x4) + (inst xor temp1 temp1) + (inst xor temp2 temp2) - (inst psrldq-ib x4 4) - (inst por x0 x4) + (emit-label top2) - ;; now low 32bits of x0 will be non-zero if seq's not equal + ;; test bytes + (inst movzx temp1 (vect-ea seq1 index :byte)) + (inst movzx temp2 (vect-ea seq2 index :byte)) + (inst xor temp1 temp2) + (inst inc index) - (inst movd result x0) + ;; if not zero, fail + (inst test temp1 temp1) + (inst jmp :nz fail) + + ;; loop + (inst dec length) + (inst jmp :nz top2) ;; end (emit-label end) - (inst test result result) - (inst jmp :nz fail) - (inst mov result (fixnumize 0)) (inst jmp the-end) + ;; fail (emit-label fail) (inst mov result (fixnumize 1)) - + + ;; the-end (emit-label the-end) ))) diff --git a/test-seq.lisp b/test-seq.lisp index 3f15be2..4a630cf 100644 --- a/test-seq.lisp +++ b/test-seq.lisp @@ -3,14 +3,7 @@ (declaim (optimize (speed 3) (safety 0) (space 0) (debug 0))) (defun sse-seq= (seq1 seq2) - (declare (type (simple-array (unsigned-byte 8) (*)) seq1 seq2)) - (multiple-value-bind (256blocks rest) (truncate (length seq1) (floor (log (/ 256 8) 2))) - (declare (ignore rest)) - (and (= (sb-sys:%primitive sb-vm::%sse-seq= seq1 seq2) 0) - (loop for equal = t - for i from (* 256blocks 32) below (length seq1) - when (/= (aref seq1 i) (aref seq2 i)) do (setq equal nil) - finally (return equal))))) + (= (sb-sys:%primitive sb-vm::%sse-seq= seq1 seq2) 0)) (defun seq= (seq1 seq2) (declare (type (simple-array (unsigned-byte 8) (*)) seq1 seq2)) @@ -22,11 +15,11 @@ finally (return equal)))) -(defun test-seq (&optional (test-count 100000)) - (let ((arr1 (make-array #.(* 256 1024) :element-type '(unsigned-byte 8) :initial-element 0)) - (arr2 (make-array #.(* 256 1024) :element-type '(unsigned-byte 8) :initial-element 0)) - (arr3 (make-array #.(* 256 1024) :element-type '(unsigned-byte 8) :initial-element 0)) - (arr4 (make-array #.(* 256 1024) :element-type '(unsigned-byte 8) :initial-element 0)) +(defun test-seq (&optional (test-count 50000)) + (let ((arr1 (make-array #.(* 255 1025) :element-type '(unsigned-byte 8) :initial-element 0)) + (arr2 (make-array #.(* 255 1025) :element-type '(unsigned-byte 8) :initial-element 0)) + (arr3 (make-array #.(* 255 1025) :element-type '(unsigned-byte 8) :initial-element 0)) + (arr4 (make-array #.(* 255 1025) :element-type '(unsigned-byte 8) :initial-element 0)) res) (loop for i from 0 below (length arr1) @@ -37,19 +30,19 @@ )) (setf (aref arr3 1200) (mod (1+ (aref arr3 1200)) 256) - (aref arr4 256000) (mod (1+ (aref arr4 256000)) 256)) + (aref arr4 (- (length arr4) 2)) (mod (1+ (aref arr4 (- (length arr4) 2))) 256)) ;; (time (dotimes (i 100000) (sse-seq= arr1 arr2))) ;; (time (dotimes (i #.(/ 100000 30)) (seq= arr1 arr2))) (format t "; seq= a1 a2~%") - (time-sample-form #'(lambda () (dotimes (i (truncate test-count 30)) (setf res (seq= arr1 arr2))))) + (time-sample-form #'(lambda () (dotimes (i (truncate test-count 15)) (setf res (seq= arr1 arr2))))) (format t "; seq= a1 a3~%") - (time-sample-form #'(lambda () (dotimes (i (truncate test-count 30)) (setf res (seq= arr1 arr3))))) + (time-sample-form #'(lambda () (dotimes (i (truncate test-count 15)) (setf res (seq= arr1 arr3))))) (format t "; seq= a2 a4~%") - (time-sample-form #'(lambda () (dotimes (i (truncate test-count 30)) (setf res (seq= arr2 arr4))))) + (time-sample-form #'(lambda () (dotimes (i (truncate test-count 15)) (setf res (seq= arr2 arr4))))) (format t "; sse-seq= a1 a2~%") -- 2.11.4.GIT From ab6cf415a1824247039d46ea7ee6d2f1eb82e874 Mon Sep 17 00:00:00 2001 From: rlaakso Date: Wed, 17 Aug 2005 14:08:55 +0000 Subject: [PATCH 13/16] *** empty log message *** --- expand-parse-operand-temp-count.lisp | 26 + generate-sse-instructions.lisp | 2 +- generate-sse-vops.lisp | 16 +- load.lisp | 12 +- push-simd-features.lisp | 26 + sbcl-src/patch_against_sbcl_0_9_3 | 368 +- ...cl_0_9_3 => patch_against_sbcl_0_9_3__08082005} | 0 sbcl-src/src-093/compiler/x86/float.lisp | 4310 +++++++++++++++++++ sbcl-src/src/compiler/x86/float.lisp | 4429 ++++++++++++++++++++ sbcl-src/src/compiler/x86/vm.lisp | 12 +- sse-matrix.lisp | 16 +- sse-moves.lisp | 236 ++ sse-vector.lisp | 225 + test-vector.lisp | 190 + 14 files changed, 9799 insertions(+), 69 deletions(-) copy sbcl-src/{patch_against_sbcl_0_9_3 => patch_against_sbcl_0_9_3__08082005} (100%) create mode 100644 sbcl-src/src-093/compiler/x86/float.lisp create mode 100644 sbcl-src/src/compiler/x86/float.lisp create mode 100644 sse-moves.lisp create mode 100644 sse-vector.lisp create mode 100644 test-vector.lisp diff --git a/expand-parse-operand-temp-count.lisp b/expand-parse-operand-temp-count.lisp index 4bfaba1..8b5c6ba 100644 --- a/expand-parse-operand-temp-count.lisp +++ b/expand-parse-operand-temp-count.lisp @@ -1,3 +1,29 @@ +#| +Copyright (c) 2005 Risto Laakso +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. +3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +|# (in-package :sb-c) (setf *parse-vop-operand-count* 1) diff --git a/generate-sse-instructions.lisp b/generate-sse-instructions.lisp index 2bea781..9bb34aa 100644 --- a/generate-sse-instructions.lisp +++ b/generate-sse-instructions.lisp @@ -323,7 +323,7 @@ STMXCSR . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . (format stream "~S~%~%" `(define-instruction ,(intern (symbol-name inst)) (segment dst src) (:emitter - (cond ((sse-register-p dst) + (cond ((xmm-register-p dst) ,@(emit-ops ops-m2r) (emit-ea segment src (reg-tn-encoding dst))) (t ,@(emit-ops ops-r2m) diff --git a/generate-sse-vops.lisp b/generate-sse-vops.lisp index eb1a36e..079d3fb 100644 --- a/generate-sse-vops.lisp +++ b/generate-sse-vops.lisp @@ -132,8 +132,8 @@ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ,(intern (format nil "SIMPLE-ARRAY-~A" type)) fixnum) - (:temporary (:sc sse-reg) sse-temp1) - (:temporary (:sc sse-reg) sse-temp2) + (:temporary (:sc xmm-reg) sse-temp1) + (:temporary (:sc xmm-reg) sse-temp2) (:generator 10 @@ -186,8 +186,8 @@ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ,(intern (format nil "SIMPLE-ARRAY-~A" type2)) fixnum) - (:temporary (:sc sse-reg) sse-temp1) - (:temporary (:sc sse-reg) sse-temp2) + (:temporary (:sc xmm-reg) sse-temp1) + (:temporary (:sc xmm-reg) sse-temp2) (:generator 10 @@ -233,8 +233,8 @@ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ,(intern (format nil "SIMPLE-ARRAY-~A" type)) fixnum) - (:temporary (:sc sse-reg) sse-temp1) - (:temporary (:sc sse-reg) sse-temp2) + (:temporary (:sc xmm-reg) sse-temp1) + (:temporary (:sc xmm-reg) sse-temp2) (:generator 10 @@ -283,8 +283,8 @@ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. (:constant keyword) ) - (:temporary (:sc sse-reg) sse-temp1) - (:temporary (:sc sse-reg) sse-temp2) + (:temporary (:sc xmm-reg) sse-temp1) + (:temporary (:sc xmm-reg) sse-temp2) (:generator 10 diff --git a/load.lisp b/load.lisp index 673a676..6613a4f 100644 --- a/load.lisp +++ b/load.lisp @@ -11,7 +11,7 @@ (load (compile-file "test-matrix.lisp")) )) -(if t +(if nil (progn (load (compile-file "detect-simd.lisp")) (load (compile-file "expand-parse-operand-temp-count.lisp")) @@ -19,4 +19,14 @@ (load (compile-file "sse-seq.lisp")) (load (compile-file "test-seq.lisp")) )) + +(if t + (progn + (load (compile-file "detect-simd.lisp")) + (load (compile-file "sse-moves.lisp")) + (load (compile-file "expand-parse-operand-temp-count.lisp")) + (load (compile-file "timing.lisp")) + (load (compile-file "sse-vector.lisp")) + (load (compile-file "test-vector.lisp")) + )) \ No newline at end of file diff --git a/push-simd-features.lisp b/push-simd-features.lisp index c9cbbb7..468dc16 100644 --- a/push-simd-features.lisp +++ b/push-simd-features.lisp @@ -1,3 +1,29 @@ +#| +Copyright (c) 2005 Risto Laakso +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. +3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +|# (in-package :sb-vm) (eval-when (:load-toplevel) diff --git a/sbcl-src/patch_against_sbcl_0_9_3 b/sbcl-src/patch_against_sbcl_0_9_3 index 9a04742..72f17d8 100644 --- a/sbcl-src/patch_against_sbcl_0_9_3 +++ b/sbcl-src/patch_against_sbcl_0_9_3 @@ -1,16 +1,160 @@ +diff -x 'CVS*' -Naur src-093/compiler/x86/float.lisp src/compiler/x86/float.lisp +--- src-093/compiler/x86/float.lisp 2005-08-17 16:56:53.996387102 +0300 ++++ src/compiler/x86/float.lisp 2005-08-17 15:04:50.040162831 +0300 +@@ -4308,3 +4308,122 @@ + (:note "inline dummy FP register bias") + (:ignore x) + (:generator 0)) ++ ++ ++;; XMM Moves ++ ++ ++(defun ea-for-xmm-desc (tn) ++ (make-ea :xmmword :base tn ++ :disp (- (* xmm-value-slot n-word-bytes) other-pointer-lowtag))) ++ ++(defun ea-for-xmm-stack (tn) ++ (make-ea :xmmword :base ebp-tn ++ :disp (- (* (+ (tn-offset tn) ++ 4) ++ n-word-bytes)))) ++ ++(define-move-fun (load-xmm 2) (vop x y) ++ ((xmm-stack) (xmm-reg)) ++ (inst movdqu y (ea-for-xmm-stack x))) ++ ++(define-move-fun (store-xmm 2) (vop x y) ++ ((xmm-reg) (xmm-stack)) ++ (inst movdqu (ea-for-xmm-stack y) x)) ++ ++(define-move-fun (load-xmm-single 2) (vop x y) ++ ((single-stack) (xmm-reg)) ++ (inst movss y (ea-for-sf-stack x))) ++ ++(define-move-fun (store-xmm-single 2) (vop x y) ++ ((xmm-reg) (single-stack)) ++ (inst movss (ea-for-sf-stack y) x)) ++ ++ ++(define-vop (%load-xmm-from-array/single-float) ++ (:policy :fast-safe) ++ (:args (src :scs (descriptor-reg)) ++ (index :scs (unsigned-reg))) ++ (:ARG-TYPES SIMPLE-ARRAY-SINGLE-FLOAT fixnum) ++ (:results (dest :scs (xmm-reg))) ++ (:result-types xmm) ++ (:generator 1 ++ (inst shl index 2) ++ (inst movdqu dest (make-ea :xmmword :base src :index index ++ :disp (- (* VECTOR-DATA-OFFSET N-WORD-BYTES) OTHER-POINTER-LOWTAG))))) ++ ++ ++(define-vop (%store-xmm-to-array/single-float) ++ (:policy :fast-safe) ++ (:args (dest :scs (descriptor-reg)) ++ (index :scs (unsigned-reg)) ++ (src :scs (xmm-reg))) ++ (:ARG-TYPES SIMPLE-ARRAY-SINGLE-FLOAT fixnum XMM) ++ (:generator 1 ++ (inst shl index 2) ++ (inst movdqu (make-ea :xmmword :base dest :index index ++ :disp (- (* VECTOR-DATA-OFFSET N-WORD-BYTES) OTHER-POINTER-LOWTAG)) ++ src))) ++ ++ ++(define-vop (xmm-move) ++ (:args (x :scs (xmm-reg) :target y :load-if (not (location= x y)))) ++ (:results (y :scs (xmm-reg) :load-if (not (location= x y)))) ++ (:note "xmm move") ++ (:generator 0 ++ (unless (location= x y) ++ (inst movdqa y x)))) ++ ++(define-move-vop xmm-move :move (xmm-reg) (xmm-reg)) ++ ++(define-vop (move-from-xmm) ++ (:args (x :scs (xmm-reg) :to :save)) ++ (:results (y :scs (descriptor-reg))) ++ (:node-var node) ++ (:note "xmm to pointer coercion") ++ (:generator 13 ++ (with-fixed-allocation (y ++ xmm-widetag ++ xmm-size node) ++ (inst movdqu (ea-for-xmm-desc y) x)))) ++ ++(define-move-vop move-from-xmm :move (xmm-reg) (descriptor-reg)) ++ ++(define-vop (move-to-xmm) ++ (:args (x :scs (descriptor-reg))) ++ (:results (y :scs (xmm-reg))) ++ (:note "pointer to xmm coercion") ++ (:generator 2 ++ (inst movdqu y (ea-for-xmm-desc x)))) ++ ++(define-move-vop move-to-xmm :move (descriptor-reg) (xmm-reg)) ++ ++ ++(define-vop (move-xmm-arg) ++ (:args (x :scs (xmm-reg) :target y) ++ (fp :scs (any-reg) ++ :load-if (not (sc-is y xmm-reg)))) ++ (:results (y)) ++ (:note "xmm argument move") ++ (:generator 6 ++ (sc-case y ++ (xmm-reg ++ (unless (location= x y) ++ (inst movdqa y x))) ++ ++ (xmm-stack ++ (if (= (tn-offset fp) esp-offset) ++ (let* ((offset (* (tn-offset y) n-word-bytes)) ++ (ea (make-ea :xmmword :base fp :disp offset))) ++ (inst movdqu ea x)) ++ ++ (let ((ea (make-ea :xmmword :base fp ++ :disp (- (* (+ (tn-offset y) 4) ++ n-word-bytes))))) ++ (inst movdqu ea x))))))) ++ ++(define-move-vop move-xmm-arg :move-arg (xmm-reg descriptor-reg) (xmm-reg)) ++ ++(define-move-vop move-arg :move-arg (xmm-reg) (descriptor-reg)) ++ ++ diff -x 'CVS*' -Naur src-093/compiler/x86/insts.lisp src/compiler/x86/insts.lisp --- src-093/compiler/x86/insts.lisp 2005-08-05 16:13:29.000000000 +0300 -+++ src/compiler/x86/insts.lisp 2005-08-08 16:30:23.352842152 +0300 -@@ -192,6 +192,8 @@ ++++ src/compiler/x86/insts.lisp 2005-08-16 10:39:07.027823783 +0300 +@@ -39,13 +39,16 @@ + #(ax cx dx bx sp bp si di)) + (defparameter *dword-reg-names* + #(eax ecx edx ebx esp ebp esi edi)) ++(defparameter *xmmword-reg-names* ++ #(xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7)) + + (defun print-reg-with-width (value width stream dstate) + (declare (ignore dstate)) + (princ (aref (ecase width + (:byte *byte-reg-names*) + (:word *word-reg-names*) +- (:dword *dword-reg-names*)) ++ (:dword *dword-reg-names*) ++ (:xmmword *xmmword-reg-names*)) + value) + stream) + ;; XXX plus should do some source-var notes +@@ -192,6 +195,8 @@ (:byte 8) (:word 16) (:dword 32) + (:qword 64) -+ (:dqword 128) ++ (:xmmword 128) (:float 32) (:double 64))) -@@ -671,7 +673,7 @@ +@@ -671,14 +676,14 @@ (defun reg-tn-encoding (tn) (declare (type tn tn)) @@ -19,27 +163,53 @@ diff -x 'CVS*' -Naur src-093/compiler/x86/insts.lisp src/compiler/x86/insts.lisp (let ((offset (tn-offset tn))) (logior (ash (logand offset 1) 2) (ash offset -1)))) -@@ -718,6 +720,8 @@ + + (defstruct (ea (:constructor make-ea (size &key base index scale disp)) + (:copier nil)) +- (size nil :type (member :byte :word :dword)) ++ (size nil :type (member :byte :word :dword :xmmword)) + (base nil :type (or tn null)) + (index nil :type (or tn null)) + (scale 1 :type (member 1 2 4 8)) +@@ -718,6 +723,8 @@ (ecase (sb-name (sc-sb (tn-sc thing))) (registers (emit-mod-reg-r/m-byte segment #b11 reg (reg-tn-encoding thing))) -+ (sse-registers ++ (xmm-registers + (emit-mod-reg-r/m-byte segment #b11 reg (reg-tn-encoding thing))) (stack ;; Convert stack tns into an index off of EBP. (let ((disp (- (* (1+ (tn-offset thing)) n-word-bytes)))) -@@ -830,6 +834,10 @@ +@@ -830,6 +837,19 @@ (and (tn-p thing) (eq (sb-name (sc-sb (tn-sc thing))) 'registers))) -+(defun sse-register-p (thing) ++(defun xmm-register-p (thing) + (and (tn-p thing) -+ (eq (sb-name (sc-sb (tn-sc thing))) 'sse-registers))) ++ (eq (sb-name (sc-sb (tn-sc thing))) 'xmm-registers) ++ (member (sc-name (tn-sc thing)) *xmmword-sc-names*) ++ t)) ++ ++(defun xmm-ea-p (thing) ++ (typecase thing ++ (ea (eq (ea-size thing) :xmmword)) ++ (tn ++ (and (member (sc-name (tn-sc thing)) *xmmword-sc-names*) t)) ++ (t nil))) + (defun accumulator-p (thing) (and (register-p thing) (= (tn-offset thing) 0))) -@@ -2042,6 +2050,1339 @@ +@@ -859,6 +879,8 @@ + :float) + (#.*double-sc-names* + :double) ++ (#.*xmmword-sc-names* ++ :xmmword) + (t + (error "can't tell the size of ~S ~S" thing (sc-name (tn-sc thing)))))) + (ea +@@ -2042,6 +2064,1419 @@ (:emitter (emit-header-data segment return-pc-header-widetag))) @@ -1203,7 +1373,7 @@ diff -x 'CVS*' -Naur src-093/compiler/x86/insts.lisp src/compiler/x86/insts.lisp + (SEGMENT DST SRC) + (:EMITTER + (COND -+ ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) ++ ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 40) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) + (T (EMIT-BYTE SEGMENT 102) @@ -1215,7 +1385,7 @@ diff -x 'CVS*' -Naur src-093/compiler/x86/insts.lisp src/compiler/x86/insts.lisp + (SEGMENT DST SRC) + (:EMITTER + (COND -+ ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 15) ++ ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 40) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) + (T (EMIT-BYTE SEGMENT 15) @@ -1226,7 +1396,7 @@ diff -x 'CVS*' -Naur src-093/compiler/x86/insts.lisp src/compiler/x86/insts.lisp + (SEGMENT DST SRC) + (:EMITTER + (COND -+ ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) ++ ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 110) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) + (T (EMIT-BYTE SEGMENT 102) @@ -1238,7 +1408,7 @@ diff -x 'CVS*' -Naur src-093/compiler/x86/insts.lisp src/compiler/x86/insts.lisp + (SEGMENT DST SRC) + (:EMITTER + (COND -+ ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) ++ ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 111) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) + (T (EMIT-BYTE SEGMENT 102) @@ -1250,7 +1420,7 @@ diff -x 'CVS*' -Naur src-093/compiler/x86/insts.lisp src/compiler/x86/insts.lisp + (SEGMENT DST SRC) + (:EMITTER + (COND -+ ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 243) ++ ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 243) + (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 111) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) + (T (EMIT-BYTE SEGMENT 243) @@ -1262,7 +1432,7 @@ diff -x 'CVS*' -Naur src-093/compiler/x86/insts.lisp src/compiler/x86/insts.lisp + (SEGMENT DST SRC) + (:EMITTER + (COND -+ ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) ++ ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 22) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) + (T (EMIT-BYTE SEGMENT 102) @@ -1274,7 +1444,7 @@ diff -x 'CVS*' -Naur src-093/compiler/x86/insts.lisp src/compiler/x86/insts.lisp + (SEGMENT DST SRC) + (:EMITTER + (COND -+ ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 15) ++ ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 22) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) + (T (EMIT-BYTE SEGMENT 15) @@ -1285,7 +1455,7 @@ diff -x 'CVS*' -Naur src-093/compiler/x86/insts.lisp src/compiler/x86/insts.lisp + (SEGMENT DST SRC) + (:EMITTER + (COND -+ ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) ++ ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 18) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) + (T (EMIT-BYTE SEGMENT 102) @@ -1297,7 +1467,7 @@ diff -x 'CVS*' -Naur src-093/compiler/x86/insts.lisp src/compiler/x86/insts.lisp + (SEGMENT DST SRC) + (:EMITTER + (COND -+ ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 15) ++ ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 18) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) + (T (EMIT-BYTE SEGMENT 15) @@ -1308,7 +1478,7 @@ diff -x 'CVS*' -Naur src-093/compiler/x86/insts.lisp src/compiler/x86/insts.lisp + (SEGMENT DST SRC) + (:EMITTER + (COND -+ ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 243) ++ ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 243) + (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 126) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) + (T (EMIT-BYTE SEGMENT 102) @@ -1320,7 +1490,7 @@ diff -x 'CVS*' -Naur src-093/compiler/x86/insts.lisp src/compiler/x86/insts.lisp + (SEGMENT DST SRC) + (:EMITTER + (COND -+ ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 242) ++ ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 242) + (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 16) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) + (T (EMIT-BYTE SEGMENT 242) @@ -1332,7 +1502,7 @@ diff -x 'CVS*' -Naur src-093/compiler/x86/insts.lisp src/compiler/x86/insts.lisp + (SEGMENT DST SRC) + (:EMITTER + (COND -+ ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 243) ++ ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 243) + (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 16) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) + (T (EMIT-BYTE SEGMENT 243) @@ -1344,7 +1514,7 @@ diff -x 'CVS*' -Naur src-093/compiler/x86/insts.lisp src/compiler/x86/insts.lisp + (SEGMENT DST SRC) + (:EMITTER + (COND -+ ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) ++ ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 16) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) + (T (EMIT-BYTE SEGMENT 102) @@ -1356,13 +1526,93 @@ diff -x 'CVS*' -Naur src-093/compiler/x86/insts.lisp src/compiler/x86/insts.lisp + (SEGMENT DST SRC) + (:EMITTER + (COND -+ ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 15) ++ ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 16) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) + (T (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 17) + (EMIT-EA SEGMENT DST (REG-TN-ENCODING SRC)))))) + ++(DEFINE-INSTRUCTION PSLLD-IB ++ (SEGMENT DST AMOUNT) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 114) ++ (EMIT-EA SEGMENT DST 6) ++ (EMIT-BYTE SEGMENT AMOUNT))) ++ ++(DEFINE-INSTRUCTION PSLLDQ-IB ++ (SEGMENT DST AMOUNT) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 115) ++ (EMIT-EA SEGMENT DST 7) ++ (EMIT-BYTE SEGMENT AMOUNT))) ++ ++(DEFINE-INSTRUCTION PSLLQ-IB ++ (SEGMENT DST AMOUNT) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 115) ++ (EMIT-EA SEGMENT DST 6) ++ (EMIT-BYTE SEGMENT AMOUNT))) ++ ++(DEFINE-INSTRUCTION PSLLW-IB ++ (SEGMENT DST AMOUNT) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 113) ++ (EMIT-EA SEGMENT DST 6) ++ (EMIT-BYTE SEGMENT AMOUNT))) ++ ++(DEFINE-INSTRUCTION PSRAD-IB ++ (SEGMENT DST AMOUNT) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 114) ++ (EMIT-EA SEGMENT DST 4) ++ (EMIT-BYTE SEGMENT AMOUNT))) ++ ++(DEFINE-INSTRUCTION PSRAW-IB ++ (SEGMENT DST AMOUNT) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 113) ++ (EMIT-EA SEGMENT DST 4) ++ (EMIT-BYTE SEGMENT AMOUNT))) ++ ++(DEFINE-INSTRUCTION PSRLD-IB ++ (SEGMENT DST AMOUNT) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 114) ++ (EMIT-EA SEGMENT DST 2) ++ (EMIT-BYTE SEGMENT AMOUNT))) ++ ++(DEFINE-INSTRUCTION PSRLDQ-IB ++ (SEGMENT DST AMOUNT) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 115) ++ (EMIT-EA SEGMENT DST 3) ++ (EMIT-BYTE SEGMENT AMOUNT))) ++ ++(DEFINE-INSTRUCTION PSRLQ-IB ++ (SEGMENT DST AMOUNT) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 115) ++ (EMIT-EA SEGMENT DST 2) ++ (EMIT-BYTE SEGMENT AMOUNT))) ++ ++(DEFINE-INSTRUCTION PSRLW-IB ++ (SEGMENT DST AMOUNT) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 113) ++ (EMIT-EA SEGMENT DST 2) ++ (EMIT-BYTE SEGMENT AMOUNT))) ++ + + +;;; CPUID @@ -1381,14 +1631,14 @@ diff -x 'CVS*' -Naur src-093/compiler/x86/insts.lisp src/compiler/x86/insts.lisp ;;;; FIXME: This section said "added by jrd", which should end up in CREDITS. diff -x 'CVS*' -Naur src-093/compiler/x86/vm.lisp src/compiler/x86/vm.lisp --- src-093/compiler/x86/vm.lisp 2005-08-05 16:13:29.000000000 +0300 -+++ src/compiler/x86/vm.lisp 2005-08-08 16:32:19.609588299 +0300 ++++ src/compiler/x86/vm.lisp 2005-08-17 13:06:11.717026836 +0300 @@ -21,7 +21,8 @@ (defvar *byte-register-names* (make-array 8 :initial-element nil)) (defvar *word-register-names* (make-array 16 :initial-element nil)) (defvar *dword-register-names* (make-array 16 :initial-element nil)) - (defvar *float-register-names* (make-array 8 :initial-element nil))) + (defvar *float-register-names* (make-array 8 :initial-element nil)) -+ (defvar *dqword-register-names* (make-array 8 :initial-element nil))) ++ (defvar *xmmword-register-names* (make-array 8 :initial-element nil))) (macrolet ((defreg (name offset size) (let ((offset-sym (symbolicate name "-OFFSET")) @@ -1397,15 +1647,15 @@ diff -x 'CVS*' -Naur src-093/compiler/x86/vm.lisp src/compiler/x86/vm.lisp (defregset *float-regs* fr0 fr1 fr2 fr3 fr4 fr5 fr6 fr7) + ;; sse registers -+ (defreg xmm0 0 :dqword) -+ (defreg xmm1 1 :dqword) -+ (defreg xmm2 2 :dqword) -+ (defreg xmm3 3 :dqword) -+ (defreg xmm4 4 :dqword) -+ (defreg xmm5 5 :dqword) -+ (defreg xmm6 6 :dqword) -+ (defreg xmm7 7 :dqword) -+ (defregset *sse-regs* xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7) ++ (defreg xmm0 0 :xmmword) ++ (defreg xmm1 1 :xmmword) ++ (defreg xmm2 2 :xmmword) ++ (defreg xmm3 3 :xmmword) ++ (defreg xmm4 4 :xmmword) ++ (defreg xmm5 5 :xmmword) ++ (defreg xmm6 6 :xmmword) ++ (defreg xmm7 7 :xmmword) ++ (defregset *xmm-regs* xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7) + ;; registers used to pass arguments ;; @@ -1414,33 +1664,55 @@ diff -x 'CVS*' -Naur src-093/compiler/x86/vm.lisp src/compiler/x86/vm.lisp ;;; the new way: (define-storage-base float-registers :finite :size 8) -+(define-storage-base sse-registers :finite :size 8) ++(define-storage-base xmm-registers :finite :size 8) + (define-storage-base stack :unbounded :size 8) (define-storage-base constant :non-packed) (define-storage-base immediate-constant :non-packed) -@@ -320,6 +334,8 @@ - :save-p t - :alternate-scs (complex-long-stack)) +@@ -186,6 +200,7 @@ + (sap-stack stack) ; System area pointers. + (single-stack stack) ; single-floats + (double-stack stack :element-size 2) ; double-floats. ++ (xmm-stack stack :element-size 4) ; xmm + #!+long-float + (long-stack stack :element-size 3) ; long-floats. + (complex-single-stack stack :element-size 2) ; complex-single-floats +@@ -290,6 +305,12 @@ + :save-p t + :alternate-scs (double-stack)) -+ (sse-reg sse-registers -+ :locations #.*sse-regs*) - ;; a catch or unwind block - (catch-block stack :element-size kludge-nondeterministic-catch-block-size)) - -@@ -337,6 +353,7 @@ ++ ;; non-descriptor XMMs ++ (xmm-reg xmm-registers ++ :locations #.*xmm-regs* ++ :save-p t ++ :alternate-scs (xmm-stack)) ++ + ;; non-descriptor LONG-FLOATs + #!+long-float + (long-reg float-registers +@@ -337,6 +358,7 @@ ;;; These are used to (at least) determine operand size. (defparameter *float-sc-names* '(single-reg)) (defparameter *double-sc-names* '(double-reg double-stack)) -+(defparameter *dqword-sc-names* '(sse-reg)) ++(defparameter *xmmword-sc-names* '(xmm-reg)) ) ; EVAL-WHEN ;;;; miscellaneous TNs for the various registers -@@ -444,6 +461,7 @@ +@@ -359,7 +381,8 @@ + (def-misc-reg-tns unsigned-reg eax ebx ecx edx ebp esp edi esi) + (def-misc-reg-tns word-reg ax bx cx dx bp sp di si) + (def-misc-reg-tns byte-reg al ah bl bh cl ch dl dh) +- (def-misc-reg-tns single-reg fr0 fr1 fr2 fr3 fr4 fr5 fr6 fr7)) ++ (def-misc-reg-tns single-reg fr0 fr1 fr2 fr3 fr4 fr5 fr6 fr7) ++ (def-misc-reg-tns xmm-reg xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7)) + + ;;; TNs for registers used to pass arguments + (defparameter *register-arg-tns* +@@ -444,6 +467,7 @@ ;; FIXME: Shouldn't this be an ERROR? (format nil "" offset sc-name)))) (float-registers (format nil "FR~D" offset)) -+ (sse-registers (format nil "XMM~D" offset)) ++ (xmm-registers (format nil "XMM~D" offset)) (stack (format nil "S~D" offset)) (constant (format nil "Const~D" offset)) (immediate-constant "Immed") diff --git a/sbcl-src/patch_against_sbcl_0_9_3 b/sbcl-src/patch_against_sbcl_0_9_3__08082005 similarity index 100% copy from sbcl-src/patch_against_sbcl_0_9_3 copy to sbcl-src/patch_against_sbcl_0_9_3__08082005 diff --git a/sbcl-src/src-093/compiler/x86/float.lisp b/sbcl-src/src-093/compiler/x86/float.lisp new file mode 100644 index 0000000..621a1cd --- /dev/null +++ b/sbcl-src/src-093/compiler/x86/float.lisp @@ -0,0 +1,4310 @@ +;;;; floating point support for the x86 + +;;;; This software is part of the SBCL system. See the README file for +;;;; more information. +;;;; +;;;; This software is derived from the CMU CL system, which was +;;;; written at Carnegie Mellon University and released into the +;;;; public domain. The software is in the public domain and is +;;;; provided with absolutely no warranty. See the COPYING and CREDITS +;;;; files for more information. + +(in-package "SB!VM") + +(macrolet ((ea-for-xf-desc (tn slot) + `(make-ea + :dword :base ,tn + :disp (- (* ,slot n-word-bytes) + other-pointer-lowtag)))) + (defun ea-for-sf-desc (tn) + (ea-for-xf-desc tn single-float-value-slot)) + (defun ea-for-df-desc (tn) + (ea-for-xf-desc tn double-float-value-slot)) + #!+long-float + (defun ea-for-lf-desc (tn) + (ea-for-xf-desc tn long-float-value-slot)) + ;; complex floats + (defun ea-for-csf-real-desc (tn) + (ea-for-xf-desc tn complex-single-float-real-slot)) + (defun ea-for-csf-imag-desc (tn) + (ea-for-xf-desc tn complex-single-float-imag-slot)) + (defun ea-for-cdf-real-desc (tn) + (ea-for-xf-desc tn complex-double-float-real-slot)) + (defun ea-for-cdf-imag-desc (tn) + (ea-for-xf-desc tn complex-double-float-imag-slot)) + #!+long-float + (defun ea-for-clf-real-desc (tn) + (ea-for-xf-desc tn complex-long-float-real-slot)) + #!+long-float + (defun ea-for-clf-imag-desc (tn) + (ea-for-xf-desc tn complex-long-float-imag-slot))) + +(macrolet ((ea-for-xf-stack (tn kind) + `(make-ea + :dword :base ebp-tn + :disp (- (* (+ (tn-offset ,tn) + (ecase ,kind (:single 1) (:double 2) (:long 3))) + n-word-bytes))))) + (defun ea-for-sf-stack (tn) + (ea-for-xf-stack tn :single)) + (defun ea-for-df-stack (tn) + (ea-for-xf-stack tn :double)) + #!+long-float + (defun ea-for-lf-stack (tn) + (ea-for-xf-stack tn :long))) + +;;; Telling the FPU to wait is required in order to make signals occur +;;; at the expected place, but naturally slows things down. +;;; +;;; NODE is the node whose compilation policy controls the decision +;;; whether to just blast through carelessly or carefully emit wait +;;; instructions and whatnot. +;;; +;;; NOTE-NEXT-INSTRUCTION, if supplied, is to be passed to +;;; #'NOTE-NEXT-INSTRUCTION. +;;; +;;; Until 2004-03-15, the implementation of this was buggy; it +;;; unconditionally emitted the WAIT instruction. It turns out that +;;; this is the right thing to do anyway; omitting them can lead to +;;; system corruption on conforming code. -- CSR +(defun maybe-fp-wait (node &optional note-next-instruction) + (declare (ignore node)) + #+nil + (when (policy node (or (= debug 3) (> safety speed)))) + (when note-next-instruction + (note-next-instruction note-next-instruction :internal-error)) + (inst wait)) + +;;; complex float stack EAs +(macrolet ((ea-for-cxf-stack (tn kind slot &optional base) + `(make-ea + :dword :base ,base + :disp (- (* (+ (tn-offset ,tn) + (* (ecase ,kind + (:single 1) + (:double 2) + (:long 3)) + (ecase ,slot (:real 1) (:imag 2)))) + n-word-bytes))))) + (defun ea-for-csf-real-stack (tn &optional (base ebp-tn)) + (ea-for-cxf-stack tn :single :real base)) + (defun ea-for-csf-imag-stack (tn &optional (base ebp-tn)) + (ea-for-cxf-stack tn :single :imag base)) + (defun ea-for-cdf-real-stack (tn &optional (base ebp-tn)) + (ea-for-cxf-stack tn :double :real base)) + (defun ea-for-cdf-imag-stack (tn &optional (base ebp-tn)) + (ea-for-cxf-stack tn :double :imag base)) + #!+long-float + (defun ea-for-clf-real-stack (tn &optional (base ebp-tn)) + (ea-for-cxf-stack tn :long :real base)) + #!+long-float + (defun ea-for-clf-imag-stack (tn &optional (base ebp-tn)) + (ea-for-cxf-stack tn :long :imag base))) + +;;; Abstract out the copying of a FP register to the FP stack top, and +;;; provide two alternatives for its implementation. Note: it's not +;;; necessary to distinguish between a single or double register move +;;; here. +;;; +;;; Using a Pop then load. +(defun copy-fp-reg-to-fr0 (reg) + (aver (not (zerop (tn-offset reg)))) + (inst fstp fr0-tn) + (inst fld (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (1- (tn-offset reg))))) +;;; Using Fxch then Fst to restore the original reg contents. +#+nil +(defun copy-fp-reg-to-fr0 (reg) + (aver (not (zerop (tn-offset reg)))) + (inst fxch reg) + (inst fst reg)) + +;;; The x86 can't store a long-float to memory without popping the +;;; stack and marking a register as empty, so it is necessary to +;;; restore the register from memory. +#!+long-float +(defun store-long-float (ea) + (inst fstpl ea) + (inst fldl ea)) + +;;;; move functions + +;;; X is source, Y is destination. +(define-move-fun (load-single 2) (vop x y) + ((single-stack) (single-reg)) + (with-empty-tn@fp-top(y) + (inst fld (ea-for-sf-stack x)))) + +(define-move-fun (store-single 2) (vop x y) + ((single-reg) (single-stack)) + (cond ((zerop (tn-offset x)) + (inst fst (ea-for-sf-stack y))) + (t + (inst fxch x) + (inst fst (ea-for-sf-stack y)) + ;; This may not be necessary as ST0 is likely invalid now. + (inst fxch x)))) + +(define-move-fun (load-double 2) (vop x y) + ((double-stack) (double-reg)) + (with-empty-tn@fp-top(y) + (inst fldd (ea-for-df-stack x)))) + +(define-move-fun (store-double 2) (vop x y) + ((double-reg) (double-stack)) + (cond ((zerop (tn-offset x)) + (inst fstd (ea-for-df-stack y))) + (t + (inst fxch x) + (inst fstd (ea-for-df-stack y)) + ;; This may not be necessary as ST0 is likely invalid now. + (inst fxch x)))) + +#!+long-float +(define-move-fun (load-long 2) (vop x y) + ((long-stack) (long-reg)) + (with-empty-tn@fp-top(y) + (inst fldl (ea-for-lf-stack x)))) + +#!+long-float +(define-move-fun (store-long 2) (vop x y) + ((long-reg) (long-stack)) + (cond ((zerop (tn-offset x)) + (store-long-float (ea-for-lf-stack y))) + (t + (inst fxch x) + (store-long-float (ea-for-lf-stack y)) + ;; This may not be necessary as ST0 is likely invalid now. + (inst fxch x)))) + +;;; The i387 has instructions to load some useful constants. This +;;; doesn't save much time but might cut down on memory access and +;;; reduce the size of the constant vector (CV). Intel claims they are +;;; stored in a more precise form on chip. Anyhow, might as well use +;;; the feature. It can be turned off by hacking the +;;; "immediate-constant-sc" in vm.lisp. +(eval-when (:compile-toplevel :execute) + (setf *read-default-float-format* + #!+long-float 'long-float #!-long-float 'double-float)) +(define-move-fun (load-fp-constant 2) (vop x y) + ((fp-constant) (single-reg double-reg #!+long-float long-reg)) + (let ((value (sb!c::constant-value (sb!c::tn-leaf x)))) + (with-empty-tn@fp-top(y) + (cond ((zerop value) + (inst fldz)) + ((= value 1e0) + (inst fld1)) + ((= value (coerce pi *read-default-float-format*)) + (inst fldpi)) + ((= value (log 10e0 2e0)) + (inst fldl2t)) + ((= value (log 2.718281828459045235360287471352662e0 2e0)) + (inst fldl2e)) + ((= value (log 2e0 10e0)) + (inst fldlg2)) + ((= value (log 2e0 2.718281828459045235360287471352662e0)) + (inst fldln2)) + (t (warn "ignoring bogus i387 constant ~A" value)))))) +(eval-when (:compile-toplevel :execute) + (setf *read-default-float-format* 'single-float)) + +;;;; complex float move functions + +(defun complex-single-reg-real-tn (x) + (make-random-tn :kind :normal :sc (sc-or-lose 'single-reg) + :offset (tn-offset x))) +(defun complex-single-reg-imag-tn (x) + (make-random-tn :kind :normal :sc (sc-or-lose 'single-reg) + :offset (1+ (tn-offset x)))) + +(defun complex-double-reg-real-tn (x) + (make-random-tn :kind :normal :sc (sc-or-lose 'double-reg) + :offset (tn-offset x))) +(defun complex-double-reg-imag-tn (x) + (make-random-tn :kind :normal :sc (sc-or-lose 'double-reg) + :offset (1+ (tn-offset x)))) + +#!+long-float +(defun complex-long-reg-real-tn (x) + (make-random-tn :kind :normal :sc (sc-or-lose 'long-reg) + :offset (tn-offset x))) +#!+long-float +(defun complex-long-reg-imag-tn (x) + (make-random-tn :kind :normal :sc (sc-or-lose 'long-reg) + :offset (1+ (tn-offset x)))) + +;;; X is source, Y is destination. +(define-move-fun (load-complex-single 2) (vop x y) + ((complex-single-stack) (complex-single-reg)) + (let ((real-tn (complex-single-reg-real-tn y))) + (with-empty-tn@fp-top (real-tn) + (inst fld (ea-for-csf-real-stack x)))) + (let ((imag-tn (complex-single-reg-imag-tn y))) + (with-empty-tn@fp-top (imag-tn) + (inst fld (ea-for-csf-imag-stack x))))) + +(define-move-fun (store-complex-single 2) (vop x y) + ((complex-single-reg) (complex-single-stack)) + (let ((real-tn (complex-single-reg-real-tn x))) + (cond ((zerop (tn-offset real-tn)) + (inst fst (ea-for-csf-real-stack y))) + (t + (inst fxch real-tn) + (inst fst (ea-for-csf-real-stack y)) + (inst fxch real-tn)))) + (let ((imag-tn (complex-single-reg-imag-tn x))) + (inst fxch imag-tn) + (inst fst (ea-for-csf-imag-stack y)) + (inst fxch imag-tn))) + +(define-move-fun (load-complex-double 2) (vop x y) + ((complex-double-stack) (complex-double-reg)) + (let ((real-tn (complex-double-reg-real-tn y))) + (with-empty-tn@fp-top(real-tn) + (inst fldd (ea-for-cdf-real-stack x)))) + (let ((imag-tn (complex-double-reg-imag-tn y))) + (with-empty-tn@fp-top(imag-tn) + (inst fldd (ea-for-cdf-imag-stack x))))) + +(define-move-fun (store-complex-double 2) (vop x y) + ((complex-double-reg) (complex-double-stack)) + (let ((real-tn (complex-double-reg-real-tn x))) + (cond ((zerop (tn-offset real-tn)) + (inst fstd (ea-for-cdf-real-stack y))) + (t + (inst fxch real-tn) + (inst fstd (ea-for-cdf-real-stack y)) + (inst fxch real-tn)))) + (let ((imag-tn (complex-double-reg-imag-tn x))) + (inst fxch imag-tn) + (inst fstd (ea-for-cdf-imag-stack y)) + (inst fxch imag-tn))) + +#!+long-float +(define-move-fun (load-complex-long 2) (vop x y) + ((complex-long-stack) (complex-long-reg)) + (let ((real-tn (complex-long-reg-real-tn y))) + (with-empty-tn@fp-top(real-tn) + (inst fldl (ea-for-clf-real-stack x)))) + (let ((imag-tn (complex-long-reg-imag-tn y))) + (with-empty-tn@fp-top(imag-tn) + (inst fldl (ea-for-clf-imag-stack x))))) + +#!+long-float +(define-move-fun (store-complex-long 2) (vop x y) + ((complex-long-reg) (complex-long-stack)) + (let ((real-tn (complex-long-reg-real-tn x))) + (cond ((zerop (tn-offset real-tn)) + (store-long-float (ea-for-clf-real-stack y))) + (t + (inst fxch real-tn) + (store-long-float (ea-for-clf-real-stack y)) + (inst fxch real-tn)))) + (let ((imag-tn (complex-long-reg-imag-tn x))) + (inst fxch imag-tn) + (store-long-float (ea-for-clf-imag-stack y)) + (inst fxch imag-tn))) + + +;;;; move VOPs + +;;; float register to register moves +(define-vop (float-move) + (:args (x)) + (:results (y)) + (:note "float move") + (:generator 0 + (unless (location= x y) + (cond ((zerop (tn-offset y)) + (copy-fp-reg-to-fr0 x)) + ((zerop (tn-offset x)) + (inst fstd y)) + (t + (inst fxch x) + (inst fstd y) + (inst fxch x)))))) + +(define-vop (single-move float-move) + (:args (x :scs (single-reg) :target y :load-if (not (location= x y)))) + (:results (y :scs (single-reg) :load-if (not (location= x y))))) +(define-move-vop single-move :move (single-reg) (single-reg)) + +(define-vop (double-move float-move) + (:args (x :scs (double-reg) :target y :load-if (not (location= x y)))) + (:results (y :scs (double-reg) :load-if (not (location= x y))))) +(define-move-vop double-move :move (double-reg) (double-reg)) + +#!+long-float +(define-vop (long-move float-move) + (:args (x :scs (long-reg) :target y :load-if (not (location= x y)))) + (:results (y :scs (long-reg) :load-if (not (location= x y))))) +#!+long-float +(define-move-vop long-move :move (long-reg) (long-reg)) + +;;; complex float register to register moves +(define-vop (complex-float-move) + (:args (x :target y :load-if (not (location= x y)))) + (:results (y :load-if (not (location= x y)))) + (:note "complex float move") + (:generator 0 + (unless (location= x y) + ;; Note the complex-float-regs are aligned to every second + ;; float register so there is not need to worry about overlap. + (let ((x-real (complex-double-reg-real-tn x)) + (y-real (complex-double-reg-real-tn y))) + (cond ((zerop (tn-offset y-real)) + (copy-fp-reg-to-fr0 x-real)) + ((zerop (tn-offset x-real)) + (inst fstd y-real)) + (t + (inst fxch x-real) + (inst fstd y-real) + (inst fxch x-real)))) + (let ((x-imag (complex-double-reg-imag-tn x)) + (y-imag (complex-double-reg-imag-tn y))) + (inst fxch x-imag) + (inst fstd y-imag) + (inst fxch x-imag))))) + +(define-vop (complex-single-move complex-float-move) + (:args (x :scs (complex-single-reg) :target y + :load-if (not (location= x y)))) + (:results (y :scs (complex-single-reg) :load-if (not (location= x y))))) +(define-move-vop complex-single-move :move + (complex-single-reg) (complex-single-reg)) + +(define-vop (complex-double-move complex-float-move) + (:args (x :scs (complex-double-reg) + :target y :load-if (not (location= x y)))) + (:results (y :scs (complex-double-reg) :load-if (not (location= x y))))) +(define-move-vop complex-double-move :move + (complex-double-reg) (complex-double-reg)) + +#!+long-float +(define-vop (complex-long-move complex-float-move) + (:args (x :scs (complex-long-reg) + :target y :load-if (not (location= x y)))) + (:results (y :scs (complex-long-reg) :load-if (not (location= x y))))) +#!+long-float +(define-move-vop complex-long-move :move + (complex-long-reg) (complex-long-reg)) + +;;; Move from float to a descriptor reg. allocating a new float +;;; object in the process. +(define-vop (move-from-single) + (:args (x :scs (single-reg) :to :save)) + (:results (y :scs (descriptor-reg))) + (:node-var node) + (:note "float to pointer coercion") + (:generator 13 + (with-fixed-allocation (y + single-float-widetag + single-float-size node) + (with-tn@fp-top(x) + (inst fst (ea-for-sf-desc y)))))) +(define-move-vop move-from-single :move + (single-reg) (descriptor-reg)) + +(define-vop (move-from-double) + (:args (x :scs (double-reg) :to :save)) + (:results (y :scs (descriptor-reg))) + (:node-var node) + (:note "float to pointer coercion") + (:generator 13 + (with-fixed-allocation (y + double-float-widetag + double-float-size + node) + (with-tn@fp-top(x) + (inst fstd (ea-for-df-desc y)))))) +(define-move-vop move-from-double :move + (double-reg) (descriptor-reg)) + +#!+long-float +(define-vop (move-from-long) + (:args (x :scs (long-reg) :to :save)) + (:results (y :scs (descriptor-reg))) + (:node-var node) + (:note "float to pointer coercion") + (:generator 13 + (with-fixed-allocation (y + long-float-widetag + long-float-size + node) + (with-tn@fp-top(x) + (store-long-float (ea-for-lf-desc y)))))) +#!+long-float +(define-move-vop move-from-long :move + (long-reg) (descriptor-reg)) + +(define-vop (move-from-fp-constant) + (:args (x :scs (fp-constant))) + (:results (y :scs (descriptor-reg))) + (:generator 2 + (ecase (sb!c::constant-value (sb!c::tn-leaf x)) + (0f0 (load-symbol-value y *fp-constant-0f0*)) + (1f0 (load-symbol-value y *fp-constant-1f0*)) + (0d0 (load-symbol-value y *fp-constant-0d0*)) + (1d0 (load-symbol-value y *fp-constant-1d0*)) + #!+long-float + (0l0 (load-symbol-value y *fp-constant-0l0*)) + #!+long-float + (1l0 (load-symbol-value y *fp-constant-1l0*)) + #!+long-float + (#.pi (load-symbol-value y *fp-constant-pi*)) + #!+long-float + (#.(log 10l0 2l0) (load-symbol-value y *fp-constant-l2t*)) + #!+long-float + (#.(log 2.718281828459045235360287471352662L0 2l0) + (load-symbol-value y *fp-constant-l2e*)) + #!+long-float + (#.(log 2l0 10l0) (load-symbol-value y *fp-constant-lg2*)) + #!+long-float + (#.(log 2l0 2.718281828459045235360287471352662L0) + (load-symbol-value y *fp-constant-ln2*))))) +(define-move-vop move-from-fp-constant :move + (fp-constant) (descriptor-reg)) + +;;; Move from a descriptor to a float register. +(define-vop (move-to-single) + (:args (x :scs (descriptor-reg))) + (:results (y :scs (single-reg))) + (:note "pointer to float coercion") + (:generator 2 + (with-empty-tn@fp-top(y) + (inst fld (ea-for-sf-desc x))))) +(define-move-vop move-to-single :move (descriptor-reg) (single-reg)) + +(define-vop (move-to-double) + (:args (x :scs (descriptor-reg))) + (:results (y :scs (double-reg))) + (:note "pointer to float coercion") + (:generator 2 + (with-empty-tn@fp-top(y) + (inst fldd (ea-for-df-desc x))))) +(define-move-vop move-to-double :move (descriptor-reg) (double-reg)) + +#!+long-float +(define-vop (move-to-long) + (:args (x :scs (descriptor-reg))) + (:results (y :scs (long-reg))) + (:note "pointer to float coercion") + (:generator 2 + (with-empty-tn@fp-top(y) + (inst fldl (ea-for-lf-desc x))))) +#!+long-float +(define-move-vop move-to-long :move (descriptor-reg) (long-reg)) + +;;; Move from complex float to a descriptor reg. allocating a new +;;; complex float object in the process. +(define-vop (move-from-complex-single) + (:args (x :scs (complex-single-reg) :to :save)) + (:results (y :scs (descriptor-reg))) + (:node-var node) + (:note "complex float to pointer coercion") + (:generator 13 + (with-fixed-allocation (y + complex-single-float-widetag + complex-single-float-size + node) + (let ((real-tn (complex-single-reg-real-tn x))) + (with-tn@fp-top(real-tn) + (inst fst (ea-for-csf-real-desc y)))) + (let ((imag-tn (complex-single-reg-imag-tn x))) + (with-tn@fp-top(imag-tn) + (inst fst (ea-for-csf-imag-desc y))))))) +(define-move-vop move-from-complex-single :move + (complex-single-reg) (descriptor-reg)) + +(define-vop (move-from-complex-double) + (:args (x :scs (complex-double-reg) :to :save)) + (:results (y :scs (descriptor-reg))) + (:node-var node) + (:note "complex float to pointer coercion") + (:generator 13 + (with-fixed-allocation (y + complex-double-float-widetag + complex-double-float-size + node) + (let ((real-tn (complex-double-reg-real-tn x))) + (with-tn@fp-top(real-tn) + (inst fstd (ea-for-cdf-real-desc y)))) + (let ((imag-tn (complex-double-reg-imag-tn x))) + (with-tn@fp-top(imag-tn) + (inst fstd (ea-for-cdf-imag-desc y))))))) +(define-move-vop move-from-complex-double :move + (complex-double-reg) (descriptor-reg)) + +#!+long-float +(define-vop (move-from-complex-long) + (:args (x :scs (complex-long-reg) :to :save)) + (:results (y :scs (descriptor-reg))) + (:node-var node) + (:note "complex float to pointer coercion") + (:generator 13 + (with-fixed-allocation (y + complex-long-float-widetag + complex-long-float-size + node) + (let ((real-tn (complex-long-reg-real-tn x))) + (with-tn@fp-top(real-tn) + (store-long-float (ea-for-clf-real-desc y)))) + (let ((imag-tn (complex-long-reg-imag-tn x))) + (with-tn@fp-top(imag-tn) + (store-long-float (ea-for-clf-imag-desc y))))))) +#!+long-float +(define-move-vop move-from-complex-long :move + (complex-long-reg) (descriptor-reg)) + +;;; Move from a descriptor to a complex float register. +(macrolet ((frob (name sc format) + `(progn + (define-vop (,name) + (:args (x :scs (descriptor-reg))) + (:results (y :scs (,sc))) + (:note "pointer to complex float coercion") + (:generator 2 + (let ((real-tn (complex-double-reg-real-tn y))) + (with-empty-tn@fp-top(real-tn) + ,@(ecase format + (:single '((inst fld (ea-for-csf-real-desc x)))) + (:double '((inst fldd (ea-for-cdf-real-desc x)))) + #!+long-float + (:long '((inst fldl (ea-for-clf-real-desc x))))))) + (let ((imag-tn (complex-double-reg-imag-tn y))) + (with-empty-tn@fp-top(imag-tn) + ,@(ecase format + (:single '((inst fld (ea-for-csf-imag-desc x)))) + (:double '((inst fldd (ea-for-cdf-imag-desc x)))) + #!+long-float + (:long '((inst fldl (ea-for-clf-imag-desc x))))))))) + (define-move-vop ,name :move (descriptor-reg) (,sc))))) + (frob move-to-complex-single complex-single-reg :single) + (frob move-to-complex-double complex-double-reg :double) + #!+long-float + (frob move-to-complex-double complex-long-reg :long)) + +;;;; the move argument vops +;;;; +;;;; Note these are also used to stuff fp numbers onto the c-call +;;;; stack so the order is different than the lisp-stack. + +;;; the general MOVE-ARG VOP +(macrolet ((frob (name sc stack-sc format) + `(progn + (define-vop (,name) + (:args (x :scs (,sc) :target y) + (fp :scs (any-reg) + :load-if (not (sc-is y ,sc)))) + (:results (y)) + (:note "float argument move") + (:generator ,(case format (:single 2) (:double 3) (:long 4)) + (sc-case y + (,sc + (unless (location= x y) + (cond ((zerop (tn-offset y)) + (copy-fp-reg-to-fr0 x)) + ((zerop (tn-offset x)) + (inst fstd y)) + (t + (inst fxch x) + (inst fstd y) + (inst fxch x))))) + (,stack-sc + (if (= (tn-offset fp) esp-offset) + (let* ((offset (* (tn-offset y) n-word-bytes)) + (ea (make-ea :dword :base fp :disp offset))) + (with-tn@fp-top(x) + ,@(ecase format + (:single '((inst fst ea))) + (:double '((inst fstd ea))) + #!+long-float + (:long '((store-long-float ea)))))) + (let ((ea (make-ea + :dword :base fp + :disp (- (* (+ (tn-offset y) + ,(case format + (:single 1) + (:double 2) + (:long 3))) + n-word-bytes))))) + (with-tn@fp-top(x) + ,@(ecase format + (:single '((inst fst ea))) + (:double '((inst fstd ea))) + #!+long-float + (:long '((store-long-float ea))))))))))) + (define-move-vop ,name :move-arg + (,sc descriptor-reg) (,sc))))) + (frob move-single-float-arg single-reg single-stack :single) + (frob move-double-float-arg double-reg double-stack :double) + #!+long-float + (frob move-long-float-arg long-reg long-stack :long)) + +;;;; complex float MOVE-ARG VOP +(macrolet ((frob (name sc stack-sc format) + `(progn + (define-vop (,name) + (:args (x :scs (,sc) :target y) + (fp :scs (any-reg) + :load-if (not (sc-is y ,sc)))) + (:results (y)) + (:note "complex float argument move") + (:generator ,(ecase format (:single 2) (:double 3) (:long 4)) + (sc-case y + (,sc + (unless (location= x y) + (let ((x-real (complex-double-reg-real-tn x)) + (y-real (complex-double-reg-real-tn y))) + (cond ((zerop (tn-offset y-real)) + (copy-fp-reg-to-fr0 x-real)) + ((zerop (tn-offset x-real)) + (inst fstd y-real)) + (t + (inst fxch x-real) + (inst fstd y-real) + (inst fxch x-real)))) + (let ((x-imag (complex-double-reg-imag-tn x)) + (y-imag (complex-double-reg-imag-tn y))) + (inst fxch x-imag) + (inst fstd y-imag) + (inst fxch x-imag)))) + (,stack-sc + (let ((real-tn (complex-double-reg-real-tn x))) + (cond ((zerop (tn-offset real-tn)) + ,@(ecase format + (:single + '((inst fst + (ea-for-csf-real-stack y fp)))) + (:double + '((inst fstd + (ea-for-cdf-real-stack y fp)))) + #!+long-float + (:long + '((store-long-float + (ea-for-clf-real-stack y fp)))))) + (t + (inst fxch real-tn) + ,@(ecase format + (:single + '((inst fst + (ea-for-csf-real-stack y fp)))) + (:double + '((inst fstd + (ea-for-cdf-real-stack y fp)))) + #!+long-float + (:long + '((store-long-float + (ea-for-clf-real-stack y fp))))) + (inst fxch real-tn)))) + (let ((imag-tn (complex-double-reg-imag-tn x))) + (inst fxch imag-tn) + ,@(ecase format + (:single + '((inst fst (ea-for-csf-imag-stack y fp)))) + (:double + '((inst fstd (ea-for-cdf-imag-stack y fp)))) + #!+long-float + (:long + '((store-long-float + (ea-for-clf-imag-stack y fp))))) + (inst fxch imag-tn)))))) + (define-move-vop ,name :move-arg + (,sc descriptor-reg) (,sc))))) + (frob move-complex-single-float-arg + complex-single-reg complex-single-stack :single) + (frob move-complex-double-float-arg + complex-double-reg complex-double-stack :double) + #!+long-float + (frob move-complex-long-float-arg + complex-long-reg complex-long-stack :long)) + +(define-move-vop move-arg :move-arg + (single-reg double-reg #!+long-float long-reg + complex-single-reg complex-double-reg #!+long-float complex-long-reg) + (descriptor-reg)) + + +;;;; arithmetic VOPs + +;;; dtc: the floating point arithmetic vops +;;; +;;; Note: Although these can accept x and y on the stack or pointed to +;;; from a descriptor register, they will work with register loading +;;; without these. Same deal with the result - it need only be a +;;; register. When load-tns are needed they will probably be in ST0 +;;; and the code below should be able to correctly handle all cases. +;;; +;;; However it seems to produce better code if all arg. and result +;;; options are used; on the P86 there is no extra cost in using a +;;; memory operand to the FP instructions - not so on the PPro. +;;; +;;; It may also be useful to handle constant args? +;;; +;;; 22-Jul-97: descriptor args lose in some simple cases when +;;; a function result computed in a loop. Then Python insists +;;; on consing the intermediate values! For example +;;; +;;; (defun test(a n) +;;; (declare (type (simple-array double-float (*)) a) +;;; (fixnum n)) +;;; (let ((sum 0d0)) +;;; (declare (type double-float sum)) +;;; (dotimes (i n) +;;; (incf sum (* (aref a i)(aref a i)))) +;;; sum)) +;;; +;;; So, disabling descriptor args until this can be fixed elsewhere. +(macrolet + ((frob (op fop-sti fopr-sti + fop fopr sname scost + fopd foprd dname dcost + lname lcost) + #!-long-float (declare (ignore lcost lname)) + `(progn + (define-vop (,sname) + (:translate ,op) + (:args (x :scs (single-reg single-stack #+nil descriptor-reg) + :to :eval) + (y :scs (single-reg single-stack #+nil descriptor-reg) + :to :eval)) + (:temporary (:sc single-reg :offset fr0-offset + :from :eval :to :result) fr0) + (:results (r :scs (single-reg single-stack))) + (:arg-types single-float single-float) + (:result-types single-float) + (:policy :fast-safe) + (:note "inline float arithmetic") + (:vop-var vop) + (:save-p :compute-only) + (:node-var node) + (:generator ,scost + ;; Handle a few special cases + (cond + ;; x, y, and r are the same register. + ((and (sc-is x single-reg) (location= x r) (location= y r)) + (cond ((zerop (tn-offset r)) + (inst ,fop fr0)) + (t + (inst fxch r) + (inst ,fop fr0) + ;; XX the source register will not be valid. + (note-next-instruction vop :internal-error) + (inst fxch r)))) + + ;; x and r are the same register. + ((and (sc-is x single-reg) (location= x r)) + (cond ((zerop (tn-offset r)) + (sc-case y + (single-reg + ;; ST(0) = ST(0) op ST(y) + (inst ,fop y)) + (single-stack + ;; ST(0) = ST(0) op Mem + (inst ,fop (ea-for-sf-stack y))) + (descriptor-reg + (inst ,fop (ea-for-sf-desc y))))) + (t + ;; y to ST0 + (sc-case y + (single-reg + (unless (zerop (tn-offset y)) + (copy-fp-reg-to-fr0 y))) + ((single-stack descriptor-reg) + (inst fstp fr0) + (if (sc-is y single-stack) + (inst fld (ea-for-sf-stack y)) + (inst fld (ea-for-sf-desc y))))) + ;; ST(i) = ST(i) op ST0 + (inst ,fop-sti r))) + (maybe-fp-wait node vop)) + ;; y and r are the same register. + ((and (sc-is y single-reg) (location= y r)) + (cond ((zerop (tn-offset r)) + (sc-case x + (single-reg + ;; ST(0) = ST(x) op ST(0) + (inst ,fopr x)) + (single-stack + ;; ST(0) = Mem op ST(0) + (inst ,fopr (ea-for-sf-stack x))) + (descriptor-reg + (inst ,fopr (ea-for-sf-desc x))))) + (t + ;; x to ST0 + (sc-case x + (single-reg + (unless (zerop (tn-offset x)) + (copy-fp-reg-to-fr0 x))) + ((single-stack descriptor-reg) + (inst fstp fr0) + (if (sc-is x single-stack) + (inst fld (ea-for-sf-stack x)) + (inst fld (ea-for-sf-desc x))))) + ;; ST(i) = ST(0) op ST(i) + (inst ,fopr-sti r))) + (maybe-fp-wait node vop)) + ;; the default case + (t + ;; Get the result to ST0. + + ;; Special handling is needed if x or y are in ST0, and + ;; simpler code is generated. + (cond + ;; x is in ST0 + ((and (sc-is x single-reg) (zerop (tn-offset x))) + ;; ST0 = ST0 op y + (sc-case y + (single-reg + (inst ,fop y)) + (single-stack + (inst ,fop (ea-for-sf-stack y))) + (descriptor-reg + (inst ,fop (ea-for-sf-desc y))))) + ;; y is in ST0 + ((and (sc-is y single-reg) (zerop (tn-offset y))) + ;; ST0 = x op ST0 + (sc-case x + (single-reg + (inst ,fopr x)) + (single-stack + (inst ,fopr (ea-for-sf-stack x))) + (descriptor-reg + (inst ,fopr (ea-for-sf-desc x))))) + (t + ;; x to ST0 + (sc-case x + (single-reg + (copy-fp-reg-to-fr0 x)) + (single-stack + (inst fstp fr0) + (inst fld (ea-for-sf-stack x))) + (descriptor-reg + (inst fstp fr0) + (inst fld (ea-for-sf-desc x)))) + ;; ST0 = ST0 op y + (sc-case y + (single-reg + (inst ,fop y)) + (single-stack + (inst ,fop (ea-for-sf-stack y))) + (descriptor-reg + (inst ,fop (ea-for-sf-desc y)))))) + + (note-next-instruction vop :internal-error) + + ;; Finally save the result. + (sc-case r + (single-reg + (cond ((zerop (tn-offset r)) + (maybe-fp-wait node)) + (t + (inst fst r)))) + (single-stack + (inst fst (ea-for-sf-stack r)))))))) + + (define-vop (,dname) + (:translate ,op) + (:args (x :scs (double-reg double-stack #+nil descriptor-reg) + :to :eval) + (y :scs (double-reg double-stack #+nil descriptor-reg) + :to :eval)) + (:temporary (:sc double-reg :offset fr0-offset + :from :eval :to :result) fr0) + (:results (r :scs (double-reg double-stack))) + (:arg-types double-float double-float) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline float arithmetic") + (:vop-var vop) + (:save-p :compute-only) + (:node-var node) + (:generator ,dcost + ;; Handle a few special cases. + (cond + ;; x, y, and r are the same register. + ((and (sc-is x double-reg) (location= x r) (location= y r)) + (cond ((zerop (tn-offset r)) + (inst ,fop fr0)) + (t + (inst fxch x) + (inst ,fopd fr0) + ;; XX the source register will not be valid. + (note-next-instruction vop :internal-error) + (inst fxch r)))) + + ;; x and r are the same register. + ((and (sc-is x double-reg) (location= x r)) + (cond ((zerop (tn-offset r)) + (sc-case y + (double-reg + ;; ST(0) = ST(0) op ST(y) + (inst ,fopd y)) + (double-stack + ;; ST(0) = ST(0) op Mem + (inst ,fopd (ea-for-df-stack y))) + (descriptor-reg + (inst ,fopd (ea-for-df-desc y))))) + (t + ;; y to ST0 + (sc-case y + (double-reg + (unless (zerop (tn-offset y)) + (copy-fp-reg-to-fr0 y))) + ((double-stack descriptor-reg) + (inst fstp fr0) + (if (sc-is y double-stack) + (inst fldd (ea-for-df-stack y)) + (inst fldd (ea-for-df-desc y))))) + ;; ST(i) = ST(i) op ST0 + (inst ,fop-sti r))) + (maybe-fp-wait node vop)) + ;; y and r are the same register. + ((and (sc-is y double-reg) (location= y r)) + (cond ((zerop (tn-offset r)) + (sc-case x + (double-reg + ;; ST(0) = ST(x) op ST(0) + (inst ,foprd x)) + (double-stack + ;; ST(0) = Mem op ST(0) + (inst ,foprd (ea-for-df-stack x))) + (descriptor-reg + (inst ,foprd (ea-for-df-desc x))))) + (t + ;; x to ST0 + (sc-case x + (double-reg + (unless (zerop (tn-offset x)) + (copy-fp-reg-to-fr0 x))) + ((double-stack descriptor-reg) + (inst fstp fr0) + (if (sc-is x double-stack) + (inst fldd (ea-for-df-stack x)) + (inst fldd (ea-for-df-desc x))))) + ;; ST(i) = ST(0) op ST(i) + (inst ,fopr-sti r))) + (maybe-fp-wait node vop)) + ;; the default case + (t + ;; Get the result to ST0. + + ;; Special handling is needed if x or y are in ST0, and + ;; simpler code is generated. + (cond + ;; x is in ST0 + ((and (sc-is x double-reg) (zerop (tn-offset x))) + ;; ST0 = ST0 op y + (sc-case y + (double-reg + (inst ,fopd y)) + (double-stack + (inst ,fopd (ea-for-df-stack y))) + (descriptor-reg + (inst ,fopd (ea-for-df-desc y))))) + ;; y is in ST0 + ((and (sc-is y double-reg) (zerop (tn-offset y))) + ;; ST0 = x op ST0 + (sc-case x + (double-reg + (inst ,foprd x)) + (double-stack + (inst ,foprd (ea-for-df-stack x))) + (descriptor-reg + (inst ,foprd (ea-for-df-desc x))))) + (t + ;; x to ST0 + (sc-case x + (double-reg + (copy-fp-reg-to-fr0 x)) + (double-stack + (inst fstp fr0) + (inst fldd (ea-for-df-stack x))) + (descriptor-reg + (inst fstp fr0) + (inst fldd (ea-for-df-desc x)))) + ;; ST0 = ST0 op y + (sc-case y + (double-reg + (inst ,fopd y)) + (double-stack + (inst ,fopd (ea-for-df-stack y))) + (descriptor-reg + (inst ,fopd (ea-for-df-desc y)))))) + + (note-next-instruction vop :internal-error) + + ;; Finally save the result. + (sc-case r + (double-reg + (cond ((zerop (tn-offset r)) + (maybe-fp-wait node)) + (t + (inst fst r)))) + (double-stack + (inst fstd (ea-for-df-stack r)))))))) + + #!+long-float + (define-vop (,lname) + (:translate ,op) + (:args (x :scs (long-reg) :to :eval) + (y :scs (long-reg) :to :eval)) + (:temporary (:sc long-reg :offset fr0-offset + :from :eval :to :result) fr0) + (:results (r :scs (long-reg))) + (:arg-types long-float long-float) + (:result-types long-float) + (:policy :fast-safe) + (:note "inline float arithmetic") + (:vop-var vop) + (:save-p :compute-only) + (:node-var node) + (:generator ,lcost + ;; Handle a few special cases. + (cond + ;; x, y, and r are the same register. + ((and (location= x r) (location= y r)) + (cond ((zerop (tn-offset r)) + (inst ,fop fr0)) + (t + (inst fxch x) + (inst ,fopd fr0) + ;; XX the source register will not be valid. + (note-next-instruction vop :internal-error) + (inst fxch r)))) + + ;; x and r are the same register. + ((location= x r) + (cond ((zerop (tn-offset r)) + ;; ST(0) = ST(0) op ST(y) + (inst ,fopd y)) + (t + ;; y to ST0 + (unless (zerop (tn-offset y)) + (copy-fp-reg-to-fr0 y)) + ;; ST(i) = ST(i) op ST0 + (inst ,fop-sti r))) + (maybe-fp-wait node vop)) + ;; y and r are the same register. + ((location= y r) + (cond ((zerop (tn-offset r)) + ;; ST(0) = ST(x) op ST(0) + (inst ,foprd x)) + (t + ;; x to ST0 + (unless (zerop (tn-offset x)) + (copy-fp-reg-to-fr0 x)) + ;; ST(i) = ST(0) op ST(i) + (inst ,fopr-sti r))) + (maybe-fp-wait node vop)) + ;; the default case + (t + ;; Get the result to ST0. + + ;; Special handling is needed if x or y are in ST0, and + ;; simpler code is generated. + (cond + ;; x is in ST0. + ((zerop (tn-offset x)) + ;; ST0 = ST0 op y + (inst ,fopd y)) + ;; y is in ST0 + ((zerop (tn-offset y)) + ;; ST0 = x op ST0 + (inst ,foprd x)) + (t + ;; x to ST0 + (copy-fp-reg-to-fr0 x) + ;; ST0 = ST0 op y + (inst ,fopd y))) + + (note-next-instruction vop :internal-error) + + ;; Finally save the result. + (cond ((zerop (tn-offset r)) + (maybe-fp-wait node)) + (t + (inst fst r)))))))))) + + (frob + fadd-sti fadd-sti + fadd fadd +/single-float 2 + faddd faddd +/double-float 2 + +/long-float 2) + (frob - fsub-sti fsubr-sti + fsub fsubr -/single-float 2 + fsubd fsubrd -/double-float 2 + -/long-float 2) + (frob * fmul-sti fmul-sti + fmul fmul */single-float 3 + fmuld fmuld */double-float 3 + */long-float 3) + (frob / fdiv-sti fdivr-sti + fdiv fdivr //single-float 12 + fdivd fdivrd //double-float 12 + //long-float 12)) + +(macrolet ((frob (name inst translate sc type) + `(define-vop (,name) + (:args (x :scs (,sc) :target fr0)) + (:results (y :scs (,sc))) + (:translate ,translate) + (:policy :fast-safe) + (:arg-types ,type) + (:result-types ,type) + (:temporary (:sc double-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:ignore fr0) + (:note "inline float arithmetic") + (:vop-var vop) + (:save-p :compute-only) + (:generator 1 + (note-this-location vop :internal-error) + (unless (zerop (tn-offset x)) + (inst fxch x) ; x to top of stack + (unless (location= x y) + (inst fst x))) ; Maybe save it. + (inst ,inst) ; Clobber st0. + (unless (zerop (tn-offset y)) + (inst fst y)))))) + + (frob abs/single-float fabs abs single-reg single-float) + (frob abs/double-float fabs abs double-reg double-float) + #!+long-float + (frob abs/long-float fabs abs long-reg long-float) + (frob %negate/single-float fchs %negate single-reg single-float) + (frob %negate/double-float fchs %negate double-reg double-float) + #!+long-float + (frob %negate/long-float fchs %negate long-reg long-float)) + +;;;; comparison + +(define-vop (=/float) + (:args (x) (y)) + (:temporary (:sc word-reg :offset eax-offset :from :eval) temp) + (:conditional) + (:info target not-p) + (:policy :fast-safe) + (:vop-var vop) + (:save-p :compute-only) + (:note "inline float comparison") + (:ignore temp) + (:generator 3 + (note-this-location vop :internal-error) + (cond + ;; x is in ST0; y is in any reg. + ((zerop (tn-offset x)) + (inst fucom y)) + ;; y is in ST0; x is in another reg. + ((zerop (tn-offset y)) + (inst fucom x)) + ;; x and y are the same register, not ST0 + ((location= x y) + (inst fxch x) + (inst fucom fr0-tn) + (inst fxch x)) + ;; x and y are different registers, neither ST0. + (t + (inst fxch x) + (inst fucom y) + (inst fxch x))) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x45) ; C3 C2 C0 + (inst cmp ah-tn #x40) + (inst jmp (if not-p :ne :e) target))) + +(define-vop (=/single-float =/float) + (:translate =) + (:args (x :scs (single-reg)) + (y :scs (single-reg))) + (:arg-types single-float single-float)) + +(define-vop (=/double-float =/float) + (:translate =) + (:args (x :scs (double-reg)) + (y :scs (double-reg))) + (:arg-types double-float double-float)) + +#!+long-float +(define-vop (=/long-float =/float) + (:translate =) + (:args (x :scs (long-reg)) + (y :scs (long-reg))) + (:arg-types long-float long-float)) + +(define-vop (single-float) + (:translate >) + (:args (x :scs (single-reg single-stack descriptor-reg)) + (y :scs (single-reg single-stack descriptor-reg))) + (:arg-types single-float single-float) + (:temporary (:sc single-reg :offset fr0-offset :from :eval) fr0) + (:temporary (:sc word-reg :offset eax-offset :from :eval) temp) + (:conditional) + (:info target not-p) + (:policy :fast-safe) + (:note "inline float comparison") + (:ignore temp) + (:generator 3 + ;; Handle a few special cases. + (cond + ;; y is ST0. + ((and (sc-is y single-reg) (zerop (tn-offset y))) + (sc-case x + (single-reg + (inst fcom x)) + ((single-stack descriptor-reg) + (if (sc-is x single-stack) + (inst fcom (ea-for-sf-stack x)) + (inst fcom (ea-for-sf-desc x))))) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x45) + (inst cmp ah-tn #x01)) + + ;; general case when y is not in ST0 + (t + ;; x to ST0 + (sc-case x + (single-reg + (unless (zerop (tn-offset x)) + (copy-fp-reg-to-fr0 x))) + ((single-stack descriptor-reg) + (inst fstp fr0) + (if (sc-is x single-stack) + (inst fld (ea-for-sf-stack x)) + (inst fld (ea-for-sf-desc x))))) + (sc-case y + (single-reg + (inst fcom y)) + ((single-stack descriptor-reg) + (if (sc-is y single-stack) + (inst fcom (ea-for-sf-stack y)) + (inst fcom (ea-for-sf-desc y))))) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x45))) + (inst jmp (if not-p :ne :e) target))) + +(define-vop (>double-float) + (:translate >) + (:args (x :scs (double-reg double-stack descriptor-reg)) + (y :scs (double-reg double-stack descriptor-reg))) + (:arg-types double-float double-float) + (:temporary (:sc double-reg :offset fr0-offset :from :eval) fr0) + (:temporary (:sc word-reg :offset eax-offset :from :eval) temp) + (:conditional) + (:info target not-p) + (:policy :fast-safe) + (:note "inline float comparison") + (:ignore temp) + (:generator 3 + ;; Handle a few special cases. + (cond + ;; y is ST0. + ((and (sc-is y double-reg) (zerop (tn-offset y))) + (sc-case x + (double-reg + (inst fcomd x)) + ((double-stack descriptor-reg) + (if (sc-is x double-stack) + (inst fcomd (ea-for-df-stack x)) + (inst fcomd (ea-for-df-desc x))))) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x45) + (inst cmp ah-tn #x01)) + + ;; general case when y is not in ST0 + (t + ;; x to ST0 + (sc-case x + (double-reg + (unless (zerop (tn-offset x)) + (copy-fp-reg-to-fr0 x))) + ((double-stack descriptor-reg) + (inst fstp fr0) + (if (sc-is x double-stack) + (inst fldd (ea-for-df-stack x)) + (inst fldd (ea-for-df-desc x))))) + (sc-case y + (double-reg + (inst fcomd y)) + ((double-stack descriptor-reg) + (if (sc-is y double-stack) + (inst fcomd (ea-for-df-stack y)) + (inst fcomd (ea-for-df-desc y))))) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x45))) + (inst jmp (if not-p :ne :e) target))) + +#!+long-float +(define-vop (>long-float) + (:translate >) + (:args (x :scs (long-reg)) + (y :scs (long-reg))) + (:arg-types long-float long-float) + (:temporary (:sc word-reg :offset eax-offset :from :eval) temp) + (:conditional) + (:info target not-p) + (:policy :fast-safe) + (:note "inline float comparison") + (:ignore temp) + (:generator 3 + (cond + ;; y is in ST0; x is in any reg. + ((zerop (tn-offset y)) + (inst fcomd x) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x45) + (inst cmp ah-tn #x01)) + ;; x is in ST0; y is in another reg. + ((zerop (tn-offset x)) + (inst fcomd y) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x45)) + ;; y and x are the same register, not ST0 + ;; y and x are different registers, neither ST0. + (t + (inst fxch x) + (inst fcomd y) + (inst fxch x) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x45))) + (inst jmp (if not-p :ne :e) target))) + +;;; Comparisons with 0 can use the FTST instruction. + +(define-vop (float-test) + (:args (x)) + (:temporary (:sc word-reg :offset eax-offset :from :eval) temp) + (:conditional) + (:info target not-p y) + (:variant-vars code) + (:policy :fast-safe) + (:vop-var vop) + (:save-p :compute-only) + (:note "inline float comparison") + (:ignore temp y) + (:generator 2 + (note-this-location vop :internal-error) + (cond + ;; x is in ST0 + ((zerop (tn-offset x)) + (inst ftst)) + ;; x not ST0 + (t + (inst fxch x) + (inst ftst) + (inst fxch x))) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x45) ; C3 C2 C0 + (unless (zerop code) + (inst cmp ah-tn code)) + (inst jmp (if not-p :ne :e) target))) + +(define-vop (=0/single-float float-test) + (:translate =) + (:args (x :scs (single-reg))) + (:arg-types single-float (:constant (single-float 0f0 0f0))) + (:variant #x40)) +(define-vop (=0/double-float float-test) + (:translate =) + (:args (x :scs (double-reg))) + (:arg-types double-float (:constant (double-float 0d0 0d0))) + (:variant #x40)) +#!+long-float +(define-vop (=0/long-float float-test) + (:translate =) + (:args (x :scs (long-reg))) + (:arg-types long-float (:constant (long-float 0l0 0l0))) + (:variant #x40)) + +(define-vop (<0/single-float float-test) + (:translate <) + (:args (x :scs (single-reg))) + (:arg-types single-float (:constant (single-float 0f0 0f0))) + (:variant #x01)) +(define-vop (<0/double-float float-test) + (:translate <) + (:args (x :scs (double-reg))) + (:arg-types double-float (:constant (double-float 0d0 0d0))) + (:variant #x01)) +#!+long-float +(define-vop (<0/long-float float-test) + (:translate <) + (:args (x :scs (long-reg))) + (:arg-types long-float (:constant (long-float 0l0 0l0))) + (:variant #x01)) + +(define-vop (>0/single-float float-test) + (:translate >) + (:args (x :scs (single-reg))) + (:arg-types single-float (:constant (single-float 0f0 0f0))) + (:variant #x00)) +(define-vop (>0/double-float float-test) + (:translate >) + (:args (x :scs (double-reg))) + (:arg-types double-float (:constant (double-float 0d0 0d0))) + (:variant #x00)) +#!+long-float +(define-vop (>0/long-float float-test) + (:translate >) + (:args (x :scs (long-reg))) + (:arg-types long-float (:constant (long-float 0l0 0l0))) + (:variant #x00)) + +#!+long-float +(deftransform eql ((x y) (long-float long-float)) + `(and (= (long-float-low-bits x) (long-float-low-bits y)) + (= (long-float-high-bits x) (long-float-high-bits y)) + (= (long-float-exp-bits x) (long-float-exp-bits y)))) + +;;;; conversion + +(macrolet ((frob (name translate to-sc to-type) + `(define-vop (,name) + (:args (x :scs (signed-stack signed-reg) :target temp)) + (:temporary (:sc signed-stack) temp) + (:results (y :scs (,to-sc))) + (:arg-types signed-num) + (:result-types ,to-type) + (:policy :fast-safe) + (:note "inline float coercion") + (:translate ,translate) + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (sc-case x + (signed-reg + (inst mov temp x) + (with-empty-tn@fp-top(y) + (note-this-location vop :internal-error) + (inst fild temp))) + (signed-stack + (with-empty-tn@fp-top(y) + (note-this-location vop :internal-error) + (inst fild x)))))))) + (frob %single-float/signed %single-float single-reg single-float) + (frob %double-float/signed %double-float double-reg double-float) + #!+long-float + (frob %long-float/signed %long-float long-reg long-float)) + +(macrolet ((frob (name translate to-sc to-type) + `(define-vop (,name) + (:args (x :scs (unsigned-reg))) + (:results (y :scs (,to-sc))) + (:arg-types unsigned-num) + (:result-types ,to-type) + (:policy :fast-safe) + (:note "inline float coercion") + (:translate ,translate) + (:vop-var vop) + (:save-p :compute-only) + (:generator 6 + (inst push 0) + (inst push x) + (with-empty-tn@fp-top(y) + (note-this-location vop :internal-error) + (inst fildl (make-ea :dword :base esp-tn))) + (inst add esp-tn 8))))) + (frob %single-float/unsigned %single-float single-reg single-float) + (frob %double-float/unsigned %double-float double-reg double-float) + #!+long-float + (frob %long-float/unsigned %long-float long-reg long-float)) + +;;; These should be no-ops but the compiler might want to move some +;;; things around. +(macrolet ((frob (name translate from-sc from-type to-sc to-type) + `(define-vop (,name) + (:args (x :scs (,from-sc) :target y)) + (:results (y :scs (,to-sc))) + (:arg-types ,from-type) + (:result-types ,to-type) + (:policy :fast-safe) + (:note "inline float coercion") + (:translate ,translate) + (:vop-var vop) + (:save-p :compute-only) + (:generator 2 + (note-this-location vop :internal-error) + (unless (location= x y) + (cond + ((zerop (tn-offset x)) + ;; x is in ST0, y is in another reg. not ST0 + (inst fst y)) + ((zerop (tn-offset y)) + ;; y is in ST0, x is in another reg. not ST0 + (copy-fp-reg-to-fr0 x)) + (t + ;; Neither x or y are in ST0, and they are not in + ;; the same reg. + (inst fxch x) + (inst fst y) + (inst fxch x)))))))) + + (frob %single-float/double-float %single-float double-reg + double-float single-reg single-float) + #!+long-float + (frob %single-float/long-float %single-float long-reg + long-float single-reg single-float) + (frob %double-float/single-float %double-float single-reg single-float + double-reg double-float) + #!+long-float + (frob %double-float/long-float %double-float long-reg long-float + double-reg double-float) + #!+long-float + (frob %long-float/single-float %long-float single-reg single-float + long-reg long-float) + #!+long-float + (frob %long-float/double-float %long-float double-reg double-float + long-reg long-float)) + +(macrolet ((frob (trans from-sc from-type round-p) + `(define-vop (,(symbolicate trans "/" from-type)) + (:args (x :scs (,from-sc))) + (:temporary (:sc signed-stack) stack-temp) + ,@(unless round-p + '((:temporary (:sc unsigned-stack) scw) + (:temporary (:sc any-reg) rcw))) + (:results (y :scs (signed-reg))) + (:arg-types ,from-type) + (:result-types signed-num) + (:translate ,trans) + (:policy :fast-safe) + (:note "inline float truncate") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + ,@(unless round-p + '((note-this-location vop :internal-error) + ;; Catch any pending FPE exceptions. + (inst wait))) + (,(if round-p 'progn 'pseudo-atomic) + ;; Normal mode (for now) is "round to best". + (with-tn@fp-top (x) + ,@(unless round-p + '((inst fnstcw scw) ; save current control word + (move rcw scw) ; into 16-bit register + (inst or rcw (ash #b11 10)) ; CHOP + (move stack-temp rcw) + (inst fldcw stack-temp))) + (sc-case y + (signed-stack + (inst fist y)) + (signed-reg + (inst fist stack-temp) + (inst mov y stack-temp))) + ,@(unless round-p + '((inst fldcw scw))))))))) + (frob %unary-truncate single-reg single-float nil) + (frob %unary-truncate double-reg double-float nil) + #!+long-float + (frob %unary-truncate long-reg long-float nil) + (frob %unary-round single-reg single-float t) + (frob %unary-round double-reg double-float t) + #!+long-float + (frob %unary-round long-reg long-float t)) + +(macrolet ((frob (trans from-sc from-type round-p) + `(define-vop (,(symbolicate trans "/" from-type "=>UNSIGNED")) + (:args (x :scs (,from-sc) :target fr0)) + (:temporary (:sc double-reg :offset fr0-offset + :from :argument :to :result) fr0) + ,@(unless round-p + '((:temporary (:sc unsigned-stack) stack-temp) + (:temporary (:sc unsigned-stack) scw) + (:temporary (:sc any-reg) rcw))) + (:results (y :scs (unsigned-reg))) + (:arg-types ,from-type) + (:result-types unsigned-num) + (:translate ,trans) + (:policy :fast-safe) + (:note "inline float truncate") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + ,@(unless round-p + '((note-this-location vop :internal-error) + ;; Catch any pending FPE exceptions. + (inst wait))) + ;; Normal mode (for now) is "round to best". + (unless (zerop (tn-offset x)) + (copy-fp-reg-to-fr0 x)) + ,@(unless round-p + '((inst fnstcw scw) ; save current control word + (move rcw scw) ; into 16-bit register + (inst or rcw (ash #b11 10)) ; CHOP + (move stack-temp rcw) + (inst fldcw stack-temp))) + (inst sub esp-tn 8) + (inst fistpl (make-ea :dword :base esp-tn)) + (inst pop y) + (inst fld fr0) ; copy fr0 to at least restore stack. + (inst add esp-tn 4) + ,@(unless round-p + '((inst fldcw scw))))))) + (frob %unary-truncate single-reg single-float nil) + (frob %unary-truncate double-reg double-float nil) + #!+long-float + (frob %unary-truncate long-reg long-float nil) + (frob %unary-round single-reg single-float t) + (frob %unary-round double-reg double-float t) + #!+long-float + (frob %unary-round long-reg long-float t)) + +(define-vop (make-single-float) + (:args (bits :scs (signed-reg) :target res + :load-if (not (or (and (sc-is bits signed-stack) + (sc-is res single-reg)) + (and (sc-is bits signed-stack) + (sc-is res single-stack) + (location= bits res)))))) + (:results (res :scs (single-reg single-stack))) + (:temporary (:sc signed-stack) stack-temp) + (:arg-types signed-num) + (:result-types single-float) + (:translate make-single-float) + (:policy :fast-safe) + (:vop-var vop) + (:generator 4 + (sc-case res + (single-stack + (sc-case bits + (signed-reg + (inst mov res bits)) + (signed-stack + (aver (location= bits res))))) + (single-reg + (sc-case bits + (signed-reg + ;; source must be in memory + (inst mov stack-temp bits) + (with-empty-tn@fp-top(res) + (inst fld stack-temp))) + (signed-stack + (with-empty-tn@fp-top(res) + (inst fld bits)))))))) + +(define-vop (make-double-float) + (:args (hi-bits :scs (signed-reg)) + (lo-bits :scs (unsigned-reg))) + (:results (res :scs (double-reg))) + (:temporary (:sc double-stack) temp) + (:arg-types signed-num unsigned-num) + (:result-types double-float) + (:translate make-double-float) + (:policy :fast-safe) + (:vop-var vop) + (:generator 2 + (let ((offset (1+ (tn-offset temp)))) + (storew hi-bits ebp-tn (- offset)) + (storew lo-bits ebp-tn (- (1+ offset))) + (with-empty-tn@fp-top(res) + (inst fldd (make-ea :dword :base ebp-tn + :disp (- (* (1+ offset) n-word-bytes)))))))) + +#!+long-float +(define-vop (make-long-float) + (:args (exp-bits :scs (signed-reg)) + (hi-bits :scs (unsigned-reg)) + (lo-bits :scs (unsigned-reg))) + (:results (res :scs (long-reg))) + (:temporary (:sc long-stack) temp) + (:arg-types signed-num unsigned-num unsigned-num) + (:result-types long-float) + (:translate make-long-float) + (:policy :fast-safe) + (:vop-var vop) + (:generator 3 + (let ((offset (1+ (tn-offset temp)))) + (storew exp-bits ebp-tn (- offset)) + (storew hi-bits ebp-tn (- (1+ offset))) + (storew lo-bits ebp-tn (- (+ offset 2))) + (with-empty-tn@fp-top(res) + (inst fldl (make-ea :dword :base ebp-tn + :disp (- (* (+ offset 2) n-word-bytes)))))))) + +(define-vop (single-float-bits) + (:args (float :scs (single-reg descriptor-reg) + :load-if (not (sc-is float single-stack)))) + (:results (bits :scs (signed-reg))) + (:temporary (:sc signed-stack :from :argument :to :result) stack-temp) + (:arg-types single-float) + (:result-types signed-num) + (:translate single-float-bits) + (:policy :fast-safe) + (:vop-var vop) + (:generator 4 + (sc-case bits + (signed-reg + (sc-case float + (single-reg + (with-tn@fp-top(float) + (inst fst stack-temp) + (inst mov bits stack-temp))) + (single-stack + (inst mov bits float)) + (descriptor-reg + (loadw + bits float single-float-value-slot + other-pointer-lowtag)))) + (signed-stack + (sc-case float + (single-reg + (with-tn@fp-top(float) + (inst fst bits)))))))) + +(define-vop (double-float-high-bits) + (:args (float :scs (double-reg descriptor-reg) + :load-if (not (sc-is float double-stack)))) + (:results (hi-bits :scs (signed-reg))) + (:temporary (:sc double-stack) temp) + (:arg-types double-float) + (:result-types signed-num) + (:translate double-float-high-bits) + (:policy :fast-safe) + (:vop-var vop) + (:generator 5 + (sc-case float + (double-reg + (with-tn@fp-top(float) + (let ((where (make-ea :dword :base ebp-tn + :disp (- (* (+ 2 (tn-offset temp)) + n-word-bytes))))) + (inst fstd where))) + (loadw hi-bits ebp-tn (- (1+ (tn-offset temp))))) + (double-stack + (loadw hi-bits ebp-tn (- (1+ (tn-offset float))))) + (descriptor-reg + (loadw hi-bits float (1+ double-float-value-slot) + other-pointer-lowtag))))) + +(define-vop (double-float-low-bits) + (:args (float :scs (double-reg descriptor-reg) + :load-if (not (sc-is float double-stack)))) + (:results (lo-bits :scs (unsigned-reg))) + (:temporary (:sc double-stack) temp) + (:arg-types double-float) + (:result-types unsigned-num) + (:translate double-float-low-bits) + (:policy :fast-safe) + (:vop-var vop) + (:generator 5 + (sc-case float + (double-reg + (with-tn@fp-top(float) + (let ((where (make-ea :dword :base ebp-tn + :disp (- (* (+ 2 (tn-offset temp)) + n-word-bytes))))) + (inst fstd where))) + (loadw lo-bits ebp-tn (- (+ 2 (tn-offset temp))))) + (double-stack + (loadw lo-bits ebp-tn (- (+ 2 (tn-offset float))))) + (descriptor-reg + (loadw lo-bits float double-float-value-slot + other-pointer-lowtag))))) + +#!+long-float +(define-vop (long-float-exp-bits) + (:args (float :scs (long-reg descriptor-reg) + :load-if (not (sc-is float long-stack)))) + (:results (exp-bits :scs (signed-reg))) + (:temporary (:sc long-stack) temp) + (:arg-types long-float) + (:result-types signed-num) + (:translate long-float-exp-bits) + (:policy :fast-safe) + (:vop-var vop) + (:generator 5 + (sc-case float + (long-reg + (with-tn@fp-top(float) + (let ((where (make-ea :dword :base ebp-tn + :disp (- (* (+ 3 (tn-offset temp)) + n-word-bytes))))) + (store-long-float where))) + (inst movsx exp-bits + (make-ea :word :base ebp-tn + :disp (* (- (1+ (tn-offset temp))) n-word-bytes)))) + (long-stack + (inst movsx exp-bits + (make-ea :word :base ebp-tn + :disp (* (- (1+ (tn-offset float))) n-word-bytes)))) + (descriptor-reg + (inst movsx exp-bits + (make-ea :word :base float + :disp (- (* (+ 2 long-float-value-slot) + n-word-bytes) + other-pointer-lowtag))))))) + +#!+long-float +(define-vop (long-float-high-bits) + (:args (float :scs (long-reg descriptor-reg) + :load-if (not (sc-is float long-stack)))) + (:results (hi-bits :scs (unsigned-reg))) + (:temporary (:sc long-stack) temp) + (:arg-types long-float) + (:result-types unsigned-num) + (:translate long-float-high-bits) + (:policy :fast-safe) + (:vop-var vop) + (:generator 5 + (sc-case float + (long-reg + (with-tn@fp-top(float) + (let ((where (make-ea :dword :base ebp-tn + :disp (- (* (+ 3 (tn-offset temp)) + n-word-bytes))))) + (store-long-float where))) + (loadw hi-bits ebp-tn (- (+ (tn-offset temp) 2)))) + (long-stack + (loadw hi-bits ebp-tn (- (+ (tn-offset float) 2)))) + (descriptor-reg + (loadw hi-bits float (1+ long-float-value-slot) + other-pointer-lowtag))))) + +#!+long-float +(define-vop (long-float-low-bits) + (:args (float :scs (long-reg descriptor-reg) + :load-if (not (sc-is float long-stack)))) + (:results (lo-bits :scs (unsigned-reg))) + (:temporary (:sc long-stack) temp) + (:arg-types long-float) + (:result-types unsigned-num) + (:translate long-float-low-bits) + (:policy :fast-safe) + (:vop-var vop) + (:generator 5 + (sc-case float + (long-reg + (with-tn@fp-top(float) + (let ((where (make-ea :dword :base ebp-tn + :disp (- (* (+ 3 (tn-offset temp)) + n-word-bytes))))) + (store-long-float where))) + (loadw lo-bits ebp-tn (- (+ (tn-offset temp) 3)))) + (long-stack + (loadw lo-bits ebp-tn (- (+ (tn-offset float) 3)))) + (descriptor-reg + (loadw lo-bits float long-float-value-slot + other-pointer-lowtag))))) + +;;;; float mode hackery + +(sb!xc:deftype float-modes () '(unsigned-byte 32)) ; really only 16 +(defknown floating-point-modes () float-modes (flushable)) +(defknown ((setf floating-point-modes)) (float-modes) + float-modes) + +(def!constant npx-env-size (* 7 n-word-bytes)) +(def!constant npx-cw-offset 0) +(def!constant npx-sw-offset 4) + +(define-vop (floating-point-modes) + (:results (res :scs (unsigned-reg))) + (:result-types unsigned-num) + (:translate floating-point-modes) + (:policy :fast-safe) + (:temporary (:sc unsigned-reg :offset eax-offset :target res + :to :result) eax) + (:generator 8 + (inst sub esp-tn npx-env-size) ; Make space on stack. + (inst wait) ; Catch any pending FPE exceptions + (inst fstenv (make-ea :dword :base esp-tn)) ; masks all exceptions + (inst fldenv (make-ea :dword :base esp-tn)) ; Restore previous state. + ;; Move current status to high word. + (inst mov eax (make-ea :dword :base esp-tn :disp (- npx-sw-offset 2))) + ;; Move exception mask to low word. + (inst mov ax-tn (make-ea :word :base esp-tn :disp npx-cw-offset)) + (inst add esp-tn npx-env-size) ; Pop stack. + (inst xor eax #x3f) ; Flip exception mask to trap enable bits. + (move res eax))) + +(define-vop (set-floating-point-modes) + (:args (new :scs (unsigned-reg) :to :result :target res)) + (:results (res :scs (unsigned-reg))) + (:arg-types unsigned-num) + (:result-types unsigned-num) + (:translate (setf floating-point-modes)) + (:policy :fast-safe) + (:temporary (:sc unsigned-reg :offset eax-offset + :from :eval :to :result) eax) + (:generator 3 + (inst sub esp-tn npx-env-size) ; Make space on stack. + (inst wait) ; Catch any pending FPE exceptions. + (inst fstenv (make-ea :dword :base esp-tn)) + (inst mov eax new) + (inst xor eax #x3f) ; Turn trap enable bits into exception mask. + (inst mov (make-ea :word :base esp-tn :disp npx-cw-offset) ax-tn) + (inst shr eax 16) ; position status word + (inst mov (make-ea :word :base esp-tn :disp npx-sw-offset) ax-tn) + (inst fldenv (make-ea :dword :base esp-tn)) + (inst add esp-tn npx-env-size) ; Pop stack. + (move res new))) + +#!-long-float +(progn + +;;; Let's use some of the 80387 special functions. +;;; +;;; These defs will not take effect unless code/irrat.lisp is modified +;;; to remove the inlined alien routine def. + +(macrolet ((frob (func trans op) + `(define-vop (,func) + (:args (x :scs (double-reg) :target fr0)) + (:temporary (:sc double-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:ignore fr0) + (:results (y :scs (double-reg))) + (:arg-types double-float) + (:result-types double-float) + (:translate ,trans) + (:policy :fast-safe) + (:note "inline NPX function") + (:vop-var vop) + (:save-p :compute-only) + (:node-var node) + (:generator 5 + (note-this-location vop :internal-error) + (unless (zerop (tn-offset x)) + (inst fxch x) ; x to top of stack + (unless (location= x y) + (inst fst x))) ; maybe save it + (inst ,op) ; clobber st0 + (cond ((zerop (tn-offset y)) + (maybe-fp-wait node)) + (t + (inst fst y))))))) + + ;; Quick versions of fsin and fcos that require the argument to be + ;; within range 2^63. + (frob fsin-quick %sin-quick fsin) + (frob fcos-quick %cos-quick fcos) + (frob fsqrt %sqrt fsqrt)) + +;;; Quick version of ftan that requires the argument to be within +;;; range 2^63. +(define-vop (ftan-quick) + (:translate %tan-quick) + (:args (x :scs (double-reg) :target fr0)) + (:temporary (:sc double-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:results (y :scs (double-reg))) + (:arg-types double-float) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline tan function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + (case (tn-offset x) + (0 + (inst fstp fr1)) + (1 + (inst fstp fr0)) + (t + (inst fstp fr0) + (inst fstp fr0) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 2))))) + (inst fptan) + ;; Result is in fr1 + (case (tn-offset y) + (0 + (inst fxch fr1)) + (1) + (t + (inst fxch fr1) + (inst fstd y))))) + +;;; KLUDGE: these versions of fsin, fcos, and ftan simply load a 0.0 +;;; result if the argument is out of range 2^63 and would thus be +;;; hopelessly inaccurate. +(macrolet ((frob (func trans op) + `(define-vop (,func) + (:translate ,trans) + (:args (x :scs (double-reg) :target fr0)) + (:temporary (:sc double-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc unsigned-reg :offset eax-offset + :from :argument :to :result) eax) + (:results (y :scs (double-reg))) + (:arg-types double-float) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline sin/cos function") + (:vop-var vop) + (:save-p :compute-only) + (:ignore eax) + (:generator 5 + (note-this-location vop :internal-error) + (unless (zerop (tn-offset x)) + (inst fxch x) ; x to top of stack + (unless (location= x y) + (inst fst x))) ; maybe save it + (inst ,op) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x04) ; C2 + (inst jmp :z DONE) + ;; Else x was out of range so reduce it; ST0 is unchanged. + (inst fstp fr0) ; Load 0.0 + (inst fldz) + DONE + (unless (zerop (tn-offset y)) + (inst fstd y)))))) + (frob fsin %sin fsin) + (frob fcos %cos fcos)) + +(define-vop (ftan) + (:translate %tan) + (:args (x :scs (double-reg) :target fr0)) + (:temporary (:sc double-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:temporary (:sc unsigned-reg :offset eax-offset + :from :argument :to :result) eax) + (:results (y :scs (double-reg))) + (:arg-types double-float) + (:result-types double-float) + (:ignore eax) + (:policy :fast-safe) + (:note "inline tan function") + (:vop-var vop) + (:save-p :compute-only) + (:ignore eax) + (:generator 5 + (note-this-location vop :internal-error) + (case (tn-offset x) + (0 + (inst fstp fr1)) + (1 + (inst fstp fr0)) + (t + (inst fstp fr0) + (inst fstp fr0) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 2))))) + (inst fptan) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x04) ; C2 + (inst jmp :z DONE) + ;; Else x was out of range so load 0.0 + (inst fxch fr1) + DONE + ;; Result is in fr1 + (case (tn-offset y) + (0 + (inst fxch fr1)) + (1) + (t + (inst fxch fr1) + (inst fstd y))))) + +;;; %exp that handles the following special cases: exp(+Inf) is +Inf; +;;; exp(-Inf) is 0; exp(NaN) is NaN. +(define-vop (fexp) + (:translate %exp) + (:args (x :scs (double-reg) :target fr0)) + (:temporary (:sc word-reg :offset eax-offset :from :eval :to :result) temp) + (:temporary (:sc double-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:temporary (:sc double-reg :offset fr2-offset + :from :argument :to :result) fr2) + (:results (y :scs (double-reg))) + (:arg-types double-float) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline exp function") + (:vop-var vop) + (:save-p :compute-only) + (:ignore temp) + (:generator 5 + (note-this-location vop :internal-error) + (unless (zerop (tn-offset x)) + (inst fxch x) ; x to top of stack + (unless (location= x y) + (inst fst x))) ; maybe save it + ;; Check for Inf or NaN + (inst fxam) + (inst fnstsw) + (inst sahf) + (inst jmp :nc NOINFNAN) ; Neither Inf or NaN. + (inst jmp :np NOINFNAN) ; NaN gives NaN? Continue. + (inst and ah-tn #x02) ; Test sign of Inf. + (inst jmp :z DONE) ; +Inf gives +Inf. + (inst fstp fr0) ; -Inf gives 0 + (inst fldz) + (inst jmp-short DONE) + NOINFNAN + (inst fstp fr1) + (inst fldl2e) + (inst fmul fr1) + ;; Now fr0=x log2(e) + (inst fst fr1) + (inst frndint) + (inst fst fr2) + (inst fsubp-sti fr1) + (inst f2xm1) + (inst fld1) + (inst faddp-sti fr1) + (inst fscale) + (inst fld fr0) + DONE + (unless (zerop (tn-offset y)) + (inst fstd y)))) + +;;; Expm1 = exp(x) - 1. +;;; Handles the following special cases: +;;; expm1(+Inf) is +Inf; expm1(-Inf) is -1.0; expm1(NaN) is NaN. +(define-vop (fexpm1) + (:translate %expm1) + (:args (x :scs (double-reg) :target fr0)) + (:temporary (:sc word-reg :offset eax-offset :from :eval :to :result) temp) + (:temporary (:sc double-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:temporary (:sc double-reg :offset fr2-offset + :from :argument :to :result) fr2) + (:results (y :scs (double-reg))) + (:arg-types double-float) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline expm1 function") + (:vop-var vop) + (:save-p :compute-only) + (:ignore temp) + (:generator 5 + (note-this-location vop :internal-error) + (unless (zerop (tn-offset x)) + (inst fxch x) ; x to top of stack + (unless (location= x y) + (inst fst x))) ; maybe save it + ;; Check for Inf or NaN + (inst fxam) + (inst fnstsw) + (inst sahf) + (inst jmp :nc NOINFNAN) ; Neither Inf or NaN. + (inst jmp :np NOINFNAN) ; NaN gives NaN? Continue. + (inst and ah-tn #x02) ; Test sign of Inf. + (inst jmp :z DONE) ; +Inf gives +Inf. + (inst fstp fr0) ; -Inf gives -1.0 + (inst fld1) + (inst fchs) + (inst jmp-short DONE) + NOINFNAN + ;; Free two stack slots leaving the argument on top. + (inst fstp fr2) + (inst fstp fr0) + (inst fldl2e) + (inst fmul fr1) ; Now fr0 = x log2(e) + (inst fst fr1) + (inst frndint) + (inst fsub-sti fr1) + (inst fxch fr1) + (inst f2xm1) + (inst fscale) + (inst fxch fr1) + (inst fld1) + (inst fscale) + (inst fstp fr1) + (inst fld1) + (inst fsub fr1) + (inst fsubr fr2) + DONE + (unless (zerop (tn-offset y)) + (inst fstd y)))) + +(define-vop (flog) + (:translate %log) + (:args (x :scs (double-reg double-stack descriptor-reg) :target fr0)) + (:temporary (:sc double-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:results (y :scs (double-reg))) + (:arg-types double-float) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline log function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + (sc-case x + (double-reg + (case (tn-offset x) + (0 + ;; x is in fr0 + (inst fstp fr1) + (inst fldln2) + (inst fxch fr1)) + (1 + ;; x is in fr1 + (inst fstp fr0) + (inst fldln2) + (inst fxch fr1)) + (t + ;; x is in a FP reg, not fr0 or fr1 + (inst fstp fr0) + (inst fstp fr0) + (inst fldln2) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (1- (tn-offset x)))))) + (inst fyl2x)) + ((double-stack descriptor-reg) + (inst fstp fr0) + (inst fstp fr0) + (inst fldln2) + (if (sc-is x double-stack) + (inst fldd (ea-for-df-stack x)) + (inst fldd (ea-for-df-desc x))) + (inst fyl2x))) + (inst fld fr0) + (case (tn-offset y) + ((0 1)) + (t (inst fstd y))))) + +(define-vop (flog10) + (:translate %log10) + (:args (x :scs (double-reg double-stack descriptor-reg) :target fr0)) + (:temporary (:sc double-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:results (y :scs (double-reg))) + (:arg-types double-float) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline log10 function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + (sc-case x + (double-reg + (case (tn-offset x) + (0 + ;; x is in fr0 + (inst fstp fr1) + (inst fldlg2) + (inst fxch fr1)) + (1 + ;; x is in fr1 + (inst fstp fr0) + (inst fldlg2) + (inst fxch fr1)) + (t + ;; x is in a FP reg, not fr0 or fr1 + (inst fstp fr0) + (inst fstp fr0) + (inst fldlg2) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (1- (tn-offset x)))))) + (inst fyl2x)) + ((double-stack descriptor-reg) + (inst fstp fr0) + (inst fstp fr0) + (inst fldlg2) + (if (sc-is x double-stack) + (inst fldd (ea-for-df-stack x)) + (inst fldd (ea-for-df-desc x))) + (inst fyl2x))) + (inst fld fr0) + (case (tn-offset y) + ((0 1)) + (t (inst fstd y))))) + +(define-vop (fpow) + (:translate %pow) + (:args (x :scs (double-reg double-stack descriptor-reg) :target fr0) + (y :scs (double-reg double-stack descriptor-reg) :target fr1)) + (:temporary (:sc double-reg :offset fr0-offset + :from (:argument 0) :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset + :from (:argument 1) :to :result) fr1) + (:temporary (:sc double-reg :offset fr2-offset + :from :load :to :result) fr2) + (:results (r :scs (double-reg))) + (:arg-types double-float double-float) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline pow function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + ;; Setup x in fr0 and y in fr1 + (cond + ;; x in fr0; y in fr1 + ((and (sc-is x double-reg) (zerop (tn-offset x)) + (sc-is y double-reg) (= 1 (tn-offset y)))) + ;; y in fr1; x not in fr0 + ((and (sc-is y double-reg) (= 1 (tn-offset y))) + ;; Load x to fr0 + (sc-case x + (double-reg + (copy-fp-reg-to-fr0 x)) + (double-stack + (inst fstp fr0) + (inst fldd (ea-for-df-stack x))) + (descriptor-reg + (inst fstp fr0) + (inst fldd (ea-for-df-desc x))))) + ;; x in fr0; y not in fr1 + ((and (sc-is x double-reg) (zerop (tn-offset x))) + (inst fxch fr1) + ;; Now load y to fr0 + (sc-case y + (double-reg + (copy-fp-reg-to-fr0 y)) + (double-stack + (inst fstp fr0) + (inst fldd (ea-for-df-stack y))) + (descriptor-reg + (inst fstp fr0) + (inst fldd (ea-for-df-desc y)))) + (inst fxch fr1)) + ;; x in fr1; y not in fr1 + ((and (sc-is x double-reg) (= 1 (tn-offset x))) + ;; Load y to fr0 + (sc-case y + (double-reg + (copy-fp-reg-to-fr0 y)) + (double-stack + (inst fstp fr0) + (inst fldd (ea-for-df-stack y))) + (descriptor-reg + (inst fstp fr0) + (inst fldd (ea-for-df-desc y)))) + (inst fxch fr1)) + ;; y in fr0; + ((and (sc-is y double-reg) (zerop (tn-offset y))) + (inst fxch fr1) + ;; Now load x to fr0 + (sc-case x + (double-reg + (copy-fp-reg-to-fr0 x)) + (double-stack + (inst fstp fr0) + (inst fldd (ea-for-df-stack x))) + (descriptor-reg + (inst fstp fr0) + (inst fldd (ea-for-df-desc x))))) + ;; Neither x or y are in either fr0 or fr1 + (t + ;; Load y then x + (inst fstp fr0) + (inst fstp fr0) + (sc-case y + (double-reg + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset y) 2)))) + (double-stack + (inst fldd (ea-for-df-stack y))) + (descriptor-reg + (inst fldd (ea-for-df-desc y)))) + ;; Load x to fr0 + (sc-case x + (double-reg + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (1- (tn-offset x))))) + (double-stack + (inst fldd (ea-for-df-stack x))) + (descriptor-reg + (inst fldd (ea-for-df-desc x)))))) + + ;; Now have x at fr0; and y at fr1 + (inst fyl2x) + ;; Now fr0=y log2(x) + (inst fld fr0) + (inst frndint) + (inst fst fr2) + (inst fsubp-sti fr1) + (inst f2xm1) + (inst fld1) + (inst faddp-sti fr1) + (inst fscale) + (inst fld fr0) + (case (tn-offset r) + ((0 1)) + (t (inst fstd r))))) + +(define-vop (fscalen) + (:translate %scalbn) + (:args (x :scs (double-reg double-stack descriptor-reg) :target fr0) + (y :scs (signed-stack signed-reg) :target temp)) + (:temporary (:sc double-reg :offset fr0-offset + :from (:argument 0) :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset :from :eval :to :result) fr1) + (:temporary (:sc signed-stack :from (:argument 1) :to :result) temp) + (:results (r :scs (double-reg))) + (:arg-types double-float signed-num) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline scalbn function") + (:generator 5 + ;; Setup x in fr0 and y in fr1 + (sc-case x + (double-reg + (case (tn-offset x) + (0 + (inst fstp fr1) + (sc-case y + (signed-reg + (inst mov temp y) + (inst fild temp)) + (signed-stack + (inst fild y))) + (inst fxch fr1)) + (1 + (inst fstp fr0) + (sc-case y + (signed-reg + (inst mov temp y) + (inst fild temp)) + (signed-stack + (inst fild y))) + (inst fxch fr1)) + (t + (inst fstp fr0) + (inst fstp fr0) + (sc-case y + (signed-reg + (inst mov temp y) + (inst fild temp)) + (signed-stack + (inst fild y))) + (inst fld (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (1- (tn-offset x))))))) + ((double-stack descriptor-reg) + (inst fstp fr0) + (inst fstp fr0) + (sc-case y + (signed-reg + (inst mov temp y) + (inst fild temp)) + (signed-stack + (inst fild y))) + (if (sc-is x double-stack) + (inst fldd (ea-for-df-stack x)) + (inst fldd (ea-for-df-desc x))))) + (inst fscale) + (unless (zerop (tn-offset r)) + (inst fstd r)))) + +(define-vop (fscale) + (:translate %scalb) + (:args (x :scs (double-reg double-stack descriptor-reg) :target fr0) + (y :scs (double-reg double-stack descriptor-reg) :target fr1)) + (:temporary (:sc double-reg :offset fr0-offset + :from (:argument 0) :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset + :from (:argument 1) :to :result) fr1) + (:results (r :scs (double-reg))) + (:arg-types double-float double-float) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline scalb function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + ;; Setup x in fr0 and y in fr1 + (cond + ;; x in fr0; y in fr1 + ((and (sc-is x double-reg) (zerop (tn-offset x)) + (sc-is y double-reg) (= 1 (tn-offset y)))) + ;; y in fr1; x not in fr0 + ((and (sc-is y double-reg) (= 1 (tn-offset y))) + ;; Load x to fr0 + (sc-case x + (double-reg + (copy-fp-reg-to-fr0 x)) + (double-stack + (inst fstp fr0) + (inst fldd (ea-for-df-stack x))) + (descriptor-reg + (inst fstp fr0) + (inst fldd (ea-for-df-desc x))))) + ;; x in fr0; y not in fr1 + ((and (sc-is x double-reg) (zerop (tn-offset x))) + (inst fxch fr1) + ;; Now load y to fr0 + (sc-case y + (double-reg + (copy-fp-reg-to-fr0 y)) + (double-stack + (inst fstp fr0) + (inst fldd (ea-for-df-stack y))) + (descriptor-reg + (inst fstp fr0) + (inst fldd (ea-for-df-desc y)))) + (inst fxch fr1)) + ;; x in fr1; y not in fr1 + ((and (sc-is x double-reg) (= 1 (tn-offset x))) + ;; Load y to fr0 + (sc-case y + (double-reg + (copy-fp-reg-to-fr0 y)) + (double-stack + (inst fstp fr0) + (inst fldd (ea-for-df-stack y))) + (descriptor-reg + (inst fstp fr0) + (inst fldd (ea-for-df-desc y)))) + (inst fxch fr1)) + ;; y in fr0; + ((and (sc-is y double-reg) (zerop (tn-offset y))) + (inst fxch fr1) + ;; Now load x to fr0 + (sc-case x + (double-reg + (copy-fp-reg-to-fr0 x)) + (double-stack + (inst fstp fr0) + (inst fldd (ea-for-df-stack x))) + (descriptor-reg + (inst fstp fr0) + (inst fldd (ea-for-df-desc x))))) + ;; Neither x or y are in either fr0 or fr1 + (t + ;; Load y then x + (inst fstp fr0) + (inst fstp fr0) + (sc-case y + (double-reg + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset y) 2)))) + (double-stack + (inst fldd (ea-for-df-stack y))) + (descriptor-reg + (inst fldd (ea-for-df-desc y)))) + ;; Load x to fr0 + (sc-case x + (double-reg + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (1- (tn-offset x))))) + (double-stack + (inst fldd (ea-for-df-stack x))) + (descriptor-reg + (inst fldd (ea-for-df-desc x)))))) + + ;; Now have x at fr0; and y at fr1 + (inst fscale) + (unless (zerop (tn-offset r)) + (inst fstd r)))) + +(define-vop (flog1p) + (:translate %log1p) + (:args (x :scs (double-reg) :to :result)) + (:temporary (:sc double-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:temporary (:sc word-reg :offset eax-offset :from :eval) temp) + (:results (y :scs (double-reg))) + (:arg-types double-float) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline log1p function") + (:ignore temp) + (:generator 5 + ;; x is in a FP reg, not fr0, fr1. + (inst fstp fr0) + (inst fstp fr0) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 2))) + ;; Check the range + (inst push #x3e947ae1) ; Constant 0.29 + (inst fabs) + (inst fld (make-ea :dword :base esp-tn)) + (inst fcompp) + (inst add esp-tn 4) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x45) + (inst jmp :z WITHIN-RANGE) + ;; Out of range for fyl2xp1. + (inst fld1) + (inst faddd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 1))) + (inst fldln2) + (inst fxch fr1) + (inst fyl2x) + (inst jmp DONE) + + WITHIN-RANGE + (inst fldln2) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 1))) + (inst fyl2xp1) + DONE + (inst fld fr0) + (case (tn-offset y) + ((0 1)) + (t (inst fstd y))))) + +;;; The Pentium has a less restricted implementation of the fyl2xp1 +;;; instruction and a range check can be avoided. +(define-vop (flog1p-pentium) + (:translate %log1p) + (:args (x :scs (double-reg double-stack descriptor-reg) :target fr0)) + (:temporary (:sc double-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:results (y :scs (double-reg))) + (:arg-types double-float) + (:result-types double-float) + (:policy :fast-safe) + (:guard (member :pentium-style-fyl2xp1 *backend-subfeatures*)) + (:note "inline log1p with limited x range function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 4 + (note-this-location vop :internal-error) + (sc-case x + (double-reg + (case (tn-offset x) + (0 + ;; x is in fr0 + (inst fstp fr1) + (inst fldln2) + (inst fxch fr1)) + (1 + ;; x is in fr1 + (inst fstp fr0) + (inst fldln2) + (inst fxch fr1)) + (t + ;; x is in a FP reg, not fr0 or fr1 + (inst fstp fr0) + (inst fstp fr0) + (inst fldln2) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (1- (tn-offset x))))))) + ((double-stack descriptor-reg) + (inst fstp fr0) + (inst fstp fr0) + (inst fldln2) + (if (sc-is x double-stack) + (inst fldd (ea-for-df-stack x)) + (inst fldd (ea-for-df-desc x))))) + (inst fyl2xp1) + (inst fld fr0) + (case (tn-offset y) + ((0 1)) + (t (inst fstd y))))) + +(define-vop (flogb) + (:translate %logb) + (:args (x :scs (double-reg double-stack descriptor-reg) :target fr0)) + (:temporary (:sc double-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:results (y :scs (double-reg))) + (:arg-types double-float) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline logb function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + (sc-case x + (double-reg + (case (tn-offset x) + (0 + ;; x is in fr0 + (inst fstp fr1)) + (1 + ;; x is in fr1 + (inst fstp fr0)) + (t + ;; x is in a FP reg, not fr0 or fr1 + (inst fstp fr0) + (inst fstp fr0) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 2)))))) + ((double-stack descriptor-reg) + (inst fstp fr0) + (inst fstp fr0) + (if (sc-is x double-stack) + (inst fldd (ea-for-df-stack x)) + (inst fldd (ea-for-df-desc x))))) + (inst fxtract) + (case (tn-offset y) + (0 + (inst fxch fr1)) + (1) + (t (inst fxch fr1) + (inst fstd y))))) + +(define-vop (fatan) + (:translate %atan) + (:args (x :scs (double-reg double-stack descriptor-reg) :target fr0)) + (:temporary (:sc double-reg :offset fr0-offset + :from (:argument 0) :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset + :from (:argument 0) :to :result) fr1) + (:results (r :scs (double-reg))) + (:arg-types double-float) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline atan function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + ;; Setup x in fr1 and 1.0 in fr0 + (cond + ;; x in fr0 + ((and (sc-is x double-reg) (zerop (tn-offset x))) + (inst fstp fr1)) + ;; x in fr1 + ((and (sc-is x double-reg) (= 1 (tn-offset x))) + (inst fstp fr0)) + ;; x not in fr0 or fr1 + (t + ;; Load x then 1.0 + (inst fstp fr0) + (inst fstp fr0) + (sc-case x + (double-reg + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 2)))) + (double-stack + (inst fldd (ea-for-df-stack x))) + (descriptor-reg + (inst fldd (ea-for-df-desc x)))))) + (inst fld1) + ;; Now have x at fr1; and 1.0 at fr0 + (inst fpatan) + (inst fld fr0) + (case (tn-offset r) + ((0 1)) + (t (inst fstd r))))) + +(define-vop (fatan2) + (:translate %atan2) + (:args (x :scs (double-reg double-stack descriptor-reg) :target fr1) + (y :scs (double-reg double-stack descriptor-reg) :target fr0)) + (:temporary (:sc double-reg :offset fr0-offset + :from (:argument 1) :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset + :from (:argument 0) :to :result) fr1) + (:results (r :scs (double-reg))) + (:arg-types double-float double-float) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline atan2 function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + ;; Setup x in fr1 and y in fr0 + (cond + ;; y in fr0; x in fr1 + ((and (sc-is y double-reg) (zerop (tn-offset y)) + (sc-is x double-reg) (= 1 (tn-offset x)))) + ;; x in fr1; y not in fr0 + ((and (sc-is x double-reg) (= 1 (tn-offset x))) + ;; Load y to fr0 + (sc-case y + (double-reg + (copy-fp-reg-to-fr0 y)) + (double-stack + (inst fstp fr0) + (inst fldd (ea-for-df-stack y))) + (descriptor-reg + (inst fstp fr0) + (inst fldd (ea-for-df-desc y))))) + ((and (sc-is x double-reg) (zerop (tn-offset x)) + (sc-is y double-reg) (zerop (tn-offset x))) + ;; copy x to fr1 + (inst fst fr1)) + ;; y in fr0; x not in fr1 + ((and (sc-is y double-reg) (zerop (tn-offset y))) + (inst fxch fr1) + ;; Now load x to fr0 + (sc-case x + (double-reg + (copy-fp-reg-to-fr0 x)) + (double-stack + (inst fstp fr0) + (inst fldd (ea-for-df-stack x))) + (descriptor-reg + (inst fstp fr0) + (inst fldd (ea-for-df-desc x)))) + (inst fxch fr1)) + ;; y in fr1; x not in fr1 + ((and (sc-is y double-reg) (= 1 (tn-offset y))) + ;; Load x to fr0 + (sc-case x + (double-reg + (copy-fp-reg-to-fr0 x)) + (double-stack + (inst fstp fr0) + (inst fldd (ea-for-df-stack x))) + (descriptor-reg + (inst fstp fr0) + (inst fldd (ea-for-df-desc x)))) + (inst fxch fr1)) + ;; x in fr0; + ((and (sc-is x double-reg) (zerop (tn-offset x))) + (inst fxch fr1) + ;; Now load y to fr0 + (sc-case y + (double-reg + (copy-fp-reg-to-fr0 y)) + (double-stack + (inst fstp fr0) + (inst fldd (ea-for-df-stack y))) + (descriptor-reg + (inst fstp fr0) + (inst fldd (ea-for-df-desc y))))) + ;; Neither y or x are in either fr0 or fr1 + (t + ;; Load x then y + (inst fstp fr0) + (inst fstp fr0) + (sc-case x + (double-reg + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 2)))) + (double-stack + (inst fldd (ea-for-df-stack x))) + (descriptor-reg + (inst fldd (ea-for-df-desc x)))) + ;; Load y to fr0 + (sc-case y + (double-reg + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (1- (tn-offset y))))) + (double-stack + (inst fldd (ea-for-df-stack y))) + (descriptor-reg + (inst fldd (ea-for-df-desc y)))))) + + ;; Now have y at fr0; and x at fr1 + (inst fpatan) + (inst fld fr0) + (case (tn-offset r) + ((0 1)) + (t (inst fstd r))))) +) ; PROGN #!-LONG-FLOAT + +#!+long-float +(progn + +;;; Lets use some of the 80387 special functions. +;;; +;;; These defs will not take effect unless code/irrat.lisp is modified +;;; to remove the inlined alien routine def. + +(macrolet ((frob (func trans op) + `(define-vop (,func) + (:args (x :scs (long-reg) :target fr0)) + (:temporary (:sc long-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:ignore fr0) + (:results (y :scs (long-reg))) + (:arg-types long-float) + (:result-types long-float) + (:translate ,trans) + (:policy :fast-safe) + (:note "inline NPX function") + (:vop-var vop) + (:save-p :compute-only) + (:node-var node) + (:generator 5 + (note-this-location vop :internal-error) + (unless (zerop (tn-offset x)) + (inst fxch x) ; x to top of stack + (unless (location= x y) + (inst fst x))) ; maybe save it + (inst ,op) ; clobber st0 + (cond ((zerop (tn-offset y)) + (maybe-fp-wait node)) + (t + (inst fst y))))))) + + ;; Quick versions of FSIN and FCOS that require the argument to be + ;; within range 2^63. + (frob fsin-quick %sin-quick fsin) + (frob fcos-quick %cos-quick fcos) + (frob fsqrt %sqrt fsqrt)) + +;;; Quick version of ftan that requires the argument to be within +;;; range 2^63. +(define-vop (ftan-quick) + (:translate %tan-quick) + (:args (x :scs (long-reg) :target fr0)) + (:temporary (:sc long-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc long-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:results (y :scs (long-reg))) + (:arg-types long-float) + (:result-types long-float) + (:policy :fast-safe) + (:note "inline tan function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + (case (tn-offset x) + (0 + (inst fstp fr1)) + (1 + (inst fstp fr0)) + (t + (inst fstp fr0) + (inst fstp fr0) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 2))))) + (inst fptan) + ;; Result is in fr1 + (case (tn-offset y) + (0 + (inst fxch fr1)) + (1) + (t + (inst fxch fr1) + (inst fstd y))))) + +;;; These versions of fsin, fcos, and ftan simply load a 0.0 result if +;;; the argument is out of range 2^63 and would thus be hopelessly +;;; inaccurate. +(macrolet ((frob (func trans op) + `(define-vop (,func) + (:translate ,trans) + (:args (x :scs (long-reg) :target fr0)) + (:temporary (:sc long-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc unsigned-reg :offset eax-offset + :from :argument :to :result) eax) + (:results (y :scs (long-reg))) + (:arg-types long-float) + (:result-types long-float) + (:policy :fast-safe) + (:note "inline sin/cos function") + (:vop-var vop) + (:save-p :compute-only) + (:ignore eax) + (:generator 5 + (note-this-location vop :internal-error) + (unless (zerop (tn-offset x)) + (inst fxch x) ; x to top of stack + (unless (location= x y) + (inst fst x))) ; maybe save it + (inst ,op) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x04) ; C2 + (inst jmp :z DONE) + ;; Else x was out of range so reduce it; ST0 is unchanged. + (inst fstp fr0) ; Load 0.0 + (inst fldz) + DONE + (unless (zerop (tn-offset y)) + (inst fstd y)))))) + (frob fsin %sin fsin) + (frob fcos %cos fcos)) + +(define-vop (ftan) + (:translate %tan) + (:args (x :scs (long-reg) :target fr0)) + (:temporary (:sc long-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc long-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:temporary (:sc unsigned-reg :offset eax-offset + :from :argument :to :result) eax) + (:results (y :scs (long-reg))) + (:arg-types long-float) + (:result-types long-float) + (:ignore eax) + (:policy :fast-safe) + (:note "inline tan function") + (:vop-var vop) + (:save-p :compute-only) + (:ignore eax) + (:generator 5 + (note-this-location vop :internal-error) + (case (tn-offset x) + (0 + (inst fstp fr1)) + (1 + (inst fstp fr0)) + (t + (inst fstp fr0) + (inst fstp fr0) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 2))))) + (inst fptan) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x04) ; C2 + (inst jmp :z DONE) + ;; Else x was out of range so reduce it; ST0 is unchanged. + (inst fldz) ; Load 0.0 + (inst fxch fr1) + DONE + ;; Result is in fr1 + (case (tn-offset y) + (0 + (inst fxch fr1)) + (1) + (t + (inst fxch fr1) + (inst fstd y))))) + +;;; Modified exp that handles the following special cases: +;;; exp(+Inf) is +Inf; exp(-Inf) is 0; exp(NaN) is NaN. +(define-vop (fexp) + (:translate %exp) + (:args (x :scs (long-reg) :target fr0)) + (:temporary (:sc word-reg :offset eax-offset :from :eval :to :result) temp) + (:temporary (:sc long-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc long-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:temporary (:sc long-reg :offset fr2-offset + :from :argument :to :result) fr2) + (:results (y :scs (long-reg))) + (:arg-types long-float) + (:result-types long-float) + (:policy :fast-safe) + (:note "inline exp function") + (:vop-var vop) + (:save-p :compute-only) + (:ignore temp) + (:generator 5 + (note-this-location vop :internal-error) + (unless (zerop (tn-offset x)) + (inst fxch x) ; x to top of stack + (unless (location= x y) + (inst fst x))) ; maybe save it + ;; Check for Inf or NaN + (inst fxam) + (inst fnstsw) + (inst sahf) + (inst jmp :nc NOINFNAN) ; Neither Inf or NaN. + (inst jmp :np NOINFNAN) ; NaN gives NaN? Continue. + (inst and ah-tn #x02) ; Test sign of Inf. + (inst jmp :z DONE) ; +Inf gives +Inf. + (inst fstp fr0) ; -Inf gives 0 + (inst fldz) + (inst jmp-short DONE) + NOINFNAN + (inst fstp fr1) + (inst fldl2e) + (inst fmul fr1) + ;; Now fr0=x log2(e) + (inst fst fr1) + (inst frndint) + (inst fst fr2) + (inst fsubp-sti fr1) + (inst f2xm1) + (inst fld1) + (inst faddp-sti fr1) + (inst fscale) + (inst fld fr0) + DONE + (unless (zerop (tn-offset y)) + (inst fstd y)))) + +;;; Expm1 = exp(x) - 1. +;;; Handles the following special cases: +;;; expm1(+Inf) is +Inf; expm1(-Inf) is -1.0; expm1(NaN) is NaN. +(define-vop (fexpm1) + (:translate %expm1) + (:args (x :scs (long-reg) :target fr0)) + (:temporary (:sc word-reg :offset eax-offset :from :eval :to :result) temp) + (:temporary (:sc long-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc long-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:temporary (:sc long-reg :offset fr2-offset + :from :argument :to :result) fr2) + (:results (y :scs (long-reg))) + (:arg-types long-float) + (:result-types long-float) + (:policy :fast-safe) + (:note "inline expm1 function") + (:vop-var vop) + (:save-p :compute-only) + (:ignore temp) + (:generator 5 + (note-this-location vop :internal-error) + (unless (zerop (tn-offset x)) + (inst fxch x) ; x to top of stack + (unless (location= x y) + (inst fst x))) ; maybe save it + ;; Check for Inf or NaN + (inst fxam) + (inst fnstsw) + (inst sahf) + (inst jmp :nc NOINFNAN) ; Neither Inf or NaN. + (inst jmp :np NOINFNAN) ; NaN gives NaN? Continue. + (inst and ah-tn #x02) ; Test sign of Inf. + (inst jmp :z DONE) ; +Inf gives +Inf. + (inst fstp fr0) ; -Inf gives -1.0 + (inst fld1) + (inst fchs) + (inst jmp-short DONE) + NOINFNAN + ;; Free two stack slots leaving the argument on top. + (inst fstp fr2) + (inst fstp fr0) + (inst fldl2e) + (inst fmul fr1) ; Now fr0 = x log2(e) + (inst fst fr1) + (inst frndint) + (inst fsub-sti fr1) + (inst fxch fr1) + (inst f2xm1) + (inst fscale) + (inst fxch fr1) + (inst fld1) + (inst fscale) + (inst fstp fr1) + (inst fld1) + (inst fsub fr1) + (inst fsubr fr2) + DONE + (unless (zerop (tn-offset y)) + (inst fstd y)))) + +(define-vop (flog) + (:translate %log) + (:args (x :scs (long-reg long-stack descriptor-reg) :target fr0)) + (:temporary (:sc long-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc long-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:results (y :scs (long-reg))) + (:arg-types long-float) + (:result-types long-float) + (:policy :fast-safe) + (:note "inline log function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + (sc-case x + (long-reg + (case (tn-offset x) + (0 + ;; x is in fr0 + (inst fstp fr1) + (inst fldln2) + (inst fxch fr1)) + (1 + ;; x is in fr1 + (inst fstp fr0) + (inst fldln2) + (inst fxch fr1)) + (t + ;; x is in a FP reg, not fr0 or fr1 + (inst fstp fr0) + (inst fstp fr0) + (inst fldln2) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (1- (tn-offset x)))))) + (inst fyl2x)) + ((long-stack descriptor-reg) + (inst fstp fr0) + (inst fstp fr0) + (inst fldln2) + (if (sc-is x long-stack) + (inst fldl (ea-for-lf-stack x)) + (inst fldl (ea-for-lf-desc x))) + (inst fyl2x))) + (inst fld fr0) + (case (tn-offset y) + ((0 1)) + (t (inst fstd y))))) + +(define-vop (flog10) + (:translate %log10) + (:args (x :scs (long-reg long-stack descriptor-reg) :target fr0)) + (:temporary (:sc long-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc long-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:results (y :scs (long-reg))) + (:arg-types long-float) + (:result-types long-float) + (:policy :fast-safe) + (:note "inline log10 function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + (sc-case x + (long-reg + (case (tn-offset x) + (0 + ;; x is in fr0 + (inst fstp fr1) + (inst fldlg2) + (inst fxch fr1)) + (1 + ;; x is in fr1 + (inst fstp fr0) + (inst fldlg2) + (inst fxch fr1)) + (t + ;; x is in a FP reg, not fr0 or fr1 + (inst fstp fr0) + (inst fstp fr0) + (inst fldlg2) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (1- (tn-offset x)))))) + (inst fyl2x)) + ((long-stack descriptor-reg) + (inst fstp fr0) + (inst fstp fr0) + (inst fldlg2) + (if (sc-is x long-stack) + (inst fldl (ea-for-lf-stack x)) + (inst fldl (ea-for-lf-desc x))) + (inst fyl2x))) + (inst fld fr0) + (case (tn-offset y) + ((0 1)) + (t (inst fstd y))))) + +(define-vop (fpow) + (:translate %pow) + (:args (x :scs (long-reg long-stack descriptor-reg) :target fr0) + (y :scs (long-reg long-stack descriptor-reg) :target fr1)) + (:temporary (:sc long-reg :offset fr0-offset + :from (:argument 0) :to :result) fr0) + (:temporary (:sc long-reg :offset fr1-offset + :from (:argument 1) :to :result) fr1) + (:temporary (:sc long-reg :offset fr2-offset + :from :load :to :result) fr2) + (:results (r :scs (long-reg))) + (:arg-types long-float long-float) + (:result-types long-float) + (:policy :fast-safe) + (:note "inline pow function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + ;; Setup x in fr0 and y in fr1 + (cond + ;; x in fr0; y in fr1 + ((and (sc-is x long-reg) (zerop (tn-offset x)) + (sc-is y long-reg) (= 1 (tn-offset y)))) + ;; y in fr1; x not in fr0 + ((and (sc-is y long-reg) (= 1 (tn-offset y))) + ;; Load x to fr0 + (sc-case x + (long-reg + (copy-fp-reg-to-fr0 x)) + (long-stack + (inst fstp fr0) + (inst fldl (ea-for-lf-stack x))) + (descriptor-reg + (inst fstp fr0) + (inst fldl (ea-for-lf-desc x))))) + ;; x in fr0; y not in fr1 + ((and (sc-is x long-reg) (zerop (tn-offset x))) + (inst fxch fr1) + ;; Now load y to fr0 + (sc-case y + (long-reg + (copy-fp-reg-to-fr0 y)) + (long-stack + (inst fstp fr0) + (inst fldl (ea-for-lf-stack y))) + (descriptor-reg + (inst fstp fr0) + (inst fldl (ea-for-lf-desc y)))) + (inst fxch fr1)) + ;; x in fr1; y not in fr1 + ((and (sc-is x long-reg) (= 1 (tn-offset x))) + ;; Load y to fr0 + (sc-case y + (long-reg + (copy-fp-reg-to-fr0 y)) + (long-stack + (inst fstp fr0) + (inst fldl (ea-for-lf-stack y))) + (descriptor-reg + (inst fstp fr0) + (inst fldl (ea-for-lf-desc y)))) + (inst fxch fr1)) + ;; y in fr0; + ((and (sc-is y long-reg) (zerop (tn-offset y))) + (inst fxch fr1) + ;; Now load x to fr0 + (sc-case x + (long-reg + (copy-fp-reg-to-fr0 x)) + (long-stack + (inst fstp fr0) + (inst fldl (ea-for-lf-stack x))) + (descriptor-reg + (inst fstp fr0) + (inst fldl (ea-for-lf-desc x))))) + ;; Neither x or y are in either fr0 or fr1 + (t + ;; Load y then x + (inst fstp fr0) + (inst fstp fr0) + (sc-case y + (long-reg + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset y) 2)))) + (long-stack + (inst fldl (ea-for-lf-stack y))) + (descriptor-reg + (inst fldl (ea-for-lf-desc y)))) + ;; Load x to fr0 + (sc-case x + (long-reg + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (1- (tn-offset x))))) + (long-stack + (inst fldl (ea-for-lf-stack x))) + (descriptor-reg + (inst fldl (ea-for-lf-desc x)))))) + + ;; Now have x at fr0; and y at fr1 + (inst fyl2x) + ;; Now fr0=y log2(x) + (inst fld fr0) + (inst frndint) + (inst fst fr2) + (inst fsubp-sti fr1) + (inst f2xm1) + (inst fld1) + (inst faddp-sti fr1) + (inst fscale) + (inst fld fr0) + (case (tn-offset r) + ((0 1)) + (t (inst fstd r))))) + +(define-vop (fscalen) + (:translate %scalbn) + (:args (x :scs (long-reg long-stack descriptor-reg) :target fr0) + (y :scs (signed-stack signed-reg) :target temp)) + (:temporary (:sc long-reg :offset fr0-offset + :from (:argument 0) :to :result) fr0) + (:temporary (:sc long-reg :offset fr1-offset :from :eval :to :result) fr1) + (:temporary (:sc signed-stack :from (:argument 1) :to :result) temp) + (:results (r :scs (long-reg))) + (:arg-types long-float signed-num) + (:result-types long-float) + (:policy :fast-safe) + (:note "inline scalbn function") + (:generator 5 + ;; Setup x in fr0 and y in fr1 + (sc-case x + (long-reg + (case (tn-offset x) + (0 + (inst fstp fr1) + (sc-case y + (signed-reg + (inst mov temp y) + (inst fild temp)) + (signed-stack + (inst fild y))) + (inst fxch fr1)) + (1 + (inst fstp fr0) + (sc-case y + (signed-reg + (inst mov temp y) + (inst fild temp)) + (signed-stack + (inst fild y))) + (inst fxch fr1)) + (t + (inst fstp fr0) + (inst fstp fr0) + (sc-case y + (signed-reg + (inst mov temp y) + (inst fild temp)) + (signed-stack + (inst fild y))) + (inst fld (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (1- (tn-offset x))))))) + ((long-stack descriptor-reg) + (inst fstp fr0) + (inst fstp fr0) + (sc-case y + (signed-reg + (inst mov temp y) + (inst fild temp)) + (signed-stack + (inst fild y))) + (if (sc-is x long-stack) + (inst fldl (ea-for-lf-stack x)) + (inst fldl (ea-for-lf-desc x))))) + (inst fscale) + (unless (zerop (tn-offset r)) + (inst fstd r)))) + +(define-vop (fscale) + (:translate %scalb) + (:args (x :scs (long-reg long-stack descriptor-reg) :target fr0) + (y :scs (long-reg long-stack descriptor-reg) :target fr1)) + (:temporary (:sc long-reg :offset fr0-offset + :from (:argument 0) :to :result) fr0) + (:temporary (:sc long-reg :offset fr1-offset + :from (:argument 1) :to :result) fr1) + (:results (r :scs (long-reg))) + (:arg-types long-float long-float) + (:result-types long-float) + (:policy :fast-safe) + (:note "inline scalb function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + ;; Setup x in fr0 and y in fr1 + (cond + ;; x in fr0; y in fr1 + ((and (sc-is x long-reg) (zerop (tn-offset x)) + (sc-is y long-reg) (= 1 (tn-offset y)))) + ;; y in fr1; x not in fr0 + ((and (sc-is y long-reg) (= 1 (tn-offset y))) + ;; Load x to fr0 + (sc-case x + (long-reg + (copy-fp-reg-to-fr0 x)) + (long-stack + (inst fstp fr0) + (inst fldl (ea-for-lf-stack x))) + (descriptor-reg + (inst fstp fr0) + (inst fldl (ea-for-lf-desc x))))) + ;; x in fr0; y not in fr1 + ((and (sc-is x long-reg) (zerop (tn-offset x))) + (inst fxch fr1) + ;; Now load y to fr0 + (sc-case y + (long-reg + (copy-fp-reg-to-fr0 y)) + (long-stack + (inst fstp fr0) + (inst fldl (ea-for-lf-stack y))) + (descriptor-reg + (inst fstp fr0) + (inst fldl (ea-for-lf-desc y)))) + (inst fxch fr1)) + ;; x in fr1; y not in fr1 + ((and (sc-is x long-reg) (= 1 (tn-offset x))) + ;; Load y to fr0 + (sc-case y + (long-reg + (copy-fp-reg-to-fr0 y)) + (long-stack + (inst fstp fr0) + (inst fldl (ea-for-lf-stack y))) + (descriptor-reg + (inst fstp fr0) + (inst fldl (ea-for-lf-desc y)))) + (inst fxch fr1)) + ;; y in fr0; + ((and (sc-is y long-reg) (zerop (tn-offset y))) + (inst fxch fr1) + ;; Now load x to fr0 + (sc-case x + (long-reg + (copy-fp-reg-to-fr0 x)) + (long-stack + (inst fstp fr0) + (inst fldl (ea-for-lf-stack x))) + (descriptor-reg + (inst fstp fr0) + (inst fldl (ea-for-lf-desc x))))) + ;; Neither x or y are in either fr0 or fr1 + (t + ;; Load y then x + (inst fstp fr0) + (inst fstp fr0) + (sc-case y + (long-reg + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset y) 2)))) + (long-stack + (inst fldl (ea-for-lf-stack y))) + (descriptor-reg + (inst fldl (ea-for-lf-desc y)))) + ;; Load x to fr0 + (sc-case x + (long-reg + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (1- (tn-offset x))))) + (long-stack + (inst fldl (ea-for-lf-stack x))) + (descriptor-reg + (inst fldl (ea-for-lf-desc x)))))) + + ;; Now have x at fr0; and y at fr1 + (inst fscale) + (unless (zerop (tn-offset r)) + (inst fstd r)))) + +(define-vop (flog1p) + (:translate %log1p) + (:args (x :scs (long-reg) :to :result)) + (:temporary (:sc long-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc long-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:temporary (:sc word-reg :offset eax-offset :from :eval) temp) + (:results (y :scs (long-reg))) + (:arg-types long-float) + (:result-types long-float) + (:policy :fast-safe) + ;; FIXME 1: This appears to be the second DEFINE-VOP of FLOG1P. + ;; Perhaps this is OK because of the #!+LONG-FLOAT wrapped around + ;; an enormous PROGN above. Still, it would be probably be good to + ;; add some code to warn about redefining VOPs. + (:note "inline log1p function") + (:ignore temp) + (:generator 5 + ;; x is in a FP reg, not fr0, fr1. + (inst fstp fr0) + (inst fstp fr0) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 2))) + ;; Check the range + (inst push #x3e947ae1) ; Constant 0.29 + (inst fabs) + (inst fld (make-ea :dword :base esp-tn)) + (inst fcompp) + (inst add esp-tn 4) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x45) + (inst jmp :z WITHIN-RANGE) + ;; Out of range for fyl2xp1. + (inst fld1) + (inst faddd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 1))) + (inst fldln2) + (inst fxch fr1) + (inst fyl2x) + (inst jmp DONE) + + WITHIN-RANGE + (inst fldln2) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 1))) + (inst fyl2xp1) + DONE + (inst fld fr0) + (case (tn-offset y) + ((0 1)) + (t (inst fstd y))))) + +;;; The Pentium has a less restricted implementation of the fyl2xp1 +;;; instruction and a range check can be avoided. +(define-vop (flog1p-pentium) + (:translate %log1p) + (:args (x :scs (long-reg long-stack descriptor-reg) :target fr0)) + (:temporary (:sc long-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc long-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:results (y :scs (long-reg))) + (:arg-types long-float) + (:result-types long-float) + (:policy :fast-safe) + (:guard (member :pentium-style-fyl2xp1 *backend-subfeatures*)) + (:note "inline log1p function") + (:generator 5 + (sc-case x + (long-reg + (case (tn-offset x) + (0 + ;; x is in fr0 + (inst fstp fr1) + (inst fldln2) + (inst fxch fr1)) + (1 + ;; x is in fr1 + (inst fstp fr0) + (inst fldln2) + (inst fxch fr1)) + (t + ;; x is in a FP reg, not fr0 or fr1 + (inst fstp fr0) + (inst fstp fr0) + (inst fldln2) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (1- (tn-offset x))))))) + ((long-stack descriptor-reg) + (inst fstp fr0) + (inst fstp fr0) + (inst fldln2) + (if (sc-is x long-stack) + (inst fldl (ea-for-lf-stack x)) + (inst fldl (ea-for-lf-desc x))))) + (inst fyl2xp1) + (inst fld fr0) + (case (tn-offset y) + ((0 1)) + (t (inst fstd y))))) + +(define-vop (flogb) + (:translate %logb) + (:args (x :scs (long-reg long-stack descriptor-reg) :target fr0)) + (:temporary (:sc long-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc long-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:results (y :scs (long-reg))) + (:arg-types long-float) + (:result-types long-float) + (:policy :fast-safe) + (:note "inline logb function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + (sc-case x + (long-reg + (case (tn-offset x) + (0 + ;; x is in fr0 + (inst fstp fr1)) + (1 + ;; x is in fr1 + (inst fstp fr0)) + (t + ;; x is in a FP reg, not fr0 or fr1 + (inst fstp fr0) + (inst fstp fr0) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 2)))))) + ((long-stack descriptor-reg) + (inst fstp fr0) + (inst fstp fr0) + (if (sc-is x long-stack) + (inst fldl (ea-for-lf-stack x)) + (inst fldl (ea-for-lf-desc x))))) + (inst fxtract) + (case (tn-offset y) + (0 + (inst fxch fr1)) + (1) + (t (inst fxch fr1) + (inst fstd y))))) + +(define-vop (fatan) + (:translate %atan) + (:args (x :scs (long-reg long-stack descriptor-reg) :target fr0)) + (:temporary (:sc long-reg :offset fr0-offset + :from (:argument 0) :to :result) fr0) + (:temporary (:sc long-reg :offset fr1-offset + :from (:argument 0) :to :result) fr1) + (:results (r :scs (long-reg))) + (:arg-types long-float) + (:result-types long-float) + (:policy :fast-safe) + (:note "inline atan function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + ;; Setup x in fr1 and 1.0 in fr0 + (cond + ;; x in fr0 + ((and (sc-is x long-reg) (zerop (tn-offset x))) + (inst fstp fr1)) + ;; x in fr1 + ((and (sc-is x long-reg) (= 1 (tn-offset x))) + (inst fstp fr0)) + ;; x not in fr0 or fr1 + (t + ;; Load x then 1.0 + (inst fstp fr0) + (inst fstp fr0) + (sc-case x + (long-reg + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 2)))) + (long-stack + (inst fldl (ea-for-lf-stack x))) + (descriptor-reg + (inst fldl (ea-for-lf-desc x)))))) + (inst fld1) + ;; Now have x at fr1; and 1.0 at fr0 + (inst fpatan) + (inst fld fr0) + (case (tn-offset r) + ((0 1)) + (t (inst fstd r))))) + +(define-vop (fatan2) + (:translate %atan2) + (:args (x :scs (long-reg long-stack descriptor-reg) :target fr1) + (y :scs (long-reg long-stack descriptor-reg) :target fr0)) + (:temporary (:sc long-reg :offset fr0-offset + :from (:argument 1) :to :result) fr0) + (:temporary (:sc long-reg :offset fr1-offset + :from (:argument 0) :to :result) fr1) + (:results (r :scs (long-reg))) + (:arg-types long-float long-float) + (:result-types long-float) + (:policy :fast-safe) + (:note "inline atan2 function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + ;; Setup x in fr1 and y in fr0 + (cond + ;; y in fr0; x in fr1 + ((and (sc-is y long-reg) (zerop (tn-offset y)) + (sc-is x long-reg) (= 1 (tn-offset x)))) + ;; x in fr1; y not in fr0 + ((and (sc-is x long-reg) (= 1 (tn-offset x))) + ;; Load y to fr0 + (sc-case y + (long-reg + (copy-fp-reg-to-fr0 y)) + (long-stack + (inst fstp fr0) + (inst fldl (ea-for-lf-stack y))) + (descriptor-reg + (inst fstp fr0) + (inst fldl (ea-for-lf-desc y))))) + ;; y in fr0; x not in fr1 + ((and (sc-is y long-reg) (zerop (tn-offset y))) + (inst fxch fr1) + ;; Now load x to fr0 + (sc-case x + (long-reg + (copy-fp-reg-to-fr0 x)) + (long-stack + (inst fstp fr0) + (inst fldl (ea-for-lf-stack x))) + (descriptor-reg + (inst fstp fr0) + (inst fldl (ea-for-lf-desc x)))) + (inst fxch fr1)) + ;; y in fr1; x not in fr1 + ((and (sc-is y long-reg) (= 1 (tn-offset y))) + ;; Load x to fr0 + (sc-case x + (long-reg + (copy-fp-reg-to-fr0 x)) + (long-stack + (inst fstp fr0) + (inst fldl (ea-for-lf-stack x))) + (descriptor-reg + (inst fstp fr0) + (inst fldl (ea-for-lf-desc x)))) + (inst fxch fr1)) + ;; x in fr0; + ((and (sc-is x long-reg) (zerop (tn-offset x))) + (inst fxch fr1) + ;; Now load y to fr0 + (sc-case y + (long-reg + (copy-fp-reg-to-fr0 y)) + (long-stack + (inst fstp fr0) + (inst fldl (ea-for-lf-stack y))) + (descriptor-reg + (inst fstp fr0) + (inst fldl (ea-for-lf-desc y))))) + ;; Neither y or x are in either fr0 or fr1 + (t + ;; Load x then y + (inst fstp fr0) + (inst fstp fr0) + (sc-case x + (long-reg + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 2)))) + (long-stack + (inst fldl (ea-for-lf-stack x))) + (descriptor-reg + (inst fldl (ea-for-lf-desc x)))) + ;; Load y to fr0 + (sc-case y + (long-reg + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (1- (tn-offset y))))) + (long-stack + (inst fldl (ea-for-lf-stack y))) + (descriptor-reg + (inst fldl (ea-for-lf-desc y)))))) + + ;; Now have y at fr0; and x at fr1 + (inst fpatan) + (inst fld fr0) + (case (tn-offset r) + ((0 1)) + (t (inst fstd r))))) + +) ; PROGN #!+LONG-FLOAT + +;;;; complex float VOPs + +(define-vop (make-complex-single-float) + (:translate complex) + (:args (real :scs (single-reg) :to :result :target r + :load-if (not (location= real r))) + (imag :scs (single-reg) :to :save)) + (:arg-types single-float single-float) + (:results (r :scs (complex-single-reg) :from (:argument 0) + :load-if (not (sc-is r complex-single-stack)))) + (:result-types complex-single-float) + (:note "inline complex single-float creation") + (:policy :fast-safe) + (:generator 5 + (sc-case r + (complex-single-reg + (let ((r-real (complex-double-reg-real-tn r))) + (unless (location= real r-real) + (cond ((zerop (tn-offset r-real)) + (copy-fp-reg-to-fr0 real)) + ((zerop (tn-offset real)) + (inst fstd r-real)) + (t + (inst fxch real) + (inst fstd r-real) + (inst fxch real))))) + (let ((r-imag (complex-double-reg-imag-tn r))) + (unless (location= imag r-imag) + (cond ((zerop (tn-offset imag)) + (inst fstd r-imag)) + (t + (inst fxch imag) + (inst fstd r-imag) + (inst fxch imag)))))) + (complex-single-stack + (unless (location= real r) + (cond ((zerop (tn-offset real)) + (inst fst (ea-for-csf-real-stack r))) + (t + (inst fxch real) + (inst fst (ea-for-csf-real-stack r)) + (inst fxch real)))) + (inst fxch imag) + (inst fst (ea-for-csf-imag-stack r)) + (inst fxch imag))))) + +(define-vop (make-complex-double-float) + (:translate complex) + (:args (real :scs (double-reg) :target r + :load-if (not (location= real r))) + (imag :scs (double-reg) :to :save)) + (:arg-types double-float double-float) + (:results (r :scs (complex-double-reg) :from (:argument 0) + :load-if (not (sc-is r complex-double-stack)))) + (:result-types complex-double-float) + (:note "inline complex double-float creation") + (:policy :fast-safe) + (:generator 5 + (sc-case r + (complex-double-reg + (let ((r-real (complex-double-reg-real-tn r))) + (unless (location= real r-real) + (cond ((zerop (tn-offset r-real)) + (copy-fp-reg-to-fr0 real)) + ((zerop (tn-offset real)) + (inst fstd r-real)) + (t + (inst fxch real) + (inst fstd r-real) + (inst fxch real))))) + (let ((r-imag (complex-double-reg-imag-tn r))) + (unless (location= imag r-imag) + (cond ((zerop (tn-offset imag)) + (inst fstd r-imag)) + (t + (inst fxch imag) + (inst fstd r-imag) + (inst fxch imag)))))) + (complex-double-stack + (unless (location= real r) + (cond ((zerop (tn-offset real)) + (inst fstd (ea-for-cdf-real-stack r))) + (t + (inst fxch real) + (inst fstd (ea-for-cdf-real-stack r)) + (inst fxch real)))) + (inst fxch imag) + (inst fstd (ea-for-cdf-imag-stack r)) + (inst fxch imag))))) + +#!+long-float +(define-vop (make-complex-long-float) + (:translate complex) + (:args (real :scs (long-reg) :target r + :load-if (not (location= real r))) + (imag :scs (long-reg) :to :save)) + (:arg-types long-float long-float) + (:results (r :scs (complex-long-reg) :from (:argument 0) + :load-if (not (sc-is r complex-long-stack)))) + (:result-types complex-long-float) + (:note "inline complex long-float creation") + (:policy :fast-safe) + (:generator 5 + (sc-case r + (complex-long-reg + (let ((r-real (complex-double-reg-real-tn r))) + (unless (location= real r-real) + (cond ((zerop (tn-offset r-real)) + (copy-fp-reg-to-fr0 real)) + ((zerop (tn-offset real)) + (inst fstd r-real)) + (t + (inst fxch real) + (inst fstd r-real) + (inst fxch real))))) + (let ((r-imag (complex-double-reg-imag-tn r))) + (unless (location= imag r-imag) + (cond ((zerop (tn-offset imag)) + (inst fstd r-imag)) + (t + (inst fxch imag) + (inst fstd r-imag) + (inst fxch imag)))))) + (complex-long-stack + (unless (location= real r) + (cond ((zerop (tn-offset real)) + (store-long-float (ea-for-clf-real-stack r))) + (t + (inst fxch real) + (store-long-float (ea-for-clf-real-stack r)) + (inst fxch real)))) + (inst fxch imag) + (store-long-float (ea-for-clf-imag-stack r)) + (inst fxch imag))))) + + +(define-vop (complex-float-value) + (:args (x :target r)) + (:results (r)) + (:variant-vars offset) + (:policy :fast-safe) + (:generator 3 + (cond ((sc-is x complex-single-reg complex-double-reg + #!+long-float complex-long-reg) + (let ((value-tn + (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (+ offset (tn-offset x))))) + (unless (location= value-tn r) + (cond ((zerop (tn-offset r)) + (copy-fp-reg-to-fr0 value-tn)) + ((zerop (tn-offset value-tn)) + (inst fstd r)) + (t + (inst fxch value-tn) + (inst fstd r) + (inst fxch value-tn)))))) + ((sc-is r single-reg) + (let ((ea (sc-case x + (complex-single-stack + (ecase offset + (0 (ea-for-csf-real-stack x)) + (1 (ea-for-csf-imag-stack x)))) + (descriptor-reg + (ecase offset + (0 (ea-for-csf-real-desc x)) + (1 (ea-for-csf-imag-desc x))))))) + (with-empty-tn@fp-top(r) + (inst fld ea)))) + ((sc-is r double-reg) + (let ((ea (sc-case x + (complex-double-stack + (ecase offset + (0 (ea-for-cdf-real-stack x)) + (1 (ea-for-cdf-imag-stack x)))) + (descriptor-reg + (ecase offset + (0 (ea-for-cdf-real-desc x)) + (1 (ea-for-cdf-imag-desc x))))))) + (with-empty-tn@fp-top(r) + (inst fldd ea)))) + #!+long-float + ((sc-is r long-reg) + (let ((ea (sc-case x + (complex-long-stack + (ecase offset + (0 (ea-for-clf-real-stack x)) + (1 (ea-for-clf-imag-stack x)))) + (descriptor-reg + (ecase offset + (0 (ea-for-clf-real-desc x)) + (1 (ea-for-clf-imag-desc x))))))) + (with-empty-tn@fp-top(r) + (inst fldl ea)))) + (t (error "COMPLEX-FLOAT-VALUE VOP failure"))))) + +(define-vop (realpart/complex-single-float complex-float-value) + (:translate realpart) + (:args (x :scs (complex-single-reg complex-single-stack descriptor-reg) + :target r)) + (:arg-types complex-single-float) + (:results (r :scs (single-reg))) + (:result-types single-float) + (:note "complex float realpart") + (:variant 0)) + +(define-vop (realpart/complex-double-float complex-float-value) + (:translate realpart) + (:args (x :scs (complex-double-reg complex-double-stack descriptor-reg) + :target r)) + (:arg-types complex-double-float) + (:results (r :scs (double-reg))) + (:result-types double-float) + (:note "complex float realpart") + (:variant 0)) + +#!+long-float +(define-vop (realpart/complex-long-float complex-float-value) + (:translate realpart) + (:args (x :scs (complex-long-reg complex-long-stack descriptor-reg) + :target r)) + (:arg-types complex-long-float) + (:results (r :scs (long-reg))) + (:result-types long-float) + (:note "complex float realpart") + (:variant 0)) + +(define-vop (imagpart/complex-single-float complex-float-value) + (:translate imagpart) + (:args (x :scs (complex-single-reg complex-single-stack descriptor-reg) + :target r)) + (:arg-types complex-single-float) + (:results (r :scs (single-reg))) + (:result-types single-float) + (:note "complex float imagpart") + (:variant 1)) + +(define-vop (imagpart/complex-double-float complex-float-value) + (:translate imagpart) + (:args (x :scs (complex-double-reg complex-double-stack descriptor-reg) + :target r)) + (:arg-types complex-double-float) + (:results (r :scs (double-reg))) + (:result-types double-float) + (:note "complex float imagpart") + (:variant 1)) + +#!+long-float +(define-vop (imagpart/complex-long-float complex-float-value) + (:translate imagpart) + (:args (x :scs (complex-long-reg complex-long-stack descriptor-reg) + :target r)) + (:arg-types complex-long-float) + (:results (r :scs (long-reg))) + (:result-types long-float) + (:note "complex float imagpart") + (:variant 1)) + +;;; hack dummy VOPs to bias the representation selection of their +;;; arguments towards a FP register, which can help avoid consing at +;;; inappropriate locations +(defknown double-float-reg-bias (double-float) (values)) +(define-vop (double-float-reg-bias) + (:translate double-float-reg-bias) + (:args (x :scs (double-reg double-stack) :load-if nil)) + (:arg-types double-float) + (:policy :fast-safe) + (:note "inline dummy FP register bias") + (:ignore x) + (:generator 0)) +(defknown single-float-reg-bias (single-float) (values)) +(define-vop (single-float-reg-bias) + (:translate single-float-reg-bias) + (:args (x :scs (single-reg single-stack) :load-if nil)) + (:arg-types single-float) + (:policy :fast-safe) + (:note "inline dummy FP register bias") + (:ignore x) + (:generator 0)) diff --git a/sbcl-src/src/compiler/x86/float.lisp b/sbcl-src/src/compiler/x86/float.lisp new file mode 100644 index 0000000..11976c3 --- /dev/null +++ b/sbcl-src/src/compiler/x86/float.lisp @@ -0,0 +1,4429 @@ +;;;; floating point support for the x86 + +;;;; This software is part of the SBCL system. See the README file for +;;;; more information. +;;;; +;;;; This software is derived from the CMU CL system, which was +;;;; written at Carnegie Mellon University and released into the +;;;; public domain. The software is in the public domain and is +;;;; provided with absolutely no warranty. See the COPYING and CREDITS +;;;; files for more information. + +(in-package "SB!VM") + +(macrolet ((ea-for-xf-desc (tn slot) + `(make-ea + :dword :base ,tn + :disp (- (* ,slot n-word-bytes) + other-pointer-lowtag)))) + (defun ea-for-sf-desc (tn) + (ea-for-xf-desc tn single-float-value-slot)) + (defun ea-for-df-desc (tn) + (ea-for-xf-desc tn double-float-value-slot)) + #!+long-float + (defun ea-for-lf-desc (tn) + (ea-for-xf-desc tn long-float-value-slot)) + ;; complex floats + (defun ea-for-csf-real-desc (tn) + (ea-for-xf-desc tn complex-single-float-real-slot)) + (defun ea-for-csf-imag-desc (tn) + (ea-for-xf-desc tn complex-single-float-imag-slot)) + (defun ea-for-cdf-real-desc (tn) + (ea-for-xf-desc tn complex-double-float-real-slot)) + (defun ea-for-cdf-imag-desc (tn) + (ea-for-xf-desc tn complex-double-float-imag-slot)) + #!+long-float + (defun ea-for-clf-real-desc (tn) + (ea-for-xf-desc tn complex-long-float-real-slot)) + #!+long-float + (defun ea-for-clf-imag-desc (tn) + (ea-for-xf-desc tn complex-long-float-imag-slot))) + +(macrolet ((ea-for-xf-stack (tn kind) + `(make-ea + :dword :base ebp-tn + :disp (- (* (+ (tn-offset ,tn) + (ecase ,kind (:single 1) (:double 2) (:long 3))) + n-word-bytes))))) + (defun ea-for-sf-stack (tn) + (ea-for-xf-stack tn :single)) + (defun ea-for-df-stack (tn) + (ea-for-xf-stack tn :double)) + #!+long-float + (defun ea-for-lf-stack (tn) + (ea-for-xf-stack tn :long))) + +;;; Telling the FPU to wait is required in order to make signals occur +;;; at the expected place, but naturally slows things down. +;;; +;;; NODE is the node whose compilation policy controls the decision +;;; whether to just blast through carelessly or carefully emit wait +;;; instructions and whatnot. +;;; +;;; NOTE-NEXT-INSTRUCTION, if supplied, is to be passed to +;;; #'NOTE-NEXT-INSTRUCTION. +;;; +;;; Until 2004-03-15, the implementation of this was buggy; it +;;; unconditionally emitted the WAIT instruction. It turns out that +;;; this is the right thing to do anyway; omitting them can lead to +;;; system corruption on conforming code. -- CSR +(defun maybe-fp-wait (node &optional note-next-instruction) + (declare (ignore node)) + #+nil + (when (policy node (or (= debug 3) (> safety speed)))) + (when note-next-instruction + (note-next-instruction note-next-instruction :internal-error)) + (inst wait)) + +;;; complex float stack EAs +(macrolet ((ea-for-cxf-stack (tn kind slot &optional base) + `(make-ea + :dword :base ,base + :disp (- (* (+ (tn-offset ,tn) + (* (ecase ,kind + (:single 1) + (:double 2) + (:long 3)) + (ecase ,slot (:real 1) (:imag 2)))) + n-word-bytes))))) + (defun ea-for-csf-real-stack (tn &optional (base ebp-tn)) + (ea-for-cxf-stack tn :single :real base)) + (defun ea-for-csf-imag-stack (tn &optional (base ebp-tn)) + (ea-for-cxf-stack tn :single :imag base)) + (defun ea-for-cdf-real-stack (tn &optional (base ebp-tn)) + (ea-for-cxf-stack tn :double :real base)) + (defun ea-for-cdf-imag-stack (tn &optional (base ebp-tn)) + (ea-for-cxf-stack tn :double :imag base)) + #!+long-float + (defun ea-for-clf-real-stack (tn &optional (base ebp-tn)) + (ea-for-cxf-stack tn :long :real base)) + #!+long-float + (defun ea-for-clf-imag-stack (tn &optional (base ebp-tn)) + (ea-for-cxf-stack tn :long :imag base))) + +;;; Abstract out the copying of a FP register to the FP stack top, and +;;; provide two alternatives for its implementation. Note: it's not +;;; necessary to distinguish between a single or double register move +;;; here. +;;; +;;; Using a Pop then load. +(defun copy-fp-reg-to-fr0 (reg) + (aver (not (zerop (tn-offset reg)))) + (inst fstp fr0-tn) + (inst fld (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (1- (tn-offset reg))))) +;;; Using Fxch then Fst to restore the original reg contents. +#+nil +(defun copy-fp-reg-to-fr0 (reg) + (aver (not (zerop (tn-offset reg)))) + (inst fxch reg) + (inst fst reg)) + +;;; The x86 can't store a long-float to memory without popping the +;;; stack and marking a register as empty, so it is necessary to +;;; restore the register from memory. +#!+long-float +(defun store-long-float (ea) + (inst fstpl ea) + (inst fldl ea)) + +;;;; move functions + +;;; X is source, Y is destination. +(define-move-fun (load-single 2) (vop x y) + ((single-stack) (single-reg)) + (with-empty-tn@fp-top(y) + (inst fld (ea-for-sf-stack x)))) + +(define-move-fun (store-single 2) (vop x y) + ((single-reg) (single-stack)) + (cond ((zerop (tn-offset x)) + (inst fst (ea-for-sf-stack y))) + (t + (inst fxch x) + (inst fst (ea-for-sf-stack y)) + ;; This may not be necessary as ST0 is likely invalid now. + (inst fxch x)))) + +(define-move-fun (load-double 2) (vop x y) + ((double-stack) (double-reg)) + (with-empty-tn@fp-top(y) + (inst fldd (ea-for-df-stack x)))) + +(define-move-fun (store-double 2) (vop x y) + ((double-reg) (double-stack)) + (cond ((zerop (tn-offset x)) + (inst fstd (ea-for-df-stack y))) + (t + (inst fxch x) + (inst fstd (ea-for-df-stack y)) + ;; This may not be necessary as ST0 is likely invalid now. + (inst fxch x)))) + +#!+long-float +(define-move-fun (load-long 2) (vop x y) + ((long-stack) (long-reg)) + (with-empty-tn@fp-top(y) + (inst fldl (ea-for-lf-stack x)))) + +#!+long-float +(define-move-fun (store-long 2) (vop x y) + ((long-reg) (long-stack)) + (cond ((zerop (tn-offset x)) + (store-long-float (ea-for-lf-stack y))) + (t + (inst fxch x) + (store-long-float (ea-for-lf-stack y)) + ;; This may not be necessary as ST0 is likely invalid now. + (inst fxch x)))) + +;;; The i387 has instructions to load some useful constants. This +;;; doesn't save much time but might cut down on memory access and +;;; reduce the size of the constant vector (CV). Intel claims they are +;;; stored in a more precise form on chip. Anyhow, might as well use +;;; the feature. It can be turned off by hacking the +;;; "immediate-constant-sc" in vm.lisp. +(eval-when (:compile-toplevel :execute) + (setf *read-default-float-format* + #!+long-float 'long-float #!-long-float 'double-float)) +(define-move-fun (load-fp-constant 2) (vop x y) + ((fp-constant) (single-reg double-reg #!+long-float long-reg)) + (let ((value (sb!c::constant-value (sb!c::tn-leaf x)))) + (with-empty-tn@fp-top(y) + (cond ((zerop value) + (inst fldz)) + ((= value 1e0) + (inst fld1)) + ((= value (coerce pi *read-default-float-format*)) + (inst fldpi)) + ((= value (log 10e0 2e0)) + (inst fldl2t)) + ((= value (log 2.718281828459045235360287471352662e0 2e0)) + (inst fldl2e)) + ((= value (log 2e0 10e0)) + (inst fldlg2)) + ((= value (log 2e0 2.718281828459045235360287471352662e0)) + (inst fldln2)) + (t (warn "ignoring bogus i387 constant ~A" value)))))) +(eval-when (:compile-toplevel :execute) + (setf *read-default-float-format* 'single-float)) + +;;;; complex float move functions + +(defun complex-single-reg-real-tn (x) + (make-random-tn :kind :normal :sc (sc-or-lose 'single-reg) + :offset (tn-offset x))) +(defun complex-single-reg-imag-tn (x) + (make-random-tn :kind :normal :sc (sc-or-lose 'single-reg) + :offset (1+ (tn-offset x)))) + +(defun complex-double-reg-real-tn (x) + (make-random-tn :kind :normal :sc (sc-or-lose 'double-reg) + :offset (tn-offset x))) +(defun complex-double-reg-imag-tn (x) + (make-random-tn :kind :normal :sc (sc-or-lose 'double-reg) + :offset (1+ (tn-offset x)))) + +#!+long-float +(defun complex-long-reg-real-tn (x) + (make-random-tn :kind :normal :sc (sc-or-lose 'long-reg) + :offset (tn-offset x))) +#!+long-float +(defun complex-long-reg-imag-tn (x) + (make-random-tn :kind :normal :sc (sc-or-lose 'long-reg) + :offset (1+ (tn-offset x)))) + +;;; X is source, Y is destination. +(define-move-fun (load-complex-single 2) (vop x y) + ((complex-single-stack) (complex-single-reg)) + (let ((real-tn (complex-single-reg-real-tn y))) + (with-empty-tn@fp-top (real-tn) + (inst fld (ea-for-csf-real-stack x)))) + (let ((imag-tn (complex-single-reg-imag-tn y))) + (with-empty-tn@fp-top (imag-tn) + (inst fld (ea-for-csf-imag-stack x))))) + +(define-move-fun (store-complex-single 2) (vop x y) + ((complex-single-reg) (complex-single-stack)) + (let ((real-tn (complex-single-reg-real-tn x))) + (cond ((zerop (tn-offset real-tn)) + (inst fst (ea-for-csf-real-stack y))) + (t + (inst fxch real-tn) + (inst fst (ea-for-csf-real-stack y)) + (inst fxch real-tn)))) + (let ((imag-tn (complex-single-reg-imag-tn x))) + (inst fxch imag-tn) + (inst fst (ea-for-csf-imag-stack y)) + (inst fxch imag-tn))) + +(define-move-fun (load-complex-double 2) (vop x y) + ((complex-double-stack) (complex-double-reg)) + (let ((real-tn (complex-double-reg-real-tn y))) + (with-empty-tn@fp-top(real-tn) + (inst fldd (ea-for-cdf-real-stack x)))) + (let ((imag-tn (complex-double-reg-imag-tn y))) + (with-empty-tn@fp-top(imag-tn) + (inst fldd (ea-for-cdf-imag-stack x))))) + +(define-move-fun (store-complex-double 2) (vop x y) + ((complex-double-reg) (complex-double-stack)) + (let ((real-tn (complex-double-reg-real-tn x))) + (cond ((zerop (tn-offset real-tn)) + (inst fstd (ea-for-cdf-real-stack y))) + (t + (inst fxch real-tn) + (inst fstd (ea-for-cdf-real-stack y)) + (inst fxch real-tn)))) + (let ((imag-tn (complex-double-reg-imag-tn x))) + (inst fxch imag-tn) + (inst fstd (ea-for-cdf-imag-stack y)) + (inst fxch imag-tn))) + +#!+long-float +(define-move-fun (load-complex-long 2) (vop x y) + ((complex-long-stack) (complex-long-reg)) + (let ((real-tn (complex-long-reg-real-tn y))) + (with-empty-tn@fp-top(real-tn) + (inst fldl (ea-for-clf-real-stack x)))) + (let ((imag-tn (complex-long-reg-imag-tn y))) + (with-empty-tn@fp-top(imag-tn) + (inst fldl (ea-for-clf-imag-stack x))))) + +#!+long-float +(define-move-fun (store-complex-long 2) (vop x y) + ((complex-long-reg) (complex-long-stack)) + (let ((real-tn (complex-long-reg-real-tn x))) + (cond ((zerop (tn-offset real-tn)) + (store-long-float (ea-for-clf-real-stack y))) + (t + (inst fxch real-tn) + (store-long-float (ea-for-clf-real-stack y)) + (inst fxch real-tn)))) + (let ((imag-tn (complex-long-reg-imag-tn x))) + (inst fxch imag-tn) + (store-long-float (ea-for-clf-imag-stack y)) + (inst fxch imag-tn))) + + +;;;; move VOPs + +;;; float register to register moves +(define-vop (float-move) + (:args (x)) + (:results (y)) + (:note "float move") + (:generator 0 + (unless (location= x y) + (cond ((zerop (tn-offset y)) + (copy-fp-reg-to-fr0 x)) + ((zerop (tn-offset x)) + (inst fstd y)) + (t + (inst fxch x) + (inst fstd y) + (inst fxch x)))))) + +(define-vop (single-move float-move) + (:args (x :scs (single-reg) :target y :load-if (not (location= x y)))) + (:results (y :scs (single-reg) :load-if (not (location= x y))))) +(define-move-vop single-move :move (single-reg) (single-reg)) + +(define-vop (double-move float-move) + (:args (x :scs (double-reg) :target y :load-if (not (location= x y)))) + (:results (y :scs (double-reg) :load-if (not (location= x y))))) +(define-move-vop double-move :move (double-reg) (double-reg)) + +#!+long-float +(define-vop (long-move float-move) + (:args (x :scs (long-reg) :target y :load-if (not (location= x y)))) + (:results (y :scs (long-reg) :load-if (not (location= x y))))) +#!+long-float +(define-move-vop long-move :move (long-reg) (long-reg)) + +;;; complex float register to register moves +(define-vop (complex-float-move) + (:args (x :target y :load-if (not (location= x y)))) + (:results (y :load-if (not (location= x y)))) + (:note "complex float move") + (:generator 0 + (unless (location= x y) + ;; Note the complex-float-regs are aligned to every second + ;; float register so there is not need to worry about overlap. + (let ((x-real (complex-double-reg-real-tn x)) + (y-real (complex-double-reg-real-tn y))) + (cond ((zerop (tn-offset y-real)) + (copy-fp-reg-to-fr0 x-real)) + ((zerop (tn-offset x-real)) + (inst fstd y-real)) + (t + (inst fxch x-real) + (inst fstd y-real) + (inst fxch x-real)))) + (let ((x-imag (complex-double-reg-imag-tn x)) + (y-imag (complex-double-reg-imag-tn y))) + (inst fxch x-imag) + (inst fstd y-imag) + (inst fxch x-imag))))) + +(define-vop (complex-single-move complex-float-move) + (:args (x :scs (complex-single-reg) :target y + :load-if (not (location= x y)))) + (:results (y :scs (complex-single-reg) :load-if (not (location= x y))))) +(define-move-vop complex-single-move :move + (complex-single-reg) (complex-single-reg)) + +(define-vop (complex-double-move complex-float-move) + (:args (x :scs (complex-double-reg) + :target y :load-if (not (location= x y)))) + (:results (y :scs (complex-double-reg) :load-if (not (location= x y))))) +(define-move-vop complex-double-move :move + (complex-double-reg) (complex-double-reg)) + +#!+long-float +(define-vop (complex-long-move complex-float-move) + (:args (x :scs (complex-long-reg) + :target y :load-if (not (location= x y)))) + (:results (y :scs (complex-long-reg) :load-if (not (location= x y))))) +#!+long-float +(define-move-vop complex-long-move :move + (complex-long-reg) (complex-long-reg)) + +;;; Move from float to a descriptor reg. allocating a new float +;;; object in the process. +(define-vop (move-from-single) + (:args (x :scs (single-reg) :to :save)) + (:results (y :scs (descriptor-reg))) + (:node-var node) + (:note "float to pointer coercion") + (:generator 13 + (with-fixed-allocation (y + single-float-widetag + single-float-size node) + (with-tn@fp-top(x) + (inst fst (ea-for-sf-desc y)))))) +(define-move-vop move-from-single :move + (single-reg) (descriptor-reg)) + +(define-vop (move-from-double) + (:args (x :scs (double-reg) :to :save)) + (:results (y :scs (descriptor-reg))) + (:node-var node) + (:note "float to pointer coercion") + (:generator 13 + (with-fixed-allocation (y + double-float-widetag + double-float-size + node) + (with-tn@fp-top(x) + (inst fstd (ea-for-df-desc y)))))) +(define-move-vop move-from-double :move + (double-reg) (descriptor-reg)) + +#!+long-float +(define-vop (move-from-long) + (:args (x :scs (long-reg) :to :save)) + (:results (y :scs (descriptor-reg))) + (:node-var node) + (:note "float to pointer coercion") + (:generator 13 + (with-fixed-allocation (y + long-float-widetag + long-float-size + node) + (with-tn@fp-top(x) + (store-long-float (ea-for-lf-desc y)))))) +#!+long-float +(define-move-vop move-from-long :move + (long-reg) (descriptor-reg)) + +(define-vop (move-from-fp-constant) + (:args (x :scs (fp-constant))) + (:results (y :scs (descriptor-reg))) + (:generator 2 + (ecase (sb!c::constant-value (sb!c::tn-leaf x)) + (0f0 (load-symbol-value y *fp-constant-0f0*)) + (1f0 (load-symbol-value y *fp-constant-1f0*)) + (0d0 (load-symbol-value y *fp-constant-0d0*)) + (1d0 (load-symbol-value y *fp-constant-1d0*)) + #!+long-float + (0l0 (load-symbol-value y *fp-constant-0l0*)) + #!+long-float + (1l0 (load-symbol-value y *fp-constant-1l0*)) + #!+long-float + (#.pi (load-symbol-value y *fp-constant-pi*)) + #!+long-float + (#.(log 10l0 2l0) (load-symbol-value y *fp-constant-l2t*)) + #!+long-float + (#.(log 2.718281828459045235360287471352662L0 2l0) + (load-symbol-value y *fp-constant-l2e*)) + #!+long-float + (#.(log 2l0 10l0) (load-symbol-value y *fp-constant-lg2*)) + #!+long-float + (#.(log 2l0 2.718281828459045235360287471352662L0) + (load-symbol-value y *fp-constant-ln2*))))) +(define-move-vop move-from-fp-constant :move + (fp-constant) (descriptor-reg)) + +;;; Move from a descriptor to a float register. +(define-vop (move-to-single) + (:args (x :scs (descriptor-reg))) + (:results (y :scs (single-reg))) + (:note "pointer to float coercion") + (:generator 2 + (with-empty-tn@fp-top(y) + (inst fld (ea-for-sf-desc x))))) +(define-move-vop move-to-single :move (descriptor-reg) (single-reg)) + +(define-vop (move-to-double) + (:args (x :scs (descriptor-reg))) + (:results (y :scs (double-reg))) + (:note "pointer to float coercion") + (:generator 2 + (with-empty-tn@fp-top(y) + (inst fldd (ea-for-df-desc x))))) +(define-move-vop move-to-double :move (descriptor-reg) (double-reg)) + +#!+long-float +(define-vop (move-to-long) + (:args (x :scs (descriptor-reg))) + (:results (y :scs (long-reg))) + (:note "pointer to float coercion") + (:generator 2 + (with-empty-tn@fp-top(y) + (inst fldl (ea-for-lf-desc x))))) +#!+long-float +(define-move-vop move-to-long :move (descriptor-reg) (long-reg)) + +;;; Move from complex float to a descriptor reg. allocating a new +;;; complex float object in the process. +(define-vop (move-from-complex-single) + (:args (x :scs (complex-single-reg) :to :save)) + (:results (y :scs (descriptor-reg))) + (:node-var node) + (:note "complex float to pointer coercion") + (:generator 13 + (with-fixed-allocation (y + complex-single-float-widetag + complex-single-float-size + node) + (let ((real-tn (complex-single-reg-real-tn x))) + (with-tn@fp-top(real-tn) + (inst fst (ea-for-csf-real-desc y)))) + (let ((imag-tn (complex-single-reg-imag-tn x))) + (with-tn@fp-top(imag-tn) + (inst fst (ea-for-csf-imag-desc y))))))) +(define-move-vop move-from-complex-single :move + (complex-single-reg) (descriptor-reg)) + +(define-vop (move-from-complex-double) + (:args (x :scs (complex-double-reg) :to :save)) + (:results (y :scs (descriptor-reg))) + (:node-var node) + (:note "complex float to pointer coercion") + (:generator 13 + (with-fixed-allocation (y + complex-double-float-widetag + complex-double-float-size + node) + (let ((real-tn (complex-double-reg-real-tn x))) + (with-tn@fp-top(real-tn) + (inst fstd (ea-for-cdf-real-desc y)))) + (let ((imag-tn (complex-double-reg-imag-tn x))) + (with-tn@fp-top(imag-tn) + (inst fstd (ea-for-cdf-imag-desc y))))))) +(define-move-vop move-from-complex-double :move + (complex-double-reg) (descriptor-reg)) + +#!+long-float +(define-vop (move-from-complex-long) + (:args (x :scs (complex-long-reg) :to :save)) + (:results (y :scs (descriptor-reg))) + (:node-var node) + (:note "complex float to pointer coercion") + (:generator 13 + (with-fixed-allocation (y + complex-long-float-widetag + complex-long-float-size + node) + (let ((real-tn (complex-long-reg-real-tn x))) + (with-tn@fp-top(real-tn) + (store-long-float (ea-for-clf-real-desc y)))) + (let ((imag-tn (complex-long-reg-imag-tn x))) + (with-tn@fp-top(imag-tn) + (store-long-float (ea-for-clf-imag-desc y))))))) +#!+long-float +(define-move-vop move-from-complex-long :move + (complex-long-reg) (descriptor-reg)) + +;;; Move from a descriptor to a complex float register. +(macrolet ((frob (name sc format) + `(progn + (define-vop (,name) + (:args (x :scs (descriptor-reg))) + (:results (y :scs (,sc))) + (:note "pointer to complex float coercion") + (:generator 2 + (let ((real-tn (complex-double-reg-real-tn y))) + (with-empty-tn@fp-top(real-tn) + ,@(ecase format + (:single '((inst fld (ea-for-csf-real-desc x)))) + (:double '((inst fldd (ea-for-cdf-real-desc x)))) + #!+long-float + (:long '((inst fldl (ea-for-clf-real-desc x))))))) + (let ((imag-tn (complex-double-reg-imag-tn y))) + (with-empty-tn@fp-top(imag-tn) + ,@(ecase format + (:single '((inst fld (ea-for-csf-imag-desc x)))) + (:double '((inst fldd (ea-for-cdf-imag-desc x)))) + #!+long-float + (:long '((inst fldl (ea-for-clf-imag-desc x))))))))) + (define-move-vop ,name :move (descriptor-reg) (,sc))))) + (frob move-to-complex-single complex-single-reg :single) + (frob move-to-complex-double complex-double-reg :double) + #!+long-float + (frob move-to-complex-double complex-long-reg :long)) + +;;;; the move argument vops +;;;; +;;;; Note these are also used to stuff fp numbers onto the c-call +;;;; stack so the order is different than the lisp-stack. + +;;; the general MOVE-ARG VOP +(macrolet ((frob (name sc stack-sc format) + `(progn + (define-vop (,name) + (:args (x :scs (,sc) :target y) + (fp :scs (any-reg) + :load-if (not (sc-is y ,sc)))) + (:results (y)) + (:note "float argument move") + (:generator ,(case format (:single 2) (:double 3) (:long 4)) + (sc-case y + (,sc + (unless (location= x y) + (cond ((zerop (tn-offset y)) + (copy-fp-reg-to-fr0 x)) + ((zerop (tn-offset x)) + (inst fstd y)) + (t + (inst fxch x) + (inst fstd y) + (inst fxch x))))) + (,stack-sc + (if (= (tn-offset fp) esp-offset) + (let* ((offset (* (tn-offset y) n-word-bytes)) + (ea (make-ea :dword :base fp :disp offset))) + (with-tn@fp-top(x) + ,@(ecase format + (:single '((inst fst ea))) + (:double '((inst fstd ea))) + #!+long-float + (:long '((store-long-float ea)))))) + (let ((ea (make-ea + :dword :base fp + :disp (- (* (+ (tn-offset y) + ,(case format + (:single 1) + (:double 2) + (:long 3))) + n-word-bytes))))) + (with-tn@fp-top(x) + ,@(ecase format + (:single '((inst fst ea))) + (:double '((inst fstd ea))) + #!+long-float + (:long '((store-long-float ea))))))))))) + (define-move-vop ,name :move-arg + (,sc descriptor-reg) (,sc))))) + (frob move-single-float-arg single-reg single-stack :single) + (frob move-double-float-arg double-reg double-stack :double) + #!+long-float + (frob move-long-float-arg long-reg long-stack :long)) + +;;;; complex float MOVE-ARG VOP +(macrolet ((frob (name sc stack-sc format) + `(progn + (define-vop (,name) + (:args (x :scs (,sc) :target y) + (fp :scs (any-reg) + :load-if (not (sc-is y ,sc)))) + (:results (y)) + (:note "complex float argument move") + (:generator ,(ecase format (:single 2) (:double 3) (:long 4)) + (sc-case y + (,sc + (unless (location= x y) + (let ((x-real (complex-double-reg-real-tn x)) + (y-real (complex-double-reg-real-tn y))) + (cond ((zerop (tn-offset y-real)) + (copy-fp-reg-to-fr0 x-real)) + ((zerop (tn-offset x-real)) + (inst fstd y-real)) + (t + (inst fxch x-real) + (inst fstd y-real) + (inst fxch x-real)))) + (let ((x-imag (complex-double-reg-imag-tn x)) + (y-imag (complex-double-reg-imag-tn y))) + (inst fxch x-imag) + (inst fstd y-imag) + (inst fxch x-imag)))) + (,stack-sc + (let ((real-tn (complex-double-reg-real-tn x))) + (cond ((zerop (tn-offset real-tn)) + ,@(ecase format + (:single + '((inst fst + (ea-for-csf-real-stack y fp)))) + (:double + '((inst fstd + (ea-for-cdf-real-stack y fp)))) + #!+long-float + (:long + '((store-long-float + (ea-for-clf-real-stack y fp)))))) + (t + (inst fxch real-tn) + ,@(ecase format + (:single + '((inst fst + (ea-for-csf-real-stack y fp)))) + (:double + '((inst fstd + (ea-for-cdf-real-stack y fp)))) + #!+long-float + (:long + '((store-long-float + (ea-for-clf-real-stack y fp))))) + (inst fxch real-tn)))) + (let ((imag-tn (complex-double-reg-imag-tn x))) + (inst fxch imag-tn) + ,@(ecase format + (:single + '((inst fst (ea-for-csf-imag-stack y fp)))) + (:double + '((inst fstd (ea-for-cdf-imag-stack y fp)))) + #!+long-float + (:long + '((store-long-float + (ea-for-clf-imag-stack y fp))))) + (inst fxch imag-tn)))))) + (define-move-vop ,name :move-arg + (,sc descriptor-reg) (,sc))))) + (frob move-complex-single-float-arg + complex-single-reg complex-single-stack :single) + (frob move-complex-double-float-arg + complex-double-reg complex-double-stack :double) + #!+long-float + (frob move-complex-long-float-arg + complex-long-reg complex-long-stack :long)) + +(define-move-vop move-arg :move-arg + (single-reg double-reg #!+long-float long-reg + complex-single-reg complex-double-reg #!+long-float complex-long-reg) + (descriptor-reg)) + + +;;;; arithmetic VOPs + +;;; dtc: the floating point arithmetic vops +;;; +;;; Note: Although these can accept x and y on the stack or pointed to +;;; from a descriptor register, they will work with register loading +;;; without these. Same deal with the result - it need only be a +;;; register. When load-tns are needed they will probably be in ST0 +;;; and the code below should be able to correctly handle all cases. +;;; +;;; However it seems to produce better code if all arg. and result +;;; options are used; on the P86 there is no extra cost in using a +;;; memory operand to the FP instructions - not so on the PPro. +;;; +;;; It may also be useful to handle constant args? +;;; +;;; 22-Jul-97: descriptor args lose in some simple cases when +;;; a function result computed in a loop. Then Python insists +;;; on consing the intermediate values! For example +;;; +;;; (defun test(a n) +;;; (declare (type (simple-array double-float (*)) a) +;;; (fixnum n)) +;;; (let ((sum 0d0)) +;;; (declare (type double-float sum)) +;;; (dotimes (i n) +;;; (incf sum (* (aref a i)(aref a i)))) +;;; sum)) +;;; +;;; So, disabling descriptor args until this can be fixed elsewhere. +(macrolet + ((frob (op fop-sti fopr-sti + fop fopr sname scost + fopd foprd dname dcost + lname lcost) + #!-long-float (declare (ignore lcost lname)) + `(progn + (define-vop (,sname) + (:translate ,op) + (:args (x :scs (single-reg single-stack #+nil descriptor-reg) + :to :eval) + (y :scs (single-reg single-stack #+nil descriptor-reg) + :to :eval)) + (:temporary (:sc single-reg :offset fr0-offset + :from :eval :to :result) fr0) + (:results (r :scs (single-reg single-stack))) + (:arg-types single-float single-float) + (:result-types single-float) + (:policy :fast-safe) + (:note "inline float arithmetic") + (:vop-var vop) + (:save-p :compute-only) + (:node-var node) + (:generator ,scost + ;; Handle a few special cases + (cond + ;; x, y, and r are the same register. + ((and (sc-is x single-reg) (location= x r) (location= y r)) + (cond ((zerop (tn-offset r)) + (inst ,fop fr0)) + (t + (inst fxch r) + (inst ,fop fr0) + ;; XX the source register will not be valid. + (note-next-instruction vop :internal-error) + (inst fxch r)))) + + ;; x and r are the same register. + ((and (sc-is x single-reg) (location= x r)) + (cond ((zerop (tn-offset r)) + (sc-case y + (single-reg + ;; ST(0) = ST(0) op ST(y) + (inst ,fop y)) + (single-stack + ;; ST(0) = ST(0) op Mem + (inst ,fop (ea-for-sf-stack y))) + (descriptor-reg + (inst ,fop (ea-for-sf-desc y))))) + (t + ;; y to ST0 + (sc-case y + (single-reg + (unless (zerop (tn-offset y)) + (copy-fp-reg-to-fr0 y))) + ((single-stack descriptor-reg) + (inst fstp fr0) + (if (sc-is y single-stack) + (inst fld (ea-for-sf-stack y)) + (inst fld (ea-for-sf-desc y))))) + ;; ST(i) = ST(i) op ST0 + (inst ,fop-sti r))) + (maybe-fp-wait node vop)) + ;; y and r are the same register. + ((and (sc-is y single-reg) (location= y r)) + (cond ((zerop (tn-offset r)) + (sc-case x + (single-reg + ;; ST(0) = ST(x) op ST(0) + (inst ,fopr x)) + (single-stack + ;; ST(0) = Mem op ST(0) + (inst ,fopr (ea-for-sf-stack x))) + (descriptor-reg + (inst ,fopr (ea-for-sf-desc x))))) + (t + ;; x to ST0 + (sc-case x + (single-reg + (unless (zerop (tn-offset x)) + (copy-fp-reg-to-fr0 x))) + ((single-stack descriptor-reg) + (inst fstp fr0) + (if (sc-is x single-stack) + (inst fld (ea-for-sf-stack x)) + (inst fld (ea-for-sf-desc x))))) + ;; ST(i) = ST(0) op ST(i) + (inst ,fopr-sti r))) + (maybe-fp-wait node vop)) + ;; the default case + (t + ;; Get the result to ST0. + + ;; Special handling is needed if x or y are in ST0, and + ;; simpler code is generated. + (cond + ;; x is in ST0 + ((and (sc-is x single-reg) (zerop (tn-offset x))) + ;; ST0 = ST0 op y + (sc-case y + (single-reg + (inst ,fop y)) + (single-stack + (inst ,fop (ea-for-sf-stack y))) + (descriptor-reg + (inst ,fop (ea-for-sf-desc y))))) + ;; y is in ST0 + ((and (sc-is y single-reg) (zerop (tn-offset y))) + ;; ST0 = x op ST0 + (sc-case x + (single-reg + (inst ,fopr x)) + (single-stack + (inst ,fopr (ea-for-sf-stack x))) + (descriptor-reg + (inst ,fopr (ea-for-sf-desc x))))) + (t + ;; x to ST0 + (sc-case x + (single-reg + (copy-fp-reg-to-fr0 x)) + (single-stack + (inst fstp fr0) + (inst fld (ea-for-sf-stack x))) + (descriptor-reg + (inst fstp fr0) + (inst fld (ea-for-sf-desc x)))) + ;; ST0 = ST0 op y + (sc-case y + (single-reg + (inst ,fop y)) + (single-stack + (inst ,fop (ea-for-sf-stack y))) + (descriptor-reg + (inst ,fop (ea-for-sf-desc y)))))) + + (note-next-instruction vop :internal-error) + + ;; Finally save the result. + (sc-case r + (single-reg + (cond ((zerop (tn-offset r)) + (maybe-fp-wait node)) + (t + (inst fst r)))) + (single-stack + (inst fst (ea-for-sf-stack r)))))))) + + (define-vop (,dname) + (:translate ,op) + (:args (x :scs (double-reg double-stack #+nil descriptor-reg) + :to :eval) + (y :scs (double-reg double-stack #+nil descriptor-reg) + :to :eval)) + (:temporary (:sc double-reg :offset fr0-offset + :from :eval :to :result) fr0) + (:results (r :scs (double-reg double-stack))) + (:arg-types double-float double-float) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline float arithmetic") + (:vop-var vop) + (:save-p :compute-only) + (:node-var node) + (:generator ,dcost + ;; Handle a few special cases. + (cond + ;; x, y, and r are the same register. + ((and (sc-is x double-reg) (location= x r) (location= y r)) + (cond ((zerop (tn-offset r)) + (inst ,fop fr0)) + (t + (inst fxch x) + (inst ,fopd fr0) + ;; XX the source register will not be valid. + (note-next-instruction vop :internal-error) + (inst fxch r)))) + + ;; x and r are the same register. + ((and (sc-is x double-reg) (location= x r)) + (cond ((zerop (tn-offset r)) + (sc-case y + (double-reg + ;; ST(0) = ST(0) op ST(y) + (inst ,fopd y)) + (double-stack + ;; ST(0) = ST(0) op Mem + (inst ,fopd (ea-for-df-stack y))) + (descriptor-reg + (inst ,fopd (ea-for-df-desc y))))) + (t + ;; y to ST0 + (sc-case y + (double-reg + (unless (zerop (tn-offset y)) + (copy-fp-reg-to-fr0 y))) + ((double-stack descriptor-reg) + (inst fstp fr0) + (if (sc-is y double-stack) + (inst fldd (ea-for-df-stack y)) + (inst fldd (ea-for-df-desc y))))) + ;; ST(i) = ST(i) op ST0 + (inst ,fop-sti r))) + (maybe-fp-wait node vop)) + ;; y and r are the same register. + ((and (sc-is y double-reg) (location= y r)) + (cond ((zerop (tn-offset r)) + (sc-case x + (double-reg + ;; ST(0) = ST(x) op ST(0) + (inst ,foprd x)) + (double-stack + ;; ST(0) = Mem op ST(0) + (inst ,foprd (ea-for-df-stack x))) + (descriptor-reg + (inst ,foprd (ea-for-df-desc x))))) + (t + ;; x to ST0 + (sc-case x + (double-reg + (unless (zerop (tn-offset x)) + (copy-fp-reg-to-fr0 x))) + ((double-stack descriptor-reg) + (inst fstp fr0) + (if (sc-is x double-stack) + (inst fldd (ea-for-df-stack x)) + (inst fldd (ea-for-df-desc x))))) + ;; ST(i) = ST(0) op ST(i) + (inst ,fopr-sti r))) + (maybe-fp-wait node vop)) + ;; the default case + (t + ;; Get the result to ST0. + + ;; Special handling is needed if x or y are in ST0, and + ;; simpler code is generated. + (cond + ;; x is in ST0 + ((and (sc-is x double-reg) (zerop (tn-offset x))) + ;; ST0 = ST0 op y + (sc-case y + (double-reg + (inst ,fopd y)) + (double-stack + (inst ,fopd (ea-for-df-stack y))) + (descriptor-reg + (inst ,fopd (ea-for-df-desc y))))) + ;; y is in ST0 + ((and (sc-is y double-reg) (zerop (tn-offset y))) + ;; ST0 = x op ST0 + (sc-case x + (double-reg + (inst ,foprd x)) + (double-stack + (inst ,foprd (ea-for-df-stack x))) + (descriptor-reg + (inst ,foprd (ea-for-df-desc x))))) + (t + ;; x to ST0 + (sc-case x + (double-reg + (copy-fp-reg-to-fr0 x)) + (double-stack + (inst fstp fr0) + (inst fldd (ea-for-df-stack x))) + (descriptor-reg + (inst fstp fr0) + (inst fldd (ea-for-df-desc x)))) + ;; ST0 = ST0 op y + (sc-case y + (double-reg + (inst ,fopd y)) + (double-stack + (inst ,fopd (ea-for-df-stack y))) + (descriptor-reg + (inst ,fopd (ea-for-df-desc y)))))) + + (note-next-instruction vop :internal-error) + + ;; Finally save the result. + (sc-case r + (double-reg + (cond ((zerop (tn-offset r)) + (maybe-fp-wait node)) + (t + (inst fst r)))) + (double-stack + (inst fstd (ea-for-df-stack r)))))))) + + #!+long-float + (define-vop (,lname) + (:translate ,op) + (:args (x :scs (long-reg) :to :eval) + (y :scs (long-reg) :to :eval)) + (:temporary (:sc long-reg :offset fr0-offset + :from :eval :to :result) fr0) + (:results (r :scs (long-reg))) + (:arg-types long-float long-float) + (:result-types long-float) + (:policy :fast-safe) + (:note "inline float arithmetic") + (:vop-var vop) + (:save-p :compute-only) + (:node-var node) + (:generator ,lcost + ;; Handle a few special cases. + (cond + ;; x, y, and r are the same register. + ((and (location= x r) (location= y r)) + (cond ((zerop (tn-offset r)) + (inst ,fop fr0)) + (t + (inst fxch x) + (inst ,fopd fr0) + ;; XX the source register will not be valid. + (note-next-instruction vop :internal-error) + (inst fxch r)))) + + ;; x and r are the same register. + ((location= x r) + (cond ((zerop (tn-offset r)) + ;; ST(0) = ST(0) op ST(y) + (inst ,fopd y)) + (t + ;; y to ST0 + (unless (zerop (tn-offset y)) + (copy-fp-reg-to-fr0 y)) + ;; ST(i) = ST(i) op ST0 + (inst ,fop-sti r))) + (maybe-fp-wait node vop)) + ;; y and r are the same register. + ((location= y r) + (cond ((zerop (tn-offset r)) + ;; ST(0) = ST(x) op ST(0) + (inst ,foprd x)) + (t + ;; x to ST0 + (unless (zerop (tn-offset x)) + (copy-fp-reg-to-fr0 x)) + ;; ST(i) = ST(0) op ST(i) + (inst ,fopr-sti r))) + (maybe-fp-wait node vop)) + ;; the default case + (t + ;; Get the result to ST0. + + ;; Special handling is needed if x or y are in ST0, and + ;; simpler code is generated. + (cond + ;; x is in ST0. + ((zerop (tn-offset x)) + ;; ST0 = ST0 op y + (inst ,fopd y)) + ;; y is in ST0 + ((zerop (tn-offset y)) + ;; ST0 = x op ST0 + (inst ,foprd x)) + (t + ;; x to ST0 + (copy-fp-reg-to-fr0 x) + ;; ST0 = ST0 op y + (inst ,fopd y))) + + (note-next-instruction vop :internal-error) + + ;; Finally save the result. + (cond ((zerop (tn-offset r)) + (maybe-fp-wait node)) + (t + (inst fst r)))))))))) + + (frob + fadd-sti fadd-sti + fadd fadd +/single-float 2 + faddd faddd +/double-float 2 + +/long-float 2) + (frob - fsub-sti fsubr-sti + fsub fsubr -/single-float 2 + fsubd fsubrd -/double-float 2 + -/long-float 2) + (frob * fmul-sti fmul-sti + fmul fmul */single-float 3 + fmuld fmuld */double-float 3 + */long-float 3) + (frob / fdiv-sti fdivr-sti + fdiv fdivr //single-float 12 + fdivd fdivrd //double-float 12 + //long-float 12)) + +(macrolet ((frob (name inst translate sc type) + `(define-vop (,name) + (:args (x :scs (,sc) :target fr0)) + (:results (y :scs (,sc))) + (:translate ,translate) + (:policy :fast-safe) + (:arg-types ,type) + (:result-types ,type) + (:temporary (:sc double-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:ignore fr0) + (:note "inline float arithmetic") + (:vop-var vop) + (:save-p :compute-only) + (:generator 1 + (note-this-location vop :internal-error) + (unless (zerop (tn-offset x)) + (inst fxch x) ; x to top of stack + (unless (location= x y) + (inst fst x))) ; Maybe save it. + (inst ,inst) ; Clobber st0. + (unless (zerop (tn-offset y)) + (inst fst y)))))) + + (frob abs/single-float fabs abs single-reg single-float) + (frob abs/double-float fabs abs double-reg double-float) + #!+long-float + (frob abs/long-float fabs abs long-reg long-float) + (frob %negate/single-float fchs %negate single-reg single-float) + (frob %negate/double-float fchs %negate double-reg double-float) + #!+long-float + (frob %negate/long-float fchs %negate long-reg long-float)) + +;;;; comparison + +(define-vop (=/float) + (:args (x) (y)) + (:temporary (:sc word-reg :offset eax-offset :from :eval) temp) + (:conditional) + (:info target not-p) + (:policy :fast-safe) + (:vop-var vop) + (:save-p :compute-only) + (:note "inline float comparison") + (:ignore temp) + (:generator 3 + (note-this-location vop :internal-error) + (cond + ;; x is in ST0; y is in any reg. + ((zerop (tn-offset x)) + (inst fucom y)) + ;; y is in ST0; x is in another reg. + ((zerop (tn-offset y)) + (inst fucom x)) + ;; x and y are the same register, not ST0 + ((location= x y) + (inst fxch x) + (inst fucom fr0-tn) + (inst fxch x)) + ;; x and y are different registers, neither ST0. + (t + (inst fxch x) + (inst fucom y) + (inst fxch x))) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x45) ; C3 C2 C0 + (inst cmp ah-tn #x40) + (inst jmp (if not-p :ne :e) target))) + +(define-vop (=/single-float =/float) + (:translate =) + (:args (x :scs (single-reg)) + (y :scs (single-reg))) + (:arg-types single-float single-float)) + +(define-vop (=/double-float =/float) + (:translate =) + (:args (x :scs (double-reg)) + (y :scs (double-reg))) + (:arg-types double-float double-float)) + +#!+long-float +(define-vop (=/long-float =/float) + (:translate =) + (:args (x :scs (long-reg)) + (y :scs (long-reg))) + (:arg-types long-float long-float)) + +(define-vop (single-float) + (:translate >) + (:args (x :scs (single-reg single-stack descriptor-reg)) + (y :scs (single-reg single-stack descriptor-reg))) + (:arg-types single-float single-float) + (:temporary (:sc single-reg :offset fr0-offset :from :eval) fr0) + (:temporary (:sc word-reg :offset eax-offset :from :eval) temp) + (:conditional) + (:info target not-p) + (:policy :fast-safe) + (:note "inline float comparison") + (:ignore temp) + (:generator 3 + ;; Handle a few special cases. + (cond + ;; y is ST0. + ((and (sc-is y single-reg) (zerop (tn-offset y))) + (sc-case x + (single-reg + (inst fcom x)) + ((single-stack descriptor-reg) + (if (sc-is x single-stack) + (inst fcom (ea-for-sf-stack x)) + (inst fcom (ea-for-sf-desc x))))) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x45) + (inst cmp ah-tn #x01)) + + ;; general case when y is not in ST0 + (t + ;; x to ST0 + (sc-case x + (single-reg + (unless (zerop (tn-offset x)) + (copy-fp-reg-to-fr0 x))) + ((single-stack descriptor-reg) + (inst fstp fr0) + (if (sc-is x single-stack) + (inst fld (ea-for-sf-stack x)) + (inst fld (ea-for-sf-desc x))))) + (sc-case y + (single-reg + (inst fcom y)) + ((single-stack descriptor-reg) + (if (sc-is y single-stack) + (inst fcom (ea-for-sf-stack y)) + (inst fcom (ea-for-sf-desc y))))) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x45))) + (inst jmp (if not-p :ne :e) target))) + +(define-vop (>double-float) + (:translate >) + (:args (x :scs (double-reg double-stack descriptor-reg)) + (y :scs (double-reg double-stack descriptor-reg))) + (:arg-types double-float double-float) + (:temporary (:sc double-reg :offset fr0-offset :from :eval) fr0) + (:temporary (:sc word-reg :offset eax-offset :from :eval) temp) + (:conditional) + (:info target not-p) + (:policy :fast-safe) + (:note "inline float comparison") + (:ignore temp) + (:generator 3 + ;; Handle a few special cases. + (cond + ;; y is ST0. + ((and (sc-is y double-reg) (zerop (tn-offset y))) + (sc-case x + (double-reg + (inst fcomd x)) + ((double-stack descriptor-reg) + (if (sc-is x double-stack) + (inst fcomd (ea-for-df-stack x)) + (inst fcomd (ea-for-df-desc x))))) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x45) + (inst cmp ah-tn #x01)) + + ;; general case when y is not in ST0 + (t + ;; x to ST0 + (sc-case x + (double-reg + (unless (zerop (tn-offset x)) + (copy-fp-reg-to-fr0 x))) + ((double-stack descriptor-reg) + (inst fstp fr0) + (if (sc-is x double-stack) + (inst fldd (ea-for-df-stack x)) + (inst fldd (ea-for-df-desc x))))) + (sc-case y + (double-reg + (inst fcomd y)) + ((double-stack descriptor-reg) + (if (sc-is y double-stack) + (inst fcomd (ea-for-df-stack y)) + (inst fcomd (ea-for-df-desc y))))) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x45))) + (inst jmp (if not-p :ne :e) target))) + +#!+long-float +(define-vop (>long-float) + (:translate >) + (:args (x :scs (long-reg)) + (y :scs (long-reg))) + (:arg-types long-float long-float) + (:temporary (:sc word-reg :offset eax-offset :from :eval) temp) + (:conditional) + (:info target not-p) + (:policy :fast-safe) + (:note "inline float comparison") + (:ignore temp) + (:generator 3 + (cond + ;; y is in ST0; x is in any reg. + ((zerop (tn-offset y)) + (inst fcomd x) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x45) + (inst cmp ah-tn #x01)) + ;; x is in ST0; y is in another reg. + ((zerop (tn-offset x)) + (inst fcomd y) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x45)) + ;; y and x are the same register, not ST0 + ;; y and x are different registers, neither ST0. + (t + (inst fxch x) + (inst fcomd y) + (inst fxch x) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x45))) + (inst jmp (if not-p :ne :e) target))) + +;;; Comparisons with 0 can use the FTST instruction. + +(define-vop (float-test) + (:args (x)) + (:temporary (:sc word-reg :offset eax-offset :from :eval) temp) + (:conditional) + (:info target not-p y) + (:variant-vars code) + (:policy :fast-safe) + (:vop-var vop) + (:save-p :compute-only) + (:note "inline float comparison") + (:ignore temp y) + (:generator 2 + (note-this-location vop :internal-error) + (cond + ;; x is in ST0 + ((zerop (tn-offset x)) + (inst ftst)) + ;; x not ST0 + (t + (inst fxch x) + (inst ftst) + (inst fxch x))) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x45) ; C3 C2 C0 + (unless (zerop code) + (inst cmp ah-tn code)) + (inst jmp (if not-p :ne :e) target))) + +(define-vop (=0/single-float float-test) + (:translate =) + (:args (x :scs (single-reg))) + (:arg-types single-float (:constant (single-float 0f0 0f0))) + (:variant #x40)) +(define-vop (=0/double-float float-test) + (:translate =) + (:args (x :scs (double-reg))) + (:arg-types double-float (:constant (double-float 0d0 0d0))) + (:variant #x40)) +#!+long-float +(define-vop (=0/long-float float-test) + (:translate =) + (:args (x :scs (long-reg))) + (:arg-types long-float (:constant (long-float 0l0 0l0))) + (:variant #x40)) + +(define-vop (<0/single-float float-test) + (:translate <) + (:args (x :scs (single-reg))) + (:arg-types single-float (:constant (single-float 0f0 0f0))) + (:variant #x01)) +(define-vop (<0/double-float float-test) + (:translate <) + (:args (x :scs (double-reg))) + (:arg-types double-float (:constant (double-float 0d0 0d0))) + (:variant #x01)) +#!+long-float +(define-vop (<0/long-float float-test) + (:translate <) + (:args (x :scs (long-reg))) + (:arg-types long-float (:constant (long-float 0l0 0l0))) + (:variant #x01)) + +(define-vop (>0/single-float float-test) + (:translate >) + (:args (x :scs (single-reg))) + (:arg-types single-float (:constant (single-float 0f0 0f0))) + (:variant #x00)) +(define-vop (>0/double-float float-test) + (:translate >) + (:args (x :scs (double-reg))) + (:arg-types double-float (:constant (double-float 0d0 0d0))) + (:variant #x00)) +#!+long-float +(define-vop (>0/long-float float-test) + (:translate >) + (:args (x :scs (long-reg))) + (:arg-types long-float (:constant (long-float 0l0 0l0))) + (:variant #x00)) + +#!+long-float +(deftransform eql ((x y) (long-float long-float)) + `(and (= (long-float-low-bits x) (long-float-low-bits y)) + (= (long-float-high-bits x) (long-float-high-bits y)) + (= (long-float-exp-bits x) (long-float-exp-bits y)))) + +;;;; conversion + +(macrolet ((frob (name translate to-sc to-type) + `(define-vop (,name) + (:args (x :scs (signed-stack signed-reg) :target temp)) + (:temporary (:sc signed-stack) temp) + (:results (y :scs (,to-sc))) + (:arg-types signed-num) + (:result-types ,to-type) + (:policy :fast-safe) + (:note "inline float coercion") + (:translate ,translate) + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (sc-case x + (signed-reg + (inst mov temp x) + (with-empty-tn@fp-top(y) + (note-this-location vop :internal-error) + (inst fild temp))) + (signed-stack + (with-empty-tn@fp-top(y) + (note-this-location vop :internal-error) + (inst fild x)))))))) + (frob %single-float/signed %single-float single-reg single-float) + (frob %double-float/signed %double-float double-reg double-float) + #!+long-float + (frob %long-float/signed %long-float long-reg long-float)) + +(macrolet ((frob (name translate to-sc to-type) + `(define-vop (,name) + (:args (x :scs (unsigned-reg))) + (:results (y :scs (,to-sc))) + (:arg-types unsigned-num) + (:result-types ,to-type) + (:policy :fast-safe) + (:note "inline float coercion") + (:translate ,translate) + (:vop-var vop) + (:save-p :compute-only) + (:generator 6 + (inst push 0) + (inst push x) + (with-empty-tn@fp-top(y) + (note-this-location vop :internal-error) + (inst fildl (make-ea :dword :base esp-tn))) + (inst add esp-tn 8))))) + (frob %single-float/unsigned %single-float single-reg single-float) + (frob %double-float/unsigned %double-float double-reg double-float) + #!+long-float + (frob %long-float/unsigned %long-float long-reg long-float)) + +;;; These should be no-ops but the compiler might want to move some +;;; things around. +(macrolet ((frob (name translate from-sc from-type to-sc to-type) + `(define-vop (,name) + (:args (x :scs (,from-sc) :target y)) + (:results (y :scs (,to-sc))) + (:arg-types ,from-type) + (:result-types ,to-type) + (:policy :fast-safe) + (:note "inline float coercion") + (:translate ,translate) + (:vop-var vop) + (:save-p :compute-only) + (:generator 2 + (note-this-location vop :internal-error) + (unless (location= x y) + (cond + ((zerop (tn-offset x)) + ;; x is in ST0, y is in another reg. not ST0 + (inst fst y)) + ((zerop (tn-offset y)) + ;; y is in ST0, x is in another reg. not ST0 + (copy-fp-reg-to-fr0 x)) + (t + ;; Neither x or y are in ST0, and they are not in + ;; the same reg. + (inst fxch x) + (inst fst y) + (inst fxch x)))))))) + + (frob %single-float/double-float %single-float double-reg + double-float single-reg single-float) + #!+long-float + (frob %single-float/long-float %single-float long-reg + long-float single-reg single-float) + (frob %double-float/single-float %double-float single-reg single-float + double-reg double-float) + #!+long-float + (frob %double-float/long-float %double-float long-reg long-float + double-reg double-float) + #!+long-float + (frob %long-float/single-float %long-float single-reg single-float + long-reg long-float) + #!+long-float + (frob %long-float/double-float %long-float double-reg double-float + long-reg long-float)) + +(macrolet ((frob (trans from-sc from-type round-p) + `(define-vop (,(symbolicate trans "/" from-type)) + (:args (x :scs (,from-sc))) + (:temporary (:sc signed-stack) stack-temp) + ,@(unless round-p + '((:temporary (:sc unsigned-stack) scw) + (:temporary (:sc any-reg) rcw))) + (:results (y :scs (signed-reg))) + (:arg-types ,from-type) + (:result-types signed-num) + (:translate ,trans) + (:policy :fast-safe) + (:note "inline float truncate") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + ,@(unless round-p + '((note-this-location vop :internal-error) + ;; Catch any pending FPE exceptions. + (inst wait))) + (,(if round-p 'progn 'pseudo-atomic) + ;; Normal mode (for now) is "round to best". + (with-tn@fp-top (x) + ,@(unless round-p + '((inst fnstcw scw) ; save current control word + (move rcw scw) ; into 16-bit register + (inst or rcw (ash #b11 10)) ; CHOP + (move stack-temp rcw) + (inst fldcw stack-temp))) + (sc-case y + (signed-stack + (inst fist y)) + (signed-reg + (inst fist stack-temp) + (inst mov y stack-temp))) + ,@(unless round-p + '((inst fldcw scw))))))))) + (frob %unary-truncate single-reg single-float nil) + (frob %unary-truncate double-reg double-float nil) + #!+long-float + (frob %unary-truncate long-reg long-float nil) + (frob %unary-round single-reg single-float t) + (frob %unary-round double-reg double-float t) + #!+long-float + (frob %unary-round long-reg long-float t)) + +(macrolet ((frob (trans from-sc from-type round-p) + `(define-vop (,(symbolicate trans "/" from-type "=>UNSIGNED")) + (:args (x :scs (,from-sc) :target fr0)) + (:temporary (:sc double-reg :offset fr0-offset + :from :argument :to :result) fr0) + ,@(unless round-p + '((:temporary (:sc unsigned-stack) stack-temp) + (:temporary (:sc unsigned-stack) scw) + (:temporary (:sc any-reg) rcw))) + (:results (y :scs (unsigned-reg))) + (:arg-types ,from-type) + (:result-types unsigned-num) + (:translate ,trans) + (:policy :fast-safe) + (:note "inline float truncate") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + ,@(unless round-p + '((note-this-location vop :internal-error) + ;; Catch any pending FPE exceptions. + (inst wait))) + ;; Normal mode (for now) is "round to best". + (unless (zerop (tn-offset x)) + (copy-fp-reg-to-fr0 x)) + ,@(unless round-p + '((inst fnstcw scw) ; save current control word + (move rcw scw) ; into 16-bit register + (inst or rcw (ash #b11 10)) ; CHOP + (move stack-temp rcw) + (inst fldcw stack-temp))) + (inst sub esp-tn 8) + (inst fistpl (make-ea :dword :base esp-tn)) + (inst pop y) + (inst fld fr0) ; copy fr0 to at least restore stack. + (inst add esp-tn 4) + ,@(unless round-p + '((inst fldcw scw))))))) + (frob %unary-truncate single-reg single-float nil) + (frob %unary-truncate double-reg double-float nil) + #!+long-float + (frob %unary-truncate long-reg long-float nil) + (frob %unary-round single-reg single-float t) + (frob %unary-round double-reg double-float t) + #!+long-float + (frob %unary-round long-reg long-float t)) + +(define-vop (make-single-float) + (:args (bits :scs (signed-reg) :target res + :load-if (not (or (and (sc-is bits signed-stack) + (sc-is res single-reg)) + (and (sc-is bits signed-stack) + (sc-is res single-stack) + (location= bits res)))))) + (:results (res :scs (single-reg single-stack))) + (:temporary (:sc signed-stack) stack-temp) + (:arg-types signed-num) + (:result-types single-float) + (:translate make-single-float) + (:policy :fast-safe) + (:vop-var vop) + (:generator 4 + (sc-case res + (single-stack + (sc-case bits + (signed-reg + (inst mov res bits)) + (signed-stack + (aver (location= bits res))))) + (single-reg + (sc-case bits + (signed-reg + ;; source must be in memory + (inst mov stack-temp bits) + (with-empty-tn@fp-top(res) + (inst fld stack-temp))) + (signed-stack + (with-empty-tn@fp-top(res) + (inst fld bits)))))))) + +(define-vop (make-double-float) + (:args (hi-bits :scs (signed-reg)) + (lo-bits :scs (unsigned-reg))) + (:results (res :scs (double-reg))) + (:temporary (:sc double-stack) temp) + (:arg-types signed-num unsigned-num) + (:result-types double-float) + (:translate make-double-float) + (:policy :fast-safe) + (:vop-var vop) + (:generator 2 + (let ((offset (1+ (tn-offset temp)))) + (storew hi-bits ebp-tn (- offset)) + (storew lo-bits ebp-tn (- (1+ offset))) + (with-empty-tn@fp-top(res) + (inst fldd (make-ea :dword :base ebp-tn + :disp (- (* (1+ offset) n-word-bytes)))))))) + +#!+long-float +(define-vop (make-long-float) + (:args (exp-bits :scs (signed-reg)) + (hi-bits :scs (unsigned-reg)) + (lo-bits :scs (unsigned-reg))) + (:results (res :scs (long-reg))) + (:temporary (:sc long-stack) temp) + (:arg-types signed-num unsigned-num unsigned-num) + (:result-types long-float) + (:translate make-long-float) + (:policy :fast-safe) + (:vop-var vop) + (:generator 3 + (let ((offset (1+ (tn-offset temp)))) + (storew exp-bits ebp-tn (- offset)) + (storew hi-bits ebp-tn (- (1+ offset))) + (storew lo-bits ebp-tn (- (+ offset 2))) + (with-empty-tn@fp-top(res) + (inst fldl (make-ea :dword :base ebp-tn + :disp (- (* (+ offset 2) n-word-bytes)))))))) + +(define-vop (single-float-bits) + (:args (float :scs (single-reg descriptor-reg) + :load-if (not (sc-is float single-stack)))) + (:results (bits :scs (signed-reg))) + (:temporary (:sc signed-stack :from :argument :to :result) stack-temp) + (:arg-types single-float) + (:result-types signed-num) + (:translate single-float-bits) + (:policy :fast-safe) + (:vop-var vop) + (:generator 4 + (sc-case bits + (signed-reg + (sc-case float + (single-reg + (with-tn@fp-top(float) + (inst fst stack-temp) + (inst mov bits stack-temp))) + (single-stack + (inst mov bits float)) + (descriptor-reg + (loadw + bits float single-float-value-slot + other-pointer-lowtag)))) + (signed-stack + (sc-case float + (single-reg + (with-tn@fp-top(float) + (inst fst bits)))))))) + +(define-vop (double-float-high-bits) + (:args (float :scs (double-reg descriptor-reg) + :load-if (not (sc-is float double-stack)))) + (:results (hi-bits :scs (signed-reg))) + (:temporary (:sc double-stack) temp) + (:arg-types double-float) + (:result-types signed-num) + (:translate double-float-high-bits) + (:policy :fast-safe) + (:vop-var vop) + (:generator 5 + (sc-case float + (double-reg + (with-tn@fp-top(float) + (let ((where (make-ea :dword :base ebp-tn + :disp (- (* (+ 2 (tn-offset temp)) + n-word-bytes))))) + (inst fstd where))) + (loadw hi-bits ebp-tn (- (1+ (tn-offset temp))))) + (double-stack + (loadw hi-bits ebp-tn (- (1+ (tn-offset float))))) + (descriptor-reg + (loadw hi-bits float (1+ double-float-value-slot) + other-pointer-lowtag))))) + +(define-vop (double-float-low-bits) + (:args (float :scs (double-reg descriptor-reg) + :load-if (not (sc-is float double-stack)))) + (:results (lo-bits :scs (unsigned-reg))) + (:temporary (:sc double-stack) temp) + (:arg-types double-float) + (:result-types unsigned-num) + (:translate double-float-low-bits) + (:policy :fast-safe) + (:vop-var vop) + (:generator 5 + (sc-case float + (double-reg + (with-tn@fp-top(float) + (let ((where (make-ea :dword :base ebp-tn + :disp (- (* (+ 2 (tn-offset temp)) + n-word-bytes))))) + (inst fstd where))) + (loadw lo-bits ebp-tn (- (+ 2 (tn-offset temp))))) + (double-stack + (loadw lo-bits ebp-tn (- (+ 2 (tn-offset float))))) + (descriptor-reg + (loadw lo-bits float double-float-value-slot + other-pointer-lowtag))))) + +#!+long-float +(define-vop (long-float-exp-bits) + (:args (float :scs (long-reg descriptor-reg) + :load-if (not (sc-is float long-stack)))) + (:results (exp-bits :scs (signed-reg))) + (:temporary (:sc long-stack) temp) + (:arg-types long-float) + (:result-types signed-num) + (:translate long-float-exp-bits) + (:policy :fast-safe) + (:vop-var vop) + (:generator 5 + (sc-case float + (long-reg + (with-tn@fp-top(float) + (let ((where (make-ea :dword :base ebp-tn + :disp (- (* (+ 3 (tn-offset temp)) + n-word-bytes))))) + (store-long-float where))) + (inst movsx exp-bits + (make-ea :word :base ebp-tn + :disp (* (- (1+ (tn-offset temp))) n-word-bytes)))) + (long-stack + (inst movsx exp-bits + (make-ea :word :base ebp-tn + :disp (* (- (1+ (tn-offset float))) n-word-bytes)))) + (descriptor-reg + (inst movsx exp-bits + (make-ea :word :base float + :disp (- (* (+ 2 long-float-value-slot) + n-word-bytes) + other-pointer-lowtag))))))) + +#!+long-float +(define-vop (long-float-high-bits) + (:args (float :scs (long-reg descriptor-reg) + :load-if (not (sc-is float long-stack)))) + (:results (hi-bits :scs (unsigned-reg))) + (:temporary (:sc long-stack) temp) + (:arg-types long-float) + (:result-types unsigned-num) + (:translate long-float-high-bits) + (:policy :fast-safe) + (:vop-var vop) + (:generator 5 + (sc-case float + (long-reg + (with-tn@fp-top(float) + (let ((where (make-ea :dword :base ebp-tn + :disp (- (* (+ 3 (tn-offset temp)) + n-word-bytes))))) + (store-long-float where))) + (loadw hi-bits ebp-tn (- (+ (tn-offset temp) 2)))) + (long-stack + (loadw hi-bits ebp-tn (- (+ (tn-offset float) 2)))) + (descriptor-reg + (loadw hi-bits float (1+ long-float-value-slot) + other-pointer-lowtag))))) + +#!+long-float +(define-vop (long-float-low-bits) + (:args (float :scs (long-reg descriptor-reg) + :load-if (not (sc-is float long-stack)))) + (:results (lo-bits :scs (unsigned-reg))) + (:temporary (:sc long-stack) temp) + (:arg-types long-float) + (:result-types unsigned-num) + (:translate long-float-low-bits) + (:policy :fast-safe) + (:vop-var vop) + (:generator 5 + (sc-case float + (long-reg + (with-tn@fp-top(float) + (let ((where (make-ea :dword :base ebp-tn + :disp (- (* (+ 3 (tn-offset temp)) + n-word-bytes))))) + (store-long-float where))) + (loadw lo-bits ebp-tn (- (+ (tn-offset temp) 3)))) + (long-stack + (loadw lo-bits ebp-tn (- (+ (tn-offset float) 3)))) + (descriptor-reg + (loadw lo-bits float long-float-value-slot + other-pointer-lowtag))))) + +;;;; float mode hackery + +(sb!xc:deftype float-modes () '(unsigned-byte 32)) ; really only 16 +(defknown floating-point-modes () float-modes (flushable)) +(defknown ((setf floating-point-modes)) (float-modes) + float-modes) + +(def!constant npx-env-size (* 7 n-word-bytes)) +(def!constant npx-cw-offset 0) +(def!constant npx-sw-offset 4) + +(define-vop (floating-point-modes) + (:results (res :scs (unsigned-reg))) + (:result-types unsigned-num) + (:translate floating-point-modes) + (:policy :fast-safe) + (:temporary (:sc unsigned-reg :offset eax-offset :target res + :to :result) eax) + (:generator 8 + (inst sub esp-tn npx-env-size) ; Make space on stack. + (inst wait) ; Catch any pending FPE exceptions + (inst fstenv (make-ea :dword :base esp-tn)) ; masks all exceptions + (inst fldenv (make-ea :dword :base esp-tn)) ; Restore previous state. + ;; Move current status to high word. + (inst mov eax (make-ea :dword :base esp-tn :disp (- npx-sw-offset 2))) + ;; Move exception mask to low word. + (inst mov ax-tn (make-ea :word :base esp-tn :disp npx-cw-offset)) + (inst add esp-tn npx-env-size) ; Pop stack. + (inst xor eax #x3f) ; Flip exception mask to trap enable bits. + (move res eax))) + +(define-vop (set-floating-point-modes) + (:args (new :scs (unsigned-reg) :to :result :target res)) + (:results (res :scs (unsigned-reg))) + (:arg-types unsigned-num) + (:result-types unsigned-num) + (:translate (setf floating-point-modes)) + (:policy :fast-safe) + (:temporary (:sc unsigned-reg :offset eax-offset + :from :eval :to :result) eax) + (:generator 3 + (inst sub esp-tn npx-env-size) ; Make space on stack. + (inst wait) ; Catch any pending FPE exceptions. + (inst fstenv (make-ea :dword :base esp-tn)) + (inst mov eax new) + (inst xor eax #x3f) ; Turn trap enable bits into exception mask. + (inst mov (make-ea :word :base esp-tn :disp npx-cw-offset) ax-tn) + (inst shr eax 16) ; position status word + (inst mov (make-ea :word :base esp-tn :disp npx-sw-offset) ax-tn) + (inst fldenv (make-ea :dword :base esp-tn)) + (inst add esp-tn npx-env-size) ; Pop stack. + (move res new))) + +#!-long-float +(progn + +;;; Let's use some of the 80387 special functions. +;;; +;;; These defs will not take effect unless code/irrat.lisp is modified +;;; to remove the inlined alien routine def. + +(macrolet ((frob (func trans op) + `(define-vop (,func) + (:args (x :scs (double-reg) :target fr0)) + (:temporary (:sc double-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:ignore fr0) + (:results (y :scs (double-reg))) + (:arg-types double-float) + (:result-types double-float) + (:translate ,trans) + (:policy :fast-safe) + (:note "inline NPX function") + (:vop-var vop) + (:save-p :compute-only) + (:node-var node) + (:generator 5 + (note-this-location vop :internal-error) + (unless (zerop (tn-offset x)) + (inst fxch x) ; x to top of stack + (unless (location= x y) + (inst fst x))) ; maybe save it + (inst ,op) ; clobber st0 + (cond ((zerop (tn-offset y)) + (maybe-fp-wait node)) + (t + (inst fst y))))))) + + ;; Quick versions of fsin and fcos that require the argument to be + ;; within range 2^63. + (frob fsin-quick %sin-quick fsin) + (frob fcos-quick %cos-quick fcos) + (frob fsqrt %sqrt fsqrt)) + +;;; Quick version of ftan that requires the argument to be within +;;; range 2^63. +(define-vop (ftan-quick) + (:translate %tan-quick) + (:args (x :scs (double-reg) :target fr0)) + (:temporary (:sc double-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:results (y :scs (double-reg))) + (:arg-types double-float) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline tan function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + (case (tn-offset x) + (0 + (inst fstp fr1)) + (1 + (inst fstp fr0)) + (t + (inst fstp fr0) + (inst fstp fr0) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 2))))) + (inst fptan) + ;; Result is in fr1 + (case (tn-offset y) + (0 + (inst fxch fr1)) + (1) + (t + (inst fxch fr1) + (inst fstd y))))) + +;;; KLUDGE: these versions of fsin, fcos, and ftan simply load a 0.0 +;;; result if the argument is out of range 2^63 and would thus be +;;; hopelessly inaccurate. +(macrolet ((frob (func trans op) + `(define-vop (,func) + (:translate ,trans) + (:args (x :scs (double-reg) :target fr0)) + (:temporary (:sc double-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc unsigned-reg :offset eax-offset + :from :argument :to :result) eax) + (:results (y :scs (double-reg))) + (:arg-types double-float) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline sin/cos function") + (:vop-var vop) + (:save-p :compute-only) + (:ignore eax) + (:generator 5 + (note-this-location vop :internal-error) + (unless (zerop (tn-offset x)) + (inst fxch x) ; x to top of stack + (unless (location= x y) + (inst fst x))) ; maybe save it + (inst ,op) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x04) ; C2 + (inst jmp :z DONE) + ;; Else x was out of range so reduce it; ST0 is unchanged. + (inst fstp fr0) ; Load 0.0 + (inst fldz) + DONE + (unless (zerop (tn-offset y)) + (inst fstd y)))))) + (frob fsin %sin fsin) + (frob fcos %cos fcos)) + +(define-vop (ftan) + (:translate %tan) + (:args (x :scs (double-reg) :target fr0)) + (:temporary (:sc double-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:temporary (:sc unsigned-reg :offset eax-offset + :from :argument :to :result) eax) + (:results (y :scs (double-reg))) + (:arg-types double-float) + (:result-types double-float) + (:ignore eax) + (:policy :fast-safe) + (:note "inline tan function") + (:vop-var vop) + (:save-p :compute-only) + (:ignore eax) + (:generator 5 + (note-this-location vop :internal-error) + (case (tn-offset x) + (0 + (inst fstp fr1)) + (1 + (inst fstp fr0)) + (t + (inst fstp fr0) + (inst fstp fr0) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 2))))) + (inst fptan) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x04) ; C2 + (inst jmp :z DONE) + ;; Else x was out of range so load 0.0 + (inst fxch fr1) + DONE + ;; Result is in fr1 + (case (tn-offset y) + (0 + (inst fxch fr1)) + (1) + (t + (inst fxch fr1) + (inst fstd y))))) + +;;; %exp that handles the following special cases: exp(+Inf) is +Inf; +;;; exp(-Inf) is 0; exp(NaN) is NaN. +(define-vop (fexp) + (:translate %exp) + (:args (x :scs (double-reg) :target fr0)) + (:temporary (:sc word-reg :offset eax-offset :from :eval :to :result) temp) + (:temporary (:sc double-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:temporary (:sc double-reg :offset fr2-offset + :from :argument :to :result) fr2) + (:results (y :scs (double-reg))) + (:arg-types double-float) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline exp function") + (:vop-var vop) + (:save-p :compute-only) + (:ignore temp) + (:generator 5 + (note-this-location vop :internal-error) + (unless (zerop (tn-offset x)) + (inst fxch x) ; x to top of stack + (unless (location= x y) + (inst fst x))) ; maybe save it + ;; Check for Inf or NaN + (inst fxam) + (inst fnstsw) + (inst sahf) + (inst jmp :nc NOINFNAN) ; Neither Inf or NaN. + (inst jmp :np NOINFNAN) ; NaN gives NaN? Continue. + (inst and ah-tn #x02) ; Test sign of Inf. + (inst jmp :z DONE) ; +Inf gives +Inf. + (inst fstp fr0) ; -Inf gives 0 + (inst fldz) + (inst jmp-short DONE) + NOINFNAN + (inst fstp fr1) + (inst fldl2e) + (inst fmul fr1) + ;; Now fr0=x log2(e) + (inst fst fr1) + (inst frndint) + (inst fst fr2) + (inst fsubp-sti fr1) + (inst f2xm1) + (inst fld1) + (inst faddp-sti fr1) + (inst fscale) + (inst fld fr0) + DONE + (unless (zerop (tn-offset y)) + (inst fstd y)))) + +;;; Expm1 = exp(x) - 1. +;;; Handles the following special cases: +;;; expm1(+Inf) is +Inf; expm1(-Inf) is -1.0; expm1(NaN) is NaN. +(define-vop (fexpm1) + (:translate %expm1) + (:args (x :scs (double-reg) :target fr0)) + (:temporary (:sc word-reg :offset eax-offset :from :eval :to :result) temp) + (:temporary (:sc double-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:temporary (:sc double-reg :offset fr2-offset + :from :argument :to :result) fr2) + (:results (y :scs (double-reg))) + (:arg-types double-float) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline expm1 function") + (:vop-var vop) + (:save-p :compute-only) + (:ignore temp) + (:generator 5 + (note-this-location vop :internal-error) + (unless (zerop (tn-offset x)) + (inst fxch x) ; x to top of stack + (unless (location= x y) + (inst fst x))) ; maybe save it + ;; Check for Inf or NaN + (inst fxam) + (inst fnstsw) + (inst sahf) + (inst jmp :nc NOINFNAN) ; Neither Inf or NaN. + (inst jmp :np NOINFNAN) ; NaN gives NaN? Continue. + (inst and ah-tn #x02) ; Test sign of Inf. + (inst jmp :z DONE) ; +Inf gives +Inf. + (inst fstp fr0) ; -Inf gives -1.0 + (inst fld1) + (inst fchs) + (inst jmp-short DONE) + NOINFNAN + ;; Free two stack slots leaving the argument on top. + (inst fstp fr2) + (inst fstp fr0) + (inst fldl2e) + (inst fmul fr1) ; Now fr0 = x log2(e) + (inst fst fr1) + (inst frndint) + (inst fsub-sti fr1) + (inst fxch fr1) + (inst f2xm1) + (inst fscale) + (inst fxch fr1) + (inst fld1) + (inst fscale) + (inst fstp fr1) + (inst fld1) + (inst fsub fr1) + (inst fsubr fr2) + DONE + (unless (zerop (tn-offset y)) + (inst fstd y)))) + +(define-vop (flog) + (:translate %log) + (:args (x :scs (double-reg double-stack descriptor-reg) :target fr0)) + (:temporary (:sc double-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:results (y :scs (double-reg))) + (:arg-types double-float) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline log function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + (sc-case x + (double-reg + (case (tn-offset x) + (0 + ;; x is in fr0 + (inst fstp fr1) + (inst fldln2) + (inst fxch fr1)) + (1 + ;; x is in fr1 + (inst fstp fr0) + (inst fldln2) + (inst fxch fr1)) + (t + ;; x is in a FP reg, not fr0 or fr1 + (inst fstp fr0) + (inst fstp fr0) + (inst fldln2) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (1- (tn-offset x)))))) + (inst fyl2x)) + ((double-stack descriptor-reg) + (inst fstp fr0) + (inst fstp fr0) + (inst fldln2) + (if (sc-is x double-stack) + (inst fldd (ea-for-df-stack x)) + (inst fldd (ea-for-df-desc x))) + (inst fyl2x))) + (inst fld fr0) + (case (tn-offset y) + ((0 1)) + (t (inst fstd y))))) + +(define-vop (flog10) + (:translate %log10) + (:args (x :scs (double-reg double-stack descriptor-reg) :target fr0)) + (:temporary (:sc double-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:results (y :scs (double-reg))) + (:arg-types double-float) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline log10 function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + (sc-case x + (double-reg + (case (tn-offset x) + (0 + ;; x is in fr0 + (inst fstp fr1) + (inst fldlg2) + (inst fxch fr1)) + (1 + ;; x is in fr1 + (inst fstp fr0) + (inst fldlg2) + (inst fxch fr1)) + (t + ;; x is in a FP reg, not fr0 or fr1 + (inst fstp fr0) + (inst fstp fr0) + (inst fldlg2) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (1- (tn-offset x)))))) + (inst fyl2x)) + ((double-stack descriptor-reg) + (inst fstp fr0) + (inst fstp fr0) + (inst fldlg2) + (if (sc-is x double-stack) + (inst fldd (ea-for-df-stack x)) + (inst fldd (ea-for-df-desc x))) + (inst fyl2x))) + (inst fld fr0) + (case (tn-offset y) + ((0 1)) + (t (inst fstd y))))) + +(define-vop (fpow) + (:translate %pow) + (:args (x :scs (double-reg double-stack descriptor-reg) :target fr0) + (y :scs (double-reg double-stack descriptor-reg) :target fr1)) + (:temporary (:sc double-reg :offset fr0-offset + :from (:argument 0) :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset + :from (:argument 1) :to :result) fr1) + (:temporary (:sc double-reg :offset fr2-offset + :from :load :to :result) fr2) + (:results (r :scs (double-reg))) + (:arg-types double-float double-float) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline pow function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + ;; Setup x in fr0 and y in fr1 + (cond + ;; x in fr0; y in fr1 + ((and (sc-is x double-reg) (zerop (tn-offset x)) + (sc-is y double-reg) (= 1 (tn-offset y)))) + ;; y in fr1; x not in fr0 + ((and (sc-is y double-reg) (= 1 (tn-offset y))) + ;; Load x to fr0 + (sc-case x + (double-reg + (copy-fp-reg-to-fr0 x)) + (double-stack + (inst fstp fr0) + (inst fldd (ea-for-df-stack x))) + (descriptor-reg + (inst fstp fr0) + (inst fldd (ea-for-df-desc x))))) + ;; x in fr0; y not in fr1 + ((and (sc-is x double-reg) (zerop (tn-offset x))) + (inst fxch fr1) + ;; Now load y to fr0 + (sc-case y + (double-reg + (copy-fp-reg-to-fr0 y)) + (double-stack + (inst fstp fr0) + (inst fldd (ea-for-df-stack y))) + (descriptor-reg + (inst fstp fr0) + (inst fldd (ea-for-df-desc y)))) + (inst fxch fr1)) + ;; x in fr1; y not in fr1 + ((and (sc-is x double-reg) (= 1 (tn-offset x))) + ;; Load y to fr0 + (sc-case y + (double-reg + (copy-fp-reg-to-fr0 y)) + (double-stack + (inst fstp fr0) + (inst fldd (ea-for-df-stack y))) + (descriptor-reg + (inst fstp fr0) + (inst fldd (ea-for-df-desc y)))) + (inst fxch fr1)) + ;; y in fr0; + ((and (sc-is y double-reg) (zerop (tn-offset y))) + (inst fxch fr1) + ;; Now load x to fr0 + (sc-case x + (double-reg + (copy-fp-reg-to-fr0 x)) + (double-stack + (inst fstp fr0) + (inst fldd (ea-for-df-stack x))) + (descriptor-reg + (inst fstp fr0) + (inst fldd (ea-for-df-desc x))))) + ;; Neither x or y are in either fr0 or fr1 + (t + ;; Load y then x + (inst fstp fr0) + (inst fstp fr0) + (sc-case y + (double-reg + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset y) 2)))) + (double-stack + (inst fldd (ea-for-df-stack y))) + (descriptor-reg + (inst fldd (ea-for-df-desc y)))) + ;; Load x to fr0 + (sc-case x + (double-reg + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (1- (tn-offset x))))) + (double-stack + (inst fldd (ea-for-df-stack x))) + (descriptor-reg + (inst fldd (ea-for-df-desc x)))))) + + ;; Now have x at fr0; and y at fr1 + (inst fyl2x) + ;; Now fr0=y log2(x) + (inst fld fr0) + (inst frndint) + (inst fst fr2) + (inst fsubp-sti fr1) + (inst f2xm1) + (inst fld1) + (inst faddp-sti fr1) + (inst fscale) + (inst fld fr0) + (case (tn-offset r) + ((0 1)) + (t (inst fstd r))))) + +(define-vop (fscalen) + (:translate %scalbn) + (:args (x :scs (double-reg double-stack descriptor-reg) :target fr0) + (y :scs (signed-stack signed-reg) :target temp)) + (:temporary (:sc double-reg :offset fr0-offset + :from (:argument 0) :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset :from :eval :to :result) fr1) + (:temporary (:sc signed-stack :from (:argument 1) :to :result) temp) + (:results (r :scs (double-reg))) + (:arg-types double-float signed-num) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline scalbn function") + (:generator 5 + ;; Setup x in fr0 and y in fr1 + (sc-case x + (double-reg + (case (tn-offset x) + (0 + (inst fstp fr1) + (sc-case y + (signed-reg + (inst mov temp y) + (inst fild temp)) + (signed-stack + (inst fild y))) + (inst fxch fr1)) + (1 + (inst fstp fr0) + (sc-case y + (signed-reg + (inst mov temp y) + (inst fild temp)) + (signed-stack + (inst fild y))) + (inst fxch fr1)) + (t + (inst fstp fr0) + (inst fstp fr0) + (sc-case y + (signed-reg + (inst mov temp y) + (inst fild temp)) + (signed-stack + (inst fild y))) + (inst fld (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (1- (tn-offset x))))))) + ((double-stack descriptor-reg) + (inst fstp fr0) + (inst fstp fr0) + (sc-case y + (signed-reg + (inst mov temp y) + (inst fild temp)) + (signed-stack + (inst fild y))) + (if (sc-is x double-stack) + (inst fldd (ea-for-df-stack x)) + (inst fldd (ea-for-df-desc x))))) + (inst fscale) + (unless (zerop (tn-offset r)) + (inst fstd r)))) + +(define-vop (fscale) + (:translate %scalb) + (:args (x :scs (double-reg double-stack descriptor-reg) :target fr0) + (y :scs (double-reg double-stack descriptor-reg) :target fr1)) + (:temporary (:sc double-reg :offset fr0-offset + :from (:argument 0) :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset + :from (:argument 1) :to :result) fr1) + (:results (r :scs (double-reg))) + (:arg-types double-float double-float) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline scalb function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + ;; Setup x in fr0 and y in fr1 + (cond + ;; x in fr0; y in fr1 + ((and (sc-is x double-reg) (zerop (tn-offset x)) + (sc-is y double-reg) (= 1 (tn-offset y)))) + ;; y in fr1; x not in fr0 + ((and (sc-is y double-reg) (= 1 (tn-offset y))) + ;; Load x to fr0 + (sc-case x + (double-reg + (copy-fp-reg-to-fr0 x)) + (double-stack + (inst fstp fr0) + (inst fldd (ea-for-df-stack x))) + (descriptor-reg + (inst fstp fr0) + (inst fldd (ea-for-df-desc x))))) + ;; x in fr0; y not in fr1 + ((and (sc-is x double-reg) (zerop (tn-offset x))) + (inst fxch fr1) + ;; Now load y to fr0 + (sc-case y + (double-reg + (copy-fp-reg-to-fr0 y)) + (double-stack + (inst fstp fr0) + (inst fldd (ea-for-df-stack y))) + (descriptor-reg + (inst fstp fr0) + (inst fldd (ea-for-df-desc y)))) + (inst fxch fr1)) + ;; x in fr1; y not in fr1 + ((and (sc-is x double-reg) (= 1 (tn-offset x))) + ;; Load y to fr0 + (sc-case y + (double-reg + (copy-fp-reg-to-fr0 y)) + (double-stack + (inst fstp fr0) + (inst fldd (ea-for-df-stack y))) + (descriptor-reg + (inst fstp fr0) + (inst fldd (ea-for-df-desc y)))) + (inst fxch fr1)) + ;; y in fr0; + ((and (sc-is y double-reg) (zerop (tn-offset y))) + (inst fxch fr1) + ;; Now load x to fr0 + (sc-case x + (double-reg + (copy-fp-reg-to-fr0 x)) + (double-stack + (inst fstp fr0) + (inst fldd (ea-for-df-stack x))) + (descriptor-reg + (inst fstp fr0) + (inst fldd (ea-for-df-desc x))))) + ;; Neither x or y are in either fr0 or fr1 + (t + ;; Load y then x + (inst fstp fr0) + (inst fstp fr0) + (sc-case y + (double-reg + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset y) 2)))) + (double-stack + (inst fldd (ea-for-df-stack y))) + (descriptor-reg + (inst fldd (ea-for-df-desc y)))) + ;; Load x to fr0 + (sc-case x + (double-reg + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (1- (tn-offset x))))) + (double-stack + (inst fldd (ea-for-df-stack x))) + (descriptor-reg + (inst fldd (ea-for-df-desc x)))))) + + ;; Now have x at fr0; and y at fr1 + (inst fscale) + (unless (zerop (tn-offset r)) + (inst fstd r)))) + +(define-vop (flog1p) + (:translate %log1p) + (:args (x :scs (double-reg) :to :result)) + (:temporary (:sc double-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:temporary (:sc word-reg :offset eax-offset :from :eval) temp) + (:results (y :scs (double-reg))) + (:arg-types double-float) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline log1p function") + (:ignore temp) + (:generator 5 + ;; x is in a FP reg, not fr0, fr1. + (inst fstp fr0) + (inst fstp fr0) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 2))) + ;; Check the range + (inst push #x3e947ae1) ; Constant 0.29 + (inst fabs) + (inst fld (make-ea :dword :base esp-tn)) + (inst fcompp) + (inst add esp-tn 4) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x45) + (inst jmp :z WITHIN-RANGE) + ;; Out of range for fyl2xp1. + (inst fld1) + (inst faddd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 1))) + (inst fldln2) + (inst fxch fr1) + (inst fyl2x) + (inst jmp DONE) + + WITHIN-RANGE + (inst fldln2) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 1))) + (inst fyl2xp1) + DONE + (inst fld fr0) + (case (tn-offset y) + ((0 1)) + (t (inst fstd y))))) + +;;; The Pentium has a less restricted implementation of the fyl2xp1 +;;; instruction and a range check can be avoided. +(define-vop (flog1p-pentium) + (:translate %log1p) + (:args (x :scs (double-reg double-stack descriptor-reg) :target fr0)) + (:temporary (:sc double-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:results (y :scs (double-reg))) + (:arg-types double-float) + (:result-types double-float) + (:policy :fast-safe) + (:guard (member :pentium-style-fyl2xp1 *backend-subfeatures*)) + (:note "inline log1p with limited x range function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 4 + (note-this-location vop :internal-error) + (sc-case x + (double-reg + (case (tn-offset x) + (0 + ;; x is in fr0 + (inst fstp fr1) + (inst fldln2) + (inst fxch fr1)) + (1 + ;; x is in fr1 + (inst fstp fr0) + (inst fldln2) + (inst fxch fr1)) + (t + ;; x is in a FP reg, not fr0 or fr1 + (inst fstp fr0) + (inst fstp fr0) + (inst fldln2) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (1- (tn-offset x))))))) + ((double-stack descriptor-reg) + (inst fstp fr0) + (inst fstp fr0) + (inst fldln2) + (if (sc-is x double-stack) + (inst fldd (ea-for-df-stack x)) + (inst fldd (ea-for-df-desc x))))) + (inst fyl2xp1) + (inst fld fr0) + (case (tn-offset y) + ((0 1)) + (t (inst fstd y))))) + +(define-vop (flogb) + (:translate %logb) + (:args (x :scs (double-reg double-stack descriptor-reg) :target fr0)) + (:temporary (:sc double-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:results (y :scs (double-reg))) + (:arg-types double-float) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline logb function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + (sc-case x + (double-reg + (case (tn-offset x) + (0 + ;; x is in fr0 + (inst fstp fr1)) + (1 + ;; x is in fr1 + (inst fstp fr0)) + (t + ;; x is in a FP reg, not fr0 or fr1 + (inst fstp fr0) + (inst fstp fr0) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 2)))))) + ((double-stack descriptor-reg) + (inst fstp fr0) + (inst fstp fr0) + (if (sc-is x double-stack) + (inst fldd (ea-for-df-stack x)) + (inst fldd (ea-for-df-desc x))))) + (inst fxtract) + (case (tn-offset y) + (0 + (inst fxch fr1)) + (1) + (t (inst fxch fr1) + (inst fstd y))))) + +(define-vop (fatan) + (:translate %atan) + (:args (x :scs (double-reg double-stack descriptor-reg) :target fr0)) + (:temporary (:sc double-reg :offset fr0-offset + :from (:argument 0) :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset + :from (:argument 0) :to :result) fr1) + (:results (r :scs (double-reg))) + (:arg-types double-float) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline atan function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + ;; Setup x in fr1 and 1.0 in fr0 + (cond + ;; x in fr0 + ((and (sc-is x double-reg) (zerop (tn-offset x))) + (inst fstp fr1)) + ;; x in fr1 + ((and (sc-is x double-reg) (= 1 (tn-offset x))) + (inst fstp fr0)) + ;; x not in fr0 or fr1 + (t + ;; Load x then 1.0 + (inst fstp fr0) + (inst fstp fr0) + (sc-case x + (double-reg + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 2)))) + (double-stack + (inst fldd (ea-for-df-stack x))) + (descriptor-reg + (inst fldd (ea-for-df-desc x)))))) + (inst fld1) + ;; Now have x at fr1; and 1.0 at fr0 + (inst fpatan) + (inst fld fr0) + (case (tn-offset r) + ((0 1)) + (t (inst fstd r))))) + +(define-vop (fatan2) + (:translate %atan2) + (:args (x :scs (double-reg double-stack descriptor-reg) :target fr1) + (y :scs (double-reg double-stack descriptor-reg) :target fr0)) + (:temporary (:sc double-reg :offset fr0-offset + :from (:argument 1) :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset + :from (:argument 0) :to :result) fr1) + (:results (r :scs (double-reg))) + (:arg-types double-float double-float) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline atan2 function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + ;; Setup x in fr1 and y in fr0 + (cond + ;; y in fr0; x in fr1 + ((and (sc-is y double-reg) (zerop (tn-offset y)) + (sc-is x double-reg) (= 1 (tn-offset x)))) + ;; x in fr1; y not in fr0 + ((and (sc-is x double-reg) (= 1 (tn-offset x))) + ;; Load y to fr0 + (sc-case y + (double-reg + (copy-fp-reg-to-fr0 y)) + (double-stack + (inst fstp fr0) + (inst fldd (ea-for-df-stack y))) + (descriptor-reg + (inst fstp fr0) + (inst fldd (ea-for-df-desc y))))) + ((and (sc-is x double-reg) (zerop (tn-offset x)) + (sc-is y double-reg) (zerop (tn-offset x))) + ;; copy x to fr1 + (inst fst fr1)) + ;; y in fr0; x not in fr1 + ((and (sc-is y double-reg) (zerop (tn-offset y))) + (inst fxch fr1) + ;; Now load x to fr0 + (sc-case x + (double-reg + (copy-fp-reg-to-fr0 x)) + (double-stack + (inst fstp fr0) + (inst fldd (ea-for-df-stack x))) + (descriptor-reg + (inst fstp fr0) + (inst fldd (ea-for-df-desc x)))) + (inst fxch fr1)) + ;; y in fr1; x not in fr1 + ((and (sc-is y double-reg) (= 1 (tn-offset y))) + ;; Load x to fr0 + (sc-case x + (double-reg + (copy-fp-reg-to-fr0 x)) + (double-stack + (inst fstp fr0) + (inst fldd (ea-for-df-stack x))) + (descriptor-reg + (inst fstp fr0) + (inst fldd (ea-for-df-desc x)))) + (inst fxch fr1)) + ;; x in fr0; + ((and (sc-is x double-reg) (zerop (tn-offset x))) + (inst fxch fr1) + ;; Now load y to fr0 + (sc-case y + (double-reg + (copy-fp-reg-to-fr0 y)) + (double-stack + (inst fstp fr0) + (inst fldd (ea-for-df-stack y))) + (descriptor-reg + (inst fstp fr0) + (inst fldd (ea-for-df-desc y))))) + ;; Neither y or x are in either fr0 or fr1 + (t + ;; Load x then y + (inst fstp fr0) + (inst fstp fr0) + (sc-case x + (double-reg + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 2)))) + (double-stack + (inst fldd (ea-for-df-stack x))) + (descriptor-reg + (inst fldd (ea-for-df-desc x)))) + ;; Load y to fr0 + (sc-case y + (double-reg + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (1- (tn-offset y))))) + (double-stack + (inst fldd (ea-for-df-stack y))) + (descriptor-reg + (inst fldd (ea-for-df-desc y)))))) + + ;; Now have y at fr0; and x at fr1 + (inst fpatan) + (inst fld fr0) + (case (tn-offset r) + ((0 1)) + (t (inst fstd r))))) +) ; PROGN #!-LONG-FLOAT + +#!+long-float +(progn + +;;; Lets use some of the 80387 special functions. +;;; +;;; These defs will not take effect unless code/irrat.lisp is modified +;;; to remove the inlined alien routine def. + +(macrolet ((frob (func trans op) + `(define-vop (,func) + (:args (x :scs (long-reg) :target fr0)) + (:temporary (:sc long-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:ignore fr0) + (:results (y :scs (long-reg))) + (:arg-types long-float) + (:result-types long-float) + (:translate ,trans) + (:policy :fast-safe) + (:note "inline NPX function") + (:vop-var vop) + (:save-p :compute-only) + (:node-var node) + (:generator 5 + (note-this-location vop :internal-error) + (unless (zerop (tn-offset x)) + (inst fxch x) ; x to top of stack + (unless (location= x y) + (inst fst x))) ; maybe save it + (inst ,op) ; clobber st0 + (cond ((zerop (tn-offset y)) + (maybe-fp-wait node)) + (t + (inst fst y))))))) + + ;; Quick versions of FSIN and FCOS that require the argument to be + ;; within range 2^63. + (frob fsin-quick %sin-quick fsin) + (frob fcos-quick %cos-quick fcos) + (frob fsqrt %sqrt fsqrt)) + +;;; Quick version of ftan that requires the argument to be within +;;; range 2^63. +(define-vop (ftan-quick) + (:translate %tan-quick) + (:args (x :scs (long-reg) :target fr0)) + (:temporary (:sc long-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc long-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:results (y :scs (long-reg))) + (:arg-types long-float) + (:result-types long-float) + (:policy :fast-safe) + (:note "inline tan function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + (case (tn-offset x) + (0 + (inst fstp fr1)) + (1 + (inst fstp fr0)) + (t + (inst fstp fr0) + (inst fstp fr0) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 2))))) + (inst fptan) + ;; Result is in fr1 + (case (tn-offset y) + (0 + (inst fxch fr1)) + (1) + (t + (inst fxch fr1) + (inst fstd y))))) + +;;; These versions of fsin, fcos, and ftan simply load a 0.0 result if +;;; the argument is out of range 2^63 and would thus be hopelessly +;;; inaccurate. +(macrolet ((frob (func trans op) + `(define-vop (,func) + (:translate ,trans) + (:args (x :scs (long-reg) :target fr0)) + (:temporary (:sc long-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc unsigned-reg :offset eax-offset + :from :argument :to :result) eax) + (:results (y :scs (long-reg))) + (:arg-types long-float) + (:result-types long-float) + (:policy :fast-safe) + (:note "inline sin/cos function") + (:vop-var vop) + (:save-p :compute-only) + (:ignore eax) + (:generator 5 + (note-this-location vop :internal-error) + (unless (zerop (tn-offset x)) + (inst fxch x) ; x to top of stack + (unless (location= x y) + (inst fst x))) ; maybe save it + (inst ,op) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x04) ; C2 + (inst jmp :z DONE) + ;; Else x was out of range so reduce it; ST0 is unchanged. + (inst fstp fr0) ; Load 0.0 + (inst fldz) + DONE + (unless (zerop (tn-offset y)) + (inst fstd y)))))) + (frob fsin %sin fsin) + (frob fcos %cos fcos)) + +(define-vop (ftan) + (:translate %tan) + (:args (x :scs (long-reg) :target fr0)) + (:temporary (:sc long-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc long-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:temporary (:sc unsigned-reg :offset eax-offset + :from :argument :to :result) eax) + (:results (y :scs (long-reg))) + (:arg-types long-float) + (:result-types long-float) + (:ignore eax) + (:policy :fast-safe) + (:note "inline tan function") + (:vop-var vop) + (:save-p :compute-only) + (:ignore eax) + (:generator 5 + (note-this-location vop :internal-error) + (case (tn-offset x) + (0 + (inst fstp fr1)) + (1 + (inst fstp fr0)) + (t + (inst fstp fr0) + (inst fstp fr0) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 2))))) + (inst fptan) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x04) ; C2 + (inst jmp :z DONE) + ;; Else x was out of range so reduce it; ST0 is unchanged. + (inst fldz) ; Load 0.0 + (inst fxch fr1) + DONE + ;; Result is in fr1 + (case (tn-offset y) + (0 + (inst fxch fr1)) + (1) + (t + (inst fxch fr1) + (inst fstd y))))) + +;;; Modified exp that handles the following special cases: +;;; exp(+Inf) is +Inf; exp(-Inf) is 0; exp(NaN) is NaN. +(define-vop (fexp) + (:translate %exp) + (:args (x :scs (long-reg) :target fr0)) + (:temporary (:sc word-reg :offset eax-offset :from :eval :to :result) temp) + (:temporary (:sc long-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc long-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:temporary (:sc long-reg :offset fr2-offset + :from :argument :to :result) fr2) + (:results (y :scs (long-reg))) + (:arg-types long-float) + (:result-types long-float) + (:policy :fast-safe) + (:note "inline exp function") + (:vop-var vop) + (:save-p :compute-only) + (:ignore temp) + (:generator 5 + (note-this-location vop :internal-error) + (unless (zerop (tn-offset x)) + (inst fxch x) ; x to top of stack + (unless (location= x y) + (inst fst x))) ; maybe save it + ;; Check for Inf or NaN + (inst fxam) + (inst fnstsw) + (inst sahf) + (inst jmp :nc NOINFNAN) ; Neither Inf or NaN. + (inst jmp :np NOINFNAN) ; NaN gives NaN? Continue. + (inst and ah-tn #x02) ; Test sign of Inf. + (inst jmp :z DONE) ; +Inf gives +Inf. + (inst fstp fr0) ; -Inf gives 0 + (inst fldz) + (inst jmp-short DONE) + NOINFNAN + (inst fstp fr1) + (inst fldl2e) + (inst fmul fr1) + ;; Now fr0=x log2(e) + (inst fst fr1) + (inst frndint) + (inst fst fr2) + (inst fsubp-sti fr1) + (inst f2xm1) + (inst fld1) + (inst faddp-sti fr1) + (inst fscale) + (inst fld fr0) + DONE + (unless (zerop (tn-offset y)) + (inst fstd y)))) + +;;; Expm1 = exp(x) - 1. +;;; Handles the following special cases: +;;; expm1(+Inf) is +Inf; expm1(-Inf) is -1.0; expm1(NaN) is NaN. +(define-vop (fexpm1) + (:translate %expm1) + (:args (x :scs (long-reg) :target fr0)) + (:temporary (:sc word-reg :offset eax-offset :from :eval :to :result) temp) + (:temporary (:sc long-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc long-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:temporary (:sc long-reg :offset fr2-offset + :from :argument :to :result) fr2) + (:results (y :scs (long-reg))) + (:arg-types long-float) + (:result-types long-float) + (:policy :fast-safe) + (:note "inline expm1 function") + (:vop-var vop) + (:save-p :compute-only) + (:ignore temp) + (:generator 5 + (note-this-location vop :internal-error) + (unless (zerop (tn-offset x)) + (inst fxch x) ; x to top of stack + (unless (location= x y) + (inst fst x))) ; maybe save it + ;; Check for Inf or NaN + (inst fxam) + (inst fnstsw) + (inst sahf) + (inst jmp :nc NOINFNAN) ; Neither Inf or NaN. + (inst jmp :np NOINFNAN) ; NaN gives NaN? Continue. + (inst and ah-tn #x02) ; Test sign of Inf. + (inst jmp :z DONE) ; +Inf gives +Inf. + (inst fstp fr0) ; -Inf gives -1.0 + (inst fld1) + (inst fchs) + (inst jmp-short DONE) + NOINFNAN + ;; Free two stack slots leaving the argument on top. + (inst fstp fr2) + (inst fstp fr0) + (inst fldl2e) + (inst fmul fr1) ; Now fr0 = x log2(e) + (inst fst fr1) + (inst frndint) + (inst fsub-sti fr1) + (inst fxch fr1) + (inst f2xm1) + (inst fscale) + (inst fxch fr1) + (inst fld1) + (inst fscale) + (inst fstp fr1) + (inst fld1) + (inst fsub fr1) + (inst fsubr fr2) + DONE + (unless (zerop (tn-offset y)) + (inst fstd y)))) + +(define-vop (flog) + (:translate %log) + (:args (x :scs (long-reg long-stack descriptor-reg) :target fr0)) + (:temporary (:sc long-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc long-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:results (y :scs (long-reg))) + (:arg-types long-float) + (:result-types long-float) + (:policy :fast-safe) + (:note "inline log function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + (sc-case x + (long-reg + (case (tn-offset x) + (0 + ;; x is in fr0 + (inst fstp fr1) + (inst fldln2) + (inst fxch fr1)) + (1 + ;; x is in fr1 + (inst fstp fr0) + (inst fldln2) + (inst fxch fr1)) + (t + ;; x is in a FP reg, not fr0 or fr1 + (inst fstp fr0) + (inst fstp fr0) + (inst fldln2) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (1- (tn-offset x)))))) + (inst fyl2x)) + ((long-stack descriptor-reg) + (inst fstp fr0) + (inst fstp fr0) + (inst fldln2) + (if (sc-is x long-stack) + (inst fldl (ea-for-lf-stack x)) + (inst fldl (ea-for-lf-desc x))) + (inst fyl2x))) + (inst fld fr0) + (case (tn-offset y) + ((0 1)) + (t (inst fstd y))))) + +(define-vop (flog10) + (:translate %log10) + (:args (x :scs (long-reg long-stack descriptor-reg) :target fr0)) + (:temporary (:sc long-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc long-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:results (y :scs (long-reg))) + (:arg-types long-float) + (:result-types long-float) + (:policy :fast-safe) + (:note "inline log10 function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + (sc-case x + (long-reg + (case (tn-offset x) + (0 + ;; x is in fr0 + (inst fstp fr1) + (inst fldlg2) + (inst fxch fr1)) + (1 + ;; x is in fr1 + (inst fstp fr0) + (inst fldlg2) + (inst fxch fr1)) + (t + ;; x is in a FP reg, not fr0 or fr1 + (inst fstp fr0) + (inst fstp fr0) + (inst fldlg2) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (1- (tn-offset x)))))) + (inst fyl2x)) + ((long-stack descriptor-reg) + (inst fstp fr0) + (inst fstp fr0) + (inst fldlg2) + (if (sc-is x long-stack) + (inst fldl (ea-for-lf-stack x)) + (inst fldl (ea-for-lf-desc x))) + (inst fyl2x))) + (inst fld fr0) + (case (tn-offset y) + ((0 1)) + (t (inst fstd y))))) + +(define-vop (fpow) + (:translate %pow) + (:args (x :scs (long-reg long-stack descriptor-reg) :target fr0) + (y :scs (long-reg long-stack descriptor-reg) :target fr1)) + (:temporary (:sc long-reg :offset fr0-offset + :from (:argument 0) :to :result) fr0) + (:temporary (:sc long-reg :offset fr1-offset + :from (:argument 1) :to :result) fr1) + (:temporary (:sc long-reg :offset fr2-offset + :from :load :to :result) fr2) + (:results (r :scs (long-reg))) + (:arg-types long-float long-float) + (:result-types long-float) + (:policy :fast-safe) + (:note "inline pow function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + ;; Setup x in fr0 and y in fr1 + (cond + ;; x in fr0; y in fr1 + ((and (sc-is x long-reg) (zerop (tn-offset x)) + (sc-is y long-reg) (= 1 (tn-offset y)))) + ;; y in fr1; x not in fr0 + ((and (sc-is y long-reg) (= 1 (tn-offset y))) + ;; Load x to fr0 + (sc-case x + (long-reg + (copy-fp-reg-to-fr0 x)) + (long-stack + (inst fstp fr0) + (inst fldl (ea-for-lf-stack x))) + (descriptor-reg + (inst fstp fr0) + (inst fldl (ea-for-lf-desc x))))) + ;; x in fr0; y not in fr1 + ((and (sc-is x long-reg) (zerop (tn-offset x))) + (inst fxch fr1) + ;; Now load y to fr0 + (sc-case y + (long-reg + (copy-fp-reg-to-fr0 y)) + (long-stack + (inst fstp fr0) + (inst fldl (ea-for-lf-stack y))) + (descriptor-reg + (inst fstp fr0) + (inst fldl (ea-for-lf-desc y)))) + (inst fxch fr1)) + ;; x in fr1; y not in fr1 + ((and (sc-is x long-reg) (= 1 (tn-offset x))) + ;; Load y to fr0 + (sc-case y + (long-reg + (copy-fp-reg-to-fr0 y)) + (long-stack + (inst fstp fr0) + (inst fldl (ea-for-lf-stack y))) + (descriptor-reg + (inst fstp fr0) + (inst fldl (ea-for-lf-desc y)))) + (inst fxch fr1)) + ;; y in fr0; + ((and (sc-is y long-reg) (zerop (tn-offset y))) + (inst fxch fr1) + ;; Now load x to fr0 + (sc-case x + (long-reg + (copy-fp-reg-to-fr0 x)) + (long-stack + (inst fstp fr0) + (inst fldl (ea-for-lf-stack x))) + (descriptor-reg + (inst fstp fr0) + (inst fldl (ea-for-lf-desc x))))) + ;; Neither x or y are in either fr0 or fr1 + (t + ;; Load y then x + (inst fstp fr0) + (inst fstp fr0) + (sc-case y + (long-reg + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset y) 2)))) + (long-stack + (inst fldl (ea-for-lf-stack y))) + (descriptor-reg + (inst fldl (ea-for-lf-desc y)))) + ;; Load x to fr0 + (sc-case x + (long-reg + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (1- (tn-offset x))))) + (long-stack + (inst fldl (ea-for-lf-stack x))) + (descriptor-reg + (inst fldl (ea-for-lf-desc x)))))) + + ;; Now have x at fr0; and y at fr1 + (inst fyl2x) + ;; Now fr0=y log2(x) + (inst fld fr0) + (inst frndint) + (inst fst fr2) + (inst fsubp-sti fr1) + (inst f2xm1) + (inst fld1) + (inst faddp-sti fr1) + (inst fscale) + (inst fld fr0) + (case (tn-offset r) + ((0 1)) + (t (inst fstd r))))) + +(define-vop (fscalen) + (:translate %scalbn) + (:args (x :scs (long-reg long-stack descriptor-reg) :target fr0) + (y :scs (signed-stack signed-reg) :target temp)) + (:temporary (:sc long-reg :offset fr0-offset + :from (:argument 0) :to :result) fr0) + (:temporary (:sc long-reg :offset fr1-offset :from :eval :to :result) fr1) + (:temporary (:sc signed-stack :from (:argument 1) :to :result) temp) + (:results (r :scs (long-reg))) + (:arg-types long-float signed-num) + (:result-types long-float) + (:policy :fast-safe) + (:note "inline scalbn function") + (:generator 5 + ;; Setup x in fr0 and y in fr1 + (sc-case x + (long-reg + (case (tn-offset x) + (0 + (inst fstp fr1) + (sc-case y + (signed-reg + (inst mov temp y) + (inst fild temp)) + (signed-stack + (inst fild y))) + (inst fxch fr1)) + (1 + (inst fstp fr0) + (sc-case y + (signed-reg + (inst mov temp y) + (inst fild temp)) + (signed-stack + (inst fild y))) + (inst fxch fr1)) + (t + (inst fstp fr0) + (inst fstp fr0) + (sc-case y + (signed-reg + (inst mov temp y) + (inst fild temp)) + (signed-stack + (inst fild y))) + (inst fld (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (1- (tn-offset x))))))) + ((long-stack descriptor-reg) + (inst fstp fr0) + (inst fstp fr0) + (sc-case y + (signed-reg + (inst mov temp y) + (inst fild temp)) + (signed-stack + (inst fild y))) + (if (sc-is x long-stack) + (inst fldl (ea-for-lf-stack x)) + (inst fldl (ea-for-lf-desc x))))) + (inst fscale) + (unless (zerop (tn-offset r)) + (inst fstd r)))) + +(define-vop (fscale) + (:translate %scalb) + (:args (x :scs (long-reg long-stack descriptor-reg) :target fr0) + (y :scs (long-reg long-stack descriptor-reg) :target fr1)) + (:temporary (:sc long-reg :offset fr0-offset + :from (:argument 0) :to :result) fr0) + (:temporary (:sc long-reg :offset fr1-offset + :from (:argument 1) :to :result) fr1) + (:results (r :scs (long-reg))) + (:arg-types long-float long-float) + (:result-types long-float) + (:policy :fast-safe) + (:note "inline scalb function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + ;; Setup x in fr0 and y in fr1 + (cond + ;; x in fr0; y in fr1 + ((and (sc-is x long-reg) (zerop (tn-offset x)) + (sc-is y long-reg) (= 1 (tn-offset y)))) + ;; y in fr1; x not in fr0 + ((and (sc-is y long-reg) (= 1 (tn-offset y))) + ;; Load x to fr0 + (sc-case x + (long-reg + (copy-fp-reg-to-fr0 x)) + (long-stack + (inst fstp fr0) + (inst fldl (ea-for-lf-stack x))) + (descriptor-reg + (inst fstp fr0) + (inst fldl (ea-for-lf-desc x))))) + ;; x in fr0; y not in fr1 + ((and (sc-is x long-reg) (zerop (tn-offset x))) + (inst fxch fr1) + ;; Now load y to fr0 + (sc-case y + (long-reg + (copy-fp-reg-to-fr0 y)) + (long-stack + (inst fstp fr0) + (inst fldl (ea-for-lf-stack y))) + (descriptor-reg + (inst fstp fr0) + (inst fldl (ea-for-lf-desc y)))) + (inst fxch fr1)) + ;; x in fr1; y not in fr1 + ((and (sc-is x long-reg) (= 1 (tn-offset x))) + ;; Load y to fr0 + (sc-case y + (long-reg + (copy-fp-reg-to-fr0 y)) + (long-stack + (inst fstp fr0) + (inst fldl (ea-for-lf-stack y))) + (descriptor-reg + (inst fstp fr0) + (inst fldl (ea-for-lf-desc y)))) + (inst fxch fr1)) + ;; y in fr0; + ((and (sc-is y long-reg) (zerop (tn-offset y))) + (inst fxch fr1) + ;; Now load x to fr0 + (sc-case x + (long-reg + (copy-fp-reg-to-fr0 x)) + (long-stack + (inst fstp fr0) + (inst fldl (ea-for-lf-stack x))) + (descriptor-reg + (inst fstp fr0) + (inst fldl (ea-for-lf-desc x))))) + ;; Neither x or y are in either fr0 or fr1 + (t + ;; Load y then x + (inst fstp fr0) + (inst fstp fr0) + (sc-case y + (long-reg + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset y) 2)))) + (long-stack + (inst fldl (ea-for-lf-stack y))) + (descriptor-reg + (inst fldl (ea-for-lf-desc y)))) + ;; Load x to fr0 + (sc-case x + (long-reg + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (1- (tn-offset x))))) + (long-stack + (inst fldl (ea-for-lf-stack x))) + (descriptor-reg + (inst fldl (ea-for-lf-desc x)))))) + + ;; Now have x at fr0; and y at fr1 + (inst fscale) + (unless (zerop (tn-offset r)) + (inst fstd r)))) + +(define-vop (flog1p) + (:translate %log1p) + (:args (x :scs (long-reg) :to :result)) + (:temporary (:sc long-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc long-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:temporary (:sc word-reg :offset eax-offset :from :eval) temp) + (:results (y :scs (long-reg))) + (:arg-types long-float) + (:result-types long-float) + (:policy :fast-safe) + ;; FIXME 1: This appears to be the second DEFINE-VOP of FLOG1P. + ;; Perhaps this is OK because of the #!+LONG-FLOAT wrapped around + ;; an enormous PROGN above. Still, it would be probably be good to + ;; add some code to warn about redefining VOPs. + (:note "inline log1p function") + (:ignore temp) + (:generator 5 + ;; x is in a FP reg, not fr0, fr1. + (inst fstp fr0) + (inst fstp fr0) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 2))) + ;; Check the range + (inst push #x3e947ae1) ; Constant 0.29 + (inst fabs) + (inst fld (make-ea :dword :base esp-tn)) + (inst fcompp) + (inst add esp-tn 4) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x45) + (inst jmp :z WITHIN-RANGE) + ;; Out of range for fyl2xp1. + (inst fld1) + (inst faddd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 1))) + (inst fldln2) + (inst fxch fr1) + (inst fyl2x) + (inst jmp DONE) + + WITHIN-RANGE + (inst fldln2) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 1))) + (inst fyl2xp1) + DONE + (inst fld fr0) + (case (tn-offset y) + ((0 1)) + (t (inst fstd y))))) + +;;; The Pentium has a less restricted implementation of the fyl2xp1 +;;; instruction and a range check can be avoided. +(define-vop (flog1p-pentium) + (:translate %log1p) + (:args (x :scs (long-reg long-stack descriptor-reg) :target fr0)) + (:temporary (:sc long-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc long-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:results (y :scs (long-reg))) + (:arg-types long-float) + (:result-types long-float) + (:policy :fast-safe) + (:guard (member :pentium-style-fyl2xp1 *backend-subfeatures*)) + (:note "inline log1p function") + (:generator 5 + (sc-case x + (long-reg + (case (tn-offset x) + (0 + ;; x is in fr0 + (inst fstp fr1) + (inst fldln2) + (inst fxch fr1)) + (1 + ;; x is in fr1 + (inst fstp fr0) + (inst fldln2) + (inst fxch fr1)) + (t + ;; x is in a FP reg, not fr0 or fr1 + (inst fstp fr0) + (inst fstp fr0) + (inst fldln2) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (1- (tn-offset x))))))) + ((long-stack descriptor-reg) + (inst fstp fr0) + (inst fstp fr0) + (inst fldln2) + (if (sc-is x long-stack) + (inst fldl (ea-for-lf-stack x)) + (inst fldl (ea-for-lf-desc x))))) + (inst fyl2xp1) + (inst fld fr0) + (case (tn-offset y) + ((0 1)) + (t (inst fstd y))))) + +(define-vop (flogb) + (:translate %logb) + (:args (x :scs (long-reg long-stack descriptor-reg) :target fr0)) + (:temporary (:sc long-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc long-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:results (y :scs (long-reg))) + (:arg-types long-float) + (:result-types long-float) + (:policy :fast-safe) + (:note "inline logb function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + (sc-case x + (long-reg + (case (tn-offset x) + (0 + ;; x is in fr0 + (inst fstp fr1)) + (1 + ;; x is in fr1 + (inst fstp fr0)) + (t + ;; x is in a FP reg, not fr0 or fr1 + (inst fstp fr0) + (inst fstp fr0) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 2)))))) + ((long-stack descriptor-reg) + (inst fstp fr0) + (inst fstp fr0) + (if (sc-is x long-stack) + (inst fldl (ea-for-lf-stack x)) + (inst fldl (ea-for-lf-desc x))))) + (inst fxtract) + (case (tn-offset y) + (0 + (inst fxch fr1)) + (1) + (t (inst fxch fr1) + (inst fstd y))))) + +(define-vop (fatan) + (:translate %atan) + (:args (x :scs (long-reg long-stack descriptor-reg) :target fr0)) + (:temporary (:sc long-reg :offset fr0-offset + :from (:argument 0) :to :result) fr0) + (:temporary (:sc long-reg :offset fr1-offset + :from (:argument 0) :to :result) fr1) + (:results (r :scs (long-reg))) + (:arg-types long-float) + (:result-types long-float) + (:policy :fast-safe) + (:note "inline atan function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + ;; Setup x in fr1 and 1.0 in fr0 + (cond + ;; x in fr0 + ((and (sc-is x long-reg) (zerop (tn-offset x))) + (inst fstp fr1)) + ;; x in fr1 + ((and (sc-is x long-reg) (= 1 (tn-offset x))) + (inst fstp fr0)) + ;; x not in fr0 or fr1 + (t + ;; Load x then 1.0 + (inst fstp fr0) + (inst fstp fr0) + (sc-case x + (long-reg + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 2)))) + (long-stack + (inst fldl (ea-for-lf-stack x))) + (descriptor-reg + (inst fldl (ea-for-lf-desc x)))))) + (inst fld1) + ;; Now have x at fr1; and 1.0 at fr0 + (inst fpatan) + (inst fld fr0) + (case (tn-offset r) + ((0 1)) + (t (inst fstd r))))) + +(define-vop (fatan2) + (:translate %atan2) + (:args (x :scs (long-reg long-stack descriptor-reg) :target fr1) + (y :scs (long-reg long-stack descriptor-reg) :target fr0)) + (:temporary (:sc long-reg :offset fr0-offset + :from (:argument 1) :to :result) fr0) + (:temporary (:sc long-reg :offset fr1-offset + :from (:argument 0) :to :result) fr1) + (:results (r :scs (long-reg))) + (:arg-types long-float long-float) + (:result-types long-float) + (:policy :fast-safe) + (:note "inline atan2 function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + ;; Setup x in fr1 and y in fr0 + (cond + ;; y in fr0; x in fr1 + ((and (sc-is y long-reg) (zerop (tn-offset y)) + (sc-is x long-reg) (= 1 (tn-offset x)))) + ;; x in fr1; y not in fr0 + ((and (sc-is x long-reg) (= 1 (tn-offset x))) + ;; Load y to fr0 + (sc-case y + (long-reg + (copy-fp-reg-to-fr0 y)) + (long-stack + (inst fstp fr0) + (inst fldl (ea-for-lf-stack y))) + (descriptor-reg + (inst fstp fr0) + (inst fldl (ea-for-lf-desc y))))) + ;; y in fr0; x not in fr1 + ((and (sc-is y long-reg) (zerop (tn-offset y))) + (inst fxch fr1) + ;; Now load x to fr0 + (sc-case x + (long-reg + (copy-fp-reg-to-fr0 x)) + (long-stack + (inst fstp fr0) + (inst fldl (ea-for-lf-stack x))) + (descriptor-reg + (inst fstp fr0) + (inst fldl (ea-for-lf-desc x)))) + (inst fxch fr1)) + ;; y in fr1; x not in fr1 + ((and (sc-is y long-reg) (= 1 (tn-offset y))) + ;; Load x to fr0 + (sc-case x + (long-reg + (copy-fp-reg-to-fr0 x)) + (long-stack + (inst fstp fr0) + (inst fldl (ea-for-lf-stack x))) + (descriptor-reg + (inst fstp fr0) + (inst fldl (ea-for-lf-desc x)))) + (inst fxch fr1)) + ;; x in fr0; + ((and (sc-is x long-reg) (zerop (tn-offset x))) + (inst fxch fr1) + ;; Now load y to fr0 + (sc-case y + (long-reg + (copy-fp-reg-to-fr0 y)) + (long-stack + (inst fstp fr0) + (inst fldl (ea-for-lf-stack y))) + (descriptor-reg + (inst fstp fr0) + (inst fldl (ea-for-lf-desc y))))) + ;; Neither y or x are in either fr0 or fr1 + (t + ;; Load x then y + (inst fstp fr0) + (inst fstp fr0) + (sc-case x + (long-reg + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 2)))) + (long-stack + (inst fldl (ea-for-lf-stack x))) + (descriptor-reg + (inst fldl (ea-for-lf-desc x)))) + ;; Load y to fr0 + (sc-case y + (long-reg + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (1- (tn-offset y))))) + (long-stack + (inst fldl (ea-for-lf-stack y))) + (descriptor-reg + (inst fldl (ea-for-lf-desc y)))))) + + ;; Now have y at fr0; and x at fr1 + (inst fpatan) + (inst fld fr0) + (case (tn-offset r) + ((0 1)) + (t (inst fstd r))))) + +) ; PROGN #!+LONG-FLOAT + +;;;; complex float VOPs + +(define-vop (make-complex-single-float) + (:translate complex) + (:args (real :scs (single-reg) :to :result :target r + :load-if (not (location= real r))) + (imag :scs (single-reg) :to :save)) + (:arg-types single-float single-float) + (:results (r :scs (complex-single-reg) :from (:argument 0) + :load-if (not (sc-is r complex-single-stack)))) + (:result-types complex-single-float) + (:note "inline complex single-float creation") + (:policy :fast-safe) + (:generator 5 + (sc-case r + (complex-single-reg + (let ((r-real (complex-double-reg-real-tn r))) + (unless (location= real r-real) + (cond ((zerop (tn-offset r-real)) + (copy-fp-reg-to-fr0 real)) + ((zerop (tn-offset real)) + (inst fstd r-real)) + (t + (inst fxch real) + (inst fstd r-real) + (inst fxch real))))) + (let ((r-imag (complex-double-reg-imag-tn r))) + (unless (location= imag r-imag) + (cond ((zerop (tn-offset imag)) + (inst fstd r-imag)) + (t + (inst fxch imag) + (inst fstd r-imag) + (inst fxch imag)))))) + (complex-single-stack + (unless (location= real r) + (cond ((zerop (tn-offset real)) + (inst fst (ea-for-csf-real-stack r))) + (t + (inst fxch real) + (inst fst (ea-for-csf-real-stack r)) + (inst fxch real)))) + (inst fxch imag) + (inst fst (ea-for-csf-imag-stack r)) + (inst fxch imag))))) + +(define-vop (make-complex-double-float) + (:translate complex) + (:args (real :scs (double-reg) :target r + :load-if (not (location= real r))) + (imag :scs (double-reg) :to :save)) + (:arg-types double-float double-float) + (:results (r :scs (complex-double-reg) :from (:argument 0) + :load-if (not (sc-is r complex-double-stack)))) + (:result-types complex-double-float) + (:note "inline complex double-float creation") + (:policy :fast-safe) + (:generator 5 + (sc-case r + (complex-double-reg + (let ((r-real (complex-double-reg-real-tn r))) + (unless (location= real r-real) + (cond ((zerop (tn-offset r-real)) + (copy-fp-reg-to-fr0 real)) + ((zerop (tn-offset real)) + (inst fstd r-real)) + (t + (inst fxch real) + (inst fstd r-real) + (inst fxch real))))) + (let ((r-imag (complex-double-reg-imag-tn r))) + (unless (location= imag r-imag) + (cond ((zerop (tn-offset imag)) + (inst fstd r-imag)) + (t + (inst fxch imag) + (inst fstd r-imag) + (inst fxch imag)))))) + (complex-double-stack + (unless (location= real r) + (cond ((zerop (tn-offset real)) + (inst fstd (ea-for-cdf-real-stack r))) + (t + (inst fxch real) + (inst fstd (ea-for-cdf-real-stack r)) + (inst fxch real)))) + (inst fxch imag) + (inst fstd (ea-for-cdf-imag-stack r)) + (inst fxch imag))))) + +#!+long-float +(define-vop (make-complex-long-float) + (:translate complex) + (:args (real :scs (long-reg) :target r + :load-if (not (location= real r))) + (imag :scs (long-reg) :to :save)) + (:arg-types long-float long-float) + (:results (r :scs (complex-long-reg) :from (:argument 0) + :load-if (not (sc-is r complex-long-stack)))) + (:result-types complex-long-float) + (:note "inline complex long-float creation") + (:policy :fast-safe) + (:generator 5 + (sc-case r + (complex-long-reg + (let ((r-real (complex-double-reg-real-tn r))) + (unless (location= real r-real) + (cond ((zerop (tn-offset r-real)) + (copy-fp-reg-to-fr0 real)) + ((zerop (tn-offset real)) + (inst fstd r-real)) + (t + (inst fxch real) + (inst fstd r-real) + (inst fxch real))))) + (let ((r-imag (complex-double-reg-imag-tn r))) + (unless (location= imag r-imag) + (cond ((zerop (tn-offset imag)) + (inst fstd r-imag)) + (t + (inst fxch imag) + (inst fstd r-imag) + (inst fxch imag)))))) + (complex-long-stack + (unless (location= real r) + (cond ((zerop (tn-offset real)) + (store-long-float (ea-for-clf-real-stack r))) + (t + (inst fxch real) + (store-long-float (ea-for-clf-real-stack r)) + (inst fxch real)))) + (inst fxch imag) + (store-long-float (ea-for-clf-imag-stack r)) + (inst fxch imag))))) + + +(define-vop (complex-float-value) + (:args (x :target r)) + (:results (r)) + (:variant-vars offset) + (:policy :fast-safe) + (:generator 3 + (cond ((sc-is x complex-single-reg complex-double-reg + #!+long-float complex-long-reg) + (let ((value-tn + (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (+ offset (tn-offset x))))) + (unless (location= value-tn r) + (cond ((zerop (tn-offset r)) + (copy-fp-reg-to-fr0 value-tn)) + ((zerop (tn-offset value-tn)) + (inst fstd r)) + (t + (inst fxch value-tn) + (inst fstd r) + (inst fxch value-tn)))))) + ((sc-is r single-reg) + (let ((ea (sc-case x + (complex-single-stack + (ecase offset + (0 (ea-for-csf-real-stack x)) + (1 (ea-for-csf-imag-stack x)))) + (descriptor-reg + (ecase offset + (0 (ea-for-csf-real-desc x)) + (1 (ea-for-csf-imag-desc x))))))) + (with-empty-tn@fp-top(r) + (inst fld ea)))) + ((sc-is r double-reg) + (let ((ea (sc-case x + (complex-double-stack + (ecase offset + (0 (ea-for-cdf-real-stack x)) + (1 (ea-for-cdf-imag-stack x)))) + (descriptor-reg + (ecase offset + (0 (ea-for-cdf-real-desc x)) + (1 (ea-for-cdf-imag-desc x))))))) + (with-empty-tn@fp-top(r) + (inst fldd ea)))) + #!+long-float + ((sc-is r long-reg) + (let ((ea (sc-case x + (complex-long-stack + (ecase offset + (0 (ea-for-clf-real-stack x)) + (1 (ea-for-clf-imag-stack x)))) + (descriptor-reg + (ecase offset + (0 (ea-for-clf-real-desc x)) + (1 (ea-for-clf-imag-desc x))))))) + (with-empty-tn@fp-top(r) + (inst fldl ea)))) + (t (error "COMPLEX-FLOAT-VALUE VOP failure"))))) + +(define-vop (realpart/complex-single-float complex-float-value) + (:translate realpart) + (:args (x :scs (complex-single-reg complex-single-stack descriptor-reg) + :target r)) + (:arg-types complex-single-float) + (:results (r :scs (single-reg))) + (:result-types single-float) + (:note "complex float realpart") + (:variant 0)) + +(define-vop (realpart/complex-double-float complex-float-value) + (:translate realpart) + (:args (x :scs (complex-double-reg complex-double-stack descriptor-reg) + :target r)) + (:arg-types complex-double-float) + (:results (r :scs (double-reg))) + (:result-types double-float) + (:note "complex float realpart") + (:variant 0)) + +#!+long-float +(define-vop (realpart/complex-long-float complex-float-value) + (:translate realpart) + (:args (x :scs (complex-long-reg complex-long-stack descriptor-reg) + :target r)) + (:arg-types complex-long-float) + (:results (r :scs (long-reg))) + (:result-types long-float) + (:note "complex float realpart") + (:variant 0)) + +(define-vop (imagpart/complex-single-float complex-float-value) + (:translate imagpart) + (:args (x :scs (complex-single-reg complex-single-stack descriptor-reg) + :target r)) + (:arg-types complex-single-float) + (:results (r :scs (single-reg))) + (:result-types single-float) + (:note "complex float imagpart") + (:variant 1)) + +(define-vop (imagpart/complex-double-float complex-float-value) + (:translate imagpart) + (:args (x :scs (complex-double-reg complex-double-stack descriptor-reg) + :target r)) + (:arg-types complex-double-float) + (:results (r :scs (double-reg))) + (:result-types double-float) + (:note "complex float imagpart") + (:variant 1)) + +#!+long-float +(define-vop (imagpart/complex-long-float complex-float-value) + (:translate imagpart) + (:args (x :scs (complex-long-reg complex-long-stack descriptor-reg) + :target r)) + (:arg-types complex-long-float) + (:results (r :scs (long-reg))) + (:result-types long-float) + (:note "complex float imagpart") + (:variant 1)) + +;;; hack dummy VOPs to bias the representation selection of their +;;; arguments towards a FP register, which can help avoid consing at +;;; inappropriate locations +(defknown double-float-reg-bias (double-float) (values)) +(define-vop (double-float-reg-bias) + (:translate double-float-reg-bias) + (:args (x :scs (double-reg double-stack) :load-if nil)) + (:arg-types double-float) + (:policy :fast-safe) + (:note "inline dummy FP register bias") + (:ignore x) + (:generator 0)) +(defknown single-float-reg-bias (single-float) (values)) +(define-vop (single-float-reg-bias) + (:translate single-float-reg-bias) + (:args (x :scs (single-reg single-stack) :load-if nil)) + (:arg-types single-float) + (:policy :fast-safe) + (:note "inline dummy FP register bias") + (:ignore x) + (:generator 0)) + + +;; XMM Moves + + +(defun ea-for-xmm-desc (tn) + (make-ea :xmmword :base tn + :disp (- (* xmm-value-slot n-word-bytes) other-pointer-lowtag))) + +(defun ea-for-xmm-stack (tn) + (make-ea :xmmword :base ebp-tn + :disp (- (* (+ (tn-offset tn) + 4) + n-word-bytes)))) + +(define-move-fun (load-xmm 2) (vop x y) + ((xmm-stack) (xmm-reg)) + (inst movdqu y (ea-for-xmm-stack x))) + +(define-move-fun (store-xmm 2) (vop x y) + ((xmm-reg) (xmm-stack)) + (inst movdqu (ea-for-xmm-stack y) x)) + +(define-move-fun (load-xmm-single 2) (vop x y) + ((single-stack) (xmm-reg)) + (inst movss y (ea-for-sf-stack x))) + +(define-move-fun (store-xmm-single 2) (vop x y) + ((xmm-reg) (single-stack)) + (inst movss (ea-for-sf-stack y) x)) + + +(define-vop (%load-xmm-from-array/single-float) + (:policy :fast-safe) + (:args (src :scs (descriptor-reg)) + (index :scs (unsigned-reg))) + (:ARG-TYPES SIMPLE-ARRAY-SINGLE-FLOAT fixnum) + (:results (dest :scs (xmm-reg))) + (:result-types xmm) + (:generator 1 + (inst shl index 2) + (inst movdqu dest (make-ea :xmmword :base src :index index + :disp (- (* VECTOR-DATA-OFFSET N-WORD-BYTES) OTHER-POINTER-LOWTAG))))) + + +(define-vop (%store-xmm-to-array/single-float) + (:policy :fast-safe) + (:args (dest :scs (descriptor-reg)) + (index :scs (unsigned-reg)) + (src :scs (xmm-reg))) + (:ARG-TYPES SIMPLE-ARRAY-SINGLE-FLOAT fixnum XMM) + (:generator 1 + (inst shl index 2) + (inst movdqu (make-ea :xmmword :base dest :index index + :disp (- (* VECTOR-DATA-OFFSET N-WORD-BYTES) OTHER-POINTER-LOWTAG)) + src))) + + +(define-vop (xmm-move) + (:args (x :scs (xmm-reg) :target y :load-if (not (location= x y)))) + (:results (y :scs (xmm-reg) :load-if (not (location= x y)))) + (:note "xmm move") + (:generator 0 + (unless (location= x y) + (inst movdqa y x)))) + +(define-move-vop xmm-move :move (xmm-reg) (xmm-reg)) + +(define-vop (move-from-xmm) + (:args (x :scs (xmm-reg) :to :save)) + (:results (y :scs (descriptor-reg))) + (:node-var node) + (:note "xmm to pointer coercion") + (:generator 13 + (with-fixed-allocation (y + xmm-widetag + xmm-size node) + (inst movdqu (ea-for-xmm-desc y) x)))) + +(define-move-vop move-from-xmm :move (xmm-reg) (descriptor-reg)) + +(define-vop (move-to-xmm) + (:args (x :scs (descriptor-reg))) + (:results (y :scs (xmm-reg))) + (:note "pointer to xmm coercion") + (:generator 2 + (inst movdqu y (ea-for-xmm-desc x)))) + +(define-move-vop move-to-xmm :move (descriptor-reg) (xmm-reg)) + + +(define-vop (move-xmm-arg) + (:args (x :scs (xmm-reg) :target y) + (fp :scs (any-reg) + :load-if (not (sc-is y xmm-reg)))) + (:results (y)) + (:note "xmm argument move") + (:generator 6 + (sc-case y + (xmm-reg + (unless (location= x y) + (inst movdqa y x))) + + (xmm-stack + (if (= (tn-offset fp) esp-offset) + (let* ((offset (* (tn-offset y) n-word-bytes)) + (ea (make-ea :xmmword :base fp :disp offset))) + (inst movdqu ea x)) + + (let ((ea (make-ea :xmmword :base fp + :disp (- (* (+ (tn-offset y) 4) + n-word-bytes))))) + (inst movdqu ea x))))))) + +(define-move-vop move-xmm-arg :move-arg (xmm-reg descriptor-reg) (xmm-reg)) + +(define-move-vop move-arg :move-arg (xmm-reg) (descriptor-reg)) + + diff --git a/sbcl-src/src/compiler/x86/vm.lisp b/sbcl-src/src/compiler/x86/vm.lisp index d3fde75..168f5e1 100644 --- a/sbcl-src/src/compiler/x86/vm.lisp +++ b/sbcl-src/src/compiler/x86/vm.lisp @@ -200,6 +200,7 @@ (sap-stack stack) ; System area pointers. (single-stack stack) ; single-floats (double-stack stack :element-size 2) ; double-floats. + (xmm-stack stack :element-size 4) ; xmm #!+long-float (long-stack stack :element-size 3) ; long-floats. (complex-single-stack stack :element-size 2) ; complex-single-floats @@ -304,6 +305,12 @@ :save-p t :alternate-scs (double-stack)) + ;; non-descriptor XMMs + (xmm-reg xmm-registers + :locations #.*xmm-regs* + :save-p t + :alternate-scs (xmm-stack)) + ;; non-descriptor LONG-FLOATs #!+long-float (long-reg float-registers @@ -334,8 +341,6 @@ :save-p t :alternate-scs (complex-long-stack)) - (xmm-reg xmm-registers - :locations #.*xmm-regs*) ;; a catch or unwind block (catch-block stack :element-size kludge-nondeterministic-catch-block-size)) @@ -376,7 +381,8 @@ (def-misc-reg-tns unsigned-reg eax ebx ecx edx ebp esp edi esi) (def-misc-reg-tns word-reg ax bx cx dx bp sp di si) (def-misc-reg-tns byte-reg al ah bl bh cl ch dl dh) - (def-misc-reg-tns single-reg fr0 fr1 fr2 fr3 fr4 fr5 fr6 fr7)) + (def-misc-reg-tns single-reg fr0 fr1 fr2 fr3 fr4 fr5 fr6 fr7) + (def-misc-reg-tns xmm-reg xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7)) ;;; TNs for registers used to pass arguments (defparameter *register-arg-tns* diff --git a/sse-matrix.lisp b/sse-matrix.lisp index c38deb9..c0f5976 100644 --- a/sse-matrix.lisp +++ b/sse-matrix.lisp @@ -51,14 +51,14 @@ http://developer.intel.com/design/pentiumiii/sml/24504501.pdf SIMPLE-ARRAY-SINGLE-FLOAT SIMPLE-ARRAY-SINGLE-FLOAT) - (:TEMPORARY (:SC SSE-REG) X0) - (:TEMPORARY (:SC SSE-REG) X1) - (:TEMPORARY (:SC SSE-REG) X2) - (:TEMPORARY (:SC SSE-REG) X3) - (:TEMPORARY (:SC SSE-REG) X4) - (:TEMPORARY (:SC SSE-REG) X5) - (:TEMPORARY (:SC SSE-REG) X6) - (:TEMPORARY (:SC SSE-REG) X7) + (:TEMPORARY (:SC XMM-REG) X0) + (:TEMPORARY (:SC XMM-REG) X1) + (:TEMPORARY (:SC XMM-REG) X2) + (:TEMPORARY (:SC XMM-REG) X3) + (:TEMPORARY (:SC XMM-REG) X4) + (:TEMPORARY (:SC XMM-REG) X5) + (:TEMPORARY (:SC XMM-REG) X6) + (:TEMPORARY (:SC XMM-REG) X7) (:GENERATOR 10 (inst movss x2 (vect-ea mat2 32)) diff --git a/sse-moves.lisp b/sse-moves.lisp new file mode 100644 index 0000000..389de4e --- /dev/null +++ b/sse-moves.lisp @@ -0,0 +1,236 @@ +(in-package :sb-vm) + +(defun ea-for-xmm-desc (tn) + (make-ea :xmmword :base tn + :disp (- (* xmm-value-slot n-word-bytes) other-pointer-lowtag))) + +(defun ea-for-xmm-stack (tn) + (make-ea :xmmword :base ebp-tn + :disp (- (* (+ (tn-offset tn) + 4) + n-word-bytes)))) + +(define-move-fun (load-xmm 2) (vop x y) + ((xmm-stack) (xmm-reg)) + (inst movdqu y (ea-for-xmm-stack x))) + +(define-move-fun (store-xmm 2) (vop x y) + ((xmm-reg) (xmm-stack)) + (inst movdqu (ea-for-xmm-stack y) x)) + +(define-move-fun (load-xmm-single 2) (vop x y) + ((single-stack) (xmm-reg)) + (inst movss y (ea-for-sf-stack x))) + +(define-move-fun (store-xmm-single 2) (vop x y) + ((xmm-reg) (single-stack)) + (inst movss (ea-for-sf-stack y) x)) + + +(define-vop (data-vector-ref/simple-array-single-float/xmm) + (:note "array to xmm access") + (:translate data-vector-ref) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg)) + (index :scs (any-reg))) + (:arg-types simple-array-single-float positive-fixnum) + (:results (value :scs (xmm-reg))) + (:result-types xmm) + (:generator 5 + (inst movdqu value + (make-ea :xmmword :base object :index index :scale 1 + :disp (- (* vector-data-offset + n-word-bytes) + other-pointer-lowtag))))) + +(define-vop (data-vector-ref-c/simple-array-single-float/xmm) + (:note "array to xmm access") + (:translate data-vector-ref) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg))) + (:info index) + (:arg-types simple-array-single-float (:constant (signed-byte 30))) + (:results (value :scs (xmm-reg))) + (:result-types xmm) + (:generator 4 + (inst movdqu value (make-ea :xmmword :base object + :disp (- (+ (* vector-data-offset + n-word-bytes) + (* 16 index)) + other-pointer-lowtag))))) + + + +(define-vop (data-vector-set/simple-array-single-float/xmm) + (:note "inline array store") + (:translate data-vector-set) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg)) + (index :scs (any-reg)) + (value :scs (xmm-reg) :target result)) + (:arg-types simple-array-single-float positive-fixnum xmm) + (:results (result :scs (xmm-reg))) + (:result-types xmm) + (:generator 5 + (inst movdqu (make-ea :dword :base object :index index :scale 1 + :disp (- (* vector-data-offset + n-word-bytes) + other-pointer-lowtag)) + value) + (unless (location= value result) + (move result value)))) + +(define-vop (data-vector-set-c/simple-array-single-float/xmm) + (:note "inline array store") + (:translate data-vector-set) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg)) + (value :scs (xmm-reg) :target result)) + (:info index) + (:arg-types simple-array-single-float (:constant (signed-byte 30)) + xmm) + (:results (result :scs (xmm-reg))) + (:result-types xmm) + (:generator 4 + (inst movdqu (make-ea :dword :base object + :disp (- (+ (* vector-data-offset + n-word-bytes) + (* 16 index)) + other-pointer-lowtag)) + value) + (unless (location= value result) + (move result value)))) + + +(define-vop (myvop1) + (:policy :fast-safe) + (:args (src :scs (descriptor-reg))) + (:arg-types simple-array-single-float) + (:results (dest :scs (descriptor-reg))) + (:result-types fixnum) + (:generator 1 + (inst mov dest (fixnumize 100)))) + +(define-vop (myvop2) + (:policy :fast-safe) + (:args (src :scs (descriptor-reg))) + (:arg-types simple-array-single-float) + (:results (dest :scs (descriptor-reg))) + (:result-types fixnum) + (:temporary (:scs (xmm-reg)) x0) + (:generator 1 + (inst movdqu x0 (make-ea :xmmword :base src :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag))) + (inst mov dest (fixnumize 100)))) + +(define-vop (myvop3) + (:policy :fast-safe) + (:args (src :scs (descriptor-reg))) + (:arg-types simple-array-single-float) + (:results (dest :scs (xmm-reg))) + (:result-types xmm) + (:temporary (:scs (xmm-reg) :to :result) x0) + (:generator 1 + (inst movdqu x0 (make-ea :xmmword :base src :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag))) + (move dest x0))) + +(define-vop (myvop4) + (:policy :fast-safe) + (:args (src :scs (descriptor-reg))) + (:arg-types simple-array-single-float) + (:results (dest :scs (single-reg))) + (:result-types single-float) + (:temporary (:scs (single-reg) :to :result) x0) + (:generator 1 +;; (move x0 (make-ea :dword :base src :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag))) +;; (move dest x0))) + (inst nop))) + + +(define-vop (%load-xmm-from-array/single-float) + (:policy :fast-safe) + (:args (src :scs (descriptor-reg)) + (index :scs (unsigned-reg))) + (:ARG-TYPES SIMPLE-ARRAY-SINGLE-FLOAT fixnum) + (:results (dest :scs (xmm-reg))) + (:result-types xmm) + (:generator 1 + (inst shl index 2) + (inst movdqu dest (make-ea :xmmword :base src :index index + :disp (- (* VECTOR-DATA-OFFSET N-WORD-BYTES) OTHER-POINTER-LOWTAG))))) + + +(define-vop (%store-xmm-to-array/single-float) + (:policy :fast-safe) + (:args (dest :scs (descriptor-reg)) + (index :scs (unsigned-reg)) + (src :scs (xmm-reg))) + (:ARG-TYPES SIMPLE-ARRAY-SINGLE-FLOAT fixnum XMM) + (:generator 1 + (inst shl index 2) + (inst movdqu (make-ea :xmmword :base dest :index index + :disp (- (* VECTOR-DATA-OFFSET N-WORD-BYTES) OTHER-POINTER-LOWTAG)) + src))) + + +(define-vop (xmm-move) + (:args (x :scs (xmm-reg) :target y :load-if (not (location= x y)))) + (:results (y :scs (xmm-reg) :load-if (not (location= x y)))) + (:note "xmm move") + (:generator 0 + (unless (location= x y) + (inst movdqa y x)))) + +(define-move-vop xmm-move :move (xmm-reg) (xmm-reg)) + +(define-vop (move-from-xmm) + (:args (x :scs (xmm-reg) :to :save)) + (:results (y :scs (descriptor-reg))) + (:node-var node) + (:note "xmm to pointer coercion") + (:generator 13 + (with-fixed-allocation (y + xmm-widetag + xmm-size node) + (inst movdqu (ea-for-xmm-desc y) x)))) + +(define-move-vop move-from-xmm :move (xmm-reg) (descriptor-reg)) + +(define-vop (move-to-xmm) + (:args (x :scs (descriptor-reg))) + (:results (y :scs (xmm-reg))) + (:note "pointer to xmm coercion") + (:generator 2 + (inst movdqu y (ea-for-xmm-desc x)))) + +(define-move-vop move-to-xmm :move (descriptor-reg) (xmm-reg)) + + +(define-vop (move-xmm-arg) + (:args (x :scs (xmm-reg) :target y) + (fp :scs (any-reg) + :load-if (not (sc-is y xmm-reg)))) + (:results (y)) + (:note "xmm argument move") + (:generator 6 + (sc-case y + (xmm-reg + (unless (location= x y) + (inst movdqa y x))) + + (xmm-stack + (if (= (tn-offset fp) esp-offset) + (let* ((offset (* (tn-offset y) n-word-bytes)) + (ea (make-ea :xmmword :base fp :disp offset))) + (inst movdqu ea x)) + + (let ((ea (make-ea :xmmword :base fp + :disp (- (* (+ (tn-offset y) 4) + n-word-bytes))))) + (inst movdqu ea x))))))) + +(define-move-vop move-xmm-arg :move-arg (xmm-reg descriptor-reg) (xmm-reg)) + +(define-move-vop move-arg :move-arg (xmm-reg) (descriptor-reg)) + + + diff --git a/sse-vector.lisp b/sse-vector.lisp new file mode 100644 index 0000000..2eec399 --- /dev/null +++ b/sse-vector.lisp @@ -0,0 +1,225 @@ +#| +Copyright (c) 2005 Risto Laakso +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. +3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +|# +(in-package :sb-vm) + +(defmacro vect-ea (base &optional idx) + (let ((disp + (if (and idx (numberp idx)) + `(+ (- (* VECTOR-DATA-OFFSET N-WORD-BYTES) OTHER-POINTER-LOWTAG) ,idx) + `(- (* VECTOR-DATA-OFFSET N-WORD-BYTES) OTHER-POINTER-LOWTAG)))) + + (if (and idx (symbolp idx)) + `(make-ea :dword :base ,base :index ,idx :disp ,disp) + `(make-ea :dword :base ,base :disp ,disp)))) + +(DEFINE-VOP (%sse-vect-add/single-float) + (:POLICY :FAST-SAFE) + (:ARGS (DEST :SCS (DESCRIPTOR-REG)) + (SRC1 :SCS (DESCRIPTOR-REG)) + (SRC2 :SCS (DESCRIPTOR-REG))) + (:ARG-TYPES SIMPLE-ARRAY-SINGLE-FLOAT + SIMPLE-ARRAY-SINGLE-FLOAT + SIMPLE-ARRAY-SINGLE-FLOAT) + + (:TEMPORARY (:SC XMM-REG) X0) + (:TEMPORARY (:SC XMM-REG) X1) + + (:GENERATOR 10 + (inst movdqu x0 (vect-ea src1)) + (inst movdqu x1 (vect-ea src2)) + (inst addps x0 x1) + (inst movdqu (vect-ea dest) x0))) + +(DEFINE-VOP (%sse-vect-add2/single-float) + (:POLICY :FAST-SAFE) + (:ARGS (SRC1 :SCS (XMM-REG)) + (SRC2 :SCS (XMM-REG))) + (:ARG-TYPES XMM XMM) + + (:RESULTS (DEST :SCS (XMM-REG))) + + (:TEMPORARY (:SC XMM-REG :from :argument :to :result) X0) + (:TEMPORARY (:SC XMM-REG :from :argument) X1) + + (:GENERATOR 10 + (move x0 src1) + (move x1 src2) + (inst addps x0 x1) + (move dest x0) + )) + +(DEFINE-VOP (%sse-vect-sub/single-float) + (:POLICY :FAST-SAFE) + (:ARGS (DEST :SCS (DESCRIPTOR-REG)) + (SRC1 :SCS (DESCRIPTOR-REG)) + (SRC2 :SCS (DESCRIPTOR-REG))) + (:ARG-TYPES SIMPLE-ARRAY-SINGLE-FLOAT + SIMPLE-ARRAY-SINGLE-FLOAT + SIMPLE-ARRAY-SINGLE-FLOAT) + + (:TEMPORARY (:SC XMM-REG) X0) + (:TEMPORARY (:SC XMM-REG) X1) + + (:GENERATOR 10 + (inst movdqu x0 (vect-ea src1)) + (inst movdqu x1 (vect-ea src2)) + (inst subps x0 x1) + (inst movdqu (vect-ea dest) x0))) + +(DEFINE-VOP (%sse-vect-len/single-float) + (:POLICY :FAST-SAFE) + (:ARGS (DEST :SCS (DESCRIPTOR-REG)) + (SRC1 :SCS (DESCRIPTOR-REG))) + (:ARG-TYPES SIMPLE-ARRAY-SINGLE-FLOAT SIMPLE-ARRAY-SINGLE-FLOAT) + + (:TEMPORARY (:SC XMM-REG) X0) + (:TEMPORARY (:SC XMM-REG) X1) + + (:GENERATOR 10 + (inst xorps x0 x0) + (inst movdqu x1 (vect-ea src1)) + (inst mulps x1 x1) ;; ^2 + + (inst movdqa x0 x1) ;; + + + (inst psrldq-ib x1 4) ;; >> 4 + (inst addss x0 x1) ;; + + + (inst psrldq-ib x1 4) ;; .. + (inst addss x0 x1) + + (inst psrldq-ib x1 4) + (inst addss x0 x1) ;; here we have added up all single-floats + + (inst sqrtss x1 x0) ;; sqrt + + (inst movss (vect-ea dest) x1) ;; store scalar single-float + )) + +(DEFINE-VOP (%sse-vect-scalar-mul/single-float) + (:POLICY :FAST-SAFE) + (:ARGS (DEST :SCS (DESCRIPTOR-REG)) + (SRC1 :SCS (DESCRIPTOR-REG)) + (SCALAR :SCS (DESCRIPTOR-REG))) + (:ARG-TYPES SIMPLE-ARRAY-SINGLE-FLOAT SIMPLE-ARRAY-SINGLE-FLOAT SIMPLE-ARRAY-SINGLE-FLOAT) + + (:TEMPORARY (:SC XMM-REG) X0) + (:TEMPORARY (:SC XMM-REG) X1) + (:TEMPORARY (:SC XMM-REG) X2) + + (:GENERATOR 10 + (inst xorps x2 x2) + (inst movdqu x0 (vect-ea src1)) + (inst movss x1 (vect-ea scalar)) + + ;; load scalar to all slots + (inst addss x2 x1) + (inst pslldq-ib x1 4) + (inst orps x2 x1) + (inst pslldq-ib x1 4) + (inst orps x2 x1) + (inst pslldq-ib x1 4) + (inst orps x2 x1) + + ;; mul vector with scalar-vector + (inst mulps x0 x2) + + ;; store + (inst movdqu (vect-ea dest) x0) + )) + +(DEFINE-VOP (%sse-vect-normalize/single-float) + (:POLICY :FAST-SAFE) + (:ARGS (DEST :SCS (DESCRIPTOR-REG)) + (SRC1 :SCS (DESCRIPTOR-REG))) + (:ARG-TYPES SIMPLE-ARRAY-SINGLE-FLOAT SIMPLE-ARRAY-SINGLE-FLOAT) + + (:TEMPORARY (:SC XMM-REG) X0) + (:TEMPORARY (:SC XMM-REG) X1) + (:TEMPORARY (:SC XMM-REG) X2) + + (:GENERATOR 10 + (inst xorps x0 x0) + (inst movdqu x1 (vect-ea src1)) + (inst movdqa x2 x1) + + ;; calculate x0 <- 1 / sqrt( x^2 + y^2 + z^2 + w^2 ) + (inst mulps x1 x1) ;; ^2 + + ;; copy x1 to x0, then rotate/add + (inst movdqa x0 x1) + + (inst shufps x1 x1 #b10010011) ;; rotate + (inst addps x0 x1) ;; + + + (inst shufps x1 x1 #b10010011) ;; rotate + (inst addps x0 x1) ;; + + + (inst shufps x1 x1 #b10010011) ;; rotate + (inst addps x0 x1) ;; + + + (inst rsqrtps x1 x0) ;; 1 / sqrt + + (inst mulps x2 x1) ;; vect = vect * (1 / sqrt(len)) + + (inst movdqu (vect-ea dest) x2) ;; store normalized vector + )) + +(DEFINE-VOP (%sse-vect-dot/single-float) + (:POLICY :FAST-SAFE) + (:ARGS (DEST :SCS (DESCRIPTOR-REG)) + (SRC1 :SCS (DESCRIPTOR-REG)) + (SRC2 :SCS (DESCRIPTOR-REG))) + + (:ARG-TYPES + SIMPLE-ARRAY-SINGLE-FLOAT + SIMPLE-ARRAY-SINGLE-FLOAT + SIMPLE-ARRAY-SINGLE-FLOAT) + + (:TEMPORARY (:SC XMM-REG) X0) + (:TEMPORARY (:SC XMM-REG) X1) + + (:GENERATOR 10 + (inst movdqu x0 (vect-ea src1)) + (inst movdqu x1 (vect-ea src2)) + + (inst mulps x1 x0) ;; a_n * b_n + + (inst movdqa x0 x1) ;; + + (inst psrldq-ib x1 4) ;; >> 4 + (inst addss x0 x1) ;; + + + (inst psrldq-ib x1 4) ;; .. + (inst addss x0 x1) + + (inst psrldq-ib x1 4) + (inst addss x0 x1) ;; here we have added up all single-floats + + (inst movss (vect-ea dest) x0) ;; store scalar single-float + )) + diff --git a/test-vector.lisp b/test-vector.lisp new file mode 100644 index 0000000..8ae582c --- /dev/null +++ b/test-vector.lisp @@ -0,0 +1,190 @@ +#| +Copyright (c) 2005 Risto Laakso +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. +3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +|# +(in-package :cl-user) +;;(declaim (optimize (speed 3) (space 0) (debug 0) (safety 0))) + +(defmacro make-vector () + `(make-array 4 :element-type 'single-float :initial-element 0f0 :adjustable nil :fill-pointer nil)) + +(defmacro make-scalar () + `(make-array 1 :element-type 'single-float :initial-element 0f0 :adjustable nil :fill-pointer nil)) + +(declaim + (ftype (function ((simple-array single-float (4)) single-float) (simple-array single-float (4))) v* v2*) + (ftype (function ((simple-array single-float (4)) (simple-array single-float (4))) (simple-array single-float (4))) v+ v- v2+ v2-) + (ftype (function ((simple-array single-float (4)) (simple-array single-float (4))) single-float) dot dot2) + (ftype (function ((simple-array single-float (4))) (simple-array single-float (4))) unitise unitise2) + (ftype (function (single-float single-float single-float) (simple-array single-float (4))) vec) + ) + +(declaim (inline v* v+ v- dot unitise vec v2* v2+ v2- dot2 unitise2)) + +(defun v2* (a s) + (let ((res (make-vector))) + (declare (type (simple-array single-float (4)) a res) (type single-float s)) + (loop for i from 0 to 3 do (setf (aref res i) (* (aref a i) s))) + res)) + +(defun v* (a s) + (let ((res (make-vector))) + (sb-sys:%primitive sb-vm::%sse-vect-scalar-mul/single-float res a s) + res)) + +(defun v2+ (a b) + (let ((res (make-vector))) + (declare (type (simple-array single-float (4)) a b res)) + (loop for i from 0 to 3 do (setf (aref res i) (+ (aref a i) (aref b i)))) + res)) + +(defun v+ (a b) + (let ((res (make-vector))) + (sb-sys:%primitive sb-vm::%sse-vect-add/single-float res a b) + res)) + +(defun v2- (a b) + (let ((res (make-vector))) + (declare (type (simple-array single-float (4)) a b res)) + (loop for i from 0 to 3 do (setf (aref res i) (- (aref a i) (aref b i)))) + res)) + +(defun v- (a b) + (let ((res (make-vector))) + (sb-sys:%primitive sb-vm::%sse-vect-sub/single-float res a b) + res)) + +(defun dot2 (a b) + (declare (type (simple-array single-float (4)) a b)) + (loop for i from 0 to 3 sum (* (aref a i) (aref b i)) into res finally (return res))) + +(defun dot (a b) + (let ((res (make-scalar))) + (sb-sys:%primitive sb-vm::%sse-vect-dot/single-float res a b) + (aref res 0))) + +(defun unitise2 (a) + (v2* a (/ 1f0 (sqrt (dot2 a a))))) + +(defun unitise (a) + (let ((res (make-vector))) + (sb-sys:%primitive sb-vm::%sse-vect-normalize/single-float res a) + res)) + +(defun vec (x y z) + (let ((res (make-vector))) + (setf (aref res 0) x (aref res 1) y (aref res 2) z) + res)) + +(defun test-foo2 () + (let* ((v (v- (vec 10f0 10f0 0f0) (vec 3f0 3f0 1f0))) + (b (dot v (vec 0f0 0f0 10f0))) + (disc (+ (- (* b b) (dot v v)) (* 1.5 1.5)))) + disc)) + +(defun test-bar4 () +;; (let ((x (vec (random 1f6) (random 1f6) (random 1f6))) +;; (y (vec (random 1f6) (random 1f6) (random 1f6))) +;; (z (vec (random 1f6) (random 1f6) (random 1f6))) +;; (idx 0) +;; (res (make-vector))) + (let ((x (Vec 1f0 2f0 3f0)) + (idx 0)) + +;; (sb-sys:%primitive sb-vm::%store-xmm-to-array/single-float res 0 +;; (sb-sys:%primitive sb-vm::%sse-vect-add2/single-float +;; (the xmm (sb-sys:%primitive sb-vm::data-vector-ref/simple-array-single-float/xmm x idx)) +;; (sb-sys:%primitive sb-vm::data-vector-ref/simple-array-single-float/xmm y idx)) + +;; (data-vector-ref x 0) +;; (data-vector-ref y 0)) +;; (sb-sys:%primitive sb-vm::%store-xmm-to-array/single-float y 0 +;; (the xmm (sb-sys:%primitive sb-vm::%load-xmm-from-array/single-float x 0)) +;; (sb-sys:%primitive sb-vm::%load-xmm-from-array/single-float y 0))) + (the single-float (sb-sys:%primitive sb-vm::data-vector-ref/simple-array-single-float x idx)) + +;; (sb-sys:%primitive sb-vm::move-from-xmm +;; (sb-sys:%primitive sb-vm::myvop4 x)))) + +)) +(defun test-bar3 (x y) + (v- (v+ x y) (unitise y))) + +(defun test-bar () + (let ((x (vec (random 1f6) (random 1f6) (random 1f6))) + (y (vec (random 1f6) (random 1f6) (random 1f6))) + res) + (time (dotimes (i 1000000) + (setf res (dot (v- (v+ x y) y) (unitise y))))) + (time (dotimes (i 1000000) + (setf res (dot2 (v2- (v2+ x y) y) (unitise2 y))))) + res)) + + + +(defun test-foo () + (format t "~S.~%" (unitise (vec -1.0 -3.0 2.0)))) + +(defun test-vector () + (let ((vec1 (make-vector)) + (vec2 (make-vector)) + (vec3 (make-vector)) + (temp (make-array 1 :element-type 'single-float :initial-element 0f0)) + res) + + (loop for i of-type fixnum from 0 below 3 + do (setf (aref vec1 i) (float (random 1f6)) + (aref vec2 i) (float (random 1f6)))) + + + (format t "Data: ~S~%~S~%" vec1 vec2) + + (sb-sys:%primitive sb-vm::%sse-vect-add/single-float vec3 vec1 vec2) + (format t "Add: ~S, ok? ~A~%" vec3 + (loop for equal = t + for res-elt across res + for idx from 0 + for ok-elt = (+ (aref vec1 idx) (aref vec2 idx)) + when (/= ok-elt res-elt) do (setq equal nil) + finally (return equal))) + + (sb-sys:%primitive sb-vm::%sse-vect-normalize/single-float vec3 vec1) + (sb-sys:%primitive sb-vm::%sse-vect-len/single-float temp vec3) + (format t "Normalize 1: ~S, len ~S.~%" vec3 temp) + + (sb-sys:%primitive sb-vm::%sse-vect-normalize/single-float vec3 vec2) + (sb-sys:%primitive sb-vm::%sse-vect-len/single-float temp vec3) + (format t "Normalize 2: ~S, len ~S.~%" vec3 temp) + + (sb-sys:%primitive sb-vm::%sse-vect-dot/single-float temp vec1 vec2) + (format t "Dot: ~S, ok? ~A.~%" temp + (loop for a across vec1 + for b across vec2 + sum (* a b) into res + finally (return (= res (aref temp 0))))) + + )) + + -- 2.11.4.GIT From b2048134ce27b039b8b8c625efbc7665ccb852be Mon Sep 17 00:00:00 2001 From: D Herring Date: Mon, 30 Jul 2007 21:56:33 -0400 Subject: [PATCH 14/16] CVS patch versions --- patch_against_sbcl_0_9_3-1.0.txt | 229 +++++ patch_against_sbcl_0_9_3-1.1.1.txt | 229 +++++ patch_against_sbcl_0_9_3-1.2.txt | 1446 ++++++++++++++++++++++++++++++ patch_against_sbcl_0_9_3-1.3.txt | 1718 ++++++++++++++++++++++++++++++++++++ 4 files changed, 3622 insertions(+) create mode 100644 patch_against_sbcl_0_9_3-1.0.txt create mode 100644 patch_against_sbcl_0_9_3-1.1.1.txt create mode 100644 patch_against_sbcl_0_9_3-1.2.txt create mode 100644 patch_against_sbcl_0_9_3-1.3.txt diff --git a/patch_against_sbcl_0_9_3-1.0.txt b/patch_against_sbcl_0_9_3-1.0.txt new file mode 100644 index 0000000..234ef9d --- /dev/null +++ b/patch_against_sbcl_0_9_3-1.0.txt @@ -0,0 +1,229 @@ +diff -Naur src-093/compiler/x86/insts.lisp src/compiler/x86/insts.lisp +--- src-093/compiler/x86/insts.lisp 2005-08-05 15:31:17.723664255 +0300 ++++ src/compiler/x86/insts.lisp 2005-08-05 15:42:36.536109257 +0300 +@@ -192,6 +192,7 @@ + (:byte 8) + (:word 16) + (:dword 32) ++ (:dqword 128) + (:float 32) + (:double 64))) + +@@ -671,7 +672,7 @@ + + (defun reg-tn-encoding (tn) + (declare (type tn tn)) +- (aver (eq (sb-name (sc-sb (tn-sc tn))) 'registers)) ++; (aver (eq (sb-name (sc-sb (tn-sc tn))) 'registers)) + (let ((offset (tn-offset tn))) + (logior (ash (logand offset 1) 2) + (ash offset -1)))) +@@ -718,6 +719,8 @@ + (ecase (sb-name (sc-sb (tn-sc thing))) + (registers + (emit-mod-reg-r/m-byte segment #b11 reg (reg-tn-encoding thing))) ++ (sse-registers ++ (emit-mod-reg-r/m-byte segment #b11 reg (reg-tn-encoding thing))) + (stack + ;; Convert stack tns into an index off of EBP. + (let ((disp (- (* (1+ (tn-offset thing)) n-word-bytes)))) +@@ -830,6 +833,10 @@ + (and (tn-p thing) + (eq (sb-name (sc-sb (tn-sc thing))) 'registers))) + ++(defun sse-register-p (thing) ++ (and (tn-p thing) ++ (eq (sb-name (sc-sb (tn-sc thing))) 'sse-registers))) ++ + (defun accumulator-p (thing) + (and (register-p thing) + (= (tn-offset thing) 0))) +@@ -2042,6 +2049,123 @@ + (:emitter + (emit-header-data segment return-pc-header-widetag))) + ++ ++;;;; SSE instructions ++;;;; ++;;;; Automatically generated ++ ++ ++(DEFINE-INSTRUCTION ADDPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 88) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION ADDSUBPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 242) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 208) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION ANDNPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 85) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION ANDPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 84) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION DIVPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 94) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MAXPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 95) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MINPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 93) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MULPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 89) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION ORPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 86) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION RCPPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 83) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION RSQRTPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 82) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION SQRTPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 81) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION SUBPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 92) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION XORPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 87) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++;;; SSE MOVE ++ ++(DEFINE-INSTRUCTION MOVUPS (SEGMENT DST SRC) ++ (:EMITTER ++ (COND ++ ((SSE-REGISTER-P DST) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 16) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) ++ (T (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 17) ++ (EMIT-EA SEGMENT DST (REG-TN-ENCODING SRC)))))) ++ ++ ++;;; CPUID ++ ++ ++(define-instruction cpuid (segment) ++ (:emitter ++ (emit-byte segment #x0F) ++ (emit-byte segment #xA2))) ++ ++ ++ ++ ++ + ;;;; fp instructions + ;;;; + ;;;; FIXME: This section said "added by jrd", which should end up in CREDITS. +diff -Naur src-093/compiler/x86/vm.lisp src/compiler/x86/vm.lisp +--- src-093/compiler/x86/vm.lisp 2005-08-05 15:32:19.810183044 +0300 ++++ src/compiler/x86/vm.lisp 2005-08-05 15:38:26.784310770 +0300 +@@ -21,7 +21,8 @@ + (defvar *byte-register-names* (make-array 8 :initial-element nil)) + (defvar *word-register-names* (make-array 16 :initial-element nil)) + (defvar *dword-register-names* (make-array 16 :initial-element nil)) +- (defvar *float-register-names* (make-array 8 :initial-element nil))) ++ (defvar *float-register-names* (make-array 8 :initial-element nil)) ++ (defvar *dqword-register-names* (make-array 8 :initial-element nil))) + + (macrolet ((defreg (name offset size) + (let ((offset-sym (symbolicate name "-OFFSET")) +@@ -91,6 +92,17 @@ + (defreg fr7 7 :float) + (defregset *float-regs* fr0 fr1 fr2 fr3 fr4 fr5 fr6 fr7) + ++ ;; sse registers ++ (defreg xmm0 0 :dqword) ++ (defreg xmm1 1 :dqword) ++ (defreg xmm2 2 :dqword) ++ (defreg xmm3 3 :dqword) ++ (defreg xmm4 4 :dqword) ++ (defreg xmm5 5 :dqword) ++ (defreg xmm6 6 :dqword) ++ (defreg xmm7 7 :dqword) ++ (defregset *sse-regs* xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7) ++ + ;; registers used to pass arguments + ;; + ;; the number of arguments/return values passed in registers +@@ -118,6 +130,8 @@ + ;;; the new way: + (define-storage-base float-registers :finite :size 8) + ++(define-storage-base sse-registers :finite :size 8) ++ + (define-storage-base stack :unbounded :size 8) + (define-storage-base constant :non-packed) + (define-storage-base immediate-constant :non-packed) +@@ -320,6 +334,8 @@ + :save-p t + :alternate-scs (complex-long-stack)) + ++ (sse-reg sse-registers ++ :locations #.*sse-regs*) + ;; a catch or unwind block + (catch-block stack :element-size kludge-nondeterministic-catch-block-size)) + +@@ -337,6 +353,7 @@ + ;;; These are used to (at least) determine operand size. + (defparameter *float-sc-names* '(single-reg)) + (defparameter *double-sc-names* '(double-reg double-stack)) ++(defparameter *dqword-sc-names* '(sse-reg)) + ) ; EVAL-WHEN + + ;;;; miscellaneous TNs for the various registers +@@ -444,6 +461,7 @@ + ;; FIXME: Shouldn't this be an ERROR? + (format nil "" offset sc-name)))) + (float-registers (format nil "FR~D" offset)) ++ (sse-registers (format nil "XMM~D" offset)) + (stack (format nil "S~D" offset)) + (constant (format nil "Const~D" offset)) + (immediate-constant "Immed") diff --git a/patch_against_sbcl_0_9_3-1.1.1.txt b/patch_against_sbcl_0_9_3-1.1.1.txt new file mode 100644 index 0000000..234ef9d --- /dev/null +++ b/patch_against_sbcl_0_9_3-1.1.1.txt @@ -0,0 +1,229 @@ +diff -Naur src-093/compiler/x86/insts.lisp src/compiler/x86/insts.lisp +--- src-093/compiler/x86/insts.lisp 2005-08-05 15:31:17.723664255 +0300 ++++ src/compiler/x86/insts.lisp 2005-08-05 15:42:36.536109257 +0300 +@@ -192,6 +192,7 @@ + (:byte 8) + (:word 16) + (:dword 32) ++ (:dqword 128) + (:float 32) + (:double 64))) + +@@ -671,7 +672,7 @@ + + (defun reg-tn-encoding (tn) + (declare (type tn tn)) +- (aver (eq (sb-name (sc-sb (tn-sc tn))) 'registers)) ++; (aver (eq (sb-name (sc-sb (tn-sc tn))) 'registers)) + (let ((offset (tn-offset tn))) + (logior (ash (logand offset 1) 2) + (ash offset -1)))) +@@ -718,6 +719,8 @@ + (ecase (sb-name (sc-sb (tn-sc thing))) + (registers + (emit-mod-reg-r/m-byte segment #b11 reg (reg-tn-encoding thing))) ++ (sse-registers ++ (emit-mod-reg-r/m-byte segment #b11 reg (reg-tn-encoding thing))) + (stack + ;; Convert stack tns into an index off of EBP. + (let ((disp (- (* (1+ (tn-offset thing)) n-word-bytes)))) +@@ -830,6 +833,10 @@ + (and (tn-p thing) + (eq (sb-name (sc-sb (tn-sc thing))) 'registers))) + ++(defun sse-register-p (thing) ++ (and (tn-p thing) ++ (eq (sb-name (sc-sb (tn-sc thing))) 'sse-registers))) ++ + (defun accumulator-p (thing) + (and (register-p thing) + (= (tn-offset thing) 0))) +@@ -2042,6 +2049,123 @@ + (:emitter + (emit-header-data segment return-pc-header-widetag))) + ++ ++;;;; SSE instructions ++;;;; ++;;;; Automatically generated ++ ++ ++(DEFINE-INSTRUCTION ADDPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 88) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION ADDSUBPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 242) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 208) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION ANDNPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 85) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION ANDPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 84) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION DIVPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 94) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MAXPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 95) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MINPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 93) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MULPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 89) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION ORPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 86) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION RCPPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 83) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION RSQRTPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 82) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION SQRTPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 81) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION SUBPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 92) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION XORPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 87) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++;;; SSE MOVE ++ ++(DEFINE-INSTRUCTION MOVUPS (SEGMENT DST SRC) ++ (:EMITTER ++ (COND ++ ((SSE-REGISTER-P DST) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 16) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) ++ (T (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 17) ++ (EMIT-EA SEGMENT DST (REG-TN-ENCODING SRC)))))) ++ ++ ++;;; CPUID ++ ++ ++(define-instruction cpuid (segment) ++ (:emitter ++ (emit-byte segment #x0F) ++ (emit-byte segment #xA2))) ++ ++ ++ ++ ++ + ;;;; fp instructions + ;;;; + ;;;; FIXME: This section said "added by jrd", which should end up in CREDITS. +diff -Naur src-093/compiler/x86/vm.lisp src/compiler/x86/vm.lisp +--- src-093/compiler/x86/vm.lisp 2005-08-05 15:32:19.810183044 +0300 ++++ src/compiler/x86/vm.lisp 2005-08-05 15:38:26.784310770 +0300 +@@ -21,7 +21,8 @@ + (defvar *byte-register-names* (make-array 8 :initial-element nil)) + (defvar *word-register-names* (make-array 16 :initial-element nil)) + (defvar *dword-register-names* (make-array 16 :initial-element nil)) +- (defvar *float-register-names* (make-array 8 :initial-element nil))) ++ (defvar *float-register-names* (make-array 8 :initial-element nil)) ++ (defvar *dqword-register-names* (make-array 8 :initial-element nil))) + + (macrolet ((defreg (name offset size) + (let ((offset-sym (symbolicate name "-OFFSET")) +@@ -91,6 +92,17 @@ + (defreg fr7 7 :float) + (defregset *float-regs* fr0 fr1 fr2 fr3 fr4 fr5 fr6 fr7) + ++ ;; sse registers ++ (defreg xmm0 0 :dqword) ++ (defreg xmm1 1 :dqword) ++ (defreg xmm2 2 :dqword) ++ (defreg xmm3 3 :dqword) ++ (defreg xmm4 4 :dqword) ++ (defreg xmm5 5 :dqword) ++ (defreg xmm6 6 :dqword) ++ (defreg xmm7 7 :dqword) ++ (defregset *sse-regs* xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7) ++ + ;; registers used to pass arguments + ;; + ;; the number of arguments/return values passed in registers +@@ -118,6 +130,8 @@ + ;;; the new way: + (define-storage-base float-registers :finite :size 8) + ++(define-storage-base sse-registers :finite :size 8) ++ + (define-storage-base stack :unbounded :size 8) + (define-storage-base constant :non-packed) + (define-storage-base immediate-constant :non-packed) +@@ -320,6 +334,8 @@ + :save-p t + :alternate-scs (complex-long-stack)) + ++ (sse-reg sse-registers ++ :locations #.*sse-regs*) + ;; a catch or unwind block + (catch-block stack :element-size kludge-nondeterministic-catch-block-size)) + +@@ -337,6 +353,7 @@ + ;;; These are used to (at least) determine operand size. + (defparameter *float-sc-names* '(single-reg)) + (defparameter *double-sc-names* '(double-reg double-stack)) ++(defparameter *dqword-sc-names* '(sse-reg)) + ) ; EVAL-WHEN + + ;;;; miscellaneous TNs for the various registers +@@ -444,6 +461,7 @@ + ;; FIXME: Shouldn't this be an ERROR? + (format nil "" offset sc-name)))) + (float-registers (format nil "FR~D" offset)) ++ (sse-registers (format nil "XMM~D" offset)) + (stack (format nil "S~D" offset)) + (constant (format nil "Const~D" offset)) + (immediate-constant "Immed") diff --git a/patch_against_sbcl_0_9_3-1.2.txt b/patch_against_sbcl_0_9_3-1.2.txt new file mode 100644 index 0000000..9a04742 --- /dev/null +++ b/patch_against_sbcl_0_9_3-1.2.txt @@ -0,0 +1,1446 @@ +diff -x 'CVS*' -Naur src-093/compiler/x86/insts.lisp src/compiler/x86/insts.lisp +--- src-093/compiler/x86/insts.lisp 2005-08-05 16:13:29.000000000 +0300 ++++ src/compiler/x86/insts.lisp 2005-08-08 16:30:23.352842152 +0300 +@@ -192,6 +192,8 @@ + (:byte 8) + (:word 16) + (:dword 32) ++ (:qword 64) ++ (:dqword 128) + (:float 32) + (:double 64))) + +@@ -671,7 +673,7 @@ + + (defun reg-tn-encoding (tn) + (declare (type tn tn)) +- (aver (eq (sb-name (sc-sb (tn-sc tn))) 'registers)) ++; (aver (eq (sb-name (sc-sb (tn-sc tn))) 'registers)) + (let ((offset (tn-offset tn))) + (logior (ash (logand offset 1) 2) + (ash offset -1)))) +@@ -718,6 +720,8 @@ + (ecase (sb-name (sc-sb (tn-sc thing))) + (registers + (emit-mod-reg-r/m-byte segment #b11 reg (reg-tn-encoding thing))) ++ (sse-registers ++ (emit-mod-reg-r/m-byte segment #b11 reg (reg-tn-encoding thing))) + (stack + ;; Convert stack tns into an index off of EBP. + (let ((disp (- (* (1+ (tn-offset thing)) n-word-bytes)))) +@@ -830,6 +834,10 @@ + (and (tn-p thing) + (eq (sb-name (sc-sb (tn-sc thing))) 'registers))) + ++(defun sse-register-p (thing) ++ (and (tn-p thing) ++ (eq (sb-name (sc-sb (tn-sc thing))) 'sse-registers))) ++ + (defun accumulator-p (thing) + (and (register-p thing) + (= (tn-offset thing) 0))) +@@ -2042,6 +2050,1339 @@ + (:emitter + (emit-header-data segment return-pc-header-widetag))) + ++ ++;;;; SSE instructions ++;;;; ++;;;; Automatically generated ++ ++ ++(DEFINE-INSTRUCTION ADDPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 88) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION ADDSUBPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 242) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 208) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION ANDNPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 85) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION ANDPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 84) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION DIVPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 94) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION HADDPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 242) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 124) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION HSUBPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 242) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 125) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MAXPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 95) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MINPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 93) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MULPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 89) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION ORPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 86) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION RCPPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 83) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION RSQRTPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 82) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION SQRTPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 81) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION SUBPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 92) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION UNPCKHPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 21) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION UNPCKLPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 20) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION XORPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 87) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION ADDPD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 88) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION ADDSUBPD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 208) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION ANDNPD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 85) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION ANDPD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 84) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION DIVPD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 94) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION HADDPD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 124) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION HSUBPD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 125) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MAXPD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 95) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MINPD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 93) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MULPD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 89) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION ORPD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 86) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION SQRTPD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 81) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION SUBPD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 92) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION UNPCKHPD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 21) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION UNPCKLPD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 20) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION XORPD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 87) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION ADDSD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 242) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 88) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION COMISD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 47) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION DIVSD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 242) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 94) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MAXSD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 242) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 95) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MINSD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 242) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 93) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MULSD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 242) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 89) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION SQRTSD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 242) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 81) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION SUBSD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 242) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 92) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION UCOMISD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 46) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION ADDSS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 88) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION COMISS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 47) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION DIVSS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 94) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MAXSS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 95) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MINSS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 93) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MULSS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 89) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION RCPSS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 83) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION RSQRTSS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 82) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION SQRTSS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 81) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION SUBSS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 92) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION UCOMISS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 46) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PACKSSDW ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 107) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PACKSSWB ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 99) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PACKUSWB ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 103) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PADDB ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 252) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PADDD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 254) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PADDQ ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 212) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PADDSB ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 236) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PADDSW ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 237) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PADDUSB ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 220) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PADDUSW ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 221) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PADDW ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 253) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PAND ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 219) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PANDN ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 223) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PAVGB ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 224) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PAVGW ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 227) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PCMPEQB ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 116) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PCMPEQD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 118) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PCMPEQW ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 117) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PCMPGTB ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 100) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PCMPGTD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 102) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PCMPGTW ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 101) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PMADDWD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 245) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PMAXSW ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 238) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PMAXUB ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 222) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PMINSW ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 234) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PMINUB ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 218) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PMOVMSKB ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 215) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PMULHUW ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 228) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PMULHW ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 229) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PMULLW ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 213) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PMULUDQ ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 244) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION POR ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 235) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PSADBW ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 246) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PSSLD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 242) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PSLLQ ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 243) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PSLLW ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 241) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PSRAD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 226) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PSRAW ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 226) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PSRLD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 210) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PSRLQ ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 211) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PSRLW ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 209) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PSUBB ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 248) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PSUBD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 250) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PSUBQ ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 251) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PSUBSB ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 232) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PSUBSW ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 233) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PSUBUSB ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 216) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PSUBUSW ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 217) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PSUBW ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 249) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PUNPCKHBW ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 104) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PUNPCKHDQ ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 106) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PUNPCKHQDQ ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 109) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PUNPCKHWD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 105) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PUNPCKLBW ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 96) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PUNPCKLDQ ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 98) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PUNPCKLQDQ ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 108) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PUNPCKLWD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 97) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PXOR ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 239) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION CVTDQ2PD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 230) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION CVTDQ2PS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 91) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION CVTPD2DQ ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 242) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 230) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION CVTPD2PI ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 45) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION CVTPD2PS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 90) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION CVTPI2PD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 42) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION CVTPI2PS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 42) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION CVTPS2DQ ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 91) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION CVTPS2PD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 90) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION CVTPS2PI ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 45) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION CVTSD2SI ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 242) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 45) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION CVTSD2SS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 242) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 90) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION CVTSI2SD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 242) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 42) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION CVTSI2SS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 42) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION CVTSS2SD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 90) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION CVTSS2SI ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 45) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION CVTTPD2DQ ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 230) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION CVTTPD2PI ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 44) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION CVTTPS2DQ ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 91) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION CVTTPS2PI ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 44) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION CVTTSD2SI ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 242) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 44) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION CVTTSS2SI ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 44) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION LDDQU ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 242) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 240) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MASKMOVDQU ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 247) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MOVDDUP ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 242) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 18) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MOVHLPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 18) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MOVLHPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 22) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MOVMSKPD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 80) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MOVMSKPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 80) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MOVNTDQ ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 231) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MOVNTPD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 43) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MOVNTPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 43) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MOVSHDUP ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 22) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MOVSLDUP ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 18) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PEXTRW ++ (SEGMENT DST SRC BYTE) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 197) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)) ++ (EMIT-SIZED-IMMEDIATE SEGMENT :BYTE BYTE))) ++ ++(DEFINE-INSTRUCTION PINSRW ++ (SEGMENT DST SRC BYTE) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 196) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)) ++ (EMIT-SIZED-IMMEDIATE SEGMENT :BYTE BYTE))) ++ ++(DEFINE-INSTRUCTION PSHUFD ++ (SEGMENT DST SRC BYTE) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 112) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)) ++ (EMIT-SIZED-IMMEDIATE SEGMENT :BYTE BYTE))) ++ ++(DEFINE-INSTRUCTION PSHUFHW ++ (SEGMENT DST SRC BYTE) ++ (:EMITTER (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 112) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)) ++ (EMIT-SIZED-IMMEDIATE SEGMENT :BYTE BYTE))) ++ ++(DEFINE-INSTRUCTION PSHUFLW ++ (SEGMENT DST SRC BYTE) ++ (:EMITTER (EMIT-BYTE SEGMENT 242) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 112) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)) ++ (EMIT-SIZED-IMMEDIATE SEGMENT :BYTE BYTE))) ++ ++(DEFINE-INSTRUCTION SHUFPD ++ (SEGMENT DST SRC BYTE) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 198) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)) ++ (EMIT-SIZED-IMMEDIATE SEGMENT :BYTE BYTE))) ++ ++(DEFINE-INSTRUCTION SHUFPS ++ (SEGMENT DST SRC BYTE) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 198) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)) ++ (EMIT-SIZED-IMMEDIATE SEGMENT :BYTE BYTE))) ++ ++(DEFINE-INSTRUCTION CMPPD ++ (SEGMENT DST SRC COND) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 194) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)) ++ (EMIT-SIZED-IMMEDIATE SEGMENT ++ :BYTE ++ (CDR ++ (ASSOC COND ++ '((:EQ . 0) ++ (:E . 0) (:Z . 0) ++ (:L . 1) ++ (:NGE . 1) ++ (:LE . 2) ++ (:NG . 2) ++ (:UNORD . 3) ++ (:NE . 4) ++ (:NZ . 4) ++ (:NL . 5) ++ (:GE . 5) ++ (:NLE . 6) ++ (:G . 6) ++ (:ORD . 7))))))) ++ ++(DEFINE-INSTRUCTION CMPPS ++ (SEGMENT DST SRC COND) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 194) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)) ++ (EMIT-SIZED-IMMEDIATE SEGMENT ++ :BYTE ++ (CDR ++ (ASSOC COND ++ '((:EQ . 0) ++ (:E . 0) (:Z . 0) ++ (:L . 1) ++ (:NGE . 1) ++ (:LE . 2) ++ (:NG . 2) ++ (:UNORD . 3) ++ (:NE . 4) ++ (:NZ . 4) ++ (:NL . 5) ++ (:GE . 5) ++ (:NLE . 6) ++ (:G . 6) ++ (:ORD . 7))))))) ++ ++(DEFINE-INSTRUCTION CMPSD ++ (SEGMENT DST SRC COND) ++ (:EMITTER (EMIT-BYTE SEGMENT 242) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 194) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)) ++ (EMIT-SIZED-IMMEDIATE SEGMENT ++ :BYTE ++ (CDR ++ (ASSOC COND ++ '((:EQ . 0) ++ (:E . 0) (:Z . 0) ++ (:L . 1) ++ (:NGE . 1) ++ (:LE . 2) ++ (:NG . 2) ++ (:UNORD . 3) ++ (:NE . 4) ++ (:NZ . 4) ++ (:NL . 5) ++ (:GE . 5) ++ (:NLE . 6) ++ (:G . 6) ++ (:ORD . 7))))))) ++ ++(DEFINE-INSTRUCTION CMPSS ++ (SEGMENT DST SRC COND) ++ (:EMITTER (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 194) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)) ++ (EMIT-SIZED-IMMEDIATE SEGMENT ++ :BYTE ++ (CDR ++ (ASSOC COND ++ '((:EQ . 0) ++ (:E . 0) (:Z . 0) ++ (:L . 1) ++ (:NGE . 1) ++ (:LE . 2) ++ (:NG . 2) ++ (:UNORD . 3) ++ (:NE . 4) ++ (:NZ . 4) ++ (:NL . 5) ++ (:GE . 5) ++ (:NLE . 6) ++ (:G . 6) ++ (:ORD . 7))))))) ++ ++(DEFINE-INSTRUCTION MOVAPD ++ (SEGMENT DST SRC) ++ (:EMITTER ++ (COND ++ ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 40) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) ++ (T (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 41) ++ (EMIT-EA SEGMENT DST (REG-TN-ENCODING SRC)))))) ++ ++(DEFINE-INSTRUCTION MOVAPS ++ (SEGMENT DST SRC) ++ (:EMITTER ++ (COND ++ ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 40) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) ++ (T (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 41) ++ (EMIT-EA SEGMENT DST (REG-TN-ENCODING SRC)))))) ++ ++(DEFINE-INSTRUCTION MOVD ++ (SEGMENT DST SRC) ++ (:EMITTER ++ (COND ++ ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 110) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) ++ (T (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 126) ++ (EMIT-EA SEGMENT DST (REG-TN-ENCODING SRC)))))) ++ ++(DEFINE-INSTRUCTION MOVDQA ++ (SEGMENT DST SRC) ++ (:EMITTER ++ (COND ++ ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 111) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) ++ (T (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 127) ++ (EMIT-EA SEGMENT DST (REG-TN-ENCODING SRC)))))) ++ ++(DEFINE-INSTRUCTION MOVDQU ++ (SEGMENT DST SRC) ++ (:EMITTER ++ (COND ++ ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 111) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) ++ (T (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 127) ++ (EMIT-EA SEGMENT DST (REG-TN-ENCODING SRC)))))) ++ ++(DEFINE-INSTRUCTION MOVHPD ++ (SEGMENT DST SRC) ++ (:EMITTER ++ (COND ++ ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 22) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) ++ (T (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 23) ++ (EMIT-EA SEGMENT DST (REG-TN-ENCODING SRC)))))) ++ ++(DEFINE-INSTRUCTION MOVHPS ++ (SEGMENT DST SRC) ++ (:EMITTER ++ (COND ++ ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 22) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) ++ (T (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 23) ++ (EMIT-EA SEGMENT DST (REG-TN-ENCODING SRC)))))) ++ ++(DEFINE-INSTRUCTION MOVLPD ++ (SEGMENT DST SRC) ++ (:EMITTER ++ (COND ++ ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 18) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) ++ (T (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 19) ++ (EMIT-EA SEGMENT DST (REG-TN-ENCODING SRC)))))) ++ ++(DEFINE-INSTRUCTION MOVLPS ++ (SEGMENT DST SRC) ++ (:EMITTER ++ (COND ++ ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 18) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) ++ (T (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 19) ++ (EMIT-EA SEGMENT DST (REG-TN-ENCODING SRC)))))) ++ ++(DEFINE-INSTRUCTION MOVQ ++ (SEGMENT DST SRC) ++ (:EMITTER ++ (COND ++ ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 126) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) ++ (T (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 214) ++ (EMIT-EA SEGMENT DST (REG-TN-ENCODING SRC)))))) ++ ++(DEFINE-INSTRUCTION MOVSD ++ (SEGMENT DST SRC) ++ (:EMITTER ++ (COND ++ ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 242) ++ (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 16) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) ++ (T (EMIT-BYTE SEGMENT 242) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 17) ++ (EMIT-EA SEGMENT DST (REG-TN-ENCODING SRC)))))) ++ ++(DEFINE-INSTRUCTION MOVSS ++ (SEGMENT DST SRC) ++ (:EMITTER ++ (COND ++ ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 16) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) ++ (T (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 17) ++ (EMIT-EA SEGMENT DST (REG-TN-ENCODING SRC)))))) ++ ++(DEFINE-INSTRUCTION MOVUPD ++ (SEGMENT DST SRC) ++ (:EMITTER ++ (COND ++ ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 16) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) ++ (T (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 17) ++ (EMIT-EA SEGMENT DST (REG-TN-ENCODING SRC)))))) ++ ++(DEFINE-INSTRUCTION MOVUPS ++ (SEGMENT DST SRC) ++ (:EMITTER ++ (COND ++ ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 16) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) ++ (T (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 17) ++ (EMIT-EA SEGMENT DST (REG-TN-ENCODING SRC)))))) ++ ++ ++ ++;;; CPUID ++ ++ ++(define-instruction cpuid (segment) ++ (:emitter ++ (emit-byte segment #x0F) ++ (emit-byte segment #xA2))) ++ ++ ++ ++ + ;;;; fp instructions + ;;;; + ;;;; FIXME: This section said "added by jrd", which should end up in CREDITS. +diff -x 'CVS*' -Naur src-093/compiler/x86/vm.lisp src/compiler/x86/vm.lisp +--- src-093/compiler/x86/vm.lisp 2005-08-05 16:13:29.000000000 +0300 ++++ src/compiler/x86/vm.lisp 2005-08-08 16:32:19.609588299 +0300 +@@ -21,7 +21,8 @@ + (defvar *byte-register-names* (make-array 8 :initial-element nil)) + (defvar *word-register-names* (make-array 16 :initial-element nil)) + (defvar *dword-register-names* (make-array 16 :initial-element nil)) +- (defvar *float-register-names* (make-array 8 :initial-element nil))) ++ (defvar *float-register-names* (make-array 8 :initial-element nil)) ++ (defvar *dqword-register-names* (make-array 8 :initial-element nil))) + + (macrolet ((defreg (name offset size) + (let ((offset-sym (symbolicate name "-OFFSET")) +@@ -91,6 +92,17 @@ + (defreg fr7 7 :float) + (defregset *float-regs* fr0 fr1 fr2 fr3 fr4 fr5 fr6 fr7) + ++ ;; sse registers ++ (defreg xmm0 0 :dqword) ++ (defreg xmm1 1 :dqword) ++ (defreg xmm2 2 :dqword) ++ (defreg xmm3 3 :dqword) ++ (defreg xmm4 4 :dqword) ++ (defreg xmm5 5 :dqword) ++ (defreg xmm6 6 :dqword) ++ (defreg xmm7 7 :dqword) ++ (defregset *sse-regs* xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7) ++ + ;; registers used to pass arguments + ;; + ;; the number of arguments/return values passed in registers +@@ -118,6 +130,8 @@ + ;;; the new way: + (define-storage-base float-registers :finite :size 8) + ++(define-storage-base sse-registers :finite :size 8) ++ + (define-storage-base stack :unbounded :size 8) + (define-storage-base constant :non-packed) + (define-storage-base immediate-constant :non-packed) +@@ -320,6 +334,8 @@ + :save-p t + :alternate-scs (complex-long-stack)) + ++ (sse-reg sse-registers ++ :locations #.*sse-regs*) + ;; a catch or unwind block + (catch-block stack :element-size kludge-nondeterministic-catch-block-size)) + +@@ -337,6 +353,7 @@ + ;;; These are used to (at least) determine operand size. + (defparameter *float-sc-names* '(single-reg)) + (defparameter *double-sc-names* '(double-reg double-stack)) ++(defparameter *dqword-sc-names* '(sse-reg)) + ) ; EVAL-WHEN + + ;;;; miscellaneous TNs for the various registers +@@ -444,6 +461,7 @@ + ;; FIXME: Shouldn't this be an ERROR? + (format nil "" offset sc-name)))) + (float-registers (format nil "FR~D" offset)) ++ (sse-registers (format nil "XMM~D" offset)) + (stack (format nil "S~D" offset)) + (constant (format nil "Const~D" offset)) + (immediate-constant "Immed") diff --git a/patch_against_sbcl_0_9_3-1.3.txt b/patch_against_sbcl_0_9_3-1.3.txt new file mode 100644 index 0000000..72f17d8 --- /dev/null +++ b/patch_against_sbcl_0_9_3-1.3.txt @@ -0,0 +1,1718 @@ +diff -x 'CVS*' -Naur src-093/compiler/x86/float.lisp src/compiler/x86/float.lisp +--- src-093/compiler/x86/float.lisp 2005-08-17 16:56:53.996387102 +0300 ++++ src/compiler/x86/float.lisp 2005-08-17 15:04:50.040162831 +0300 +@@ -4308,3 +4308,122 @@ + (:note "inline dummy FP register bias") + (:ignore x) + (:generator 0)) ++ ++ ++;; XMM Moves ++ ++ ++(defun ea-for-xmm-desc (tn) ++ (make-ea :xmmword :base tn ++ :disp (- (* xmm-value-slot n-word-bytes) other-pointer-lowtag))) ++ ++(defun ea-for-xmm-stack (tn) ++ (make-ea :xmmword :base ebp-tn ++ :disp (- (* (+ (tn-offset tn) ++ 4) ++ n-word-bytes)))) ++ ++(define-move-fun (load-xmm 2) (vop x y) ++ ((xmm-stack) (xmm-reg)) ++ (inst movdqu y (ea-for-xmm-stack x))) ++ ++(define-move-fun (store-xmm 2) (vop x y) ++ ((xmm-reg) (xmm-stack)) ++ (inst movdqu (ea-for-xmm-stack y) x)) ++ ++(define-move-fun (load-xmm-single 2) (vop x y) ++ ((single-stack) (xmm-reg)) ++ (inst movss y (ea-for-sf-stack x))) ++ ++(define-move-fun (store-xmm-single 2) (vop x y) ++ ((xmm-reg) (single-stack)) ++ (inst movss (ea-for-sf-stack y) x)) ++ ++ ++(define-vop (%load-xmm-from-array/single-float) ++ (:policy :fast-safe) ++ (:args (src :scs (descriptor-reg)) ++ (index :scs (unsigned-reg))) ++ (:ARG-TYPES SIMPLE-ARRAY-SINGLE-FLOAT fixnum) ++ (:results (dest :scs (xmm-reg))) ++ (:result-types xmm) ++ (:generator 1 ++ (inst shl index 2) ++ (inst movdqu dest (make-ea :xmmword :base src :index index ++ :disp (- (* VECTOR-DATA-OFFSET N-WORD-BYTES) OTHER-POINTER-LOWTAG))))) ++ ++ ++(define-vop (%store-xmm-to-array/single-float) ++ (:policy :fast-safe) ++ (:args (dest :scs (descriptor-reg)) ++ (index :scs (unsigned-reg)) ++ (src :scs (xmm-reg))) ++ (:ARG-TYPES SIMPLE-ARRAY-SINGLE-FLOAT fixnum XMM) ++ (:generator 1 ++ (inst shl index 2) ++ (inst movdqu (make-ea :xmmword :base dest :index index ++ :disp (- (* VECTOR-DATA-OFFSET N-WORD-BYTES) OTHER-POINTER-LOWTAG)) ++ src))) ++ ++ ++(define-vop (xmm-move) ++ (:args (x :scs (xmm-reg) :target y :load-if (not (location= x y)))) ++ (:results (y :scs (xmm-reg) :load-if (not (location= x y)))) ++ (:note "xmm move") ++ (:generator 0 ++ (unless (location= x y) ++ (inst movdqa y x)))) ++ ++(define-move-vop xmm-move :move (xmm-reg) (xmm-reg)) ++ ++(define-vop (move-from-xmm) ++ (:args (x :scs (xmm-reg) :to :save)) ++ (:results (y :scs (descriptor-reg))) ++ (:node-var node) ++ (:note "xmm to pointer coercion") ++ (:generator 13 ++ (with-fixed-allocation (y ++ xmm-widetag ++ xmm-size node) ++ (inst movdqu (ea-for-xmm-desc y) x)))) ++ ++(define-move-vop move-from-xmm :move (xmm-reg) (descriptor-reg)) ++ ++(define-vop (move-to-xmm) ++ (:args (x :scs (descriptor-reg))) ++ (:results (y :scs (xmm-reg))) ++ (:note "pointer to xmm coercion") ++ (:generator 2 ++ (inst movdqu y (ea-for-xmm-desc x)))) ++ ++(define-move-vop move-to-xmm :move (descriptor-reg) (xmm-reg)) ++ ++ ++(define-vop (move-xmm-arg) ++ (:args (x :scs (xmm-reg) :target y) ++ (fp :scs (any-reg) ++ :load-if (not (sc-is y xmm-reg)))) ++ (:results (y)) ++ (:note "xmm argument move") ++ (:generator 6 ++ (sc-case y ++ (xmm-reg ++ (unless (location= x y) ++ (inst movdqa y x))) ++ ++ (xmm-stack ++ (if (= (tn-offset fp) esp-offset) ++ (let* ((offset (* (tn-offset y) n-word-bytes)) ++ (ea (make-ea :xmmword :base fp :disp offset))) ++ (inst movdqu ea x)) ++ ++ (let ((ea (make-ea :xmmword :base fp ++ :disp (- (* (+ (tn-offset y) 4) ++ n-word-bytes))))) ++ (inst movdqu ea x))))))) ++ ++(define-move-vop move-xmm-arg :move-arg (xmm-reg descriptor-reg) (xmm-reg)) ++ ++(define-move-vop move-arg :move-arg (xmm-reg) (descriptor-reg)) ++ ++ +diff -x 'CVS*' -Naur src-093/compiler/x86/insts.lisp src/compiler/x86/insts.lisp +--- src-093/compiler/x86/insts.lisp 2005-08-05 16:13:29.000000000 +0300 ++++ src/compiler/x86/insts.lisp 2005-08-16 10:39:07.027823783 +0300 +@@ -39,13 +39,16 @@ + #(ax cx dx bx sp bp si di)) + (defparameter *dword-reg-names* + #(eax ecx edx ebx esp ebp esi edi)) ++(defparameter *xmmword-reg-names* ++ #(xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7)) + + (defun print-reg-with-width (value width stream dstate) + (declare (ignore dstate)) + (princ (aref (ecase width + (:byte *byte-reg-names*) + (:word *word-reg-names*) +- (:dword *dword-reg-names*)) ++ (:dword *dword-reg-names*) ++ (:xmmword *xmmword-reg-names*)) + value) + stream) + ;; XXX plus should do some source-var notes +@@ -192,6 +195,8 @@ + (:byte 8) + (:word 16) + (:dword 32) ++ (:qword 64) ++ (:xmmword 128) + (:float 32) + (:double 64))) + +@@ -671,14 +676,14 @@ + + (defun reg-tn-encoding (tn) + (declare (type tn tn)) +- (aver (eq (sb-name (sc-sb (tn-sc tn))) 'registers)) ++; (aver (eq (sb-name (sc-sb (tn-sc tn))) 'registers)) + (let ((offset (tn-offset tn))) + (logior (ash (logand offset 1) 2) + (ash offset -1)))) + + (defstruct (ea (:constructor make-ea (size &key base index scale disp)) + (:copier nil)) +- (size nil :type (member :byte :word :dword)) ++ (size nil :type (member :byte :word :dword :xmmword)) + (base nil :type (or tn null)) + (index nil :type (or tn null)) + (scale 1 :type (member 1 2 4 8)) +@@ -718,6 +723,8 @@ + (ecase (sb-name (sc-sb (tn-sc thing))) + (registers + (emit-mod-reg-r/m-byte segment #b11 reg (reg-tn-encoding thing))) ++ (xmm-registers ++ (emit-mod-reg-r/m-byte segment #b11 reg (reg-tn-encoding thing))) + (stack + ;; Convert stack tns into an index off of EBP. + (let ((disp (- (* (1+ (tn-offset thing)) n-word-bytes)))) +@@ -830,6 +837,19 @@ + (and (tn-p thing) + (eq (sb-name (sc-sb (tn-sc thing))) 'registers))) + ++(defun xmm-register-p (thing) ++ (and (tn-p thing) ++ (eq (sb-name (sc-sb (tn-sc thing))) 'xmm-registers) ++ (member (sc-name (tn-sc thing)) *xmmword-sc-names*) ++ t)) ++ ++(defun xmm-ea-p (thing) ++ (typecase thing ++ (ea (eq (ea-size thing) :xmmword)) ++ (tn ++ (and (member (sc-name (tn-sc thing)) *xmmword-sc-names*) t)) ++ (t nil))) ++ + (defun accumulator-p (thing) + (and (register-p thing) + (= (tn-offset thing) 0))) +@@ -859,6 +879,8 @@ + :float) + (#.*double-sc-names* + :double) ++ (#.*xmmword-sc-names* ++ :xmmword) + (t + (error "can't tell the size of ~S ~S" thing (sc-name (tn-sc thing)))))) + (ea +@@ -2042,6 +2064,1419 @@ + (:emitter + (emit-header-data segment return-pc-header-widetag))) + ++ ++;;;; SSE instructions ++;;;; ++;;;; Automatically generated ++ ++ ++(DEFINE-INSTRUCTION ADDPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 88) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION ADDSUBPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 242) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 208) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION ANDNPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 85) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION ANDPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 84) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION DIVPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 94) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION HADDPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 242) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 124) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION HSUBPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 242) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 125) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MAXPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 95) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MINPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 93) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MULPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 89) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION ORPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 86) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION RCPPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 83) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION RSQRTPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 82) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION SQRTPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 81) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION SUBPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 92) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION UNPCKHPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 21) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION UNPCKLPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 20) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION XORPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 87) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION ADDPD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 88) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION ADDSUBPD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 208) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION ANDNPD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 85) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION ANDPD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 84) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION DIVPD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 94) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION HADDPD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 124) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION HSUBPD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 125) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MAXPD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 95) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MINPD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 93) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MULPD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 89) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION ORPD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 86) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION SQRTPD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 81) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION SUBPD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 92) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION UNPCKHPD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 21) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION UNPCKLPD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 20) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION XORPD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 87) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION ADDSD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 242) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 88) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION COMISD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 47) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION DIVSD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 242) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 94) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MAXSD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 242) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 95) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MINSD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 242) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 93) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MULSD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 242) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 89) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION SQRTSD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 242) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 81) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION SUBSD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 242) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 92) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION UCOMISD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 46) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION ADDSS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 88) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION COMISS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 47) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION DIVSS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 94) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MAXSS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 95) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MINSS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 93) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MULSS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 89) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION RCPSS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 83) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION RSQRTSS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 82) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION SQRTSS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 81) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION SUBSS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 92) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION UCOMISS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 46) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PACKSSDW ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 107) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PACKSSWB ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 99) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PACKUSWB ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 103) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PADDB ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 252) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PADDD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 254) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PADDQ ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 212) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PADDSB ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 236) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PADDSW ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 237) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PADDUSB ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 220) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PADDUSW ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 221) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PADDW ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 253) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PAND ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 219) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PANDN ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 223) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PAVGB ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 224) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PAVGW ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 227) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PCMPEQB ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 116) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PCMPEQD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 118) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PCMPEQW ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 117) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PCMPGTB ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 100) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PCMPGTD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 102) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PCMPGTW ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 101) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PMADDWD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 245) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PMAXSW ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 238) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PMAXUB ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 222) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PMINSW ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 234) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PMINUB ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 218) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PMOVMSKB ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 215) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PMULHUW ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 228) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PMULHW ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 229) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PMULLW ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 213) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PMULUDQ ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 244) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION POR ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 235) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PSADBW ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 246) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PSSLD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 242) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PSLLQ ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 243) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PSLLW ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 241) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PSRAD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 226) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PSRAW ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 226) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PSRLD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 210) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PSRLQ ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 211) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PSRLW ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 209) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PSUBB ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 248) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PSUBD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 250) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PSUBQ ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 251) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PSUBSB ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 232) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PSUBSW ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 233) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PSUBUSB ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 216) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PSUBUSW ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 217) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PSUBW ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 249) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PUNPCKHBW ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 104) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PUNPCKHDQ ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 106) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PUNPCKHQDQ ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 109) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PUNPCKHWD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 105) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PUNPCKLBW ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 96) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PUNPCKLDQ ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 98) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PUNPCKLQDQ ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 108) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PUNPCKLWD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 97) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PXOR ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 239) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION CVTDQ2PD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 230) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION CVTDQ2PS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 91) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION CVTPD2DQ ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 242) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 230) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION CVTPD2PI ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 45) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION CVTPD2PS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 90) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION CVTPI2PD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 42) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION CVTPI2PS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 42) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION CVTPS2DQ ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 91) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION CVTPS2PD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 90) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION CVTPS2PI ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 45) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION CVTSD2SI ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 242) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 45) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION CVTSD2SS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 242) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 90) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION CVTSI2SD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 242) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 42) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION CVTSI2SS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 42) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION CVTSS2SD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 90) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION CVTSS2SI ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 45) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION CVTTPD2DQ ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 230) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION CVTTPD2PI ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 44) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION CVTTPS2DQ ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 91) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION CVTTPS2PI ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 44) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION CVTTSD2SI ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 242) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 44) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION CVTTSS2SI ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 44) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION LDDQU ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 242) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 240) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MASKMOVDQU ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 247) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MOVDDUP ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 242) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 18) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MOVHLPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 18) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MOVLHPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 22) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MOVMSKPD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 80) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MOVMSKPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 80) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MOVNTDQ ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 231) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MOVNTPD ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 43) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MOVNTPS ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 43) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MOVSHDUP ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 22) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION MOVSLDUP ++ (SEGMENT DST SRC) ++ (:EMITTER (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 18) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)))) ++ ++(DEFINE-INSTRUCTION PEXTRW ++ (SEGMENT DST SRC BYTE) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 197) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)) ++ (EMIT-SIZED-IMMEDIATE SEGMENT :BYTE BYTE))) ++ ++(DEFINE-INSTRUCTION PINSRW ++ (SEGMENT DST SRC BYTE) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 196) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)) ++ (EMIT-SIZED-IMMEDIATE SEGMENT :BYTE BYTE))) ++ ++(DEFINE-INSTRUCTION PSHUFD ++ (SEGMENT DST SRC BYTE) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 112) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)) ++ (EMIT-SIZED-IMMEDIATE SEGMENT :BYTE BYTE))) ++ ++(DEFINE-INSTRUCTION PSHUFHW ++ (SEGMENT DST SRC BYTE) ++ (:EMITTER (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 112) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)) ++ (EMIT-SIZED-IMMEDIATE SEGMENT :BYTE BYTE))) ++ ++(DEFINE-INSTRUCTION PSHUFLW ++ (SEGMENT DST SRC BYTE) ++ (:EMITTER (EMIT-BYTE SEGMENT 242) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 112) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)) ++ (EMIT-SIZED-IMMEDIATE SEGMENT :BYTE BYTE))) ++ ++(DEFINE-INSTRUCTION SHUFPD ++ (SEGMENT DST SRC BYTE) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 198) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)) ++ (EMIT-SIZED-IMMEDIATE SEGMENT :BYTE BYTE))) ++ ++(DEFINE-INSTRUCTION SHUFPS ++ (SEGMENT DST SRC BYTE) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 198) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)) ++ (EMIT-SIZED-IMMEDIATE SEGMENT :BYTE BYTE))) ++ ++(DEFINE-INSTRUCTION CMPPD ++ (SEGMENT DST SRC COND) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 194) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)) ++ (EMIT-SIZED-IMMEDIATE SEGMENT ++ :BYTE ++ (CDR ++ (ASSOC COND ++ '((:EQ . 0) ++ (:E . 0) (:Z . 0) ++ (:L . 1) ++ (:NGE . 1) ++ (:LE . 2) ++ (:NG . 2) ++ (:UNORD . 3) ++ (:NE . 4) ++ (:NZ . 4) ++ (:NL . 5) ++ (:GE . 5) ++ (:NLE . 6) ++ (:G . 6) ++ (:ORD . 7))))))) ++ ++(DEFINE-INSTRUCTION CMPPS ++ (SEGMENT DST SRC COND) ++ (:EMITTER (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 194) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)) ++ (EMIT-SIZED-IMMEDIATE SEGMENT ++ :BYTE ++ (CDR ++ (ASSOC COND ++ '((:EQ . 0) ++ (:E . 0) (:Z . 0) ++ (:L . 1) ++ (:NGE . 1) ++ (:LE . 2) ++ (:NG . 2) ++ (:UNORD . 3) ++ (:NE . 4) ++ (:NZ . 4) ++ (:NL . 5) ++ (:GE . 5) ++ (:NLE . 6) ++ (:G . 6) ++ (:ORD . 7))))))) ++ ++(DEFINE-INSTRUCTION CMPSD ++ (SEGMENT DST SRC COND) ++ (:EMITTER (EMIT-BYTE SEGMENT 242) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 194) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)) ++ (EMIT-SIZED-IMMEDIATE SEGMENT ++ :BYTE ++ (CDR ++ (ASSOC COND ++ '((:EQ . 0) ++ (:E . 0) (:Z . 0) ++ (:L . 1) ++ (:NGE . 1) ++ (:LE . 2) ++ (:NG . 2) ++ (:UNORD . 3) ++ (:NE . 4) ++ (:NZ . 4) ++ (:NL . 5) ++ (:GE . 5) ++ (:NLE . 6) ++ (:G . 6) ++ (:ORD . 7))))))) ++ ++(DEFINE-INSTRUCTION CMPSS ++ (SEGMENT DST SRC COND) ++ (:EMITTER (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 194) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST)) ++ (EMIT-SIZED-IMMEDIATE SEGMENT ++ :BYTE ++ (CDR ++ (ASSOC COND ++ '((:EQ . 0) ++ (:E . 0) (:Z . 0) ++ (:L . 1) ++ (:NGE . 1) ++ (:LE . 2) ++ (:NG . 2) ++ (:UNORD . 3) ++ (:NE . 4) ++ (:NZ . 4) ++ (:NL . 5) ++ (:GE . 5) ++ (:NLE . 6) ++ (:G . 6) ++ (:ORD . 7))))))) ++ ++(DEFINE-INSTRUCTION MOVAPD ++ (SEGMENT DST SRC) ++ (:EMITTER ++ (COND ++ ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 40) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) ++ (T (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 41) ++ (EMIT-EA SEGMENT DST (REG-TN-ENCODING SRC)))))) ++ ++(DEFINE-INSTRUCTION MOVAPS ++ (SEGMENT DST SRC) ++ (:EMITTER ++ (COND ++ ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 40) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) ++ (T (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 41) ++ (EMIT-EA SEGMENT DST (REG-TN-ENCODING SRC)))))) ++ ++(DEFINE-INSTRUCTION MOVD ++ (SEGMENT DST SRC) ++ (:EMITTER ++ (COND ++ ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 110) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) ++ (T (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 126) ++ (EMIT-EA SEGMENT DST (REG-TN-ENCODING SRC)))))) ++ ++(DEFINE-INSTRUCTION MOVDQA ++ (SEGMENT DST SRC) ++ (:EMITTER ++ (COND ++ ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 111) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) ++ (T (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 127) ++ (EMIT-EA SEGMENT DST (REG-TN-ENCODING SRC)))))) ++ ++(DEFINE-INSTRUCTION MOVDQU ++ (SEGMENT DST SRC) ++ (:EMITTER ++ (COND ++ ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 111) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) ++ (T (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 127) ++ (EMIT-EA SEGMENT DST (REG-TN-ENCODING SRC)))))) ++ ++(DEFINE-INSTRUCTION MOVHPD ++ (SEGMENT DST SRC) ++ (:EMITTER ++ (COND ++ ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 22) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) ++ (T (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 23) ++ (EMIT-EA SEGMENT DST (REG-TN-ENCODING SRC)))))) ++ ++(DEFINE-INSTRUCTION MOVHPS ++ (SEGMENT DST SRC) ++ (:EMITTER ++ (COND ++ ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 22) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) ++ (T (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 23) ++ (EMIT-EA SEGMENT DST (REG-TN-ENCODING SRC)))))) ++ ++(DEFINE-INSTRUCTION MOVLPD ++ (SEGMENT DST SRC) ++ (:EMITTER ++ (COND ++ ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 18) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) ++ (T (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 19) ++ (EMIT-EA SEGMENT DST (REG-TN-ENCODING SRC)))))) ++ ++(DEFINE-INSTRUCTION MOVLPS ++ (SEGMENT DST SRC) ++ (:EMITTER ++ (COND ++ ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 18) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) ++ (T (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 19) ++ (EMIT-EA SEGMENT DST (REG-TN-ENCODING SRC)))))) ++ ++(DEFINE-INSTRUCTION MOVQ ++ (SEGMENT DST SRC) ++ (:EMITTER ++ (COND ++ ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 126) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) ++ (T (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 214) ++ (EMIT-EA SEGMENT DST (REG-TN-ENCODING SRC)))))) ++ ++(DEFINE-INSTRUCTION MOVSD ++ (SEGMENT DST SRC) ++ (:EMITTER ++ (COND ++ ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 242) ++ (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 16) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) ++ (T (EMIT-BYTE SEGMENT 242) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 17) ++ (EMIT-EA SEGMENT DST (REG-TN-ENCODING SRC)))))) ++ ++(DEFINE-INSTRUCTION MOVSS ++ (SEGMENT DST SRC) ++ (:EMITTER ++ (COND ++ ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 16) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) ++ (T (EMIT-BYTE SEGMENT 243) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 17) ++ (EMIT-EA SEGMENT DST (REG-TN-ENCODING SRC)))))) ++ ++(DEFINE-INSTRUCTION MOVUPD ++ (SEGMENT DST SRC) ++ (:EMITTER ++ (COND ++ ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 16) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) ++ (T (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 17) ++ (EMIT-EA SEGMENT DST (REG-TN-ENCODING SRC)))))) ++ ++(DEFINE-INSTRUCTION MOVUPS ++ (SEGMENT DST SRC) ++ (:EMITTER ++ (COND ++ ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 16) ++ (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) ++ (T (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 17) ++ (EMIT-EA SEGMENT DST (REG-TN-ENCODING SRC)))))) ++ ++(DEFINE-INSTRUCTION PSLLD-IB ++ (SEGMENT DST AMOUNT) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 114) ++ (EMIT-EA SEGMENT DST 6) ++ (EMIT-BYTE SEGMENT AMOUNT))) ++ ++(DEFINE-INSTRUCTION PSLLDQ-IB ++ (SEGMENT DST AMOUNT) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 115) ++ (EMIT-EA SEGMENT DST 7) ++ (EMIT-BYTE SEGMENT AMOUNT))) ++ ++(DEFINE-INSTRUCTION PSLLQ-IB ++ (SEGMENT DST AMOUNT) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 115) ++ (EMIT-EA SEGMENT DST 6) ++ (EMIT-BYTE SEGMENT AMOUNT))) ++ ++(DEFINE-INSTRUCTION PSLLW-IB ++ (SEGMENT DST AMOUNT) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 113) ++ (EMIT-EA SEGMENT DST 6) ++ (EMIT-BYTE SEGMENT AMOUNT))) ++ ++(DEFINE-INSTRUCTION PSRAD-IB ++ (SEGMENT DST AMOUNT) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 114) ++ (EMIT-EA SEGMENT DST 4) ++ (EMIT-BYTE SEGMENT AMOUNT))) ++ ++(DEFINE-INSTRUCTION PSRAW-IB ++ (SEGMENT DST AMOUNT) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 113) ++ (EMIT-EA SEGMENT DST 4) ++ (EMIT-BYTE SEGMENT AMOUNT))) ++ ++(DEFINE-INSTRUCTION PSRLD-IB ++ (SEGMENT DST AMOUNT) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 114) ++ (EMIT-EA SEGMENT DST 2) ++ (EMIT-BYTE SEGMENT AMOUNT))) ++ ++(DEFINE-INSTRUCTION PSRLDQ-IB ++ (SEGMENT DST AMOUNT) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 115) ++ (EMIT-EA SEGMENT DST 3) ++ (EMIT-BYTE SEGMENT AMOUNT))) ++ ++(DEFINE-INSTRUCTION PSRLQ-IB ++ (SEGMENT DST AMOUNT) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 115) ++ (EMIT-EA SEGMENT DST 2) ++ (EMIT-BYTE SEGMENT AMOUNT))) ++ ++(DEFINE-INSTRUCTION PSRLW-IB ++ (SEGMENT DST AMOUNT) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 113) ++ (EMIT-EA SEGMENT DST 2) ++ (EMIT-BYTE SEGMENT AMOUNT))) ++ ++ ++ ++;;; CPUID ++ ++ ++(define-instruction cpuid (segment) ++ (:emitter ++ (emit-byte segment #x0F) ++ (emit-byte segment #xA2))) ++ ++ ++ ++ + ;;;; fp instructions + ;;;; + ;;;; FIXME: This section said "added by jrd", which should end up in CREDITS. +diff -x 'CVS*' -Naur src-093/compiler/x86/vm.lisp src/compiler/x86/vm.lisp +--- src-093/compiler/x86/vm.lisp 2005-08-05 16:13:29.000000000 +0300 ++++ src/compiler/x86/vm.lisp 2005-08-17 13:06:11.717026836 +0300 +@@ -21,7 +21,8 @@ + (defvar *byte-register-names* (make-array 8 :initial-element nil)) + (defvar *word-register-names* (make-array 16 :initial-element nil)) + (defvar *dword-register-names* (make-array 16 :initial-element nil)) +- (defvar *float-register-names* (make-array 8 :initial-element nil))) ++ (defvar *float-register-names* (make-array 8 :initial-element nil)) ++ (defvar *xmmword-register-names* (make-array 8 :initial-element nil))) + + (macrolet ((defreg (name offset size) + (let ((offset-sym (symbolicate name "-OFFSET")) +@@ -91,6 +92,17 @@ + (defreg fr7 7 :float) + (defregset *float-regs* fr0 fr1 fr2 fr3 fr4 fr5 fr6 fr7) + ++ ;; sse registers ++ (defreg xmm0 0 :xmmword) ++ (defreg xmm1 1 :xmmword) ++ (defreg xmm2 2 :xmmword) ++ (defreg xmm3 3 :xmmword) ++ (defreg xmm4 4 :xmmword) ++ (defreg xmm5 5 :xmmword) ++ (defreg xmm6 6 :xmmword) ++ (defreg xmm7 7 :xmmword) ++ (defregset *xmm-regs* xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7) ++ + ;; registers used to pass arguments + ;; + ;; the number of arguments/return values passed in registers +@@ -118,6 +130,8 @@ + ;;; the new way: + (define-storage-base float-registers :finite :size 8) + ++(define-storage-base xmm-registers :finite :size 8) ++ + (define-storage-base stack :unbounded :size 8) + (define-storage-base constant :non-packed) + (define-storage-base immediate-constant :non-packed) +@@ -186,6 +200,7 @@ + (sap-stack stack) ; System area pointers. + (single-stack stack) ; single-floats + (double-stack stack :element-size 2) ; double-floats. ++ (xmm-stack stack :element-size 4) ; xmm + #!+long-float + (long-stack stack :element-size 3) ; long-floats. + (complex-single-stack stack :element-size 2) ; complex-single-floats +@@ -290,6 +305,12 @@ + :save-p t + :alternate-scs (double-stack)) + ++ ;; non-descriptor XMMs ++ (xmm-reg xmm-registers ++ :locations #.*xmm-regs* ++ :save-p t ++ :alternate-scs (xmm-stack)) ++ + ;; non-descriptor LONG-FLOATs + #!+long-float + (long-reg float-registers +@@ -337,6 +358,7 @@ + ;;; These are used to (at least) determine operand size. + (defparameter *float-sc-names* '(single-reg)) + (defparameter *double-sc-names* '(double-reg double-stack)) ++(defparameter *xmmword-sc-names* '(xmm-reg)) + ) ; EVAL-WHEN + + ;;;; miscellaneous TNs for the various registers +@@ -359,7 +381,8 @@ + (def-misc-reg-tns unsigned-reg eax ebx ecx edx ebp esp edi esi) + (def-misc-reg-tns word-reg ax bx cx dx bp sp di si) + (def-misc-reg-tns byte-reg al ah bl bh cl ch dl dh) +- (def-misc-reg-tns single-reg fr0 fr1 fr2 fr3 fr4 fr5 fr6 fr7)) ++ (def-misc-reg-tns single-reg fr0 fr1 fr2 fr3 fr4 fr5 fr6 fr7) ++ (def-misc-reg-tns xmm-reg xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7)) + + ;;; TNs for registers used to pass arguments + (defparameter *register-arg-tns* +@@ -444,6 +467,7 @@ + ;; FIXME: Shouldn't this be an ERROR? + (format nil "" offset sc-name)))) + (float-registers (format nil "FR~D" offset)) ++ (xmm-registers (format nil "XMM~D" offset)) + (stack (format nil "S~D" offset)) + (constant (format nil "Const~D" offset)) + (immediate-constant "Immed") -- 2.11.4.GIT From 2620c8db1633ef23d1997f5b5f7acc3ea8abadb7 Mon Sep 17 00:00:00 2001 From: D Herring Date: Fri, 25 Jan 2008 00:09:57 -0500 Subject: [PATCH 15/16] Minor ASDF tweak. --- sb-simd.asd | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/sb-simd.asd b/sb-simd.asd index 3707c3d..b2c7ba9 100644 --- a/sb-simd.asd +++ b/sb-simd.asd @@ -9,8 +9,5 @@ (:file "sse-matrix" :depends-on ("sse-vops"))) ) -(defmethod perform :after ((o load-op (c (eql (find-system :sb-simd))))) +(defmethod perform :after ((o load-op) (c (eql (find-system :sb-simd)))) (provide 'sb-simd)) - - - -- 2.11.4.GIT From 032b337d82f048206d6b5999c429c0ff38eb38d5 Mon Sep 17 00:00:00 2001 From: D Herring Date: Wed, 30 Jan 2008 01:56:21 -0500 Subject: [PATCH 16/16] Code fixes for test-matrix. --- sse-matrix.lisp | 2 +- test-matrix.lisp | 10 ++++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/sse-matrix.lisp b/sse-matrix.lisp index c0f5976..1a2de31 100644 --- a/sse-matrix.lisp +++ b/sse-matrix.lisp @@ -105,7 +105,7 @@ http://developer.intel.com/design/pentiumiii/sml/24504501.pdf (inst movss x1 (vect-ea mat1 24)) (inst movss (Vect-ea result) x3) - (inst movhpd (vect-ea result 4) x3) + (inst movhps (vect-ea result 4) x3) (inst addps x6 x5) (inst shufps x1 x1 0) diff --git a/test-matrix.lisp b/test-matrix.lisp index c404ccb..a2fcf4f 100644 --- a/test-matrix.lisp +++ b/test-matrix.lisp @@ -26,16 +26,18 @@ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |# (in-package :cl-user) -(declaim (optimize (speed 3) (space 0) (safety 0) (debug 0))) +; On my machine, naive is 30% faster without this; sse is 10% slower. +;(declaim (optimize (speed 3) (space 0) (safety 0) (debug 0))) (defun test-matrix (&optional (test-count 10000000)) (let ((mat1 (make-array 9 :element-type 'single-float :initial-element 0f0)) (mat2 (make-array 9 :element-type 'single-float :initial-element 0f0)) - (naive #()) - (sse #()) + (naive (make-array 9 :element-type 'single-float)) + (sse (make-array 9 :element-type 'single-float)) ) - (declare (type (simple-vector) naive sse) (type fixnum test-count)) + (declare (type (simple-array single-float (9)) naive sse) + (type fixnum test-count)) (loop for i of-type fixnum from 0 below 9 do (setf (aref mat1 i) (float (random 1f6)) (aref mat2 i) (float (random 1f6)))) -- 2.11.4.GIT