From: rlaakso Date: Wed, 17 Aug 2005 14:08:55 +0000 (+0000) Subject: *** empty log message *** X-Git-Url: https://repo.or.cz/w/sb-simd.git/commitdiff_plain/ab6cf415a1824247039d46ea7ee6d2f1eb82e874 *** empty log message *** --- diff --git a/expand-parse-operand-temp-count.lisp b/expand-parse-operand-temp-count.lisp index 4bfaba1..8b5c6ba 100644 --- a/expand-parse-operand-temp-count.lisp +++ b/expand-parse-operand-temp-count.lisp @@ -1,3 +1,29 @@ +#| +Copyright (c) 2005 Risto Laakso +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. +3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +|# (in-package :sb-c) (setf *parse-vop-operand-count* 1) diff --git a/generate-sse-instructions.lisp b/generate-sse-instructions.lisp index 2bea781..9bb34aa 100644 --- a/generate-sse-instructions.lisp +++ b/generate-sse-instructions.lisp @@ -323,7 +323,7 @@ STMXCSR . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . (format stream "~S~%~%" `(define-instruction ,(intern (symbol-name inst)) (segment dst src) (:emitter - (cond ((sse-register-p dst) + (cond ((xmm-register-p dst) ,@(emit-ops ops-m2r) (emit-ea segment src (reg-tn-encoding dst))) (t ,@(emit-ops ops-r2m) diff --git a/generate-sse-vops.lisp b/generate-sse-vops.lisp index eb1a36e..079d3fb 100644 --- a/generate-sse-vops.lisp +++ b/generate-sse-vops.lisp @@ -132,8 +132,8 @@ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ,(intern (format nil "SIMPLE-ARRAY-~A" type)) fixnum) - (:temporary (:sc sse-reg) sse-temp1) - (:temporary (:sc sse-reg) sse-temp2) + (:temporary (:sc xmm-reg) sse-temp1) + (:temporary (:sc xmm-reg) sse-temp2) (:generator 10 @@ -186,8 +186,8 @@ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ,(intern (format nil "SIMPLE-ARRAY-~A" type2)) fixnum) - (:temporary (:sc sse-reg) sse-temp1) - (:temporary (:sc sse-reg) sse-temp2) + (:temporary (:sc xmm-reg) sse-temp1) + (:temporary (:sc xmm-reg) sse-temp2) (:generator 10 @@ -233,8 +233,8 @@ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ,(intern (format nil "SIMPLE-ARRAY-~A" type)) fixnum) - (:temporary (:sc sse-reg) sse-temp1) - (:temporary (:sc sse-reg) sse-temp2) + (:temporary (:sc xmm-reg) sse-temp1) + (:temporary (:sc xmm-reg) sse-temp2) (:generator 10 @@ -283,8 +283,8 @@ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. (:constant keyword) ) - (:temporary (:sc sse-reg) sse-temp1) - (:temporary (:sc sse-reg) sse-temp2) + (:temporary (:sc xmm-reg) sse-temp1) + (:temporary (:sc xmm-reg) sse-temp2) (:generator 10 diff --git a/load.lisp b/load.lisp index 673a676..6613a4f 100644 --- a/load.lisp +++ b/load.lisp @@ -11,7 +11,7 @@ (load (compile-file "test-matrix.lisp")) )) -(if t +(if nil (progn (load (compile-file "detect-simd.lisp")) (load (compile-file "expand-parse-operand-temp-count.lisp")) @@ -19,4 +19,14 @@ (load (compile-file "sse-seq.lisp")) (load (compile-file "test-seq.lisp")) )) + +(if t + (progn + (load (compile-file "detect-simd.lisp")) + (load (compile-file "sse-moves.lisp")) + (load (compile-file "expand-parse-operand-temp-count.lisp")) + (load (compile-file "timing.lisp")) + (load (compile-file "sse-vector.lisp")) + (load (compile-file "test-vector.lisp")) + )) \ No newline at end of file diff --git a/push-simd-features.lisp b/push-simd-features.lisp index c9cbbb7..468dc16 100644 --- a/push-simd-features.lisp +++ b/push-simd-features.lisp @@ -1,3 +1,29 @@ +#| +Copyright (c) 2005 Risto Laakso +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. +3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +|# (in-package :sb-vm) (eval-when (:load-toplevel) diff --git a/sbcl-src/patch_against_sbcl_0_9_3 b/sbcl-src/patch_against_sbcl_0_9_3 index 9a04742..72f17d8 100644 --- a/sbcl-src/patch_against_sbcl_0_9_3 +++ b/sbcl-src/patch_against_sbcl_0_9_3 @@ -1,16 +1,160 @@ +diff -x 'CVS*' -Naur src-093/compiler/x86/float.lisp src/compiler/x86/float.lisp +--- src-093/compiler/x86/float.lisp 2005-08-17 16:56:53.996387102 +0300 ++++ src/compiler/x86/float.lisp 2005-08-17 15:04:50.040162831 +0300 +@@ -4308,3 +4308,122 @@ + (:note "inline dummy FP register bias") + (:ignore x) + (:generator 0)) ++ ++ ++;; XMM Moves ++ ++ ++(defun ea-for-xmm-desc (tn) ++ (make-ea :xmmword :base tn ++ :disp (- (* xmm-value-slot n-word-bytes) other-pointer-lowtag))) ++ ++(defun ea-for-xmm-stack (tn) ++ (make-ea :xmmword :base ebp-tn ++ :disp (- (* (+ (tn-offset tn) ++ 4) ++ n-word-bytes)))) ++ ++(define-move-fun (load-xmm 2) (vop x y) ++ ((xmm-stack) (xmm-reg)) ++ (inst movdqu y (ea-for-xmm-stack x))) ++ ++(define-move-fun (store-xmm 2) (vop x y) ++ ((xmm-reg) (xmm-stack)) ++ (inst movdqu (ea-for-xmm-stack y) x)) ++ ++(define-move-fun (load-xmm-single 2) (vop x y) ++ ((single-stack) (xmm-reg)) ++ (inst movss y (ea-for-sf-stack x))) ++ ++(define-move-fun (store-xmm-single 2) (vop x y) ++ ((xmm-reg) (single-stack)) ++ (inst movss (ea-for-sf-stack y) x)) ++ ++ ++(define-vop (%load-xmm-from-array/single-float) ++ (:policy :fast-safe) ++ (:args (src :scs (descriptor-reg)) ++ (index :scs (unsigned-reg))) ++ (:ARG-TYPES SIMPLE-ARRAY-SINGLE-FLOAT fixnum) ++ (:results (dest :scs (xmm-reg))) ++ (:result-types xmm) ++ (:generator 1 ++ (inst shl index 2) ++ (inst movdqu dest (make-ea :xmmword :base src :index index ++ :disp (- (* VECTOR-DATA-OFFSET N-WORD-BYTES) OTHER-POINTER-LOWTAG))))) ++ ++ ++(define-vop (%store-xmm-to-array/single-float) ++ (:policy :fast-safe) ++ (:args (dest :scs (descriptor-reg)) ++ (index :scs (unsigned-reg)) ++ (src :scs (xmm-reg))) ++ (:ARG-TYPES SIMPLE-ARRAY-SINGLE-FLOAT fixnum XMM) ++ (:generator 1 ++ (inst shl index 2) ++ (inst movdqu (make-ea :xmmword :base dest :index index ++ :disp (- (* VECTOR-DATA-OFFSET N-WORD-BYTES) OTHER-POINTER-LOWTAG)) ++ src))) ++ ++ ++(define-vop (xmm-move) ++ (:args (x :scs (xmm-reg) :target y :load-if (not (location= x y)))) ++ (:results (y :scs (xmm-reg) :load-if (not (location= x y)))) ++ (:note "xmm move") ++ (:generator 0 ++ (unless (location= x y) ++ (inst movdqa y x)))) ++ ++(define-move-vop xmm-move :move (xmm-reg) (xmm-reg)) ++ ++(define-vop (move-from-xmm) ++ (:args (x :scs (xmm-reg) :to :save)) ++ (:results (y :scs (descriptor-reg))) ++ (:node-var node) ++ (:note "xmm to pointer coercion") ++ (:generator 13 ++ (with-fixed-allocation (y ++ xmm-widetag ++ xmm-size node) ++ (inst movdqu (ea-for-xmm-desc y) x)))) ++ ++(define-move-vop move-from-xmm :move (xmm-reg) (descriptor-reg)) ++ ++(define-vop (move-to-xmm) ++ (:args (x :scs (descriptor-reg))) ++ (:results (y :scs (xmm-reg))) ++ (:note "pointer to xmm coercion") ++ (:generator 2 ++ (inst movdqu y (ea-for-xmm-desc x)))) ++ ++(define-move-vop move-to-xmm :move (descriptor-reg) (xmm-reg)) ++ ++ ++(define-vop (move-xmm-arg) ++ (:args (x :scs (xmm-reg) :target y) ++ (fp :scs (any-reg) ++ :load-if (not (sc-is y xmm-reg)))) ++ (:results (y)) ++ (:note "xmm argument move") ++ (:generator 6 ++ (sc-case y ++ (xmm-reg ++ (unless (location= x y) ++ (inst movdqa y x))) ++ ++ (xmm-stack ++ (if (= (tn-offset fp) esp-offset) ++ (let* ((offset (* (tn-offset y) n-word-bytes)) ++ (ea (make-ea :xmmword :base fp :disp offset))) ++ (inst movdqu ea x)) ++ ++ (let ((ea (make-ea :xmmword :base fp ++ :disp (- (* (+ (tn-offset y) 4) ++ n-word-bytes))))) ++ (inst movdqu ea x))))))) ++ ++(define-move-vop move-xmm-arg :move-arg (xmm-reg descriptor-reg) (xmm-reg)) ++ ++(define-move-vop move-arg :move-arg (xmm-reg) (descriptor-reg)) ++ ++ diff -x 'CVS*' -Naur src-093/compiler/x86/insts.lisp src/compiler/x86/insts.lisp --- src-093/compiler/x86/insts.lisp 2005-08-05 16:13:29.000000000 +0300 -+++ src/compiler/x86/insts.lisp 2005-08-08 16:30:23.352842152 +0300 -@@ -192,6 +192,8 @@ ++++ src/compiler/x86/insts.lisp 2005-08-16 10:39:07.027823783 +0300 +@@ -39,13 +39,16 @@ + #(ax cx dx bx sp bp si di)) + (defparameter *dword-reg-names* + #(eax ecx edx ebx esp ebp esi edi)) ++(defparameter *xmmword-reg-names* ++ #(xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7)) + + (defun print-reg-with-width (value width stream dstate) + (declare (ignore dstate)) + (princ (aref (ecase width + (:byte *byte-reg-names*) + (:word *word-reg-names*) +- (:dword *dword-reg-names*)) ++ (:dword *dword-reg-names*) ++ (:xmmword *xmmword-reg-names*)) + value) + stream) + ;; XXX plus should do some source-var notes +@@ -192,6 +195,8 @@ (:byte 8) (:word 16) (:dword 32) + (:qword 64) -+ (:dqword 128) ++ (:xmmword 128) (:float 32) (:double 64))) -@@ -671,7 +673,7 @@ +@@ -671,14 +676,14 @@ (defun reg-tn-encoding (tn) (declare (type tn tn)) @@ -19,27 +163,53 @@ diff -x 'CVS*' -Naur src-093/compiler/x86/insts.lisp src/compiler/x86/insts.lisp (let ((offset (tn-offset tn))) (logior (ash (logand offset 1) 2) (ash offset -1)))) -@@ -718,6 +720,8 @@ + + (defstruct (ea (:constructor make-ea (size &key base index scale disp)) + (:copier nil)) +- (size nil :type (member :byte :word :dword)) ++ (size nil :type (member :byte :word :dword :xmmword)) + (base nil :type (or tn null)) + (index nil :type (or tn null)) + (scale 1 :type (member 1 2 4 8)) +@@ -718,6 +723,8 @@ (ecase (sb-name (sc-sb (tn-sc thing))) (registers (emit-mod-reg-r/m-byte segment #b11 reg (reg-tn-encoding thing))) -+ (sse-registers ++ (xmm-registers + (emit-mod-reg-r/m-byte segment #b11 reg (reg-tn-encoding thing))) (stack ;; Convert stack tns into an index off of EBP. (let ((disp (- (* (1+ (tn-offset thing)) n-word-bytes)))) -@@ -830,6 +834,10 @@ +@@ -830,6 +837,19 @@ (and (tn-p thing) (eq (sb-name (sc-sb (tn-sc thing))) 'registers))) -+(defun sse-register-p (thing) ++(defun xmm-register-p (thing) + (and (tn-p thing) -+ (eq (sb-name (sc-sb (tn-sc thing))) 'sse-registers))) ++ (eq (sb-name (sc-sb (tn-sc thing))) 'xmm-registers) ++ (member (sc-name (tn-sc thing)) *xmmword-sc-names*) ++ t)) ++ ++(defun xmm-ea-p (thing) ++ (typecase thing ++ (ea (eq (ea-size thing) :xmmword)) ++ (tn ++ (and (member (sc-name (tn-sc thing)) *xmmword-sc-names*) t)) ++ (t nil))) + (defun accumulator-p (thing) (and (register-p thing) (= (tn-offset thing) 0))) -@@ -2042,6 +2050,1339 @@ +@@ -859,6 +879,8 @@ + :float) + (#.*double-sc-names* + :double) ++ (#.*xmmword-sc-names* ++ :xmmword) + (t + (error "can't tell the size of ~S ~S" thing (sc-name (tn-sc thing)))))) + (ea +@@ -2042,6 +2064,1419 @@ (:emitter (emit-header-data segment return-pc-header-widetag))) @@ -1203,7 +1373,7 @@ diff -x 'CVS*' -Naur src-093/compiler/x86/insts.lisp src/compiler/x86/insts.lisp + (SEGMENT DST SRC) + (:EMITTER + (COND -+ ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) ++ ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 40) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) + (T (EMIT-BYTE SEGMENT 102) @@ -1215,7 +1385,7 @@ diff -x 'CVS*' -Naur src-093/compiler/x86/insts.lisp src/compiler/x86/insts.lisp + (SEGMENT DST SRC) + (:EMITTER + (COND -+ ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 15) ++ ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 40) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) + (T (EMIT-BYTE SEGMENT 15) @@ -1226,7 +1396,7 @@ diff -x 'CVS*' -Naur src-093/compiler/x86/insts.lisp src/compiler/x86/insts.lisp + (SEGMENT DST SRC) + (:EMITTER + (COND -+ ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) ++ ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 110) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) + (T (EMIT-BYTE SEGMENT 102) @@ -1238,7 +1408,7 @@ diff -x 'CVS*' -Naur src-093/compiler/x86/insts.lisp src/compiler/x86/insts.lisp + (SEGMENT DST SRC) + (:EMITTER + (COND -+ ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) ++ ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 111) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) + (T (EMIT-BYTE SEGMENT 102) @@ -1250,7 +1420,7 @@ diff -x 'CVS*' -Naur src-093/compiler/x86/insts.lisp src/compiler/x86/insts.lisp + (SEGMENT DST SRC) + (:EMITTER + (COND -+ ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 243) ++ ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 243) + (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 111) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) + (T (EMIT-BYTE SEGMENT 243) @@ -1262,7 +1432,7 @@ diff -x 'CVS*' -Naur src-093/compiler/x86/insts.lisp src/compiler/x86/insts.lisp + (SEGMENT DST SRC) + (:EMITTER + (COND -+ ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) ++ ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 22) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) + (T (EMIT-BYTE SEGMENT 102) @@ -1274,7 +1444,7 @@ diff -x 'CVS*' -Naur src-093/compiler/x86/insts.lisp src/compiler/x86/insts.lisp + (SEGMENT DST SRC) + (:EMITTER + (COND -+ ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 15) ++ ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 22) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) + (T (EMIT-BYTE SEGMENT 15) @@ -1285,7 +1455,7 @@ diff -x 'CVS*' -Naur src-093/compiler/x86/insts.lisp src/compiler/x86/insts.lisp + (SEGMENT DST SRC) + (:EMITTER + (COND -+ ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) ++ ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 18) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) + (T (EMIT-BYTE SEGMENT 102) @@ -1297,7 +1467,7 @@ diff -x 'CVS*' -Naur src-093/compiler/x86/insts.lisp src/compiler/x86/insts.lisp + (SEGMENT DST SRC) + (:EMITTER + (COND -+ ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 15) ++ ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 18) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) + (T (EMIT-BYTE SEGMENT 15) @@ -1308,7 +1478,7 @@ diff -x 'CVS*' -Naur src-093/compiler/x86/insts.lisp src/compiler/x86/insts.lisp + (SEGMENT DST SRC) + (:EMITTER + (COND -+ ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 243) ++ ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 243) + (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 126) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) + (T (EMIT-BYTE SEGMENT 102) @@ -1320,7 +1490,7 @@ diff -x 'CVS*' -Naur src-093/compiler/x86/insts.lisp src/compiler/x86/insts.lisp + (SEGMENT DST SRC) + (:EMITTER + (COND -+ ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 242) ++ ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 242) + (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 16) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) + (T (EMIT-BYTE SEGMENT 242) @@ -1332,7 +1502,7 @@ diff -x 'CVS*' -Naur src-093/compiler/x86/insts.lisp src/compiler/x86/insts.lisp + (SEGMENT DST SRC) + (:EMITTER + (COND -+ ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 243) ++ ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 243) + (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 16) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) + (T (EMIT-BYTE SEGMENT 243) @@ -1344,7 +1514,7 @@ diff -x 'CVS*' -Naur src-093/compiler/x86/insts.lisp src/compiler/x86/insts.lisp + (SEGMENT DST SRC) + (:EMITTER + (COND -+ ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) ++ ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 102) + (EMIT-BYTE SEGMENT 15) (EMIT-BYTE SEGMENT 16) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) + (T (EMIT-BYTE SEGMENT 102) @@ -1356,13 +1526,93 @@ diff -x 'CVS*' -Naur src-093/compiler/x86/insts.lisp src/compiler/x86/insts.lisp + (SEGMENT DST SRC) + (:EMITTER + (COND -+ ((SSE-REGISTER-P DST) (EMIT-BYTE SEGMENT 15) ++ ((XMM-REGISTER-P DST) (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 16) + (EMIT-EA SEGMENT SRC (REG-TN-ENCODING DST))) + (T (EMIT-BYTE SEGMENT 15) + (EMIT-BYTE SEGMENT 17) + (EMIT-EA SEGMENT DST (REG-TN-ENCODING SRC)))))) + ++(DEFINE-INSTRUCTION PSLLD-IB ++ (SEGMENT DST AMOUNT) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 114) ++ (EMIT-EA SEGMENT DST 6) ++ (EMIT-BYTE SEGMENT AMOUNT))) ++ ++(DEFINE-INSTRUCTION PSLLDQ-IB ++ (SEGMENT DST AMOUNT) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 115) ++ (EMIT-EA SEGMENT DST 7) ++ (EMIT-BYTE SEGMENT AMOUNT))) ++ ++(DEFINE-INSTRUCTION PSLLQ-IB ++ (SEGMENT DST AMOUNT) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 115) ++ (EMIT-EA SEGMENT DST 6) ++ (EMIT-BYTE SEGMENT AMOUNT))) ++ ++(DEFINE-INSTRUCTION PSLLW-IB ++ (SEGMENT DST AMOUNT) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 113) ++ (EMIT-EA SEGMENT DST 6) ++ (EMIT-BYTE SEGMENT AMOUNT))) ++ ++(DEFINE-INSTRUCTION PSRAD-IB ++ (SEGMENT DST AMOUNT) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 114) ++ (EMIT-EA SEGMENT DST 4) ++ (EMIT-BYTE SEGMENT AMOUNT))) ++ ++(DEFINE-INSTRUCTION PSRAW-IB ++ (SEGMENT DST AMOUNT) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 113) ++ (EMIT-EA SEGMENT DST 4) ++ (EMIT-BYTE SEGMENT AMOUNT))) ++ ++(DEFINE-INSTRUCTION PSRLD-IB ++ (SEGMENT DST AMOUNT) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 114) ++ (EMIT-EA SEGMENT DST 2) ++ (EMIT-BYTE SEGMENT AMOUNT))) ++ ++(DEFINE-INSTRUCTION PSRLDQ-IB ++ (SEGMENT DST AMOUNT) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 115) ++ (EMIT-EA SEGMENT DST 3) ++ (EMIT-BYTE SEGMENT AMOUNT))) ++ ++(DEFINE-INSTRUCTION PSRLQ-IB ++ (SEGMENT DST AMOUNT) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 115) ++ (EMIT-EA SEGMENT DST 2) ++ (EMIT-BYTE SEGMENT AMOUNT))) ++ ++(DEFINE-INSTRUCTION PSRLW-IB ++ (SEGMENT DST AMOUNT) ++ (:EMITTER (EMIT-BYTE SEGMENT 102) ++ (EMIT-BYTE SEGMENT 15) ++ (EMIT-BYTE SEGMENT 113) ++ (EMIT-EA SEGMENT DST 2) ++ (EMIT-BYTE SEGMENT AMOUNT))) ++ + + +;;; CPUID @@ -1381,14 +1631,14 @@ diff -x 'CVS*' -Naur src-093/compiler/x86/insts.lisp src/compiler/x86/insts.lisp ;;;; FIXME: This section said "added by jrd", which should end up in CREDITS. diff -x 'CVS*' -Naur src-093/compiler/x86/vm.lisp src/compiler/x86/vm.lisp --- src-093/compiler/x86/vm.lisp 2005-08-05 16:13:29.000000000 +0300 -+++ src/compiler/x86/vm.lisp 2005-08-08 16:32:19.609588299 +0300 ++++ src/compiler/x86/vm.lisp 2005-08-17 13:06:11.717026836 +0300 @@ -21,7 +21,8 @@ (defvar *byte-register-names* (make-array 8 :initial-element nil)) (defvar *word-register-names* (make-array 16 :initial-element nil)) (defvar *dword-register-names* (make-array 16 :initial-element nil)) - (defvar *float-register-names* (make-array 8 :initial-element nil))) + (defvar *float-register-names* (make-array 8 :initial-element nil)) -+ (defvar *dqword-register-names* (make-array 8 :initial-element nil))) ++ (defvar *xmmword-register-names* (make-array 8 :initial-element nil))) (macrolet ((defreg (name offset size) (let ((offset-sym (symbolicate name "-OFFSET")) @@ -1397,15 +1647,15 @@ diff -x 'CVS*' -Naur src-093/compiler/x86/vm.lisp src/compiler/x86/vm.lisp (defregset *float-regs* fr0 fr1 fr2 fr3 fr4 fr5 fr6 fr7) + ;; sse registers -+ (defreg xmm0 0 :dqword) -+ (defreg xmm1 1 :dqword) -+ (defreg xmm2 2 :dqword) -+ (defreg xmm3 3 :dqword) -+ (defreg xmm4 4 :dqword) -+ (defreg xmm5 5 :dqword) -+ (defreg xmm6 6 :dqword) -+ (defreg xmm7 7 :dqword) -+ (defregset *sse-regs* xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7) ++ (defreg xmm0 0 :xmmword) ++ (defreg xmm1 1 :xmmword) ++ (defreg xmm2 2 :xmmword) ++ (defreg xmm3 3 :xmmword) ++ (defreg xmm4 4 :xmmword) ++ (defreg xmm5 5 :xmmword) ++ (defreg xmm6 6 :xmmword) ++ (defreg xmm7 7 :xmmword) ++ (defregset *xmm-regs* xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7) + ;; registers used to pass arguments ;; @@ -1414,33 +1664,55 @@ diff -x 'CVS*' -Naur src-093/compiler/x86/vm.lisp src/compiler/x86/vm.lisp ;;; the new way: (define-storage-base float-registers :finite :size 8) -+(define-storage-base sse-registers :finite :size 8) ++(define-storage-base xmm-registers :finite :size 8) + (define-storage-base stack :unbounded :size 8) (define-storage-base constant :non-packed) (define-storage-base immediate-constant :non-packed) -@@ -320,6 +334,8 @@ - :save-p t - :alternate-scs (complex-long-stack)) +@@ -186,6 +200,7 @@ + (sap-stack stack) ; System area pointers. + (single-stack stack) ; single-floats + (double-stack stack :element-size 2) ; double-floats. ++ (xmm-stack stack :element-size 4) ; xmm + #!+long-float + (long-stack stack :element-size 3) ; long-floats. + (complex-single-stack stack :element-size 2) ; complex-single-floats +@@ -290,6 +305,12 @@ + :save-p t + :alternate-scs (double-stack)) -+ (sse-reg sse-registers -+ :locations #.*sse-regs*) - ;; a catch or unwind block - (catch-block stack :element-size kludge-nondeterministic-catch-block-size)) - -@@ -337,6 +353,7 @@ ++ ;; non-descriptor XMMs ++ (xmm-reg xmm-registers ++ :locations #.*xmm-regs* ++ :save-p t ++ :alternate-scs (xmm-stack)) ++ + ;; non-descriptor LONG-FLOATs + #!+long-float + (long-reg float-registers +@@ -337,6 +358,7 @@ ;;; These are used to (at least) determine operand size. (defparameter *float-sc-names* '(single-reg)) (defparameter *double-sc-names* '(double-reg double-stack)) -+(defparameter *dqword-sc-names* '(sse-reg)) ++(defparameter *xmmword-sc-names* '(xmm-reg)) ) ; EVAL-WHEN ;;;; miscellaneous TNs for the various registers -@@ -444,6 +461,7 @@ +@@ -359,7 +381,8 @@ + (def-misc-reg-tns unsigned-reg eax ebx ecx edx ebp esp edi esi) + (def-misc-reg-tns word-reg ax bx cx dx bp sp di si) + (def-misc-reg-tns byte-reg al ah bl bh cl ch dl dh) +- (def-misc-reg-tns single-reg fr0 fr1 fr2 fr3 fr4 fr5 fr6 fr7)) ++ (def-misc-reg-tns single-reg fr0 fr1 fr2 fr3 fr4 fr5 fr6 fr7) ++ (def-misc-reg-tns xmm-reg xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7)) + + ;;; TNs for registers used to pass arguments + (defparameter *register-arg-tns* +@@ -444,6 +467,7 @@ ;; FIXME: Shouldn't this be an ERROR? (format nil "" offset sc-name)))) (float-registers (format nil "FR~D" offset)) -+ (sse-registers (format nil "XMM~D" offset)) ++ (xmm-registers (format nil "XMM~D" offset)) (stack (format nil "S~D" offset)) (constant (format nil "Const~D" offset)) (immediate-constant "Immed") diff --git a/sbcl-src/patch_against_sbcl_0_9_3 b/sbcl-src/patch_against_sbcl_0_9_3__08082005 similarity index 100% copy from sbcl-src/patch_against_sbcl_0_9_3 copy to sbcl-src/patch_against_sbcl_0_9_3__08082005 diff --git a/sbcl-src/src-093/compiler/x86/float.lisp b/sbcl-src/src-093/compiler/x86/float.lisp new file mode 100644 index 0000000..621a1cd --- /dev/null +++ b/sbcl-src/src-093/compiler/x86/float.lisp @@ -0,0 +1,4310 @@ +;;;; floating point support for the x86 + +;;;; This software is part of the SBCL system. See the README file for +;;;; more information. +;;;; +;;;; This software is derived from the CMU CL system, which was +;;;; written at Carnegie Mellon University and released into the +;;;; public domain. The software is in the public domain and is +;;;; provided with absolutely no warranty. See the COPYING and CREDITS +;;;; files for more information. + +(in-package "SB!VM") + +(macrolet ((ea-for-xf-desc (tn slot) + `(make-ea + :dword :base ,tn + :disp (- (* ,slot n-word-bytes) + other-pointer-lowtag)))) + (defun ea-for-sf-desc (tn) + (ea-for-xf-desc tn single-float-value-slot)) + (defun ea-for-df-desc (tn) + (ea-for-xf-desc tn double-float-value-slot)) + #!+long-float + (defun ea-for-lf-desc (tn) + (ea-for-xf-desc tn long-float-value-slot)) + ;; complex floats + (defun ea-for-csf-real-desc (tn) + (ea-for-xf-desc tn complex-single-float-real-slot)) + (defun ea-for-csf-imag-desc (tn) + (ea-for-xf-desc tn complex-single-float-imag-slot)) + (defun ea-for-cdf-real-desc (tn) + (ea-for-xf-desc tn complex-double-float-real-slot)) + (defun ea-for-cdf-imag-desc (tn) + (ea-for-xf-desc tn complex-double-float-imag-slot)) + #!+long-float + (defun ea-for-clf-real-desc (tn) + (ea-for-xf-desc tn complex-long-float-real-slot)) + #!+long-float + (defun ea-for-clf-imag-desc (tn) + (ea-for-xf-desc tn complex-long-float-imag-slot))) + +(macrolet ((ea-for-xf-stack (tn kind) + `(make-ea + :dword :base ebp-tn + :disp (- (* (+ (tn-offset ,tn) + (ecase ,kind (:single 1) (:double 2) (:long 3))) + n-word-bytes))))) + (defun ea-for-sf-stack (tn) + (ea-for-xf-stack tn :single)) + (defun ea-for-df-stack (tn) + (ea-for-xf-stack tn :double)) + #!+long-float + (defun ea-for-lf-stack (tn) + (ea-for-xf-stack tn :long))) + +;;; Telling the FPU to wait is required in order to make signals occur +;;; at the expected place, but naturally slows things down. +;;; +;;; NODE is the node whose compilation policy controls the decision +;;; whether to just blast through carelessly or carefully emit wait +;;; instructions and whatnot. +;;; +;;; NOTE-NEXT-INSTRUCTION, if supplied, is to be passed to +;;; #'NOTE-NEXT-INSTRUCTION. +;;; +;;; Until 2004-03-15, the implementation of this was buggy; it +;;; unconditionally emitted the WAIT instruction. It turns out that +;;; this is the right thing to do anyway; omitting them can lead to +;;; system corruption on conforming code. -- CSR +(defun maybe-fp-wait (node &optional note-next-instruction) + (declare (ignore node)) + #+nil + (when (policy node (or (= debug 3) (> safety speed)))) + (when note-next-instruction + (note-next-instruction note-next-instruction :internal-error)) + (inst wait)) + +;;; complex float stack EAs +(macrolet ((ea-for-cxf-stack (tn kind slot &optional base) + `(make-ea + :dword :base ,base + :disp (- (* (+ (tn-offset ,tn) + (* (ecase ,kind + (:single 1) + (:double 2) + (:long 3)) + (ecase ,slot (:real 1) (:imag 2)))) + n-word-bytes))))) + (defun ea-for-csf-real-stack (tn &optional (base ebp-tn)) + (ea-for-cxf-stack tn :single :real base)) + (defun ea-for-csf-imag-stack (tn &optional (base ebp-tn)) + (ea-for-cxf-stack tn :single :imag base)) + (defun ea-for-cdf-real-stack (tn &optional (base ebp-tn)) + (ea-for-cxf-stack tn :double :real base)) + (defun ea-for-cdf-imag-stack (tn &optional (base ebp-tn)) + (ea-for-cxf-stack tn :double :imag base)) + #!+long-float + (defun ea-for-clf-real-stack (tn &optional (base ebp-tn)) + (ea-for-cxf-stack tn :long :real base)) + #!+long-float + (defun ea-for-clf-imag-stack (tn &optional (base ebp-tn)) + (ea-for-cxf-stack tn :long :imag base))) + +;;; Abstract out the copying of a FP register to the FP stack top, and +;;; provide two alternatives for its implementation. Note: it's not +;;; necessary to distinguish between a single or double register move +;;; here. +;;; +;;; Using a Pop then load. +(defun copy-fp-reg-to-fr0 (reg) + (aver (not (zerop (tn-offset reg)))) + (inst fstp fr0-tn) + (inst fld (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (1- (tn-offset reg))))) +;;; Using Fxch then Fst to restore the original reg contents. +#+nil +(defun copy-fp-reg-to-fr0 (reg) + (aver (not (zerop (tn-offset reg)))) + (inst fxch reg) + (inst fst reg)) + +;;; The x86 can't store a long-float to memory without popping the +;;; stack and marking a register as empty, so it is necessary to +;;; restore the register from memory. +#!+long-float +(defun store-long-float (ea) + (inst fstpl ea) + (inst fldl ea)) + +;;;; move functions + +;;; X is source, Y is destination. +(define-move-fun (load-single 2) (vop x y) + ((single-stack) (single-reg)) + (with-empty-tn@fp-top(y) + (inst fld (ea-for-sf-stack x)))) + +(define-move-fun (store-single 2) (vop x y) + ((single-reg) (single-stack)) + (cond ((zerop (tn-offset x)) + (inst fst (ea-for-sf-stack y))) + (t + (inst fxch x) + (inst fst (ea-for-sf-stack y)) + ;; This may not be necessary as ST0 is likely invalid now. + (inst fxch x)))) + +(define-move-fun (load-double 2) (vop x y) + ((double-stack) (double-reg)) + (with-empty-tn@fp-top(y) + (inst fldd (ea-for-df-stack x)))) + +(define-move-fun (store-double 2) (vop x y) + ((double-reg) (double-stack)) + (cond ((zerop (tn-offset x)) + (inst fstd (ea-for-df-stack y))) + (t + (inst fxch x) + (inst fstd (ea-for-df-stack y)) + ;; This may not be necessary as ST0 is likely invalid now. + (inst fxch x)))) + +#!+long-float +(define-move-fun (load-long 2) (vop x y) + ((long-stack) (long-reg)) + (with-empty-tn@fp-top(y) + (inst fldl (ea-for-lf-stack x)))) + +#!+long-float +(define-move-fun (store-long 2) (vop x y) + ((long-reg) (long-stack)) + (cond ((zerop (tn-offset x)) + (store-long-float (ea-for-lf-stack y))) + (t + (inst fxch x) + (store-long-float (ea-for-lf-stack y)) + ;; This may not be necessary as ST0 is likely invalid now. + (inst fxch x)))) + +;;; The i387 has instructions to load some useful constants. This +;;; doesn't save much time but might cut down on memory access and +;;; reduce the size of the constant vector (CV). Intel claims they are +;;; stored in a more precise form on chip. Anyhow, might as well use +;;; the feature. It can be turned off by hacking the +;;; "immediate-constant-sc" in vm.lisp. +(eval-when (:compile-toplevel :execute) + (setf *read-default-float-format* + #!+long-float 'long-float #!-long-float 'double-float)) +(define-move-fun (load-fp-constant 2) (vop x y) + ((fp-constant) (single-reg double-reg #!+long-float long-reg)) + (let ((value (sb!c::constant-value (sb!c::tn-leaf x)))) + (with-empty-tn@fp-top(y) + (cond ((zerop value) + (inst fldz)) + ((= value 1e0) + (inst fld1)) + ((= value (coerce pi *read-default-float-format*)) + (inst fldpi)) + ((= value (log 10e0 2e0)) + (inst fldl2t)) + ((= value (log 2.718281828459045235360287471352662e0 2e0)) + (inst fldl2e)) + ((= value (log 2e0 10e0)) + (inst fldlg2)) + ((= value (log 2e0 2.718281828459045235360287471352662e0)) + (inst fldln2)) + (t (warn "ignoring bogus i387 constant ~A" value)))))) +(eval-when (:compile-toplevel :execute) + (setf *read-default-float-format* 'single-float)) + +;;;; complex float move functions + +(defun complex-single-reg-real-tn (x) + (make-random-tn :kind :normal :sc (sc-or-lose 'single-reg) + :offset (tn-offset x))) +(defun complex-single-reg-imag-tn (x) + (make-random-tn :kind :normal :sc (sc-or-lose 'single-reg) + :offset (1+ (tn-offset x)))) + +(defun complex-double-reg-real-tn (x) + (make-random-tn :kind :normal :sc (sc-or-lose 'double-reg) + :offset (tn-offset x))) +(defun complex-double-reg-imag-tn (x) + (make-random-tn :kind :normal :sc (sc-or-lose 'double-reg) + :offset (1+ (tn-offset x)))) + +#!+long-float +(defun complex-long-reg-real-tn (x) + (make-random-tn :kind :normal :sc (sc-or-lose 'long-reg) + :offset (tn-offset x))) +#!+long-float +(defun complex-long-reg-imag-tn (x) + (make-random-tn :kind :normal :sc (sc-or-lose 'long-reg) + :offset (1+ (tn-offset x)))) + +;;; X is source, Y is destination. +(define-move-fun (load-complex-single 2) (vop x y) + ((complex-single-stack) (complex-single-reg)) + (let ((real-tn (complex-single-reg-real-tn y))) + (with-empty-tn@fp-top (real-tn) + (inst fld (ea-for-csf-real-stack x)))) + (let ((imag-tn (complex-single-reg-imag-tn y))) + (with-empty-tn@fp-top (imag-tn) + (inst fld (ea-for-csf-imag-stack x))))) + +(define-move-fun (store-complex-single 2) (vop x y) + ((complex-single-reg) (complex-single-stack)) + (let ((real-tn (complex-single-reg-real-tn x))) + (cond ((zerop (tn-offset real-tn)) + (inst fst (ea-for-csf-real-stack y))) + (t + (inst fxch real-tn) + (inst fst (ea-for-csf-real-stack y)) + (inst fxch real-tn)))) + (let ((imag-tn (complex-single-reg-imag-tn x))) + (inst fxch imag-tn) + (inst fst (ea-for-csf-imag-stack y)) + (inst fxch imag-tn))) + +(define-move-fun (load-complex-double 2) (vop x y) + ((complex-double-stack) (complex-double-reg)) + (let ((real-tn (complex-double-reg-real-tn y))) + (with-empty-tn@fp-top(real-tn) + (inst fldd (ea-for-cdf-real-stack x)))) + (let ((imag-tn (complex-double-reg-imag-tn y))) + (with-empty-tn@fp-top(imag-tn) + (inst fldd (ea-for-cdf-imag-stack x))))) + +(define-move-fun (store-complex-double 2) (vop x y) + ((complex-double-reg) (complex-double-stack)) + (let ((real-tn (complex-double-reg-real-tn x))) + (cond ((zerop (tn-offset real-tn)) + (inst fstd (ea-for-cdf-real-stack y))) + (t + (inst fxch real-tn) + (inst fstd (ea-for-cdf-real-stack y)) + (inst fxch real-tn)))) + (let ((imag-tn (complex-double-reg-imag-tn x))) + (inst fxch imag-tn) + (inst fstd (ea-for-cdf-imag-stack y)) + (inst fxch imag-tn))) + +#!+long-float +(define-move-fun (load-complex-long 2) (vop x y) + ((complex-long-stack) (complex-long-reg)) + (let ((real-tn (complex-long-reg-real-tn y))) + (with-empty-tn@fp-top(real-tn) + (inst fldl (ea-for-clf-real-stack x)))) + (let ((imag-tn (complex-long-reg-imag-tn y))) + (with-empty-tn@fp-top(imag-tn) + (inst fldl (ea-for-clf-imag-stack x))))) + +#!+long-float +(define-move-fun (store-complex-long 2) (vop x y) + ((complex-long-reg) (complex-long-stack)) + (let ((real-tn (complex-long-reg-real-tn x))) + (cond ((zerop (tn-offset real-tn)) + (store-long-float (ea-for-clf-real-stack y))) + (t + (inst fxch real-tn) + (store-long-float (ea-for-clf-real-stack y)) + (inst fxch real-tn)))) + (let ((imag-tn (complex-long-reg-imag-tn x))) + (inst fxch imag-tn) + (store-long-float (ea-for-clf-imag-stack y)) + (inst fxch imag-tn))) + + +;;;; move VOPs + +;;; float register to register moves +(define-vop (float-move) + (:args (x)) + (:results (y)) + (:note "float move") + (:generator 0 + (unless (location= x y) + (cond ((zerop (tn-offset y)) + (copy-fp-reg-to-fr0 x)) + ((zerop (tn-offset x)) + (inst fstd y)) + (t + (inst fxch x) + (inst fstd y) + (inst fxch x)))))) + +(define-vop (single-move float-move) + (:args (x :scs (single-reg) :target y :load-if (not (location= x y)))) + (:results (y :scs (single-reg) :load-if (not (location= x y))))) +(define-move-vop single-move :move (single-reg) (single-reg)) + +(define-vop (double-move float-move) + (:args (x :scs (double-reg) :target y :load-if (not (location= x y)))) + (:results (y :scs (double-reg) :load-if (not (location= x y))))) +(define-move-vop double-move :move (double-reg) (double-reg)) + +#!+long-float +(define-vop (long-move float-move) + (:args (x :scs (long-reg) :target y :load-if (not (location= x y)))) + (:results (y :scs (long-reg) :load-if (not (location= x y))))) +#!+long-float +(define-move-vop long-move :move (long-reg) (long-reg)) + +;;; complex float register to register moves +(define-vop (complex-float-move) + (:args (x :target y :load-if (not (location= x y)))) + (:results (y :load-if (not (location= x y)))) + (:note "complex float move") + (:generator 0 + (unless (location= x y) + ;; Note the complex-float-regs are aligned to every second + ;; float register so there is not need to worry about overlap. + (let ((x-real (complex-double-reg-real-tn x)) + (y-real (complex-double-reg-real-tn y))) + (cond ((zerop (tn-offset y-real)) + (copy-fp-reg-to-fr0 x-real)) + ((zerop (tn-offset x-real)) + (inst fstd y-real)) + (t + (inst fxch x-real) + (inst fstd y-real) + (inst fxch x-real)))) + (let ((x-imag (complex-double-reg-imag-tn x)) + (y-imag (complex-double-reg-imag-tn y))) + (inst fxch x-imag) + (inst fstd y-imag) + (inst fxch x-imag))))) + +(define-vop (complex-single-move complex-float-move) + (:args (x :scs (complex-single-reg) :target y + :load-if (not (location= x y)))) + (:results (y :scs (complex-single-reg) :load-if (not (location= x y))))) +(define-move-vop complex-single-move :move + (complex-single-reg) (complex-single-reg)) + +(define-vop (complex-double-move complex-float-move) + (:args (x :scs (complex-double-reg) + :target y :load-if (not (location= x y)))) + (:results (y :scs (complex-double-reg) :load-if (not (location= x y))))) +(define-move-vop complex-double-move :move + (complex-double-reg) (complex-double-reg)) + +#!+long-float +(define-vop (complex-long-move complex-float-move) + (:args (x :scs (complex-long-reg) + :target y :load-if (not (location= x y)))) + (:results (y :scs (complex-long-reg) :load-if (not (location= x y))))) +#!+long-float +(define-move-vop complex-long-move :move + (complex-long-reg) (complex-long-reg)) + +;;; Move from float to a descriptor reg. allocating a new float +;;; object in the process. +(define-vop (move-from-single) + (:args (x :scs (single-reg) :to :save)) + (:results (y :scs (descriptor-reg))) + (:node-var node) + (:note "float to pointer coercion") + (:generator 13 + (with-fixed-allocation (y + single-float-widetag + single-float-size node) + (with-tn@fp-top(x) + (inst fst (ea-for-sf-desc y)))))) +(define-move-vop move-from-single :move + (single-reg) (descriptor-reg)) + +(define-vop (move-from-double) + (:args (x :scs (double-reg) :to :save)) + (:results (y :scs (descriptor-reg))) + (:node-var node) + (:note "float to pointer coercion") + (:generator 13 + (with-fixed-allocation (y + double-float-widetag + double-float-size + node) + (with-tn@fp-top(x) + (inst fstd (ea-for-df-desc y)))))) +(define-move-vop move-from-double :move + (double-reg) (descriptor-reg)) + +#!+long-float +(define-vop (move-from-long) + (:args (x :scs (long-reg) :to :save)) + (:results (y :scs (descriptor-reg))) + (:node-var node) + (:note "float to pointer coercion") + (:generator 13 + (with-fixed-allocation (y + long-float-widetag + long-float-size + node) + (with-tn@fp-top(x) + (store-long-float (ea-for-lf-desc y)))))) +#!+long-float +(define-move-vop move-from-long :move + (long-reg) (descriptor-reg)) + +(define-vop (move-from-fp-constant) + (:args (x :scs (fp-constant))) + (:results (y :scs (descriptor-reg))) + (:generator 2 + (ecase (sb!c::constant-value (sb!c::tn-leaf x)) + (0f0 (load-symbol-value y *fp-constant-0f0*)) + (1f0 (load-symbol-value y *fp-constant-1f0*)) + (0d0 (load-symbol-value y *fp-constant-0d0*)) + (1d0 (load-symbol-value y *fp-constant-1d0*)) + #!+long-float + (0l0 (load-symbol-value y *fp-constant-0l0*)) + #!+long-float + (1l0 (load-symbol-value y *fp-constant-1l0*)) + #!+long-float + (#.pi (load-symbol-value y *fp-constant-pi*)) + #!+long-float + (#.(log 10l0 2l0) (load-symbol-value y *fp-constant-l2t*)) + #!+long-float + (#.(log 2.718281828459045235360287471352662L0 2l0) + (load-symbol-value y *fp-constant-l2e*)) + #!+long-float + (#.(log 2l0 10l0) (load-symbol-value y *fp-constant-lg2*)) + #!+long-float + (#.(log 2l0 2.718281828459045235360287471352662L0) + (load-symbol-value y *fp-constant-ln2*))))) +(define-move-vop move-from-fp-constant :move + (fp-constant) (descriptor-reg)) + +;;; Move from a descriptor to a float register. +(define-vop (move-to-single) + (:args (x :scs (descriptor-reg))) + (:results (y :scs (single-reg))) + (:note "pointer to float coercion") + (:generator 2 + (with-empty-tn@fp-top(y) + (inst fld (ea-for-sf-desc x))))) +(define-move-vop move-to-single :move (descriptor-reg) (single-reg)) + +(define-vop (move-to-double) + (:args (x :scs (descriptor-reg))) + (:results (y :scs (double-reg))) + (:note "pointer to float coercion") + (:generator 2 + (with-empty-tn@fp-top(y) + (inst fldd (ea-for-df-desc x))))) +(define-move-vop move-to-double :move (descriptor-reg) (double-reg)) + +#!+long-float +(define-vop (move-to-long) + (:args (x :scs (descriptor-reg))) + (:results (y :scs (long-reg))) + (:note "pointer to float coercion") + (:generator 2 + (with-empty-tn@fp-top(y) + (inst fldl (ea-for-lf-desc x))))) +#!+long-float +(define-move-vop move-to-long :move (descriptor-reg) (long-reg)) + +;;; Move from complex float to a descriptor reg. allocating a new +;;; complex float object in the process. +(define-vop (move-from-complex-single) + (:args (x :scs (complex-single-reg) :to :save)) + (:results (y :scs (descriptor-reg))) + (:node-var node) + (:note "complex float to pointer coercion") + (:generator 13 + (with-fixed-allocation (y + complex-single-float-widetag + complex-single-float-size + node) + (let ((real-tn (complex-single-reg-real-tn x))) + (with-tn@fp-top(real-tn) + (inst fst (ea-for-csf-real-desc y)))) + (let ((imag-tn (complex-single-reg-imag-tn x))) + (with-tn@fp-top(imag-tn) + (inst fst (ea-for-csf-imag-desc y))))))) +(define-move-vop move-from-complex-single :move + (complex-single-reg) (descriptor-reg)) + +(define-vop (move-from-complex-double) + (:args (x :scs (complex-double-reg) :to :save)) + (:results (y :scs (descriptor-reg))) + (:node-var node) + (:note "complex float to pointer coercion") + (:generator 13 + (with-fixed-allocation (y + complex-double-float-widetag + complex-double-float-size + node) + (let ((real-tn (complex-double-reg-real-tn x))) + (with-tn@fp-top(real-tn) + (inst fstd (ea-for-cdf-real-desc y)))) + (let ((imag-tn (complex-double-reg-imag-tn x))) + (with-tn@fp-top(imag-tn) + (inst fstd (ea-for-cdf-imag-desc y))))))) +(define-move-vop move-from-complex-double :move + (complex-double-reg) (descriptor-reg)) + +#!+long-float +(define-vop (move-from-complex-long) + (:args (x :scs (complex-long-reg) :to :save)) + (:results (y :scs (descriptor-reg))) + (:node-var node) + (:note "complex float to pointer coercion") + (:generator 13 + (with-fixed-allocation (y + complex-long-float-widetag + complex-long-float-size + node) + (let ((real-tn (complex-long-reg-real-tn x))) + (with-tn@fp-top(real-tn) + (store-long-float (ea-for-clf-real-desc y)))) + (let ((imag-tn (complex-long-reg-imag-tn x))) + (with-tn@fp-top(imag-tn) + (store-long-float (ea-for-clf-imag-desc y))))))) +#!+long-float +(define-move-vop move-from-complex-long :move + (complex-long-reg) (descriptor-reg)) + +;;; Move from a descriptor to a complex float register. +(macrolet ((frob (name sc format) + `(progn + (define-vop (,name) + (:args (x :scs (descriptor-reg))) + (:results (y :scs (,sc))) + (:note "pointer to complex float coercion") + (:generator 2 + (let ((real-tn (complex-double-reg-real-tn y))) + (with-empty-tn@fp-top(real-tn) + ,@(ecase format + (:single '((inst fld (ea-for-csf-real-desc x)))) + (:double '((inst fldd (ea-for-cdf-real-desc x)))) + #!+long-float + (:long '((inst fldl (ea-for-clf-real-desc x))))))) + (let ((imag-tn (complex-double-reg-imag-tn y))) + (with-empty-tn@fp-top(imag-tn) + ,@(ecase format + (:single '((inst fld (ea-for-csf-imag-desc x)))) + (:double '((inst fldd (ea-for-cdf-imag-desc x)))) + #!+long-float + (:long '((inst fldl (ea-for-clf-imag-desc x))))))))) + (define-move-vop ,name :move (descriptor-reg) (,sc))))) + (frob move-to-complex-single complex-single-reg :single) + (frob move-to-complex-double complex-double-reg :double) + #!+long-float + (frob move-to-complex-double complex-long-reg :long)) + +;;;; the move argument vops +;;;; +;;;; Note these are also used to stuff fp numbers onto the c-call +;;;; stack so the order is different than the lisp-stack. + +;;; the general MOVE-ARG VOP +(macrolet ((frob (name sc stack-sc format) + `(progn + (define-vop (,name) + (:args (x :scs (,sc) :target y) + (fp :scs (any-reg) + :load-if (not (sc-is y ,sc)))) + (:results (y)) + (:note "float argument move") + (:generator ,(case format (:single 2) (:double 3) (:long 4)) + (sc-case y + (,sc + (unless (location= x y) + (cond ((zerop (tn-offset y)) + (copy-fp-reg-to-fr0 x)) + ((zerop (tn-offset x)) + (inst fstd y)) + (t + (inst fxch x) + (inst fstd y) + (inst fxch x))))) + (,stack-sc + (if (= (tn-offset fp) esp-offset) + (let* ((offset (* (tn-offset y) n-word-bytes)) + (ea (make-ea :dword :base fp :disp offset))) + (with-tn@fp-top(x) + ,@(ecase format + (:single '((inst fst ea))) + (:double '((inst fstd ea))) + #!+long-float + (:long '((store-long-float ea)))))) + (let ((ea (make-ea + :dword :base fp + :disp (- (* (+ (tn-offset y) + ,(case format + (:single 1) + (:double 2) + (:long 3))) + n-word-bytes))))) + (with-tn@fp-top(x) + ,@(ecase format + (:single '((inst fst ea))) + (:double '((inst fstd ea))) + #!+long-float + (:long '((store-long-float ea))))))))))) + (define-move-vop ,name :move-arg + (,sc descriptor-reg) (,sc))))) + (frob move-single-float-arg single-reg single-stack :single) + (frob move-double-float-arg double-reg double-stack :double) + #!+long-float + (frob move-long-float-arg long-reg long-stack :long)) + +;;;; complex float MOVE-ARG VOP +(macrolet ((frob (name sc stack-sc format) + `(progn + (define-vop (,name) + (:args (x :scs (,sc) :target y) + (fp :scs (any-reg) + :load-if (not (sc-is y ,sc)))) + (:results (y)) + (:note "complex float argument move") + (:generator ,(ecase format (:single 2) (:double 3) (:long 4)) + (sc-case y + (,sc + (unless (location= x y) + (let ((x-real (complex-double-reg-real-tn x)) + (y-real (complex-double-reg-real-tn y))) + (cond ((zerop (tn-offset y-real)) + (copy-fp-reg-to-fr0 x-real)) + ((zerop (tn-offset x-real)) + (inst fstd y-real)) + (t + (inst fxch x-real) + (inst fstd y-real) + (inst fxch x-real)))) + (let ((x-imag (complex-double-reg-imag-tn x)) + (y-imag (complex-double-reg-imag-tn y))) + (inst fxch x-imag) + (inst fstd y-imag) + (inst fxch x-imag)))) + (,stack-sc + (let ((real-tn (complex-double-reg-real-tn x))) + (cond ((zerop (tn-offset real-tn)) + ,@(ecase format + (:single + '((inst fst + (ea-for-csf-real-stack y fp)))) + (:double + '((inst fstd + (ea-for-cdf-real-stack y fp)))) + #!+long-float + (:long + '((store-long-float + (ea-for-clf-real-stack y fp)))))) + (t + (inst fxch real-tn) + ,@(ecase format + (:single + '((inst fst + (ea-for-csf-real-stack y fp)))) + (:double + '((inst fstd + (ea-for-cdf-real-stack y fp)))) + #!+long-float + (:long + '((store-long-float + (ea-for-clf-real-stack y fp))))) + (inst fxch real-tn)))) + (let ((imag-tn (complex-double-reg-imag-tn x))) + (inst fxch imag-tn) + ,@(ecase format + (:single + '((inst fst (ea-for-csf-imag-stack y fp)))) + (:double + '((inst fstd (ea-for-cdf-imag-stack y fp)))) + #!+long-float + (:long + '((store-long-float + (ea-for-clf-imag-stack y fp))))) + (inst fxch imag-tn)))))) + (define-move-vop ,name :move-arg + (,sc descriptor-reg) (,sc))))) + (frob move-complex-single-float-arg + complex-single-reg complex-single-stack :single) + (frob move-complex-double-float-arg + complex-double-reg complex-double-stack :double) + #!+long-float + (frob move-complex-long-float-arg + complex-long-reg complex-long-stack :long)) + +(define-move-vop move-arg :move-arg + (single-reg double-reg #!+long-float long-reg + complex-single-reg complex-double-reg #!+long-float complex-long-reg) + (descriptor-reg)) + + +;;;; arithmetic VOPs + +;;; dtc: the floating point arithmetic vops +;;; +;;; Note: Although these can accept x and y on the stack or pointed to +;;; from a descriptor register, they will work with register loading +;;; without these. Same deal with the result - it need only be a +;;; register. When load-tns are needed they will probably be in ST0 +;;; and the code below should be able to correctly handle all cases. +;;; +;;; However it seems to produce better code if all arg. and result +;;; options are used; on the P86 there is no extra cost in using a +;;; memory operand to the FP instructions - not so on the PPro. +;;; +;;; It may also be useful to handle constant args? +;;; +;;; 22-Jul-97: descriptor args lose in some simple cases when +;;; a function result computed in a loop. Then Python insists +;;; on consing the intermediate values! For example +;;; +;;; (defun test(a n) +;;; (declare (type (simple-array double-float (*)) a) +;;; (fixnum n)) +;;; (let ((sum 0d0)) +;;; (declare (type double-float sum)) +;;; (dotimes (i n) +;;; (incf sum (* (aref a i)(aref a i)))) +;;; sum)) +;;; +;;; So, disabling descriptor args until this can be fixed elsewhere. +(macrolet + ((frob (op fop-sti fopr-sti + fop fopr sname scost + fopd foprd dname dcost + lname lcost) + #!-long-float (declare (ignore lcost lname)) + `(progn + (define-vop (,sname) + (:translate ,op) + (:args (x :scs (single-reg single-stack #+nil descriptor-reg) + :to :eval) + (y :scs (single-reg single-stack #+nil descriptor-reg) + :to :eval)) + (:temporary (:sc single-reg :offset fr0-offset + :from :eval :to :result) fr0) + (:results (r :scs (single-reg single-stack))) + (:arg-types single-float single-float) + (:result-types single-float) + (:policy :fast-safe) + (:note "inline float arithmetic") + (:vop-var vop) + (:save-p :compute-only) + (:node-var node) + (:generator ,scost + ;; Handle a few special cases + (cond + ;; x, y, and r are the same register. + ((and (sc-is x single-reg) (location= x r) (location= y r)) + (cond ((zerop (tn-offset r)) + (inst ,fop fr0)) + (t + (inst fxch r) + (inst ,fop fr0) + ;; XX the source register will not be valid. + (note-next-instruction vop :internal-error) + (inst fxch r)))) + + ;; x and r are the same register. + ((and (sc-is x single-reg) (location= x r)) + (cond ((zerop (tn-offset r)) + (sc-case y + (single-reg + ;; ST(0) = ST(0) op ST(y) + (inst ,fop y)) + (single-stack + ;; ST(0) = ST(0) op Mem + (inst ,fop (ea-for-sf-stack y))) + (descriptor-reg + (inst ,fop (ea-for-sf-desc y))))) + (t + ;; y to ST0 + (sc-case y + (single-reg + (unless (zerop (tn-offset y)) + (copy-fp-reg-to-fr0 y))) + ((single-stack descriptor-reg) + (inst fstp fr0) + (if (sc-is y single-stack) + (inst fld (ea-for-sf-stack y)) + (inst fld (ea-for-sf-desc y))))) + ;; ST(i) = ST(i) op ST0 + (inst ,fop-sti r))) + (maybe-fp-wait node vop)) + ;; y and r are the same register. + ((and (sc-is y single-reg) (location= y r)) + (cond ((zerop (tn-offset r)) + (sc-case x + (single-reg + ;; ST(0) = ST(x) op ST(0) + (inst ,fopr x)) + (single-stack + ;; ST(0) = Mem op ST(0) + (inst ,fopr (ea-for-sf-stack x))) + (descriptor-reg + (inst ,fopr (ea-for-sf-desc x))))) + (t + ;; x to ST0 + (sc-case x + (single-reg + (unless (zerop (tn-offset x)) + (copy-fp-reg-to-fr0 x))) + ((single-stack descriptor-reg) + (inst fstp fr0) + (if (sc-is x single-stack) + (inst fld (ea-for-sf-stack x)) + (inst fld (ea-for-sf-desc x))))) + ;; ST(i) = ST(0) op ST(i) + (inst ,fopr-sti r))) + (maybe-fp-wait node vop)) + ;; the default case + (t + ;; Get the result to ST0. + + ;; Special handling is needed if x or y are in ST0, and + ;; simpler code is generated. + (cond + ;; x is in ST0 + ((and (sc-is x single-reg) (zerop (tn-offset x))) + ;; ST0 = ST0 op y + (sc-case y + (single-reg + (inst ,fop y)) + (single-stack + (inst ,fop (ea-for-sf-stack y))) + (descriptor-reg + (inst ,fop (ea-for-sf-desc y))))) + ;; y is in ST0 + ((and (sc-is y single-reg) (zerop (tn-offset y))) + ;; ST0 = x op ST0 + (sc-case x + (single-reg + (inst ,fopr x)) + (single-stack + (inst ,fopr (ea-for-sf-stack x))) + (descriptor-reg + (inst ,fopr (ea-for-sf-desc x))))) + (t + ;; x to ST0 + (sc-case x + (single-reg + (copy-fp-reg-to-fr0 x)) + (single-stack + (inst fstp fr0) + (inst fld (ea-for-sf-stack x))) + (descriptor-reg + (inst fstp fr0) + (inst fld (ea-for-sf-desc x)))) + ;; ST0 = ST0 op y + (sc-case y + (single-reg + (inst ,fop y)) + (single-stack + (inst ,fop (ea-for-sf-stack y))) + (descriptor-reg + (inst ,fop (ea-for-sf-desc y)))))) + + (note-next-instruction vop :internal-error) + + ;; Finally save the result. + (sc-case r + (single-reg + (cond ((zerop (tn-offset r)) + (maybe-fp-wait node)) + (t + (inst fst r)))) + (single-stack + (inst fst (ea-for-sf-stack r)))))))) + + (define-vop (,dname) + (:translate ,op) + (:args (x :scs (double-reg double-stack #+nil descriptor-reg) + :to :eval) + (y :scs (double-reg double-stack #+nil descriptor-reg) + :to :eval)) + (:temporary (:sc double-reg :offset fr0-offset + :from :eval :to :result) fr0) + (:results (r :scs (double-reg double-stack))) + (:arg-types double-float double-float) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline float arithmetic") + (:vop-var vop) + (:save-p :compute-only) + (:node-var node) + (:generator ,dcost + ;; Handle a few special cases. + (cond + ;; x, y, and r are the same register. + ((and (sc-is x double-reg) (location= x r) (location= y r)) + (cond ((zerop (tn-offset r)) + (inst ,fop fr0)) + (t + (inst fxch x) + (inst ,fopd fr0) + ;; XX the source register will not be valid. + (note-next-instruction vop :internal-error) + (inst fxch r)))) + + ;; x and r are the same register. + ((and (sc-is x double-reg) (location= x r)) + (cond ((zerop (tn-offset r)) + (sc-case y + (double-reg + ;; ST(0) = ST(0) op ST(y) + (inst ,fopd y)) + (double-stack + ;; ST(0) = ST(0) op Mem + (inst ,fopd (ea-for-df-stack y))) + (descriptor-reg + (inst ,fopd (ea-for-df-desc y))))) + (t + ;; y to ST0 + (sc-case y + (double-reg + (unless (zerop (tn-offset y)) + (copy-fp-reg-to-fr0 y))) + ((double-stack descriptor-reg) + (inst fstp fr0) + (if (sc-is y double-stack) + (inst fldd (ea-for-df-stack y)) + (inst fldd (ea-for-df-desc y))))) + ;; ST(i) = ST(i) op ST0 + (inst ,fop-sti r))) + (maybe-fp-wait node vop)) + ;; y and r are the same register. + ((and (sc-is y double-reg) (location= y r)) + (cond ((zerop (tn-offset r)) + (sc-case x + (double-reg + ;; ST(0) = ST(x) op ST(0) + (inst ,foprd x)) + (double-stack + ;; ST(0) = Mem op ST(0) + (inst ,foprd (ea-for-df-stack x))) + (descriptor-reg + (inst ,foprd (ea-for-df-desc x))))) + (t + ;; x to ST0 + (sc-case x + (double-reg + (unless (zerop (tn-offset x)) + (copy-fp-reg-to-fr0 x))) + ((double-stack descriptor-reg) + (inst fstp fr0) + (if (sc-is x double-stack) + (inst fldd (ea-for-df-stack x)) + (inst fldd (ea-for-df-desc x))))) + ;; ST(i) = ST(0) op ST(i) + (inst ,fopr-sti r))) + (maybe-fp-wait node vop)) + ;; the default case + (t + ;; Get the result to ST0. + + ;; Special handling is needed if x or y are in ST0, and + ;; simpler code is generated. + (cond + ;; x is in ST0 + ((and (sc-is x double-reg) (zerop (tn-offset x))) + ;; ST0 = ST0 op y + (sc-case y + (double-reg + (inst ,fopd y)) + (double-stack + (inst ,fopd (ea-for-df-stack y))) + (descriptor-reg + (inst ,fopd (ea-for-df-desc y))))) + ;; y is in ST0 + ((and (sc-is y double-reg) (zerop (tn-offset y))) + ;; ST0 = x op ST0 + (sc-case x + (double-reg + (inst ,foprd x)) + (double-stack + (inst ,foprd (ea-for-df-stack x))) + (descriptor-reg + (inst ,foprd (ea-for-df-desc x))))) + (t + ;; x to ST0 + (sc-case x + (double-reg + (copy-fp-reg-to-fr0 x)) + (double-stack + (inst fstp fr0) + (inst fldd (ea-for-df-stack x))) + (descriptor-reg + (inst fstp fr0) + (inst fldd (ea-for-df-desc x)))) + ;; ST0 = ST0 op y + (sc-case y + (double-reg + (inst ,fopd y)) + (double-stack + (inst ,fopd (ea-for-df-stack y))) + (descriptor-reg + (inst ,fopd (ea-for-df-desc y)))))) + + (note-next-instruction vop :internal-error) + + ;; Finally save the result. + (sc-case r + (double-reg + (cond ((zerop (tn-offset r)) + (maybe-fp-wait node)) + (t + (inst fst r)))) + (double-stack + (inst fstd (ea-for-df-stack r)))))))) + + #!+long-float + (define-vop (,lname) + (:translate ,op) + (:args (x :scs (long-reg) :to :eval) + (y :scs (long-reg) :to :eval)) + (:temporary (:sc long-reg :offset fr0-offset + :from :eval :to :result) fr0) + (:results (r :scs (long-reg))) + (:arg-types long-float long-float) + (:result-types long-float) + (:policy :fast-safe) + (:note "inline float arithmetic") + (:vop-var vop) + (:save-p :compute-only) + (:node-var node) + (:generator ,lcost + ;; Handle a few special cases. + (cond + ;; x, y, and r are the same register. + ((and (location= x r) (location= y r)) + (cond ((zerop (tn-offset r)) + (inst ,fop fr0)) + (t + (inst fxch x) + (inst ,fopd fr0) + ;; XX the source register will not be valid. + (note-next-instruction vop :internal-error) + (inst fxch r)))) + + ;; x and r are the same register. + ((location= x r) + (cond ((zerop (tn-offset r)) + ;; ST(0) = ST(0) op ST(y) + (inst ,fopd y)) + (t + ;; y to ST0 + (unless (zerop (tn-offset y)) + (copy-fp-reg-to-fr0 y)) + ;; ST(i) = ST(i) op ST0 + (inst ,fop-sti r))) + (maybe-fp-wait node vop)) + ;; y and r are the same register. + ((location= y r) + (cond ((zerop (tn-offset r)) + ;; ST(0) = ST(x) op ST(0) + (inst ,foprd x)) + (t + ;; x to ST0 + (unless (zerop (tn-offset x)) + (copy-fp-reg-to-fr0 x)) + ;; ST(i) = ST(0) op ST(i) + (inst ,fopr-sti r))) + (maybe-fp-wait node vop)) + ;; the default case + (t + ;; Get the result to ST0. + + ;; Special handling is needed if x or y are in ST0, and + ;; simpler code is generated. + (cond + ;; x is in ST0. + ((zerop (tn-offset x)) + ;; ST0 = ST0 op y + (inst ,fopd y)) + ;; y is in ST0 + ((zerop (tn-offset y)) + ;; ST0 = x op ST0 + (inst ,foprd x)) + (t + ;; x to ST0 + (copy-fp-reg-to-fr0 x) + ;; ST0 = ST0 op y + (inst ,fopd y))) + + (note-next-instruction vop :internal-error) + + ;; Finally save the result. + (cond ((zerop (tn-offset r)) + (maybe-fp-wait node)) + (t + (inst fst r)))))))))) + + (frob + fadd-sti fadd-sti + fadd fadd +/single-float 2 + faddd faddd +/double-float 2 + +/long-float 2) + (frob - fsub-sti fsubr-sti + fsub fsubr -/single-float 2 + fsubd fsubrd -/double-float 2 + -/long-float 2) + (frob * fmul-sti fmul-sti + fmul fmul */single-float 3 + fmuld fmuld */double-float 3 + */long-float 3) + (frob / fdiv-sti fdivr-sti + fdiv fdivr //single-float 12 + fdivd fdivrd //double-float 12 + //long-float 12)) + +(macrolet ((frob (name inst translate sc type) + `(define-vop (,name) + (:args (x :scs (,sc) :target fr0)) + (:results (y :scs (,sc))) + (:translate ,translate) + (:policy :fast-safe) + (:arg-types ,type) + (:result-types ,type) + (:temporary (:sc double-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:ignore fr0) + (:note "inline float arithmetic") + (:vop-var vop) + (:save-p :compute-only) + (:generator 1 + (note-this-location vop :internal-error) + (unless (zerop (tn-offset x)) + (inst fxch x) ; x to top of stack + (unless (location= x y) + (inst fst x))) ; Maybe save it. + (inst ,inst) ; Clobber st0. + (unless (zerop (tn-offset y)) + (inst fst y)))))) + + (frob abs/single-float fabs abs single-reg single-float) + (frob abs/double-float fabs abs double-reg double-float) + #!+long-float + (frob abs/long-float fabs abs long-reg long-float) + (frob %negate/single-float fchs %negate single-reg single-float) + (frob %negate/double-float fchs %negate double-reg double-float) + #!+long-float + (frob %negate/long-float fchs %negate long-reg long-float)) + +;;;; comparison + +(define-vop (=/float) + (:args (x) (y)) + (:temporary (:sc word-reg :offset eax-offset :from :eval) temp) + (:conditional) + (:info target not-p) + (:policy :fast-safe) + (:vop-var vop) + (:save-p :compute-only) + (:note "inline float comparison") + (:ignore temp) + (:generator 3 + (note-this-location vop :internal-error) + (cond + ;; x is in ST0; y is in any reg. + ((zerop (tn-offset x)) + (inst fucom y)) + ;; y is in ST0; x is in another reg. + ((zerop (tn-offset y)) + (inst fucom x)) + ;; x and y are the same register, not ST0 + ((location= x y) + (inst fxch x) + (inst fucom fr0-tn) + (inst fxch x)) + ;; x and y are different registers, neither ST0. + (t + (inst fxch x) + (inst fucom y) + (inst fxch x))) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x45) ; C3 C2 C0 + (inst cmp ah-tn #x40) + (inst jmp (if not-p :ne :e) target))) + +(define-vop (=/single-float =/float) + (:translate =) + (:args (x :scs (single-reg)) + (y :scs (single-reg))) + (:arg-types single-float single-float)) + +(define-vop (=/double-float =/float) + (:translate =) + (:args (x :scs (double-reg)) + (y :scs (double-reg))) + (:arg-types double-float double-float)) + +#!+long-float +(define-vop (=/long-float =/float) + (:translate =) + (:args (x :scs (long-reg)) + (y :scs (long-reg))) + (:arg-types long-float long-float)) + +(define-vop (single-float) + (:translate >) + (:args (x :scs (single-reg single-stack descriptor-reg)) + (y :scs (single-reg single-stack descriptor-reg))) + (:arg-types single-float single-float) + (:temporary (:sc single-reg :offset fr0-offset :from :eval) fr0) + (:temporary (:sc word-reg :offset eax-offset :from :eval) temp) + (:conditional) + (:info target not-p) + (:policy :fast-safe) + (:note "inline float comparison") + (:ignore temp) + (:generator 3 + ;; Handle a few special cases. + (cond + ;; y is ST0. + ((and (sc-is y single-reg) (zerop (tn-offset y))) + (sc-case x + (single-reg + (inst fcom x)) + ((single-stack descriptor-reg) + (if (sc-is x single-stack) + (inst fcom (ea-for-sf-stack x)) + (inst fcom (ea-for-sf-desc x))))) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x45) + (inst cmp ah-tn #x01)) + + ;; general case when y is not in ST0 + (t + ;; x to ST0 + (sc-case x + (single-reg + (unless (zerop (tn-offset x)) + (copy-fp-reg-to-fr0 x))) + ((single-stack descriptor-reg) + (inst fstp fr0) + (if (sc-is x single-stack) + (inst fld (ea-for-sf-stack x)) + (inst fld (ea-for-sf-desc x))))) + (sc-case y + (single-reg + (inst fcom y)) + ((single-stack descriptor-reg) + (if (sc-is y single-stack) + (inst fcom (ea-for-sf-stack y)) + (inst fcom (ea-for-sf-desc y))))) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x45))) + (inst jmp (if not-p :ne :e) target))) + +(define-vop (>double-float) + (:translate >) + (:args (x :scs (double-reg double-stack descriptor-reg)) + (y :scs (double-reg double-stack descriptor-reg))) + (:arg-types double-float double-float) + (:temporary (:sc double-reg :offset fr0-offset :from :eval) fr0) + (:temporary (:sc word-reg :offset eax-offset :from :eval) temp) + (:conditional) + (:info target not-p) + (:policy :fast-safe) + (:note "inline float comparison") + (:ignore temp) + (:generator 3 + ;; Handle a few special cases. + (cond + ;; y is ST0. + ((and (sc-is y double-reg) (zerop (tn-offset y))) + (sc-case x + (double-reg + (inst fcomd x)) + ((double-stack descriptor-reg) + (if (sc-is x double-stack) + (inst fcomd (ea-for-df-stack x)) + (inst fcomd (ea-for-df-desc x))))) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x45) + (inst cmp ah-tn #x01)) + + ;; general case when y is not in ST0 + (t + ;; x to ST0 + (sc-case x + (double-reg + (unless (zerop (tn-offset x)) + (copy-fp-reg-to-fr0 x))) + ((double-stack descriptor-reg) + (inst fstp fr0) + (if (sc-is x double-stack) + (inst fldd (ea-for-df-stack x)) + (inst fldd (ea-for-df-desc x))))) + (sc-case y + (double-reg + (inst fcomd y)) + ((double-stack descriptor-reg) + (if (sc-is y double-stack) + (inst fcomd (ea-for-df-stack y)) + (inst fcomd (ea-for-df-desc y))))) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x45))) + (inst jmp (if not-p :ne :e) target))) + +#!+long-float +(define-vop (>long-float) + (:translate >) + (:args (x :scs (long-reg)) + (y :scs (long-reg))) + (:arg-types long-float long-float) + (:temporary (:sc word-reg :offset eax-offset :from :eval) temp) + (:conditional) + (:info target not-p) + (:policy :fast-safe) + (:note "inline float comparison") + (:ignore temp) + (:generator 3 + (cond + ;; y is in ST0; x is in any reg. + ((zerop (tn-offset y)) + (inst fcomd x) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x45) + (inst cmp ah-tn #x01)) + ;; x is in ST0; y is in another reg. + ((zerop (tn-offset x)) + (inst fcomd y) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x45)) + ;; y and x are the same register, not ST0 + ;; y and x are different registers, neither ST0. + (t + (inst fxch x) + (inst fcomd y) + (inst fxch x) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x45))) + (inst jmp (if not-p :ne :e) target))) + +;;; Comparisons with 0 can use the FTST instruction. + +(define-vop (float-test) + (:args (x)) + (:temporary (:sc word-reg :offset eax-offset :from :eval) temp) + (:conditional) + (:info target not-p y) + (:variant-vars code) + (:policy :fast-safe) + (:vop-var vop) + (:save-p :compute-only) + (:note "inline float comparison") + (:ignore temp y) + (:generator 2 + (note-this-location vop :internal-error) + (cond + ;; x is in ST0 + ((zerop (tn-offset x)) + (inst ftst)) + ;; x not ST0 + (t + (inst fxch x) + (inst ftst) + (inst fxch x))) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x45) ; C3 C2 C0 + (unless (zerop code) + (inst cmp ah-tn code)) + (inst jmp (if not-p :ne :e) target))) + +(define-vop (=0/single-float float-test) + (:translate =) + (:args (x :scs (single-reg))) + (:arg-types single-float (:constant (single-float 0f0 0f0))) + (:variant #x40)) +(define-vop (=0/double-float float-test) + (:translate =) + (:args (x :scs (double-reg))) + (:arg-types double-float (:constant (double-float 0d0 0d0))) + (:variant #x40)) +#!+long-float +(define-vop (=0/long-float float-test) + (:translate =) + (:args (x :scs (long-reg))) + (:arg-types long-float (:constant (long-float 0l0 0l0))) + (:variant #x40)) + +(define-vop (<0/single-float float-test) + (:translate <) + (:args (x :scs (single-reg))) + (:arg-types single-float (:constant (single-float 0f0 0f0))) + (:variant #x01)) +(define-vop (<0/double-float float-test) + (:translate <) + (:args (x :scs (double-reg))) + (:arg-types double-float (:constant (double-float 0d0 0d0))) + (:variant #x01)) +#!+long-float +(define-vop (<0/long-float float-test) + (:translate <) + (:args (x :scs (long-reg))) + (:arg-types long-float (:constant (long-float 0l0 0l0))) + (:variant #x01)) + +(define-vop (>0/single-float float-test) + (:translate >) + (:args (x :scs (single-reg))) + (:arg-types single-float (:constant (single-float 0f0 0f0))) + (:variant #x00)) +(define-vop (>0/double-float float-test) + (:translate >) + (:args (x :scs (double-reg))) + (:arg-types double-float (:constant (double-float 0d0 0d0))) + (:variant #x00)) +#!+long-float +(define-vop (>0/long-float float-test) + (:translate >) + (:args (x :scs (long-reg))) + (:arg-types long-float (:constant (long-float 0l0 0l0))) + (:variant #x00)) + +#!+long-float +(deftransform eql ((x y) (long-float long-float)) + `(and (= (long-float-low-bits x) (long-float-low-bits y)) + (= (long-float-high-bits x) (long-float-high-bits y)) + (= (long-float-exp-bits x) (long-float-exp-bits y)))) + +;;;; conversion + +(macrolet ((frob (name translate to-sc to-type) + `(define-vop (,name) + (:args (x :scs (signed-stack signed-reg) :target temp)) + (:temporary (:sc signed-stack) temp) + (:results (y :scs (,to-sc))) + (:arg-types signed-num) + (:result-types ,to-type) + (:policy :fast-safe) + (:note "inline float coercion") + (:translate ,translate) + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (sc-case x + (signed-reg + (inst mov temp x) + (with-empty-tn@fp-top(y) + (note-this-location vop :internal-error) + (inst fild temp))) + (signed-stack + (with-empty-tn@fp-top(y) + (note-this-location vop :internal-error) + (inst fild x)))))))) + (frob %single-float/signed %single-float single-reg single-float) + (frob %double-float/signed %double-float double-reg double-float) + #!+long-float + (frob %long-float/signed %long-float long-reg long-float)) + +(macrolet ((frob (name translate to-sc to-type) + `(define-vop (,name) + (:args (x :scs (unsigned-reg))) + (:results (y :scs (,to-sc))) + (:arg-types unsigned-num) + (:result-types ,to-type) + (:policy :fast-safe) + (:note "inline float coercion") + (:translate ,translate) + (:vop-var vop) + (:save-p :compute-only) + (:generator 6 + (inst push 0) + (inst push x) + (with-empty-tn@fp-top(y) + (note-this-location vop :internal-error) + (inst fildl (make-ea :dword :base esp-tn))) + (inst add esp-tn 8))))) + (frob %single-float/unsigned %single-float single-reg single-float) + (frob %double-float/unsigned %double-float double-reg double-float) + #!+long-float + (frob %long-float/unsigned %long-float long-reg long-float)) + +;;; These should be no-ops but the compiler might want to move some +;;; things around. +(macrolet ((frob (name translate from-sc from-type to-sc to-type) + `(define-vop (,name) + (:args (x :scs (,from-sc) :target y)) + (:results (y :scs (,to-sc))) + (:arg-types ,from-type) + (:result-types ,to-type) + (:policy :fast-safe) + (:note "inline float coercion") + (:translate ,translate) + (:vop-var vop) + (:save-p :compute-only) + (:generator 2 + (note-this-location vop :internal-error) + (unless (location= x y) + (cond + ((zerop (tn-offset x)) + ;; x is in ST0, y is in another reg. not ST0 + (inst fst y)) + ((zerop (tn-offset y)) + ;; y is in ST0, x is in another reg. not ST0 + (copy-fp-reg-to-fr0 x)) + (t + ;; Neither x or y are in ST0, and they are not in + ;; the same reg. + (inst fxch x) + (inst fst y) + (inst fxch x)))))))) + + (frob %single-float/double-float %single-float double-reg + double-float single-reg single-float) + #!+long-float + (frob %single-float/long-float %single-float long-reg + long-float single-reg single-float) + (frob %double-float/single-float %double-float single-reg single-float + double-reg double-float) + #!+long-float + (frob %double-float/long-float %double-float long-reg long-float + double-reg double-float) + #!+long-float + (frob %long-float/single-float %long-float single-reg single-float + long-reg long-float) + #!+long-float + (frob %long-float/double-float %long-float double-reg double-float + long-reg long-float)) + +(macrolet ((frob (trans from-sc from-type round-p) + `(define-vop (,(symbolicate trans "/" from-type)) + (:args (x :scs (,from-sc))) + (:temporary (:sc signed-stack) stack-temp) + ,@(unless round-p + '((:temporary (:sc unsigned-stack) scw) + (:temporary (:sc any-reg) rcw))) + (:results (y :scs (signed-reg))) + (:arg-types ,from-type) + (:result-types signed-num) + (:translate ,trans) + (:policy :fast-safe) + (:note "inline float truncate") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + ,@(unless round-p + '((note-this-location vop :internal-error) + ;; Catch any pending FPE exceptions. + (inst wait))) + (,(if round-p 'progn 'pseudo-atomic) + ;; Normal mode (for now) is "round to best". + (with-tn@fp-top (x) + ,@(unless round-p + '((inst fnstcw scw) ; save current control word + (move rcw scw) ; into 16-bit register + (inst or rcw (ash #b11 10)) ; CHOP + (move stack-temp rcw) + (inst fldcw stack-temp))) + (sc-case y + (signed-stack + (inst fist y)) + (signed-reg + (inst fist stack-temp) + (inst mov y stack-temp))) + ,@(unless round-p + '((inst fldcw scw))))))))) + (frob %unary-truncate single-reg single-float nil) + (frob %unary-truncate double-reg double-float nil) + #!+long-float + (frob %unary-truncate long-reg long-float nil) + (frob %unary-round single-reg single-float t) + (frob %unary-round double-reg double-float t) + #!+long-float + (frob %unary-round long-reg long-float t)) + +(macrolet ((frob (trans from-sc from-type round-p) + `(define-vop (,(symbolicate trans "/" from-type "=>UNSIGNED")) + (:args (x :scs (,from-sc) :target fr0)) + (:temporary (:sc double-reg :offset fr0-offset + :from :argument :to :result) fr0) + ,@(unless round-p + '((:temporary (:sc unsigned-stack) stack-temp) + (:temporary (:sc unsigned-stack) scw) + (:temporary (:sc any-reg) rcw))) + (:results (y :scs (unsigned-reg))) + (:arg-types ,from-type) + (:result-types unsigned-num) + (:translate ,trans) + (:policy :fast-safe) + (:note "inline float truncate") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + ,@(unless round-p + '((note-this-location vop :internal-error) + ;; Catch any pending FPE exceptions. + (inst wait))) + ;; Normal mode (for now) is "round to best". + (unless (zerop (tn-offset x)) + (copy-fp-reg-to-fr0 x)) + ,@(unless round-p + '((inst fnstcw scw) ; save current control word + (move rcw scw) ; into 16-bit register + (inst or rcw (ash #b11 10)) ; CHOP + (move stack-temp rcw) + (inst fldcw stack-temp))) + (inst sub esp-tn 8) + (inst fistpl (make-ea :dword :base esp-tn)) + (inst pop y) + (inst fld fr0) ; copy fr0 to at least restore stack. + (inst add esp-tn 4) + ,@(unless round-p + '((inst fldcw scw))))))) + (frob %unary-truncate single-reg single-float nil) + (frob %unary-truncate double-reg double-float nil) + #!+long-float + (frob %unary-truncate long-reg long-float nil) + (frob %unary-round single-reg single-float t) + (frob %unary-round double-reg double-float t) + #!+long-float + (frob %unary-round long-reg long-float t)) + +(define-vop (make-single-float) + (:args (bits :scs (signed-reg) :target res + :load-if (not (or (and (sc-is bits signed-stack) + (sc-is res single-reg)) + (and (sc-is bits signed-stack) + (sc-is res single-stack) + (location= bits res)))))) + (:results (res :scs (single-reg single-stack))) + (:temporary (:sc signed-stack) stack-temp) + (:arg-types signed-num) + (:result-types single-float) + (:translate make-single-float) + (:policy :fast-safe) + (:vop-var vop) + (:generator 4 + (sc-case res + (single-stack + (sc-case bits + (signed-reg + (inst mov res bits)) + (signed-stack + (aver (location= bits res))))) + (single-reg + (sc-case bits + (signed-reg + ;; source must be in memory + (inst mov stack-temp bits) + (with-empty-tn@fp-top(res) + (inst fld stack-temp))) + (signed-stack + (with-empty-tn@fp-top(res) + (inst fld bits)))))))) + +(define-vop (make-double-float) + (:args (hi-bits :scs (signed-reg)) + (lo-bits :scs (unsigned-reg))) + (:results (res :scs (double-reg))) + (:temporary (:sc double-stack) temp) + (:arg-types signed-num unsigned-num) + (:result-types double-float) + (:translate make-double-float) + (:policy :fast-safe) + (:vop-var vop) + (:generator 2 + (let ((offset (1+ (tn-offset temp)))) + (storew hi-bits ebp-tn (- offset)) + (storew lo-bits ebp-tn (- (1+ offset))) + (with-empty-tn@fp-top(res) + (inst fldd (make-ea :dword :base ebp-tn + :disp (- (* (1+ offset) n-word-bytes)))))))) + +#!+long-float +(define-vop (make-long-float) + (:args (exp-bits :scs (signed-reg)) + (hi-bits :scs (unsigned-reg)) + (lo-bits :scs (unsigned-reg))) + (:results (res :scs (long-reg))) + (:temporary (:sc long-stack) temp) + (:arg-types signed-num unsigned-num unsigned-num) + (:result-types long-float) + (:translate make-long-float) + (:policy :fast-safe) + (:vop-var vop) + (:generator 3 + (let ((offset (1+ (tn-offset temp)))) + (storew exp-bits ebp-tn (- offset)) + (storew hi-bits ebp-tn (- (1+ offset))) + (storew lo-bits ebp-tn (- (+ offset 2))) + (with-empty-tn@fp-top(res) + (inst fldl (make-ea :dword :base ebp-tn + :disp (- (* (+ offset 2) n-word-bytes)))))))) + +(define-vop (single-float-bits) + (:args (float :scs (single-reg descriptor-reg) + :load-if (not (sc-is float single-stack)))) + (:results (bits :scs (signed-reg))) + (:temporary (:sc signed-stack :from :argument :to :result) stack-temp) + (:arg-types single-float) + (:result-types signed-num) + (:translate single-float-bits) + (:policy :fast-safe) + (:vop-var vop) + (:generator 4 + (sc-case bits + (signed-reg + (sc-case float + (single-reg + (with-tn@fp-top(float) + (inst fst stack-temp) + (inst mov bits stack-temp))) + (single-stack + (inst mov bits float)) + (descriptor-reg + (loadw + bits float single-float-value-slot + other-pointer-lowtag)))) + (signed-stack + (sc-case float + (single-reg + (with-tn@fp-top(float) + (inst fst bits)))))))) + +(define-vop (double-float-high-bits) + (:args (float :scs (double-reg descriptor-reg) + :load-if (not (sc-is float double-stack)))) + (:results (hi-bits :scs (signed-reg))) + (:temporary (:sc double-stack) temp) + (:arg-types double-float) + (:result-types signed-num) + (:translate double-float-high-bits) + (:policy :fast-safe) + (:vop-var vop) + (:generator 5 + (sc-case float + (double-reg + (with-tn@fp-top(float) + (let ((where (make-ea :dword :base ebp-tn + :disp (- (* (+ 2 (tn-offset temp)) + n-word-bytes))))) + (inst fstd where))) + (loadw hi-bits ebp-tn (- (1+ (tn-offset temp))))) + (double-stack + (loadw hi-bits ebp-tn (- (1+ (tn-offset float))))) + (descriptor-reg + (loadw hi-bits float (1+ double-float-value-slot) + other-pointer-lowtag))))) + +(define-vop (double-float-low-bits) + (:args (float :scs (double-reg descriptor-reg) + :load-if (not (sc-is float double-stack)))) + (:results (lo-bits :scs (unsigned-reg))) + (:temporary (:sc double-stack) temp) + (:arg-types double-float) + (:result-types unsigned-num) + (:translate double-float-low-bits) + (:policy :fast-safe) + (:vop-var vop) + (:generator 5 + (sc-case float + (double-reg + (with-tn@fp-top(float) + (let ((where (make-ea :dword :base ebp-tn + :disp (- (* (+ 2 (tn-offset temp)) + n-word-bytes))))) + (inst fstd where))) + (loadw lo-bits ebp-tn (- (+ 2 (tn-offset temp))))) + (double-stack + (loadw lo-bits ebp-tn (- (+ 2 (tn-offset float))))) + (descriptor-reg + (loadw lo-bits float double-float-value-slot + other-pointer-lowtag))))) + +#!+long-float +(define-vop (long-float-exp-bits) + (:args (float :scs (long-reg descriptor-reg) + :load-if (not (sc-is float long-stack)))) + (:results (exp-bits :scs (signed-reg))) + (:temporary (:sc long-stack) temp) + (:arg-types long-float) + (:result-types signed-num) + (:translate long-float-exp-bits) + (:policy :fast-safe) + (:vop-var vop) + (:generator 5 + (sc-case float + (long-reg + (with-tn@fp-top(float) + (let ((where (make-ea :dword :base ebp-tn + :disp (- (* (+ 3 (tn-offset temp)) + n-word-bytes))))) + (store-long-float where))) + (inst movsx exp-bits + (make-ea :word :base ebp-tn + :disp (* (- (1+ (tn-offset temp))) n-word-bytes)))) + (long-stack + (inst movsx exp-bits + (make-ea :word :base ebp-tn + :disp (* (- (1+ (tn-offset float))) n-word-bytes)))) + (descriptor-reg + (inst movsx exp-bits + (make-ea :word :base float + :disp (- (* (+ 2 long-float-value-slot) + n-word-bytes) + other-pointer-lowtag))))))) + +#!+long-float +(define-vop (long-float-high-bits) + (:args (float :scs (long-reg descriptor-reg) + :load-if (not (sc-is float long-stack)))) + (:results (hi-bits :scs (unsigned-reg))) + (:temporary (:sc long-stack) temp) + (:arg-types long-float) + (:result-types unsigned-num) + (:translate long-float-high-bits) + (:policy :fast-safe) + (:vop-var vop) + (:generator 5 + (sc-case float + (long-reg + (with-tn@fp-top(float) + (let ((where (make-ea :dword :base ebp-tn + :disp (- (* (+ 3 (tn-offset temp)) + n-word-bytes))))) + (store-long-float where))) + (loadw hi-bits ebp-tn (- (+ (tn-offset temp) 2)))) + (long-stack + (loadw hi-bits ebp-tn (- (+ (tn-offset float) 2)))) + (descriptor-reg + (loadw hi-bits float (1+ long-float-value-slot) + other-pointer-lowtag))))) + +#!+long-float +(define-vop (long-float-low-bits) + (:args (float :scs (long-reg descriptor-reg) + :load-if (not (sc-is float long-stack)))) + (:results (lo-bits :scs (unsigned-reg))) + (:temporary (:sc long-stack) temp) + (:arg-types long-float) + (:result-types unsigned-num) + (:translate long-float-low-bits) + (:policy :fast-safe) + (:vop-var vop) + (:generator 5 + (sc-case float + (long-reg + (with-tn@fp-top(float) + (let ((where (make-ea :dword :base ebp-tn + :disp (- (* (+ 3 (tn-offset temp)) + n-word-bytes))))) + (store-long-float where))) + (loadw lo-bits ebp-tn (- (+ (tn-offset temp) 3)))) + (long-stack + (loadw lo-bits ebp-tn (- (+ (tn-offset float) 3)))) + (descriptor-reg + (loadw lo-bits float long-float-value-slot + other-pointer-lowtag))))) + +;;;; float mode hackery + +(sb!xc:deftype float-modes () '(unsigned-byte 32)) ; really only 16 +(defknown floating-point-modes () float-modes (flushable)) +(defknown ((setf floating-point-modes)) (float-modes) + float-modes) + +(def!constant npx-env-size (* 7 n-word-bytes)) +(def!constant npx-cw-offset 0) +(def!constant npx-sw-offset 4) + +(define-vop (floating-point-modes) + (:results (res :scs (unsigned-reg))) + (:result-types unsigned-num) + (:translate floating-point-modes) + (:policy :fast-safe) + (:temporary (:sc unsigned-reg :offset eax-offset :target res + :to :result) eax) + (:generator 8 + (inst sub esp-tn npx-env-size) ; Make space on stack. + (inst wait) ; Catch any pending FPE exceptions + (inst fstenv (make-ea :dword :base esp-tn)) ; masks all exceptions + (inst fldenv (make-ea :dword :base esp-tn)) ; Restore previous state. + ;; Move current status to high word. + (inst mov eax (make-ea :dword :base esp-tn :disp (- npx-sw-offset 2))) + ;; Move exception mask to low word. + (inst mov ax-tn (make-ea :word :base esp-tn :disp npx-cw-offset)) + (inst add esp-tn npx-env-size) ; Pop stack. + (inst xor eax #x3f) ; Flip exception mask to trap enable bits. + (move res eax))) + +(define-vop (set-floating-point-modes) + (:args (new :scs (unsigned-reg) :to :result :target res)) + (:results (res :scs (unsigned-reg))) + (:arg-types unsigned-num) + (:result-types unsigned-num) + (:translate (setf floating-point-modes)) + (:policy :fast-safe) + (:temporary (:sc unsigned-reg :offset eax-offset + :from :eval :to :result) eax) + (:generator 3 + (inst sub esp-tn npx-env-size) ; Make space on stack. + (inst wait) ; Catch any pending FPE exceptions. + (inst fstenv (make-ea :dword :base esp-tn)) + (inst mov eax new) + (inst xor eax #x3f) ; Turn trap enable bits into exception mask. + (inst mov (make-ea :word :base esp-tn :disp npx-cw-offset) ax-tn) + (inst shr eax 16) ; position status word + (inst mov (make-ea :word :base esp-tn :disp npx-sw-offset) ax-tn) + (inst fldenv (make-ea :dword :base esp-tn)) + (inst add esp-tn npx-env-size) ; Pop stack. + (move res new))) + +#!-long-float +(progn + +;;; Let's use some of the 80387 special functions. +;;; +;;; These defs will not take effect unless code/irrat.lisp is modified +;;; to remove the inlined alien routine def. + +(macrolet ((frob (func trans op) + `(define-vop (,func) + (:args (x :scs (double-reg) :target fr0)) + (:temporary (:sc double-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:ignore fr0) + (:results (y :scs (double-reg))) + (:arg-types double-float) + (:result-types double-float) + (:translate ,trans) + (:policy :fast-safe) + (:note "inline NPX function") + (:vop-var vop) + (:save-p :compute-only) + (:node-var node) + (:generator 5 + (note-this-location vop :internal-error) + (unless (zerop (tn-offset x)) + (inst fxch x) ; x to top of stack + (unless (location= x y) + (inst fst x))) ; maybe save it + (inst ,op) ; clobber st0 + (cond ((zerop (tn-offset y)) + (maybe-fp-wait node)) + (t + (inst fst y))))))) + + ;; Quick versions of fsin and fcos that require the argument to be + ;; within range 2^63. + (frob fsin-quick %sin-quick fsin) + (frob fcos-quick %cos-quick fcos) + (frob fsqrt %sqrt fsqrt)) + +;;; Quick version of ftan that requires the argument to be within +;;; range 2^63. +(define-vop (ftan-quick) + (:translate %tan-quick) + (:args (x :scs (double-reg) :target fr0)) + (:temporary (:sc double-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:results (y :scs (double-reg))) + (:arg-types double-float) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline tan function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + (case (tn-offset x) + (0 + (inst fstp fr1)) + (1 + (inst fstp fr0)) + (t + (inst fstp fr0) + (inst fstp fr0) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 2))))) + (inst fptan) + ;; Result is in fr1 + (case (tn-offset y) + (0 + (inst fxch fr1)) + (1) + (t + (inst fxch fr1) + (inst fstd y))))) + +;;; KLUDGE: these versions of fsin, fcos, and ftan simply load a 0.0 +;;; result if the argument is out of range 2^63 and would thus be +;;; hopelessly inaccurate. +(macrolet ((frob (func trans op) + `(define-vop (,func) + (:translate ,trans) + (:args (x :scs (double-reg) :target fr0)) + (:temporary (:sc double-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc unsigned-reg :offset eax-offset + :from :argument :to :result) eax) + (:results (y :scs (double-reg))) + (:arg-types double-float) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline sin/cos function") + (:vop-var vop) + (:save-p :compute-only) + (:ignore eax) + (:generator 5 + (note-this-location vop :internal-error) + (unless (zerop (tn-offset x)) + (inst fxch x) ; x to top of stack + (unless (location= x y) + (inst fst x))) ; maybe save it + (inst ,op) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x04) ; C2 + (inst jmp :z DONE) + ;; Else x was out of range so reduce it; ST0 is unchanged. + (inst fstp fr0) ; Load 0.0 + (inst fldz) + DONE + (unless (zerop (tn-offset y)) + (inst fstd y)))))) + (frob fsin %sin fsin) + (frob fcos %cos fcos)) + +(define-vop (ftan) + (:translate %tan) + (:args (x :scs (double-reg) :target fr0)) + (:temporary (:sc double-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:temporary (:sc unsigned-reg :offset eax-offset + :from :argument :to :result) eax) + (:results (y :scs (double-reg))) + (:arg-types double-float) + (:result-types double-float) + (:ignore eax) + (:policy :fast-safe) + (:note "inline tan function") + (:vop-var vop) + (:save-p :compute-only) + (:ignore eax) + (:generator 5 + (note-this-location vop :internal-error) + (case (tn-offset x) + (0 + (inst fstp fr1)) + (1 + (inst fstp fr0)) + (t + (inst fstp fr0) + (inst fstp fr0) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 2))))) + (inst fptan) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x04) ; C2 + (inst jmp :z DONE) + ;; Else x was out of range so load 0.0 + (inst fxch fr1) + DONE + ;; Result is in fr1 + (case (tn-offset y) + (0 + (inst fxch fr1)) + (1) + (t + (inst fxch fr1) + (inst fstd y))))) + +;;; %exp that handles the following special cases: exp(+Inf) is +Inf; +;;; exp(-Inf) is 0; exp(NaN) is NaN. +(define-vop (fexp) + (:translate %exp) + (:args (x :scs (double-reg) :target fr0)) + (:temporary (:sc word-reg :offset eax-offset :from :eval :to :result) temp) + (:temporary (:sc double-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:temporary (:sc double-reg :offset fr2-offset + :from :argument :to :result) fr2) + (:results (y :scs (double-reg))) + (:arg-types double-float) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline exp function") + (:vop-var vop) + (:save-p :compute-only) + (:ignore temp) + (:generator 5 + (note-this-location vop :internal-error) + (unless (zerop (tn-offset x)) + (inst fxch x) ; x to top of stack + (unless (location= x y) + (inst fst x))) ; maybe save it + ;; Check for Inf or NaN + (inst fxam) + (inst fnstsw) + (inst sahf) + (inst jmp :nc NOINFNAN) ; Neither Inf or NaN. + (inst jmp :np NOINFNAN) ; NaN gives NaN? Continue. + (inst and ah-tn #x02) ; Test sign of Inf. + (inst jmp :z DONE) ; +Inf gives +Inf. + (inst fstp fr0) ; -Inf gives 0 + (inst fldz) + (inst jmp-short DONE) + NOINFNAN + (inst fstp fr1) + (inst fldl2e) + (inst fmul fr1) + ;; Now fr0=x log2(e) + (inst fst fr1) + (inst frndint) + (inst fst fr2) + (inst fsubp-sti fr1) + (inst f2xm1) + (inst fld1) + (inst faddp-sti fr1) + (inst fscale) + (inst fld fr0) + DONE + (unless (zerop (tn-offset y)) + (inst fstd y)))) + +;;; Expm1 = exp(x) - 1. +;;; Handles the following special cases: +;;; expm1(+Inf) is +Inf; expm1(-Inf) is -1.0; expm1(NaN) is NaN. +(define-vop (fexpm1) + (:translate %expm1) + (:args (x :scs (double-reg) :target fr0)) + (:temporary (:sc word-reg :offset eax-offset :from :eval :to :result) temp) + (:temporary (:sc double-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:temporary (:sc double-reg :offset fr2-offset + :from :argument :to :result) fr2) + (:results (y :scs (double-reg))) + (:arg-types double-float) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline expm1 function") + (:vop-var vop) + (:save-p :compute-only) + (:ignore temp) + (:generator 5 + (note-this-location vop :internal-error) + (unless (zerop (tn-offset x)) + (inst fxch x) ; x to top of stack + (unless (location= x y) + (inst fst x))) ; maybe save it + ;; Check for Inf or NaN + (inst fxam) + (inst fnstsw) + (inst sahf) + (inst jmp :nc NOINFNAN) ; Neither Inf or NaN. + (inst jmp :np NOINFNAN) ; NaN gives NaN? Continue. + (inst and ah-tn #x02) ; Test sign of Inf. + (inst jmp :z DONE) ; +Inf gives +Inf. + (inst fstp fr0) ; -Inf gives -1.0 + (inst fld1) + (inst fchs) + (inst jmp-short DONE) + NOINFNAN + ;; Free two stack slots leaving the argument on top. + (inst fstp fr2) + (inst fstp fr0) + (inst fldl2e) + (inst fmul fr1) ; Now fr0 = x log2(e) + (inst fst fr1) + (inst frndint) + (inst fsub-sti fr1) + (inst fxch fr1) + (inst f2xm1) + (inst fscale) + (inst fxch fr1) + (inst fld1) + (inst fscale) + (inst fstp fr1) + (inst fld1) + (inst fsub fr1) + (inst fsubr fr2) + DONE + (unless (zerop (tn-offset y)) + (inst fstd y)))) + +(define-vop (flog) + (:translate %log) + (:args (x :scs (double-reg double-stack descriptor-reg) :target fr0)) + (:temporary (:sc double-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:results (y :scs (double-reg))) + (:arg-types double-float) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline log function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + (sc-case x + (double-reg + (case (tn-offset x) + (0 + ;; x is in fr0 + (inst fstp fr1) + (inst fldln2) + (inst fxch fr1)) + (1 + ;; x is in fr1 + (inst fstp fr0) + (inst fldln2) + (inst fxch fr1)) + (t + ;; x is in a FP reg, not fr0 or fr1 + (inst fstp fr0) + (inst fstp fr0) + (inst fldln2) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (1- (tn-offset x)))))) + (inst fyl2x)) + ((double-stack descriptor-reg) + (inst fstp fr0) + (inst fstp fr0) + (inst fldln2) + (if (sc-is x double-stack) + (inst fldd (ea-for-df-stack x)) + (inst fldd (ea-for-df-desc x))) + (inst fyl2x))) + (inst fld fr0) + (case (tn-offset y) + ((0 1)) + (t (inst fstd y))))) + +(define-vop (flog10) + (:translate %log10) + (:args (x :scs (double-reg double-stack descriptor-reg) :target fr0)) + (:temporary (:sc double-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:results (y :scs (double-reg))) + (:arg-types double-float) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline log10 function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + (sc-case x + (double-reg + (case (tn-offset x) + (0 + ;; x is in fr0 + (inst fstp fr1) + (inst fldlg2) + (inst fxch fr1)) + (1 + ;; x is in fr1 + (inst fstp fr0) + (inst fldlg2) + (inst fxch fr1)) + (t + ;; x is in a FP reg, not fr0 or fr1 + (inst fstp fr0) + (inst fstp fr0) + (inst fldlg2) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (1- (tn-offset x)))))) + (inst fyl2x)) + ((double-stack descriptor-reg) + (inst fstp fr0) + (inst fstp fr0) + (inst fldlg2) + (if (sc-is x double-stack) + (inst fldd (ea-for-df-stack x)) + (inst fldd (ea-for-df-desc x))) + (inst fyl2x))) + (inst fld fr0) + (case (tn-offset y) + ((0 1)) + (t (inst fstd y))))) + +(define-vop (fpow) + (:translate %pow) + (:args (x :scs (double-reg double-stack descriptor-reg) :target fr0) + (y :scs (double-reg double-stack descriptor-reg) :target fr1)) + (:temporary (:sc double-reg :offset fr0-offset + :from (:argument 0) :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset + :from (:argument 1) :to :result) fr1) + (:temporary (:sc double-reg :offset fr2-offset + :from :load :to :result) fr2) + (:results (r :scs (double-reg))) + (:arg-types double-float double-float) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline pow function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + ;; Setup x in fr0 and y in fr1 + (cond + ;; x in fr0; y in fr1 + ((and (sc-is x double-reg) (zerop (tn-offset x)) + (sc-is y double-reg) (= 1 (tn-offset y)))) + ;; y in fr1; x not in fr0 + ((and (sc-is y double-reg) (= 1 (tn-offset y))) + ;; Load x to fr0 + (sc-case x + (double-reg + (copy-fp-reg-to-fr0 x)) + (double-stack + (inst fstp fr0) + (inst fldd (ea-for-df-stack x))) + (descriptor-reg + (inst fstp fr0) + (inst fldd (ea-for-df-desc x))))) + ;; x in fr0; y not in fr1 + ((and (sc-is x double-reg) (zerop (tn-offset x))) + (inst fxch fr1) + ;; Now load y to fr0 + (sc-case y + (double-reg + (copy-fp-reg-to-fr0 y)) + (double-stack + (inst fstp fr0) + (inst fldd (ea-for-df-stack y))) + (descriptor-reg + (inst fstp fr0) + (inst fldd (ea-for-df-desc y)))) + (inst fxch fr1)) + ;; x in fr1; y not in fr1 + ((and (sc-is x double-reg) (= 1 (tn-offset x))) + ;; Load y to fr0 + (sc-case y + (double-reg + (copy-fp-reg-to-fr0 y)) + (double-stack + (inst fstp fr0) + (inst fldd (ea-for-df-stack y))) + (descriptor-reg + (inst fstp fr0) + (inst fldd (ea-for-df-desc y)))) + (inst fxch fr1)) + ;; y in fr0; + ((and (sc-is y double-reg) (zerop (tn-offset y))) + (inst fxch fr1) + ;; Now load x to fr0 + (sc-case x + (double-reg + (copy-fp-reg-to-fr0 x)) + (double-stack + (inst fstp fr0) + (inst fldd (ea-for-df-stack x))) + (descriptor-reg + (inst fstp fr0) + (inst fldd (ea-for-df-desc x))))) + ;; Neither x or y are in either fr0 or fr1 + (t + ;; Load y then x + (inst fstp fr0) + (inst fstp fr0) + (sc-case y + (double-reg + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset y) 2)))) + (double-stack + (inst fldd (ea-for-df-stack y))) + (descriptor-reg + (inst fldd (ea-for-df-desc y)))) + ;; Load x to fr0 + (sc-case x + (double-reg + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (1- (tn-offset x))))) + (double-stack + (inst fldd (ea-for-df-stack x))) + (descriptor-reg + (inst fldd (ea-for-df-desc x)))))) + + ;; Now have x at fr0; and y at fr1 + (inst fyl2x) + ;; Now fr0=y log2(x) + (inst fld fr0) + (inst frndint) + (inst fst fr2) + (inst fsubp-sti fr1) + (inst f2xm1) + (inst fld1) + (inst faddp-sti fr1) + (inst fscale) + (inst fld fr0) + (case (tn-offset r) + ((0 1)) + (t (inst fstd r))))) + +(define-vop (fscalen) + (:translate %scalbn) + (:args (x :scs (double-reg double-stack descriptor-reg) :target fr0) + (y :scs (signed-stack signed-reg) :target temp)) + (:temporary (:sc double-reg :offset fr0-offset + :from (:argument 0) :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset :from :eval :to :result) fr1) + (:temporary (:sc signed-stack :from (:argument 1) :to :result) temp) + (:results (r :scs (double-reg))) + (:arg-types double-float signed-num) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline scalbn function") + (:generator 5 + ;; Setup x in fr0 and y in fr1 + (sc-case x + (double-reg + (case (tn-offset x) + (0 + (inst fstp fr1) + (sc-case y + (signed-reg + (inst mov temp y) + (inst fild temp)) + (signed-stack + (inst fild y))) + (inst fxch fr1)) + (1 + (inst fstp fr0) + (sc-case y + (signed-reg + (inst mov temp y) + (inst fild temp)) + (signed-stack + (inst fild y))) + (inst fxch fr1)) + (t + (inst fstp fr0) + (inst fstp fr0) + (sc-case y + (signed-reg + (inst mov temp y) + (inst fild temp)) + (signed-stack + (inst fild y))) + (inst fld (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (1- (tn-offset x))))))) + ((double-stack descriptor-reg) + (inst fstp fr0) + (inst fstp fr0) + (sc-case y + (signed-reg + (inst mov temp y) + (inst fild temp)) + (signed-stack + (inst fild y))) + (if (sc-is x double-stack) + (inst fldd (ea-for-df-stack x)) + (inst fldd (ea-for-df-desc x))))) + (inst fscale) + (unless (zerop (tn-offset r)) + (inst fstd r)))) + +(define-vop (fscale) + (:translate %scalb) + (:args (x :scs (double-reg double-stack descriptor-reg) :target fr0) + (y :scs (double-reg double-stack descriptor-reg) :target fr1)) + (:temporary (:sc double-reg :offset fr0-offset + :from (:argument 0) :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset + :from (:argument 1) :to :result) fr1) + (:results (r :scs (double-reg))) + (:arg-types double-float double-float) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline scalb function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + ;; Setup x in fr0 and y in fr1 + (cond + ;; x in fr0; y in fr1 + ((and (sc-is x double-reg) (zerop (tn-offset x)) + (sc-is y double-reg) (= 1 (tn-offset y)))) + ;; y in fr1; x not in fr0 + ((and (sc-is y double-reg) (= 1 (tn-offset y))) + ;; Load x to fr0 + (sc-case x + (double-reg + (copy-fp-reg-to-fr0 x)) + (double-stack + (inst fstp fr0) + (inst fldd (ea-for-df-stack x))) + (descriptor-reg + (inst fstp fr0) + (inst fldd (ea-for-df-desc x))))) + ;; x in fr0; y not in fr1 + ((and (sc-is x double-reg) (zerop (tn-offset x))) + (inst fxch fr1) + ;; Now load y to fr0 + (sc-case y + (double-reg + (copy-fp-reg-to-fr0 y)) + (double-stack + (inst fstp fr0) + (inst fldd (ea-for-df-stack y))) + (descriptor-reg + (inst fstp fr0) + (inst fldd (ea-for-df-desc y)))) + (inst fxch fr1)) + ;; x in fr1; y not in fr1 + ((and (sc-is x double-reg) (= 1 (tn-offset x))) + ;; Load y to fr0 + (sc-case y + (double-reg + (copy-fp-reg-to-fr0 y)) + (double-stack + (inst fstp fr0) + (inst fldd (ea-for-df-stack y))) + (descriptor-reg + (inst fstp fr0) + (inst fldd (ea-for-df-desc y)))) + (inst fxch fr1)) + ;; y in fr0; + ((and (sc-is y double-reg) (zerop (tn-offset y))) + (inst fxch fr1) + ;; Now load x to fr0 + (sc-case x + (double-reg + (copy-fp-reg-to-fr0 x)) + (double-stack + (inst fstp fr0) + (inst fldd (ea-for-df-stack x))) + (descriptor-reg + (inst fstp fr0) + (inst fldd (ea-for-df-desc x))))) + ;; Neither x or y are in either fr0 or fr1 + (t + ;; Load y then x + (inst fstp fr0) + (inst fstp fr0) + (sc-case y + (double-reg + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset y) 2)))) + (double-stack + (inst fldd (ea-for-df-stack y))) + (descriptor-reg + (inst fldd (ea-for-df-desc y)))) + ;; Load x to fr0 + (sc-case x + (double-reg + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (1- (tn-offset x))))) + (double-stack + (inst fldd (ea-for-df-stack x))) + (descriptor-reg + (inst fldd (ea-for-df-desc x)))))) + + ;; Now have x at fr0; and y at fr1 + (inst fscale) + (unless (zerop (tn-offset r)) + (inst fstd r)))) + +(define-vop (flog1p) + (:translate %log1p) + (:args (x :scs (double-reg) :to :result)) + (:temporary (:sc double-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:temporary (:sc word-reg :offset eax-offset :from :eval) temp) + (:results (y :scs (double-reg))) + (:arg-types double-float) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline log1p function") + (:ignore temp) + (:generator 5 + ;; x is in a FP reg, not fr0, fr1. + (inst fstp fr0) + (inst fstp fr0) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 2))) + ;; Check the range + (inst push #x3e947ae1) ; Constant 0.29 + (inst fabs) + (inst fld (make-ea :dword :base esp-tn)) + (inst fcompp) + (inst add esp-tn 4) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x45) + (inst jmp :z WITHIN-RANGE) + ;; Out of range for fyl2xp1. + (inst fld1) + (inst faddd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 1))) + (inst fldln2) + (inst fxch fr1) + (inst fyl2x) + (inst jmp DONE) + + WITHIN-RANGE + (inst fldln2) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 1))) + (inst fyl2xp1) + DONE + (inst fld fr0) + (case (tn-offset y) + ((0 1)) + (t (inst fstd y))))) + +;;; The Pentium has a less restricted implementation of the fyl2xp1 +;;; instruction and a range check can be avoided. +(define-vop (flog1p-pentium) + (:translate %log1p) + (:args (x :scs (double-reg double-stack descriptor-reg) :target fr0)) + (:temporary (:sc double-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:results (y :scs (double-reg))) + (:arg-types double-float) + (:result-types double-float) + (:policy :fast-safe) + (:guard (member :pentium-style-fyl2xp1 *backend-subfeatures*)) + (:note "inline log1p with limited x range function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 4 + (note-this-location vop :internal-error) + (sc-case x + (double-reg + (case (tn-offset x) + (0 + ;; x is in fr0 + (inst fstp fr1) + (inst fldln2) + (inst fxch fr1)) + (1 + ;; x is in fr1 + (inst fstp fr0) + (inst fldln2) + (inst fxch fr1)) + (t + ;; x is in a FP reg, not fr0 or fr1 + (inst fstp fr0) + (inst fstp fr0) + (inst fldln2) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (1- (tn-offset x))))))) + ((double-stack descriptor-reg) + (inst fstp fr0) + (inst fstp fr0) + (inst fldln2) + (if (sc-is x double-stack) + (inst fldd (ea-for-df-stack x)) + (inst fldd (ea-for-df-desc x))))) + (inst fyl2xp1) + (inst fld fr0) + (case (tn-offset y) + ((0 1)) + (t (inst fstd y))))) + +(define-vop (flogb) + (:translate %logb) + (:args (x :scs (double-reg double-stack descriptor-reg) :target fr0)) + (:temporary (:sc double-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:results (y :scs (double-reg))) + (:arg-types double-float) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline logb function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + (sc-case x + (double-reg + (case (tn-offset x) + (0 + ;; x is in fr0 + (inst fstp fr1)) + (1 + ;; x is in fr1 + (inst fstp fr0)) + (t + ;; x is in a FP reg, not fr0 or fr1 + (inst fstp fr0) + (inst fstp fr0) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 2)))))) + ((double-stack descriptor-reg) + (inst fstp fr0) + (inst fstp fr0) + (if (sc-is x double-stack) + (inst fldd (ea-for-df-stack x)) + (inst fldd (ea-for-df-desc x))))) + (inst fxtract) + (case (tn-offset y) + (0 + (inst fxch fr1)) + (1) + (t (inst fxch fr1) + (inst fstd y))))) + +(define-vop (fatan) + (:translate %atan) + (:args (x :scs (double-reg double-stack descriptor-reg) :target fr0)) + (:temporary (:sc double-reg :offset fr0-offset + :from (:argument 0) :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset + :from (:argument 0) :to :result) fr1) + (:results (r :scs (double-reg))) + (:arg-types double-float) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline atan function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + ;; Setup x in fr1 and 1.0 in fr0 + (cond + ;; x in fr0 + ((and (sc-is x double-reg) (zerop (tn-offset x))) + (inst fstp fr1)) + ;; x in fr1 + ((and (sc-is x double-reg) (= 1 (tn-offset x))) + (inst fstp fr0)) + ;; x not in fr0 or fr1 + (t + ;; Load x then 1.0 + (inst fstp fr0) + (inst fstp fr0) + (sc-case x + (double-reg + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 2)))) + (double-stack + (inst fldd (ea-for-df-stack x))) + (descriptor-reg + (inst fldd (ea-for-df-desc x)))))) + (inst fld1) + ;; Now have x at fr1; and 1.0 at fr0 + (inst fpatan) + (inst fld fr0) + (case (tn-offset r) + ((0 1)) + (t (inst fstd r))))) + +(define-vop (fatan2) + (:translate %atan2) + (:args (x :scs (double-reg double-stack descriptor-reg) :target fr1) + (y :scs (double-reg double-stack descriptor-reg) :target fr0)) + (:temporary (:sc double-reg :offset fr0-offset + :from (:argument 1) :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset + :from (:argument 0) :to :result) fr1) + (:results (r :scs (double-reg))) + (:arg-types double-float double-float) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline atan2 function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + ;; Setup x in fr1 and y in fr0 + (cond + ;; y in fr0; x in fr1 + ((and (sc-is y double-reg) (zerop (tn-offset y)) + (sc-is x double-reg) (= 1 (tn-offset x)))) + ;; x in fr1; y not in fr0 + ((and (sc-is x double-reg) (= 1 (tn-offset x))) + ;; Load y to fr0 + (sc-case y + (double-reg + (copy-fp-reg-to-fr0 y)) + (double-stack + (inst fstp fr0) + (inst fldd (ea-for-df-stack y))) + (descriptor-reg + (inst fstp fr0) + (inst fldd (ea-for-df-desc y))))) + ((and (sc-is x double-reg) (zerop (tn-offset x)) + (sc-is y double-reg) (zerop (tn-offset x))) + ;; copy x to fr1 + (inst fst fr1)) + ;; y in fr0; x not in fr1 + ((and (sc-is y double-reg) (zerop (tn-offset y))) + (inst fxch fr1) + ;; Now load x to fr0 + (sc-case x + (double-reg + (copy-fp-reg-to-fr0 x)) + (double-stack + (inst fstp fr0) + (inst fldd (ea-for-df-stack x))) + (descriptor-reg + (inst fstp fr0) + (inst fldd (ea-for-df-desc x)))) + (inst fxch fr1)) + ;; y in fr1; x not in fr1 + ((and (sc-is y double-reg) (= 1 (tn-offset y))) + ;; Load x to fr0 + (sc-case x + (double-reg + (copy-fp-reg-to-fr0 x)) + (double-stack + (inst fstp fr0) + (inst fldd (ea-for-df-stack x))) + (descriptor-reg + (inst fstp fr0) + (inst fldd (ea-for-df-desc x)))) + (inst fxch fr1)) + ;; x in fr0; + ((and (sc-is x double-reg) (zerop (tn-offset x))) + (inst fxch fr1) + ;; Now load y to fr0 + (sc-case y + (double-reg + (copy-fp-reg-to-fr0 y)) + (double-stack + (inst fstp fr0) + (inst fldd (ea-for-df-stack y))) + (descriptor-reg + (inst fstp fr0) + (inst fldd (ea-for-df-desc y))))) + ;; Neither y or x are in either fr0 or fr1 + (t + ;; Load x then y + (inst fstp fr0) + (inst fstp fr0) + (sc-case x + (double-reg + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 2)))) + (double-stack + (inst fldd (ea-for-df-stack x))) + (descriptor-reg + (inst fldd (ea-for-df-desc x)))) + ;; Load y to fr0 + (sc-case y + (double-reg + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (1- (tn-offset y))))) + (double-stack + (inst fldd (ea-for-df-stack y))) + (descriptor-reg + (inst fldd (ea-for-df-desc y)))))) + + ;; Now have y at fr0; and x at fr1 + (inst fpatan) + (inst fld fr0) + (case (tn-offset r) + ((0 1)) + (t (inst fstd r))))) +) ; PROGN #!-LONG-FLOAT + +#!+long-float +(progn + +;;; Lets use some of the 80387 special functions. +;;; +;;; These defs will not take effect unless code/irrat.lisp is modified +;;; to remove the inlined alien routine def. + +(macrolet ((frob (func trans op) + `(define-vop (,func) + (:args (x :scs (long-reg) :target fr0)) + (:temporary (:sc long-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:ignore fr0) + (:results (y :scs (long-reg))) + (:arg-types long-float) + (:result-types long-float) + (:translate ,trans) + (:policy :fast-safe) + (:note "inline NPX function") + (:vop-var vop) + (:save-p :compute-only) + (:node-var node) + (:generator 5 + (note-this-location vop :internal-error) + (unless (zerop (tn-offset x)) + (inst fxch x) ; x to top of stack + (unless (location= x y) + (inst fst x))) ; maybe save it + (inst ,op) ; clobber st0 + (cond ((zerop (tn-offset y)) + (maybe-fp-wait node)) + (t + (inst fst y))))))) + + ;; Quick versions of FSIN and FCOS that require the argument to be + ;; within range 2^63. + (frob fsin-quick %sin-quick fsin) + (frob fcos-quick %cos-quick fcos) + (frob fsqrt %sqrt fsqrt)) + +;;; Quick version of ftan that requires the argument to be within +;;; range 2^63. +(define-vop (ftan-quick) + (:translate %tan-quick) + (:args (x :scs (long-reg) :target fr0)) + (:temporary (:sc long-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc long-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:results (y :scs (long-reg))) + (:arg-types long-float) + (:result-types long-float) + (:policy :fast-safe) + (:note "inline tan function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + (case (tn-offset x) + (0 + (inst fstp fr1)) + (1 + (inst fstp fr0)) + (t + (inst fstp fr0) + (inst fstp fr0) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 2))))) + (inst fptan) + ;; Result is in fr1 + (case (tn-offset y) + (0 + (inst fxch fr1)) + (1) + (t + (inst fxch fr1) + (inst fstd y))))) + +;;; These versions of fsin, fcos, and ftan simply load a 0.0 result if +;;; the argument is out of range 2^63 and would thus be hopelessly +;;; inaccurate. +(macrolet ((frob (func trans op) + `(define-vop (,func) + (:translate ,trans) + (:args (x :scs (long-reg) :target fr0)) + (:temporary (:sc long-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc unsigned-reg :offset eax-offset + :from :argument :to :result) eax) + (:results (y :scs (long-reg))) + (:arg-types long-float) + (:result-types long-float) + (:policy :fast-safe) + (:note "inline sin/cos function") + (:vop-var vop) + (:save-p :compute-only) + (:ignore eax) + (:generator 5 + (note-this-location vop :internal-error) + (unless (zerop (tn-offset x)) + (inst fxch x) ; x to top of stack + (unless (location= x y) + (inst fst x))) ; maybe save it + (inst ,op) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x04) ; C2 + (inst jmp :z DONE) + ;; Else x was out of range so reduce it; ST0 is unchanged. + (inst fstp fr0) ; Load 0.0 + (inst fldz) + DONE + (unless (zerop (tn-offset y)) + (inst fstd y)))))) + (frob fsin %sin fsin) + (frob fcos %cos fcos)) + +(define-vop (ftan) + (:translate %tan) + (:args (x :scs (long-reg) :target fr0)) + (:temporary (:sc long-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc long-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:temporary (:sc unsigned-reg :offset eax-offset + :from :argument :to :result) eax) + (:results (y :scs (long-reg))) + (:arg-types long-float) + (:result-types long-float) + (:ignore eax) + (:policy :fast-safe) + (:note "inline tan function") + (:vop-var vop) + (:save-p :compute-only) + (:ignore eax) + (:generator 5 + (note-this-location vop :internal-error) + (case (tn-offset x) + (0 + (inst fstp fr1)) + (1 + (inst fstp fr0)) + (t + (inst fstp fr0) + (inst fstp fr0) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 2))))) + (inst fptan) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x04) ; C2 + (inst jmp :z DONE) + ;; Else x was out of range so reduce it; ST0 is unchanged. + (inst fldz) ; Load 0.0 + (inst fxch fr1) + DONE + ;; Result is in fr1 + (case (tn-offset y) + (0 + (inst fxch fr1)) + (1) + (t + (inst fxch fr1) + (inst fstd y))))) + +;;; Modified exp that handles the following special cases: +;;; exp(+Inf) is +Inf; exp(-Inf) is 0; exp(NaN) is NaN. +(define-vop (fexp) + (:translate %exp) + (:args (x :scs (long-reg) :target fr0)) + (:temporary (:sc word-reg :offset eax-offset :from :eval :to :result) temp) + (:temporary (:sc long-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc long-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:temporary (:sc long-reg :offset fr2-offset + :from :argument :to :result) fr2) + (:results (y :scs (long-reg))) + (:arg-types long-float) + (:result-types long-float) + (:policy :fast-safe) + (:note "inline exp function") + (:vop-var vop) + (:save-p :compute-only) + (:ignore temp) + (:generator 5 + (note-this-location vop :internal-error) + (unless (zerop (tn-offset x)) + (inst fxch x) ; x to top of stack + (unless (location= x y) + (inst fst x))) ; maybe save it + ;; Check for Inf or NaN + (inst fxam) + (inst fnstsw) + (inst sahf) + (inst jmp :nc NOINFNAN) ; Neither Inf or NaN. + (inst jmp :np NOINFNAN) ; NaN gives NaN? Continue. + (inst and ah-tn #x02) ; Test sign of Inf. + (inst jmp :z DONE) ; +Inf gives +Inf. + (inst fstp fr0) ; -Inf gives 0 + (inst fldz) + (inst jmp-short DONE) + NOINFNAN + (inst fstp fr1) + (inst fldl2e) + (inst fmul fr1) + ;; Now fr0=x log2(e) + (inst fst fr1) + (inst frndint) + (inst fst fr2) + (inst fsubp-sti fr1) + (inst f2xm1) + (inst fld1) + (inst faddp-sti fr1) + (inst fscale) + (inst fld fr0) + DONE + (unless (zerop (tn-offset y)) + (inst fstd y)))) + +;;; Expm1 = exp(x) - 1. +;;; Handles the following special cases: +;;; expm1(+Inf) is +Inf; expm1(-Inf) is -1.0; expm1(NaN) is NaN. +(define-vop (fexpm1) + (:translate %expm1) + (:args (x :scs (long-reg) :target fr0)) + (:temporary (:sc word-reg :offset eax-offset :from :eval :to :result) temp) + (:temporary (:sc long-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc long-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:temporary (:sc long-reg :offset fr2-offset + :from :argument :to :result) fr2) + (:results (y :scs (long-reg))) + (:arg-types long-float) + (:result-types long-float) + (:policy :fast-safe) + (:note "inline expm1 function") + (:vop-var vop) + (:save-p :compute-only) + (:ignore temp) + (:generator 5 + (note-this-location vop :internal-error) + (unless (zerop (tn-offset x)) + (inst fxch x) ; x to top of stack + (unless (location= x y) + (inst fst x))) ; maybe save it + ;; Check for Inf or NaN + (inst fxam) + (inst fnstsw) + (inst sahf) + (inst jmp :nc NOINFNAN) ; Neither Inf or NaN. + (inst jmp :np NOINFNAN) ; NaN gives NaN? Continue. + (inst and ah-tn #x02) ; Test sign of Inf. + (inst jmp :z DONE) ; +Inf gives +Inf. + (inst fstp fr0) ; -Inf gives -1.0 + (inst fld1) + (inst fchs) + (inst jmp-short DONE) + NOINFNAN + ;; Free two stack slots leaving the argument on top. + (inst fstp fr2) + (inst fstp fr0) + (inst fldl2e) + (inst fmul fr1) ; Now fr0 = x log2(e) + (inst fst fr1) + (inst frndint) + (inst fsub-sti fr1) + (inst fxch fr1) + (inst f2xm1) + (inst fscale) + (inst fxch fr1) + (inst fld1) + (inst fscale) + (inst fstp fr1) + (inst fld1) + (inst fsub fr1) + (inst fsubr fr2) + DONE + (unless (zerop (tn-offset y)) + (inst fstd y)))) + +(define-vop (flog) + (:translate %log) + (:args (x :scs (long-reg long-stack descriptor-reg) :target fr0)) + (:temporary (:sc long-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc long-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:results (y :scs (long-reg))) + (:arg-types long-float) + (:result-types long-float) + (:policy :fast-safe) + (:note "inline log function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + (sc-case x + (long-reg + (case (tn-offset x) + (0 + ;; x is in fr0 + (inst fstp fr1) + (inst fldln2) + (inst fxch fr1)) + (1 + ;; x is in fr1 + (inst fstp fr0) + (inst fldln2) + (inst fxch fr1)) + (t + ;; x is in a FP reg, not fr0 or fr1 + (inst fstp fr0) + (inst fstp fr0) + (inst fldln2) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (1- (tn-offset x)))))) + (inst fyl2x)) + ((long-stack descriptor-reg) + (inst fstp fr0) + (inst fstp fr0) + (inst fldln2) + (if (sc-is x long-stack) + (inst fldl (ea-for-lf-stack x)) + (inst fldl (ea-for-lf-desc x))) + (inst fyl2x))) + (inst fld fr0) + (case (tn-offset y) + ((0 1)) + (t (inst fstd y))))) + +(define-vop (flog10) + (:translate %log10) + (:args (x :scs (long-reg long-stack descriptor-reg) :target fr0)) + (:temporary (:sc long-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc long-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:results (y :scs (long-reg))) + (:arg-types long-float) + (:result-types long-float) + (:policy :fast-safe) + (:note "inline log10 function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + (sc-case x + (long-reg + (case (tn-offset x) + (0 + ;; x is in fr0 + (inst fstp fr1) + (inst fldlg2) + (inst fxch fr1)) + (1 + ;; x is in fr1 + (inst fstp fr0) + (inst fldlg2) + (inst fxch fr1)) + (t + ;; x is in a FP reg, not fr0 or fr1 + (inst fstp fr0) + (inst fstp fr0) + (inst fldlg2) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (1- (tn-offset x)))))) + (inst fyl2x)) + ((long-stack descriptor-reg) + (inst fstp fr0) + (inst fstp fr0) + (inst fldlg2) + (if (sc-is x long-stack) + (inst fldl (ea-for-lf-stack x)) + (inst fldl (ea-for-lf-desc x))) + (inst fyl2x))) + (inst fld fr0) + (case (tn-offset y) + ((0 1)) + (t (inst fstd y))))) + +(define-vop (fpow) + (:translate %pow) + (:args (x :scs (long-reg long-stack descriptor-reg) :target fr0) + (y :scs (long-reg long-stack descriptor-reg) :target fr1)) + (:temporary (:sc long-reg :offset fr0-offset + :from (:argument 0) :to :result) fr0) + (:temporary (:sc long-reg :offset fr1-offset + :from (:argument 1) :to :result) fr1) + (:temporary (:sc long-reg :offset fr2-offset + :from :load :to :result) fr2) + (:results (r :scs (long-reg))) + (:arg-types long-float long-float) + (:result-types long-float) + (:policy :fast-safe) + (:note "inline pow function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + ;; Setup x in fr0 and y in fr1 + (cond + ;; x in fr0; y in fr1 + ((and (sc-is x long-reg) (zerop (tn-offset x)) + (sc-is y long-reg) (= 1 (tn-offset y)))) + ;; y in fr1; x not in fr0 + ((and (sc-is y long-reg) (= 1 (tn-offset y))) + ;; Load x to fr0 + (sc-case x + (long-reg + (copy-fp-reg-to-fr0 x)) + (long-stack + (inst fstp fr0) + (inst fldl (ea-for-lf-stack x))) + (descriptor-reg + (inst fstp fr0) + (inst fldl (ea-for-lf-desc x))))) + ;; x in fr0; y not in fr1 + ((and (sc-is x long-reg) (zerop (tn-offset x))) + (inst fxch fr1) + ;; Now load y to fr0 + (sc-case y + (long-reg + (copy-fp-reg-to-fr0 y)) + (long-stack + (inst fstp fr0) + (inst fldl (ea-for-lf-stack y))) + (descriptor-reg + (inst fstp fr0) + (inst fldl (ea-for-lf-desc y)))) + (inst fxch fr1)) + ;; x in fr1; y not in fr1 + ((and (sc-is x long-reg) (= 1 (tn-offset x))) + ;; Load y to fr0 + (sc-case y + (long-reg + (copy-fp-reg-to-fr0 y)) + (long-stack + (inst fstp fr0) + (inst fldl (ea-for-lf-stack y))) + (descriptor-reg + (inst fstp fr0) + (inst fldl (ea-for-lf-desc y)))) + (inst fxch fr1)) + ;; y in fr0; + ((and (sc-is y long-reg) (zerop (tn-offset y))) + (inst fxch fr1) + ;; Now load x to fr0 + (sc-case x + (long-reg + (copy-fp-reg-to-fr0 x)) + (long-stack + (inst fstp fr0) + (inst fldl (ea-for-lf-stack x))) + (descriptor-reg + (inst fstp fr0) + (inst fldl (ea-for-lf-desc x))))) + ;; Neither x or y are in either fr0 or fr1 + (t + ;; Load y then x + (inst fstp fr0) + (inst fstp fr0) + (sc-case y + (long-reg + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset y) 2)))) + (long-stack + (inst fldl (ea-for-lf-stack y))) + (descriptor-reg + (inst fldl (ea-for-lf-desc y)))) + ;; Load x to fr0 + (sc-case x + (long-reg + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (1- (tn-offset x))))) + (long-stack + (inst fldl (ea-for-lf-stack x))) + (descriptor-reg + (inst fldl (ea-for-lf-desc x)))))) + + ;; Now have x at fr0; and y at fr1 + (inst fyl2x) + ;; Now fr0=y log2(x) + (inst fld fr0) + (inst frndint) + (inst fst fr2) + (inst fsubp-sti fr1) + (inst f2xm1) + (inst fld1) + (inst faddp-sti fr1) + (inst fscale) + (inst fld fr0) + (case (tn-offset r) + ((0 1)) + (t (inst fstd r))))) + +(define-vop (fscalen) + (:translate %scalbn) + (:args (x :scs (long-reg long-stack descriptor-reg) :target fr0) + (y :scs (signed-stack signed-reg) :target temp)) + (:temporary (:sc long-reg :offset fr0-offset + :from (:argument 0) :to :result) fr0) + (:temporary (:sc long-reg :offset fr1-offset :from :eval :to :result) fr1) + (:temporary (:sc signed-stack :from (:argument 1) :to :result) temp) + (:results (r :scs (long-reg))) + (:arg-types long-float signed-num) + (:result-types long-float) + (:policy :fast-safe) + (:note "inline scalbn function") + (:generator 5 + ;; Setup x in fr0 and y in fr1 + (sc-case x + (long-reg + (case (tn-offset x) + (0 + (inst fstp fr1) + (sc-case y + (signed-reg + (inst mov temp y) + (inst fild temp)) + (signed-stack + (inst fild y))) + (inst fxch fr1)) + (1 + (inst fstp fr0) + (sc-case y + (signed-reg + (inst mov temp y) + (inst fild temp)) + (signed-stack + (inst fild y))) + (inst fxch fr1)) + (t + (inst fstp fr0) + (inst fstp fr0) + (sc-case y + (signed-reg + (inst mov temp y) + (inst fild temp)) + (signed-stack + (inst fild y))) + (inst fld (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (1- (tn-offset x))))))) + ((long-stack descriptor-reg) + (inst fstp fr0) + (inst fstp fr0) + (sc-case y + (signed-reg + (inst mov temp y) + (inst fild temp)) + (signed-stack + (inst fild y))) + (if (sc-is x long-stack) + (inst fldl (ea-for-lf-stack x)) + (inst fldl (ea-for-lf-desc x))))) + (inst fscale) + (unless (zerop (tn-offset r)) + (inst fstd r)))) + +(define-vop (fscale) + (:translate %scalb) + (:args (x :scs (long-reg long-stack descriptor-reg) :target fr0) + (y :scs (long-reg long-stack descriptor-reg) :target fr1)) + (:temporary (:sc long-reg :offset fr0-offset + :from (:argument 0) :to :result) fr0) + (:temporary (:sc long-reg :offset fr1-offset + :from (:argument 1) :to :result) fr1) + (:results (r :scs (long-reg))) + (:arg-types long-float long-float) + (:result-types long-float) + (:policy :fast-safe) + (:note "inline scalb function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + ;; Setup x in fr0 and y in fr1 + (cond + ;; x in fr0; y in fr1 + ((and (sc-is x long-reg) (zerop (tn-offset x)) + (sc-is y long-reg) (= 1 (tn-offset y)))) + ;; y in fr1; x not in fr0 + ((and (sc-is y long-reg) (= 1 (tn-offset y))) + ;; Load x to fr0 + (sc-case x + (long-reg + (copy-fp-reg-to-fr0 x)) + (long-stack + (inst fstp fr0) + (inst fldl (ea-for-lf-stack x))) + (descriptor-reg + (inst fstp fr0) + (inst fldl (ea-for-lf-desc x))))) + ;; x in fr0; y not in fr1 + ((and (sc-is x long-reg) (zerop (tn-offset x))) + (inst fxch fr1) + ;; Now load y to fr0 + (sc-case y + (long-reg + (copy-fp-reg-to-fr0 y)) + (long-stack + (inst fstp fr0) + (inst fldl (ea-for-lf-stack y))) + (descriptor-reg + (inst fstp fr0) + (inst fldl (ea-for-lf-desc y)))) + (inst fxch fr1)) + ;; x in fr1; y not in fr1 + ((and (sc-is x long-reg) (= 1 (tn-offset x))) + ;; Load y to fr0 + (sc-case y + (long-reg + (copy-fp-reg-to-fr0 y)) + (long-stack + (inst fstp fr0) + (inst fldl (ea-for-lf-stack y))) + (descriptor-reg + (inst fstp fr0) + (inst fldl (ea-for-lf-desc y)))) + (inst fxch fr1)) + ;; y in fr0; + ((and (sc-is y long-reg) (zerop (tn-offset y))) + (inst fxch fr1) + ;; Now load x to fr0 + (sc-case x + (long-reg + (copy-fp-reg-to-fr0 x)) + (long-stack + (inst fstp fr0) + (inst fldl (ea-for-lf-stack x))) + (descriptor-reg + (inst fstp fr0) + (inst fldl (ea-for-lf-desc x))))) + ;; Neither x or y are in either fr0 or fr1 + (t + ;; Load y then x + (inst fstp fr0) + (inst fstp fr0) + (sc-case y + (long-reg + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset y) 2)))) + (long-stack + (inst fldl (ea-for-lf-stack y))) + (descriptor-reg + (inst fldl (ea-for-lf-desc y)))) + ;; Load x to fr0 + (sc-case x + (long-reg + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (1- (tn-offset x))))) + (long-stack + (inst fldl (ea-for-lf-stack x))) + (descriptor-reg + (inst fldl (ea-for-lf-desc x)))))) + + ;; Now have x at fr0; and y at fr1 + (inst fscale) + (unless (zerop (tn-offset r)) + (inst fstd r)))) + +(define-vop (flog1p) + (:translate %log1p) + (:args (x :scs (long-reg) :to :result)) + (:temporary (:sc long-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc long-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:temporary (:sc word-reg :offset eax-offset :from :eval) temp) + (:results (y :scs (long-reg))) + (:arg-types long-float) + (:result-types long-float) + (:policy :fast-safe) + ;; FIXME 1: This appears to be the second DEFINE-VOP of FLOG1P. + ;; Perhaps this is OK because of the #!+LONG-FLOAT wrapped around + ;; an enormous PROGN above. Still, it would be probably be good to + ;; add some code to warn about redefining VOPs. + (:note "inline log1p function") + (:ignore temp) + (:generator 5 + ;; x is in a FP reg, not fr0, fr1. + (inst fstp fr0) + (inst fstp fr0) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 2))) + ;; Check the range + (inst push #x3e947ae1) ; Constant 0.29 + (inst fabs) + (inst fld (make-ea :dword :base esp-tn)) + (inst fcompp) + (inst add esp-tn 4) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x45) + (inst jmp :z WITHIN-RANGE) + ;; Out of range for fyl2xp1. + (inst fld1) + (inst faddd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 1))) + (inst fldln2) + (inst fxch fr1) + (inst fyl2x) + (inst jmp DONE) + + WITHIN-RANGE + (inst fldln2) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 1))) + (inst fyl2xp1) + DONE + (inst fld fr0) + (case (tn-offset y) + ((0 1)) + (t (inst fstd y))))) + +;;; The Pentium has a less restricted implementation of the fyl2xp1 +;;; instruction and a range check can be avoided. +(define-vop (flog1p-pentium) + (:translate %log1p) + (:args (x :scs (long-reg long-stack descriptor-reg) :target fr0)) + (:temporary (:sc long-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc long-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:results (y :scs (long-reg))) + (:arg-types long-float) + (:result-types long-float) + (:policy :fast-safe) + (:guard (member :pentium-style-fyl2xp1 *backend-subfeatures*)) + (:note "inline log1p function") + (:generator 5 + (sc-case x + (long-reg + (case (tn-offset x) + (0 + ;; x is in fr0 + (inst fstp fr1) + (inst fldln2) + (inst fxch fr1)) + (1 + ;; x is in fr1 + (inst fstp fr0) + (inst fldln2) + (inst fxch fr1)) + (t + ;; x is in a FP reg, not fr0 or fr1 + (inst fstp fr0) + (inst fstp fr0) + (inst fldln2) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (1- (tn-offset x))))))) + ((long-stack descriptor-reg) + (inst fstp fr0) + (inst fstp fr0) + (inst fldln2) + (if (sc-is x long-stack) + (inst fldl (ea-for-lf-stack x)) + (inst fldl (ea-for-lf-desc x))))) + (inst fyl2xp1) + (inst fld fr0) + (case (tn-offset y) + ((0 1)) + (t (inst fstd y))))) + +(define-vop (flogb) + (:translate %logb) + (:args (x :scs (long-reg long-stack descriptor-reg) :target fr0)) + (:temporary (:sc long-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc long-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:results (y :scs (long-reg))) + (:arg-types long-float) + (:result-types long-float) + (:policy :fast-safe) + (:note "inline logb function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + (sc-case x + (long-reg + (case (tn-offset x) + (0 + ;; x is in fr0 + (inst fstp fr1)) + (1 + ;; x is in fr1 + (inst fstp fr0)) + (t + ;; x is in a FP reg, not fr0 or fr1 + (inst fstp fr0) + (inst fstp fr0) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 2)))))) + ((long-stack descriptor-reg) + (inst fstp fr0) + (inst fstp fr0) + (if (sc-is x long-stack) + (inst fldl (ea-for-lf-stack x)) + (inst fldl (ea-for-lf-desc x))))) + (inst fxtract) + (case (tn-offset y) + (0 + (inst fxch fr1)) + (1) + (t (inst fxch fr1) + (inst fstd y))))) + +(define-vop (fatan) + (:translate %atan) + (:args (x :scs (long-reg long-stack descriptor-reg) :target fr0)) + (:temporary (:sc long-reg :offset fr0-offset + :from (:argument 0) :to :result) fr0) + (:temporary (:sc long-reg :offset fr1-offset + :from (:argument 0) :to :result) fr1) + (:results (r :scs (long-reg))) + (:arg-types long-float) + (:result-types long-float) + (:policy :fast-safe) + (:note "inline atan function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + ;; Setup x in fr1 and 1.0 in fr0 + (cond + ;; x in fr0 + ((and (sc-is x long-reg) (zerop (tn-offset x))) + (inst fstp fr1)) + ;; x in fr1 + ((and (sc-is x long-reg) (= 1 (tn-offset x))) + (inst fstp fr0)) + ;; x not in fr0 or fr1 + (t + ;; Load x then 1.0 + (inst fstp fr0) + (inst fstp fr0) + (sc-case x + (long-reg + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 2)))) + (long-stack + (inst fldl (ea-for-lf-stack x))) + (descriptor-reg + (inst fldl (ea-for-lf-desc x)))))) + (inst fld1) + ;; Now have x at fr1; and 1.0 at fr0 + (inst fpatan) + (inst fld fr0) + (case (tn-offset r) + ((0 1)) + (t (inst fstd r))))) + +(define-vop (fatan2) + (:translate %atan2) + (:args (x :scs (long-reg long-stack descriptor-reg) :target fr1) + (y :scs (long-reg long-stack descriptor-reg) :target fr0)) + (:temporary (:sc long-reg :offset fr0-offset + :from (:argument 1) :to :result) fr0) + (:temporary (:sc long-reg :offset fr1-offset + :from (:argument 0) :to :result) fr1) + (:results (r :scs (long-reg))) + (:arg-types long-float long-float) + (:result-types long-float) + (:policy :fast-safe) + (:note "inline atan2 function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + ;; Setup x in fr1 and y in fr0 + (cond + ;; y in fr0; x in fr1 + ((and (sc-is y long-reg) (zerop (tn-offset y)) + (sc-is x long-reg) (= 1 (tn-offset x)))) + ;; x in fr1; y not in fr0 + ((and (sc-is x long-reg) (= 1 (tn-offset x))) + ;; Load y to fr0 + (sc-case y + (long-reg + (copy-fp-reg-to-fr0 y)) + (long-stack + (inst fstp fr0) + (inst fldl (ea-for-lf-stack y))) + (descriptor-reg + (inst fstp fr0) + (inst fldl (ea-for-lf-desc y))))) + ;; y in fr0; x not in fr1 + ((and (sc-is y long-reg) (zerop (tn-offset y))) + (inst fxch fr1) + ;; Now load x to fr0 + (sc-case x + (long-reg + (copy-fp-reg-to-fr0 x)) + (long-stack + (inst fstp fr0) + (inst fldl (ea-for-lf-stack x))) + (descriptor-reg + (inst fstp fr0) + (inst fldl (ea-for-lf-desc x)))) + (inst fxch fr1)) + ;; y in fr1; x not in fr1 + ((and (sc-is y long-reg) (= 1 (tn-offset y))) + ;; Load x to fr0 + (sc-case x + (long-reg + (copy-fp-reg-to-fr0 x)) + (long-stack + (inst fstp fr0) + (inst fldl (ea-for-lf-stack x))) + (descriptor-reg + (inst fstp fr0) + (inst fldl (ea-for-lf-desc x)))) + (inst fxch fr1)) + ;; x in fr0; + ((and (sc-is x long-reg) (zerop (tn-offset x))) + (inst fxch fr1) + ;; Now load y to fr0 + (sc-case y + (long-reg + (copy-fp-reg-to-fr0 y)) + (long-stack + (inst fstp fr0) + (inst fldl (ea-for-lf-stack y))) + (descriptor-reg + (inst fstp fr0) + (inst fldl (ea-for-lf-desc y))))) + ;; Neither y or x are in either fr0 or fr1 + (t + ;; Load x then y + (inst fstp fr0) + (inst fstp fr0) + (sc-case x + (long-reg + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 2)))) + (long-stack + (inst fldl (ea-for-lf-stack x))) + (descriptor-reg + (inst fldl (ea-for-lf-desc x)))) + ;; Load y to fr0 + (sc-case y + (long-reg + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (1- (tn-offset y))))) + (long-stack + (inst fldl (ea-for-lf-stack y))) + (descriptor-reg + (inst fldl (ea-for-lf-desc y)))))) + + ;; Now have y at fr0; and x at fr1 + (inst fpatan) + (inst fld fr0) + (case (tn-offset r) + ((0 1)) + (t (inst fstd r))))) + +) ; PROGN #!+LONG-FLOAT + +;;;; complex float VOPs + +(define-vop (make-complex-single-float) + (:translate complex) + (:args (real :scs (single-reg) :to :result :target r + :load-if (not (location= real r))) + (imag :scs (single-reg) :to :save)) + (:arg-types single-float single-float) + (:results (r :scs (complex-single-reg) :from (:argument 0) + :load-if (not (sc-is r complex-single-stack)))) + (:result-types complex-single-float) + (:note "inline complex single-float creation") + (:policy :fast-safe) + (:generator 5 + (sc-case r + (complex-single-reg + (let ((r-real (complex-double-reg-real-tn r))) + (unless (location= real r-real) + (cond ((zerop (tn-offset r-real)) + (copy-fp-reg-to-fr0 real)) + ((zerop (tn-offset real)) + (inst fstd r-real)) + (t + (inst fxch real) + (inst fstd r-real) + (inst fxch real))))) + (let ((r-imag (complex-double-reg-imag-tn r))) + (unless (location= imag r-imag) + (cond ((zerop (tn-offset imag)) + (inst fstd r-imag)) + (t + (inst fxch imag) + (inst fstd r-imag) + (inst fxch imag)))))) + (complex-single-stack + (unless (location= real r) + (cond ((zerop (tn-offset real)) + (inst fst (ea-for-csf-real-stack r))) + (t + (inst fxch real) + (inst fst (ea-for-csf-real-stack r)) + (inst fxch real)))) + (inst fxch imag) + (inst fst (ea-for-csf-imag-stack r)) + (inst fxch imag))))) + +(define-vop (make-complex-double-float) + (:translate complex) + (:args (real :scs (double-reg) :target r + :load-if (not (location= real r))) + (imag :scs (double-reg) :to :save)) + (:arg-types double-float double-float) + (:results (r :scs (complex-double-reg) :from (:argument 0) + :load-if (not (sc-is r complex-double-stack)))) + (:result-types complex-double-float) + (:note "inline complex double-float creation") + (:policy :fast-safe) + (:generator 5 + (sc-case r + (complex-double-reg + (let ((r-real (complex-double-reg-real-tn r))) + (unless (location= real r-real) + (cond ((zerop (tn-offset r-real)) + (copy-fp-reg-to-fr0 real)) + ((zerop (tn-offset real)) + (inst fstd r-real)) + (t + (inst fxch real) + (inst fstd r-real) + (inst fxch real))))) + (let ((r-imag (complex-double-reg-imag-tn r))) + (unless (location= imag r-imag) + (cond ((zerop (tn-offset imag)) + (inst fstd r-imag)) + (t + (inst fxch imag) + (inst fstd r-imag) + (inst fxch imag)))))) + (complex-double-stack + (unless (location= real r) + (cond ((zerop (tn-offset real)) + (inst fstd (ea-for-cdf-real-stack r))) + (t + (inst fxch real) + (inst fstd (ea-for-cdf-real-stack r)) + (inst fxch real)))) + (inst fxch imag) + (inst fstd (ea-for-cdf-imag-stack r)) + (inst fxch imag))))) + +#!+long-float +(define-vop (make-complex-long-float) + (:translate complex) + (:args (real :scs (long-reg) :target r + :load-if (not (location= real r))) + (imag :scs (long-reg) :to :save)) + (:arg-types long-float long-float) + (:results (r :scs (complex-long-reg) :from (:argument 0) + :load-if (not (sc-is r complex-long-stack)))) + (:result-types complex-long-float) + (:note "inline complex long-float creation") + (:policy :fast-safe) + (:generator 5 + (sc-case r + (complex-long-reg + (let ((r-real (complex-double-reg-real-tn r))) + (unless (location= real r-real) + (cond ((zerop (tn-offset r-real)) + (copy-fp-reg-to-fr0 real)) + ((zerop (tn-offset real)) + (inst fstd r-real)) + (t + (inst fxch real) + (inst fstd r-real) + (inst fxch real))))) + (let ((r-imag (complex-double-reg-imag-tn r))) + (unless (location= imag r-imag) + (cond ((zerop (tn-offset imag)) + (inst fstd r-imag)) + (t + (inst fxch imag) + (inst fstd r-imag) + (inst fxch imag)))))) + (complex-long-stack + (unless (location= real r) + (cond ((zerop (tn-offset real)) + (store-long-float (ea-for-clf-real-stack r))) + (t + (inst fxch real) + (store-long-float (ea-for-clf-real-stack r)) + (inst fxch real)))) + (inst fxch imag) + (store-long-float (ea-for-clf-imag-stack r)) + (inst fxch imag))))) + + +(define-vop (complex-float-value) + (:args (x :target r)) + (:results (r)) + (:variant-vars offset) + (:policy :fast-safe) + (:generator 3 + (cond ((sc-is x complex-single-reg complex-double-reg + #!+long-float complex-long-reg) + (let ((value-tn + (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (+ offset (tn-offset x))))) + (unless (location= value-tn r) + (cond ((zerop (tn-offset r)) + (copy-fp-reg-to-fr0 value-tn)) + ((zerop (tn-offset value-tn)) + (inst fstd r)) + (t + (inst fxch value-tn) + (inst fstd r) + (inst fxch value-tn)))))) + ((sc-is r single-reg) + (let ((ea (sc-case x + (complex-single-stack + (ecase offset + (0 (ea-for-csf-real-stack x)) + (1 (ea-for-csf-imag-stack x)))) + (descriptor-reg + (ecase offset + (0 (ea-for-csf-real-desc x)) + (1 (ea-for-csf-imag-desc x))))))) + (with-empty-tn@fp-top(r) + (inst fld ea)))) + ((sc-is r double-reg) + (let ((ea (sc-case x + (complex-double-stack + (ecase offset + (0 (ea-for-cdf-real-stack x)) + (1 (ea-for-cdf-imag-stack x)))) + (descriptor-reg + (ecase offset + (0 (ea-for-cdf-real-desc x)) + (1 (ea-for-cdf-imag-desc x))))))) + (with-empty-tn@fp-top(r) + (inst fldd ea)))) + #!+long-float + ((sc-is r long-reg) + (let ((ea (sc-case x + (complex-long-stack + (ecase offset + (0 (ea-for-clf-real-stack x)) + (1 (ea-for-clf-imag-stack x)))) + (descriptor-reg + (ecase offset + (0 (ea-for-clf-real-desc x)) + (1 (ea-for-clf-imag-desc x))))))) + (with-empty-tn@fp-top(r) + (inst fldl ea)))) + (t (error "COMPLEX-FLOAT-VALUE VOP failure"))))) + +(define-vop (realpart/complex-single-float complex-float-value) + (:translate realpart) + (:args (x :scs (complex-single-reg complex-single-stack descriptor-reg) + :target r)) + (:arg-types complex-single-float) + (:results (r :scs (single-reg))) + (:result-types single-float) + (:note "complex float realpart") + (:variant 0)) + +(define-vop (realpart/complex-double-float complex-float-value) + (:translate realpart) + (:args (x :scs (complex-double-reg complex-double-stack descriptor-reg) + :target r)) + (:arg-types complex-double-float) + (:results (r :scs (double-reg))) + (:result-types double-float) + (:note "complex float realpart") + (:variant 0)) + +#!+long-float +(define-vop (realpart/complex-long-float complex-float-value) + (:translate realpart) + (:args (x :scs (complex-long-reg complex-long-stack descriptor-reg) + :target r)) + (:arg-types complex-long-float) + (:results (r :scs (long-reg))) + (:result-types long-float) + (:note "complex float realpart") + (:variant 0)) + +(define-vop (imagpart/complex-single-float complex-float-value) + (:translate imagpart) + (:args (x :scs (complex-single-reg complex-single-stack descriptor-reg) + :target r)) + (:arg-types complex-single-float) + (:results (r :scs (single-reg))) + (:result-types single-float) + (:note "complex float imagpart") + (:variant 1)) + +(define-vop (imagpart/complex-double-float complex-float-value) + (:translate imagpart) + (:args (x :scs (complex-double-reg complex-double-stack descriptor-reg) + :target r)) + (:arg-types complex-double-float) + (:results (r :scs (double-reg))) + (:result-types double-float) + (:note "complex float imagpart") + (:variant 1)) + +#!+long-float +(define-vop (imagpart/complex-long-float complex-float-value) + (:translate imagpart) + (:args (x :scs (complex-long-reg complex-long-stack descriptor-reg) + :target r)) + (:arg-types complex-long-float) + (:results (r :scs (long-reg))) + (:result-types long-float) + (:note "complex float imagpart") + (:variant 1)) + +;;; hack dummy VOPs to bias the representation selection of their +;;; arguments towards a FP register, which can help avoid consing at +;;; inappropriate locations +(defknown double-float-reg-bias (double-float) (values)) +(define-vop (double-float-reg-bias) + (:translate double-float-reg-bias) + (:args (x :scs (double-reg double-stack) :load-if nil)) + (:arg-types double-float) + (:policy :fast-safe) + (:note "inline dummy FP register bias") + (:ignore x) + (:generator 0)) +(defknown single-float-reg-bias (single-float) (values)) +(define-vop (single-float-reg-bias) + (:translate single-float-reg-bias) + (:args (x :scs (single-reg single-stack) :load-if nil)) + (:arg-types single-float) + (:policy :fast-safe) + (:note "inline dummy FP register bias") + (:ignore x) + (:generator 0)) diff --git a/sbcl-src/src/compiler/x86/float.lisp b/sbcl-src/src/compiler/x86/float.lisp new file mode 100644 index 0000000..11976c3 --- /dev/null +++ b/sbcl-src/src/compiler/x86/float.lisp @@ -0,0 +1,4429 @@ +;;;; floating point support for the x86 + +;;;; This software is part of the SBCL system. See the README file for +;;;; more information. +;;;; +;;;; This software is derived from the CMU CL system, which was +;;;; written at Carnegie Mellon University and released into the +;;;; public domain. The software is in the public domain and is +;;;; provided with absolutely no warranty. See the COPYING and CREDITS +;;;; files for more information. + +(in-package "SB!VM") + +(macrolet ((ea-for-xf-desc (tn slot) + `(make-ea + :dword :base ,tn + :disp (- (* ,slot n-word-bytes) + other-pointer-lowtag)))) + (defun ea-for-sf-desc (tn) + (ea-for-xf-desc tn single-float-value-slot)) + (defun ea-for-df-desc (tn) + (ea-for-xf-desc tn double-float-value-slot)) + #!+long-float + (defun ea-for-lf-desc (tn) + (ea-for-xf-desc tn long-float-value-slot)) + ;; complex floats + (defun ea-for-csf-real-desc (tn) + (ea-for-xf-desc tn complex-single-float-real-slot)) + (defun ea-for-csf-imag-desc (tn) + (ea-for-xf-desc tn complex-single-float-imag-slot)) + (defun ea-for-cdf-real-desc (tn) + (ea-for-xf-desc tn complex-double-float-real-slot)) + (defun ea-for-cdf-imag-desc (tn) + (ea-for-xf-desc tn complex-double-float-imag-slot)) + #!+long-float + (defun ea-for-clf-real-desc (tn) + (ea-for-xf-desc tn complex-long-float-real-slot)) + #!+long-float + (defun ea-for-clf-imag-desc (tn) + (ea-for-xf-desc tn complex-long-float-imag-slot))) + +(macrolet ((ea-for-xf-stack (tn kind) + `(make-ea + :dword :base ebp-tn + :disp (- (* (+ (tn-offset ,tn) + (ecase ,kind (:single 1) (:double 2) (:long 3))) + n-word-bytes))))) + (defun ea-for-sf-stack (tn) + (ea-for-xf-stack tn :single)) + (defun ea-for-df-stack (tn) + (ea-for-xf-stack tn :double)) + #!+long-float + (defun ea-for-lf-stack (tn) + (ea-for-xf-stack tn :long))) + +;;; Telling the FPU to wait is required in order to make signals occur +;;; at the expected place, but naturally slows things down. +;;; +;;; NODE is the node whose compilation policy controls the decision +;;; whether to just blast through carelessly or carefully emit wait +;;; instructions and whatnot. +;;; +;;; NOTE-NEXT-INSTRUCTION, if supplied, is to be passed to +;;; #'NOTE-NEXT-INSTRUCTION. +;;; +;;; Until 2004-03-15, the implementation of this was buggy; it +;;; unconditionally emitted the WAIT instruction. It turns out that +;;; this is the right thing to do anyway; omitting them can lead to +;;; system corruption on conforming code. -- CSR +(defun maybe-fp-wait (node &optional note-next-instruction) + (declare (ignore node)) + #+nil + (when (policy node (or (= debug 3) (> safety speed)))) + (when note-next-instruction + (note-next-instruction note-next-instruction :internal-error)) + (inst wait)) + +;;; complex float stack EAs +(macrolet ((ea-for-cxf-stack (tn kind slot &optional base) + `(make-ea + :dword :base ,base + :disp (- (* (+ (tn-offset ,tn) + (* (ecase ,kind + (:single 1) + (:double 2) + (:long 3)) + (ecase ,slot (:real 1) (:imag 2)))) + n-word-bytes))))) + (defun ea-for-csf-real-stack (tn &optional (base ebp-tn)) + (ea-for-cxf-stack tn :single :real base)) + (defun ea-for-csf-imag-stack (tn &optional (base ebp-tn)) + (ea-for-cxf-stack tn :single :imag base)) + (defun ea-for-cdf-real-stack (tn &optional (base ebp-tn)) + (ea-for-cxf-stack tn :double :real base)) + (defun ea-for-cdf-imag-stack (tn &optional (base ebp-tn)) + (ea-for-cxf-stack tn :double :imag base)) + #!+long-float + (defun ea-for-clf-real-stack (tn &optional (base ebp-tn)) + (ea-for-cxf-stack tn :long :real base)) + #!+long-float + (defun ea-for-clf-imag-stack (tn &optional (base ebp-tn)) + (ea-for-cxf-stack tn :long :imag base))) + +;;; Abstract out the copying of a FP register to the FP stack top, and +;;; provide two alternatives for its implementation. Note: it's not +;;; necessary to distinguish between a single or double register move +;;; here. +;;; +;;; Using a Pop then load. +(defun copy-fp-reg-to-fr0 (reg) + (aver (not (zerop (tn-offset reg)))) + (inst fstp fr0-tn) + (inst fld (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (1- (tn-offset reg))))) +;;; Using Fxch then Fst to restore the original reg contents. +#+nil +(defun copy-fp-reg-to-fr0 (reg) + (aver (not (zerop (tn-offset reg)))) + (inst fxch reg) + (inst fst reg)) + +;;; The x86 can't store a long-float to memory without popping the +;;; stack and marking a register as empty, so it is necessary to +;;; restore the register from memory. +#!+long-float +(defun store-long-float (ea) + (inst fstpl ea) + (inst fldl ea)) + +;;;; move functions + +;;; X is source, Y is destination. +(define-move-fun (load-single 2) (vop x y) + ((single-stack) (single-reg)) + (with-empty-tn@fp-top(y) + (inst fld (ea-for-sf-stack x)))) + +(define-move-fun (store-single 2) (vop x y) + ((single-reg) (single-stack)) + (cond ((zerop (tn-offset x)) + (inst fst (ea-for-sf-stack y))) + (t + (inst fxch x) + (inst fst (ea-for-sf-stack y)) + ;; This may not be necessary as ST0 is likely invalid now. + (inst fxch x)))) + +(define-move-fun (load-double 2) (vop x y) + ((double-stack) (double-reg)) + (with-empty-tn@fp-top(y) + (inst fldd (ea-for-df-stack x)))) + +(define-move-fun (store-double 2) (vop x y) + ((double-reg) (double-stack)) + (cond ((zerop (tn-offset x)) + (inst fstd (ea-for-df-stack y))) + (t + (inst fxch x) + (inst fstd (ea-for-df-stack y)) + ;; This may not be necessary as ST0 is likely invalid now. + (inst fxch x)))) + +#!+long-float +(define-move-fun (load-long 2) (vop x y) + ((long-stack) (long-reg)) + (with-empty-tn@fp-top(y) + (inst fldl (ea-for-lf-stack x)))) + +#!+long-float +(define-move-fun (store-long 2) (vop x y) + ((long-reg) (long-stack)) + (cond ((zerop (tn-offset x)) + (store-long-float (ea-for-lf-stack y))) + (t + (inst fxch x) + (store-long-float (ea-for-lf-stack y)) + ;; This may not be necessary as ST0 is likely invalid now. + (inst fxch x)))) + +;;; The i387 has instructions to load some useful constants. This +;;; doesn't save much time but might cut down on memory access and +;;; reduce the size of the constant vector (CV). Intel claims they are +;;; stored in a more precise form on chip. Anyhow, might as well use +;;; the feature. It can be turned off by hacking the +;;; "immediate-constant-sc" in vm.lisp. +(eval-when (:compile-toplevel :execute) + (setf *read-default-float-format* + #!+long-float 'long-float #!-long-float 'double-float)) +(define-move-fun (load-fp-constant 2) (vop x y) + ((fp-constant) (single-reg double-reg #!+long-float long-reg)) + (let ((value (sb!c::constant-value (sb!c::tn-leaf x)))) + (with-empty-tn@fp-top(y) + (cond ((zerop value) + (inst fldz)) + ((= value 1e0) + (inst fld1)) + ((= value (coerce pi *read-default-float-format*)) + (inst fldpi)) + ((= value (log 10e0 2e0)) + (inst fldl2t)) + ((= value (log 2.718281828459045235360287471352662e0 2e0)) + (inst fldl2e)) + ((= value (log 2e0 10e0)) + (inst fldlg2)) + ((= value (log 2e0 2.718281828459045235360287471352662e0)) + (inst fldln2)) + (t (warn "ignoring bogus i387 constant ~A" value)))))) +(eval-when (:compile-toplevel :execute) + (setf *read-default-float-format* 'single-float)) + +;;;; complex float move functions + +(defun complex-single-reg-real-tn (x) + (make-random-tn :kind :normal :sc (sc-or-lose 'single-reg) + :offset (tn-offset x))) +(defun complex-single-reg-imag-tn (x) + (make-random-tn :kind :normal :sc (sc-or-lose 'single-reg) + :offset (1+ (tn-offset x)))) + +(defun complex-double-reg-real-tn (x) + (make-random-tn :kind :normal :sc (sc-or-lose 'double-reg) + :offset (tn-offset x))) +(defun complex-double-reg-imag-tn (x) + (make-random-tn :kind :normal :sc (sc-or-lose 'double-reg) + :offset (1+ (tn-offset x)))) + +#!+long-float +(defun complex-long-reg-real-tn (x) + (make-random-tn :kind :normal :sc (sc-or-lose 'long-reg) + :offset (tn-offset x))) +#!+long-float +(defun complex-long-reg-imag-tn (x) + (make-random-tn :kind :normal :sc (sc-or-lose 'long-reg) + :offset (1+ (tn-offset x)))) + +;;; X is source, Y is destination. +(define-move-fun (load-complex-single 2) (vop x y) + ((complex-single-stack) (complex-single-reg)) + (let ((real-tn (complex-single-reg-real-tn y))) + (with-empty-tn@fp-top (real-tn) + (inst fld (ea-for-csf-real-stack x)))) + (let ((imag-tn (complex-single-reg-imag-tn y))) + (with-empty-tn@fp-top (imag-tn) + (inst fld (ea-for-csf-imag-stack x))))) + +(define-move-fun (store-complex-single 2) (vop x y) + ((complex-single-reg) (complex-single-stack)) + (let ((real-tn (complex-single-reg-real-tn x))) + (cond ((zerop (tn-offset real-tn)) + (inst fst (ea-for-csf-real-stack y))) + (t + (inst fxch real-tn) + (inst fst (ea-for-csf-real-stack y)) + (inst fxch real-tn)))) + (let ((imag-tn (complex-single-reg-imag-tn x))) + (inst fxch imag-tn) + (inst fst (ea-for-csf-imag-stack y)) + (inst fxch imag-tn))) + +(define-move-fun (load-complex-double 2) (vop x y) + ((complex-double-stack) (complex-double-reg)) + (let ((real-tn (complex-double-reg-real-tn y))) + (with-empty-tn@fp-top(real-tn) + (inst fldd (ea-for-cdf-real-stack x)))) + (let ((imag-tn (complex-double-reg-imag-tn y))) + (with-empty-tn@fp-top(imag-tn) + (inst fldd (ea-for-cdf-imag-stack x))))) + +(define-move-fun (store-complex-double 2) (vop x y) + ((complex-double-reg) (complex-double-stack)) + (let ((real-tn (complex-double-reg-real-tn x))) + (cond ((zerop (tn-offset real-tn)) + (inst fstd (ea-for-cdf-real-stack y))) + (t + (inst fxch real-tn) + (inst fstd (ea-for-cdf-real-stack y)) + (inst fxch real-tn)))) + (let ((imag-tn (complex-double-reg-imag-tn x))) + (inst fxch imag-tn) + (inst fstd (ea-for-cdf-imag-stack y)) + (inst fxch imag-tn))) + +#!+long-float +(define-move-fun (load-complex-long 2) (vop x y) + ((complex-long-stack) (complex-long-reg)) + (let ((real-tn (complex-long-reg-real-tn y))) + (with-empty-tn@fp-top(real-tn) + (inst fldl (ea-for-clf-real-stack x)))) + (let ((imag-tn (complex-long-reg-imag-tn y))) + (with-empty-tn@fp-top(imag-tn) + (inst fldl (ea-for-clf-imag-stack x))))) + +#!+long-float +(define-move-fun (store-complex-long 2) (vop x y) + ((complex-long-reg) (complex-long-stack)) + (let ((real-tn (complex-long-reg-real-tn x))) + (cond ((zerop (tn-offset real-tn)) + (store-long-float (ea-for-clf-real-stack y))) + (t + (inst fxch real-tn) + (store-long-float (ea-for-clf-real-stack y)) + (inst fxch real-tn)))) + (let ((imag-tn (complex-long-reg-imag-tn x))) + (inst fxch imag-tn) + (store-long-float (ea-for-clf-imag-stack y)) + (inst fxch imag-tn))) + + +;;;; move VOPs + +;;; float register to register moves +(define-vop (float-move) + (:args (x)) + (:results (y)) + (:note "float move") + (:generator 0 + (unless (location= x y) + (cond ((zerop (tn-offset y)) + (copy-fp-reg-to-fr0 x)) + ((zerop (tn-offset x)) + (inst fstd y)) + (t + (inst fxch x) + (inst fstd y) + (inst fxch x)))))) + +(define-vop (single-move float-move) + (:args (x :scs (single-reg) :target y :load-if (not (location= x y)))) + (:results (y :scs (single-reg) :load-if (not (location= x y))))) +(define-move-vop single-move :move (single-reg) (single-reg)) + +(define-vop (double-move float-move) + (:args (x :scs (double-reg) :target y :load-if (not (location= x y)))) + (:results (y :scs (double-reg) :load-if (not (location= x y))))) +(define-move-vop double-move :move (double-reg) (double-reg)) + +#!+long-float +(define-vop (long-move float-move) + (:args (x :scs (long-reg) :target y :load-if (not (location= x y)))) + (:results (y :scs (long-reg) :load-if (not (location= x y))))) +#!+long-float +(define-move-vop long-move :move (long-reg) (long-reg)) + +;;; complex float register to register moves +(define-vop (complex-float-move) + (:args (x :target y :load-if (not (location= x y)))) + (:results (y :load-if (not (location= x y)))) + (:note "complex float move") + (:generator 0 + (unless (location= x y) + ;; Note the complex-float-regs are aligned to every second + ;; float register so there is not need to worry about overlap. + (let ((x-real (complex-double-reg-real-tn x)) + (y-real (complex-double-reg-real-tn y))) + (cond ((zerop (tn-offset y-real)) + (copy-fp-reg-to-fr0 x-real)) + ((zerop (tn-offset x-real)) + (inst fstd y-real)) + (t + (inst fxch x-real) + (inst fstd y-real) + (inst fxch x-real)))) + (let ((x-imag (complex-double-reg-imag-tn x)) + (y-imag (complex-double-reg-imag-tn y))) + (inst fxch x-imag) + (inst fstd y-imag) + (inst fxch x-imag))))) + +(define-vop (complex-single-move complex-float-move) + (:args (x :scs (complex-single-reg) :target y + :load-if (not (location= x y)))) + (:results (y :scs (complex-single-reg) :load-if (not (location= x y))))) +(define-move-vop complex-single-move :move + (complex-single-reg) (complex-single-reg)) + +(define-vop (complex-double-move complex-float-move) + (:args (x :scs (complex-double-reg) + :target y :load-if (not (location= x y)))) + (:results (y :scs (complex-double-reg) :load-if (not (location= x y))))) +(define-move-vop complex-double-move :move + (complex-double-reg) (complex-double-reg)) + +#!+long-float +(define-vop (complex-long-move complex-float-move) + (:args (x :scs (complex-long-reg) + :target y :load-if (not (location= x y)))) + (:results (y :scs (complex-long-reg) :load-if (not (location= x y))))) +#!+long-float +(define-move-vop complex-long-move :move + (complex-long-reg) (complex-long-reg)) + +;;; Move from float to a descriptor reg. allocating a new float +;;; object in the process. +(define-vop (move-from-single) + (:args (x :scs (single-reg) :to :save)) + (:results (y :scs (descriptor-reg))) + (:node-var node) + (:note "float to pointer coercion") + (:generator 13 + (with-fixed-allocation (y + single-float-widetag + single-float-size node) + (with-tn@fp-top(x) + (inst fst (ea-for-sf-desc y)))))) +(define-move-vop move-from-single :move + (single-reg) (descriptor-reg)) + +(define-vop (move-from-double) + (:args (x :scs (double-reg) :to :save)) + (:results (y :scs (descriptor-reg))) + (:node-var node) + (:note "float to pointer coercion") + (:generator 13 + (with-fixed-allocation (y + double-float-widetag + double-float-size + node) + (with-tn@fp-top(x) + (inst fstd (ea-for-df-desc y)))))) +(define-move-vop move-from-double :move + (double-reg) (descriptor-reg)) + +#!+long-float +(define-vop (move-from-long) + (:args (x :scs (long-reg) :to :save)) + (:results (y :scs (descriptor-reg))) + (:node-var node) + (:note "float to pointer coercion") + (:generator 13 + (with-fixed-allocation (y + long-float-widetag + long-float-size + node) + (with-tn@fp-top(x) + (store-long-float (ea-for-lf-desc y)))))) +#!+long-float +(define-move-vop move-from-long :move + (long-reg) (descriptor-reg)) + +(define-vop (move-from-fp-constant) + (:args (x :scs (fp-constant))) + (:results (y :scs (descriptor-reg))) + (:generator 2 + (ecase (sb!c::constant-value (sb!c::tn-leaf x)) + (0f0 (load-symbol-value y *fp-constant-0f0*)) + (1f0 (load-symbol-value y *fp-constant-1f0*)) + (0d0 (load-symbol-value y *fp-constant-0d0*)) + (1d0 (load-symbol-value y *fp-constant-1d0*)) + #!+long-float + (0l0 (load-symbol-value y *fp-constant-0l0*)) + #!+long-float + (1l0 (load-symbol-value y *fp-constant-1l0*)) + #!+long-float + (#.pi (load-symbol-value y *fp-constant-pi*)) + #!+long-float + (#.(log 10l0 2l0) (load-symbol-value y *fp-constant-l2t*)) + #!+long-float + (#.(log 2.718281828459045235360287471352662L0 2l0) + (load-symbol-value y *fp-constant-l2e*)) + #!+long-float + (#.(log 2l0 10l0) (load-symbol-value y *fp-constant-lg2*)) + #!+long-float + (#.(log 2l0 2.718281828459045235360287471352662L0) + (load-symbol-value y *fp-constant-ln2*))))) +(define-move-vop move-from-fp-constant :move + (fp-constant) (descriptor-reg)) + +;;; Move from a descriptor to a float register. +(define-vop (move-to-single) + (:args (x :scs (descriptor-reg))) + (:results (y :scs (single-reg))) + (:note "pointer to float coercion") + (:generator 2 + (with-empty-tn@fp-top(y) + (inst fld (ea-for-sf-desc x))))) +(define-move-vop move-to-single :move (descriptor-reg) (single-reg)) + +(define-vop (move-to-double) + (:args (x :scs (descriptor-reg))) + (:results (y :scs (double-reg))) + (:note "pointer to float coercion") + (:generator 2 + (with-empty-tn@fp-top(y) + (inst fldd (ea-for-df-desc x))))) +(define-move-vop move-to-double :move (descriptor-reg) (double-reg)) + +#!+long-float +(define-vop (move-to-long) + (:args (x :scs (descriptor-reg))) + (:results (y :scs (long-reg))) + (:note "pointer to float coercion") + (:generator 2 + (with-empty-tn@fp-top(y) + (inst fldl (ea-for-lf-desc x))))) +#!+long-float +(define-move-vop move-to-long :move (descriptor-reg) (long-reg)) + +;;; Move from complex float to a descriptor reg. allocating a new +;;; complex float object in the process. +(define-vop (move-from-complex-single) + (:args (x :scs (complex-single-reg) :to :save)) + (:results (y :scs (descriptor-reg))) + (:node-var node) + (:note "complex float to pointer coercion") + (:generator 13 + (with-fixed-allocation (y + complex-single-float-widetag + complex-single-float-size + node) + (let ((real-tn (complex-single-reg-real-tn x))) + (with-tn@fp-top(real-tn) + (inst fst (ea-for-csf-real-desc y)))) + (let ((imag-tn (complex-single-reg-imag-tn x))) + (with-tn@fp-top(imag-tn) + (inst fst (ea-for-csf-imag-desc y))))))) +(define-move-vop move-from-complex-single :move + (complex-single-reg) (descriptor-reg)) + +(define-vop (move-from-complex-double) + (:args (x :scs (complex-double-reg) :to :save)) + (:results (y :scs (descriptor-reg))) + (:node-var node) + (:note "complex float to pointer coercion") + (:generator 13 + (with-fixed-allocation (y + complex-double-float-widetag + complex-double-float-size + node) + (let ((real-tn (complex-double-reg-real-tn x))) + (with-tn@fp-top(real-tn) + (inst fstd (ea-for-cdf-real-desc y)))) + (let ((imag-tn (complex-double-reg-imag-tn x))) + (with-tn@fp-top(imag-tn) + (inst fstd (ea-for-cdf-imag-desc y))))))) +(define-move-vop move-from-complex-double :move + (complex-double-reg) (descriptor-reg)) + +#!+long-float +(define-vop (move-from-complex-long) + (:args (x :scs (complex-long-reg) :to :save)) + (:results (y :scs (descriptor-reg))) + (:node-var node) + (:note "complex float to pointer coercion") + (:generator 13 + (with-fixed-allocation (y + complex-long-float-widetag + complex-long-float-size + node) + (let ((real-tn (complex-long-reg-real-tn x))) + (with-tn@fp-top(real-tn) + (store-long-float (ea-for-clf-real-desc y)))) + (let ((imag-tn (complex-long-reg-imag-tn x))) + (with-tn@fp-top(imag-tn) + (store-long-float (ea-for-clf-imag-desc y))))))) +#!+long-float +(define-move-vop move-from-complex-long :move + (complex-long-reg) (descriptor-reg)) + +;;; Move from a descriptor to a complex float register. +(macrolet ((frob (name sc format) + `(progn + (define-vop (,name) + (:args (x :scs (descriptor-reg))) + (:results (y :scs (,sc))) + (:note "pointer to complex float coercion") + (:generator 2 + (let ((real-tn (complex-double-reg-real-tn y))) + (with-empty-tn@fp-top(real-tn) + ,@(ecase format + (:single '((inst fld (ea-for-csf-real-desc x)))) + (:double '((inst fldd (ea-for-cdf-real-desc x)))) + #!+long-float + (:long '((inst fldl (ea-for-clf-real-desc x))))))) + (let ((imag-tn (complex-double-reg-imag-tn y))) + (with-empty-tn@fp-top(imag-tn) + ,@(ecase format + (:single '((inst fld (ea-for-csf-imag-desc x)))) + (:double '((inst fldd (ea-for-cdf-imag-desc x)))) + #!+long-float + (:long '((inst fldl (ea-for-clf-imag-desc x))))))))) + (define-move-vop ,name :move (descriptor-reg) (,sc))))) + (frob move-to-complex-single complex-single-reg :single) + (frob move-to-complex-double complex-double-reg :double) + #!+long-float + (frob move-to-complex-double complex-long-reg :long)) + +;;;; the move argument vops +;;;; +;;;; Note these are also used to stuff fp numbers onto the c-call +;;;; stack so the order is different than the lisp-stack. + +;;; the general MOVE-ARG VOP +(macrolet ((frob (name sc stack-sc format) + `(progn + (define-vop (,name) + (:args (x :scs (,sc) :target y) + (fp :scs (any-reg) + :load-if (not (sc-is y ,sc)))) + (:results (y)) + (:note "float argument move") + (:generator ,(case format (:single 2) (:double 3) (:long 4)) + (sc-case y + (,sc + (unless (location= x y) + (cond ((zerop (tn-offset y)) + (copy-fp-reg-to-fr0 x)) + ((zerop (tn-offset x)) + (inst fstd y)) + (t + (inst fxch x) + (inst fstd y) + (inst fxch x))))) + (,stack-sc + (if (= (tn-offset fp) esp-offset) + (let* ((offset (* (tn-offset y) n-word-bytes)) + (ea (make-ea :dword :base fp :disp offset))) + (with-tn@fp-top(x) + ,@(ecase format + (:single '((inst fst ea))) + (:double '((inst fstd ea))) + #!+long-float + (:long '((store-long-float ea)))))) + (let ((ea (make-ea + :dword :base fp + :disp (- (* (+ (tn-offset y) + ,(case format + (:single 1) + (:double 2) + (:long 3))) + n-word-bytes))))) + (with-tn@fp-top(x) + ,@(ecase format + (:single '((inst fst ea))) + (:double '((inst fstd ea))) + #!+long-float + (:long '((store-long-float ea))))))))))) + (define-move-vop ,name :move-arg + (,sc descriptor-reg) (,sc))))) + (frob move-single-float-arg single-reg single-stack :single) + (frob move-double-float-arg double-reg double-stack :double) + #!+long-float + (frob move-long-float-arg long-reg long-stack :long)) + +;;;; complex float MOVE-ARG VOP +(macrolet ((frob (name sc stack-sc format) + `(progn + (define-vop (,name) + (:args (x :scs (,sc) :target y) + (fp :scs (any-reg) + :load-if (not (sc-is y ,sc)))) + (:results (y)) + (:note "complex float argument move") + (:generator ,(ecase format (:single 2) (:double 3) (:long 4)) + (sc-case y + (,sc + (unless (location= x y) + (let ((x-real (complex-double-reg-real-tn x)) + (y-real (complex-double-reg-real-tn y))) + (cond ((zerop (tn-offset y-real)) + (copy-fp-reg-to-fr0 x-real)) + ((zerop (tn-offset x-real)) + (inst fstd y-real)) + (t + (inst fxch x-real) + (inst fstd y-real) + (inst fxch x-real)))) + (let ((x-imag (complex-double-reg-imag-tn x)) + (y-imag (complex-double-reg-imag-tn y))) + (inst fxch x-imag) + (inst fstd y-imag) + (inst fxch x-imag)))) + (,stack-sc + (let ((real-tn (complex-double-reg-real-tn x))) + (cond ((zerop (tn-offset real-tn)) + ,@(ecase format + (:single + '((inst fst + (ea-for-csf-real-stack y fp)))) + (:double + '((inst fstd + (ea-for-cdf-real-stack y fp)))) + #!+long-float + (:long + '((store-long-float + (ea-for-clf-real-stack y fp)))))) + (t + (inst fxch real-tn) + ,@(ecase format + (:single + '((inst fst + (ea-for-csf-real-stack y fp)))) + (:double + '((inst fstd + (ea-for-cdf-real-stack y fp)))) + #!+long-float + (:long + '((store-long-float + (ea-for-clf-real-stack y fp))))) + (inst fxch real-tn)))) + (let ((imag-tn (complex-double-reg-imag-tn x))) + (inst fxch imag-tn) + ,@(ecase format + (:single + '((inst fst (ea-for-csf-imag-stack y fp)))) + (:double + '((inst fstd (ea-for-cdf-imag-stack y fp)))) + #!+long-float + (:long + '((store-long-float + (ea-for-clf-imag-stack y fp))))) + (inst fxch imag-tn)))))) + (define-move-vop ,name :move-arg + (,sc descriptor-reg) (,sc))))) + (frob move-complex-single-float-arg + complex-single-reg complex-single-stack :single) + (frob move-complex-double-float-arg + complex-double-reg complex-double-stack :double) + #!+long-float + (frob move-complex-long-float-arg + complex-long-reg complex-long-stack :long)) + +(define-move-vop move-arg :move-arg + (single-reg double-reg #!+long-float long-reg + complex-single-reg complex-double-reg #!+long-float complex-long-reg) + (descriptor-reg)) + + +;;;; arithmetic VOPs + +;;; dtc: the floating point arithmetic vops +;;; +;;; Note: Although these can accept x and y on the stack or pointed to +;;; from a descriptor register, they will work with register loading +;;; without these. Same deal with the result - it need only be a +;;; register. When load-tns are needed they will probably be in ST0 +;;; and the code below should be able to correctly handle all cases. +;;; +;;; However it seems to produce better code if all arg. and result +;;; options are used; on the P86 there is no extra cost in using a +;;; memory operand to the FP instructions - not so on the PPro. +;;; +;;; It may also be useful to handle constant args? +;;; +;;; 22-Jul-97: descriptor args lose in some simple cases when +;;; a function result computed in a loop. Then Python insists +;;; on consing the intermediate values! For example +;;; +;;; (defun test(a n) +;;; (declare (type (simple-array double-float (*)) a) +;;; (fixnum n)) +;;; (let ((sum 0d0)) +;;; (declare (type double-float sum)) +;;; (dotimes (i n) +;;; (incf sum (* (aref a i)(aref a i)))) +;;; sum)) +;;; +;;; So, disabling descriptor args until this can be fixed elsewhere. +(macrolet + ((frob (op fop-sti fopr-sti + fop fopr sname scost + fopd foprd dname dcost + lname lcost) + #!-long-float (declare (ignore lcost lname)) + `(progn + (define-vop (,sname) + (:translate ,op) + (:args (x :scs (single-reg single-stack #+nil descriptor-reg) + :to :eval) + (y :scs (single-reg single-stack #+nil descriptor-reg) + :to :eval)) + (:temporary (:sc single-reg :offset fr0-offset + :from :eval :to :result) fr0) + (:results (r :scs (single-reg single-stack))) + (:arg-types single-float single-float) + (:result-types single-float) + (:policy :fast-safe) + (:note "inline float arithmetic") + (:vop-var vop) + (:save-p :compute-only) + (:node-var node) + (:generator ,scost + ;; Handle a few special cases + (cond + ;; x, y, and r are the same register. + ((and (sc-is x single-reg) (location= x r) (location= y r)) + (cond ((zerop (tn-offset r)) + (inst ,fop fr0)) + (t + (inst fxch r) + (inst ,fop fr0) + ;; XX the source register will not be valid. + (note-next-instruction vop :internal-error) + (inst fxch r)))) + + ;; x and r are the same register. + ((and (sc-is x single-reg) (location= x r)) + (cond ((zerop (tn-offset r)) + (sc-case y + (single-reg + ;; ST(0) = ST(0) op ST(y) + (inst ,fop y)) + (single-stack + ;; ST(0) = ST(0) op Mem + (inst ,fop (ea-for-sf-stack y))) + (descriptor-reg + (inst ,fop (ea-for-sf-desc y))))) + (t + ;; y to ST0 + (sc-case y + (single-reg + (unless (zerop (tn-offset y)) + (copy-fp-reg-to-fr0 y))) + ((single-stack descriptor-reg) + (inst fstp fr0) + (if (sc-is y single-stack) + (inst fld (ea-for-sf-stack y)) + (inst fld (ea-for-sf-desc y))))) + ;; ST(i) = ST(i) op ST0 + (inst ,fop-sti r))) + (maybe-fp-wait node vop)) + ;; y and r are the same register. + ((and (sc-is y single-reg) (location= y r)) + (cond ((zerop (tn-offset r)) + (sc-case x + (single-reg + ;; ST(0) = ST(x) op ST(0) + (inst ,fopr x)) + (single-stack + ;; ST(0) = Mem op ST(0) + (inst ,fopr (ea-for-sf-stack x))) + (descriptor-reg + (inst ,fopr (ea-for-sf-desc x))))) + (t + ;; x to ST0 + (sc-case x + (single-reg + (unless (zerop (tn-offset x)) + (copy-fp-reg-to-fr0 x))) + ((single-stack descriptor-reg) + (inst fstp fr0) + (if (sc-is x single-stack) + (inst fld (ea-for-sf-stack x)) + (inst fld (ea-for-sf-desc x))))) + ;; ST(i) = ST(0) op ST(i) + (inst ,fopr-sti r))) + (maybe-fp-wait node vop)) + ;; the default case + (t + ;; Get the result to ST0. + + ;; Special handling is needed if x or y are in ST0, and + ;; simpler code is generated. + (cond + ;; x is in ST0 + ((and (sc-is x single-reg) (zerop (tn-offset x))) + ;; ST0 = ST0 op y + (sc-case y + (single-reg + (inst ,fop y)) + (single-stack + (inst ,fop (ea-for-sf-stack y))) + (descriptor-reg + (inst ,fop (ea-for-sf-desc y))))) + ;; y is in ST0 + ((and (sc-is y single-reg) (zerop (tn-offset y))) + ;; ST0 = x op ST0 + (sc-case x + (single-reg + (inst ,fopr x)) + (single-stack + (inst ,fopr (ea-for-sf-stack x))) + (descriptor-reg + (inst ,fopr (ea-for-sf-desc x))))) + (t + ;; x to ST0 + (sc-case x + (single-reg + (copy-fp-reg-to-fr0 x)) + (single-stack + (inst fstp fr0) + (inst fld (ea-for-sf-stack x))) + (descriptor-reg + (inst fstp fr0) + (inst fld (ea-for-sf-desc x)))) + ;; ST0 = ST0 op y + (sc-case y + (single-reg + (inst ,fop y)) + (single-stack + (inst ,fop (ea-for-sf-stack y))) + (descriptor-reg + (inst ,fop (ea-for-sf-desc y)))))) + + (note-next-instruction vop :internal-error) + + ;; Finally save the result. + (sc-case r + (single-reg + (cond ((zerop (tn-offset r)) + (maybe-fp-wait node)) + (t + (inst fst r)))) + (single-stack + (inst fst (ea-for-sf-stack r)))))))) + + (define-vop (,dname) + (:translate ,op) + (:args (x :scs (double-reg double-stack #+nil descriptor-reg) + :to :eval) + (y :scs (double-reg double-stack #+nil descriptor-reg) + :to :eval)) + (:temporary (:sc double-reg :offset fr0-offset + :from :eval :to :result) fr0) + (:results (r :scs (double-reg double-stack))) + (:arg-types double-float double-float) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline float arithmetic") + (:vop-var vop) + (:save-p :compute-only) + (:node-var node) + (:generator ,dcost + ;; Handle a few special cases. + (cond + ;; x, y, and r are the same register. + ((and (sc-is x double-reg) (location= x r) (location= y r)) + (cond ((zerop (tn-offset r)) + (inst ,fop fr0)) + (t + (inst fxch x) + (inst ,fopd fr0) + ;; XX the source register will not be valid. + (note-next-instruction vop :internal-error) + (inst fxch r)))) + + ;; x and r are the same register. + ((and (sc-is x double-reg) (location= x r)) + (cond ((zerop (tn-offset r)) + (sc-case y + (double-reg + ;; ST(0) = ST(0) op ST(y) + (inst ,fopd y)) + (double-stack + ;; ST(0) = ST(0) op Mem + (inst ,fopd (ea-for-df-stack y))) + (descriptor-reg + (inst ,fopd (ea-for-df-desc y))))) + (t + ;; y to ST0 + (sc-case y + (double-reg + (unless (zerop (tn-offset y)) + (copy-fp-reg-to-fr0 y))) + ((double-stack descriptor-reg) + (inst fstp fr0) + (if (sc-is y double-stack) + (inst fldd (ea-for-df-stack y)) + (inst fldd (ea-for-df-desc y))))) + ;; ST(i) = ST(i) op ST0 + (inst ,fop-sti r))) + (maybe-fp-wait node vop)) + ;; y and r are the same register. + ((and (sc-is y double-reg) (location= y r)) + (cond ((zerop (tn-offset r)) + (sc-case x + (double-reg + ;; ST(0) = ST(x) op ST(0) + (inst ,foprd x)) + (double-stack + ;; ST(0) = Mem op ST(0) + (inst ,foprd (ea-for-df-stack x))) + (descriptor-reg + (inst ,foprd (ea-for-df-desc x))))) + (t + ;; x to ST0 + (sc-case x + (double-reg + (unless (zerop (tn-offset x)) + (copy-fp-reg-to-fr0 x))) + ((double-stack descriptor-reg) + (inst fstp fr0) + (if (sc-is x double-stack) + (inst fldd (ea-for-df-stack x)) + (inst fldd (ea-for-df-desc x))))) + ;; ST(i) = ST(0) op ST(i) + (inst ,fopr-sti r))) + (maybe-fp-wait node vop)) + ;; the default case + (t + ;; Get the result to ST0. + + ;; Special handling is needed if x or y are in ST0, and + ;; simpler code is generated. + (cond + ;; x is in ST0 + ((and (sc-is x double-reg) (zerop (tn-offset x))) + ;; ST0 = ST0 op y + (sc-case y + (double-reg + (inst ,fopd y)) + (double-stack + (inst ,fopd (ea-for-df-stack y))) + (descriptor-reg + (inst ,fopd (ea-for-df-desc y))))) + ;; y is in ST0 + ((and (sc-is y double-reg) (zerop (tn-offset y))) + ;; ST0 = x op ST0 + (sc-case x + (double-reg + (inst ,foprd x)) + (double-stack + (inst ,foprd (ea-for-df-stack x))) + (descriptor-reg + (inst ,foprd (ea-for-df-desc x))))) + (t + ;; x to ST0 + (sc-case x + (double-reg + (copy-fp-reg-to-fr0 x)) + (double-stack + (inst fstp fr0) + (inst fldd (ea-for-df-stack x))) + (descriptor-reg + (inst fstp fr0) + (inst fldd (ea-for-df-desc x)))) + ;; ST0 = ST0 op y + (sc-case y + (double-reg + (inst ,fopd y)) + (double-stack + (inst ,fopd (ea-for-df-stack y))) + (descriptor-reg + (inst ,fopd (ea-for-df-desc y)))))) + + (note-next-instruction vop :internal-error) + + ;; Finally save the result. + (sc-case r + (double-reg + (cond ((zerop (tn-offset r)) + (maybe-fp-wait node)) + (t + (inst fst r)))) + (double-stack + (inst fstd (ea-for-df-stack r)))))))) + + #!+long-float + (define-vop (,lname) + (:translate ,op) + (:args (x :scs (long-reg) :to :eval) + (y :scs (long-reg) :to :eval)) + (:temporary (:sc long-reg :offset fr0-offset + :from :eval :to :result) fr0) + (:results (r :scs (long-reg))) + (:arg-types long-float long-float) + (:result-types long-float) + (:policy :fast-safe) + (:note "inline float arithmetic") + (:vop-var vop) + (:save-p :compute-only) + (:node-var node) + (:generator ,lcost + ;; Handle a few special cases. + (cond + ;; x, y, and r are the same register. + ((and (location= x r) (location= y r)) + (cond ((zerop (tn-offset r)) + (inst ,fop fr0)) + (t + (inst fxch x) + (inst ,fopd fr0) + ;; XX the source register will not be valid. + (note-next-instruction vop :internal-error) + (inst fxch r)))) + + ;; x and r are the same register. + ((location= x r) + (cond ((zerop (tn-offset r)) + ;; ST(0) = ST(0) op ST(y) + (inst ,fopd y)) + (t + ;; y to ST0 + (unless (zerop (tn-offset y)) + (copy-fp-reg-to-fr0 y)) + ;; ST(i) = ST(i) op ST0 + (inst ,fop-sti r))) + (maybe-fp-wait node vop)) + ;; y and r are the same register. + ((location= y r) + (cond ((zerop (tn-offset r)) + ;; ST(0) = ST(x) op ST(0) + (inst ,foprd x)) + (t + ;; x to ST0 + (unless (zerop (tn-offset x)) + (copy-fp-reg-to-fr0 x)) + ;; ST(i) = ST(0) op ST(i) + (inst ,fopr-sti r))) + (maybe-fp-wait node vop)) + ;; the default case + (t + ;; Get the result to ST0. + + ;; Special handling is needed if x or y are in ST0, and + ;; simpler code is generated. + (cond + ;; x is in ST0. + ((zerop (tn-offset x)) + ;; ST0 = ST0 op y + (inst ,fopd y)) + ;; y is in ST0 + ((zerop (tn-offset y)) + ;; ST0 = x op ST0 + (inst ,foprd x)) + (t + ;; x to ST0 + (copy-fp-reg-to-fr0 x) + ;; ST0 = ST0 op y + (inst ,fopd y))) + + (note-next-instruction vop :internal-error) + + ;; Finally save the result. + (cond ((zerop (tn-offset r)) + (maybe-fp-wait node)) + (t + (inst fst r)))))))))) + + (frob + fadd-sti fadd-sti + fadd fadd +/single-float 2 + faddd faddd +/double-float 2 + +/long-float 2) + (frob - fsub-sti fsubr-sti + fsub fsubr -/single-float 2 + fsubd fsubrd -/double-float 2 + -/long-float 2) + (frob * fmul-sti fmul-sti + fmul fmul */single-float 3 + fmuld fmuld */double-float 3 + */long-float 3) + (frob / fdiv-sti fdivr-sti + fdiv fdivr //single-float 12 + fdivd fdivrd //double-float 12 + //long-float 12)) + +(macrolet ((frob (name inst translate sc type) + `(define-vop (,name) + (:args (x :scs (,sc) :target fr0)) + (:results (y :scs (,sc))) + (:translate ,translate) + (:policy :fast-safe) + (:arg-types ,type) + (:result-types ,type) + (:temporary (:sc double-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:ignore fr0) + (:note "inline float arithmetic") + (:vop-var vop) + (:save-p :compute-only) + (:generator 1 + (note-this-location vop :internal-error) + (unless (zerop (tn-offset x)) + (inst fxch x) ; x to top of stack + (unless (location= x y) + (inst fst x))) ; Maybe save it. + (inst ,inst) ; Clobber st0. + (unless (zerop (tn-offset y)) + (inst fst y)))))) + + (frob abs/single-float fabs abs single-reg single-float) + (frob abs/double-float fabs abs double-reg double-float) + #!+long-float + (frob abs/long-float fabs abs long-reg long-float) + (frob %negate/single-float fchs %negate single-reg single-float) + (frob %negate/double-float fchs %negate double-reg double-float) + #!+long-float + (frob %negate/long-float fchs %negate long-reg long-float)) + +;;;; comparison + +(define-vop (=/float) + (:args (x) (y)) + (:temporary (:sc word-reg :offset eax-offset :from :eval) temp) + (:conditional) + (:info target not-p) + (:policy :fast-safe) + (:vop-var vop) + (:save-p :compute-only) + (:note "inline float comparison") + (:ignore temp) + (:generator 3 + (note-this-location vop :internal-error) + (cond + ;; x is in ST0; y is in any reg. + ((zerop (tn-offset x)) + (inst fucom y)) + ;; y is in ST0; x is in another reg. + ((zerop (tn-offset y)) + (inst fucom x)) + ;; x and y are the same register, not ST0 + ((location= x y) + (inst fxch x) + (inst fucom fr0-tn) + (inst fxch x)) + ;; x and y are different registers, neither ST0. + (t + (inst fxch x) + (inst fucom y) + (inst fxch x))) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x45) ; C3 C2 C0 + (inst cmp ah-tn #x40) + (inst jmp (if not-p :ne :e) target))) + +(define-vop (=/single-float =/float) + (:translate =) + (:args (x :scs (single-reg)) + (y :scs (single-reg))) + (:arg-types single-float single-float)) + +(define-vop (=/double-float =/float) + (:translate =) + (:args (x :scs (double-reg)) + (y :scs (double-reg))) + (:arg-types double-float double-float)) + +#!+long-float +(define-vop (=/long-float =/float) + (:translate =) + (:args (x :scs (long-reg)) + (y :scs (long-reg))) + (:arg-types long-float long-float)) + +(define-vop (single-float) + (:translate >) + (:args (x :scs (single-reg single-stack descriptor-reg)) + (y :scs (single-reg single-stack descriptor-reg))) + (:arg-types single-float single-float) + (:temporary (:sc single-reg :offset fr0-offset :from :eval) fr0) + (:temporary (:sc word-reg :offset eax-offset :from :eval) temp) + (:conditional) + (:info target not-p) + (:policy :fast-safe) + (:note "inline float comparison") + (:ignore temp) + (:generator 3 + ;; Handle a few special cases. + (cond + ;; y is ST0. + ((and (sc-is y single-reg) (zerop (tn-offset y))) + (sc-case x + (single-reg + (inst fcom x)) + ((single-stack descriptor-reg) + (if (sc-is x single-stack) + (inst fcom (ea-for-sf-stack x)) + (inst fcom (ea-for-sf-desc x))))) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x45) + (inst cmp ah-tn #x01)) + + ;; general case when y is not in ST0 + (t + ;; x to ST0 + (sc-case x + (single-reg + (unless (zerop (tn-offset x)) + (copy-fp-reg-to-fr0 x))) + ((single-stack descriptor-reg) + (inst fstp fr0) + (if (sc-is x single-stack) + (inst fld (ea-for-sf-stack x)) + (inst fld (ea-for-sf-desc x))))) + (sc-case y + (single-reg + (inst fcom y)) + ((single-stack descriptor-reg) + (if (sc-is y single-stack) + (inst fcom (ea-for-sf-stack y)) + (inst fcom (ea-for-sf-desc y))))) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x45))) + (inst jmp (if not-p :ne :e) target))) + +(define-vop (>double-float) + (:translate >) + (:args (x :scs (double-reg double-stack descriptor-reg)) + (y :scs (double-reg double-stack descriptor-reg))) + (:arg-types double-float double-float) + (:temporary (:sc double-reg :offset fr0-offset :from :eval) fr0) + (:temporary (:sc word-reg :offset eax-offset :from :eval) temp) + (:conditional) + (:info target not-p) + (:policy :fast-safe) + (:note "inline float comparison") + (:ignore temp) + (:generator 3 + ;; Handle a few special cases. + (cond + ;; y is ST0. + ((and (sc-is y double-reg) (zerop (tn-offset y))) + (sc-case x + (double-reg + (inst fcomd x)) + ((double-stack descriptor-reg) + (if (sc-is x double-stack) + (inst fcomd (ea-for-df-stack x)) + (inst fcomd (ea-for-df-desc x))))) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x45) + (inst cmp ah-tn #x01)) + + ;; general case when y is not in ST0 + (t + ;; x to ST0 + (sc-case x + (double-reg + (unless (zerop (tn-offset x)) + (copy-fp-reg-to-fr0 x))) + ((double-stack descriptor-reg) + (inst fstp fr0) + (if (sc-is x double-stack) + (inst fldd (ea-for-df-stack x)) + (inst fldd (ea-for-df-desc x))))) + (sc-case y + (double-reg + (inst fcomd y)) + ((double-stack descriptor-reg) + (if (sc-is y double-stack) + (inst fcomd (ea-for-df-stack y)) + (inst fcomd (ea-for-df-desc y))))) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x45))) + (inst jmp (if not-p :ne :e) target))) + +#!+long-float +(define-vop (>long-float) + (:translate >) + (:args (x :scs (long-reg)) + (y :scs (long-reg))) + (:arg-types long-float long-float) + (:temporary (:sc word-reg :offset eax-offset :from :eval) temp) + (:conditional) + (:info target not-p) + (:policy :fast-safe) + (:note "inline float comparison") + (:ignore temp) + (:generator 3 + (cond + ;; y is in ST0; x is in any reg. + ((zerop (tn-offset y)) + (inst fcomd x) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x45) + (inst cmp ah-tn #x01)) + ;; x is in ST0; y is in another reg. + ((zerop (tn-offset x)) + (inst fcomd y) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x45)) + ;; y and x are the same register, not ST0 + ;; y and x are different registers, neither ST0. + (t + (inst fxch x) + (inst fcomd y) + (inst fxch x) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x45))) + (inst jmp (if not-p :ne :e) target))) + +;;; Comparisons with 0 can use the FTST instruction. + +(define-vop (float-test) + (:args (x)) + (:temporary (:sc word-reg :offset eax-offset :from :eval) temp) + (:conditional) + (:info target not-p y) + (:variant-vars code) + (:policy :fast-safe) + (:vop-var vop) + (:save-p :compute-only) + (:note "inline float comparison") + (:ignore temp y) + (:generator 2 + (note-this-location vop :internal-error) + (cond + ;; x is in ST0 + ((zerop (tn-offset x)) + (inst ftst)) + ;; x not ST0 + (t + (inst fxch x) + (inst ftst) + (inst fxch x))) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x45) ; C3 C2 C0 + (unless (zerop code) + (inst cmp ah-tn code)) + (inst jmp (if not-p :ne :e) target))) + +(define-vop (=0/single-float float-test) + (:translate =) + (:args (x :scs (single-reg))) + (:arg-types single-float (:constant (single-float 0f0 0f0))) + (:variant #x40)) +(define-vop (=0/double-float float-test) + (:translate =) + (:args (x :scs (double-reg))) + (:arg-types double-float (:constant (double-float 0d0 0d0))) + (:variant #x40)) +#!+long-float +(define-vop (=0/long-float float-test) + (:translate =) + (:args (x :scs (long-reg))) + (:arg-types long-float (:constant (long-float 0l0 0l0))) + (:variant #x40)) + +(define-vop (<0/single-float float-test) + (:translate <) + (:args (x :scs (single-reg))) + (:arg-types single-float (:constant (single-float 0f0 0f0))) + (:variant #x01)) +(define-vop (<0/double-float float-test) + (:translate <) + (:args (x :scs (double-reg))) + (:arg-types double-float (:constant (double-float 0d0 0d0))) + (:variant #x01)) +#!+long-float +(define-vop (<0/long-float float-test) + (:translate <) + (:args (x :scs (long-reg))) + (:arg-types long-float (:constant (long-float 0l0 0l0))) + (:variant #x01)) + +(define-vop (>0/single-float float-test) + (:translate >) + (:args (x :scs (single-reg))) + (:arg-types single-float (:constant (single-float 0f0 0f0))) + (:variant #x00)) +(define-vop (>0/double-float float-test) + (:translate >) + (:args (x :scs (double-reg))) + (:arg-types double-float (:constant (double-float 0d0 0d0))) + (:variant #x00)) +#!+long-float +(define-vop (>0/long-float float-test) + (:translate >) + (:args (x :scs (long-reg))) + (:arg-types long-float (:constant (long-float 0l0 0l0))) + (:variant #x00)) + +#!+long-float +(deftransform eql ((x y) (long-float long-float)) + `(and (= (long-float-low-bits x) (long-float-low-bits y)) + (= (long-float-high-bits x) (long-float-high-bits y)) + (= (long-float-exp-bits x) (long-float-exp-bits y)))) + +;;;; conversion + +(macrolet ((frob (name translate to-sc to-type) + `(define-vop (,name) + (:args (x :scs (signed-stack signed-reg) :target temp)) + (:temporary (:sc signed-stack) temp) + (:results (y :scs (,to-sc))) + (:arg-types signed-num) + (:result-types ,to-type) + (:policy :fast-safe) + (:note "inline float coercion") + (:translate ,translate) + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (sc-case x + (signed-reg + (inst mov temp x) + (with-empty-tn@fp-top(y) + (note-this-location vop :internal-error) + (inst fild temp))) + (signed-stack + (with-empty-tn@fp-top(y) + (note-this-location vop :internal-error) + (inst fild x)))))))) + (frob %single-float/signed %single-float single-reg single-float) + (frob %double-float/signed %double-float double-reg double-float) + #!+long-float + (frob %long-float/signed %long-float long-reg long-float)) + +(macrolet ((frob (name translate to-sc to-type) + `(define-vop (,name) + (:args (x :scs (unsigned-reg))) + (:results (y :scs (,to-sc))) + (:arg-types unsigned-num) + (:result-types ,to-type) + (:policy :fast-safe) + (:note "inline float coercion") + (:translate ,translate) + (:vop-var vop) + (:save-p :compute-only) + (:generator 6 + (inst push 0) + (inst push x) + (with-empty-tn@fp-top(y) + (note-this-location vop :internal-error) + (inst fildl (make-ea :dword :base esp-tn))) + (inst add esp-tn 8))))) + (frob %single-float/unsigned %single-float single-reg single-float) + (frob %double-float/unsigned %double-float double-reg double-float) + #!+long-float + (frob %long-float/unsigned %long-float long-reg long-float)) + +;;; These should be no-ops but the compiler might want to move some +;;; things around. +(macrolet ((frob (name translate from-sc from-type to-sc to-type) + `(define-vop (,name) + (:args (x :scs (,from-sc) :target y)) + (:results (y :scs (,to-sc))) + (:arg-types ,from-type) + (:result-types ,to-type) + (:policy :fast-safe) + (:note "inline float coercion") + (:translate ,translate) + (:vop-var vop) + (:save-p :compute-only) + (:generator 2 + (note-this-location vop :internal-error) + (unless (location= x y) + (cond + ((zerop (tn-offset x)) + ;; x is in ST0, y is in another reg. not ST0 + (inst fst y)) + ((zerop (tn-offset y)) + ;; y is in ST0, x is in another reg. not ST0 + (copy-fp-reg-to-fr0 x)) + (t + ;; Neither x or y are in ST0, and they are not in + ;; the same reg. + (inst fxch x) + (inst fst y) + (inst fxch x)))))))) + + (frob %single-float/double-float %single-float double-reg + double-float single-reg single-float) + #!+long-float + (frob %single-float/long-float %single-float long-reg + long-float single-reg single-float) + (frob %double-float/single-float %double-float single-reg single-float + double-reg double-float) + #!+long-float + (frob %double-float/long-float %double-float long-reg long-float + double-reg double-float) + #!+long-float + (frob %long-float/single-float %long-float single-reg single-float + long-reg long-float) + #!+long-float + (frob %long-float/double-float %long-float double-reg double-float + long-reg long-float)) + +(macrolet ((frob (trans from-sc from-type round-p) + `(define-vop (,(symbolicate trans "/" from-type)) + (:args (x :scs (,from-sc))) + (:temporary (:sc signed-stack) stack-temp) + ,@(unless round-p + '((:temporary (:sc unsigned-stack) scw) + (:temporary (:sc any-reg) rcw))) + (:results (y :scs (signed-reg))) + (:arg-types ,from-type) + (:result-types signed-num) + (:translate ,trans) + (:policy :fast-safe) + (:note "inline float truncate") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + ,@(unless round-p + '((note-this-location vop :internal-error) + ;; Catch any pending FPE exceptions. + (inst wait))) + (,(if round-p 'progn 'pseudo-atomic) + ;; Normal mode (for now) is "round to best". + (with-tn@fp-top (x) + ,@(unless round-p + '((inst fnstcw scw) ; save current control word + (move rcw scw) ; into 16-bit register + (inst or rcw (ash #b11 10)) ; CHOP + (move stack-temp rcw) + (inst fldcw stack-temp))) + (sc-case y + (signed-stack + (inst fist y)) + (signed-reg + (inst fist stack-temp) + (inst mov y stack-temp))) + ,@(unless round-p + '((inst fldcw scw))))))))) + (frob %unary-truncate single-reg single-float nil) + (frob %unary-truncate double-reg double-float nil) + #!+long-float + (frob %unary-truncate long-reg long-float nil) + (frob %unary-round single-reg single-float t) + (frob %unary-round double-reg double-float t) + #!+long-float + (frob %unary-round long-reg long-float t)) + +(macrolet ((frob (trans from-sc from-type round-p) + `(define-vop (,(symbolicate trans "/" from-type "=>UNSIGNED")) + (:args (x :scs (,from-sc) :target fr0)) + (:temporary (:sc double-reg :offset fr0-offset + :from :argument :to :result) fr0) + ,@(unless round-p + '((:temporary (:sc unsigned-stack) stack-temp) + (:temporary (:sc unsigned-stack) scw) + (:temporary (:sc any-reg) rcw))) + (:results (y :scs (unsigned-reg))) + (:arg-types ,from-type) + (:result-types unsigned-num) + (:translate ,trans) + (:policy :fast-safe) + (:note "inline float truncate") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + ,@(unless round-p + '((note-this-location vop :internal-error) + ;; Catch any pending FPE exceptions. + (inst wait))) + ;; Normal mode (for now) is "round to best". + (unless (zerop (tn-offset x)) + (copy-fp-reg-to-fr0 x)) + ,@(unless round-p + '((inst fnstcw scw) ; save current control word + (move rcw scw) ; into 16-bit register + (inst or rcw (ash #b11 10)) ; CHOP + (move stack-temp rcw) + (inst fldcw stack-temp))) + (inst sub esp-tn 8) + (inst fistpl (make-ea :dword :base esp-tn)) + (inst pop y) + (inst fld fr0) ; copy fr0 to at least restore stack. + (inst add esp-tn 4) + ,@(unless round-p + '((inst fldcw scw))))))) + (frob %unary-truncate single-reg single-float nil) + (frob %unary-truncate double-reg double-float nil) + #!+long-float + (frob %unary-truncate long-reg long-float nil) + (frob %unary-round single-reg single-float t) + (frob %unary-round double-reg double-float t) + #!+long-float + (frob %unary-round long-reg long-float t)) + +(define-vop (make-single-float) + (:args (bits :scs (signed-reg) :target res + :load-if (not (or (and (sc-is bits signed-stack) + (sc-is res single-reg)) + (and (sc-is bits signed-stack) + (sc-is res single-stack) + (location= bits res)))))) + (:results (res :scs (single-reg single-stack))) + (:temporary (:sc signed-stack) stack-temp) + (:arg-types signed-num) + (:result-types single-float) + (:translate make-single-float) + (:policy :fast-safe) + (:vop-var vop) + (:generator 4 + (sc-case res + (single-stack + (sc-case bits + (signed-reg + (inst mov res bits)) + (signed-stack + (aver (location= bits res))))) + (single-reg + (sc-case bits + (signed-reg + ;; source must be in memory + (inst mov stack-temp bits) + (with-empty-tn@fp-top(res) + (inst fld stack-temp))) + (signed-stack + (with-empty-tn@fp-top(res) + (inst fld bits)))))))) + +(define-vop (make-double-float) + (:args (hi-bits :scs (signed-reg)) + (lo-bits :scs (unsigned-reg))) + (:results (res :scs (double-reg))) + (:temporary (:sc double-stack) temp) + (:arg-types signed-num unsigned-num) + (:result-types double-float) + (:translate make-double-float) + (:policy :fast-safe) + (:vop-var vop) + (:generator 2 + (let ((offset (1+ (tn-offset temp)))) + (storew hi-bits ebp-tn (- offset)) + (storew lo-bits ebp-tn (- (1+ offset))) + (with-empty-tn@fp-top(res) + (inst fldd (make-ea :dword :base ebp-tn + :disp (- (* (1+ offset) n-word-bytes)))))))) + +#!+long-float +(define-vop (make-long-float) + (:args (exp-bits :scs (signed-reg)) + (hi-bits :scs (unsigned-reg)) + (lo-bits :scs (unsigned-reg))) + (:results (res :scs (long-reg))) + (:temporary (:sc long-stack) temp) + (:arg-types signed-num unsigned-num unsigned-num) + (:result-types long-float) + (:translate make-long-float) + (:policy :fast-safe) + (:vop-var vop) + (:generator 3 + (let ((offset (1+ (tn-offset temp)))) + (storew exp-bits ebp-tn (- offset)) + (storew hi-bits ebp-tn (- (1+ offset))) + (storew lo-bits ebp-tn (- (+ offset 2))) + (with-empty-tn@fp-top(res) + (inst fldl (make-ea :dword :base ebp-tn + :disp (- (* (+ offset 2) n-word-bytes)))))))) + +(define-vop (single-float-bits) + (:args (float :scs (single-reg descriptor-reg) + :load-if (not (sc-is float single-stack)))) + (:results (bits :scs (signed-reg))) + (:temporary (:sc signed-stack :from :argument :to :result) stack-temp) + (:arg-types single-float) + (:result-types signed-num) + (:translate single-float-bits) + (:policy :fast-safe) + (:vop-var vop) + (:generator 4 + (sc-case bits + (signed-reg + (sc-case float + (single-reg + (with-tn@fp-top(float) + (inst fst stack-temp) + (inst mov bits stack-temp))) + (single-stack + (inst mov bits float)) + (descriptor-reg + (loadw + bits float single-float-value-slot + other-pointer-lowtag)))) + (signed-stack + (sc-case float + (single-reg + (with-tn@fp-top(float) + (inst fst bits)))))))) + +(define-vop (double-float-high-bits) + (:args (float :scs (double-reg descriptor-reg) + :load-if (not (sc-is float double-stack)))) + (:results (hi-bits :scs (signed-reg))) + (:temporary (:sc double-stack) temp) + (:arg-types double-float) + (:result-types signed-num) + (:translate double-float-high-bits) + (:policy :fast-safe) + (:vop-var vop) + (:generator 5 + (sc-case float + (double-reg + (with-tn@fp-top(float) + (let ((where (make-ea :dword :base ebp-tn + :disp (- (* (+ 2 (tn-offset temp)) + n-word-bytes))))) + (inst fstd where))) + (loadw hi-bits ebp-tn (- (1+ (tn-offset temp))))) + (double-stack + (loadw hi-bits ebp-tn (- (1+ (tn-offset float))))) + (descriptor-reg + (loadw hi-bits float (1+ double-float-value-slot) + other-pointer-lowtag))))) + +(define-vop (double-float-low-bits) + (:args (float :scs (double-reg descriptor-reg) + :load-if (not (sc-is float double-stack)))) + (:results (lo-bits :scs (unsigned-reg))) + (:temporary (:sc double-stack) temp) + (:arg-types double-float) + (:result-types unsigned-num) + (:translate double-float-low-bits) + (:policy :fast-safe) + (:vop-var vop) + (:generator 5 + (sc-case float + (double-reg + (with-tn@fp-top(float) + (let ((where (make-ea :dword :base ebp-tn + :disp (- (* (+ 2 (tn-offset temp)) + n-word-bytes))))) + (inst fstd where))) + (loadw lo-bits ebp-tn (- (+ 2 (tn-offset temp))))) + (double-stack + (loadw lo-bits ebp-tn (- (+ 2 (tn-offset float))))) + (descriptor-reg + (loadw lo-bits float double-float-value-slot + other-pointer-lowtag))))) + +#!+long-float +(define-vop (long-float-exp-bits) + (:args (float :scs (long-reg descriptor-reg) + :load-if (not (sc-is float long-stack)))) + (:results (exp-bits :scs (signed-reg))) + (:temporary (:sc long-stack) temp) + (:arg-types long-float) + (:result-types signed-num) + (:translate long-float-exp-bits) + (:policy :fast-safe) + (:vop-var vop) + (:generator 5 + (sc-case float + (long-reg + (with-tn@fp-top(float) + (let ((where (make-ea :dword :base ebp-tn + :disp (- (* (+ 3 (tn-offset temp)) + n-word-bytes))))) + (store-long-float where))) + (inst movsx exp-bits + (make-ea :word :base ebp-tn + :disp (* (- (1+ (tn-offset temp))) n-word-bytes)))) + (long-stack + (inst movsx exp-bits + (make-ea :word :base ebp-tn + :disp (* (- (1+ (tn-offset float))) n-word-bytes)))) + (descriptor-reg + (inst movsx exp-bits + (make-ea :word :base float + :disp (- (* (+ 2 long-float-value-slot) + n-word-bytes) + other-pointer-lowtag))))))) + +#!+long-float +(define-vop (long-float-high-bits) + (:args (float :scs (long-reg descriptor-reg) + :load-if (not (sc-is float long-stack)))) + (:results (hi-bits :scs (unsigned-reg))) + (:temporary (:sc long-stack) temp) + (:arg-types long-float) + (:result-types unsigned-num) + (:translate long-float-high-bits) + (:policy :fast-safe) + (:vop-var vop) + (:generator 5 + (sc-case float + (long-reg + (with-tn@fp-top(float) + (let ((where (make-ea :dword :base ebp-tn + :disp (- (* (+ 3 (tn-offset temp)) + n-word-bytes))))) + (store-long-float where))) + (loadw hi-bits ebp-tn (- (+ (tn-offset temp) 2)))) + (long-stack + (loadw hi-bits ebp-tn (- (+ (tn-offset float) 2)))) + (descriptor-reg + (loadw hi-bits float (1+ long-float-value-slot) + other-pointer-lowtag))))) + +#!+long-float +(define-vop (long-float-low-bits) + (:args (float :scs (long-reg descriptor-reg) + :load-if (not (sc-is float long-stack)))) + (:results (lo-bits :scs (unsigned-reg))) + (:temporary (:sc long-stack) temp) + (:arg-types long-float) + (:result-types unsigned-num) + (:translate long-float-low-bits) + (:policy :fast-safe) + (:vop-var vop) + (:generator 5 + (sc-case float + (long-reg + (with-tn@fp-top(float) + (let ((where (make-ea :dword :base ebp-tn + :disp (- (* (+ 3 (tn-offset temp)) + n-word-bytes))))) + (store-long-float where))) + (loadw lo-bits ebp-tn (- (+ (tn-offset temp) 3)))) + (long-stack + (loadw lo-bits ebp-tn (- (+ (tn-offset float) 3)))) + (descriptor-reg + (loadw lo-bits float long-float-value-slot + other-pointer-lowtag))))) + +;;;; float mode hackery + +(sb!xc:deftype float-modes () '(unsigned-byte 32)) ; really only 16 +(defknown floating-point-modes () float-modes (flushable)) +(defknown ((setf floating-point-modes)) (float-modes) + float-modes) + +(def!constant npx-env-size (* 7 n-word-bytes)) +(def!constant npx-cw-offset 0) +(def!constant npx-sw-offset 4) + +(define-vop (floating-point-modes) + (:results (res :scs (unsigned-reg))) + (:result-types unsigned-num) + (:translate floating-point-modes) + (:policy :fast-safe) + (:temporary (:sc unsigned-reg :offset eax-offset :target res + :to :result) eax) + (:generator 8 + (inst sub esp-tn npx-env-size) ; Make space on stack. + (inst wait) ; Catch any pending FPE exceptions + (inst fstenv (make-ea :dword :base esp-tn)) ; masks all exceptions + (inst fldenv (make-ea :dword :base esp-tn)) ; Restore previous state. + ;; Move current status to high word. + (inst mov eax (make-ea :dword :base esp-tn :disp (- npx-sw-offset 2))) + ;; Move exception mask to low word. + (inst mov ax-tn (make-ea :word :base esp-tn :disp npx-cw-offset)) + (inst add esp-tn npx-env-size) ; Pop stack. + (inst xor eax #x3f) ; Flip exception mask to trap enable bits. + (move res eax))) + +(define-vop (set-floating-point-modes) + (:args (new :scs (unsigned-reg) :to :result :target res)) + (:results (res :scs (unsigned-reg))) + (:arg-types unsigned-num) + (:result-types unsigned-num) + (:translate (setf floating-point-modes)) + (:policy :fast-safe) + (:temporary (:sc unsigned-reg :offset eax-offset + :from :eval :to :result) eax) + (:generator 3 + (inst sub esp-tn npx-env-size) ; Make space on stack. + (inst wait) ; Catch any pending FPE exceptions. + (inst fstenv (make-ea :dword :base esp-tn)) + (inst mov eax new) + (inst xor eax #x3f) ; Turn trap enable bits into exception mask. + (inst mov (make-ea :word :base esp-tn :disp npx-cw-offset) ax-tn) + (inst shr eax 16) ; position status word + (inst mov (make-ea :word :base esp-tn :disp npx-sw-offset) ax-tn) + (inst fldenv (make-ea :dword :base esp-tn)) + (inst add esp-tn npx-env-size) ; Pop stack. + (move res new))) + +#!-long-float +(progn + +;;; Let's use some of the 80387 special functions. +;;; +;;; These defs will not take effect unless code/irrat.lisp is modified +;;; to remove the inlined alien routine def. + +(macrolet ((frob (func trans op) + `(define-vop (,func) + (:args (x :scs (double-reg) :target fr0)) + (:temporary (:sc double-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:ignore fr0) + (:results (y :scs (double-reg))) + (:arg-types double-float) + (:result-types double-float) + (:translate ,trans) + (:policy :fast-safe) + (:note "inline NPX function") + (:vop-var vop) + (:save-p :compute-only) + (:node-var node) + (:generator 5 + (note-this-location vop :internal-error) + (unless (zerop (tn-offset x)) + (inst fxch x) ; x to top of stack + (unless (location= x y) + (inst fst x))) ; maybe save it + (inst ,op) ; clobber st0 + (cond ((zerop (tn-offset y)) + (maybe-fp-wait node)) + (t + (inst fst y))))))) + + ;; Quick versions of fsin and fcos that require the argument to be + ;; within range 2^63. + (frob fsin-quick %sin-quick fsin) + (frob fcos-quick %cos-quick fcos) + (frob fsqrt %sqrt fsqrt)) + +;;; Quick version of ftan that requires the argument to be within +;;; range 2^63. +(define-vop (ftan-quick) + (:translate %tan-quick) + (:args (x :scs (double-reg) :target fr0)) + (:temporary (:sc double-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:results (y :scs (double-reg))) + (:arg-types double-float) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline tan function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + (case (tn-offset x) + (0 + (inst fstp fr1)) + (1 + (inst fstp fr0)) + (t + (inst fstp fr0) + (inst fstp fr0) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 2))))) + (inst fptan) + ;; Result is in fr1 + (case (tn-offset y) + (0 + (inst fxch fr1)) + (1) + (t + (inst fxch fr1) + (inst fstd y))))) + +;;; KLUDGE: these versions of fsin, fcos, and ftan simply load a 0.0 +;;; result if the argument is out of range 2^63 and would thus be +;;; hopelessly inaccurate. +(macrolet ((frob (func trans op) + `(define-vop (,func) + (:translate ,trans) + (:args (x :scs (double-reg) :target fr0)) + (:temporary (:sc double-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc unsigned-reg :offset eax-offset + :from :argument :to :result) eax) + (:results (y :scs (double-reg))) + (:arg-types double-float) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline sin/cos function") + (:vop-var vop) + (:save-p :compute-only) + (:ignore eax) + (:generator 5 + (note-this-location vop :internal-error) + (unless (zerop (tn-offset x)) + (inst fxch x) ; x to top of stack + (unless (location= x y) + (inst fst x))) ; maybe save it + (inst ,op) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x04) ; C2 + (inst jmp :z DONE) + ;; Else x was out of range so reduce it; ST0 is unchanged. + (inst fstp fr0) ; Load 0.0 + (inst fldz) + DONE + (unless (zerop (tn-offset y)) + (inst fstd y)))))) + (frob fsin %sin fsin) + (frob fcos %cos fcos)) + +(define-vop (ftan) + (:translate %tan) + (:args (x :scs (double-reg) :target fr0)) + (:temporary (:sc double-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:temporary (:sc unsigned-reg :offset eax-offset + :from :argument :to :result) eax) + (:results (y :scs (double-reg))) + (:arg-types double-float) + (:result-types double-float) + (:ignore eax) + (:policy :fast-safe) + (:note "inline tan function") + (:vop-var vop) + (:save-p :compute-only) + (:ignore eax) + (:generator 5 + (note-this-location vop :internal-error) + (case (tn-offset x) + (0 + (inst fstp fr1)) + (1 + (inst fstp fr0)) + (t + (inst fstp fr0) + (inst fstp fr0) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 2))))) + (inst fptan) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x04) ; C2 + (inst jmp :z DONE) + ;; Else x was out of range so load 0.0 + (inst fxch fr1) + DONE + ;; Result is in fr1 + (case (tn-offset y) + (0 + (inst fxch fr1)) + (1) + (t + (inst fxch fr1) + (inst fstd y))))) + +;;; %exp that handles the following special cases: exp(+Inf) is +Inf; +;;; exp(-Inf) is 0; exp(NaN) is NaN. +(define-vop (fexp) + (:translate %exp) + (:args (x :scs (double-reg) :target fr0)) + (:temporary (:sc word-reg :offset eax-offset :from :eval :to :result) temp) + (:temporary (:sc double-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:temporary (:sc double-reg :offset fr2-offset + :from :argument :to :result) fr2) + (:results (y :scs (double-reg))) + (:arg-types double-float) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline exp function") + (:vop-var vop) + (:save-p :compute-only) + (:ignore temp) + (:generator 5 + (note-this-location vop :internal-error) + (unless (zerop (tn-offset x)) + (inst fxch x) ; x to top of stack + (unless (location= x y) + (inst fst x))) ; maybe save it + ;; Check for Inf or NaN + (inst fxam) + (inst fnstsw) + (inst sahf) + (inst jmp :nc NOINFNAN) ; Neither Inf or NaN. + (inst jmp :np NOINFNAN) ; NaN gives NaN? Continue. + (inst and ah-tn #x02) ; Test sign of Inf. + (inst jmp :z DONE) ; +Inf gives +Inf. + (inst fstp fr0) ; -Inf gives 0 + (inst fldz) + (inst jmp-short DONE) + NOINFNAN + (inst fstp fr1) + (inst fldl2e) + (inst fmul fr1) + ;; Now fr0=x log2(e) + (inst fst fr1) + (inst frndint) + (inst fst fr2) + (inst fsubp-sti fr1) + (inst f2xm1) + (inst fld1) + (inst faddp-sti fr1) + (inst fscale) + (inst fld fr0) + DONE + (unless (zerop (tn-offset y)) + (inst fstd y)))) + +;;; Expm1 = exp(x) - 1. +;;; Handles the following special cases: +;;; expm1(+Inf) is +Inf; expm1(-Inf) is -1.0; expm1(NaN) is NaN. +(define-vop (fexpm1) + (:translate %expm1) + (:args (x :scs (double-reg) :target fr0)) + (:temporary (:sc word-reg :offset eax-offset :from :eval :to :result) temp) + (:temporary (:sc double-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:temporary (:sc double-reg :offset fr2-offset + :from :argument :to :result) fr2) + (:results (y :scs (double-reg))) + (:arg-types double-float) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline expm1 function") + (:vop-var vop) + (:save-p :compute-only) + (:ignore temp) + (:generator 5 + (note-this-location vop :internal-error) + (unless (zerop (tn-offset x)) + (inst fxch x) ; x to top of stack + (unless (location= x y) + (inst fst x))) ; maybe save it + ;; Check for Inf or NaN + (inst fxam) + (inst fnstsw) + (inst sahf) + (inst jmp :nc NOINFNAN) ; Neither Inf or NaN. + (inst jmp :np NOINFNAN) ; NaN gives NaN? Continue. + (inst and ah-tn #x02) ; Test sign of Inf. + (inst jmp :z DONE) ; +Inf gives +Inf. + (inst fstp fr0) ; -Inf gives -1.0 + (inst fld1) + (inst fchs) + (inst jmp-short DONE) + NOINFNAN + ;; Free two stack slots leaving the argument on top. + (inst fstp fr2) + (inst fstp fr0) + (inst fldl2e) + (inst fmul fr1) ; Now fr0 = x log2(e) + (inst fst fr1) + (inst frndint) + (inst fsub-sti fr1) + (inst fxch fr1) + (inst f2xm1) + (inst fscale) + (inst fxch fr1) + (inst fld1) + (inst fscale) + (inst fstp fr1) + (inst fld1) + (inst fsub fr1) + (inst fsubr fr2) + DONE + (unless (zerop (tn-offset y)) + (inst fstd y)))) + +(define-vop (flog) + (:translate %log) + (:args (x :scs (double-reg double-stack descriptor-reg) :target fr0)) + (:temporary (:sc double-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:results (y :scs (double-reg))) + (:arg-types double-float) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline log function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + (sc-case x + (double-reg + (case (tn-offset x) + (0 + ;; x is in fr0 + (inst fstp fr1) + (inst fldln2) + (inst fxch fr1)) + (1 + ;; x is in fr1 + (inst fstp fr0) + (inst fldln2) + (inst fxch fr1)) + (t + ;; x is in a FP reg, not fr0 or fr1 + (inst fstp fr0) + (inst fstp fr0) + (inst fldln2) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (1- (tn-offset x)))))) + (inst fyl2x)) + ((double-stack descriptor-reg) + (inst fstp fr0) + (inst fstp fr0) + (inst fldln2) + (if (sc-is x double-stack) + (inst fldd (ea-for-df-stack x)) + (inst fldd (ea-for-df-desc x))) + (inst fyl2x))) + (inst fld fr0) + (case (tn-offset y) + ((0 1)) + (t (inst fstd y))))) + +(define-vop (flog10) + (:translate %log10) + (:args (x :scs (double-reg double-stack descriptor-reg) :target fr0)) + (:temporary (:sc double-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:results (y :scs (double-reg))) + (:arg-types double-float) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline log10 function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + (sc-case x + (double-reg + (case (tn-offset x) + (0 + ;; x is in fr0 + (inst fstp fr1) + (inst fldlg2) + (inst fxch fr1)) + (1 + ;; x is in fr1 + (inst fstp fr0) + (inst fldlg2) + (inst fxch fr1)) + (t + ;; x is in a FP reg, not fr0 or fr1 + (inst fstp fr0) + (inst fstp fr0) + (inst fldlg2) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (1- (tn-offset x)))))) + (inst fyl2x)) + ((double-stack descriptor-reg) + (inst fstp fr0) + (inst fstp fr0) + (inst fldlg2) + (if (sc-is x double-stack) + (inst fldd (ea-for-df-stack x)) + (inst fldd (ea-for-df-desc x))) + (inst fyl2x))) + (inst fld fr0) + (case (tn-offset y) + ((0 1)) + (t (inst fstd y))))) + +(define-vop (fpow) + (:translate %pow) + (:args (x :scs (double-reg double-stack descriptor-reg) :target fr0) + (y :scs (double-reg double-stack descriptor-reg) :target fr1)) + (:temporary (:sc double-reg :offset fr0-offset + :from (:argument 0) :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset + :from (:argument 1) :to :result) fr1) + (:temporary (:sc double-reg :offset fr2-offset + :from :load :to :result) fr2) + (:results (r :scs (double-reg))) + (:arg-types double-float double-float) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline pow function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + ;; Setup x in fr0 and y in fr1 + (cond + ;; x in fr0; y in fr1 + ((and (sc-is x double-reg) (zerop (tn-offset x)) + (sc-is y double-reg) (= 1 (tn-offset y)))) + ;; y in fr1; x not in fr0 + ((and (sc-is y double-reg) (= 1 (tn-offset y))) + ;; Load x to fr0 + (sc-case x + (double-reg + (copy-fp-reg-to-fr0 x)) + (double-stack + (inst fstp fr0) + (inst fldd (ea-for-df-stack x))) + (descriptor-reg + (inst fstp fr0) + (inst fldd (ea-for-df-desc x))))) + ;; x in fr0; y not in fr1 + ((and (sc-is x double-reg) (zerop (tn-offset x))) + (inst fxch fr1) + ;; Now load y to fr0 + (sc-case y + (double-reg + (copy-fp-reg-to-fr0 y)) + (double-stack + (inst fstp fr0) + (inst fldd (ea-for-df-stack y))) + (descriptor-reg + (inst fstp fr0) + (inst fldd (ea-for-df-desc y)))) + (inst fxch fr1)) + ;; x in fr1; y not in fr1 + ((and (sc-is x double-reg) (= 1 (tn-offset x))) + ;; Load y to fr0 + (sc-case y + (double-reg + (copy-fp-reg-to-fr0 y)) + (double-stack + (inst fstp fr0) + (inst fldd (ea-for-df-stack y))) + (descriptor-reg + (inst fstp fr0) + (inst fldd (ea-for-df-desc y)))) + (inst fxch fr1)) + ;; y in fr0; + ((and (sc-is y double-reg) (zerop (tn-offset y))) + (inst fxch fr1) + ;; Now load x to fr0 + (sc-case x + (double-reg + (copy-fp-reg-to-fr0 x)) + (double-stack + (inst fstp fr0) + (inst fldd (ea-for-df-stack x))) + (descriptor-reg + (inst fstp fr0) + (inst fldd (ea-for-df-desc x))))) + ;; Neither x or y are in either fr0 or fr1 + (t + ;; Load y then x + (inst fstp fr0) + (inst fstp fr0) + (sc-case y + (double-reg + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset y) 2)))) + (double-stack + (inst fldd (ea-for-df-stack y))) + (descriptor-reg + (inst fldd (ea-for-df-desc y)))) + ;; Load x to fr0 + (sc-case x + (double-reg + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (1- (tn-offset x))))) + (double-stack + (inst fldd (ea-for-df-stack x))) + (descriptor-reg + (inst fldd (ea-for-df-desc x)))))) + + ;; Now have x at fr0; and y at fr1 + (inst fyl2x) + ;; Now fr0=y log2(x) + (inst fld fr0) + (inst frndint) + (inst fst fr2) + (inst fsubp-sti fr1) + (inst f2xm1) + (inst fld1) + (inst faddp-sti fr1) + (inst fscale) + (inst fld fr0) + (case (tn-offset r) + ((0 1)) + (t (inst fstd r))))) + +(define-vop (fscalen) + (:translate %scalbn) + (:args (x :scs (double-reg double-stack descriptor-reg) :target fr0) + (y :scs (signed-stack signed-reg) :target temp)) + (:temporary (:sc double-reg :offset fr0-offset + :from (:argument 0) :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset :from :eval :to :result) fr1) + (:temporary (:sc signed-stack :from (:argument 1) :to :result) temp) + (:results (r :scs (double-reg))) + (:arg-types double-float signed-num) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline scalbn function") + (:generator 5 + ;; Setup x in fr0 and y in fr1 + (sc-case x + (double-reg + (case (tn-offset x) + (0 + (inst fstp fr1) + (sc-case y + (signed-reg + (inst mov temp y) + (inst fild temp)) + (signed-stack + (inst fild y))) + (inst fxch fr1)) + (1 + (inst fstp fr0) + (sc-case y + (signed-reg + (inst mov temp y) + (inst fild temp)) + (signed-stack + (inst fild y))) + (inst fxch fr1)) + (t + (inst fstp fr0) + (inst fstp fr0) + (sc-case y + (signed-reg + (inst mov temp y) + (inst fild temp)) + (signed-stack + (inst fild y))) + (inst fld (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (1- (tn-offset x))))))) + ((double-stack descriptor-reg) + (inst fstp fr0) + (inst fstp fr0) + (sc-case y + (signed-reg + (inst mov temp y) + (inst fild temp)) + (signed-stack + (inst fild y))) + (if (sc-is x double-stack) + (inst fldd (ea-for-df-stack x)) + (inst fldd (ea-for-df-desc x))))) + (inst fscale) + (unless (zerop (tn-offset r)) + (inst fstd r)))) + +(define-vop (fscale) + (:translate %scalb) + (:args (x :scs (double-reg double-stack descriptor-reg) :target fr0) + (y :scs (double-reg double-stack descriptor-reg) :target fr1)) + (:temporary (:sc double-reg :offset fr0-offset + :from (:argument 0) :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset + :from (:argument 1) :to :result) fr1) + (:results (r :scs (double-reg))) + (:arg-types double-float double-float) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline scalb function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + ;; Setup x in fr0 and y in fr1 + (cond + ;; x in fr0; y in fr1 + ((and (sc-is x double-reg) (zerop (tn-offset x)) + (sc-is y double-reg) (= 1 (tn-offset y)))) + ;; y in fr1; x not in fr0 + ((and (sc-is y double-reg) (= 1 (tn-offset y))) + ;; Load x to fr0 + (sc-case x + (double-reg + (copy-fp-reg-to-fr0 x)) + (double-stack + (inst fstp fr0) + (inst fldd (ea-for-df-stack x))) + (descriptor-reg + (inst fstp fr0) + (inst fldd (ea-for-df-desc x))))) + ;; x in fr0; y not in fr1 + ((and (sc-is x double-reg) (zerop (tn-offset x))) + (inst fxch fr1) + ;; Now load y to fr0 + (sc-case y + (double-reg + (copy-fp-reg-to-fr0 y)) + (double-stack + (inst fstp fr0) + (inst fldd (ea-for-df-stack y))) + (descriptor-reg + (inst fstp fr0) + (inst fldd (ea-for-df-desc y)))) + (inst fxch fr1)) + ;; x in fr1; y not in fr1 + ((and (sc-is x double-reg) (= 1 (tn-offset x))) + ;; Load y to fr0 + (sc-case y + (double-reg + (copy-fp-reg-to-fr0 y)) + (double-stack + (inst fstp fr0) + (inst fldd (ea-for-df-stack y))) + (descriptor-reg + (inst fstp fr0) + (inst fldd (ea-for-df-desc y)))) + (inst fxch fr1)) + ;; y in fr0; + ((and (sc-is y double-reg) (zerop (tn-offset y))) + (inst fxch fr1) + ;; Now load x to fr0 + (sc-case x + (double-reg + (copy-fp-reg-to-fr0 x)) + (double-stack + (inst fstp fr0) + (inst fldd (ea-for-df-stack x))) + (descriptor-reg + (inst fstp fr0) + (inst fldd (ea-for-df-desc x))))) + ;; Neither x or y are in either fr0 or fr1 + (t + ;; Load y then x + (inst fstp fr0) + (inst fstp fr0) + (sc-case y + (double-reg + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset y) 2)))) + (double-stack + (inst fldd (ea-for-df-stack y))) + (descriptor-reg + (inst fldd (ea-for-df-desc y)))) + ;; Load x to fr0 + (sc-case x + (double-reg + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (1- (tn-offset x))))) + (double-stack + (inst fldd (ea-for-df-stack x))) + (descriptor-reg + (inst fldd (ea-for-df-desc x)))))) + + ;; Now have x at fr0; and y at fr1 + (inst fscale) + (unless (zerop (tn-offset r)) + (inst fstd r)))) + +(define-vop (flog1p) + (:translate %log1p) + (:args (x :scs (double-reg) :to :result)) + (:temporary (:sc double-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:temporary (:sc word-reg :offset eax-offset :from :eval) temp) + (:results (y :scs (double-reg))) + (:arg-types double-float) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline log1p function") + (:ignore temp) + (:generator 5 + ;; x is in a FP reg, not fr0, fr1. + (inst fstp fr0) + (inst fstp fr0) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 2))) + ;; Check the range + (inst push #x3e947ae1) ; Constant 0.29 + (inst fabs) + (inst fld (make-ea :dword :base esp-tn)) + (inst fcompp) + (inst add esp-tn 4) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x45) + (inst jmp :z WITHIN-RANGE) + ;; Out of range for fyl2xp1. + (inst fld1) + (inst faddd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 1))) + (inst fldln2) + (inst fxch fr1) + (inst fyl2x) + (inst jmp DONE) + + WITHIN-RANGE + (inst fldln2) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 1))) + (inst fyl2xp1) + DONE + (inst fld fr0) + (case (tn-offset y) + ((0 1)) + (t (inst fstd y))))) + +;;; The Pentium has a less restricted implementation of the fyl2xp1 +;;; instruction and a range check can be avoided. +(define-vop (flog1p-pentium) + (:translate %log1p) + (:args (x :scs (double-reg double-stack descriptor-reg) :target fr0)) + (:temporary (:sc double-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:results (y :scs (double-reg))) + (:arg-types double-float) + (:result-types double-float) + (:policy :fast-safe) + (:guard (member :pentium-style-fyl2xp1 *backend-subfeatures*)) + (:note "inline log1p with limited x range function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 4 + (note-this-location vop :internal-error) + (sc-case x + (double-reg + (case (tn-offset x) + (0 + ;; x is in fr0 + (inst fstp fr1) + (inst fldln2) + (inst fxch fr1)) + (1 + ;; x is in fr1 + (inst fstp fr0) + (inst fldln2) + (inst fxch fr1)) + (t + ;; x is in a FP reg, not fr0 or fr1 + (inst fstp fr0) + (inst fstp fr0) + (inst fldln2) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (1- (tn-offset x))))))) + ((double-stack descriptor-reg) + (inst fstp fr0) + (inst fstp fr0) + (inst fldln2) + (if (sc-is x double-stack) + (inst fldd (ea-for-df-stack x)) + (inst fldd (ea-for-df-desc x))))) + (inst fyl2xp1) + (inst fld fr0) + (case (tn-offset y) + ((0 1)) + (t (inst fstd y))))) + +(define-vop (flogb) + (:translate %logb) + (:args (x :scs (double-reg double-stack descriptor-reg) :target fr0)) + (:temporary (:sc double-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:results (y :scs (double-reg))) + (:arg-types double-float) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline logb function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + (sc-case x + (double-reg + (case (tn-offset x) + (0 + ;; x is in fr0 + (inst fstp fr1)) + (1 + ;; x is in fr1 + (inst fstp fr0)) + (t + ;; x is in a FP reg, not fr0 or fr1 + (inst fstp fr0) + (inst fstp fr0) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 2)))))) + ((double-stack descriptor-reg) + (inst fstp fr0) + (inst fstp fr0) + (if (sc-is x double-stack) + (inst fldd (ea-for-df-stack x)) + (inst fldd (ea-for-df-desc x))))) + (inst fxtract) + (case (tn-offset y) + (0 + (inst fxch fr1)) + (1) + (t (inst fxch fr1) + (inst fstd y))))) + +(define-vop (fatan) + (:translate %atan) + (:args (x :scs (double-reg double-stack descriptor-reg) :target fr0)) + (:temporary (:sc double-reg :offset fr0-offset + :from (:argument 0) :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset + :from (:argument 0) :to :result) fr1) + (:results (r :scs (double-reg))) + (:arg-types double-float) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline atan function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + ;; Setup x in fr1 and 1.0 in fr0 + (cond + ;; x in fr0 + ((and (sc-is x double-reg) (zerop (tn-offset x))) + (inst fstp fr1)) + ;; x in fr1 + ((and (sc-is x double-reg) (= 1 (tn-offset x))) + (inst fstp fr0)) + ;; x not in fr0 or fr1 + (t + ;; Load x then 1.0 + (inst fstp fr0) + (inst fstp fr0) + (sc-case x + (double-reg + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 2)))) + (double-stack + (inst fldd (ea-for-df-stack x))) + (descriptor-reg + (inst fldd (ea-for-df-desc x)))))) + (inst fld1) + ;; Now have x at fr1; and 1.0 at fr0 + (inst fpatan) + (inst fld fr0) + (case (tn-offset r) + ((0 1)) + (t (inst fstd r))))) + +(define-vop (fatan2) + (:translate %atan2) + (:args (x :scs (double-reg double-stack descriptor-reg) :target fr1) + (y :scs (double-reg double-stack descriptor-reg) :target fr0)) + (:temporary (:sc double-reg :offset fr0-offset + :from (:argument 1) :to :result) fr0) + (:temporary (:sc double-reg :offset fr1-offset + :from (:argument 0) :to :result) fr1) + (:results (r :scs (double-reg))) + (:arg-types double-float double-float) + (:result-types double-float) + (:policy :fast-safe) + (:note "inline atan2 function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + ;; Setup x in fr1 and y in fr0 + (cond + ;; y in fr0; x in fr1 + ((and (sc-is y double-reg) (zerop (tn-offset y)) + (sc-is x double-reg) (= 1 (tn-offset x)))) + ;; x in fr1; y not in fr0 + ((and (sc-is x double-reg) (= 1 (tn-offset x))) + ;; Load y to fr0 + (sc-case y + (double-reg + (copy-fp-reg-to-fr0 y)) + (double-stack + (inst fstp fr0) + (inst fldd (ea-for-df-stack y))) + (descriptor-reg + (inst fstp fr0) + (inst fldd (ea-for-df-desc y))))) + ((and (sc-is x double-reg) (zerop (tn-offset x)) + (sc-is y double-reg) (zerop (tn-offset x))) + ;; copy x to fr1 + (inst fst fr1)) + ;; y in fr0; x not in fr1 + ((and (sc-is y double-reg) (zerop (tn-offset y))) + (inst fxch fr1) + ;; Now load x to fr0 + (sc-case x + (double-reg + (copy-fp-reg-to-fr0 x)) + (double-stack + (inst fstp fr0) + (inst fldd (ea-for-df-stack x))) + (descriptor-reg + (inst fstp fr0) + (inst fldd (ea-for-df-desc x)))) + (inst fxch fr1)) + ;; y in fr1; x not in fr1 + ((and (sc-is y double-reg) (= 1 (tn-offset y))) + ;; Load x to fr0 + (sc-case x + (double-reg + (copy-fp-reg-to-fr0 x)) + (double-stack + (inst fstp fr0) + (inst fldd (ea-for-df-stack x))) + (descriptor-reg + (inst fstp fr0) + (inst fldd (ea-for-df-desc x)))) + (inst fxch fr1)) + ;; x in fr0; + ((and (sc-is x double-reg) (zerop (tn-offset x))) + (inst fxch fr1) + ;; Now load y to fr0 + (sc-case y + (double-reg + (copy-fp-reg-to-fr0 y)) + (double-stack + (inst fstp fr0) + (inst fldd (ea-for-df-stack y))) + (descriptor-reg + (inst fstp fr0) + (inst fldd (ea-for-df-desc y))))) + ;; Neither y or x are in either fr0 or fr1 + (t + ;; Load x then y + (inst fstp fr0) + (inst fstp fr0) + (sc-case x + (double-reg + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 2)))) + (double-stack + (inst fldd (ea-for-df-stack x))) + (descriptor-reg + (inst fldd (ea-for-df-desc x)))) + ;; Load y to fr0 + (sc-case y + (double-reg + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (1- (tn-offset y))))) + (double-stack + (inst fldd (ea-for-df-stack y))) + (descriptor-reg + (inst fldd (ea-for-df-desc y)))))) + + ;; Now have y at fr0; and x at fr1 + (inst fpatan) + (inst fld fr0) + (case (tn-offset r) + ((0 1)) + (t (inst fstd r))))) +) ; PROGN #!-LONG-FLOAT + +#!+long-float +(progn + +;;; Lets use some of the 80387 special functions. +;;; +;;; These defs will not take effect unless code/irrat.lisp is modified +;;; to remove the inlined alien routine def. + +(macrolet ((frob (func trans op) + `(define-vop (,func) + (:args (x :scs (long-reg) :target fr0)) + (:temporary (:sc long-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:ignore fr0) + (:results (y :scs (long-reg))) + (:arg-types long-float) + (:result-types long-float) + (:translate ,trans) + (:policy :fast-safe) + (:note "inline NPX function") + (:vop-var vop) + (:save-p :compute-only) + (:node-var node) + (:generator 5 + (note-this-location vop :internal-error) + (unless (zerop (tn-offset x)) + (inst fxch x) ; x to top of stack + (unless (location= x y) + (inst fst x))) ; maybe save it + (inst ,op) ; clobber st0 + (cond ((zerop (tn-offset y)) + (maybe-fp-wait node)) + (t + (inst fst y))))))) + + ;; Quick versions of FSIN and FCOS that require the argument to be + ;; within range 2^63. + (frob fsin-quick %sin-quick fsin) + (frob fcos-quick %cos-quick fcos) + (frob fsqrt %sqrt fsqrt)) + +;;; Quick version of ftan that requires the argument to be within +;;; range 2^63. +(define-vop (ftan-quick) + (:translate %tan-quick) + (:args (x :scs (long-reg) :target fr0)) + (:temporary (:sc long-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc long-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:results (y :scs (long-reg))) + (:arg-types long-float) + (:result-types long-float) + (:policy :fast-safe) + (:note "inline tan function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + (case (tn-offset x) + (0 + (inst fstp fr1)) + (1 + (inst fstp fr0)) + (t + (inst fstp fr0) + (inst fstp fr0) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 2))))) + (inst fptan) + ;; Result is in fr1 + (case (tn-offset y) + (0 + (inst fxch fr1)) + (1) + (t + (inst fxch fr1) + (inst fstd y))))) + +;;; These versions of fsin, fcos, and ftan simply load a 0.0 result if +;;; the argument is out of range 2^63 and would thus be hopelessly +;;; inaccurate. +(macrolet ((frob (func trans op) + `(define-vop (,func) + (:translate ,trans) + (:args (x :scs (long-reg) :target fr0)) + (:temporary (:sc long-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc unsigned-reg :offset eax-offset + :from :argument :to :result) eax) + (:results (y :scs (long-reg))) + (:arg-types long-float) + (:result-types long-float) + (:policy :fast-safe) + (:note "inline sin/cos function") + (:vop-var vop) + (:save-p :compute-only) + (:ignore eax) + (:generator 5 + (note-this-location vop :internal-error) + (unless (zerop (tn-offset x)) + (inst fxch x) ; x to top of stack + (unless (location= x y) + (inst fst x))) ; maybe save it + (inst ,op) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x04) ; C2 + (inst jmp :z DONE) + ;; Else x was out of range so reduce it; ST0 is unchanged. + (inst fstp fr0) ; Load 0.0 + (inst fldz) + DONE + (unless (zerop (tn-offset y)) + (inst fstd y)))))) + (frob fsin %sin fsin) + (frob fcos %cos fcos)) + +(define-vop (ftan) + (:translate %tan) + (:args (x :scs (long-reg) :target fr0)) + (:temporary (:sc long-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc long-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:temporary (:sc unsigned-reg :offset eax-offset + :from :argument :to :result) eax) + (:results (y :scs (long-reg))) + (:arg-types long-float) + (:result-types long-float) + (:ignore eax) + (:policy :fast-safe) + (:note "inline tan function") + (:vop-var vop) + (:save-p :compute-only) + (:ignore eax) + (:generator 5 + (note-this-location vop :internal-error) + (case (tn-offset x) + (0 + (inst fstp fr1)) + (1 + (inst fstp fr0)) + (t + (inst fstp fr0) + (inst fstp fr0) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 2))))) + (inst fptan) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x04) ; C2 + (inst jmp :z DONE) + ;; Else x was out of range so reduce it; ST0 is unchanged. + (inst fldz) ; Load 0.0 + (inst fxch fr1) + DONE + ;; Result is in fr1 + (case (tn-offset y) + (0 + (inst fxch fr1)) + (1) + (t + (inst fxch fr1) + (inst fstd y))))) + +;;; Modified exp that handles the following special cases: +;;; exp(+Inf) is +Inf; exp(-Inf) is 0; exp(NaN) is NaN. +(define-vop (fexp) + (:translate %exp) + (:args (x :scs (long-reg) :target fr0)) + (:temporary (:sc word-reg :offset eax-offset :from :eval :to :result) temp) + (:temporary (:sc long-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc long-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:temporary (:sc long-reg :offset fr2-offset + :from :argument :to :result) fr2) + (:results (y :scs (long-reg))) + (:arg-types long-float) + (:result-types long-float) + (:policy :fast-safe) + (:note "inline exp function") + (:vop-var vop) + (:save-p :compute-only) + (:ignore temp) + (:generator 5 + (note-this-location vop :internal-error) + (unless (zerop (tn-offset x)) + (inst fxch x) ; x to top of stack + (unless (location= x y) + (inst fst x))) ; maybe save it + ;; Check for Inf or NaN + (inst fxam) + (inst fnstsw) + (inst sahf) + (inst jmp :nc NOINFNAN) ; Neither Inf or NaN. + (inst jmp :np NOINFNAN) ; NaN gives NaN? Continue. + (inst and ah-tn #x02) ; Test sign of Inf. + (inst jmp :z DONE) ; +Inf gives +Inf. + (inst fstp fr0) ; -Inf gives 0 + (inst fldz) + (inst jmp-short DONE) + NOINFNAN + (inst fstp fr1) + (inst fldl2e) + (inst fmul fr1) + ;; Now fr0=x log2(e) + (inst fst fr1) + (inst frndint) + (inst fst fr2) + (inst fsubp-sti fr1) + (inst f2xm1) + (inst fld1) + (inst faddp-sti fr1) + (inst fscale) + (inst fld fr0) + DONE + (unless (zerop (tn-offset y)) + (inst fstd y)))) + +;;; Expm1 = exp(x) - 1. +;;; Handles the following special cases: +;;; expm1(+Inf) is +Inf; expm1(-Inf) is -1.0; expm1(NaN) is NaN. +(define-vop (fexpm1) + (:translate %expm1) + (:args (x :scs (long-reg) :target fr0)) + (:temporary (:sc word-reg :offset eax-offset :from :eval :to :result) temp) + (:temporary (:sc long-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc long-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:temporary (:sc long-reg :offset fr2-offset + :from :argument :to :result) fr2) + (:results (y :scs (long-reg))) + (:arg-types long-float) + (:result-types long-float) + (:policy :fast-safe) + (:note "inline expm1 function") + (:vop-var vop) + (:save-p :compute-only) + (:ignore temp) + (:generator 5 + (note-this-location vop :internal-error) + (unless (zerop (tn-offset x)) + (inst fxch x) ; x to top of stack + (unless (location= x y) + (inst fst x))) ; maybe save it + ;; Check for Inf or NaN + (inst fxam) + (inst fnstsw) + (inst sahf) + (inst jmp :nc NOINFNAN) ; Neither Inf or NaN. + (inst jmp :np NOINFNAN) ; NaN gives NaN? Continue. + (inst and ah-tn #x02) ; Test sign of Inf. + (inst jmp :z DONE) ; +Inf gives +Inf. + (inst fstp fr0) ; -Inf gives -1.0 + (inst fld1) + (inst fchs) + (inst jmp-short DONE) + NOINFNAN + ;; Free two stack slots leaving the argument on top. + (inst fstp fr2) + (inst fstp fr0) + (inst fldl2e) + (inst fmul fr1) ; Now fr0 = x log2(e) + (inst fst fr1) + (inst frndint) + (inst fsub-sti fr1) + (inst fxch fr1) + (inst f2xm1) + (inst fscale) + (inst fxch fr1) + (inst fld1) + (inst fscale) + (inst fstp fr1) + (inst fld1) + (inst fsub fr1) + (inst fsubr fr2) + DONE + (unless (zerop (tn-offset y)) + (inst fstd y)))) + +(define-vop (flog) + (:translate %log) + (:args (x :scs (long-reg long-stack descriptor-reg) :target fr0)) + (:temporary (:sc long-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc long-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:results (y :scs (long-reg))) + (:arg-types long-float) + (:result-types long-float) + (:policy :fast-safe) + (:note "inline log function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + (sc-case x + (long-reg + (case (tn-offset x) + (0 + ;; x is in fr0 + (inst fstp fr1) + (inst fldln2) + (inst fxch fr1)) + (1 + ;; x is in fr1 + (inst fstp fr0) + (inst fldln2) + (inst fxch fr1)) + (t + ;; x is in a FP reg, not fr0 or fr1 + (inst fstp fr0) + (inst fstp fr0) + (inst fldln2) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (1- (tn-offset x)))))) + (inst fyl2x)) + ((long-stack descriptor-reg) + (inst fstp fr0) + (inst fstp fr0) + (inst fldln2) + (if (sc-is x long-stack) + (inst fldl (ea-for-lf-stack x)) + (inst fldl (ea-for-lf-desc x))) + (inst fyl2x))) + (inst fld fr0) + (case (tn-offset y) + ((0 1)) + (t (inst fstd y))))) + +(define-vop (flog10) + (:translate %log10) + (:args (x :scs (long-reg long-stack descriptor-reg) :target fr0)) + (:temporary (:sc long-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc long-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:results (y :scs (long-reg))) + (:arg-types long-float) + (:result-types long-float) + (:policy :fast-safe) + (:note "inline log10 function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + (sc-case x + (long-reg + (case (tn-offset x) + (0 + ;; x is in fr0 + (inst fstp fr1) + (inst fldlg2) + (inst fxch fr1)) + (1 + ;; x is in fr1 + (inst fstp fr0) + (inst fldlg2) + (inst fxch fr1)) + (t + ;; x is in a FP reg, not fr0 or fr1 + (inst fstp fr0) + (inst fstp fr0) + (inst fldlg2) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (1- (tn-offset x)))))) + (inst fyl2x)) + ((long-stack descriptor-reg) + (inst fstp fr0) + (inst fstp fr0) + (inst fldlg2) + (if (sc-is x long-stack) + (inst fldl (ea-for-lf-stack x)) + (inst fldl (ea-for-lf-desc x))) + (inst fyl2x))) + (inst fld fr0) + (case (tn-offset y) + ((0 1)) + (t (inst fstd y))))) + +(define-vop (fpow) + (:translate %pow) + (:args (x :scs (long-reg long-stack descriptor-reg) :target fr0) + (y :scs (long-reg long-stack descriptor-reg) :target fr1)) + (:temporary (:sc long-reg :offset fr0-offset + :from (:argument 0) :to :result) fr0) + (:temporary (:sc long-reg :offset fr1-offset + :from (:argument 1) :to :result) fr1) + (:temporary (:sc long-reg :offset fr2-offset + :from :load :to :result) fr2) + (:results (r :scs (long-reg))) + (:arg-types long-float long-float) + (:result-types long-float) + (:policy :fast-safe) + (:note "inline pow function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + ;; Setup x in fr0 and y in fr1 + (cond + ;; x in fr0; y in fr1 + ((and (sc-is x long-reg) (zerop (tn-offset x)) + (sc-is y long-reg) (= 1 (tn-offset y)))) + ;; y in fr1; x not in fr0 + ((and (sc-is y long-reg) (= 1 (tn-offset y))) + ;; Load x to fr0 + (sc-case x + (long-reg + (copy-fp-reg-to-fr0 x)) + (long-stack + (inst fstp fr0) + (inst fldl (ea-for-lf-stack x))) + (descriptor-reg + (inst fstp fr0) + (inst fldl (ea-for-lf-desc x))))) + ;; x in fr0; y not in fr1 + ((and (sc-is x long-reg) (zerop (tn-offset x))) + (inst fxch fr1) + ;; Now load y to fr0 + (sc-case y + (long-reg + (copy-fp-reg-to-fr0 y)) + (long-stack + (inst fstp fr0) + (inst fldl (ea-for-lf-stack y))) + (descriptor-reg + (inst fstp fr0) + (inst fldl (ea-for-lf-desc y)))) + (inst fxch fr1)) + ;; x in fr1; y not in fr1 + ((and (sc-is x long-reg) (= 1 (tn-offset x))) + ;; Load y to fr0 + (sc-case y + (long-reg + (copy-fp-reg-to-fr0 y)) + (long-stack + (inst fstp fr0) + (inst fldl (ea-for-lf-stack y))) + (descriptor-reg + (inst fstp fr0) + (inst fldl (ea-for-lf-desc y)))) + (inst fxch fr1)) + ;; y in fr0; + ((and (sc-is y long-reg) (zerop (tn-offset y))) + (inst fxch fr1) + ;; Now load x to fr0 + (sc-case x + (long-reg + (copy-fp-reg-to-fr0 x)) + (long-stack + (inst fstp fr0) + (inst fldl (ea-for-lf-stack x))) + (descriptor-reg + (inst fstp fr0) + (inst fldl (ea-for-lf-desc x))))) + ;; Neither x or y are in either fr0 or fr1 + (t + ;; Load y then x + (inst fstp fr0) + (inst fstp fr0) + (sc-case y + (long-reg + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset y) 2)))) + (long-stack + (inst fldl (ea-for-lf-stack y))) + (descriptor-reg + (inst fldl (ea-for-lf-desc y)))) + ;; Load x to fr0 + (sc-case x + (long-reg + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (1- (tn-offset x))))) + (long-stack + (inst fldl (ea-for-lf-stack x))) + (descriptor-reg + (inst fldl (ea-for-lf-desc x)))))) + + ;; Now have x at fr0; and y at fr1 + (inst fyl2x) + ;; Now fr0=y log2(x) + (inst fld fr0) + (inst frndint) + (inst fst fr2) + (inst fsubp-sti fr1) + (inst f2xm1) + (inst fld1) + (inst faddp-sti fr1) + (inst fscale) + (inst fld fr0) + (case (tn-offset r) + ((0 1)) + (t (inst fstd r))))) + +(define-vop (fscalen) + (:translate %scalbn) + (:args (x :scs (long-reg long-stack descriptor-reg) :target fr0) + (y :scs (signed-stack signed-reg) :target temp)) + (:temporary (:sc long-reg :offset fr0-offset + :from (:argument 0) :to :result) fr0) + (:temporary (:sc long-reg :offset fr1-offset :from :eval :to :result) fr1) + (:temporary (:sc signed-stack :from (:argument 1) :to :result) temp) + (:results (r :scs (long-reg))) + (:arg-types long-float signed-num) + (:result-types long-float) + (:policy :fast-safe) + (:note "inline scalbn function") + (:generator 5 + ;; Setup x in fr0 and y in fr1 + (sc-case x + (long-reg + (case (tn-offset x) + (0 + (inst fstp fr1) + (sc-case y + (signed-reg + (inst mov temp y) + (inst fild temp)) + (signed-stack + (inst fild y))) + (inst fxch fr1)) + (1 + (inst fstp fr0) + (sc-case y + (signed-reg + (inst mov temp y) + (inst fild temp)) + (signed-stack + (inst fild y))) + (inst fxch fr1)) + (t + (inst fstp fr0) + (inst fstp fr0) + (sc-case y + (signed-reg + (inst mov temp y) + (inst fild temp)) + (signed-stack + (inst fild y))) + (inst fld (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (1- (tn-offset x))))))) + ((long-stack descriptor-reg) + (inst fstp fr0) + (inst fstp fr0) + (sc-case y + (signed-reg + (inst mov temp y) + (inst fild temp)) + (signed-stack + (inst fild y))) + (if (sc-is x long-stack) + (inst fldl (ea-for-lf-stack x)) + (inst fldl (ea-for-lf-desc x))))) + (inst fscale) + (unless (zerop (tn-offset r)) + (inst fstd r)))) + +(define-vop (fscale) + (:translate %scalb) + (:args (x :scs (long-reg long-stack descriptor-reg) :target fr0) + (y :scs (long-reg long-stack descriptor-reg) :target fr1)) + (:temporary (:sc long-reg :offset fr0-offset + :from (:argument 0) :to :result) fr0) + (:temporary (:sc long-reg :offset fr1-offset + :from (:argument 1) :to :result) fr1) + (:results (r :scs (long-reg))) + (:arg-types long-float long-float) + (:result-types long-float) + (:policy :fast-safe) + (:note "inline scalb function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + ;; Setup x in fr0 and y in fr1 + (cond + ;; x in fr0; y in fr1 + ((and (sc-is x long-reg) (zerop (tn-offset x)) + (sc-is y long-reg) (= 1 (tn-offset y)))) + ;; y in fr1; x not in fr0 + ((and (sc-is y long-reg) (= 1 (tn-offset y))) + ;; Load x to fr0 + (sc-case x + (long-reg + (copy-fp-reg-to-fr0 x)) + (long-stack + (inst fstp fr0) + (inst fldl (ea-for-lf-stack x))) + (descriptor-reg + (inst fstp fr0) + (inst fldl (ea-for-lf-desc x))))) + ;; x in fr0; y not in fr1 + ((and (sc-is x long-reg) (zerop (tn-offset x))) + (inst fxch fr1) + ;; Now load y to fr0 + (sc-case y + (long-reg + (copy-fp-reg-to-fr0 y)) + (long-stack + (inst fstp fr0) + (inst fldl (ea-for-lf-stack y))) + (descriptor-reg + (inst fstp fr0) + (inst fldl (ea-for-lf-desc y)))) + (inst fxch fr1)) + ;; x in fr1; y not in fr1 + ((and (sc-is x long-reg) (= 1 (tn-offset x))) + ;; Load y to fr0 + (sc-case y + (long-reg + (copy-fp-reg-to-fr0 y)) + (long-stack + (inst fstp fr0) + (inst fldl (ea-for-lf-stack y))) + (descriptor-reg + (inst fstp fr0) + (inst fldl (ea-for-lf-desc y)))) + (inst fxch fr1)) + ;; y in fr0; + ((and (sc-is y long-reg) (zerop (tn-offset y))) + (inst fxch fr1) + ;; Now load x to fr0 + (sc-case x + (long-reg + (copy-fp-reg-to-fr0 x)) + (long-stack + (inst fstp fr0) + (inst fldl (ea-for-lf-stack x))) + (descriptor-reg + (inst fstp fr0) + (inst fldl (ea-for-lf-desc x))))) + ;; Neither x or y are in either fr0 or fr1 + (t + ;; Load y then x + (inst fstp fr0) + (inst fstp fr0) + (sc-case y + (long-reg + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset y) 2)))) + (long-stack + (inst fldl (ea-for-lf-stack y))) + (descriptor-reg + (inst fldl (ea-for-lf-desc y)))) + ;; Load x to fr0 + (sc-case x + (long-reg + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (1- (tn-offset x))))) + (long-stack + (inst fldl (ea-for-lf-stack x))) + (descriptor-reg + (inst fldl (ea-for-lf-desc x)))))) + + ;; Now have x at fr0; and y at fr1 + (inst fscale) + (unless (zerop (tn-offset r)) + (inst fstd r)))) + +(define-vop (flog1p) + (:translate %log1p) + (:args (x :scs (long-reg) :to :result)) + (:temporary (:sc long-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc long-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:temporary (:sc word-reg :offset eax-offset :from :eval) temp) + (:results (y :scs (long-reg))) + (:arg-types long-float) + (:result-types long-float) + (:policy :fast-safe) + ;; FIXME 1: This appears to be the second DEFINE-VOP of FLOG1P. + ;; Perhaps this is OK because of the #!+LONG-FLOAT wrapped around + ;; an enormous PROGN above. Still, it would be probably be good to + ;; add some code to warn about redefining VOPs. + (:note "inline log1p function") + (:ignore temp) + (:generator 5 + ;; x is in a FP reg, not fr0, fr1. + (inst fstp fr0) + (inst fstp fr0) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 2))) + ;; Check the range + (inst push #x3e947ae1) ; Constant 0.29 + (inst fabs) + (inst fld (make-ea :dword :base esp-tn)) + (inst fcompp) + (inst add esp-tn 4) + (inst fnstsw) ; status word to ax + (inst and ah-tn #x45) + (inst jmp :z WITHIN-RANGE) + ;; Out of range for fyl2xp1. + (inst fld1) + (inst faddd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 1))) + (inst fldln2) + (inst fxch fr1) + (inst fyl2x) + (inst jmp DONE) + + WITHIN-RANGE + (inst fldln2) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 1))) + (inst fyl2xp1) + DONE + (inst fld fr0) + (case (tn-offset y) + ((0 1)) + (t (inst fstd y))))) + +;;; The Pentium has a less restricted implementation of the fyl2xp1 +;;; instruction and a range check can be avoided. +(define-vop (flog1p-pentium) + (:translate %log1p) + (:args (x :scs (long-reg long-stack descriptor-reg) :target fr0)) + (:temporary (:sc long-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc long-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:results (y :scs (long-reg))) + (:arg-types long-float) + (:result-types long-float) + (:policy :fast-safe) + (:guard (member :pentium-style-fyl2xp1 *backend-subfeatures*)) + (:note "inline log1p function") + (:generator 5 + (sc-case x + (long-reg + (case (tn-offset x) + (0 + ;; x is in fr0 + (inst fstp fr1) + (inst fldln2) + (inst fxch fr1)) + (1 + ;; x is in fr1 + (inst fstp fr0) + (inst fldln2) + (inst fxch fr1)) + (t + ;; x is in a FP reg, not fr0 or fr1 + (inst fstp fr0) + (inst fstp fr0) + (inst fldln2) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (1- (tn-offset x))))))) + ((long-stack descriptor-reg) + (inst fstp fr0) + (inst fstp fr0) + (inst fldln2) + (if (sc-is x long-stack) + (inst fldl (ea-for-lf-stack x)) + (inst fldl (ea-for-lf-desc x))))) + (inst fyl2xp1) + (inst fld fr0) + (case (tn-offset y) + ((0 1)) + (t (inst fstd y))))) + +(define-vop (flogb) + (:translate %logb) + (:args (x :scs (long-reg long-stack descriptor-reg) :target fr0)) + (:temporary (:sc long-reg :offset fr0-offset + :from :argument :to :result) fr0) + (:temporary (:sc long-reg :offset fr1-offset + :from :argument :to :result) fr1) + (:results (y :scs (long-reg))) + (:arg-types long-float) + (:result-types long-float) + (:policy :fast-safe) + (:note "inline logb function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + (sc-case x + (long-reg + (case (tn-offset x) + (0 + ;; x is in fr0 + (inst fstp fr1)) + (1 + ;; x is in fr1 + (inst fstp fr0)) + (t + ;; x is in a FP reg, not fr0 or fr1 + (inst fstp fr0) + (inst fstp fr0) + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 2)))))) + ((long-stack descriptor-reg) + (inst fstp fr0) + (inst fstp fr0) + (if (sc-is x long-stack) + (inst fldl (ea-for-lf-stack x)) + (inst fldl (ea-for-lf-desc x))))) + (inst fxtract) + (case (tn-offset y) + (0 + (inst fxch fr1)) + (1) + (t (inst fxch fr1) + (inst fstd y))))) + +(define-vop (fatan) + (:translate %atan) + (:args (x :scs (long-reg long-stack descriptor-reg) :target fr0)) + (:temporary (:sc long-reg :offset fr0-offset + :from (:argument 0) :to :result) fr0) + (:temporary (:sc long-reg :offset fr1-offset + :from (:argument 0) :to :result) fr1) + (:results (r :scs (long-reg))) + (:arg-types long-float) + (:result-types long-float) + (:policy :fast-safe) + (:note "inline atan function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + ;; Setup x in fr1 and 1.0 in fr0 + (cond + ;; x in fr0 + ((and (sc-is x long-reg) (zerop (tn-offset x))) + (inst fstp fr1)) + ;; x in fr1 + ((and (sc-is x long-reg) (= 1 (tn-offset x))) + (inst fstp fr0)) + ;; x not in fr0 or fr1 + (t + ;; Load x then 1.0 + (inst fstp fr0) + (inst fstp fr0) + (sc-case x + (long-reg + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 2)))) + (long-stack + (inst fldl (ea-for-lf-stack x))) + (descriptor-reg + (inst fldl (ea-for-lf-desc x)))))) + (inst fld1) + ;; Now have x at fr1; and 1.0 at fr0 + (inst fpatan) + (inst fld fr0) + (case (tn-offset r) + ((0 1)) + (t (inst fstd r))))) + +(define-vop (fatan2) + (:translate %atan2) + (:args (x :scs (long-reg long-stack descriptor-reg) :target fr1) + (y :scs (long-reg long-stack descriptor-reg) :target fr0)) + (:temporary (:sc long-reg :offset fr0-offset + :from (:argument 1) :to :result) fr0) + (:temporary (:sc long-reg :offset fr1-offset + :from (:argument 0) :to :result) fr1) + (:results (r :scs (long-reg))) + (:arg-types long-float long-float) + (:result-types long-float) + (:policy :fast-safe) + (:note "inline atan2 function") + (:vop-var vop) + (:save-p :compute-only) + (:generator 5 + (note-this-location vop :internal-error) + ;; Setup x in fr1 and y in fr0 + (cond + ;; y in fr0; x in fr1 + ((and (sc-is y long-reg) (zerop (tn-offset y)) + (sc-is x long-reg) (= 1 (tn-offset x)))) + ;; x in fr1; y not in fr0 + ((and (sc-is x long-reg) (= 1 (tn-offset x))) + ;; Load y to fr0 + (sc-case y + (long-reg + (copy-fp-reg-to-fr0 y)) + (long-stack + (inst fstp fr0) + (inst fldl (ea-for-lf-stack y))) + (descriptor-reg + (inst fstp fr0) + (inst fldl (ea-for-lf-desc y))))) + ;; y in fr0; x not in fr1 + ((and (sc-is y long-reg) (zerop (tn-offset y))) + (inst fxch fr1) + ;; Now load x to fr0 + (sc-case x + (long-reg + (copy-fp-reg-to-fr0 x)) + (long-stack + (inst fstp fr0) + (inst fldl (ea-for-lf-stack x))) + (descriptor-reg + (inst fstp fr0) + (inst fldl (ea-for-lf-desc x)))) + (inst fxch fr1)) + ;; y in fr1; x not in fr1 + ((and (sc-is y long-reg) (= 1 (tn-offset y))) + ;; Load x to fr0 + (sc-case x + (long-reg + (copy-fp-reg-to-fr0 x)) + (long-stack + (inst fstp fr0) + (inst fldl (ea-for-lf-stack x))) + (descriptor-reg + (inst fstp fr0) + (inst fldl (ea-for-lf-desc x)))) + (inst fxch fr1)) + ;; x in fr0; + ((and (sc-is x long-reg) (zerop (tn-offset x))) + (inst fxch fr1) + ;; Now load y to fr0 + (sc-case y + (long-reg + (copy-fp-reg-to-fr0 y)) + (long-stack + (inst fstp fr0) + (inst fldl (ea-for-lf-stack y))) + (descriptor-reg + (inst fstp fr0) + (inst fldl (ea-for-lf-desc y))))) + ;; Neither y or x are in either fr0 or fr1 + (t + ;; Load x then y + (inst fstp fr0) + (inst fstp fr0) + (sc-case x + (long-reg + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (- (tn-offset x) 2)))) + (long-stack + (inst fldl (ea-for-lf-stack x))) + (descriptor-reg + (inst fldl (ea-for-lf-desc x)))) + ;; Load y to fr0 + (sc-case y + (long-reg + (inst fldd (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (1- (tn-offset y))))) + (long-stack + (inst fldl (ea-for-lf-stack y))) + (descriptor-reg + (inst fldl (ea-for-lf-desc y)))))) + + ;; Now have y at fr0; and x at fr1 + (inst fpatan) + (inst fld fr0) + (case (tn-offset r) + ((0 1)) + (t (inst fstd r))))) + +) ; PROGN #!+LONG-FLOAT + +;;;; complex float VOPs + +(define-vop (make-complex-single-float) + (:translate complex) + (:args (real :scs (single-reg) :to :result :target r + :load-if (not (location= real r))) + (imag :scs (single-reg) :to :save)) + (:arg-types single-float single-float) + (:results (r :scs (complex-single-reg) :from (:argument 0) + :load-if (not (sc-is r complex-single-stack)))) + (:result-types complex-single-float) + (:note "inline complex single-float creation") + (:policy :fast-safe) + (:generator 5 + (sc-case r + (complex-single-reg + (let ((r-real (complex-double-reg-real-tn r))) + (unless (location= real r-real) + (cond ((zerop (tn-offset r-real)) + (copy-fp-reg-to-fr0 real)) + ((zerop (tn-offset real)) + (inst fstd r-real)) + (t + (inst fxch real) + (inst fstd r-real) + (inst fxch real))))) + (let ((r-imag (complex-double-reg-imag-tn r))) + (unless (location= imag r-imag) + (cond ((zerop (tn-offset imag)) + (inst fstd r-imag)) + (t + (inst fxch imag) + (inst fstd r-imag) + (inst fxch imag)))))) + (complex-single-stack + (unless (location= real r) + (cond ((zerop (tn-offset real)) + (inst fst (ea-for-csf-real-stack r))) + (t + (inst fxch real) + (inst fst (ea-for-csf-real-stack r)) + (inst fxch real)))) + (inst fxch imag) + (inst fst (ea-for-csf-imag-stack r)) + (inst fxch imag))))) + +(define-vop (make-complex-double-float) + (:translate complex) + (:args (real :scs (double-reg) :target r + :load-if (not (location= real r))) + (imag :scs (double-reg) :to :save)) + (:arg-types double-float double-float) + (:results (r :scs (complex-double-reg) :from (:argument 0) + :load-if (not (sc-is r complex-double-stack)))) + (:result-types complex-double-float) + (:note "inline complex double-float creation") + (:policy :fast-safe) + (:generator 5 + (sc-case r + (complex-double-reg + (let ((r-real (complex-double-reg-real-tn r))) + (unless (location= real r-real) + (cond ((zerop (tn-offset r-real)) + (copy-fp-reg-to-fr0 real)) + ((zerop (tn-offset real)) + (inst fstd r-real)) + (t + (inst fxch real) + (inst fstd r-real) + (inst fxch real))))) + (let ((r-imag (complex-double-reg-imag-tn r))) + (unless (location= imag r-imag) + (cond ((zerop (tn-offset imag)) + (inst fstd r-imag)) + (t + (inst fxch imag) + (inst fstd r-imag) + (inst fxch imag)))))) + (complex-double-stack + (unless (location= real r) + (cond ((zerop (tn-offset real)) + (inst fstd (ea-for-cdf-real-stack r))) + (t + (inst fxch real) + (inst fstd (ea-for-cdf-real-stack r)) + (inst fxch real)))) + (inst fxch imag) + (inst fstd (ea-for-cdf-imag-stack r)) + (inst fxch imag))))) + +#!+long-float +(define-vop (make-complex-long-float) + (:translate complex) + (:args (real :scs (long-reg) :target r + :load-if (not (location= real r))) + (imag :scs (long-reg) :to :save)) + (:arg-types long-float long-float) + (:results (r :scs (complex-long-reg) :from (:argument 0) + :load-if (not (sc-is r complex-long-stack)))) + (:result-types complex-long-float) + (:note "inline complex long-float creation") + (:policy :fast-safe) + (:generator 5 + (sc-case r + (complex-long-reg + (let ((r-real (complex-double-reg-real-tn r))) + (unless (location= real r-real) + (cond ((zerop (tn-offset r-real)) + (copy-fp-reg-to-fr0 real)) + ((zerop (tn-offset real)) + (inst fstd r-real)) + (t + (inst fxch real) + (inst fstd r-real) + (inst fxch real))))) + (let ((r-imag (complex-double-reg-imag-tn r))) + (unless (location= imag r-imag) + (cond ((zerop (tn-offset imag)) + (inst fstd r-imag)) + (t + (inst fxch imag) + (inst fstd r-imag) + (inst fxch imag)))))) + (complex-long-stack + (unless (location= real r) + (cond ((zerop (tn-offset real)) + (store-long-float (ea-for-clf-real-stack r))) + (t + (inst fxch real) + (store-long-float (ea-for-clf-real-stack r)) + (inst fxch real)))) + (inst fxch imag) + (store-long-float (ea-for-clf-imag-stack r)) + (inst fxch imag))))) + + +(define-vop (complex-float-value) + (:args (x :target r)) + (:results (r)) + (:variant-vars offset) + (:policy :fast-safe) + (:generator 3 + (cond ((sc-is x complex-single-reg complex-double-reg + #!+long-float complex-long-reg) + (let ((value-tn + (make-random-tn :kind :normal + :sc (sc-or-lose 'double-reg) + :offset (+ offset (tn-offset x))))) + (unless (location= value-tn r) + (cond ((zerop (tn-offset r)) + (copy-fp-reg-to-fr0 value-tn)) + ((zerop (tn-offset value-tn)) + (inst fstd r)) + (t + (inst fxch value-tn) + (inst fstd r) + (inst fxch value-tn)))))) + ((sc-is r single-reg) + (let ((ea (sc-case x + (complex-single-stack + (ecase offset + (0 (ea-for-csf-real-stack x)) + (1 (ea-for-csf-imag-stack x)))) + (descriptor-reg + (ecase offset + (0 (ea-for-csf-real-desc x)) + (1 (ea-for-csf-imag-desc x))))))) + (with-empty-tn@fp-top(r) + (inst fld ea)))) + ((sc-is r double-reg) + (let ((ea (sc-case x + (complex-double-stack + (ecase offset + (0 (ea-for-cdf-real-stack x)) + (1 (ea-for-cdf-imag-stack x)))) + (descriptor-reg + (ecase offset + (0 (ea-for-cdf-real-desc x)) + (1 (ea-for-cdf-imag-desc x))))))) + (with-empty-tn@fp-top(r) + (inst fldd ea)))) + #!+long-float + ((sc-is r long-reg) + (let ((ea (sc-case x + (complex-long-stack + (ecase offset + (0 (ea-for-clf-real-stack x)) + (1 (ea-for-clf-imag-stack x)))) + (descriptor-reg + (ecase offset + (0 (ea-for-clf-real-desc x)) + (1 (ea-for-clf-imag-desc x))))))) + (with-empty-tn@fp-top(r) + (inst fldl ea)))) + (t (error "COMPLEX-FLOAT-VALUE VOP failure"))))) + +(define-vop (realpart/complex-single-float complex-float-value) + (:translate realpart) + (:args (x :scs (complex-single-reg complex-single-stack descriptor-reg) + :target r)) + (:arg-types complex-single-float) + (:results (r :scs (single-reg))) + (:result-types single-float) + (:note "complex float realpart") + (:variant 0)) + +(define-vop (realpart/complex-double-float complex-float-value) + (:translate realpart) + (:args (x :scs (complex-double-reg complex-double-stack descriptor-reg) + :target r)) + (:arg-types complex-double-float) + (:results (r :scs (double-reg))) + (:result-types double-float) + (:note "complex float realpart") + (:variant 0)) + +#!+long-float +(define-vop (realpart/complex-long-float complex-float-value) + (:translate realpart) + (:args (x :scs (complex-long-reg complex-long-stack descriptor-reg) + :target r)) + (:arg-types complex-long-float) + (:results (r :scs (long-reg))) + (:result-types long-float) + (:note "complex float realpart") + (:variant 0)) + +(define-vop (imagpart/complex-single-float complex-float-value) + (:translate imagpart) + (:args (x :scs (complex-single-reg complex-single-stack descriptor-reg) + :target r)) + (:arg-types complex-single-float) + (:results (r :scs (single-reg))) + (:result-types single-float) + (:note "complex float imagpart") + (:variant 1)) + +(define-vop (imagpart/complex-double-float complex-float-value) + (:translate imagpart) + (:args (x :scs (complex-double-reg complex-double-stack descriptor-reg) + :target r)) + (:arg-types complex-double-float) + (:results (r :scs (double-reg))) + (:result-types double-float) + (:note "complex float imagpart") + (:variant 1)) + +#!+long-float +(define-vop (imagpart/complex-long-float complex-float-value) + (:translate imagpart) + (:args (x :scs (complex-long-reg complex-long-stack descriptor-reg) + :target r)) + (:arg-types complex-long-float) + (:results (r :scs (long-reg))) + (:result-types long-float) + (:note "complex float imagpart") + (:variant 1)) + +;;; hack dummy VOPs to bias the representation selection of their +;;; arguments towards a FP register, which can help avoid consing at +;;; inappropriate locations +(defknown double-float-reg-bias (double-float) (values)) +(define-vop (double-float-reg-bias) + (:translate double-float-reg-bias) + (:args (x :scs (double-reg double-stack) :load-if nil)) + (:arg-types double-float) + (:policy :fast-safe) + (:note "inline dummy FP register bias") + (:ignore x) + (:generator 0)) +(defknown single-float-reg-bias (single-float) (values)) +(define-vop (single-float-reg-bias) + (:translate single-float-reg-bias) + (:args (x :scs (single-reg single-stack) :load-if nil)) + (:arg-types single-float) + (:policy :fast-safe) + (:note "inline dummy FP register bias") + (:ignore x) + (:generator 0)) + + +;; XMM Moves + + +(defun ea-for-xmm-desc (tn) + (make-ea :xmmword :base tn + :disp (- (* xmm-value-slot n-word-bytes) other-pointer-lowtag))) + +(defun ea-for-xmm-stack (tn) + (make-ea :xmmword :base ebp-tn + :disp (- (* (+ (tn-offset tn) + 4) + n-word-bytes)))) + +(define-move-fun (load-xmm 2) (vop x y) + ((xmm-stack) (xmm-reg)) + (inst movdqu y (ea-for-xmm-stack x))) + +(define-move-fun (store-xmm 2) (vop x y) + ((xmm-reg) (xmm-stack)) + (inst movdqu (ea-for-xmm-stack y) x)) + +(define-move-fun (load-xmm-single 2) (vop x y) + ((single-stack) (xmm-reg)) + (inst movss y (ea-for-sf-stack x))) + +(define-move-fun (store-xmm-single 2) (vop x y) + ((xmm-reg) (single-stack)) + (inst movss (ea-for-sf-stack y) x)) + + +(define-vop (%load-xmm-from-array/single-float) + (:policy :fast-safe) + (:args (src :scs (descriptor-reg)) + (index :scs (unsigned-reg))) + (:ARG-TYPES SIMPLE-ARRAY-SINGLE-FLOAT fixnum) + (:results (dest :scs (xmm-reg))) + (:result-types xmm) + (:generator 1 + (inst shl index 2) + (inst movdqu dest (make-ea :xmmword :base src :index index + :disp (- (* VECTOR-DATA-OFFSET N-WORD-BYTES) OTHER-POINTER-LOWTAG))))) + + +(define-vop (%store-xmm-to-array/single-float) + (:policy :fast-safe) + (:args (dest :scs (descriptor-reg)) + (index :scs (unsigned-reg)) + (src :scs (xmm-reg))) + (:ARG-TYPES SIMPLE-ARRAY-SINGLE-FLOAT fixnum XMM) + (:generator 1 + (inst shl index 2) + (inst movdqu (make-ea :xmmword :base dest :index index + :disp (- (* VECTOR-DATA-OFFSET N-WORD-BYTES) OTHER-POINTER-LOWTAG)) + src))) + + +(define-vop (xmm-move) + (:args (x :scs (xmm-reg) :target y :load-if (not (location= x y)))) + (:results (y :scs (xmm-reg) :load-if (not (location= x y)))) + (:note "xmm move") + (:generator 0 + (unless (location= x y) + (inst movdqa y x)))) + +(define-move-vop xmm-move :move (xmm-reg) (xmm-reg)) + +(define-vop (move-from-xmm) + (:args (x :scs (xmm-reg) :to :save)) + (:results (y :scs (descriptor-reg))) + (:node-var node) + (:note "xmm to pointer coercion") + (:generator 13 + (with-fixed-allocation (y + xmm-widetag + xmm-size node) + (inst movdqu (ea-for-xmm-desc y) x)))) + +(define-move-vop move-from-xmm :move (xmm-reg) (descriptor-reg)) + +(define-vop (move-to-xmm) + (:args (x :scs (descriptor-reg))) + (:results (y :scs (xmm-reg))) + (:note "pointer to xmm coercion") + (:generator 2 + (inst movdqu y (ea-for-xmm-desc x)))) + +(define-move-vop move-to-xmm :move (descriptor-reg) (xmm-reg)) + + +(define-vop (move-xmm-arg) + (:args (x :scs (xmm-reg) :target y) + (fp :scs (any-reg) + :load-if (not (sc-is y xmm-reg)))) + (:results (y)) + (:note "xmm argument move") + (:generator 6 + (sc-case y + (xmm-reg + (unless (location= x y) + (inst movdqa y x))) + + (xmm-stack + (if (= (tn-offset fp) esp-offset) + (let* ((offset (* (tn-offset y) n-word-bytes)) + (ea (make-ea :xmmword :base fp :disp offset))) + (inst movdqu ea x)) + + (let ((ea (make-ea :xmmword :base fp + :disp (- (* (+ (tn-offset y) 4) + n-word-bytes))))) + (inst movdqu ea x))))))) + +(define-move-vop move-xmm-arg :move-arg (xmm-reg descriptor-reg) (xmm-reg)) + +(define-move-vop move-arg :move-arg (xmm-reg) (descriptor-reg)) + + diff --git a/sbcl-src/src/compiler/x86/vm.lisp b/sbcl-src/src/compiler/x86/vm.lisp index d3fde75..168f5e1 100644 --- a/sbcl-src/src/compiler/x86/vm.lisp +++ b/sbcl-src/src/compiler/x86/vm.lisp @@ -200,6 +200,7 @@ (sap-stack stack) ; System area pointers. (single-stack stack) ; single-floats (double-stack stack :element-size 2) ; double-floats. + (xmm-stack stack :element-size 4) ; xmm #!+long-float (long-stack stack :element-size 3) ; long-floats. (complex-single-stack stack :element-size 2) ; complex-single-floats @@ -304,6 +305,12 @@ :save-p t :alternate-scs (double-stack)) + ;; non-descriptor XMMs + (xmm-reg xmm-registers + :locations #.*xmm-regs* + :save-p t + :alternate-scs (xmm-stack)) + ;; non-descriptor LONG-FLOATs #!+long-float (long-reg float-registers @@ -334,8 +341,6 @@ :save-p t :alternate-scs (complex-long-stack)) - (xmm-reg xmm-registers - :locations #.*xmm-regs*) ;; a catch or unwind block (catch-block stack :element-size kludge-nondeterministic-catch-block-size)) @@ -376,7 +381,8 @@ (def-misc-reg-tns unsigned-reg eax ebx ecx edx ebp esp edi esi) (def-misc-reg-tns word-reg ax bx cx dx bp sp di si) (def-misc-reg-tns byte-reg al ah bl bh cl ch dl dh) - (def-misc-reg-tns single-reg fr0 fr1 fr2 fr3 fr4 fr5 fr6 fr7)) + (def-misc-reg-tns single-reg fr0 fr1 fr2 fr3 fr4 fr5 fr6 fr7) + (def-misc-reg-tns xmm-reg xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7)) ;;; TNs for registers used to pass arguments (defparameter *register-arg-tns* diff --git a/sse-matrix.lisp b/sse-matrix.lisp index c38deb9..c0f5976 100644 --- a/sse-matrix.lisp +++ b/sse-matrix.lisp @@ -51,14 +51,14 @@ http://developer.intel.com/design/pentiumiii/sml/24504501.pdf SIMPLE-ARRAY-SINGLE-FLOAT SIMPLE-ARRAY-SINGLE-FLOAT) - (:TEMPORARY (:SC SSE-REG) X0) - (:TEMPORARY (:SC SSE-REG) X1) - (:TEMPORARY (:SC SSE-REG) X2) - (:TEMPORARY (:SC SSE-REG) X3) - (:TEMPORARY (:SC SSE-REG) X4) - (:TEMPORARY (:SC SSE-REG) X5) - (:TEMPORARY (:SC SSE-REG) X6) - (:TEMPORARY (:SC SSE-REG) X7) + (:TEMPORARY (:SC XMM-REG) X0) + (:TEMPORARY (:SC XMM-REG) X1) + (:TEMPORARY (:SC XMM-REG) X2) + (:TEMPORARY (:SC XMM-REG) X3) + (:TEMPORARY (:SC XMM-REG) X4) + (:TEMPORARY (:SC XMM-REG) X5) + (:TEMPORARY (:SC XMM-REG) X6) + (:TEMPORARY (:SC XMM-REG) X7) (:GENERATOR 10 (inst movss x2 (vect-ea mat2 32)) diff --git a/sse-moves.lisp b/sse-moves.lisp new file mode 100644 index 0000000..389de4e --- /dev/null +++ b/sse-moves.lisp @@ -0,0 +1,236 @@ +(in-package :sb-vm) + +(defun ea-for-xmm-desc (tn) + (make-ea :xmmword :base tn + :disp (- (* xmm-value-slot n-word-bytes) other-pointer-lowtag))) + +(defun ea-for-xmm-stack (tn) + (make-ea :xmmword :base ebp-tn + :disp (- (* (+ (tn-offset tn) + 4) + n-word-bytes)))) + +(define-move-fun (load-xmm 2) (vop x y) + ((xmm-stack) (xmm-reg)) + (inst movdqu y (ea-for-xmm-stack x))) + +(define-move-fun (store-xmm 2) (vop x y) + ((xmm-reg) (xmm-stack)) + (inst movdqu (ea-for-xmm-stack y) x)) + +(define-move-fun (load-xmm-single 2) (vop x y) + ((single-stack) (xmm-reg)) + (inst movss y (ea-for-sf-stack x))) + +(define-move-fun (store-xmm-single 2) (vop x y) + ((xmm-reg) (single-stack)) + (inst movss (ea-for-sf-stack y) x)) + + +(define-vop (data-vector-ref/simple-array-single-float/xmm) + (:note "array to xmm access") + (:translate data-vector-ref) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg)) + (index :scs (any-reg))) + (:arg-types simple-array-single-float positive-fixnum) + (:results (value :scs (xmm-reg))) + (:result-types xmm) + (:generator 5 + (inst movdqu value + (make-ea :xmmword :base object :index index :scale 1 + :disp (- (* vector-data-offset + n-word-bytes) + other-pointer-lowtag))))) + +(define-vop (data-vector-ref-c/simple-array-single-float/xmm) + (:note "array to xmm access") + (:translate data-vector-ref) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg))) + (:info index) + (:arg-types simple-array-single-float (:constant (signed-byte 30))) + (:results (value :scs (xmm-reg))) + (:result-types xmm) + (:generator 4 + (inst movdqu value (make-ea :xmmword :base object + :disp (- (+ (* vector-data-offset + n-word-bytes) + (* 16 index)) + other-pointer-lowtag))))) + + + +(define-vop (data-vector-set/simple-array-single-float/xmm) + (:note "inline array store") + (:translate data-vector-set) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg)) + (index :scs (any-reg)) + (value :scs (xmm-reg) :target result)) + (:arg-types simple-array-single-float positive-fixnum xmm) + (:results (result :scs (xmm-reg))) + (:result-types xmm) + (:generator 5 + (inst movdqu (make-ea :dword :base object :index index :scale 1 + :disp (- (* vector-data-offset + n-word-bytes) + other-pointer-lowtag)) + value) + (unless (location= value result) + (move result value)))) + +(define-vop (data-vector-set-c/simple-array-single-float/xmm) + (:note "inline array store") + (:translate data-vector-set) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg)) + (value :scs (xmm-reg) :target result)) + (:info index) + (:arg-types simple-array-single-float (:constant (signed-byte 30)) + xmm) + (:results (result :scs (xmm-reg))) + (:result-types xmm) + (:generator 4 + (inst movdqu (make-ea :dword :base object + :disp (- (+ (* vector-data-offset + n-word-bytes) + (* 16 index)) + other-pointer-lowtag)) + value) + (unless (location= value result) + (move result value)))) + + +(define-vop (myvop1) + (:policy :fast-safe) + (:args (src :scs (descriptor-reg))) + (:arg-types simple-array-single-float) + (:results (dest :scs (descriptor-reg))) + (:result-types fixnum) + (:generator 1 + (inst mov dest (fixnumize 100)))) + +(define-vop (myvop2) + (:policy :fast-safe) + (:args (src :scs (descriptor-reg))) + (:arg-types simple-array-single-float) + (:results (dest :scs (descriptor-reg))) + (:result-types fixnum) + (:temporary (:scs (xmm-reg)) x0) + (:generator 1 + (inst movdqu x0 (make-ea :xmmword :base src :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag))) + (inst mov dest (fixnumize 100)))) + +(define-vop (myvop3) + (:policy :fast-safe) + (:args (src :scs (descriptor-reg))) + (:arg-types simple-array-single-float) + (:results (dest :scs (xmm-reg))) + (:result-types xmm) + (:temporary (:scs (xmm-reg) :to :result) x0) + (:generator 1 + (inst movdqu x0 (make-ea :xmmword :base src :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag))) + (move dest x0))) + +(define-vop (myvop4) + (:policy :fast-safe) + (:args (src :scs (descriptor-reg))) + (:arg-types simple-array-single-float) + (:results (dest :scs (single-reg))) + (:result-types single-float) + (:temporary (:scs (single-reg) :to :result) x0) + (:generator 1 +;; (move x0 (make-ea :dword :base src :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag))) +;; (move dest x0))) + (inst nop))) + + +(define-vop (%load-xmm-from-array/single-float) + (:policy :fast-safe) + (:args (src :scs (descriptor-reg)) + (index :scs (unsigned-reg))) + (:ARG-TYPES SIMPLE-ARRAY-SINGLE-FLOAT fixnum) + (:results (dest :scs (xmm-reg))) + (:result-types xmm) + (:generator 1 + (inst shl index 2) + (inst movdqu dest (make-ea :xmmword :base src :index index + :disp (- (* VECTOR-DATA-OFFSET N-WORD-BYTES) OTHER-POINTER-LOWTAG))))) + + +(define-vop (%store-xmm-to-array/single-float) + (:policy :fast-safe) + (:args (dest :scs (descriptor-reg)) + (index :scs (unsigned-reg)) + (src :scs (xmm-reg))) + (:ARG-TYPES SIMPLE-ARRAY-SINGLE-FLOAT fixnum XMM) + (:generator 1 + (inst shl index 2) + (inst movdqu (make-ea :xmmword :base dest :index index + :disp (- (* VECTOR-DATA-OFFSET N-WORD-BYTES) OTHER-POINTER-LOWTAG)) + src))) + + +(define-vop (xmm-move) + (:args (x :scs (xmm-reg) :target y :load-if (not (location= x y)))) + (:results (y :scs (xmm-reg) :load-if (not (location= x y)))) + (:note "xmm move") + (:generator 0 + (unless (location= x y) + (inst movdqa y x)))) + +(define-move-vop xmm-move :move (xmm-reg) (xmm-reg)) + +(define-vop (move-from-xmm) + (:args (x :scs (xmm-reg) :to :save)) + (:results (y :scs (descriptor-reg))) + (:node-var node) + (:note "xmm to pointer coercion") + (:generator 13 + (with-fixed-allocation (y + xmm-widetag + xmm-size node) + (inst movdqu (ea-for-xmm-desc y) x)))) + +(define-move-vop move-from-xmm :move (xmm-reg) (descriptor-reg)) + +(define-vop (move-to-xmm) + (:args (x :scs (descriptor-reg))) + (:results (y :scs (xmm-reg))) + (:note "pointer to xmm coercion") + (:generator 2 + (inst movdqu y (ea-for-xmm-desc x)))) + +(define-move-vop move-to-xmm :move (descriptor-reg) (xmm-reg)) + + +(define-vop (move-xmm-arg) + (:args (x :scs (xmm-reg) :target y) + (fp :scs (any-reg) + :load-if (not (sc-is y xmm-reg)))) + (:results (y)) + (:note "xmm argument move") + (:generator 6 + (sc-case y + (xmm-reg + (unless (location= x y) + (inst movdqa y x))) + + (xmm-stack + (if (= (tn-offset fp) esp-offset) + (let* ((offset (* (tn-offset y) n-word-bytes)) + (ea (make-ea :xmmword :base fp :disp offset))) + (inst movdqu ea x)) + + (let ((ea (make-ea :xmmword :base fp + :disp (- (* (+ (tn-offset y) 4) + n-word-bytes))))) + (inst movdqu ea x))))))) + +(define-move-vop move-xmm-arg :move-arg (xmm-reg descriptor-reg) (xmm-reg)) + +(define-move-vop move-arg :move-arg (xmm-reg) (descriptor-reg)) + + + diff --git a/sse-vector.lisp b/sse-vector.lisp new file mode 100644 index 0000000..2eec399 --- /dev/null +++ b/sse-vector.lisp @@ -0,0 +1,225 @@ +#| +Copyright (c) 2005 Risto Laakso +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. +3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +|# +(in-package :sb-vm) + +(defmacro vect-ea (base &optional idx) + (let ((disp + (if (and idx (numberp idx)) + `(+ (- (* VECTOR-DATA-OFFSET N-WORD-BYTES) OTHER-POINTER-LOWTAG) ,idx) + `(- (* VECTOR-DATA-OFFSET N-WORD-BYTES) OTHER-POINTER-LOWTAG)))) + + (if (and idx (symbolp idx)) + `(make-ea :dword :base ,base :index ,idx :disp ,disp) + `(make-ea :dword :base ,base :disp ,disp)))) + +(DEFINE-VOP (%sse-vect-add/single-float) + (:POLICY :FAST-SAFE) + (:ARGS (DEST :SCS (DESCRIPTOR-REG)) + (SRC1 :SCS (DESCRIPTOR-REG)) + (SRC2 :SCS (DESCRIPTOR-REG))) + (:ARG-TYPES SIMPLE-ARRAY-SINGLE-FLOAT + SIMPLE-ARRAY-SINGLE-FLOAT + SIMPLE-ARRAY-SINGLE-FLOAT) + + (:TEMPORARY (:SC XMM-REG) X0) + (:TEMPORARY (:SC XMM-REG) X1) + + (:GENERATOR 10 + (inst movdqu x0 (vect-ea src1)) + (inst movdqu x1 (vect-ea src2)) + (inst addps x0 x1) + (inst movdqu (vect-ea dest) x0))) + +(DEFINE-VOP (%sse-vect-add2/single-float) + (:POLICY :FAST-SAFE) + (:ARGS (SRC1 :SCS (XMM-REG)) + (SRC2 :SCS (XMM-REG))) + (:ARG-TYPES XMM XMM) + + (:RESULTS (DEST :SCS (XMM-REG))) + + (:TEMPORARY (:SC XMM-REG :from :argument :to :result) X0) + (:TEMPORARY (:SC XMM-REG :from :argument) X1) + + (:GENERATOR 10 + (move x0 src1) + (move x1 src2) + (inst addps x0 x1) + (move dest x0) + )) + +(DEFINE-VOP (%sse-vect-sub/single-float) + (:POLICY :FAST-SAFE) + (:ARGS (DEST :SCS (DESCRIPTOR-REG)) + (SRC1 :SCS (DESCRIPTOR-REG)) + (SRC2 :SCS (DESCRIPTOR-REG))) + (:ARG-TYPES SIMPLE-ARRAY-SINGLE-FLOAT + SIMPLE-ARRAY-SINGLE-FLOAT + SIMPLE-ARRAY-SINGLE-FLOAT) + + (:TEMPORARY (:SC XMM-REG) X0) + (:TEMPORARY (:SC XMM-REG) X1) + + (:GENERATOR 10 + (inst movdqu x0 (vect-ea src1)) + (inst movdqu x1 (vect-ea src2)) + (inst subps x0 x1) + (inst movdqu (vect-ea dest) x0))) + +(DEFINE-VOP (%sse-vect-len/single-float) + (:POLICY :FAST-SAFE) + (:ARGS (DEST :SCS (DESCRIPTOR-REG)) + (SRC1 :SCS (DESCRIPTOR-REG))) + (:ARG-TYPES SIMPLE-ARRAY-SINGLE-FLOAT SIMPLE-ARRAY-SINGLE-FLOAT) + + (:TEMPORARY (:SC XMM-REG) X0) + (:TEMPORARY (:SC XMM-REG) X1) + + (:GENERATOR 10 + (inst xorps x0 x0) + (inst movdqu x1 (vect-ea src1)) + (inst mulps x1 x1) ;; ^2 + + (inst movdqa x0 x1) ;; + + + (inst psrldq-ib x1 4) ;; >> 4 + (inst addss x0 x1) ;; + + + (inst psrldq-ib x1 4) ;; .. + (inst addss x0 x1) + + (inst psrldq-ib x1 4) + (inst addss x0 x1) ;; here we have added up all single-floats + + (inst sqrtss x1 x0) ;; sqrt + + (inst movss (vect-ea dest) x1) ;; store scalar single-float + )) + +(DEFINE-VOP (%sse-vect-scalar-mul/single-float) + (:POLICY :FAST-SAFE) + (:ARGS (DEST :SCS (DESCRIPTOR-REG)) + (SRC1 :SCS (DESCRIPTOR-REG)) + (SCALAR :SCS (DESCRIPTOR-REG))) + (:ARG-TYPES SIMPLE-ARRAY-SINGLE-FLOAT SIMPLE-ARRAY-SINGLE-FLOAT SIMPLE-ARRAY-SINGLE-FLOAT) + + (:TEMPORARY (:SC XMM-REG) X0) + (:TEMPORARY (:SC XMM-REG) X1) + (:TEMPORARY (:SC XMM-REG) X2) + + (:GENERATOR 10 + (inst xorps x2 x2) + (inst movdqu x0 (vect-ea src1)) + (inst movss x1 (vect-ea scalar)) + + ;; load scalar to all slots + (inst addss x2 x1) + (inst pslldq-ib x1 4) + (inst orps x2 x1) + (inst pslldq-ib x1 4) + (inst orps x2 x1) + (inst pslldq-ib x1 4) + (inst orps x2 x1) + + ;; mul vector with scalar-vector + (inst mulps x0 x2) + + ;; store + (inst movdqu (vect-ea dest) x0) + )) + +(DEFINE-VOP (%sse-vect-normalize/single-float) + (:POLICY :FAST-SAFE) + (:ARGS (DEST :SCS (DESCRIPTOR-REG)) + (SRC1 :SCS (DESCRIPTOR-REG))) + (:ARG-TYPES SIMPLE-ARRAY-SINGLE-FLOAT SIMPLE-ARRAY-SINGLE-FLOAT) + + (:TEMPORARY (:SC XMM-REG) X0) + (:TEMPORARY (:SC XMM-REG) X1) + (:TEMPORARY (:SC XMM-REG) X2) + + (:GENERATOR 10 + (inst xorps x0 x0) + (inst movdqu x1 (vect-ea src1)) + (inst movdqa x2 x1) + + ;; calculate x0 <- 1 / sqrt( x^2 + y^2 + z^2 + w^2 ) + (inst mulps x1 x1) ;; ^2 + + ;; copy x1 to x0, then rotate/add + (inst movdqa x0 x1) + + (inst shufps x1 x1 #b10010011) ;; rotate + (inst addps x0 x1) ;; + + + (inst shufps x1 x1 #b10010011) ;; rotate + (inst addps x0 x1) ;; + + + (inst shufps x1 x1 #b10010011) ;; rotate + (inst addps x0 x1) ;; + + + (inst rsqrtps x1 x0) ;; 1 / sqrt + + (inst mulps x2 x1) ;; vect = vect * (1 / sqrt(len)) + + (inst movdqu (vect-ea dest) x2) ;; store normalized vector + )) + +(DEFINE-VOP (%sse-vect-dot/single-float) + (:POLICY :FAST-SAFE) + (:ARGS (DEST :SCS (DESCRIPTOR-REG)) + (SRC1 :SCS (DESCRIPTOR-REG)) + (SRC2 :SCS (DESCRIPTOR-REG))) + + (:ARG-TYPES + SIMPLE-ARRAY-SINGLE-FLOAT + SIMPLE-ARRAY-SINGLE-FLOAT + SIMPLE-ARRAY-SINGLE-FLOAT) + + (:TEMPORARY (:SC XMM-REG) X0) + (:TEMPORARY (:SC XMM-REG) X1) + + (:GENERATOR 10 + (inst movdqu x0 (vect-ea src1)) + (inst movdqu x1 (vect-ea src2)) + + (inst mulps x1 x0) ;; a_n * b_n + + (inst movdqa x0 x1) ;; + + (inst psrldq-ib x1 4) ;; >> 4 + (inst addss x0 x1) ;; + + + (inst psrldq-ib x1 4) ;; .. + (inst addss x0 x1) + + (inst psrldq-ib x1 4) + (inst addss x0 x1) ;; here we have added up all single-floats + + (inst movss (vect-ea dest) x0) ;; store scalar single-float + )) + diff --git a/test-vector.lisp b/test-vector.lisp new file mode 100644 index 0000000..8ae582c --- /dev/null +++ b/test-vector.lisp @@ -0,0 +1,190 @@ +#| +Copyright (c) 2005 Risto Laakso +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. +3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +|# +(in-package :cl-user) +;;(declaim (optimize (speed 3) (space 0) (debug 0) (safety 0))) + +(defmacro make-vector () + `(make-array 4 :element-type 'single-float :initial-element 0f0 :adjustable nil :fill-pointer nil)) + +(defmacro make-scalar () + `(make-array 1 :element-type 'single-float :initial-element 0f0 :adjustable nil :fill-pointer nil)) + +(declaim + (ftype (function ((simple-array single-float (4)) single-float) (simple-array single-float (4))) v* v2*) + (ftype (function ((simple-array single-float (4)) (simple-array single-float (4))) (simple-array single-float (4))) v+ v- v2+ v2-) + (ftype (function ((simple-array single-float (4)) (simple-array single-float (4))) single-float) dot dot2) + (ftype (function ((simple-array single-float (4))) (simple-array single-float (4))) unitise unitise2) + (ftype (function (single-float single-float single-float) (simple-array single-float (4))) vec) + ) + +(declaim (inline v* v+ v- dot unitise vec v2* v2+ v2- dot2 unitise2)) + +(defun v2* (a s) + (let ((res (make-vector))) + (declare (type (simple-array single-float (4)) a res) (type single-float s)) + (loop for i from 0 to 3 do (setf (aref res i) (* (aref a i) s))) + res)) + +(defun v* (a s) + (let ((res (make-vector))) + (sb-sys:%primitive sb-vm::%sse-vect-scalar-mul/single-float res a s) + res)) + +(defun v2+ (a b) + (let ((res (make-vector))) + (declare (type (simple-array single-float (4)) a b res)) + (loop for i from 0 to 3 do (setf (aref res i) (+ (aref a i) (aref b i)))) + res)) + +(defun v+ (a b) + (let ((res (make-vector))) + (sb-sys:%primitive sb-vm::%sse-vect-add/single-float res a b) + res)) + +(defun v2- (a b) + (let ((res (make-vector))) + (declare (type (simple-array single-float (4)) a b res)) + (loop for i from 0 to 3 do (setf (aref res i) (- (aref a i) (aref b i)))) + res)) + +(defun v- (a b) + (let ((res (make-vector))) + (sb-sys:%primitive sb-vm::%sse-vect-sub/single-float res a b) + res)) + +(defun dot2 (a b) + (declare (type (simple-array single-float (4)) a b)) + (loop for i from 0 to 3 sum (* (aref a i) (aref b i)) into res finally (return res))) + +(defun dot (a b) + (let ((res (make-scalar))) + (sb-sys:%primitive sb-vm::%sse-vect-dot/single-float res a b) + (aref res 0))) + +(defun unitise2 (a) + (v2* a (/ 1f0 (sqrt (dot2 a a))))) + +(defun unitise (a) + (let ((res (make-vector))) + (sb-sys:%primitive sb-vm::%sse-vect-normalize/single-float res a) + res)) + +(defun vec (x y z) + (let ((res (make-vector))) + (setf (aref res 0) x (aref res 1) y (aref res 2) z) + res)) + +(defun test-foo2 () + (let* ((v (v- (vec 10f0 10f0 0f0) (vec 3f0 3f0 1f0))) + (b (dot v (vec 0f0 0f0 10f0))) + (disc (+ (- (* b b) (dot v v)) (* 1.5 1.5)))) + disc)) + +(defun test-bar4 () +;; (let ((x (vec (random 1f6) (random 1f6) (random 1f6))) +;; (y (vec (random 1f6) (random 1f6) (random 1f6))) +;; (z (vec (random 1f6) (random 1f6) (random 1f6))) +;; (idx 0) +;; (res (make-vector))) + (let ((x (Vec 1f0 2f0 3f0)) + (idx 0)) + +;; (sb-sys:%primitive sb-vm::%store-xmm-to-array/single-float res 0 +;; (sb-sys:%primitive sb-vm::%sse-vect-add2/single-float +;; (the xmm (sb-sys:%primitive sb-vm::data-vector-ref/simple-array-single-float/xmm x idx)) +;; (sb-sys:%primitive sb-vm::data-vector-ref/simple-array-single-float/xmm y idx)) + +;; (data-vector-ref x 0) +;; (data-vector-ref y 0)) +;; (sb-sys:%primitive sb-vm::%store-xmm-to-array/single-float y 0 +;; (the xmm (sb-sys:%primitive sb-vm::%load-xmm-from-array/single-float x 0)) +;; (sb-sys:%primitive sb-vm::%load-xmm-from-array/single-float y 0))) + (the single-float (sb-sys:%primitive sb-vm::data-vector-ref/simple-array-single-float x idx)) + +;; (sb-sys:%primitive sb-vm::move-from-xmm +;; (sb-sys:%primitive sb-vm::myvop4 x)))) + +)) +(defun test-bar3 (x y) + (v- (v+ x y) (unitise y))) + +(defun test-bar () + (let ((x (vec (random 1f6) (random 1f6) (random 1f6))) + (y (vec (random 1f6) (random 1f6) (random 1f6))) + res) + (time (dotimes (i 1000000) + (setf res (dot (v- (v+ x y) y) (unitise y))))) + (time (dotimes (i 1000000) + (setf res (dot2 (v2- (v2+ x y) y) (unitise2 y))))) + res)) + + + +(defun test-foo () + (format t "~S.~%" (unitise (vec -1.0 -3.0 2.0)))) + +(defun test-vector () + (let ((vec1 (make-vector)) + (vec2 (make-vector)) + (vec3 (make-vector)) + (temp (make-array 1 :element-type 'single-float :initial-element 0f0)) + res) + + (loop for i of-type fixnum from 0 below 3 + do (setf (aref vec1 i) (float (random 1f6)) + (aref vec2 i) (float (random 1f6)))) + + + (format t "Data: ~S~%~S~%" vec1 vec2) + + (sb-sys:%primitive sb-vm::%sse-vect-add/single-float vec3 vec1 vec2) + (format t "Add: ~S, ok? ~A~%" vec3 + (loop for equal = t + for res-elt across res + for idx from 0 + for ok-elt = (+ (aref vec1 idx) (aref vec2 idx)) + when (/= ok-elt res-elt) do (setq equal nil) + finally (return equal))) + + (sb-sys:%primitive sb-vm::%sse-vect-normalize/single-float vec3 vec1) + (sb-sys:%primitive sb-vm::%sse-vect-len/single-float temp vec3) + (format t "Normalize 1: ~S, len ~S.~%" vec3 temp) + + (sb-sys:%primitive sb-vm::%sse-vect-normalize/single-float vec3 vec2) + (sb-sys:%primitive sb-vm::%sse-vect-len/single-float temp vec3) + (format t "Normalize 2: ~S, len ~S.~%" vec3 temp) + + (sb-sys:%primitive sb-vm::%sse-vect-dot/single-float temp vec1 vec2) + (format t "Dot: ~S, ok? ~A.~%" temp + (loop for a across vec1 + for b across vec2 + sum (* a b) into res + finally (return (= res (aref temp 0))))) + + )) + +