CVS patch versions
[sb-simd.git] / sse-vector.lisp
blob2eec399631dcbce3fdbd5321955ad397fad8dede
1 #|
2 Copyright (c) 2005 Risto Laakso
3 All rights reserved.
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions
7 are met:
8 1. Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 2. Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13 3. The name of the author may not be used to endorse or promote products
14 derived from this software without specific prior written permission.
16 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 (in-package :sb-vm)
(defmacro vect-ea (base &optional idx)
  "Expand into a MAKE-EA form for a :DWORD effective address based on BASE.

The displacement is (VECTOR-DATA-OFFSET * N-WORD-BYTES) - OTHER-POINTER-LOWTAG.
IDX is inspected at macroexpansion time:
  - a literal number is added to the displacement;
  - a non-NIL symbol is passed to MAKE-EA as the :INDEX argument;
  - anything else (including NIL / omitted) is ignored."
  (let* ((data-start '(- (* vector-data-offset n-word-bytes) other-pointer-lowtag))
         (disp (if (numberp idx)
                   `(+ ,data-start ,idx)
                   data-start)))
    (cond ((and idx (symbolp idx))
           `(make-ea :dword :base ,base :index ,idx :disp ,disp))
          (t
           `(make-ea :dword :base ,base :disp ,disp)))))
;; Packed single-float addition on 128 bits (four single-floats):
;; loads from the addresses VECT-EA computes for SRC1 and SRC2, adds them
;; lane-wise and stores the sum at the address computed for DEST.
;; The VOP has no results; its effect is the store into DEST.
(define-vop (%sse-vect-add/single-float)
  (:policy :fast-safe)
  (:args (dest :scs (descriptor-reg))
         (src1 :scs (descriptor-reg))
         (src2 :scs (descriptor-reg)))
  (:arg-types simple-array-single-float
              simple-array-single-float
              simple-array-single-float)
  (:temporary (:sc xmm-reg) sum)
  (:temporary (:sc xmm-reg) addend)
  (:generator 10
    ;; Unaligned loads: nothing here establishes 16-byte alignment.
    (inst movdqu sum (vect-ea src1))
    (inst movdqu addend (vect-ea src2))
    (inst addps sum addend)             ; sum <- src1 + src2, per lane
    (inst movdqu (vect-ea dest) sum)))
;; Register-to-register packed single-float addition:
;; DEST <- SRC1 + SRC2, lane-wise, with all operands in XMM registers.
;;
;; The arguments are first copied into temporaries so that ADDPS, which
;; overwrites its first operand, never modifies an argument register.
;;
;; NOTE(review): this relies on MOVE being able to copy between XMM-REG
;; locations -- confirm against the MOVE definition used by this project.
(define-vop (%sse-vect-add2/single-float)
  (:policy :fast-safe)
  (:args (src1 :scs (xmm-reg))
         (src2 :scs (xmm-reg)))
  (:arg-types xmm xmm)
  (:results (dest :scs (xmm-reg)))
  (:temporary (:sc xmm-reg :from :argument :to :result) x0)
  (:temporary (:sc xmm-reg :from :argument) x1)
  (:generator 10
    (move x0 src1)
    (move x1 src2)
    (inst addps x0 x1)                  ; x0 <- src1 + src2, per lane
    (move dest x0)))
;; Packed single-float subtraction on 128 bits (four single-floats):
;; loads from the addresses VECT-EA computes for SRC1 and SRC2, computes
;; SRC1 - SRC2 lane-wise and stores the difference at the address computed
;; for DEST.  The VOP has no results; its effect is the store into DEST.
(define-vop (%sse-vect-sub/single-float)
  (:policy :fast-safe)
  (:args (dest :scs (descriptor-reg))
         (src1 :scs (descriptor-reg))
         (src2 :scs (descriptor-reg)))
  (:arg-types simple-array-single-float
              simple-array-single-float
              simple-array-single-float)
  (:temporary (:sc xmm-reg) diff)
  (:temporary (:sc xmm-reg) subtrahend)
  (:generator 10
    ;; Unaligned loads: nothing here establishes 16-byte alignment.
    (inst movdqu diff (vect-ea src1))
    (inst movdqu subtrahend (vect-ea src2))
    (inst subps diff subtrahend)        ; diff <- src1 - src2, per lane
    (inst movdqu (vect-ea dest) diff)))
;; Euclidean length of the four packed single-floats loaded from SRC1:
;; stores sqrt(a0^2 + a1^2 + a2^2 + a3^2) as one scalar single-float at the
;; address VECT-EA computes for DEST.  Only that one scalar is written.
;;
;; The squares are summed in lane 0 in the order ((a0 + a1) + a2) + a3 by
;; repeatedly shifting the squared vector right by one lane (4 bytes) and
;; adding the new low lane with ADDSS.
(define-vop (%sse-vect-len/single-float)
  (:policy :fast-safe)
  (:args (dest :scs (descriptor-reg))
         (src1 :scs (descriptor-reg)))
  (:arg-types simple-array-single-float simple-array-single-float)
  (:temporary (:sc xmm-reg) x0)
  (:temporary (:sc xmm-reg) x1)
  (:generator 10
    ;; No need to clear X0 first: the MOVDQA below overwrites all of it.
    (inst movdqu x1 (vect-ea src1))
    (inst mulps x1 x1)                  ; x1 <- squares, per lane
    (inst movdqa x0 x1)                 ; x0[0] = a0^2

    (inst psrldq-ib x1 4)               ; bring a1^2 down to lane 0
    (inst addss x0 x1)

    (inst psrldq-ib x1 4)               ; a2^2
    (inst addss x0 x1)

    (inst psrldq-ib x1 4)               ; a3^2
    (inst addss x0 x1)                  ; x0[0] = sum of all four squares

    (inst sqrtss x1 x0)                 ; x1[0] <- sqrt(sum)
    (inst movss (vect-ea dest) x1)))    ; store the scalar result
;; Multiply the four packed single-floats loaded from SRC1 by one scalar
;; and store the four products at the address VECT-EA computes for DEST.
;; The scalar is read as a single-float from the address computed for
;; SCALAR (SCALAR is itself declared as a single-float array).
;;
;; The scalar is replicated into all four lanes with one SHUFPS (selector 0
;; picks lane 0 for every destination lane).  The previous sequence built
;; lane 0 with ADDSS onto a zeroed register and lanes 1-3 by shift/OR bit
;; copies, so a scalar of -0.0 became +0.0 in lane 0 only (and a signalling
;; NaN was quieted in lane 0 only).  A bit-exact broadcast treats every lane
;; alike and needs no third temporary.
(define-vop (%sse-vect-scalar-mul/single-float)
  (:policy :fast-safe)
  (:args (dest :scs (descriptor-reg))
         (src1 :scs (descriptor-reg))
         (scalar :scs (descriptor-reg)))
  (:arg-types simple-array-single-float
              simple-array-single-float
              simple-array-single-float)
  (:temporary (:sc xmm-reg) x0)
  (:temporary (:sc xmm-reg) x1)
  (:generator 10
    (inst movdqu x0 (vect-ea src1))
    (inst movss x1 (vect-ea scalar))    ; x1[0] <- scalar

    ;; load scalar to all slots
    (inst shufps x1 x1 #b00000000)

    ;; mul vector with scalar-vector
    (inst mulps x0 x1)

    ;; store
    (inst movdqu (vect-ea dest) x0)))
;; Scale the four packed single-floats loaded from SRC1 by the reciprocal of
;; their Euclidean length and store the four results at the address VECT-EA
;; computes for DEST.
;;
;; The reciprocal square root comes from RSQRTPS, which is an approximation
;; (see the Intel SDM for its error bound), so the result is only
;; approximately unit length.  An all-zero input yields NaNs (0 * infinity).
(define-vop (%sse-vect-normalize/single-float)
  (:policy :fast-safe)
  (:args (dest :scs (descriptor-reg))
         (src1 :scs (descriptor-reg)))
  (:arg-types simple-array-single-float simple-array-single-float)
  (:temporary (:sc xmm-reg) x0)
  (:temporary (:sc xmm-reg) x1)
  (:temporary (:sc xmm-reg) x2)
  (:generator 10
    ;; No need to clear X0 first: the MOVDQA into X0 below overwrites it.
    (inst movdqu x1 (vect-ea src1))
    (inst movdqa x2 x1)                 ; x2 keeps the unscaled vector

    ;; calculate x1 <- 1 / sqrt( x^2 + y^2 + z^2 + w^2 ) in every lane
    (inst mulps x1 x1)                  ; squares, per lane

    ;; copy x1 to x0, then rotate/add: after three rotate-and-add steps
    ;; every lane of x0 holds the sum of all four squares
    (inst movdqa x0 x1)

    (inst shufps x1 x1 #b10010011)      ; rotate lanes by one position
    (inst addps x0 x1)

    (inst shufps x1 x1 #b10010011)
    (inst addps x0 x1)

    (inst shufps x1 x1 #b10010011)
    (inst addps x0 x1)

    (inst rsqrtps x1 x0)                ; ~ 1 / sqrt(sum of squares) = 1 / length

    (inst mulps x2 x1)                  ; vect = vect * (1 / length)

    (inst movdqu (vect-ea dest) x2)))   ; store normalized vector
;; Dot product of the four packed single-floats loaded from SRC1 and SRC2:
;; stores a0*b0 + a1*b1 + a2*b2 + a3*b3 as one scalar single-float at the
;; address VECT-EA computes for DEST.  Only that one scalar is written.
;;
;; The products are summed in lane 0 in the order ((p0 + p1) + p2) + p3 by
;; repeatedly shifting the product vector right by one lane (4 bytes) and
;; adding the new low lane with ADDSS.
(define-vop (%sse-vect-dot/single-float)
  (:policy :fast-safe)
  (:args (dest :scs (descriptor-reg))
         (src1 :scs (descriptor-reg))
         (src2 :scs (descriptor-reg)))
  (:arg-types simple-array-single-float
              simple-array-single-float
              simple-array-single-float)
  (:temporary (:sc xmm-reg) x0)
  (:temporary (:sc xmm-reg) x1)
  (:generator 10
    (inst movdqu x0 (vect-ea src1))
    (inst movdqu x1 (vect-ea src2))

    (inst mulps x1 x0)                  ; a_n * b_n
    (inst movdqa x0 x1)                 ; x0[0] = p0

    (inst psrldq-ib x1 4)               ; bring p1 down to lane 0
    (inst addss x0 x1)

    (inst psrldq-ib x1 4)               ; p2
    (inst addss x0 x1)

    (inst psrldq-ib x1 4)               ; p3
    (inst addss x0 x1)                  ; x0[0] = sum of all four products

    (inst movss (vect-ea dest) x0)))    ; store scalar single-float