4823b1bedc4cf552090f1ace5f9c901303faa3d4
[sb-simd.git] / generate-sse-vops.lisp
blob4823b1bedc4cf552090f1ace5f9c901303faa3d4
1 (defun vect-ea (vect idx)
2 `(make-ea :dword :base ,vect :index ,idx
3 :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag)))
5 (defun gen-vops-to-file (filename)
6 (with-open-file (stream filename :direction :output :if-exists :supersede)
7 (gen-vops stream)))
9 (defun gen-vops (&optional (stream t))
11 (format stream "(in-package :sb-vm)~%~%")
13 ;; TWO-ARG SSE VOPs
14 (loop for (op-name type mov-inst op-inst elem-width) in
16 ;; single float
17 (add single-float movups addps 4)
18 (addsub single-float movups addsubps 4)
19 (andnot single-float movups andnps 4)
20 (and single-float movups andps 4)
21 (div single-float movups divps 4)
22 (hadd single-float movups haddps 4)
23 (hsub single-float movups hsubps 4)
24 (max single-float movups maxps 4)
25 (min single-float movups minps 4)
26 (mul single-float movups mulps 4)
27 (or single-float movups orps 4)
28 (sub single-float movups subps 4)
29 (xor single-float movups xorps 4)
31 ;; double float
32 (add double-float movupd addpd 8)
33 (addsub double-float movupd addsubpd 8)
34 (andnot double-float movupd andnpd 8)
35 (and double-float movupd andpd 8)
36 (div double-float movupd divpd 8)
37 (hadd double-float movupd haddpd 8)
38 (hsub double-float movupd hsubpd 8)
39 (max double-float movupd maxpd 8)
40 (min double-float movupd minpd 8)
41 (mul double-float movupd mulpd 8)
42 (or double-float movupd orpd 8)
43 (sub double-float movupd subpd 8)
44 (xor double-float movupd xorpd 8)
46 ;; unsigned byte 8
47 (add unsigned-byte-8 movdqu paddb 1)
48 (avg unsigned-byte-8 movdqu pavgb 1)
49 (max unsigned-byte-8 movdqu pmaxub 1)
50 (min unsigned-byte-8 movdqu pminub 1)
51 (sub unsigned-byte-8 movdqu psubb 1)
53 (and unsigned-byte-8 movdqu pand 1)
54 (andn unsigned-byte-8 movdqu pandn 1)
55 (or unsigned-byte-8 movdqu por 1)
56 (xor unsigned-byte-8 movdqu pxor 1)
58 ;; unsigned byte 16
59 (add unsigned-byte-16 movdqu paddw 2)
60 (avg unsigned-byte-16 movdqu pavgw 2)
61 (sub unsigned-byte-16 movdqu psubw 2)
63 (and unsigned-byte-16 movdqu pand 2)
64 (andn unsigned-byte-16 movdqu pandn 2)
65 (or unsigned-byte-16 movdqu por 2)
66 (xor unsigned-byte-16 movdqu pxor 2)
68 (shl unsigned-byte-16 movdqu psllw 2)
69 (shr unsigned-byte-16 movdqu psrlw 2)
71 ;; signed byte 16
72 (add signed-byte-16 movdqu paddw 2)
73 (max signed-byte-16 movdqu pmaxsw 2)
74 (min signed-byte-16 movdqu pminsw 2)
75 (sub signed-byte-16 movdqu psubw 2)
77 (and signed-byte-16 movdqu pand 2)
78 (andn signed-byte-16 movdqu pandn 2)
79 (or signed-byte-16 movdqu por 2)
80 (xor signed-byte-16 movdqu pxor 2)
82 (shl signed-byte-16 movdqu psllw 2)
83 (shr signed-byte-16 movdqu psraw 2)
87 (format stream "~S~%~%"
88 `(define-vop (,(intern (let ((name (format nil "%SSE-~A/SIMPLE-ARRAY-~A-1" op-name type)))
89 (format t "; defining VOP ~A..~%" name)
90 name)))
92 (:policy :fast-safe)
94 ;;(:guard (member :sse2 *backend-subfeatures*))
96 (:args
97 (result :scs (descriptor-reg))
98 (vect1 :scs (descriptor-reg))
99 (vect2 :scs (descriptor-reg))
100 (index :scs (unsigned-reg)))
102 (:arg-types
103 ,(intern (format nil "SIMPLE-ARRAY-~A" type))
104 ,(intern (format nil "SIMPLE-ARRAY-~A" type))
105 ,(intern (format nil "SIMPLE-ARRAY-~A" type))
106 fixnum)
108 (:temporary (:sc sse-reg) sse-temp1)
109 (:temporary (:sc sse-reg) sse-temp2)
111 (:generator 10
113 ;; scale index by 4 (size-of single-float)
114 (inst shl index ,(floor (log elem-width 2)))
116 ;; load
117 (inst ,mov-inst sse-temp1 ,(vect-ea 'vect1 'index))
118 (inst ,mov-inst sse-temp2 ,(vect-ea 'vect2 'index))
120 ;; operate
121 (inst ,op-inst sse-temp1 sse-temp2)
123 ;; store
124 (inst ,mov-inst ,(vect-ea 'result 'index) sse-temp1)
125 ))))
127 ;; SINGLE-ARG SSE VOPs
128 (loop for (op-name type mov-inst op-inst elem-width) in
130 (recip single-float movups rcpps 4)
131 (rsqrt single-float movups rsqrtps 4)
132 (sqrt single-float movups sqrtps 4)
133 (sqrt double-float movupd sqrtpd 8)
136 (format stream "~S~%~%"
137 `(define-vop (,(intern (let ((name (format nil "%SSE-~A/SIMPLE-ARRAY-~A-1" op-name type)))
138 (format t "; defining VOP ~A..~%" name)
139 name)))
140 (:policy :fast-safe)
142 ;;(:guard (member :sse2 *backend-subfeatures*))
144 (:args
145 (result :scs (descriptor-reg))
146 (vect1 :scs (descriptor-reg))
147 (index :scs (unsigned-reg)))
149 (:arg-types
150 ,(intern (format nil "SIMPLE-ARRAY-~A" type))
151 ,(intern (format nil "SIMPLE-ARRAY-~A" type))
152 fixnum)
154 (:temporary (:sc sse-reg) sse-temp1)
155 (:temporary (:sc sse-reg) sse-temp2)
157 (:generator 10
159 ;; scale index by 4 (size-of single-float)
160 (inst shl index ,(floor (log elem-width 2)))
162 ;; load
163 (inst ,mov-inst sse-temp1 ,(vect-ea 'vect1 'index))
165 ;; operate
166 (inst ,op-inst sse-temp2 sse-temp1)
168 ;; store
169 (inst ,mov-inst ,(vect-ea 'result 'index) sse-temp2)
170 ))))
172 ;; COMPARE
173 (loop for (op-name type mov-inst op-inst elem-width) in
175 (cmp single-float movups cmpps 4)
176 (cmp double-float movupd cmppd 8)
179 (format stream "~S~%~%"
180 `(define-vop (,(intern (let ((name (format nil "%SSE-~A/SIMPLE-ARRAY-~A-1" op-name type)))
181 (format t "; defining VOP ~A..~%" name)
182 name)))
184 (:policy :fast-safe)
186 ;;(:guard (member :sse2 *backend-subfeatures*))
188 (:args
189 (result :scs (descriptor-reg))
190 (vect1 :scs (descriptor-reg))
191 (vect2 :scs (descriptor-reg))
192 (index :scs (unsigned-reg)))
194 (:info cond)
196 (:arg-types
197 ,(intern (format nil "SIMPLE-ARRAY-~A" type))
198 ,(intern (format nil "SIMPLE-ARRAY-~A" type))
199 ,(intern (format nil "SIMPLE-ARRAY-~A" type))
200 fixnum
201 (:constant keyword)
204 (:temporary (:sc sse-reg) sse-temp1)
205 (:temporary (:sc sse-reg) sse-temp2)
207 (:generator 10
209 ;; scale index by 4 (size-of single-float)
210 (inst shl index ,(floor (log elem-width 2)))
212 ;; load
213 (inst ,mov-inst sse-temp1 ,(vect-ea 'vect1 'index))
214 (inst ,mov-inst sse-temp2 ,(vect-ea 'vect2 'index))
216 ;; operate
217 (inst ,op-inst sse-temp1 sse-temp2 cond)
219 ;; store
220 (inst ,mov-inst ,(vect-ea 'result 'index) sse-temp1)
221 ))))