1 ;;;; SSE intrinsics support for x86-64
3 ;;;; This software is part of the SBCL system. See the README file for
6 ;;;; This software is derived from the CMU CL system, which was
7 ;;;; written at Carnegie Mellon University and released into the
8 ;;;; public domain. The software is in the public domain and is
9 ;;;; provided with absolutely no warranty. See the COPYING and CREDITS
10 ;;;; files for more information.
14 (defun ea-for-sse-stack (tn &optional
(base rbp-tn
))
15 (ea (frame-byte-offset (1+ (tn-offset tn
))) base
))
17 (defun float-sse-p (tn)
18 (sc-is tn single-sse-reg single-sse-stack single-sse-immediate
19 double-sse-reg double-sse-stack double-sse-immediate
))
21 (sc-is tn int-sse-reg int-sse-stack int-sse-immediate
))
24 (progn ; the host compiler will complain about absence of these
25 (defun %simd-pack-low
(x) (error "Called %SIMD-PACK-LOW ~S" x
))
26 (defun %simd-pack-high
(x) (error "Called %SIMD-PACK-HIGH ~S" x
)))
28 (define-move-fun (load-int-sse-immediate 1) (vop x y
)
29 ((int-sse-immediate) (int-sse-reg))
30 (let* ((x (tn-value x
))
31 (lo (%simd-pack-low x
))
32 (hi (%simd-pack-high x
)))
35 ((= lo hi
(ldb (byte 64 0) -
1))
36 ;; don't think this is recognized as dependency breaking...
39 (inst movdqa y
(register-inline-constant x
))))))
41 (define-move-fun (load-float-sse-immediate 1) (vop x y
)
42 ((single-sse-immediate double-sse-immediate
)
43 (single-sse-reg double-sse-reg
))
44 (let* ((x (tn-value x
))
45 (lo (%simd-pack-low x
))
46 (hi (%simd-pack-high x
)))
49 ((= lo hi
(ldb (byte 64 0) -
1))
52 (inst movaps y
(register-inline-constant x
))))))
54 (define-move-fun (load-int-sse 2) (vop x y
)
55 ((int-sse-stack) (int-sse-reg))
56 (inst movdqu y
(ea-for-sse-stack x
)))
58 (define-move-fun (load-float-sse 2) (vop x y
)
59 ((single-sse-stack double-sse-stack
) (single-sse-reg double-sse-reg
))
60 (inst movups y
(ea-for-sse-stack x
)))
62 (define-move-fun (store-int-sse 2) (vop x y
)
63 ((int-sse-reg) (int-sse-stack))
64 (inst movdqu
(ea-for-sse-stack y
) x
))
66 (define-move-fun (store-float-sse 2) (vop x y
)
67 ((double-sse-reg single-sse-reg
) (double-sse-stack single-sse-stack
))
68 (inst movups
(ea-for-sse-stack y
) x
))
70 (define-vop (sse-move)
71 (:args
(x :scs
(single-sse-reg double-sse-reg int-sse-reg
)
73 :load-if
(not (location= x y
))))
74 (:results
(y :scs
(single-sse-reg double-sse-reg int-sse-reg
)
75 :load-if
(not (location= x y
))))
79 (define-move-vop sse-move
:move
80 (int-sse-reg single-sse-reg double-sse-reg
)
81 (int-sse-reg single-sse-reg double-sse-reg
))
83 (macrolet ((define-move-from-sse (type tag
&rest scs
)
84 (let ((name (symbolicate "MOVE-FROM-SSE/" type
)))
88 (:results
(y :scs
(descriptor-reg)))
89 #+gs-seg
(:temporary
(:sc unsigned-reg
:offset
15) thread-tn
)
92 (:note
"AVX2 to pointer coercion")
94 (alloc-other simd-pack-widetag simd-pack-size y node nil thread-tn
)
95 (storew (fixnumize ,tag
)
96 y simd-pack-tag-slot other-pointer-lowtag
)
97 (let ((ea (object-slot-ea y simd-pack-lo-value-slot other-pointer-lowtag
)))
100 (inst movdqa ea x
)))))
101 (define-move-vop ,name
:move
102 ,scs
(descriptor-reg))))))
103 ;; see +simd-pack-element-types+
104 (define-move-from-sse simd-pack-single
0 single-sse-reg
)
105 (define-move-from-sse simd-pack-double
1 double-sse-reg
)
106 (define-move-from-sse simd-pack-ub8
2 int-sse-reg
)
107 (define-move-from-sse simd-pack-ub16
3 int-sse-reg
)
108 (define-move-from-sse simd-pack-ub32
4 int-sse-reg
)
109 (define-move-from-sse simd-pack-ub64
5 int-sse-reg
)
110 (define-move-from-sse simd-pack-sb8
6 int-sse-reg
)
111 (define-move-from-sse simd-pack-sb16
7 int-sse-reg
)
112 (define-move-from-sse simd-pack-sb32
8 int-sse-reg
)
113 (define-move-from-sse simd-pack-sb64
9 int-sse-reg
))
115 (define-vop (move-to-sse)
116 (:args
(x :scs
(descriptor-reg)))
117 (:results
(y :scs
(int-sse-reg double-sse-reg single-sse-reg
)))
118 (:note
"pointer to SSE coercion")
120 (let ((ea (object-slot-ea x simd-pack-lo-value-slot other-pointer-lowtag
)))
123 (inst movdqa y ea
)))))
124 (define-move-vop move-to-sse
:move
126 (int-sse-reg double-sse-reg single-sse-reg
))
128 (define-vop (move-sse-arg)
129 (:args
(x :scs
(int-sse-reg double-sse-reg single-sse-reg
) :target y
)
131 :load-if
(not (sc-is y int-sse-reg double-sse-reg single-sse-reg
))))
133 (:note
"SSE argument move")
136 ((int-sse-reg double-sse-reg single-sse-reg
)
137 (unless (location= x y
)
138 (if (or (float-sse-p x
)
142 ((int-sse-stack double-sse-stack single-sse-stack
)
144 (inst movups
(ea-for-sse-stack y fp
) x
)
145 (inst movdqu
(ea-for-sse-stack y fp
) x
))))))
146 (define-move-vop move-sse-arg
:move-arg
147 (int-sse-reg double-sse-reg single-sse-reg descriptor-reg
)
148 (int-sse-reg double-sse-reg single-sse-reg
))
150 (define-move-vop move-arg
:move-arg
151 (int-sse-reg double-sse-reg single-sse-reg
)
155 (define-vop (%simd-pack-low
)
156 (:translate %simd-pack-low
)
157 (:args
(x :scs
(int-sse-reg double-sse-reg single-sse-reg
)))
158 (:arg-types simd-pack
)
159 (:results
(dst :scs
(unsigned-reg)))
160 (:result-types unsigned-num
)
165 (define-vop (%simd-pack-high
)
166 (:translate %simd-pack-high
)
167 (:args
(x :scs
(int-sse-reg double-sse-reg single-sse-reg
)
169 (:arg-types simd-pack
)
170 (:temporary
(:sc sse-reg
:from
(:argument
0)) tmp
)
171 (:results
(dst :scs
(unsigned-reg)))
172 (:result-types unsigned-num
)
177 (inst movq dst tmp
)))
178 (define-vop (%simd-pack-high
/sse4
) ; 1 instruction and no temp
179 (:translate %simd-pack-high
)
180 (:args
(x :scs
(int-sse-reg double-sse-reg single-sse-reg
)))
181 (:arg-types simd-pack
)
182 (:results
(dst :scs
(unsigned-reg)))
183 (:result-types unsigned-num
)
185 (:guard
(member :sse4
*backend-subfeatures
*))
187 (inst pextrq dst x
1)))
189 (define-vop (%make-simd-pack
)
190 (:translate %make-simd-pack
)
192 (:args
(tag :scs
(any-reg))
193 (lo :scs
(unsigned-reg))
194 (hi :scs
(unsigned-reg)))
195 (:arg-types tagged-num unsigned-num unsigned-num
)
196 (:results
(dst :scs
(descriptor-reg) :from
:load
))
198 #+gs-seg
(:temporary
(:sc unsigned-reg
:offset
15) thread-tn
)
201 (alloc-other simd-pack-widetag simd-pack-size dst node nil thread-tn
)
202 ;; see +simd-pack-element-types+
203 (storew tag dst simd-pack-tag-slot other-pointer-lowtag
)
204 (storew lo dst simd-pack-lo-value-slot other-pointer-lowtag
)
205 (storew hi dst simd-pack-hi-value-slot other-pointer-lowtag
)))
207 (define-vop (%make-simd-pack-ub64
)
208 (:translate %make-simd-pack-ub64
)
210 (:args
(lo :scs
(unsigned-reg))
211 (hi :scs
(unsigned-reg)))
212 (:arg-types unsigned-num unsigned-num
)
213 (:temporary
(:sc int-sse-reg
) tmp
)
214 (:results
(dst :scs
(int-sse-reg)))
215 (:result-types simd-pack-ub64
)
219 (inst punpcklqdq dst tmp
)))
221 (defmacro simd-pack-dispatch
(pack &body body
)
222 (check-type pack symbol
)
223 `(let ((,pack
,pack
))
225 ,@(map 'list
(lambda (eltype)
226 `((simd-pack ,eltype
) ,@body
))
227 +simd-pack-element-types
+))))
230 (macrolet ((unpack-unsigned (pack bits
)
231 `(simd-pack-dispatch ,pack
232 (let ((lo (%simd-pack-low
,pack
))
233 (hi (%simd-pack-high
,pack
)))
235 ,@(loop for pos by bits below
64 collect
236 `(unpack-unsigned-1 ,bits
,pos lo
))
237 ,@(loop for pos by bits below
64 collect
238 `(unpack-unsigned-1 ,bits
,pos hi
))))))
239 (unpack-unsigned-1 (bits position ub64
)
240 `(ldb (byte ,bits
,position
) ,ub64
)))
241 (declaim (inline %simd-pack-ub8s
))
242 (defun %simd-pack-ub8s
(pack)
243 (declare (type simd-pack pack
))
244 (unpack-unsigned pack
8))
246 (declaim (inline %simd-pack-ub16s
))
247 (defun %simd-pack-ub16s
(pack)
248 (declare (type simd-pack pack
))
249 (unpack-unsigned pack
16))
251 (declaim (inline %simd-pack-ub32s
))
252 (defun %simd-pack-ub32s
(pack)
253 (declare (type simd-pack pack
))
254 (unpack-unsigned pack
32))
256 (declaim (inline %simd-pack-ub64s
))
257 (defun %simd-pack-ub64s
(pack)
258 (declare (type simd-pack pack
))
259 (unpack-unsigned pack
64)))
262 (macrolet ((unpack-signed (pack bits
)
263 `(simd-pack-dispatch ,pack
264 (let ((lo (%simd-pack-low
,pack
))
265 (hi (%simd-pack-high
,pack
)))
267 ,@(loop for pos by bits below
64 collect
268 `(unpack-signed-1 ,bits
,pos lo
))
269 ,@(loop for pos by bits below
64 collect
270 `(unpack-signed-1 ,bits
,pos hi
))))))
271 (unpack-signed-1 (bits position ub64
)
272 `(- (mod (+ (ldb (byte ,bits
,position
) ,ub64
)
275 ,(expt 2 (1- bits
)))))
276 (declaim (inline %simd-pack-sb8s
))
277 (defun %simd-pack-sb8s
(pack)
278 (declare (type simd-pack pack
))
279 (unpack-signed pack
8))
281 (declaim (inline %simd-pack-sb16s
))
282 (defun %simd-pack-sb16s
(pack)
283 (declare (type simd-pack pack
))
284 (unpack-signed pack
16))
286 (declaim (inline %simd-pack-sb32s
))
287 (defun %simd-pack-sb32s
(pack)
288 (declare (type simd-pack pack
))
289 (unpack-signed pack
32))
291 (declaim (inline %simd-pack-sb64s
))
292 (defun %simd-pack-sb64s
(pack)
293 (declare (type simd-pack pack
))
294 (unpack-signed pack
64)))
298 (declaim (inline %make-simd-pack-ub32
))
299 (defun %make-simd-pack-ub32
(w x y z
)
300 (declare (type (unsigned-byte 32) w x y z
))
302 #.
(position '(unsigned-byte 32) +simd-pack-element-types
+ :test
#'equal
)
303 (logior w
(ash x
32))
304 (logior y
(ash z
32)))))
306 (define-vop (%make-simd-pack-double
)
307 (:translate %make-simd-pack-double
)
309 (:args
(lo :scs
(double-reg) :target dst
)
310 (hi :scs
(double-reg) :target tmp
))
311 (:arg-types double-float double-float
)
312 (:temporary
(:sc double-sse-reg
:from
(:argument
1)) tmp
)
313 (:results
(dst :scs
(double-sse-reg) :from
(:argument
0)))
314 (:result-types simd-pack-double
)
318 (inst unpcklpd dst tmp
)))
320 (define-vop (%make-simd-pack-single
)
321 (:translate %make-simd-pack-single
)
323 (:args
(x :scs
(single-reg) :target dst
)
324 (y :scs
(single-reg) :target tmp
)
325 (z :scs
(single-reg))
326 (w :scs
(single-reg)))
327 (:arg-types single-float single-float single-float single-float
)
328 (:temporary
(:sc single-sse-reg
:from
(:argument
1)) tmp
)
329 (:results
(dst :scs
(single-sse-reg) :from
(:argument
0)))
330 (:result-types simd-pack-single
)
333 (inst unpcklps dst z
)
335 (inst unpcklps tmp w
)
336 (inst unpcklps dst tmp
)))
338 (defknown %simd-pack-single-item
339 (simd-pack (integer 0 3)) single-float
(flushable))
341 (define-vop (%simd-pack-single-item
)
342 (:args
(x :scs
(int-sse-reg double-sse-reg single-sse-reg
)
344 (:translate %simd-pack-single-item
)
345 (:arg-types simd-pack
(:constant t
))
347 (:results
(dst :scs
(single-reg)))
348 (:result-types single-float
)
349 (:temporary
(:sc single-sse-reg
:from
(:argument
0)) tmp
)
352 (cond ((and (zerop index
)
353 (not (location= x dst
)))
359 (inst psrldq tmp
(* 4 index
)))
361 (inst movss dst tmp
)))))
365 (declaim (inline %simd-pack-singles
))
366 (defun %simd-pack-singles
(pack)
367 (declare (type simd-pack pack
))
368 (simd-pack-dispatch pack
369 (values (%simd-pack-single-item pack
0)
370 (%simd-pack-single-item pack
1)
371 (%simd-pack-single-item pack
2)
372 (%simd-pack-single-item pack
3)))))
374 (defknown %simd-pack-double-item
375 (simd-pack (integer 0 1)) double-float
(flushable))
377 (define-vop (%simd-pack-double-item
)
378 (:translate %simd-pack-double-item
)
379 (:args
(x :scs
(int-sse-reg double-sse-reg single-sse-reg
)
382 (:arg-types simd-pack
(:constant t
))
383 (:results
(dst :scs
(double-reg)))
384 (:result-types double-float
)
385 (:temporary
(:sc double-sse-reg
:from
(:argument
0)) tmp
)
388 (cond ((and (zerop index
)
389 (not (location= x dst
)))
395 (inst psrldq tmp
(* 8 index
)))
397 (inst movsd dst tmp
)))))
401 (declaim (inline %simd-pack-doubles
))
402 (defun %simd-pack-doubles
(pack)
403 (declare (type simd-pack pack
))
404 (simd-pack-dispatch pack
405 (values (%simd-pack-double-item pack
0)
406 (%simd-pack-double-item pack
1)))))