Late-breaking NEWS for late-breaking fixes
[sbcl.git] / src / compiler / x86-64 / simd-pack.lisp
blob5280227089b81b329a17882e11d5c072642623aa
1 ;;;; SSE intrinsics support for x86-64
3 ;;;; This software is part of the SBCL system. See the README file for
4 ;;;; more information.
5 ;;;;
6 ;;;; This software is derived from the CMU CL system, which was
7 ;;;; written at Carnegie Mellon University and released into the
8 ;;;; public domain. The software is in the public domain and is
9 ;;;; provided with absolutely no warranty. See the COPYING and CREDITS
10 ;;;; files for more information.
12 (in-package "SB-VM")
14 (defun ea-for-sse-stack (tn &optional (base rbp-tn))
15 (ea (frame-byte-offset (1+ (tn-offset tn))) base))
17 (defun float-sse-p (tn)
18 (sc-is tn single-sse-reg single-sse-stack single-sse-immediate
19 double-sse-reg double-sse-stack double-sse-immediate))
20 (defun int-sse-p (tn)
21 (sc-is tn int-sse-reg int-sse-stack int-sse-immediate))
23 #+sb-xc-host
24 (progn ; the host compiler will complain about absence of these
25 (defun %simd-pack-low (x) (error "Called %SIMD-PACK-LOW ~S" x))
26 (defun %simd-pack-high (x) (error "Called %SIMD-PACK-HIGH ~S" x)))
28 (define-move-fun (load-int-sse-immediate 1) (vop x y)
29 ((int-sse-immediate) (int-sse-reg))
30 (let* ((x (tn-value x))
31 (lo (%simd-pack-low x))
32 (hi (%simd-pack-high x)))
33 (cond ((= lo hi 0)
34 (inst pxor y y))
35 ((= lo hi (ldb (byte 64 0) -1))
36 ;; don't think this is recognized as dependency breaking...
37 (inst pcmpeqd y y))
39 (inst movdqa y (register-inline-constant x))))))
41 (define-move-fun (load-float-sse-immediate 1) (vop x y)
42 ((single-sse-immediate double-sse-immediate)
43 (single-sse-reg double-sse-reg))
44 (let* ((x (tn-value x))
45 (lo (%simd-pack-low x))
46 (hi (%simd-pack-high x)))
47 (cond ((= lo hi 0)
48 (inst xorps y y))
49 ((= lo hi (ldb (byte 64 0) -1))
50 (inst pcmpeqd y y))
52 (inst movaps y (register-inline-constant x))))))
54 (define-move-fun (load-int-sse 2) (vop x y)
55 ((int-sse-stack) (int-sse-reg))
56 (inst movdqu y (ea-for-sse-stack x)))
58 (define-move-fun (load-float-sse 2) (vop x y)
59 ((single-sse-stack double-sse-stack) (single-sse-reg double-sse-reg))
60 (inst movups y (ea-for-sse-stack x)))
62 (define-move-fun (store-int-sse 2) (vop x y)
63 ((int-sse-reg) (int-sse-stack))
64 (inst movdqu (ea-for-sse-stack y) x))
66 (define-move-fun (store-float-sse 2) (vop x y)
67 ((double-sse-reg single-sse-reg) (double-sse-stack single-sse-stack))
68 (inst movups (ea-for-sse-stack y) x))
70 (define-vop (sse-move)
71 (:args (x :scs (single-sse-reg double-sse-reg int-sse-reg)
72 :target y
73 :load-if (not (location= x y))))
74 (:results (y :scs (single-sse-reg double-sse-reg int-sse-reg)
75 :load-if (not (location= x y))))
76 (:note "SSE move")
77 (:generator 0
78 (move y x)))
79 (define-move-vop sse-move :move
80 (int-sse-reg single-sse-reg double-sse-reg)
81 (int-sse-reg single-sse-reg double-sse-reg))
83 (macrolet ((define-move-from-sse (type tag &rest scs)
84 (let ((name (symbolicate "MOVE-FROM-SSE/" type)))
85 `(progn
86 (define-vop (,name)
87 (:args (x :scs ,scs))
88 (:results (y :scs (descriptor-reg)))
89 #+gs-seg (:temporary (:sc unsigned-reg :offset 15) thread-tn)
90 (:node-var node)
91 (:arg-types ,type)
92 (:note "AVX2 to pointer coercion")
93 (:generator 13
94 (alloc-other simd-pack-widetag simd-pack-size y node nil thread-tn)
95 (storew (fixnumize ,tag)
96 y simd-pack-tag-slot other-pointer-lowtag)
97 (let ((ea (object-slot-ea y simd-pack-lo-value-slot other-pointer-lowtag)))
98 (if (float-sse-p x)
99 (inst movaps ea x)
100 (inst movdqa ea x)))))
101 (define-move-vop ,name :move
102 ,scs (descriptor-reg))))))
103 ;; see +simd-pack-element-types+
104 (define-move-from-sse simd-pack-single 0 single-sse-reg)
105 (define-move-from-sse simd-pack-double 1 double-sse-reg)
106 (define-move-from-sse simd-pack-ub8 2 int-sse-reg)
107 (define-move-from-sse simd-pack-ub16 3 int-sse-reg)
108 (define-move-from-sse simd-pack-ub32 4 int-sse-reg)
109 (define-move-from-sse simd-pack-ub64 5 int-sse-reg)
110 (define-move-from-sse simd-pack-sb8 6 int-sse-reg)
111 (define-move-from-sse simd-pack-sb16 7 int-sse-reg)
112 (define-move-from-sse simd-pack-sb32 8 int-sse-reg)
113 (define-move-from-sse simd-pack-sb64 9 int-sse-reg))
115 (define-vop (move-to-sse)
116 (:args (x :scs (descriptor-reg)))
117 (:results (y :scs (int-sse-reg double-sse-reg single-sse-reg)))
118 (:note "pointer to SSE coercion")
119 (:generator 2
120 (let ((ea (object-slot-ea x simd-pack-lo-value-slot other-pointer-lowtag)))
121 (if (float-sse-p y)
122 (inst movaps y ea)
123 (inst movdqa y ea)))))
124 (define-move-vop move-to-sse :move
125 (descriptor-reg)
126 (int-sse-reg double-sse-reg single-sse-reg))
128 (define-vop (move-sse-arg)
129 (:args (x :scs (int-sse-reg double-sse-reg single-sse-reg) :target y)
130 (fp :scs (any-reg)
131 :load-if (not (sc-is y int-sse-reg double-sse-reg single-sse-reg))))
132 (:results (y))
133 (:note "SSE argument move")
134 (:generator 4
135 (sc-case y
136 ((int-sse-reg double-sse-reg single-sse-reg)
137 (unless (location= x y)
138 (if (or (float-sse-p x)
139 (float-sse-p y))
140 (inst movaps y x)
141 (inst movdqa y x))))
142 ((int-sse-stack double-sse-stack single-sse-stack)
143 (if (float-sse-p x)
144 (inst movups (ea-for-sse-stack y fp) x)
145 (inst movdqu (ea-for-sse-stack y fp) x))))))
146 (define-move-vop move-sse-arg :move-arg
147 (int-sse-reg double-sse-reg single-sse-reg descriptor-reg)
148 (int-sse-reg double-sse-reg single-sse-reg))
150 (define-move-vop move-arg :move-arg
151 (int-sse-reg double-sse-reg single-sse-reg)
152 (descriptor-reg))
155 (define-vop (%simd-pack-low)
156 (:translate %simd-pack-low)
157 (:args (x :scs (int-sse-reg double-sse-reg single-sse-reg)))
158 (:arg-types simd-pack)
159 (:results (dst :scs (unsigned-reg)))
160 (:result-types unsigned-num)
161 (:policy :fast-safe)
162 (:generator 3
163 (inst movq dst x)))
165 (define-vop (%simd-pack-high)
166 (:translate %simd-pack-high)
167 (:args (x :scs (int-sse-reg double-sse-reg single-sse-reg)
168 :target tmp))
169 (:arg-types simd-pack)
170 (:temporary (:sc sse-reg :from (:argument 0)) tmp)
171 (:results (dst :scs (unsigned-reg)))
172 (:result-types unsigned-num)
173 (:policy :fast-safe)
174 (:generator 3
175 (move tmp x)
176 (inst psrldq tmp 8)
177 (inst movq dst tmp)))
178 (define-vop (%simd-pack-high/sse4) ; 1 instruction and no temp
179 (:translate %simd-pack-high)
180 (:args (x :scs (int-sse-reg double-sse-reg single-sse-reg)))
181 (:arg-types simd-pack)
182 (:results (dst :scs (unsigned-reg)))
183 (:result-types unsigned-num)
184 (:policy :fast-safe)
185 (:guard (member :sse4 *backend-subfeatures*))
186 (:generator 1
187 (inst pextrq dst x 1)))
189 (define-vop (%make-simd-pack)
190 (:translate %make-simd-pack)
191 (:policy :fast-safe)
192 (:args (tag :scs (any-reg))
193 (lo :scs (unsigned-reg))
194 (hi :scs (unsigned-reg)))
195 (:arg-types tagged-num unsigned-num unsigned-num)
196 (:results (dst :scs (descriptor-reg) :from :load))
197 (:result-types t)
198 #+gs-seg (:temporary (:sc unsigned-reg :offset 15) thread-tn)
199 (:node-var node)
200 (:generator 13
201 (alloc-other simd-pack-widetag simd-pack-size dst node nil thread-tn)
202 ;; see +simd-pack-element-types+
203 (storew tag dst simd-pack-tag-slot other-pointer-lowtag)
204 (storew lo dst simd-pack-lo-value-slot other-pointer-lowtag)
205 (storew hi dst simd-pack-hi-value-slot other-pointer-lowtag)))
207 (define-vop (%make-simd-pack-ub64)
208 (:translate %make-simd-pack-ub64)
209 (:policy :fast-safe)
210 (:args (lo :scs (unsigned-reg))
211 (hi :scs (unsigned-reg)))
212 (:arg-types unsigned-num unsigned-num)
213 (:temporary (:sc int-sse-reg) tmp)
214 (:results (dst :scs (int-sse-reg)))
215 (:result-types simd-pack-ub64)
216 (:generator 5
217 (inst movq dst lo)
218 (inst movq tmp hi)
219 (inst punpcklqdq dst tmp)))
221 (defmacro simd-pack-dispatch (pack &body body)
222 (check-type pack symbol)
223 `(let ((,pack ,pack))
224 (etypecase ,pack
225 ,@(map 'list (lambda (eltype)
226 `((simd-pack ,eltype) ,@body))
227 +simd-pack-element-types+))))
229 #-sb-xc-host
230 (macrolet ((unpack-unsigned (pack bits)
231 `(simd-pack-dispatch ,pack
232 (let ((lo (%simd-pack-low ,pack))
233 (hi (%simd-pack-high ,pack)))
234 (values
235 ,@(loop for pos by bits below 64 collect
236 `(unpack-unsigned-1 ,bits ,pos lo))
237 ,@(loop for pos by bits below 64 collect
238 `(unpack-unsigned-1 ,bits ,pos hi))))))
239 (unpack-unsigned-1 (bits position ub64)
240 `(ldb (byte ,bits ,position) ,ub64)))
241 (declaim (inline %simd-pack-ub8s))
242 (defun %simd-pack-ub8s (pack)
243 (declare (type simd-pack pack))
244 (unpack-unsigned pack 8))
246 (declaim (inline %simd-pack-ub16s))
247 (defun %simd-pack-ub16s (pack)
248 (declare (type simd-pack pack))
249 (unpack-unsigned pack 16))
251 (declaim (inline %simd-pack-ub32s))
252 (defun %simd-pack-ub32s (pack)
253 (declare (type simd-pack pack))
254 (unpack-unsigned pack 32))
256 (declaim (inline %simd-pack-ub64s))
257 (defun %simd-pack-ub64s (pack)
258 (declare (type simd-pack pack))
259 (unpack-unsigned pack 64)))
261 #-sb-xc-host
262 (macrolet ((unpack-signed (pack bits)
263 `(simd-pack-dispatch ,pack
264 (let ((lo (%simd-pack-low ,pack))
265 (hi (%simd-pack-high ,pack)))
266 (values
267 ,@(loop for pos by bits below 64 collect
268 `(unpack-signed-1 ,bits ,pos lo))
269 ,@(loop for pos by bits below 64 collect
270 `(unpack-signed-1 ,bits ,pos hi))))))
271 (unpack-signed-1 (bits position ub64)
272 `(- (mod (+ (ldb (byte ,bits ,position) ,ub64)
273 ,(expt 2 (1- bits)))
274 ,(expt 2 bits))
275 ,(expt 2 (1- bits)))))
276 (declaim (inline %simd-pack-sb8s))
277 (defun %simd-pack-sb8s (pack)
278 (declare (type simd-pack pack))
279 (unpack-signed pack 8))
281 (declaim (inline %simd-pack-sb16s))
282 (defun %simd-pack-sb16s (pack)
283 (declare (type simd-pack pack))
284 (unpack-signed pack 16))
286 (declaim (inline %simd-pack-sb32s))
287 (defun %simd-pack-sb32s (pack)
288 (declare (type simd-pack pack))
289 (unpack-signed pack 32))
291 (declaim (inline %simd-pack-sb64s))
292 (defun %simd-pack-sb64s (pack)
293 (declare (type simd-pack pack))
294 (unpack-signed pack 64)))
296 #-sb-xc-host
297 (progn
298 (declaim (inline %make-simd-pack-ub32))
299 (defun %make-simd-pack-ub32 (w x y z)
300 (declare (type (unsigned-byte 32) w x y z))
301 (%make-simd-pack
302 #.(position '(unsigned-byte 32) +simd-pack-element-types+ :test #'equal)
303 (logior w (ash x 32))
304 (logior y (ash z 32)))))
306 (define-vop (%make-simd-pack-double)
307 (:translate %make-simd-pack-double)
308 (:policy :fast-safe)
309 (:args (lo :scs (double-reg) :target dst)
310 (hi :scs (double-reg) :target tmp))
311 (:arg-types double-float double-float)
312 (:temporary (:sc double-sse-reg :from (:argument 1)) tmp)
313 (:results (dst :scs (double-sse-reg) :from (:argument 0)))
314 (:result-types simd-pack-double)
315 (:generator 5
316 (move dst lo)
317 (move tmp hi)
318 (inst unpcklpd dst tmp)))
320 (define-vop (%make-simd-pack-single)
321 (:translate %make-simd-pack-single)
322 (:policy :fast-safe)
323 (:args (x :scs (single-reg) :target dst)
324 (y :scs (single-reg) :target tmp)
325 (z :scs (single-reg))
326 (w :scs (single-reg)))
327 (:arg-types single-float single-float single-float single-float)
328 (:temporary (:sc single-sse-reg :from (:argument 1)) tmp)
329 (:results (dst :scs (single-sse-reg) :from (:argument 0)))
330 (:result-types simd-pack-single)
331 (:generator 5
332 (move dst x)
333 (inst unpcklps dst z)
334 (move tmp y)
335 (inst unpcklps tmp w)
336 (inst unpcklps dst tmp)))
338 (defknown %simd-pack-single-item
339 (simd-pack (integer 0 3)) single-float (flushable))
341 (define-vop (%simd-pack-single-item)
342 (:args (x :scs (int-sse-reg double-sse-reg single-sse-reg)
343 :target tmp))
344 (:translate %simd-pack-single-item)
345 (:arg-types simd-pack (:constant t))
346 (:info index)
347 (:results (dst :scs (single-reg)))
348 (:result-types single-float)
349 (:temporary (:sc single-sse-reg :from (:argument 0)) tmp)
350 (:policy :fast-safe)
351 (:generator 3
352 (cond ((and (zerop index)
353 (not (location= x dst)))
354 (inst xorps dst dst)
355 (inst movss dst x))
357 (move tmp x)
358 (when (plusp index)
359 (inst psrldq tmp (* 4 index)))
360 (inst xorps dst dst)
361 (inst movss dst tmp)))))
363 #-sb-xc-host
364 (progn
365 (declaim (inline %simd-pack-singles))
366 (defun %simd-pack-singles (pack)
367 (declare (type simd-pack pack))
368 (simd-pack-dispatch pack
369 (values (%simd-pack-single-item pack 0)
370 (%simd-pack-single-item pack 1)
371 (%simd-pack-single-item pack 2)
372 (%simd-pack-single-item pack 3)))))
374 (defknown %simd-pack-double-item
375 (simd-pack (integer 0 1)) double-float (flushable))
377 (define-vop (%simd-pack-double-item)
378 (:translate %simd-pack-double-item)
379 (:args (x :scs (int-sse-reg double-sse-reg single-sse-reg)
380 :target tmp))
381 (:info index)
382 (:arg-types simd-pack (:constant t))
383 (:results (dst :scs (double-reg)))
384 (:result-types double-float)
385 (:temporary (:sc double-sse-reg :from (:argument 0)) tmp)
386 (:policy :fast-safe)
387 (:generator 3
388 (cond ((and (zerop index)
389 (not (location= x dst)))
390 (inst xorpd dst dst)
391 (inst movsd dst x))
393 (move tmp x)
394 (when (plusp index)
395 (inst psrldq tmp (* 8 index)))
396 (inst xorpd dst dst)
397 (inst movsd dst tmp)))))
399 #-sb-xc-host
400 (progn
401 (declaim (inline %simd-pack-doubles))
402 (defun %simd-pack-doubles (pack)
403 (declare (type simd-pack pack))
404 (simd-pack-dispatch pack
405 (values (%simd-pack-double-item pack 0)
406 (%simd-pack-double-item pack 1)))))