* i386.c (ix86_size_cost, i386_cost, i486_cost, pentium_cost,
[official-gcc.git] / gcc / config / i386 / emmintrin.h
blob828f4a07a9bbc48c7d5a0a1806bb5de90d7473fe
1 /* Copyright (C) 2003-2017 Free Software Foundation, Inc.
3 This file is part of GCC.
5 GCC is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3, or (at your option)
8 any later version.
10 GCC is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
24 /* Implemented from the specification included in the Intel C++ Compiler
25 User Guide and Reference, version 9.0. */
27 #ifndef _EMMINTRIN_H_INCLUDED
28 #define _EMMINTRIN_H_INCLUDED
30 /* We need definitions from the SSE header files*/
31 #include <xmmintrin.h>
33 #ifndef __SSE2__
34 #pragma GCC push_options
35 #pragma GCC target("sse2")
36 #define __DISABLE_SSE2__
37 #endif /* __SSE2__ */
39 /* SSE2 */
/* SSE2 internal vector types; each is 16 bytes wide.  */
typedef double __v2df __attribute__ ((__vector_size__ (16)));
typedef long long __v2di __attribute__ ((__vector_size__ (16)));
typedef unsigned long long __v2du __attribute__ ((__vector_size__ (16)));
typedef int __v4si __attribute__ ((__vector_size__ (16)));
typedef unsigned int __v4su __attribute__ ((__vector_size__ (16)));
typedef short __v8hi __attribute__ ((__vector_size__ (16)));
typedef unsigned short __v8hu __attribute__ ((__vector_size__ (16)));
typedef char __v16qi __attribute__ ((__vector_size__ (16)));
typedef unsigned char __v16qu __attribute__ ((__vector_size__ (16)));

/* The Intel API is flexible enough that we must allow aliasing with other
   vector types, and their scalar components.  */
typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__));

/* Unaligned version of the same types.  */
typedef long long __m128i_u __attribute__ ((__vector_size__ (16), __may_alias__, __aligned__ (1)));
typedef double __m128d_u __attribute__ ((__vector_size__ (16), __may_alias__, __aligned__ (1)));
/* Create a selector for use with the SHUFPD instruction.  The two bits
   select which source element supplies each destination element.  */
#define _MM_SHUFFLE2(fp1,fp0) \
 (((fp1) << 1) | (fp0))
63 /* Create a vector with element 0 as F and the rest zero. */
64 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
65 _mm_set_sd (double __F)
67 return __extension__ (__m128d){ __F, 0.0 };
70 /* Create a vector with both elements equal to F. */
71 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
72 _mm_set1_pd (double __F)
74 return __extension__ (__m128d){ __F, __F };
77 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
78 _mm_set_pd1 (double __F)
80 return _mm_set1_pd (__F);
83 /* Create a vector with the lower value X and upper value W. */
84 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
85 _mm_set_pd (double __W, double __X)
87 return __extension__ (__m128d){ __X, __W };
90 /* Create a vector with the lower value W and upper value X. */
91 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
92 _mm_setr_pd (double __W, double __X)
94 return __extension__ (__m128d){ __W, __X };
97 /* Create an undefined vector. */
98 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
99 _mm_undefined_pd (void)
101 __m128d __Y = __Y;
102 return __Y;
105 /* Create a vector of zeros. */
106 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
107 _mm_setzero_pd (void)
109 return __extension__ (__m128d){ 0.0, 0.0 };
112 /* Sets the low DPFP value of A from the low value of B. */
113 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
114 _mm_move_sd (__m128d __A, __m128d __B)
116 return (__m128d) __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B);
119 /* Load two DPFP values from P. The address must be 16-byte aligned. */
120 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
121 _mm_load_pd (double const *__P)
123 return *(__m128d *)__P;
126 /* Load two DPFP values from P. The address need not be 16-byte aligned. */
127 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
128 _mm_loadu_pd (double const *__P)
130 return *(__m128d_u *)__P;
133 /* Create a vector with all two elements equal to *P. */
134 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
135 _mm_load1_pd (double const *__P)
137 return _mm_set1_pd (*__P);
140 /* Create a vector with element 0 as *P and the rest zero. */
141 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
142 _mm_load_sd (double const *__P)
144 return _mm_set_sd (*__P);
147 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
148 _mm_load_pd1 (double const *__P)
150 return _mm_load1_pd (__P);
153 /* Load two DPFP values in reverse order. The address must be aligned. */
154 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
155 _mm_loadr_pd (double const *__P)
157 __m128d __tmp = _mm_load_pd (__P);
158 return __builtin_ia32_shufpd (__tmp, __tmp, _MM_SHUFFLE2 (0,1));
161 /* Store two DPFP values. The address must be 16-byte aligned. */
162 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
163 _mm_store_pd (double *__P, __m128d __A)
165 *(__m128d *)__P = __A;
168 /* Store two DPFP values. The address need not be 16-byte aligned. */
169 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
170 _mm_storeu_pd (double *__P, __m128d __A)
172 *(__m128d_u *)__P = __A;
175 /* Stores the lower DPFP value. */
176 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
177 _mm_store_sd (double *__P, __m128d __A)
179 *__P = ((__v2df)__A)[0];
182 extern __inline double __attribute__((__gnu_inline__, __always_inline__, __artificial__))
183 _mm_cvtsd_f64 (__m128d __A)
185 return ((__v2df)__A)[0];
188 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
189 _mm_storel_pd (double *__P, __m128d __A)
191 _mm_store_sd (__P, __A);
194 /* Stores the upper DPFP value. */
195 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
196 _mm_storeh_pd (double *__P, __m128d __A)
198 *__P = ((__v2df)__A)[1];
201 /* Store the lower DPFP value across two words.
202 The address must be 16-byte aligned. */
203 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
204 _mm_store1_pd (double *__P, __m128d __A)
206 _mm_store_pd (__P, __builtin_ia32_shufpd (__A, __A, _MM_SHUFFLE2 (0,0)));
209 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
210 _mm_store_pd1 (double *__P, __m128d __A)
212 _mm_store1_pd (__P, __A);
215 /* Store two DPFP values in reverse order. The address must be aligned. */
216 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
217 _mm_storer_pd (double *__P, __m128d __A)
219 _mm_store_pd (__P, __builtin_ia32_shufpd (__A, __A, _MM_SHUFFLE2 (0,1)));
222 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
223 _mm_cvtsi128_si32 (__m128i __A)
225 return __builtin_ia32_vec_ext_v4si ((__v4si)__A, 0);
228 #ifdef __x86_64__
229 /* Intel intrinsic. */
230 extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
231 _mm_cvtsi128_si64 (__m128i __A)
233 return ((__v2di)__A)[0];
236 /* Microsoft intrinsic. */
237 extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
238 _mm_cvtsi128_si64x (__m128i __A)
240 return ((__v2di)__A)[0];
242 #endif
244 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
245 _mm_add_pd (__m128d __A, __m128d __B)
247 return (__m128d) ((__v2df)__A + (__v2df)__B);
250 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
251 _mm_add_sd (__m128d __A, __m128d __B)
253 return (__m128d)__builtin_ia32_addsd ((__v2df)__A, (__v2df)__B);
256 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
257 _mm_sub_pd (__m128d __A, __m128d __B)
259 return (__m128d) ((__v2df)__A - (__v2df)__B);
262 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
263 _mm_sub_sd (__m128d __A, __m128d __B)
265 return (__m128d)__builtin_ia32_subsd ((__v2df)__A, (__v2df)__B);
268 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
269 _mm_mul_pd (__m128d __A, __m128d __B)
271 return (__m128d) ((__v2df)__A * (__v2df)__B);
274 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
275 _mm_mul_sd (__m128d __A, __m128d __B)
277 return (__m128d)__builtin_ia32_mulsd ((__v2df)__A, (__v2df)__B);
280 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
281 _mm_div_pd (__m128d __A, __m128d __B)
283 return (__m128d) ((__v2df)__A / (__v2df)__B);
286 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
287 _mm_div_sd (__m128d __A, __m128d __B)
289 return (__m128d)__builtin_ia32_divsd ((__v2df)__A, (__v2df)__B);
292 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
293 _mm_sqrt_pd (__m128d __A)
295 return (__m128d)__builtin_ia32_sqrtpd ((__v2df)__A);
298 /* Return pair {sqrt (B[0]), A[1]}. */
299 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
300 _mm_sqrt_sd (__m128d __A, __m128d __B)
302 __v2df __tmp = __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B);
303 return (__m128d)__builtin_ia32_sqrtsd ((__v2df)__tmp);
306 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
307 _mm_min_pd (__m128d __A, __m128d __B)
309 return (__m128d)__builtin_ia32_minpd ((__v2df)__A, (__v2df)__B);
312 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
313 _mm_min_sd (__m128d __A, __m128d __B)
315 return (__m128d)__builtin_ia32_minsd ((__v2df)__A, (__v2df)__B);
318 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
319 _mm_max_pd (__m128d __A, __m128d __B)
321 return (__m128d)__builtin_ia32_maxpd ((__v2df)__A, (__v2df)__B);
324 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
325 _mm_max_sd (__m128d __A, __m128d __B)
327 return (__m128d)__builtin_ia32_maxsd ((__v2df)__A, (__v2df)__B);
330 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
331 _mm_and_pd (__m128d __A, __m128d __B)
333 return (__m128d)__builtin_ia32_andpd ((__v2df)__A, (__v2df)__B);
336 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
337 _mm_andnot_pd (__m128d __A, __m128d __B)
339 return (__m128d)__builtin_ia32_andnpd ((__v2df)__A, (__v2df)__B);
342 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
343 _mm_or_pd (__m128d __A, __m128d __B)
345 return (__m128d)__builtin_ia32_orpd ((__v2df)__A, (__v2df)__B);
348 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
349 _mm_xor_pd (__m128d __A, __m128d __B)
351 return (__m128d)__builtin_ia32_xorpd ((__v2df)__A, (__v2df)__B);
354 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
355 _mm_cmpeq_pd (__m128d __A, __m128d __B)
357 return (__m128d)__builtin_ia32_cmpeqpd ((__v2df)__A, (__v2df)__B);
360 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
361 _mm_cmplt_pd (__m128d __A, __m128d __B)
363 return (__m128d)__builtin_ia32_cmpltpd ((__v2df)__A, (__v2df)__B);
366 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
367 _mm_cmple_pd (__m128d __A, __m128d __B)
369 return (__m128d)__builtin_ia32_cmplepd ((__v2df)__A, (__v2df)__B);
372 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
373 _mm_cmpgt_pd (__m128d __A, __m128d __B)
375 return (__m128d)__builtin_ia32_cmpgtpd ((__v2df)__A, (__v2df)__B);
378 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
379 _mm_cmpge_pd (__m128d __A, __m128d __B)
381 return (__m128d)__builtin_ia32_cmpgepd ((__v2df)__A, (__v2df)__B);
384 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
385 _mm_cmpneq_pd (__m128d __A, __m128d __B)
387 return (__m128d)__builtin_ia32_cmpneqpd ((__v2df)__A, (__v2df)__B);
390 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
391 _mm_cmpnlt_pd (__m128d __A, __m128d __B)
393 return (__m128d)__builtin_ia32_cmpnltpd ((__v2df)__A, (__v2df)__B);
396 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
397 _mm_cmpnle_pd (__m128d __A, __m128d __B)
399 return (__m128d)__builtin_ia32_cmpnlepd ((__v2df)__A, (__v2df)__B);
402 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
403 _mm_cmpngt_pd (__m128d __A, __m128d __B)
405 return (__m128d)__builtin_ia32_cmpngtpd ((__v2df)__A, (__v2df)__B);
408 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
409 _mm_cmpnge_pd (__m128d __A, __m128d __B)
411 return (__m128d)__builtin_ia32_cmpngepd ((__v2df)__A, (__v2df)__B);
414 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
415 _mm_cmpord_pd (__m128d __A, __m128d __B)
417 return (__m128d)__builtin_ia32_cmpordpd ((__v2df)__A, (__v2df)__B);
420 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
421 _mm_cmpunord_pd (__m128d __A, __m128d __B)
423 return (__m128d)__builtin_ia32_cmpunordpd ((__v2df)__A, (__v2df)__B);
426 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
427 _mm_cmpeq_sd (__m128d __A, __m128d __B)
429 return (__m128d)__builtin_ia32_cmpeqsd ((__v2df)__A, (__v2df)__B);
432 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
433 _mm_cmplt_sd (__m128d __A, __m128d __B)
435 return (__m128d)__builtin_ia32_cmpltsd ((__v2df)__A, (__v2df)__B);
438 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
439 _mm_cmple_sd (__m128d __A, __m128d __B)
441 return (__m128d)__builtin_ia32_cmplesd ((__v2df)__A, (__v2df)__B);
444 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
445 _mm_cmpgt_sd (__m128d __A, __m128d __B)
447 return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
448 (__v2df)
449 __builtin_ia32_cmpltsd ((__v2df) __B,
450 (__v2df)
451 __A));
454 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
455 _mm_cmpge_sd (__m128d __A, __m128d __B)
457 return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
458 (__v2df)
459 __builtin_ia32_cmplesd ((__v2df) __B,
460 (__v2df)
461 __A));
464 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
465 _mm_cmpneq_sd (__m128d __A, __m128d __B)
467 return (__m128d)__builtin_ia32_cmpneqsd ((__v2df)__A, (__v2df)__B);
470 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
471 _mm_cmpnlt_sd (__m128d __A, __m128d __B)
473 return (__m128d)__builtin_ia32_cmpnltsd ((__v2df)__A, (__v2df)__B);
476 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
477 _mm_cmpnle_sd (__m128d __A, __m128d __B)
479 return (__m128d)__builtin_ia32_cmpnlesd ((__v2df)__A, (__v2df)__B);
482 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
483 _mm_cmpngt_sd (__m128d __A, __m128d __B)
485 return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
486 (__v2df)
487 __builtin_ia32_cmpnltsd ((__v2df) __B,
488 (__v2df)
489 __A));
492 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
493 _mm_cmpnge_sd (__m128d __A, __m128d __B)
495 return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
496 (__v2df)
497 __builtin_ia32_cmpnlesd ((__v2df) __B,
498 (__v2df)
499 __A));
502 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
503 _mm_cmpord_sd (__m128d __A, __m128d __B)
505 return (__m128d)__builtin_ia32_cmpordsd ((__v2df)__A, (__v2df)__B);
508 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
509 _mm_cmpunord_sd (__m128d __A, __m128d __B)
511 return (__m128d)__builtin_ia32_cmpunordsd ((__v2df)__A, (__v2df)__B);
514 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
515 _mm_comieq_sd (__m128d __A, __m128d __B)
517 return __builtin_ia32_comisdeq ((__v2df)__A, (__v2df)__B);
520 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
521 _mm_comilt_sd (__m128d __A, __m128d __B)
523 return __builtin_ia32_comisdlt ((__v2df)__A, (__v2df)__B);
526 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
527 _mm_comile_sd (__m128d __A, __m128d __B)
529 return __builtin_ia32_comisdle ((__v2df)__A, (__v2df)__B);
532 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
533 _mm_comigt_sd (__m128d __A, __m128d __B)
535 return __builtin_ia32_comisdgt ((__v2df)__A, (__v2df)__B);
538 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
539 _mm_comige_sd (__m128d __A, __m128d __B)
541 return __builtin_ia32_comisdge ((__v2df)__A, (__v2df)__B);
544 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
545 _mm_comineq_sd (__m128d __A, __m128d __B)
547 return __builtin_ia32_comisdneq ((__v2df)__A, (__v2df)__B);
550 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
551 _mm_ucomieq_sd (__m128d __A, __m128d __B)
553 return __builtin_ia32_ucomisdeq ((__v2df)__A, (__v2df)__B);
556 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
557 _mm_ucomilt_sd (__m128d __A, __m128d __B)
559 return __builtin_ia32_ucomisdlt ((__v2df)__A, (__v2df)__B);
562 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
563 _mm_ucomile_sd (__m128d __A, __m128d __B)
565 return __builtin_ia32_ucomisdle ((__v2df)__A, (__v2df)__B);
568 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
569 _mm_ucomigt_sd (__m128d __A, __m128d __B)
571 return __builtin_ia32_ucomisdgt ((__v2df)__A, (__v2df)__B);
574 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
575 _mm_ucomige_sd (__m128d __A, __m128d __B)
577 return __builtin_ia32_ucomisdge ((__v2df)__A, (__v2df)__B);
580 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
581 _mm_ucomineq_sd (__m128d __A, __m128d __B)
583 return __builtin_ia32_ucomisdneq ((__v2df)__A, (__v2df)__B);
586 /* Create a vector of Qi, where i is the element number. */
588 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
589 _mm_set_epi64x (long long __q1, long long __q0)
591 return __extension__ (__m128i)(__v2di){ __q0, __q1 };
594 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
595 _mm_set_epi64 (__m64 __q1, __m64 __q0)
597 return _mm_set_epi64x ((long long)__q1, (long long)__q0);
600 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
601 _mm_set_epi32 (int __q3, int __q2, int __q1, int __q0)
603 return __extension__ (__m128i)(__v4si){ __q0, __q1, __q2, __q3 };
606 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
607 _mm_set_epi16 (short __q7, short __q6, short __q5, short __q4,
608 short __q3, short __q2, short __q1, short __q0)
610 return __extension__ (__m128i)(__v8hi){
611 __q0, __q1, __q2, __q3, __q4, __q5, __q6, __q7 };
614 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
615 _mm_set_epi8 (char __q15, char __q14, char __q13, char __q12,
616 char __q11, char __q10, char __q09, char __q08,
617 char __q07, char __q06, char __q05, char __q04,
618 char __q03, char __q02, char __q01, char __q00)
620 return __extension__ (__m128i)(__v16qi){
621 __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07,
622 __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15
626 /* Set all of the elements of the vector to A. */
628 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
629 _mm_set1_epi64x (long long __A)
631 return _mm_set_epi64x (__A, __A);
634 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
635 _mm_set1_epi64 (__m64 __A)
637 return _mm_set_epi64 (__A, __A);
640 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
641 _mm_set1_epi32 (int __A)
643 return _mm_set_epi32 (__A, __A, __A, __A);
646 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
647 _mm_set1_epi16 (short __A)
649 return _mm_set_epi16 (__A, __A, __A, __A, __A, __A, __A, __A);
652 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
653 _mm_set1_epi8 (char __A)
655 return _mm_set_epi8 (__A, __A, __A, __A, __A, __A, __A, __A,
656 __A, __A, __A, __A, __A, __A, __A, __A);
659 /* Create a vector of Qi, where i is the element number.
660 The parameter order is reversed from the _mm_set_epi* functions. */
662 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
663 _mm_setr_epi64 (__m64 __q0, __m64 __q1)
665 return _mm_set_epi64 (__q1, __q0);
668 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
669 _mm_setr_epi32 (int __q0, int __q1, int __q2, int __q3)
671 return _mm_set_epi32 (__q3, __q2, __q1, __q0);
674 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
675 _mm_setr_epi16 (short __q0, short __q1, short __q2, short __q3,
676 short __q4, short __q5, short __q6, short __q7)
678 return _mm_set_epi16 (__q7, __q6, __q5, __q4, __q3, __q2, __q1, __q0);
681 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
682 _mm_setr_epi8 (char __q00, char __q01, char __q02, char __q03,
683 char __q04, char __q05, char __q06, char __q07,
684 char __q08, char __q09, char __q10, char __q11,
685 char __q12, char __q13, char __q14, char __q15)
687 return _mm_set_epi8 (__q15, __q14, __q13, __q12, __q11, __q10, __q09, __q08,
688 __q07, __q06, __q05, __q04, __q03, __q02, __q01, __q00);
691 /* Create a vector with element 0 as *P and the rest zero. */
693 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
694 _mm_load_si128 (__m128i const *__P)
696 return *__P;
699 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
700 _mm_loadu_si128 (__m128i_u const *__P)
702 return *__P;
705 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
706 _mm_loadl_epi64 (__m128i_u const *__P)
708 return _mm_set_epi64 ((__m64)0LL, *(__m64_u *)__P);
711 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
712 _mm_store_si128 (__m128i *__P, __m128i __B)
714 *__P = __B;
717 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
718 _mm_storeu_si128 (__m128i_u *__P, __m128i __B)
720 *__P = __B;
723 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
724 _mm_storel_epi64 (__m128i_u *__P, __m128i __B)
726 *(__m64_u *)__P = (__m64) ((__v2di)__B)[0];
729 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
730 _mm_movepi64_pi64 (__m128i __B)
732 return (__m64) ((__v2di)__B)[0];
735 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
736 _mm_movpi64_epi64 (__m64 __A)
738 return _mm_set_epi64 ((__m64)0LL, __A);
741 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
742 _mm_move_epi64 (__m128i __A)
744 return (__m128i)__builtin_ia32_movq128 ((__v2di) __A);
747 /* Create an undefined vector. */
748 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
749 _mm_undefined_si128 (void)
751 __m128i __Y = __Y;
752 return __Y;
755 /* Create a vector of zeros. */
756 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
757 _mm_setzero_si128 (void)
759 return __extension__ (__m128i)(__v4si){ 0, 0, 0, 0 };
762 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
763 _mm_cvtepi32_pd (__m128i __A)
765 return (__m128d)__builtin_ia32_cvtdq2pd ((__v4si) __A);
768 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
769 _mm_cvtepi32_ps (__m128i __A)
771 return (__m128)__builtin_ia32_cvtdq2ps ((__v4si) __A);
774 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
775 _mm_cvtpd_epi32 (__m128d __A)
777 return (__m128i)__builtin_ia32_cvtpd2dq ((__v2df) __A);
780 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
781 _mm_cvtpd_pi32 (__m128d __A)
783 return (__m64)__builtin_ia32_cvtpd2pi ((__v2df) __A);
786 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
787 _mm_cvtpd_ps (__m128d __A)
789 return (__m128)__builtin_ia32_cvtpd2ps ((__v2df) __A);
792 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
793 _mm_cvttpd_epi32 (__m128d __A)
795 return (__m128i)__builtin_ia32_cvttpd2dq ((__v2df) __A);
798 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
799 _mm_cvttpd_pi32 (__m128d __A)
801 return (__m64)__builtin_ia32_cvttpd2pi ((__v2df) __A);
804 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
805 _mm_cvtpi32_pd (__m64 __A)
807 return (__m128d)__builtin_ia32_cvtpi2pd ((__v2si) __A);
810 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
811 _mm_cvtps_epi32 (__m128 __A)
813 return (__m128i)__builtin_ia32_cvtps2dq ((__v4sf) __A);
816 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
817 _mm_cvttps_epi32 (__m128 __A)
819 return (__m128i)__builtin_ia32_cvttps2dq ((__v4sf) __A);
822 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
823 _mm_cvtps_pd (__m128 __A)
825 return (__m128d)__builtin_ia32_cvtps2pd ((__v4sf) __A);
828 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
829 _mm_cvtsd_si32 (__m128d __A)
831 return __builtin_ia32_cvtsd2si ((__v2df) __A);
834 #ifdef __x86_64__
835 /* Intel intrinsic. */
836 extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
837 _mm_cvtsd_si64 (__m128d __A)
839 return __builtin_ia32_cvtsd2si64 ((__v2df) __A);
842 /* Microsoft intrinsic. */
843 extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
844 _mm_cvtsd_si64x (__m128d __A)
846 return __builtin_ia32_cvtsd2si64 ((__v2df) __A);
848 #endif
850 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
851 _mm_cvttsd_si32 (__m128d __A)
853 return __builtin_ia32_cvttsd2si ((__v2df) __A);
856 #ifdef __x86_64__
857 /* Intel intrinsic. */
858 extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
859 _mm_cvttsd_si64 (__m128d __A)
861 return __builtin_ia32_cvttsd2si64 ((__v2df) __A);
864 /* Microsoft intrinsic. */
865 extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
866 _mm_cvttsd_si64x (__m128d __A)
868 return __builtin_ia32_cvttsd2si64 ((__v2df) __A);
870 #endif
872 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
873 _mm_cvtsd_ss (__m128 __A, __m128d __B)
875 return (__m128)__builtin_ia32_cvtsd2ss ((__v4sf) __A, (__v2df) __B);
878 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
879 _mm_cvtsi32_sd (__m128d __A, int __B)
881 return (__m128d)__builtin_ia32_cvtsi2sd ((__v2df) __A, __B);
884 #ifdef __x86_64__
885 /* Intel intrinsic. */
886 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
887 _mm_cvtsi64_sd (__m128d __A, long long __B)
889 return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B);
892 /* Microsoft intrinsic. */
893 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
894 _mm_cvtsi64x_sd (__m128d __A, long long __B)
896 return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B);
898 #endif
900 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
901 _mm_cvtss_sd (__m128d __A, __m128 __B)
903 return (__m128d)__builtin_ia32_cvtss2sd ((__v2df) __A, (__v4sf)__B);
/* Select elements of A and B according to the 2-bit MASK (SHUFPD).
   Without optimization the builtin requires a literal constant, so a
   macro form is provided instead.  */
#ifdef __OPTIMIZE__
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shuffle_pd(__m128d __A, __m128d __B, const int __mask)
{
  return (__m128d)__builtin_ia32_shufpd ((__v2df)__A, (__v2df)__B, __mask);
}
#else
#define _mm_shuffle_pd(A, B, N)						\
  ((__m128d)__builtin_ia32_shufpd ((__v2df)(__m128d)(A),		\
				   (__v2df)(__m128d)(B), (int)(N)))
#endif
918 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
919 _mm_unpackhi_pd (__m128d __A, __m128d __B)
921 return (__m128d)__builtin_ia32_unpckhpd ((__v2df)__A, (__v2df)__B);
924 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
925 _mm_unpacklo_pd (__m128d __A, __m128d __B)
927 return (__m128d)__builtin_ia32_unpcklpd ((__v2df)__A, (__v2df)__B);
930 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
931 _mm_loadh_pd (__m128d __A, double const *__B)
933 return (__m128d)__builtin_ia32_loadhpd ((__v2df)__A, __B);
936 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
937 _mm_loadl_pd (__m128d __A, double const *__B)
939 return (__m128d)__builtin_ia32_loadlpd ((__v2df)__A, __B);
942 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
943 _mm_movemask_pd (__m128d __A)
945 return __builtin_ia32_movmskpd ((__v2df)__A);
948 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
949 _mm_packs_epi16 (__m128i __A, __m128i __B)
951 return (__m128i)__builtin_ia32_packsswb128 ((__v8hi)__A, (__v8hi)__B);
954 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
955 _mm_packs_epi32 (__m128i __A, __m128i __B)
957 return (__m128i)__builtin_ia32_packssdw128 ((__v4si)__A, (__v4si)__B);
960 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
961 _mm_packus_epi16 (__m128i __A, __m128i __B)
963 return (__m128i)__builtin_ia32_packuswb128 ((__v8hi)__A, (__v8hi)__B);
966 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
967 _mm_unpackhi_epi8 (__m128i __A, __m128i __B)
969 return (__m128i)__builtin_ia32_punpckhbw128 ((__v16qi)__A, (__v16qi)__B);
972 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
973 _mm_unpackhi_epi16 (__m128i __A, __m128i __B)
975 return (__m128i)__builtin_ia32_punpckhwd128 ((__v8hi)__A, (__v8hi)__B);
978 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
979 _mm_unpackhi_epi32 (__m128i __A, __m128i __B)
981 return (__m128i)__builtin_ia32_punpckhdq128 ((__v4si)__A, (__v4si)__B);
984 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
985 _mm_unpackhi_epi64 (__m128i __A, __m128i __B)
987 return (__m128i)__builtin_ia32_punpckhqdq128 ((__v2di)__A, (__v2di)__B);
990 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
991 _mm_unpacklo_epi8 (__m128i __A, __m128i __B)
993 return (__m128i)__builtin_ia32_punpcklbw128 ((__v16qi)__A, (__v16qi)__B);
996 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
997 _mm_unpacklo_epi16 (__m128i __A, __m128i __B)
999 return (__m128i)__builtin_ia32_punpcklwd128 ((__v8hi)__A, (__v8hi)__B);
1002 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1003 _mm_unpacklo_epi32 (__m128i __A, __m128i __B)
1005 return (__m128i)__builtin_ia32_punpckldq128 ((__v4si)__A, (__v4si)__B);
1008 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1009 _mm_unpacklo_epi64 (__m128i __A, __m128i __B)
1011 return (__m128i)__builtin_ia32_punpcklqdq128 ((__v2di)__A, (__v2di)__B);
1014 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1015 _mm_add_epi8 (__m128i __A, __m128i __B)
1017 return (__m128i) ((__v16qu)__A + (__v16qu)__B);
1020 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1021 _mm_add_epi16 (__m128i __A, __m128i __B)
1023 return (__m128i) ((__v8hu)__A + (__v8hu)__B);
1026 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1027 _mm_add_epi32 (__m128i __A, __m128i __B)
1029 return (__m128i) ((__v4su)__A + (__v4su)__B);
1032 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1033 _mm_add_epi64 (__m128i __A, __m128i __B)
1035 return (__m128i) ((__v2du)__A + (__v2du)__B);
1038 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1039 _mm_adds_epi8 (__m128i __A, __m128i __B)
1041 return (__m128i)__builtin_ia32_paddsb128 ((__v16qi)__A, (__v16qi)__B);
1044 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1045 _mm_adds_epi16 (__m128i __A, __m128i __B)
1047 return (__m128i)__builtin_ia32_paddsw128 ((__v8hi)__A, (__v8hi)__B);
1050 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1051 _mm_adds_epu8 (__m128i __A, __m128i __B)
1053 return (__m128i)__builtin_ia32_paddusb128 ((__v16qi)__A, (__v16qi)__B);
1056 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1057 _mm_adds_epu16 (__m128i __A, __m128i __B)
1059 return (__m128i)__builtin_ia32_paddusw128 ((__v8hi)__A, (__v8hi)__B);
1062 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1063 _mm_sub_epi8 (__m128i __A, __m128i __B)
1065 return (__m128i) ((__v16qu)__A - (__v16qu)__B);
1068 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1069 _mm_sub_epi16 (__m128i __A, __m128i __B)
1071 return (__m128i) ((__v8hu)__A - (__v8hu)__B);
1074 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1075 _mm_sub_epi32 (__m128i __A, __m128i __B)
1077 return (__m128i) ((__v4su)__A - (__v4su)__B);
1080 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1081 _mm_sub_epi64 (__m128i __A, __m128i __B)
1083 return (__m128i) ((__v2du)__A - (__v2du)__B);
1086 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1087 _mm_subs_epi8 (__m128i __A, __m128i __B)
1089 return (__m128i)__builtin_ia32_psubsb128 ((__v16qi)__A, (__v16qi)__B);
1092 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1093 _mm_subs_epi16 (__m128i __A, __m128i __B)
1095 return (__m128i)__builtin_ia32_psubsw128 ((__v8hi)__A, (__v8hi)__B);
1098 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1099 _mm_subs_epu8 (__m128i __A, __m128i __B)
1101 return (__m128i)__builtin_ia32_psubusb128 ((__v16qi)__A, (__v16qi)__B);
1104 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1105 _mm_subs_epu16 (__m128i __A, __m128i __B)
1107 return (__m128i)__builtin_ia32_psubusw128 ((__v8hi)__A, (__v8hi)__B);
1110 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1111 _mm_madd_epi16 (__m128i __A, __m128i __B)
1113 return (__m128i)__builtin_ia32_pmaddwd128 ((__v8hi)__A, (__v8hi)__B);
1116 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1117 _mm_mulhi_epi16 (__m128i __A, __m128i __B)
1119 return (__m128i)__builtin_ia32_pmulhw128 ((__v8hi)__A, (__v8hi)__B);
1122 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1123 _mm_mullo_epi16 (__m128i __A, __m128i __B)
1125 return (__m128i) ((__v8hu)__A * (__v8hu)__B);
1128 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1129 _mm_mul_su32 (__m64 __A, __m64 __B)
1131 return (__m64)__builtin_ia32_pmuludq ((__v2si)__A, (__v2si)__B);
1134 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1135 _mm_mul_epu32 (__m128i __A, __m128i __B)
1137 return (__m128i)__builtin_ia32_pmuludq128 ((__v4si)__A, (__v4si)__B);
1140 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1141 _mm_slli_epi16 (__m128i __A, int __B)
1143 return (__m128i)__builtin_ia32_psllwi128 ((__v8hi)__A, __B);
1146 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1147 _mm_slli_epi32 (__m128i __A, int __B)
1149 return (__m128i)__builtin_ia32_pslldi128 ((__v4si)__A, __B);
1152 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1153 _mm_slli_epi64 (__m128i __A, int __B)
1155 return (__m128i)__builtin_ia32_psllqi128 ((__v2di)__A, __B);
1158 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1159 _mm_srai_epi16 (__m128i __A, int __B)
1161 return (__m128i)__builtin_ia32_psrawi128 ((__v8hi)__A, __B);
1164 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1165 _mm_srai_epi32 (__m128i __A, int __B)
1167 return (__m128i)__builtin_ia32_psradi128 ((__v4si)__A, __B);
#ifdef __OPTIMIZE__
/* Shift the whole 128-bit value of __A right by __N BYTES.  The builtin
   takes a bit count, hence the * 8.  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_bsrli_si128 (__m128i __A, const int __N)
{
  return (__m128i)__builtin_ia32_psrldqi128 (__A, __N * 8);
}

/* Shift the whole 128-bit value of __A left by __N bytes.  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_bslli_si128 (__m128i __A, const int __N)
{
  return (__m128i)__builtin_ia32_pslldqi128 (__A, __N * 8);
}

/* Alias of _mm_bsrli_si128: whole-register byte shift right.  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srli_si128 (__m128i __A, const int __N)
{
  return (__m128i)__builtin_ia32_psrldqi128 (__A, __N * 8);
}

/* Alias of _mm_bslli_si128: whole-register byte shift left.  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_slli_si128 (__m128i __A, const int __N)
{
  return (__m128i)__builtin_ia32_pslldqi128 (__A, __N * 8);
}
#else
/* Macro forms so the shift count stays a compile-time immediate when
   not optimizing.  */
#define _mm_bsrli_si128(A, N) \
  ((__m128i)__builtin_ia32_psrldqi128 ((__m128i)(A), (int)(N) * 8))
#define _mm_bslli_si128(A, N) \
  ((__m128i)__builtin_ia32_pslldqi128 ((__m128i)(A), (int)(N) * 8))
#define _mm_srli_si128(A, N) \
  ((__m128i)__builtin_ia32_psrldqi128 ((__m128i)(A), (int)(N) * 8))
#define _mm_slli_si128(A, N) \
  ((__m128i)__builtin_ia32_pslldqi128 ((__m128i)(A), (int)(N) * 8))
#endif
1205 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1206 _mm_srli_epi16 (__m128i __A, int __B)
1208 return (__m128i)__builtin_ia32_psrlwi128 ((__v8hi)__A, __B);
1211 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1212 _mm_srli_epi32 (__m128i __A, int __B)
1214 return (__m128i)__builtin_ia32_psrldi128 ((__v4si)__A, __B);
1217 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1218 _mm_srli_epi64 (__m128i __A, int __B)
1220 return (__m128i)__builtin_ia32_psrlqi128 ((__v2di)__A, __B);
1223 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1224 _mm_sll_epi16 (__m128i __A, __m128i __B)
1226 return (__m128i)__builtin_ia32_psllw128((__v8hi)__A, (__v8hi)__B);
1229 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1230 _mm_sll_epi32 (__m128i __A, __m128i __B)
1232 return (__m128i)__builtin_ia32_pslld128((__v4si)__A, (__v4si)__B);
1235 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1236 _mm_sll_epi64 (__m128i __A, __m128i __B)
1238 return (__m128i)__builtin_ia32_psllq128((__v2di)__A, (__v2di)__B);
1241 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1242 _mm_sra_epi16 (__m128i __A, __m128i __B)
1244 return (__m128i)__builtin_ia32_psraw128 ((__v8hi)__A, (__v8hi)__B);
1247 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1248 _mm_sra_epi32 (__m128i __A, __m128i __B)
1250 return (__m128i)__builtin_ia32_psrad128 ((__v4si)__A, (__v4si)__B);
1253 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1254 _mm_srl_epi16 (__m128i __A, __m128i __B)
1256 return (__m128i)__builtin_ia32_psrlw128 ((__v8hi)__A, (__v8hi)__B);
1259 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1260 _mm_srl_epi32 (__m128i __A, __m128i __B)
1262 return (__m128i)__builtin_ia32_psrld128 ((__v4si)__A, (__v4si)__B);
1265 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1266 _mm_srl_epi64 (__m128i __A, __m128i __B)
1268 return (__m128i)__builtin_ia32_psrlq128 ((__v2di)__A, (__v2di)__B);
1271 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1272 _mm_and_si128 (__m128i __A, __m128i __B)
1274 return (__m128i) ((__v2du)__A & (__v2du)__B);
1277 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1278 _mm_andnot_si128 (__m128i __A, __m128i __B)
1280 return (__m128i)__builtin_ia32_pandn128 ((__v2di)__A, (__v2di)__B);
1283 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1284 _mm_or_si128 (__m128i __A, __m128i __B)
1286 return (__m128i) ((__v2du)__A | (__v2du)__B);
1289 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1290 _mm_xor_si128 (__m128i __A, __m128i __B)
1292 return (__m128i) ((__v2du)__A ^ (__v2du)__B);
1295 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1296 _mm_cmpeq_epi8 (__m128i __A, __m128i __B)
1298 return (__m128i) ((__v16qi)__A == (__v16qi)__B);
1301 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1302 _mm_cmpeq_epi16 (__m128i __A, __m128i __B)
1304 return (__m128i) ((__v8hi)__A == (__v8hi)__B);
1307 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1308 _mm_cmpeq_epi32 (__m128i __A, __m128i __B)
1310 return (__m128i) ((__v4si)__A == (__v4si)__B);
1313 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1314 _mm_cmplt_epi8 (__m128i __A, __m128i __B)
1316 return (__m128i) ((__v16qi)__A < (__v16qi)__B);
1319 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1320 _mm_cmplt_epi16 (__m128i __A, __m128i __B)
1322 return (__m128i) ((__v8hi)__A < (__v8hi)__B);
1325 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1326 _mm_cmplt_epi32 (__m128i __A, __m128i __B)
1328 return (__m128i) ((__v4si)__A < (__v4si)__B);
1331 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1332 _mm_cmpgt_epi8 (__m128i __A, __m128i __B)
1334 return (__m128i) ((__v16qi)__A > (__v16qi)__B);
1337 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1338 _mm_cmpgt_epi16 (__m128i __A, __m128i __B)
1340 return (__m128i) ((__v8hi)__A > (__v8hi)__B);
1343 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1344 _mm_cmpgt_epi32 (__m128i __A, __m128i __B)
1346 return (__m128i) ((__v4si)__A > (__v4si)__B);
#ifdef __OPTIMIZE__
/* Extract the 16-bit element at index __N of __A, zero-extended to int.  */
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_extract_epi16 (__m128i const __A, int const __N)
{
  return (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi)__A, __N);
}

/* Return __A with its 16-bit element at index __N replaced by __D.  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_insert_epi16 (__m128i const __A, int const __D, int const __N)
{
  return (__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)__A, __D, __N);
}
#else
/* Macro forms so the index stays a compile-time immediate when not
   optimizing.  */
#define _mm_extract_epi16(A, N) \
  ((int) (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi)(__m128i)(A), (int)(N)))
#define _mm_insert_epi16(A, D, N)				\
  ((__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)(__m128i)(A),	\
					  (int)(D), (int)(N)))
#endif
1369 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1370 _mm_max_epi16 (__m128i __A, __m128i __B)
1372 return (__m128i)__builtin_ia32_pmaxsw128 ((__v8hi)__A, (__v8hi)__B);
1375 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1376 _mm_max_epu8 (__m128i __A, __m128i __B)
1378 return (__m128i)__builtin_ia32_pmaxub128 ((__v16qi)__A, (__v16qi)__B);
1381 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1382 _mm_min_epi16 (__m128i __A, __m128i __B)
1384 return (__m128i)__builtin_ia32_pminsw128 ((__v8hi)__A, (__v8hi)__B);
1387 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1388 _mm_min_epu8 (__m128i __A, __m128i __B)
1390 return (__m128i)__builtin_ia32_pminub128 ((__v16qi)__A, (__v16qi)__B);
1393 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1394 _mm_movemask_epi8 (__m128i __A)
1396 return __builtin_ia32_pmovmskb128 ((__v16qi)__A);
1399 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1400 _mm_mulhi_epu16 (__m128i __A, __m128i __B)
1402 return (__m128i)__builtin_ia32_pmulhuw128 ((__v8hi)__A, (__v8hi)__B);
#ifdef __OPTIMIZE__
/* Permute the upper four 16-bit elements of __A by the immediate __mask;
   the lower four pass through (PSHUFHW builtin).  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shufflehi_epi16 (__m128i __A, const int __mask)
{
  return (__m128i)__builtin_ia32_pshufhw ((__v8hi)__A, __mask);
}

/* Permute the lower four 16-bit elements of __A by the immediate __mask;
   the upper four pass through (PSHUFLW builtin).  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shufflelo_epi16 (__m128i __A, const int __mask)
{
  return (__m128i)__builtin_ia32_pshuflw ((__v8hi)__A, __mask);
}

/* Permute the four 32-bit elements of __A by the immediate __mask
   (PSHUFD builtin).  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shuffle_epi32 (__m128i __A, const int __mask)
{
  return (__m128i)__builtin_ia32_pshufd ((__v4si)__A, __mask);
}
#else
/* Macro forms so the mask stays a compile-time immediate when not
   optimizing.  */
#define _mm_shufflehi_epi16(A, N) \
  ((__m128i)__builtin_ia32_pshufhw ((__v8hi)(__m128i)(A), (int)(N)))
#define _mm_shufflelo_epi16(A, N) \
  ((__m128i)__builtin_ia32_pshuflw ((__v8hi)(__m128i)(A), (int)(N)))
#define _mm_shuffle_epi32(A, N) \
  ((__m128i)__builtin_ia32_pshufd ((__v4si)(__m128i)(A), (int)(N)))
#endif
1432 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1433 _mm_maskmoveu_si128 (__m128i __A, __m128i __B, char *__C)
1435 __builtin_ia32_maskmovdqu ((__v16qi)__A, (__v16qi)__B, __C);
1438 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1439 _mm_avg_epu8 (__m128i __A, __m128i __B)
1441 return (__m128i)__builtin_ia32_pavgb128 ((__v16qi)__A, (__v16qi)__B);
1444 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1445 _mm_avg_epu16 (__m128i __A, __m128i __B)
1447 return (__m128i)__builtin_ia32_pavgw128 ((__v8hi)__A, (__v8hi)__B);
1450 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1451 _mm_sad_epu8 (__m128i __A, __m128i __B)
1453 return (__m128i)__builtin_ia32_psadbw128 ((__v16qi)__A, (__v16qi)__B);
1456 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1457 _mm_stream_si32 (int *__A, int __B)
1459 __builtin_ia32_movnti (__A, __B);
1462 #ifdef __x86_64__
1463 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1464 _mm_stream_si64 (long long int *__A, long long int __B)
1466 __builtin_ia32_movnti64 (__A, __B);
1468 #endif
1470 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1471 _mm_stream_si128 (__m128i *__A, __m128i __B)
1473 __builtin_ia32_movntdq ((__v2di *)__A, (__v2di)__B);
1476 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1477 _mm_stream_pd (double *__A, __m128d __B)
1479 __builtin_ia32_movntpd (__A, (__v2df)__B);
/* Flush the cache line containing __A (CLFLUSH builtin).  */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_clflush (void const *__A)
{
  __builtin_ia32_clflush (__A);
}

/* Load fence (LFENCE builtin).  */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_lfence (void)
{
  __builtin_ia32_lfence ();
}

/* Full memory fence (MFENCE builtin).  */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mfence (void)
{
  __builtin_ia32_mfence ();
}
1500 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1501 _mm_cvtsi32_si128 (int __A)
1503 return _mm_set_epi32 (0, 0, 0, __A);
1506 #ifdef __x86_64__
1507 /* Intel intrinsic. */
1508 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1509 _mm_cvtsi64_si128 (long long __A)
1511 return _mm_set_epi64x (0, __A);
1514 /* Microsoft intrinsic. */
1515 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1516 _mm_cvtsi64x_si128 (long long __A)
1518 return _mm_set_epi64x (0, __A);
1520 #endif
1522 /* Casts between various SP, DP, INT vector types. Note that these do no
1523 conversion of values, they just change the type. */
1524 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1525 _mm_castpd_ps(__m128d __A)
1527 return (__m128) __A;
1530 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1531 _mm_castpd_si128(__m128d __A)
1533 return (__m128i) __A;
1536 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1537 _mm_castps_pd(__m128 __A)
1539 return (__m128d) __A;
1542 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1543 _mm_castps_si128(__m128 __A)
1545 return (__m128i) __A;
1548 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1549 _mm_castsi128_ps(__m128i __A)
1551 return (__m128) __A;
1554 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1555 _mm_castsi128_pd(__m128i __A)
1557 return (__m128d) __A;
1560 #ifdef __DISABLE_SSE2__
1561 #undef __DISABLE_SSE2__
1562 #pragma GCC pop_options
1563 #endif /* __DISABLE_SSE2__ */
1565 #endif /* _EMMINTRIN_H_INCLUDED */