Merged revisions 208012,208018-208019,208021,208023-208030,208033,208037,208040-20804...
[official-gcc.git] / main / gcc / config / i386 / emmintrin.h
bloba2bdf0edade1e3fe8bf852a8b1796ac73338cdcb
1 /* Copyright (C) 2003-2014 Free Software Foundation, Inc.
3 This file is part of GCC.
5 GCC is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3, or (at your option)
8 any later version.
10 GCC is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
24 /* Implemented from the specification included in the Intel C++ Compiler
25 User Guide and Reference, version 9.0. */
27 #ifndef _EMMINTRIN_H_INCLUDED
28 #define _EMMINTRIN_H_INCLUDED
/* We need definitions from the SSE header files.  */
31 #include <xmmintrin.h>
33 #ifndef __SSE2__
34 #pragma GCC push_options
35 #pragma GCC target("sse2")
36 #define __DISABLE_SSE2__
37 #endif /* __SSE2__ */
/* SSE2 */
/* Internal 16-byte vector types, one per element type.  They are used
   for the operand and result casts around the builtins below.  */
typedef double __v2df __attribute__ ((__vector_size__ (16)));	/* 2 x double */
typedef long long __v2di __attribute__ ((__vector_size__ (16)));	/* 2 x long long */
typedef int __v4si __attribute__ ((__vector_size__ (16)));	/* 4 x int */
typedef short __v8hi __attribute__ ((__vector_size__ (16)));	/* 8 x short */
typedef char __v16qi __attribute__ ((__vector_size__ (16)));	/* 16 x char */
/* Public 128-bit vector types.  The Intel API is flexible enough that
   objects of these types must be allowed to alias other vector types and
   their scalar components, hence __may_alias__.  */
typedef long long __m128i __attribute__ ((__may_alias__, __vector_size__ (16)));
typedef double __m128d __attribute__ ((__may_alias__, __vector_size__ (16)));
/* Build the two-bit selector used with the SHUFPD instruction:
   FP1 goes into bit 1 and FP0 into bit 0.  */
#define _MM_SHUFFLE2(fp1,fp0) (((fp1) << 1) | (fp0))
55 /* Create a vector with element 0 as F and the rest zero. */
56 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
57 _mm_set_sd (double __F)
59 return __extension__ (__m128d){ __F, 0.0 };
62 /* Create a vector with both elements equal to F. */
63 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
64 _mm_set1_pd (double __F)
66 return __extension__ (__m128d){ __F, __F };
69 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
70 _mm_set_pd1 (double __F)
72 return _mm_set1_pd (__F);
75 /* Create a vector with the lower value X and upper value W. */
76 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
77 _mm_set_pd (double __W, double __X)
79 return __extension__ (__m128d){ __X, __W };
82 /* Create a vector with the lower value W and upper value X. */
83 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
84 _mm_setr_pd (double __W, double __X)
86 return __extension__ (__m128d){ __W, __X };
89 /* Create an undefined vector. */
90 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
91 _mm_undefined_pd (void)
93 __m128d __Y = __Y;
94 return __Y;
97 /* Create a vector of zeros. */
98 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
99 _mm_setzero_pd (void)
101 return __extension__ (__m128d){ 0.0, 0.0 };
104 /* Sets the low DPFP value of A from the low value of B. */
105 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
106 _mm_move_sd (__m128d __A, __m128d __B)
108 return (__m128d) __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B);
111 /* Load two DPFP values from P. The address must be 16-byte aligned. */
112 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
113 _mm_load_pd (double const *__P)
115 return *(__m128d *)__P;
118 /* Load two DPFP values from P. The address need not be 16-byte aligned. */
119 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
120 _mm_loadu_pd (double const *__P)
122 return __builtin_ia32_loadupd (__P);
125 /* Create a vector with all two elements equal to *P. */
126 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
127 _mm_load1_pd (double const *__P)
129 return _mm_set1_pd (*__P);
132 /* Create a vector with element 0 as *P and the rest zero. */
133 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
134 _mm_load_sd (double const *__P)
136 return _mm_set_sd (*__P);
139 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
140 _mm_load_pd1 (double const *__P)
142 return _mm_load1_pd (__P);
145 /* Load two DPFP values in reverse order. The address must be aligned. */
146 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
147 _mm_loadr_pd (double const *__P)
149 __m128d __tmp = _mm_load_pd (__P);
150 return __builtin_ia32_shufpd (__tmp, __tmp, _MM_SHUFFLE2 (0,1));
153 /* Store two DPFP values. The address must be 16-byte aligned. */
154 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
155 _mm_store_pd (double *__P, __m128d __A)
157 *(__m128d *)__P = __A;
160 /* Store two DPFP values. The address need not be 16-byte aligned. */
161 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
162 _mm_storeu_pd (double *__P, __m128d __A)
164 __builtin_ia32_storeupd (__P, __A);
167 /* Stores the lower DPFP value. */
168 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
169 _mm_store_sd (double *__P, __m128d __A)
171 *__P = __builtin_ia32_vec_ext_v2df (__A, 0);
174 extern __inline double __attribute__((__gnu_inline__, __always_inline__, __artificial__))
175 _mm_cvtsd_f64 (__m128d __A)
177 return __builtin_ia32_vec_ext_v2df (__A, 0);
180 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
181 _mm_storel_pd (double *__P, __m128d __A)
183 _mm_store_sd (__P, __A);
186 /* Stores the upper DPFP value. */
187 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
188 _mm_storeh_pd (double *__P, __m128d __A)
190 *__P = __builtin_ia32_vec_ext_v2df (__A, 1);
193 /* Store the lower DPFP value across two words.
194 The address must be 16-byte aligned. */
195 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
196 _mm_store1_pd (double *__P, __m128d __A)
198 _mm_store_pd (__P, __builtin_ia32_shufpd (__A, __A, _MM_SHUFFLE2 (0,0)));
201 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
202 _mm_store_pd1 (double *__P, __m128d __A)
204 _mm_store1_pd (__P, __A);
207 /* Store two DPFP values in reverse order. The address must be aligned. */
208 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
209 _mm_storer_pd (double *__P, __m128d __A)
211 _mm_store_pd (__P, __builtin_ia32_shufpd (__A, __A, _MM_SHUFFLE2 (0,1)));
214 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
215 _mm_cvtsi128_si32 (__m128i __A)
217 return __builtin_ia32_vec_ext_v4si ((__v4si)__A, 0);
220 #ifdef __x86_64__
221 /* Intel intrinsic. */
222 extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
223 _mm_cvtsi128_si64 (__m128i __A)
225 return __builtin_ia32_vec_ext_v2di ((__v2di)__A, 0);
228 /* Microsoft intrinsic. */
229 extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
230 _mm_cvtsi128_si64x (__m128i __A)
232 return __builtin_ia32_vec_ext_v2di ((__v2di)__A, 0);
234 #endif
236 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
237 _mm_add_pd (__m128d __A, __m128d __B)
239 return (__m128d)__builtin_ia32_addpd ((__v2df)__A, (__v2df)__B);
242 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
243 _mm_add_sd (__m128d __A, __m128d __B)
245 return (__m128d)__builtin_ia32_addsd ((__v2df)__A, (__v2df)__B);
248 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
249 _mm_sub_pd (__m128d __A, __m128d __B)
251 return (__m128d)__builtin_ia32_subpd ((__v2df)__A, (__v2df)__B);
254 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
255 _mm_sub_sd (__m128d __A, __m128d __B)
257 return (__m128d)__builtin_ia32_subsd ((__v2df)__A, (__v2df)__B);
260 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
261 _mm_mul_pd (__m128d __A, __m128d __B)
263 return (__m128d)__builtin_ia32_mulpd ((__v2df)__A, (__v2df)__B);
266 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
267 _mm_mul_sd (__m128d __A, __m128d __B)
269 return (__m128d)__builtin_ia32_mulsd ((__v2df)__A, (__v2df)__B);
272 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
273 _mm_div_pd (__m128d __A, __m128d __B)
275 return (__m128d)__builtin_ia32_divpd ((__v2df)__A, (__v2df)__B);
278 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
279 _mm_div_sd (__m128d __A, __m128d __B)
281 return (__m128d)__builtin_ia32_divsd ((__v2df)__A, (__v2df)__B);
284 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
285 _mm_sqrt_pd (__m128d __A)
287 return (__m128d)__builtin_ia32_sqrtpd ((__v2df)__A);
290 /* Return pair {sqrt (A[0), B[1]}. */
291 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
292 _mm_sqrt_sd (__m128d __A, __m128d __B)
294 __v2df __tmp = __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B);
295 return (__m128d)__builtin_ia32_sqrtsd ((__v2df)__tmp);
298 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
299 _mm_min_pd (__m128d __A, __m128d __B)
301 return (__m128d)__builtin_ia32_minpd ((__v2df)__A, (__v2df)__B);
304 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
305 _mm_min_sd (__m128d __A, __m128d __B)
307 return (__m128d)__builtin_ia32_minsd ((__v2df)__A, (__v2df)__B);
310 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
311 _mm_max_pd (__m128d __A, __m128d __B)
313 return (__m128d)__builtin_ia32_maxpd ((__v2df)__A, (__v2df)__B);
316 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
317 _mm_max_sd (__m128d __A, __m128d __B)
319 return (__m128d)__builtin_ia32_maxsd ((__v2df)__A, (__v2df)__B);
322 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
323 _mm_and_pd (__m128d __A, __m128d __B)
325 return (__m128d)__builtin_ia32_andpd ((__v2df)__A, (__v2df)__B);
328 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
329 _mm_andnot_pd (__m128d __A, __m128d __B)
331 return (__m128d)__builtin_ia32_andnpd ((__v2df)__A, (__v2df)__B);
334 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
335 _mm_or_pd (__m128d __A, __m128d __B)
337 return (__m128d)__builtin_ia32_orpd ((__v2df)__A, (__v2df)__B);
340 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
341 _mm_xor_pd (__m128d __A, __m128d __B)
343 return (__m128d)__builtin_ia32_xorpd ((__v2df)__A, (__v2df)__B);
346 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
347 _mm_cmpeq_pd (__m128d __A, __m128d __B)
349 return (__m128d)__builtin_ia32_cmpeqpd ((__v2df)__A, (__v2df)__B);
352 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
353 _mm_cmplt_pd (__m128d __A, __m128d __B)
355 return (__m128d)__builtin_ia32_cmpltpd ((__v2df)__A, (__v2df)__B);
358 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
359 _mm_cmple_pd (__m128d __A, __m128d __B)
361 return (__m128d)__builtin_ia32_cmplepd ((__v2df)__A, (__v2df)__B);
364 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
365 _mm_cmpgt_pd (__m128d __A, __m128d __B)
367 return (__m128d)__builtin_ia32_cmpgtpd ((__v2df)__A, (__v2df)__B);
370 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
371 _mm_cmpge_pd (__m128d __A, __m128d __B)
373 return (__m128d)__builtin_ia32_cmpgepd ((__v2df)__A, (__v2df)__B);
376 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
377 _mm_cmpneq_pd (__m128d __A, __m128d __B)
379 return (__m128d)__builtin_ia32_cmpneqpd ((__v2df)__A, (__v2df)__B);
382 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
383 _mm_cmpnlt_pd (__m128d __A, __m128d __B)
385 return (__m128d)__builtin_ia32_cmpnltpd ((__v2df)__A, (__v2df)__B);
388 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
389 _mm_cmpnle_pd (__m128d __A, __m128d __B)
391 return (__m128d)__builtin_ia32_cmpnlepd ((__v2df)__A, (__v2df)__B);
394 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
395 _mm_cmpngt_pd (__m128d __A, __m128d __B)
397 return (__m128d)__builtin_ia32_cmpngtpd ((__v2df)__A, (__v2df)__B);
400 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
401 _mm_cmpnge_pd (__m128d __A, __m128d __B)
403 return (__m128d)__builtin_ia32_cmpngepd ((__v2df)__A, (__v2df)__B);
406 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
407 _mm_cmpord_pd (__m128d __A, __m128d __B)
409 return (__m128d)__builtin_ia32_cmpordpd ((__v2df)__A, (__v2df)__B);
412 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
413 _mm_cmpunord_pd (__m128d __A, __m128d __B)
415 return (__m128d)__builtin_ia32_cmpunordpd ((__v2df)__A, (__v2df)__B);
418 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
419 _mm_cmpeq_sd (__m128d __A, __m128d __B)
421 return (__m128d)__builtin_ia32_cmpeqsd ((__v2df)__A, (__v2df)__B);
424 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
425 _mm_cmplt_sd (__m128d __A, __m128d __B)
427 return (__m128d)__builtin_ia32_cmpltsd ((__v2df)__A, (__v2df)__B);
430 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
431 _mm_cmple_sd (__m128d __A, __m128d __B)
433 return (__m128d)__builtin_ia32_cmplesd ((__v2df)__A, (__v2df)__B);
436 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
437 _mm_cmpgt_sd (__m128d __A, __m128d __B)
439 return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
440 (__v2df)
441 __builtin_ia32_cmpltsd ((__v2df) __B,
442 (__v2df)
443 __A));
446 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
447 _mm_cmpge_sd (__m128d __A, __m128d __B)
449 return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
450 (__v2df)
451 __builtin_ia32_cmplesd ((__v2df) __B,
452 (__v2df)
453 __A));
456 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
457 _mm_cmpneq_sd (__m128d __A, __m128d __B)
459 return (__m128d)__builtin_ia32_cmpneqsd ((__v2df)__A, (__v2df)__B);
462 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
463 _mm_cmpnlt_sd (__m128d __A, __m128d __B)
465 return (__m128d)__builtin_ia32_cmpnltsd ((__v2df)__A, (__v2df)__B);
468 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
469 _mm_cmpnle_sd (__m128d __A, __m128d __B)
471 return (__m128d)__builtin_ia32_cmpnlesd ((__v2df)__A, (__v2df)__B);
474 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
475 _mm_cmpngt_sd (__m128d __A, __m128d __B)
477 return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
478 (__v2df)
479 __builtin_ia32_cmpnltsd ((__v2df) __B,
480 (__v2df)
481 __A));
484 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
485 _mm_cmpnge_sd (__m128d __A, __m128d __B)
487 return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
488 (__v2df)
489 __builtin_ia32_cmpnlesd ((__v2df) __B,
490 (__v2df)
491 __A));
494 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
495 _mm_cmpord_sd (__m128d __A, __m128d __B)
497 return (__m128d)__builtin_ia32_cmpordsd ((__v2df)__A, (__v2df)__B);
500 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
501 _mm_cmpunord_sd (__m128d __A, __m128d __B)
503 return (__m128d)__builtin_ia32_cmpunordsd ((__v2df)__A, (__v2df)__B);
506 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
507 _mm_comieq_sd (__m128d __A, __m128d __B)
509 return __builtin_ia32_comisdeq ((__v2df)__A, (__v2df)__B);
512 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
513 _mm_comilt_sd (__m128d __A, __m128d __B)
515 return __builtin_ia32_comisdlt ((__v2df)__A, (__v2df)__B);
518 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
519 _mm_comile_sd (__m128d __A, __m128d __B)
521 return __builtin_ia32_comisdle ((__v2df)__A, (__v2df)__B);
524 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
525 _mm_comigt_sd (__m128d __A, __m128d __B)
527 return __builtin_ia32_comisdgt ((__v2df)__A, (__v2df)__B);
530 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
531 _mm_comige_sd (__m128d __A, __m128d __B)
533 return __builtin_ia32_comisdge ((__v2df)__A, (__v2df)__B);
536 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
537 _mm_comineq_sd (__m128d __A, __m128d __B)
539 return __builtin_ia32_comisdneq ((__v2df)__A, (__v2df)__B);
542 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
543 _mm_ucomieq_sd (__m128d __A, __m128d __B)
545 return __builtin_ia32_ucomisdeq ((__v2df)__A, (__v2df)__B);
548 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
549 _mm_ucomilt_sd (__m128d __A, __m128d __B)
551 return __builtin_ia32_ucomisdlt ((__v2df)__A, (__v2df)__B);
554 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
555 _mm_ucomile_sd (__m128d __A, __m128d __B)
557 return __builtin_ia32_ucomisdle ((__v2df)__A, (__v2df)__B);
560 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
561 _mm_ucomigt_sd (__m128d __A, __m128d __B)
563 return __builtin_ia32_ucomisdgt ((__v2df)__A, (__v2df)__B);
566 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
567 _mm_ucomige_sd (__m128d __A, __m128d __B)
569 return __builtin_ia32_ucomisdge ((__v2df)__A, (__v2df)__B);
572 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
573 _mm_ucomineq_sd (__m128d __A, __m128d __B)
575 return __builtin_ia32_ucomisdneq ((__v2df)__A, (__v2df)__B);
578 /* Create a vector of Qi, where i is the element number. */
580 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
581 _mm_set_epi64x (long long __q1, long long __q0)
583 return __extension__ (__m128i)(__v2di){ __q0, __q1 };
586 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
587 _mm_set_epi64 (__m64 __q1, __m64 __q0)
589 return _mm_set_epi64x ((long long)__q1, (long long)__q0);
592 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
593 _mm_set_epi32 (int __q3, int __q2, int __q1, int __q0)
595 return __extension__ (__m128i)(__v4si){ __q0, __q1, __q2, __q3 };
598 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
599 _mm_set_epi16 (short __q7, short __q6, short __q5, short __q4,
600 short __q3, short __q2, short __q1, short __q0)
602 return __extension__ (__m128i)(__v8hi){
603 __q0, __q1, __q2, __q3, __q4, __q5, __q6, __q7 };
606 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
607 _mm_set_epi8 (char __q15, char __q14, char __q13, char __q12,
608 char __q11, char __q10, char __q09, char __q08,
609 char __q07, char __q06, char __q05, char __q04,
610 char __q03, char __q02, char __q01, char __q00)
612 return __extension__ (__m128i)(__v16qi){
613 __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07,
614 __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15
618 /* Set all of the elements of the vector to A. */
620 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
621 _mm_set1_epi64x (long long __A)
623 return _mm_set_epi64x (__A, __A);
626 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
627 _mm_set1_epi64 (__m64 __A)
629 return _mm_set_epi64 (__A, __A);
632 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
633 _mm_set1_epi32 (int __A)
635 return _mm_set_epi32 (__A, __A, __A, __A);
638 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
639 _mm_set1_epi16 (short __A)
641 return _mm_set_epi16 (__A, __A, __A, __A, __A, __A, __A, __A);
644 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
645 _mm_set1_epi8 (char __A)
647 return _mm_set_epi8 (__A, __A, __A, __A, __A, __A, __A, __A,
648 __A, __A, __A, __A, __A, __A, __A, __A);
651 /* Create a vector of Qi, where i is the element number.
652 The parameter order is reversed from the _mm_set_epi* functions. */
654 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
655 _mm_setr_epi64 (__m64 __q0, __m64 __q1)
657 return _mm_set_epi64 (__q1, __q0);
660 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
661 _mm_setr_epi32 (int __q0, int __q1, int __q2, int __q3)
663 return _mm_set_epi32 (__q3, __q2, __q1, __q0);
666 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
667 _mm_setr_epi16 (short __q0, short __q1, short __q2, short __q3,
668 short __q4, short __q5, short __q6, short __q7)
670 return _mm_set_epi16 (__q7, __q6, __q5, __q4, __q3, __q2, __q1, __q0);
673 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
674 _mm_setr_epi8 (char __q00, char __q01, char __q02, char __q03,
675 char __q04, char __q05, char __q06, char __q07,
676 char __q08, char __q09, char __q10, char __q11,
677 char __q12, char __q13, char __q14, char __q15)
679 return _mm_set_epi8 (__q15, __q14, __q13, __q12, __q11, __q10, __q09, __q08,
680 __q07, __q06, __q05, __q04, __q03, __q02, __q01, __q00);
/* Load and store 128-bit integer vectors.  (Of the functions below, only
   _mm_loadl_epi64 creates a vector with element 0 as *P and the rest
   zero.)  */
685 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
686 _mm_load_si128 (__m128i const *__P)
688 return *__P;
691 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
692 _mm_loadu_si128 (__m128i const *__P)
694 return (__m128i) __builtin_ia32_loaddqu ((char const *)__P);
697 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
698 _mm_loadl_epi64 (__m128i const *__P)
700 return _mm_set_epi64 ((__m64)0LL, *(__m64 *)__P);
703 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
704 _mm_store_si128 (__m128i *__P, __m128i __B)
706 *__P = __B;
709 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
710 _mm_storeu_si128 (__m128i *__P, __m128i __B)
712 __builtin_ia32_storedqu ((char *)__P, (__v16qi)__B);
715 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
716 _mm_storel_epi64 (__m128i *__P, __m128i __B)
718 *(long long *)__P = __builtin_ia32_vec_ext_v2di ((__v2di)__B, 0);
721 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
722 _mm_movepi64_pi64 (__m128i __B)
724 return (__m64) __builtin_ia32_vec_ext_v2di ((__v2di)__B, 0);
727 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
728 _mm_movpi64_epi64 (__m64 __A)
730 return _mm_set_epi64 ((__m64)0LL, __A);
733 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
734 _mm_move_epi64 (__m128i __A)
736 return (__m128i)__builtin_ia32_movq128 ((__v2di) __A);
739 /* Create an undefined vector. */
740 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
741 _mm_undefined_si128 (void)
743 __m128i __Y = __Y;
744 return __Y;
747 /* Create a vector of zeros. */
748 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
749 _mm_setzero_si128 (void)
751 return __extension__ (__m128i)(__v4si){ 0, 0, 0, 0 };
754 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
755 _mm_cvtepi32_pd (__m128i __A)
757 return (__m128d)__builtin_ia32_cvtdq2pd ((__v4si) __A);
760 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
761 _mm_cvtepi32_ps (__m128i __A)
763 return (__m128)__builtin_ia32_cvtdq2ps ((__v4si) __A);
766 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
767 _mm_cvtpd_epi32 (__m128d __A)
769 return (__m128i)__builtin_ia32_cvtpd2dq ((__v2df) __A);
772 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
773 _mm_cvtpd_pi32 (__m128d __A)
775 return (__m64)__builtin_ia32_cvtpd2pi ((__v2df) __A);
778 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
779 _mm_cvtpd_ps (__m128d __A)
781 return (__m128)__builtin_ia32_cvtpd2ps ((__v2df) __A);
784 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
785 _mm_cvttpd_epi32 (__m128d __A)
787 return (__m128i)__builtin_ia32_cvttpd2dq ((__v2df) __A);
790 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
791 _mm_cvttpd_pi32 (__m128d __A)
793 return (__m64)__builtin_ia32_cvttpd2pi ((__v2df) __A);
796 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
797 _mm_cvtpi32_pd (__m64 __A)
799 return (__m128d)__builtin_ia32_cvtpi2pd ((__v2si) __A);
802 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
803 _mm_cvtps_epi32 (__m128 __A)
805 return (__m128i)__builtin_ia32_cvtps2dq ((__v4sf) __A);
808 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
809 _mm_cvttps_epi32 (__m128 __A)
811 return (__m128i)__builtin_ia32_cvttps2dq ((__v4sf) __A);
814 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
815 _mm_cvtps_pd (__m128 __A)
817 return (__m128d)__builtin_ia32_cvtps2pd ((__v4sf) __A);
820 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
821 _mm_cvtsd_si32 (__m128d __A)
823 return __builtin_ia32_cvtsd2si ((__v2df) __A);
826 #ifdef __x86_64__
827 /* Intel intrinsic. */
828 extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
829 _mm_cvtsd_si64 (__m128d __A)
831 return __builtin_ia32_cvtsd2si64 ((__v2df) __A);
834 /* Microsoft intrinsic. */
835 extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
836 _mm_cvtsd_si64x (__m128d __A)
838 return __builtin_ia32_cvtsd2si64 ((__v2df) __A);
840 #endif
842 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
843 _mm_cvttsd_si32 (__m128d __A)
845 return __builtin_ia32_cvttsd2si ((__v2df) __A);
848 #ifdef __x86_64__
849 /* Intel intrinsic. */
850 extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
851 _mm_cvttsd_si64 (__m128d __A)
853 return __builtin_ia32_cvttsd2si64 ((__v2df) __A);
856 /* Microsoft intrinsic. */
857 extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
858 _mm_cvttsd_si64x (__m128d __A)
860 return __builtin_ia32_cvttsd2si64 ((__v2df) __A);
862 #endif
864 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
865 _mm_cvtsd_ss (__m128 __A, __m128d __B)
867 return (__m128)__builtin_ia32_cvtsd2ss ((__v4sf) __A, (__v2df) __B);
870 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
871 _mm_cvtsi32_sd (__m128d __A, int __B)
873 return (__m128d)__builtin_ia32_cvtsi2sd ((__v2df) __A, __B);
876 #ifdef __x86_64__
877 /* Intel intrinsic. */
878 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
879 _mm_cvtsi64_sd (__m128d __A, long long __B)
881 return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B);
884 /* Microsoft intrinsic. */
885 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
886 _mm_cvtsi64x_sd (__m128d __A, long long __B)
888 return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B);
890 #endif
892 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
893 _mm_cvtss_sd (__m128d __A, __m128 __B)
895 return (__m128d)__builtin_ia32_cvtss2sd ((__v2df) __A, (__v4sf)__B);
/* Shuffle the elements of A and B with SHUFPD under the selector given as
   the third argument (see _MM_SHUFFLE2).  When not optimizing, a macro is
   used instead of the inline function; both forms pass the selector
   straight to the builtin.  */
#ifndef __OPTIMIZE__
#define _mm_shuffle_pd(A, B, N)						\
  ((__m128d)__builtin_ia32_shufpd ((__v2df)(__m128d)(A),		\
				   (__v2df)(__m128d)(B), (int)(N)))
#else
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shuffle_pd(__m128d __A, __m128d __B, const int __mask)
{
  return (__m128d)__builtin_ia32_shufpd ((__v2df)__A, (__v2df)__B, __mask);
}
#endif
910 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
911 _mm_unpackhi_pd (__m128d __A, __m128d __B)
913 return (__m128d)__builtin_ia32_unpckhpd ((__v2df)__A, (__v2df)__B);
916 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
917 _mm_unpacklo_pd (__m128d __A, __m128d __B)
919 return (__m128d)__builtin_ia32_unpcklpd ((__v2df)__A, (__v2df)__B);
922 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
923 _mm_loadh_pd (__m128d __A, double const *__B)
925 return (__m128d)__builtin_ia32_loadhpd ((__v2df)__A, __B);
928 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
929 _mm_loadl_pd (__m128d __A, double const *__B)
931 return (__m128d)__builtin_ia32_loadlpd ((__v2df)__A, __B);
934 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
935 _mm_movemask_pd (__m128d __A)
937 return __builtin_ia32_movmskpd ((__v2df)__A);
940 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
941 _mm_packs_epi16 (__m128i __A, __m128i __B)
943 return (__m128i)__builtin_ia32_packsswb128 ((__v8hi)__A, (__v8hi)__B);
946 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
947 _mm_packs_epi32 (__m128i __A, __m128i __B)
949 return (__m128i)__builtin_ia32_packssdw128 ((__v4si)__A, (__v4si)__B);
952 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
953 _mm_packus_epi16 (__m128i __A, __m128i __B)
955 return (__m128i)__builtin_ia32_packuswb128 ((__v8hi)__A, (__v8hi)__B);
958 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
959 _mm_unpackhi_epi8 (__m128i __A, __m128i __B)
961 return (__m128i)__builtin_ia32_punpckhbw128 ((__v16qi)__A, (__v16qi)__B);
964 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
965 _mm_unpackhi_epi16 (__m128i __A, __m128i __B)
967 return (__m128i)__builtin_ia32_punpckhwd128 ((__v8hi)__A, (__v8hi)__B);
970 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
971 _mm_unpackhi_epi32 (__m128i __A, __m128i __B)
973 return (__m128i)__builtin_ia32_punpckhdq128 ((__v4si)__A, (__v4si)__B);
976 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
977 _mm_unpackhi_epi64 (__m128i __A, __m128i __B)
979 return (__m128i)__builtin_ia32_punpckhqdq128 ((__v2di)__A, (__v2di)__B);
982 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
983 _mm_unpacklo_epi8 (__m128i __A, __m128i __B)
985 return (__m128i)__builtin_ia32_punpcklbw128 ((__v16qi)__A, (__v16qi)__B);
988 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
989 _mm_unpacklo_epi16 (__m128i __A, __m128i __B)
991 return (__m128i)__builtin_ia32_punpcklwd128 ((__v8hi)__A, (__v8hi)__B);
994 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
995 _mm_unpacklo_epi32 (__m128i __A, __m128i __B)
997 return (__m128i)__builtin_ia32_punpckldq128 ((__v4si)__A, (__v4si)__B);
1000 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1001 _mm_unpacklo_epi64 (__m128i __A, __m128i __B)
1003 return (__m128i)__builtin_ia32_punpcklqdq128 ((__v2di)__A, (__v2di)__B);
1006 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1007 _mm_add_epi8 (__m128i __A, __m128i __B)
1009 return (__m128i)__builtin_ia32_paddb128 ((__v16qi)__A, (__v16qi)__B);
1012 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1013 _mm_add_epi16 (__m128i __A, __m128i __B)
1015 return (__m128i)__builtin_ia32_paddw128 ((__v8hi)__A, (__v8hi)__B);
1018 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1019 _mm_add_epi32 (__m128i __A, __m128i __B)
1021 return (__m128i)__builtin_ia32_paddd128 ((__v4si)__A, (__v4si)__B);
1024 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1025 _mm_add_epi64 (__m128i __A, __m128i __B)
1027 return (__m128i)__builtin_ia32_paddq128 ((__v2di)__A, (__v2di)__B);
1030 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1031 _mm_adds_epi8 (__m128i __A, __m128i __B)
1033 return (__m128i)__builtin_ia32_paddsb128 ((__v16qi)__A, (__v16qi)__B);
1036 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1037 _mm_adds_epi16 (__m128i __A, __m128i __B)
1039 return (__m128i)__builtin_ia32_paddsw128 ((__v8hi)__A, (__v8hi)__B);
1042 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1043 _mm_adds_epu8 (__m128i __A, __m128i __B)
1045 return (__m128i)__builtin_ia32_paddusb128 ((__v16qi)__A, (__v16qi)__B);
1048 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1049 _mm_adds_epu16 (__m128i __A, __m128i __B)
1051 return (__m128i)__builtin_ia32_paddusw128 ((__v8hi)__A, (__v8hi)__B);
1054 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1055 _mm_sub_epi8 (__m128i __A, __m128i __B)
1057 return (__m128i)__builtin_ia32_psubb128 ((__v16qi)__A, (__v16qi)__B);
1060 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1061 _mm_sub_epi16 (__m128i __A, __m128i __B)
1063 return (__m128i)__builtin_ia32_psubw128 ((__v8hi)__A, (__v8hi)__B);
1066 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1067 _mm_sub_epi32 (__m128i __A, __m128i __B)
1069 return (__m128i)__builtin_ia32_psubd128 ((__v4si)__A, (__v4si)__B);
1072 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1073 _mm_sub_epi64 (__m128i __A, __m128i __B)
1075 return (__m128i)__builtin_ia32_psubq128 ((__v2di)__A, (__v2di)__B);
1078 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1079 _mm_subs_epi8 (__m128i __A, __m128i __B)
1081 return (__m128i)__builtin_ia32_psubsb128 ((__v16qi)__A, (__v16qi)__B);
1084 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1085 _mm_subs_epi16 (__m128i __A, __m128i __B)
1087 return (__m128i)__builtin_ia32_psubsw128 ((__v8hi)__A, (__v8hi)__B);
1090 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1091 _mm_subs_epu8 (__m128i __A, __m128i __B)
1093 return (__m128i)__builtin_ia32_psubusb128 ((__v16qi)__A, (__v16qi)__B);
1096 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1097 _mm_subs_epu16 (__m128i __A, __m128i __B)
1099 return (__m128i)__builtin_ia32_psubusw128 ((__v8hi)__A, (__v8hi)__B);
1102 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1103 _mm_madd_epi16 (__m128i __A, __m128i __B)
1105 return (__m128i)__builtin_ia32_pmaddwd128 ((__v8hi)__A, (__v8hi)__B);
1108 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1109 _mm_mulhi_epi16 (__m128i __A, __m128i __B)
1111 return (__m128i)__builtin_ia32_pmulhw128 ((__v8hi)__A, (__v8hi)__B);
1114 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1115 _mm_mullo_epi16 (__m128i __A, __m128i __B)
1117 return (__m128i)__builtin_ia32_pmullw128 ((__v8hi)__A, (__v8hi)__B);
1120 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1121 _mm_mul_su32 (__m64 __A, __m64 __B)
1123 return (__m64)__builtin_ia32_pmuludq ((__v2si)__A, (__v2si)__B);
1126 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1127 _mm_mul_epu32 (__m128i __A, __m128i __B)
1129 return (__m128i)__builtin_ia32_pmuludq128 ((__v4si)__A, (__v4si)__B);
1132 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1133 _mm_slli_epi16 (__m128i __A, int __B)
1135 return (__m128i)__builtin_ia32_psllwi128 ((__v8hi)__A, __B);
1138 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1139 _mm_slli_epi32 (__m128i __A, int __B)
1141 return (__m128i)__builtin_ia32_pslldi128 ((__v4si)__A, __B);
1144 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1145 _mm_slli_epi64 (__m128i __A, int __B)
1147 return (__m128i)__builtin_ia32_psllqi128 ((__v2di)__A, __B);
1150 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1151 _mm_srai_epi16 (__m128i __A, int __B)
1153 return (__m128i)__builtin_ia32_psrawi128 ((__v8hi)__A, __B);
1156 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1157 _mm_srai_epi32 (__m128i __A, int __B)
1159 return (__m128i)__builtin_ia32_psradi128 ((__v4si)__A, __B);
/* Shift the whole 128-bit value __A right (_mm_srli_si128) or left
   (_mm_slli_si128) by __N bytes.  The byte count is multiplied by 8
   before being handed to the builtins.  Inline functions are only
   provided when optimizing; otherwise macros pass the count to the
   builtins directly.  */
#ifdef __OPTIMIZE__
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srli_si128 (__m128i __A, const int __N)
{
  return (__m128i)__builtin_ia32_psrldqi128 (__A, __N * 8);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_slli_si128 (__m128i __A, const int __N)
{
  return (__m128i)__builtin_ia32_pslldqi128 (__A, __N * 8);
}
#else
#define _mm_srli_si128(A, N) \
  ((__m128i)__builtin_ia32_psrldqi128 ((__m128i)(A), (int)(N) * 8))
#define _mm_slli_si128(A, N) \
  ((__m128i)__builtin_ia32_pslldqi128 ((__m128i)(A), (int)(N) * 8))
#endif
1181 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1182 _mm_srli_epi16 (__m128i __A, int __B)
1184 return (__m128i)__builtin_ia32_psrlwi128 ((__v8hi)__A, __B);
1187 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1188 _mm_srli_epi32 (__m128i __A, int __B)
1190 return (__m128i)__builtin_ia32_psrldi128 ((__v4si)__A, __B);
1193 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1194 _mm_srli_epi64 (__m128i __A, int __B)
1196 return (__m128i)__builtin_ia32_psrlqi128 ((__v2di)__A, __B);
1199 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1200 _mm_sll_epi16 (__m128i __A, __m128i __B)
1202 return (__m128i)__builtin_ia32_psllw128((__v8hi)__A, (__v8hi)__B);
1205 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1206 _mm_sll_epi32 (__m128i __A, __m128i __B)
1208 return (__m128i)__builtin_ia32_pslld128((__v4si)__A, (__v4si)__B);
1211 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1212 _mm_sll_epi64 (__m128i __A, __m128i __B)
1214 return (__m128i)__builtin_ia32_psllq128((__v2di)__A, (__v2di)__B);
1217 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1218 _mm_sra_epi16 (__m128i __A, __m128i __B)
1220 return (__m128i)__builtin_ia32_psraw128 ((__v8hi)__A, (__v8hi)__B);
1223 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1224 _mm_sra_epi32 (__m128i __A, __m128i __B)
1226 return (__m128i)__builtin_ia32_psrad128 ((__v4si)__A, (__v4si)__B);
1229 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1230 _mm_srl_epi16 (__m128i __A, __m128i __B)
1232 return (__m128i)__builtin_ia32_psrlw128 ((__v8hi)__A, (__v8hi)__B);
1235 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1236 _mm_srl_epi32 (__m128i __A, __m128i __B)
1238 return (__m128i)__builtin_ia32_psrld128 ((__v4si)__A, (__v4si)__B);
1241 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1242 _mm_srl_epi64 (__m128i __A, __m128i __B)
1244 return (__m128i)__builtin_ia32_psrlq128 ((__v2di)__A, (__v2di)__B);
1247 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1248 _mm_and_si128 (__m128i __A, __m128i __B)
1250 return (__m128i)__builtin_ia32_pand128 ((__v2di)__A, (__v2di)__B);
1253 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1254 _mm_andnot_si128 (__m128i __A, __m128i __B)
1256 return (__m128i)__builtin_ia32_pandn128 ((__v2di)__A, (__v2di)__B);
1259 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1260 _mm_or_si128 (__m128i __A, __m128i __B)
1262 return (__m128i)__builtin_ia32_por128 ((__v2di)__A, (__v2di)__B);
1265 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1266 _mm_xor_si128 (__m128i __A, __m128i __B)
1268 return (__m128i)__builtin_ia32_pxor128 ((__v2di)__A, (__v2di)__B);
1271 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1272 _mm_cmpeq_epi8 (__m128i __A, __m128i __B)
1274 return (__m128i)__builtin_ia32_pcmpeqb128 ((__v16qi)__A, (__v16qi)__B);
1277 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1278 _mm_cmpeq_epi16 (__m128i __A, __m128i __B)
1280 return (__m128i)__builtin_ia32_pcmpeqw128 ((__v8hi)__A, (__v8hi)__B);
1283 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1284 _mm_cmpeq_epi32 (__m128i __A, __m128i __B)
1286 return (__m128i)__builtin_ia32_pcmpeqd128 ((__v4si)__A, (__v4si)__B);
1289 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1290 _mm_cmplt_epi8 (__m128i __A, __m128i __B)
1292 return (__m128i)__builtin_ia32_pcmpgtb128 ((__v16qi)__B, (__v16qi)__A);
1295 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1296 _mm_cmplt_epi16 (__m128i __A, __m128i __B)
1298 return (__m128i)__builtin_ia32_pcmpgtw128 ((__v8hi)__B, (__v8hi)__A);
1301 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1302 _mm_cmplt_epi32 (__m128i __A, __m128i __B)
1304 return (__m128i)__builtin_ia32_pcmpgtd128 ((__v4si)__B, (__v4si)__A);
1307 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1308 _mm_cmpgt_epi8 (__m128i __A, __m128i __B)
1310 return (__m128i)__builtin_ia32_pcmpgtb128 ((__v16qi)__A, (__v16qi)__B);
1313 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1314 _mm_cmpgt_epi16 (__m128i __A, __m128i __B)
1316 return (__m128i)__builtin_ia32_pcmpgtw128 ((__v8hi)__A, (__v8hi)__B);
1319 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1320 _mm_cmpgt_epi32 (__m128i __A, __m128i __B)
1322 return (__m128i)__builtin_ia32_pcmpgtd128 ((__v4si)__A, (__v4si)__B);
/* _mm_extract_epi16 returns the 16-bit element of __A selected by __N,
   zero-extended to int (the builtin's result is cast to unsigned short).
   _mm_insert_epi16 returns __A with the element selected by __N replaced
   by __D.  Inline functions are only provided when optimizing; otherwise
   macros pass the selector to the builtins directly.  */
#ifdef __OPTIMIZE__
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_extract_epi16 (__m128i const __A, int const __N)
{
  return (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi)__A, __N);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_insert_epi16 (__m128i const __A, int const __D, int const __N)
{
  return (__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)__A, __D, __N);
}
#else
#define _mm_extract_epi16(A, N) \
  ((int) (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi)(__m128i)(A), (int)(N)))
#define _mm_insert_epi16(A, D, N)                               \
  ((__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)(__m128i)(A), \
                                          (int)(D), (int)(N)))
#endif
1345 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1346 _mm_max_epi16 (__m128i __A, __m128i __B)
1348 return (__m128i)__builtin_ia32_pmaxsw128 ((__v8hi)__A, (__v8hi)__B);
1351 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1352 _mm_max_epu8 (__m128i __A, __m128i __B)
1354 return (__m128i)__builtin_ia32_pmaxub128 ((__v16qi)__A, (__v16qi)__B);
1357 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1358 _mm_min_epi16 (__m128i __A, __m128i __B)
1360 return (__m128i)__builtin_ia32_pminsw128 ((__v8hi)__A, (__v8hi)__B);
1363 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1364 _mm_min_epu8 (__m128i __A, __m128i __B)
1366 return (__m128i)__builtin_ia32_pminub128 ((__v16qi)__A, (__v16qi)__B);
1369 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1370 _mm_movemask_epi8 (__m128i __A)
1372 return __builtin_ia32_pmovmskb128 ((__v16qi)__A);
1375 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1376 _mm_mulhi_epu16 (__m128i __A, __m128i __B)
1378 return (__m128i)__builtin_ia32_pmulhuw128 ((__v8hi)__A, (__v8hi)__B);
/* Element shuffles controlled by the selector __mask:
   _mm_shufflehi_epi16 permutes the four high 16-bit elements of __A
   (PSHUFHW), _mm_shufflelo_epi16 permutes the four low 16-bit elements
   (PSHUFLW), and _mm_shuffle_epi32 permutes the four 32-bit elements
   (PSHUFD).  Inline functions are only provided when optimizing;
   otherwise macros pass the selector to the builtins directly.  */
#ifdef __OPTIMIZE__
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shufflehi_epi16 (__m128i __A, const int __mask)
{
  return (__m128i)__builtin_ia32_pshufhw ((__v8hi)__A, __mask);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shufflelo_epi16 (__m128i __A, const int __mask)
{
  return (__m128i)__builtin_ia32_pshuflw ((__v8hi)__A, __mask);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shuffle_epi32 (__m128i __A, const int __mask)
{
  return (__m128i)__builtin_ia32_pshufd ((__v4si)__A, __mask);
}
#else
#define _mm_shufflehi_epi16(A, N) \
  ((__m128i)__builtin_ia32_pshufhw ((__v8hi)(__m128i)(A), (int)(N)))
#define _mm_shufflelo_epi16(A, N) \
  ((__m128i)__builtin_ia32_pshuflw ((__v8hi)(__m128i)(A), (int)(N)))
#define _mm_shuffle_epi32(A, N) \
  ((__m128i)__builtin_ia32_pshufd ((__v4si)(__m128i)(A), (int)(N)))
#endif
1408 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1409 _mm_maskmoveu_si128 (__m128i __A, __m128i __B, char *__C)
1411 __builtin_ia32_maskmovdqu ((__v16qi)__A, (__v16qi)__B, __C);
1414 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1415 _mm_avg_epu8 (__m128i __A, __m128i __B)
1417 return (__m128i)__builtin_ia32_pavgb128 ((__v16qi)__A, (__v16qi)__B);
1420 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1421 _mm_avg_epu16 (__m128i __A, __m128i __B)
1423 return (__m128i)__builtin_ia32_pavgw128 ((__v8hi)__A, (__v8hi)__B);
1426 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1427 _mm_sad_epu8 (__m128i __A, __m128i __B)
1429 return (__m128i)__builtin_ia32_psadbw128 ((__v16qi)__A, (__v16qi)__B);
/* Store the 32-bit integer __B to *__A using a non-temporal hint
   (MOVNTI).  */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_stream_si32 (int *__A, int __B)
{
  __builtin_ia32_movnti (__A, __B);
}
/* Store the 64-bit integer __B to *__A using a non-temporal hint
   (MOVNTI).  Only available when compiling for x86-64.  */
#ifdef __x86_64__
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_stream_si64 (long long int *__A, long long int __B)
{
  __builtin_ia32_movnti64 (__A, __B);
}
#endif
1446 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1447 _mm_stream_si128 (__m128i *__A, __m128i __B)
1449 __builtin_ia32_movntdq ((__v2di *)__A, (__v2di)__B);
1452 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1453 _mm_stream_pd (double *__A, __m128d __B)
1455 __builtin_ia32_movntpd (__A, (__v2df)__B);
/* Flush the cache line containing address __A (CLFLUSH).  */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_clflush (void const *__A)
{
  __builtin_ia32_clflush (__A);
}
/* Issue a load fence (LFENCE).  */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_lfence (void)
{
  __builtin_ia32_lfence ();
}
/* Issue a full memory fence (MFENCE).  */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mfence (void)
{
  __builtin_ia32_mfence ();
}
1476 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1477 _mm_cvtsi32_si128 (int __A)
1479 return _mm_set_epi32 (0, 0, 0, __A);
1482 #ifdef __x86_64__
1483 /* Intel intrinsic. */
1484 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1485 _mm_cvtsi64_si128 (long long __A)
1487 return _mm_set_epi64x (0, __A);
1490 /* Microsoft intrinsic. */
1491 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1492 _mm_cvtsi64x_si128 (long long __A)
1494 return _mm_set_epi64x (0, __A);
1496 #endif
/* Casts between various SP, DP, INT vector types.  Note that these do no
   conversion of values, they just change the type.  */
1500 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1501 _mm_castpd_ps(__m128d __A)
1503 return (__m128) __A;
1506 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1507 _mm_castpd_si128(__m128d __A)
1509 return (__m128i) __A;
1512 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1513 _mm_castps_pd(__m128 __A)
1515 return (__m128d) __A;
1518 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1519 _mm_castps_si128(__m128 __A)
1521 return (__m128i) __A;
1524 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1525 _mm_castsi128_ps(__m128i __A)
1527 return (__m128) __A;
1530 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1531 _mm_castsi128_pd(__m128i __A)
1533 return (__m128d) __A;
1536 #ifdef __DISABLE_SSE2__
1537 #undef __DISABLE_SSE2__
1538 #pragma GCC pop_options
1539 #endif /* __DISABLE_SSE2__ */
1541 #endif /* _EMMINTRIN_H_INCLUDED */