2003-12-26 Guilhem Lavaux <guilhem@kaffe.org>
[official-gcc.git] / gcc / config / i386 / emmintrin.h
blob3618c2bfaaa5ded35f1e88810f8f3e4050ac4e55
1 /* Copyright (C) 2003 Free Software Foundation, Inc.
3 This file is part of GCC.
5 GCC is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
8 any later version.
10 GCC is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with GCC; see the file COPYING. If not, write to
17 the Free Software Foundation, 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
20 /* As a special exception, if you include this header file into source
21 files compiled by GCC, this header file does not by itself cause
22 the resulting executable to be covered by the GNU General Public
23 License. This exception does not however invalidate any other
24 reasons why the executable file might be covered by the GNU General
25 Public License. */
27 /* Implemented from the specification included in the Intel C++ Compiler
28 User Guide and Reference, version 8.0. */
30 #ifndef _EMMINTRIN_H_INCLUDED
31 #define _EMMINTRIN_H_INCLUDED
33 #ifdef __SSE2__
34 #include <xmmintrin.h>
36 /* SSE2 */
37 typedef int __v2df __attribute__ ((mode (V2DF)));
38 typedef int __v2di __attribute__ ((mode (V2DI)));
39 typedef int __v4si __attribute__ ((mode (V4SI)));
40 typedef int __v8hi __attribute__ ((mode (V8HI)));
41 typedef int __v16qi __attribute__ ((mode (V16QI)));
/* Create a selector for use with the SHUFPD instruction.  FP1 supplies
   bit 1 of the selector and FP0 supplies bit 0.  The arguments are not
   masked, so each one should be 0 or 1.  */
#define _MM_SHUFFLE2(fp1,fp0) \
 (((fp1) << 1) | (fp0))
47 #define __m128i __v2di
48 #define __m128d __v2df
50 /* Create a vector with element 0 as *P and the rest zero. */
51 static __inline __m128d
52 _mm_load_sd (double const *__P)
54 return (__m128d) __builtin_ia32_loadsd (__P);
57 /* Create a vector with all two elements equal to *P. */
58 static __inline __m128d
59 _mm_load1_pd (double const *__P)
61 __v2df __tmp = __builtin_ia32_loadsd (__P);
62 return (__m128d) __builtin_ia32_shufpd (__tmp, __tmp, _MM_SHUFFLE2 (0,0));
65 static __inline __m128d
66 _mm_load_pd1 (double const *__P)
68 return _mm_load1_pd (__P);
71 /* Load two DPFP values from P. The address must be 16-byte aligned. */
72 static __inline __m128d
73 _mm_load_pd (double const *__P)
75 return (__m128d) __builtin_ia32_loadapd (__P);
78 /* Load two DPFP values from P. The address need not be 16-byte aligned. */
79 static __inline __m128d
80 _mm_loadu_pd (double const *__P)
82 return (__m128d) __builtin_ia32_loadupd (__P);
85 /* Load two DPFP values in reverse order. The address must be aligned. */
86 static __inline __m128d
87 _mm_loadr_pd (double const *__P)
89 __v2df __tmp = __builtin_ia32_loadapd (__P);
90 return (__m128d) __builtin_ia32_shufpd (__tmp, __tmp, _MM_SHUFFLE2 (0,1));
93 /* Create a vector with element 0 as F and the rest zero. */
94 static __inline __m128d
95 _mm_set_sd (double __F)
97 return (__m128d) __builtin_ia32_loadsd (&__F);
100 /* Create a vector with all two elements equal to F. */
101 static __inline __m128d
102 _mm_set1_pd (double __F)
104 __v2df __tmp = __builtin_ia32_loadsd (&__F);
105 return (__m128d) __builtin_ia32_shufpd (__tmp, __tmp, _MM_SHUFFLE2 (0,0));
108 static __inline __m128d
109 _mm_set_pd1 (double __F)
111 return _mm_set1_pd (__F);
114 /* Create the vector [Z Y]. */
115 static __inline __m128d
116 _mm_set_pd (double __Z, double __Y)
118 union {
119 double __a[2];
120 __m128d __v;
121 } __u;
123 __u.__a[0] = __Y;
124 __u.__a[1] = __Z;
126 return __u.__v;
129 /* Create the vector [Y Z]. */
130 static __inline __m128d
131 _mm_setr_pd (double __Z, double __Y)
133 return _mm_set_pd (__Y, __Z);
136 /* Create a vector of zeros. */
137 static __inline __m128d
138 _mm_setzero_pd (void)
140 return (__m128d) __builtin_ia32_setzeropd ();
143 /* Stores the lower DPFP value. */
144 static __inline void
145 _mm_store_sd (double *__P, __m128d __A)
147 __builtin_ia32_storesd (__P, (__v2df)__A);
150 /* Store the lower DPFP value across two words. */
151 static __inline void
152 _mm_store1_pd (double *__P, __m128d __A)
154 __v2df __va = (__v2df)__A;
155 __v2df __tmp = __builtin_ia32_shufpd (__va, __va, _MM_SHUFFLE2 (0,0));
156 __builtin_ia32_storeapd (__P, __tmp);
159 static __inline void
160 _mm_store_pd1 (double *__P, __m128d __A)
162 _mm_store1_pd (__P, __A);
165 /* Store two DPFP values. The address must be 16-byte aligned. */
166 static __inline void
167 _mm_store_pd (double *__P, __m128d __A)
169 __builtin_ia32_storeapd (__P, (__v2df)__A);
172 /* Store two DPFP values. The address need not be 16-byte aligned. */
173 static __inline void
174 _mm_storeu_pd (double *__P, __m128d __A)
176 __builtin_ia32_storeupd (__P, (__v2df)__A);
179 /* Store two DPFP values in reverse order. The address must be aligned. */
180 static __inline void
181 _mm_storer_pd (double *__P, __m128d __A)
183 __v2df __va = (__v2df)__A;
184 __v2df __tmp = __builtin_ia32_shufpd (__va, __va, _MM_SHUFFLE2 (0,1));
185 __builtin_ia32_storeapd (__P, __tmp);
188 /* Sets the low DPFP value of A from the low value of B. */
189 static __inline __m128d
190 _mm_move_sd (__m128d __A, __m128d __B)
192 return (__m128d) __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B);
196 static __inline __m128d
197 _mm_add_pd (__m128d __A, __m128d __B)
199 return (__m128d)__builtin_ia32_addpd ((__v2df)__A, (__v2df)__B);
202 static __inline __m128d
203 _mm_add_sd (__m128d __A, __m128d __B)
205 return (__m128d)__builtin_ia32_addsd ((__v2df)__A, (__v2df)__B);
208 static __inline __m128d
209 _mm_sub_pd (__m128d __A, __m128d __B)
211 return (__m128d)__builtin_ia32_subpd ((__v2df)__A, (__v2df)__B);
214 static __inline __m128d
215 _mm_sub_sd (__m128d __A, __m128d __B)
217 return (__m128d)__builtin_ia32_subsd ((__v2df)__A, (__v2df)__B);
220 static __inline __m128d
221 _mm_mul_pd (__m128d __A, __m128d __B)
223 return (__m128d)__builtin_ia32_mulpd ((__v2df)__A, (__v2df)__B);
226 static __inline __m128d
227 _mm_mul_sd (__m128d __A, __m128d __B)
229 return (__m128d)__builtin_ia32_mulsd ((__v2df)__A, (__v2df)__B);
232 static __inline __m128d
233 _mm_div_pd (__m128d __A, __m128d __B)
235 return (__m128d)__builtin_ia32_divpd ((__v2df)__A, (__v2df)__B);
238 static __inline __m128d
239 _mm_div_sd (__m128d __A, __m128d __B)
241 return (__m128d)__builtin_ia32_divsd ((__v2df)__A, (__v2df)__B);
244 static __inline __m128d
245 _mm_sqrt_pd (__m128d __A)
247 return (__m128d)__builtin_ia32_sqrtpd ((__v2df)__A);
250 /* Return pair {sqrt (A[0), B[1]}. */
251 static __inline __m128d
252 _mm_sqrt_sd (__m128d __A, __m128d __B)
254 __v2df __tmp = __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B);
255 return (__m128d)__builtin_ia32_sqrtsd ((__v2df)__tmp);
258 static __inline __m128d
259 _mm_min_pd (__m128d __A, __m128d __B)
261 return (__m128d)__builtin_ia32_minpd ((__v2df)__A, (__v2df)__B);
264 static __inline __m128d
265 _mm_min_sd (__m128d __A, __m128d __B)
267 return (__m128d)__builtin_ia32_minsd ((__v2df)__A, (__v2df)__B);
270 static __inline __m128d
271 _mm_max_pd (__m128d __A, __m128d __B)
273 return (__m128d)__builtin_ia32_maxpd ((__v2df)__A, (__v2df)__B);
276 static __inline __m128d
277 _mm_max_sd (__m128d __A, __m128d __B)
279 return (__m128d)__builtin_ia32_maxsd ((__v2df)__A, (__v2df)__B);
282 static __inline __m128d
283 _mm_and_pd (__m128d __A, __m128d __B)
285 return (__m128d)__builtin_ia32_andpd ((__v2df)__A, (__v2df)__B);
288 static __inline __m128d
289 _mm_andnot_pd (__m128d __A, __m128d __B)
291 return (__m128d)__builtin_ia32_andnpd ((__v2df)__A, (__v2df)__B);
294 static __inline __m128d
295 _mm_or_pd (__m128d __A, __m128d __B)
297 return (__m128d)__builtin_ia32_orpd ((__v2df)__A, (__v2df)__B);
300 static __inline __m128d
301 _mm_xor_pd (__m128d __A, __m128d __B)
303 return (__m128d)__builtin_ia32_xorpd ((__v2df)__A, (__v2df)__B);
306 static __inline __m128d
307 _mm_cmpeq_pd (__m128d __A, __m128d __B)
309 return (__m128d)__builtin_ia32_cmpeqpd ((__v2df)__A, (__v2df)__B);
312 static __inline __m128d
313 _mm_cmplt_pd (__m128d __A, __m128d __B)
315 return (__m128d)__builtin_ia32_cmpltpd ((__v2df)__A, (__v2df)__B);
318 static __inline __m128d
319 _mm_cmple_pd (__m128d __A, __m128d __B)
321 return (__m128d)__builtin_ia32_cmplepd ((__v2df)__A, (__v2df)__B);
324 static __inline __m128d
325 _mm_cmpgt_pd (__m128d __A, __m128d __B)
327 return (__m128d)__builtin_ia32_cmpgtpd ((__v2df)__A, (__v2df)__B);
330 static __inline __m128d
331 _mm_cmpge_pd (__m128d __A, __m128d __B)
333 return (__m128d)__builtin_ia32_cmpgepd ((__v2df)__A, (__v2df)__B);
336 static __inline __m128d
337 _mm_cmpneq_pd (__m128d __A, __m128d __B)
339 return (__m128d)__builtin_ia32_cmpneqpd ((__v2df)__A, (__v2df)__B);
342 static __inline __m128d
343 _mm_cmpnlt_pd (__m128d __A, __m128d __B)
345 return (__m128d)__builtin_ia32_cmpnltpd ((__v2df)__A, (__v2df)__B);
348 static __inline __m128d
349 _mm_cmpnle_pd (__m128d __A, __m128d __B)
351 return (__m128d)__builtin_ia32_cmpnlepd ((__v2df)__A, (__v2df)__B);
354 static __inline __m128d
355 _mm_cmpngt_pd (__m128d __A, __m128d __B)
357 return (__m128d)__builtin_ia32_cmpngtpd ((__v2df)__A, (__v2df)__B);
360 static __inline __m128d
361 _mm_cmpnge_pd (__m128d __A, __m128d __B)
363 return (__m128d)__builtin_ia32_cmpngepd ((__v2df)__A, (__v2df)__B);
366 static __inline __m128d
367 _mm_cmpord_pd (__m128d __A, __m128d __B)
369 return (__m128d)__builtin_ia32_cmpordpd ((__v2df)__A, (__v2df)__B);
372 static __inline __m128d
373 _mm_cmpunord_pd (__m128d __A, __m128d __B)
375 return (__m128d)__builtin_ia32_cmpunordpd ((__v2df)__A, (__v2df)__B);
378 static __inline __m128d
379 _mm_cmpeq_sd (__m128d __A, __m128d __B)
381 return (__m128d)__builtin_ia32_cmpeqsd ((__v2df)__A, (__v2df)__B);
384 static __inline __m128d
385 _mm_cmplt_sd (__m128d __A, __m128d __B)
387 return (__m128d)__builtin_ia32_cmpltsd ((__v2df)__A, (__v2df)__B);
390 static __inline __m128d
391 _mm_cmple_sd (__m128d __A, __m128d __B)
393 return (__m128d)__builtin_ia32_cmplesd ((__v2df)__A, (__v2df)__B);
396 static __inline __m128d
397 _mm_cmpgt_sd (__m128d __A, __m128d __B)
399 return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
400 (__v2df)
401 __builtin_ia32_cmpltsd ((__v2df) __B,
402 (__v2df)
403 __A));
406 static __inline __m128d
407 _mm_cmpge_sd (__m128d __A, __m128d __B)
409 return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
410 (__v2df)
411 __builtin_ia32_cmplesd ((__v2df) __B,
412 (__v2df)
413 __A));
416 static __inline __m128d
417 _mm_cmpneq_sd (__m128d __A, __m128d __B)
419 return (__m128d)__builtin_ia32_cmpneqsd ((__v2df)__A, (__v2df)__B);
422 static __inline __m128d
423 _mm_cmpnlt_sd (__m128d __A, __m128d __B)
425 return (__m128d)__builtin_ia32_cmpnltsd ((__v2df)__A, (__v2df)__B);
428 static __inline __m128d
429 _mm_cmpnle_sd (__m128d __A, __m128d __B)
431 return (__m128d)__builtin_ia32_cmpnlesd ((__v2df)__A, (__v2df)__B);
434 static __inline __m128d
435 _mm_cmpngt_sd (__m128d __A, __m128d __B)
437 return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
438 (__v2df)
439 __builtin_ia32_cmpnltsd ((__v2df) __B,
440 (__v2df)
441 __A));
444 static __inline __m128d
445 _mm_cmpnge_sd (__m128d __A, __m128d __B)
447 return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
448 (__v2df)
449 __builtin_ia32_cmpnlesd ((__v2df) __B,
450 (__v2df)
451 __A));
454 static __inline __m128d
455 _mm_cmpord_sd (__m128d __A, __m128d __B)
457 return (__m128d)__builtin_ia32_cmpordsd ((__v2df)__A, (__v2df)__B);
460 static __inline __m128d
461 _mm_cmpunord_sd (__m128d __A, __m128d __B)
463 return (__m128d)__builtin_ia32_cmpunordsd ((__v2df)__A, (__v2df)__B);
466 static __inline int
467 _mm_comieq_sd (__m128d __A, __m128d __B)
469 return __builtin_ia32_comisdeq ((__v2df)__A, (__v2df)__B);
472 static __inline int
473 _mm_comilt_sd (__m128d __A, __m128d __B)
475 return __builtin_ia32_comisdlt ((__v2df)__A, (__v2df)__B);
478 static __inline int
479 _mm_comile_sd (__m128d __A, __m128d __B)
481 return __builtin_ia32_comisdle ((__v2df)__A, (__v2df)__B);
484 static __inline int
485 _mm_comigt_sd (__m128d __A, __m128d __B)
487 return __builtin_ia32_comisdgt ((__v2df)__A, (__v2df)__B);
490 static __inline int
491 _mm_comige_sd (__m128d __A, __m128d __B)
493 return __builtin_ia32_comisdge ((__v2df)__A, (__v2df)__B);
496 static __inline int
497 _mm_comineq_sd (__m128d __A, __m128d __B)
499 return __builtin_ia32_comisdneq ((__v2df)__A, (__v2df)__B);
502 static __inline int
503 _mm_ucomieq_sd (__m128d __A, __m128d __B)
505 return __builtin_ia32_ucomisdeq ((__v2df)__A, (__v2df)__B);
508 static __inline int
509 _mm_ucomilt_sd (__m128d __A, __m128d __B)
511 return __builtin_ia32_ucomisdlt ((__v2df)__A, (__v2df)__B);
514 static __inline int
515 _mm_ucomile_sd (__m128d __A, __m128d __B)
517 return __builtin_ia32_ucomisdle ((__v2df)__A, (__v2df)__B);
520 static __inline int
521 _mm_ucomigt_sd (__m128d __A, __m128d __B)
523 return __builtin_ia32_ucomisdgt ((__v2df)__A, (__v2df)__B);
526 static __inline int
527 _mm_ucomige_sd (__m128d __A, __m128d __B)
529 return __builtin_ia32_ucomisdge ((__v2df)__A, (__v2df)__B);
532 static __inline int
533 _mm_ucomineq_sd (__m128d __A, __m128d __B)
535 return __builtin_ia32_ucomisdneq ((__v2df)__A, (__v2df)__B);
538 /* Create a vector with element 0 as *P and the rest zero. */
540 static __inline __m128i
541 _mm_load_si128 (__m128i const *__P)
543 return (__m128i) __builtin_ia32_loaddqa ((char const *)__P);
546 static __inline __m128i
547 _mm_loadu_si128 (__m128i const *__P)
549 return (__m128i) __builtin_ia32_loaddqu ((char const *)__P);
552 static __inline __m128i
553 _mm_loadl_epi64 (__m128i const *__P)
555 return (__m128i) __builtin_ia32_movq2dq (*(unsigned long long *)__P);
558 static __inline void
559 _mm_store_si128 (__m128i *__P, __m128i __B)
561 __builtin_ia32_storedqa ((char *)__P, (__v16qi)__B);
564 static __inline void
565 _mm_storeu_si128 (__m128i *__P, __m128i __B)
567 __builtin_ia32_storedqu ((char *)__P, (__v16qi)__B);
570 static __inline void
571 _mm_storel_epi64 (__m128i *__P, __m128i __B)
573 *(long long *)__P = __builtin_ia32_movdq2q ((__v2di)__B);
576 static __inline __m64
577 _mm_movepi64_pi64 (__m128i __B)
579 return (__m64) __builtin_ia32_movdq2q ((__v2di)__B);
582 static __inline __m128i
583 _mm_move_epi64 (__m128i __A)
585 return (__m128i) __builtin_ia32_movq ((__v2di)__A);
588 /* Create a vector of zeros. */
589 static __inline __m128i
590 _mm_setzero_si128 (void)
592 return (__m128i) __builtin_ia32_setzero128 ();
595 static __inline __m128i
596 _mm_set_epi64 (__m64 __A, __m64 __B)
598 __v2di __tmp = (__v2di)__builtin_ia32_movq2dq ((unsigned long long)__A);
599 __v2di __tmp2 = (__v2di)__builtin_ia32_movq2dq ((unsigned long long)__B);
600 return (__m128i)__builtin_ia32_punpcklqdq128 (__tmp2, __tmp);
603 /* Create the vector [Z Y X W]. */
604 static __inline __m128i
605 _mm_set_epi32 (int __Z, int __Y, int __X, int __W)
607 union {
608 int __a[4];
609 __m128i __v;
610 } __u;
612 __u.__a[0] = __W;
613 __u.__a[1] = __X;
614 __u.__a[2] = __Y;
615 __u.__a[3] = __Z;
617 return __u.__v;
620 #ifdef __x86_64__
621 /* Create the vector [Z Y]. */
622 static __inline __m128i
623 _mm_set_epi64x (long long __Z, long long __Y)
625 union {
626 long __a[2];
627 __m128i __v;
628 } __u;
630 __u.__a[0] = __Y;
631 __u.__a[1] = __Z;
633 return __u.__v;
635 #endif
637 /* Create the vector [S T U V Z Y X W]. */
638 static __inline __m128i
639 _mm_set_epi16 (short __Z, short __Y, short __X, short __W,
640 short __V, short __U, short __T, short __S)
642 union {
643 short __a[8];
644 __m128i __v;
645 } __u;
647 __u.__a[0] = __S;
648 __u.__a[1] = __T;
649 __u.__a[2] = __U;
650 __u.__a[3] = __V;
651 __u.__a[4] = __W;
652 __u.__a[5] = __X;
653 __u.__a[6] = __Y;
654 __u.__a[7] = __Z;
656 return __u.__v;
659 /* Create the vector [S T U V Z Y X W]. */
660 static __inline __m128i
661 _mm_set_epi8 (char __Z, char __Y, char __X, char __W,
662 char __V, char __U, char __T, char __S,
663 char __Z1, char __Y1, char __X1, char __W1,
664 char __V1, char __U1, char __T1, char __S1)
666 union {
667 char __a[16];
668 __m128i __v;
669 } __u;
671 __u.__a[0] = __S1;
672 __u.__a[1] = __T1;
673 __u.__a[2] = __U1;
674 __u.__a[3] = __V1;
675 __u.__a[4] = __W1;
676 __u.__a[5] = __X1;
677 __u.__a[6] = __Y1;
678 __u.__a[7] = __Z1;
679 __u.__a[8] = __S;
680 __u.__a[9] = __T;
681 __u.__a[10] = __U;
682 __u.__a[11] = __V;
683 __u.__a[12] = __W;
684 __u.__a[13] = __X;
685 __u.__a[14] = __Y;
686 __u.__a[15] = __Z;
688 return __u.__v;
691 static __inline __m128i
692 _mm_set1_epi64 (__m64 __A)
694 __v2di __tmp = (__v2di)__builtin_ia32_movq2dq ((unsigned long long)__A);
695 return (__m128i)__builtin_ia32_punpcklqdq128 (__tmp, __tmp);
698 static __inline __m128i
699 _mm_set1_epi32 (int __A)
701 __v4si __tmp = (__v4si)__builtin_ia32_loadd (&__A);
702 return (__m128i) __builtin_ia32_pshufd ((__v4si)__tmp, _MM_SHUFFLE (0,0,0,0));
705 #ifdef __x86_64__
706 static __inline __m128i
707 _mm_set1_epi64x (long long __A)
709 __v2di __tmp = (__v2di)__builtin_ia32_movq2dq ((unsigned long long)__A);
710 return (__m128i) __builtin_ia32_shufpd ((__v2df)__tmp, (__v2df)__tmp, _MM_SHUFFLE2 (0,0));
712 #endif
714 static __inline __m128i
715 _mm_set1_epi16 (short __A)
717 int __Acopy = (unsigned short)__A;
718 __v4si __tmp = (__v4si)__builtin_ia32_loadd (&__Acopy);
719 __tmp = (__v4si)__builtin_ia32_punpcklwd128 ((__v8hi)__tmp, (__v8hi)__tmp);
720 return (__m128i) __builtin_ia32_pshufd ((__v4si)__tmp, _MM_SHUFFLE (0,0,0,0));
723 static __inline __m128i
724 _mm_set1_epi8 (char __A)
726 int __Acopy = (unsigned char)__A;
727 __v4si __tmp = (__v4si)__builtin_ia32_loadd (&__Acopy);
728 __tmp = (__v4si)__builtin_ia32_punpcklbw128 ((__v16qi)__tmp, (__v16qi)__tmp);
729 __tmp = (__v4si)__builtin_ia32_punpcklbw128 ((__v16qi)__tmp, (__v16qi)__tmp);
730 return (__m128i) __builtin_ia32_pshufd ((__v4si)__tmp, _MM_SHUFFLE (0,0,0,0));
733 static __inline __m128i
734 _mm_setr_epi64 (__m64 __A, __m64 __B)
736 __v2di __tmp = (__v2di)__builtin_ia32_movq2dq ((unsigned long long)__A);
737 __v2di __tmp2 = (__v2di)__builtin_ia32_movq2dq ((unsigned long long)__B);
738 return (__m128i)__builtin_ia32_punpcklqdq128 (__tmp, __tmp2);
741 /* Create the vector [Z Y X W]. */
742 static __inline __m128i
743 _mm_setr_epi32 (int __W, int __X, int __Y, int __Z)
745 union {
746 int __a[4];
747 __m128i __v;
748 } __u;
750 __u.__a[0] = __W;
751 __u.__a[1] = __X;
752 __u.__a[2] = __Y;
753 __u.__a[3] = __Z;
755 return __u.__v;
757 /* Create the vector [S T U V Z Y X W]. */
758 static __inline __m128i
759 _mm_setr_epi16 (short __S, short __T, short __U, short __V,
760 short __W, short __X, short __Y, short __Z)
762 union {
763 short __a[8];
764 __m128i __v;
765 } __u;
767 __u.__a[0] = __S;
768 __u.__a[1] = __T;
769 __u.__a[2] = __U;
770 __u.__a[3] = __V;
771 __u.__a[4] = __W;
772 __u.__a[5] = __X;
773 __u.__a[6] = __Y;
774 __u.__a[7] = __Z;
776 return __u.__v;
779 /* Create the vector [S T U V Z Y X W]. */
780 static __inline __m128i
781 _mm_setr_epi8 (char __S1, char __T1, char __U1, char __V1,
782 char __W1, char __X1, char __Y1, char __Z1,
783 char __S, char __T, char __U, char __V,
784 char __W, char __X, char __Y, char __Z)
786 union {
787 char __a[16];
788 __m128i __v;
789 } __u;
791 __u.__a[0] = __S1;
792 __u.__a[1] = __T1;
793 __u.__a[2] = __U1;
794 __u.__a[3] = __V1;
795 __u.__a[4] = __W1;
796 __u.__a[5] = __X1;
797 __u.__a[6] = __Y1;
798 __u.__a[7] = __Z1;
799 __u.__a[8] = __S;
800 __u.__a[9] = __T;
801 __u.__a[10] = __U;
802 __u.__a[11] = __V;
803 __u.__a[12] = __W;
804 __u.__a[13] = __X;
805 __u.__a[14] = __Y;
806 __u.__a[15] = __Z;
808 return __u.__v;
811 static __inline __m128d
812 _mm_cvtepi32_pd (__m128i __A)
814 return (__m128d)__builtin_ia32_cvtdq2pd ((__v4si) __A);
817 static __inline __m128
818 _mm_cvtepi32_ps (__m128i __A)
820 return (__m128)__builtin_ia32_cvtdq2ps ((__v4si) __A);
823 static __inline __m128i
824 _mm_cvtpd_epi32 (__m128d __A)
826 return (__m128i)__builtin_ia32_cvtpd2dq ((__v2df) __A);
829 static __inline __m64
830 _mm_cvtpd_pi32 (__m128d __A)
832 return (__m64)__builtin_ia32_cvtpd2pi ((__v2df) __A);
835 static __inline __m128
836 _mm_cvtpd_ps (__m128d __A)
838 return (__m128)__builtin_ia32_cvtpd2ps ((__v2df) __A);
841 static __inline __m128i
842 _mm_cvttpd_epi32 (__m128d __A)
844 return (__m128i)__builtin_ia32_cvttpd2dq ((__v2df) __A);
847 static __inline __m64
848 _mm_cvttpd_pi32 (__m128d __A)
850 return (__m64)__builtin_ia32_cvttpd2pi ((__v2df) __A);
853 static __inline __m128d
854 _mm_cvtpi32_pd (__m64 __A)
856 return (__m128d)__builtin_ia32_cvtpi2pd ((__v2si) __A);
859 static __inline __m128i
860 _mm_cvtps_epi32 (__m128 __A)
862 return (__m128i)__builtin_ia32_cvtps2dq ((__v4sf) __A);
865 static __inline __m128i
866 _mm_cvttps_epi32 (__m128 __A)
868 return (__m128i)__builtin_ia32_cvttps2dq ((__v4sf) __A);
871 static __inline __m128d
872 _mm_cvtps_pd (__m128 __A)
874 return (__m128d)__builtin_ia32_cvtps2pd ((__v4sf) __A);
877 static __inline int
878 _mm_cvtsd_si32 (__m128d __A)
880 return __builtin_ia32_cvtsd2si ((__v2df) __A);
883 #ifdef __x86_64__
884 static __inline long long
885 _mm_cvtsd_si64x (__m128d __A)
887 return __builtin_ia32_cvtsd2si64 ((__v2df) __A);
889 #endif
891 static __inline int
892 _mm_cvttsd_si32 (__m128d __A)
894 return __builtin_ia32_cvttsd2si ((__v2df) __A);
897 #ifdef __x86_64__
898 static __inline long long
899 _mm_cvttsd_si64x (__m128d __A)
901 return __builtin_ia32_cvttsd2si64 ((__v2df) __A);
903 #endif
905 static __inline __m128
906 _mm_cvtsd_ss (__m128 __A, __m128d __B)
908 return (__m128)__builtin_ia32_cvtsd2ss ((__v4sf) __A, (__v2df) __B);
911 static __inline __m128d
912 _mm_cvtsi32_sd (__m128d __A, int __B)
914 return (__m128d)__builtin_ia32_cvtsi2sd ((__v2df) __A, __B);
917 #ifdef __x86_64__
918 static __inline __m128d
919 _mm_cvtsi64x_sd (__m128d __A, long long __B)
921 return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B);
923 #endif
925 static __inline __m128d
926 _mm_cvtss_sd (__m128d __A, __m128 __B)
928 return (__m128d)__builtin_ia32_cvtss2sd ((__v2df) __A, (__v4sf)__B);
/* Shuffle A and B with the shufpd builtin under control of the selector C
   (see _MM_SHUFFLE2).  Each argument is parenthesized so that the vector
   casts apply to the whole argument expression, not just its first
   operand.  NOTE(review): presumably a macro rather than a function so
   that C reaches the builtin as a compile-time constant -- confirm.  */
#define _mm_shuffle_pd(__A, __B, __C) ((__m128d)__builtin_ia32_shufpd ((__v2df)(__A), (__v2df)(__B), (__C)))
933 static __inline __m128d
934 _mm_unpackhi_pd (__m128d __A, __m128d __B)
936 return (__m128d)__builtin_ia32_unpckhpd ((__v2df)__A, (__v2df)__B);
939 static __inline __m128d
940 _mm_unpacklo_pd (__m128d __A, __m128d __B)
942 return (__m128d)__builtin_ia32_unpcklpd ((__v2df)__A, (__v2df)__B);
945 static __inline __m128d
946 _mm_loadh_pd (__m128d __A, double const *__B)
948 return (__m128d)__builtin_ia32_loadhpd ((__v2df)__A, (__v2si *)__B);
951 static __inline void
952 _mm_storeh_pd (double *__A, __m128d __B)
954 __builtin_ia32_storehpd ((__v2si *)__A, (__v2df)__B);
957 static __inline __m128d
958 _mm_loadl_pd (__m128d __A, double const *__B)
960 return (__m128d)__builtin_ia32_loadlpd ((__v2df)__A, (__v2si *)__B);
963 static __inline void
964 _mm_storel_pd (double *__A, __m128d __B)
966 __builtin_ia32_storelpd ((__v2si *)__A, (__v2df)__B);
969 static __inline int
970 _mm_movemask_pd (__m128d __A)
972 return __builtin_ia32_movmskpd ((__v2df)__A);
975 static __inline __m128i
976 _mm_packs_epi16 (__m128i __A, __m128i __B)
978 return (__m128i)__builtin_ia32_packsswb128 ((__v8hi)__A, (__v8hi)__B);
981 static __inline __m128i
982 _mm_packs_epi32 (__m128i __A, __m128i __B)
984 return (__m128i)__builtin_ia32_packssdw128 ((__v4si)__A, (__v4si)__B);
987 static __inline __m128i
988 _mm_packus_epi16 (__m128i __A, __m128i __B)
990 return (__m128i)__builtin_ia32_packuswb128 ((__v8hi)__A, (__v8hi)__B);
993 static __inline __m128i
994 _mm_unpackhi_epi8 (__m128i __A, __m128i __B)
996 return (__m128i)__builtin_ia32_punpckhbw128 ((__v16qi)__A, (__v16qi)__B);
999 static __inline __m128i
1000 _mm_unpackhi_epi16 (__m128i __A, __m128i __B)
1002 return (__m128i)__builtin_ia32_punpckhwd128 ((__v8hi)__A, (__v8hi)__B);
1005 static __inline __m128i
1006 _mm_unpackhi_epi32 (__m128i __A, __m128i __B)
1008 return (__m128i)__builtin_ia32_punpckhdq128 ((__v4si)__A, (__v4si)__B);
1011 static __inline __m128i
1012 _mm_unpackhi_epi64 (__m128i __A, __m128i __B)
1014 return (__m128i)__builtin_ia32_punpckhqdq128 ((__v2di)__A, (__v2di)__B);
1017 static __inline __m128i
1018 _mm_unpacklo_epi8 (__m128i __A, __m128i __B)
1020 return (__m128i)__builtin_ia32_punpcklbw128 ((__v16qi)__A, (__v16qi)__B);
1023 static __inline __m128i
1024 _mm_unpacklo_epi16 (__m128i __A, __m128i __B)
1026 return (__m128i)__builtin_ia32_punpcklwd128 ((__v8hi)__A, (__v8hi)__B);
1029 static __inline __m128i
1030 _mm_unpacklo_epi32 (__m128i __A, __m128i __B)
1032 return (__m128i)__builtin_ia32_punpckldq128 ((__v4si)__A, (__v4si)__B);
1035 static __inline __m128i
1036 _mm_unpacklo_epi64 (__m128i __A, __m128i __B)
1038 return (__m128i)__builtin_ia32_punpcklqdq128 ((__v2di)__A, (__v2di)__B);
1041 static __inline __m128i
1042 _mm_add_epi8 (__m128i __A, __m128i __B)
1044 return (__m128i)__builtin_ia32_paddb128 ((__v16qi)__A, (__v16qi)__B);
1047 static __inline __m128i
1048 _mm_add_epi16 (__m128i __A, __m128i __B)
1050 return (__m128i)__builtin_ia32_paddw128 ((__v8hi)__A, (__v8hi)__B);
1053 static __inline __m128i
1054 _mm_add_epi32 (__m128i __A, __m128i __B)
1056 return (__m128i)__builtin_ia32_paddd128 ((__v4si)__A, (__v4si)__B);
1059 static __inline __m128i
1060 _mm_add_epi64 (__m128i __A, __m128i __B)
1062 return (__m128i)__builtin_ia32_paddq128 ((__v2di)__A, (__v2di)__B);
1065 static __inline __m128i
1066 _mm_adds_epi8 (__m128i __A, __m128i __B)
1068 return (__m128i)__builtin_ia32_paddsb128 ((__v16qi)__A, (__v16qi)__B);
1071 static __inline __m128i
1072 _mm_adds_epi16 (__m128i __A, __m128i __B)
1074 return (__m128i)__builtin_ia32_paddsw128 ((__v8hi)__A, (__v8hi)__B);
1077 static __inline __m128i
1078 _mm_adds_epu8 (__m128i __A, __m128i __B)
1080 return (__m128i)__builtin_ia32_paddusb128 ((__v16qi)__A, (__v16qi)__B);
1083 static __inline __m128i
1084 _mm_adds_epu16 (__m128i __A, __m128i __B)
1086 return (__m128i)__builtin_ia32_paddusw128 ((__v8hi)__A, (__v8hi)__B);
1089 static __inline __m128i
1090 _mm_sub_epi8 (__m128i __A, __m128i __B)
1092 return (__m128i)__builtin_ia32_psubb128 ((__v16qi)__A, (__v16qi)__B);
1095 static __inline __m128i
1096 _mm_sub_epi16 (__m128i __A, __m128i __B)
1098 return (__m128i)__builtin_ia32_psubw128 ((__v8hi)__A, (__v8hi)__B);
1101 static __inline __m128i
1102 _mm_sub_epi32 (__m128i __A, __m128i __B)
1104 return (__m128i)__builtin_ia32_psubd128 ((__v4si)__A, (__v4si)__B);
1107 static __inline __m128i
1108 _mm_sub_epi64 (__m128i __A, __m128i __B)
1110 return (__m128i)__builtin_ia32_psubq128 ((__v2di)__A, (__v2di)__B);
1113 static __inline __m128i
1114 _mm_subs_epi8 (__m128i __A, __m128i __B)
1116 return (__m128i)__builtin_ia32_psubsb128 ((__v16qi)__A, (__v16qi)__B);
1119 static __inline __m128i
1120 _mm_subs_epi16 (__m128i __A, __m128i __B)
1122 return (__m128i)__builtin_ia32_psubsw128 ((__v8hi)__A, (__v8hi)__B);
1125 static __inline __m128i
1126 _mm_subs_epu8 (__m128i __A, __m128i __B)
1128 return (__m128i)__builtin_ia32_psubusb128 ((__v16qi)__A, (__v16qi)__B);
1131 static __inline __m128i
1132 _mm_subs_epu16 (__m128i __A, __m128i __B)
1134 return (__m128i)__builtin_ia32_psubusw128 ((__v8hi)__A, (__v8hi)__B);
1137 static __inline __m128i
1138 _mm_madd_epi16 (__m128i __A, __m128i __B)
1140 return (__m128i)__builtin_ia32_pmaddwd128 ((__v8hi)__A, (__v8hi)__B);
1143 static __inline __m128i
1144 _mm_mulhi_epi16 (__m128i __A, __m128i __B)
1146 return (__m128i)__builtin_ia32_pmulhw128 ((__v8hi)__A, (__v8hi)__B);
1149 static __inline __m128i
1150 _mm_mullo_epi16 (__m128i __A, __m128i __B)
1152 return (__m128i)__builtin_ia32_pmullw128 ((__v8hi)__A, (__v8hi)__B);
1155 static __inline __m64
1156 _mm_mul_su32 (__m64 __A, __m64 __B)
1158 return (__m64)__builtin_ia32_pmuludq ((__v2si)__A, (__v2si)__B);
1161 static __inline __m128i
1162 _mm_mul_epu32 (__m128i __A, __m128i __B)
1164 return (__m128i)__builtin_ia32_pmuludq128 ((__v4si)__A, (__v4si)__B);
1167 static __inline __m128i
1168 _mm_sll_epi16 (__m128i __A, __m128i __B)
1170 return (__m128i)__builtin_ia32_psllw128 ((__v8hi)__A, (__v2di)__B);
1173 static __inline __m128i
1174 _mm_sll_epi32 (__m128i __A, __m128i __B)
1176 return (__m128i)__builtin_ia32_pslld128 ((__v4si)__A, (__v2di)__B);
1179 static __inline __m128i
1180 _mm_sll_epi64 (__m128i __A, __m128i __B)
1182 return (__m128i)__builtin_ia32_psllq128 ((__v2di)__A, (__v2di)__B);
1185 static __inline __m128i
1186 _mm_sra_epi16 (__m128i __A, __m128i __B)
1188 return (__m128i)__builtin_ia32_psraw128 ((__v8hi)__A, (__v2di)__B);
1191 static __inline __m128i
1192 _mm_sra_epi32 (__m128i __A, __m128i __B)
1194 return (__m128i)__builtin_ia32_psrad128 ((__v4si)__A, (__v2di)__B);
1197 static __inline __m128i
1198 _mm_srl_epi16 (__m128i __A, __m128i __B)
1200 return (__m128i)__builtin_ia32_psrlw128 ((__v8hi)__A, (__v2di)__B);
1203 static __inline __m128i
1204 _mm_srl_epi32 (__m128i __A, __m128i __B)
1206 return (__m128i)__builtin_ia32_psrld128 ((__v4si)__A, (__v2di)__B);
1209 static __inline __m128i
1210 _mm_srl_epi64 (__m128i __A, __m128i __B)
1212 return (__m128i)__builtin_ia32_psrlq128 ((__v2di)__A, (__v2di)__B);
1215 static __inline __m128i
1216 _mm_slli_epi16 (__m128i __A, int __B)
1218 return (__m128i)__builtin_ia32_psllwi128 ((__v8hi)__A, __B);
1221 static __inline __m128i
1222 _mm_slli_epi32 (__m128i __A, int __B)
1224 return (__m128i)__builtin_ia32_pslldi128 ((__v4si)__A, __B);
1227 static __inline __m128i
1228 _mm_slli_epi64 (__m128i __A, int __B)
1230 return (__m128i)__builtin_ia32_psllqi128 ((__v2di)__A, __B);
1233 static __inline __m128i
1234 _mm_srai_epi16 (__m128i __A, int __B)
1236 return (__m128i)__builtin_ia32_psrawi128 ((__v8hi)__A, __B);
1239 static __inline __m128i
1240 _mm_srai_epi32 (__m128i __A, int __B)
1242 return (__m128i)__builtin_ia32_psradi128 ((__v4si)__A, __B);
/* Whole-register shifts using the psrldqi128 / pslldqi128 builtins.  The
   inline-function forms are kept disabled and the macro forms below are
   the ones in effect.  NOTE(review): presumably the functions are disabled
   because the builtins need the shift count as a compile-time constant --
   confirm before enabling them.  */
#if 0
static __m128i __attribute__((__always_inline__))
_mm_srli_si128 (__m128i __A, const int __B)
{
  return ((__m128i)__builtin_ia32_psrldqi128 (__A, __B));
}

static __m128i __attribute__((__always_inline__))
_mm_slli_si128 (__m128i __A, const int __B)
{
  return ((__m128i)__builtin_ia32_pslldqi128 (__A, __B));
}
#endif
#define _mm_srli_si128(__A, __B) ((__m128i)__builtin_ia32_psrldqi128 ((__A), (__B)))
#define _mm_slli_si128(__A, __B) ((__m128i)__builtin_ia32_pslldqi128 ((__A), (__B)))
1261 static __inline __m128i
1262 _mm_srli_epi16 (__m128i __A, int __B)
1264 return (__m128i)__builtin_ia32_psrlwi128 ((__v8hi)__A, __B);
1267 static __inline __m128i
1268 _mm_srli_epi32 (__m128i __A, int __B)
1270 return (__m128i)__builtin_ia32_psrldi128 ((__v4si)__A, __B);
1273 static __inline __m128i
1274 _mm_srli_epi64 (__m128i __A, int __B)
1276 return (__m128i)__builtin_ia32_psrlqi128 ((__v2di)__A, __B);
1279 static __inline __m128i
1280 _mm_and_si128 (__m128i __A, __m128i __B)
1282 return (__m128i)__builtin_ia32_pand128 ((__v2di)__A, (__v2di)__B);
1285 static __inline __m128i
1286 _mm_andnot_si128 (__m128i __A, __m128i __B)
1288 return (__m128i)__builtin_ia32_pandn128 ((__v2di)__A, (__v2di)__B);
1291 static __inline __m128i
1292 _mm_or_si128 (__m128i __A, __m128i __B)
1294 return (__m128i)__builtin_ia32_por128 ((__v2di)__A, (__v2di)__B);
1297 static __inline __m128i
1298 _mm_xor_si128 (__m128i __A, __m128i __B)
1300 return (__m128i)__builtin_ia32_pxor128 ((__v2di)__A, (__v2di)__B);
1303 static __inline __m128i
1304 _mm_cmpeq_epi8 (__m128i __A, __m128i __B)
1306 return (__m128i)__builtin_ia32_pcmpeqb128 ((__v16qi)__A, (__v16qi)__B);
1309 static __inline __m128i
1310 _mm_cmpeq_epi16 (__m128i __A, __m128i __B)
1312 return (__m128i)__builtin_ia32_pcmpeqw128 ((__v8hi)__A, (__v8hi)__B);
1315 static __inline __m128i
1316 _mm_cmpeq_epi32 (__m128i __A, __m128i __B)
1318 return (__m128i)__builtin_ia32_pcmpeqd128 ((__v4si)__A, (__v4si)__B);
1321 static __inline __m128i
1322 _mm_cmplt_epi8 (__m128i __A, __m128i __B)
1324 return (__m128i)__builtin_ia32_pcmpgtb128 ((__v16qi)__B, (__v16qi)__A);
1327 static __inline __m128i
1328 _mm_cmplt_epi16 (__m128i __A, __m128i __B)
1330 return (__m128i)__builtin_ia32_pcmpgtw128 ((__v8hi)__B, (__v8hi)__A);
1333 static __inline __m128i
1334 _mm_cmplt_epi32 (__m128i __A, __m128i __B)
1336 return (__m128i)__builtin_ia32_pcmpgtd128 ((__v4si)__B, (__v4si)__A);
1339 static __inline __m128i
1340 _mm_cmpgt_epi8 (__m128i __A, __m128i __B)
1342 return (__m128i)__builtin_ia32_pcmpgtb128 ((__v16qi)__A, (__v16qi)__B);
1345 static __inline __m128i
1346 _mm_cmpgt_epi16 (__m128i __A, __m128i __B)
1348 return (__m128i)__builtin_ia32_pcmpgtw128 ((__v8hi)__A, (__v8hi)__B);
1351 static __inline __m128i
1352 _mm_cmpgt_epi32 (__m128i __A, __m128i __B)
1354 return (__m128i)__builtin_ia32_pcmpgtd128 ((__v4si)__A, (__v4si)__B);
/* Expands to the pextrw builtin applied to __A (viewed as __v8hi) with
   selector __B.  Every argument is parenthesized so that the __v8hi
   cast covers the whole of an expression argument rather than only its
   first operand, and the expansion is wrapped so it behaves as a single
   primary expression at the use site.  */
#define _mm_extract_epi16(__A, __B) \
  (__builtin_ia32_pextrw128 ((__v8hi)(__A), (__B)))
/* Expands to the pinsrw builtin applied to __A (viewed as __v8hi), the
   value __B and the selector __C, with the result cast to __m128i.
   Every argument is parenthesized so that the __v8hi cast covers the
   whole of an expression argument rather than only its first
   operand.  */
#define _mm_insert_epi16(__A, __B, __C) \
  ((__m128i)__builtin_ia32_pinsrw128 ((__v8hi)(__A), (__B), (__C)))
1361 static __inline __m128i
1362 _mm_max_epi16 (__m128i __A, __m128i __B)
1364 return (__m128i)__builtin_ia32_pmaxsw128 ((__v8hi)__A, (__v8hi)__B);
1367 static __inline __m128i
1368 _mm_max_epu8 (__m128i __A, __m128i __B)
1370 return (__m128i)__builtin_ia32_pmaxub128 ((__v16qi)__A, (__v16qi)__B);
1373 static __inline __m128i
1374 _mm_min_epi16 (__m128i __A, __m128i __B)
1376 return (__m128i)__builtin_ia32_pminsw128 ((__v8hi)__A, (__v8hi)__B);
1379 static __inline __m128i
1380 _mm_min_epu8 (__m128i __A, __m128i __B)
1382 return (__m128i)__builtin_ia32_pminub128 ((__v16qi)__A, (__v16qi)__B);
1385 static __inline int
1386 _mm_movemask_epi8 (__m128i __A)
1388 return __builtin_ia32_pmovmskb128 ((__v16qi)__A);
1391 static __inline __m128i
1392 _mm_mulhi_epu16 (__m128i __A, __m128i __B)
1394 return (__m128i)__builtin_ia32_pmulhuw128 ((__v8hi)__A, (__v8hi)__B);
/* Expands to the pshufhw builtin applied to __A (viewed as __v8hi)
   with selector __B, cast to __m128i.  Arguments are parenthesized so
   that the __v8hi cast covers the whole of an expression argument
   rather than only its first operand.  */
#define _mm_shufflehi_epi16(__A, __B) \
  ((__m128i)__builtin_ia32_pshufhw ((__v8hi)(__A), (__B)))
/* Expands to the pshuflw builtin applied to __A (viewed as __v8hi)
   with selector __B, cast to __m128i.  Arguments are parenthesized so
   that the __v8hi cast covers the whole of an expression argument
   rather than only its first operand.  */
#define _mm_shufflelo_epi16(__A, __B) \
  ((__m128i)__builtin_ia32_pshuflw ((__v8hi)(__A), (__B)))
/* Expands to the pshufd builtin applied to __A (viewed as __v4si) with
   selector __B, cast to __m128i.  Arguments are parenthesized so that
   the __v4si cast covers the whole of an expression argument rather
   than only its first operand.  */
#define _mm_shuffle_epi32(__A, __B) \
  ((__m128i)__builtin_ia32_pshufd ((__v4si)(__A), (__B)))
1401 static __inline void
1402 _mm_maskmoveu_si128 (__m128i __A, __m128i __B, char *__C)
1404 __builtin_ia32_maskmovdqu ((__v16qi)__A, (__v16qi)__B, __C);
1407 static __inline __m128i
1408 _mm_avg_epu8 (__m128i __A, __m128i __B)
1410 return (__m128i)__builtin_ia32_pavgb128 ((__v16qi)__A, (__v16qi)__B);
1413 static __inline __m128i
1414 _mm_avg_epu16 (__m128i __A, __m128i __B)
1416 return (__m128i)__builtin_ia32_pavgw128 ((__v8hi)__A, (__v8hi)__B);
1419 static __inline __m128i
1420 _mm_sad_epu8 (__m128i __A, __m128i __B)
1422 return (__m128i)__builtin_ia32_psadbw128 ((__v16qi)__A, (__v16qi)__B);
/* Store the int __B to the location __A through the movnti builtin.
   Nothing is returned.  */
static __inline void
_mm_stream_si32 (int *__A, int __B)
{
  int *__dst = __A;

  __builtin_ia32_movnti (__dst, __B);
}
1431 static __inline void
1432 _mm_stream_si128 (__m128i *__A, __m128i __B)
1434 __builtin_ia32_movntdq ((__v2di *)__A, (__v2di)__B);
1437 static __inline void
1438 _mm_stream_pd (double *__A, __m128d __B)
1440 __builtin_ia32_movntpd (__A, (__v2df)__B);
1443 static __inline __m128i
1444 _mm_movpi64_epi64 (__m64 __A)
1446 return (__m128i)__builtin_ia32_movq2dq ((unsigned long long)__A);
/* Invoke the clflush builtin on the address __A.  Nothing is returned.

   The call is a plain statement: ISO C does not permit a return
   statement with an expression in a function returning void, so the
   builtin's (void) result must not be "returned".  */
static __inline void
_mm_clflush (void const *__A)
{
  __builtin_ia32_clflush (__A);
}
/* Emit the lfence builtin.  Takes no arguments and returns nothing.  */
static __inline void
_mm_lfence (void)
{
  (void) 0;  /* No setup needed; the builtin call is the whole body.  */
  __builtin_ia32_lfence ();
}
/* Emit the mfence builtin.  Takes no arguments and returns nothing.  */
static __inline void
_mm_mfence (void)
{
  (void) 0;  /* No setup needed; the builtin call is the whole body.  */
  __builtin_ia32_mfence ();
}
1467 static __inline __m128i
1468 _mm_cvtsi32_si128 (int __A)
1470 return (__m128i) __builtin_ia32_loadd (&__A);
1473 #ifdef __x86_64__
1474 static __inline __m128i
1475 _mm_cvtsi64x_si128 (long long __A)
1477 return (__m128i) __builtin_ia32_movq2dq (__A);
1479 #endif
1481 static __inline int
1482 _mm_cvtsi128_si32 (__m128i __A)
1484 int __tmp;
1485 __builtin_ia32_stored (&__tmp, (__v4si)__A);
1486 return __tmp;
1489 #ifdef __x86_64__
1490 static __inline long long
1491 _mm_cvtsi128_si64x (__m128i __A)
1493 return __builtin_ia32_movdq2q ((__v2di)__A);
1495 #endif
1497 #endif /* __SSE2__ */
1499 #endif /* _EMMINTRIN_H_INCLUDED */