/* Merge from mainline (gomp-merge-2005-02-26).
   [official-gcc.git] gcc/config/i386/mmintrin.h
   blob 68c8313f9aec3b0c06a1c74601add275337cc5b8  */
/* Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING.  If not, write to
   the Free Software Foundation, 59 Temple Place - Suite 330,
   Boston, MA 02111-1307, USA.  */

/* As a special exception, if you include this header file into source
   files compiled by GCC, this header file does not by itself cause
   the resulting executable to be covered by the GNU General Public
   License.  This exception does not however invalidate any other
   reasons why the executable file might be covered by the GNU General
   Public License.  */

/* Implemented from the specification included in the Intel C++ Compiler
   User Guide and Reference, version 8.0.  */
30 #ifndef _MMINTRIN_H_INCLUDED
31 #define _MMINTRIN_H_INCLUDED
33 #ifndef __MMX__
34 # error "MMX instruction set not enabled"
35 #else
36 /* The data type intended for user use. */
37 typedef int __m64 __attribute__ ((__vector_size__ (8)));
39 /* Internal data types for implementing the intrinsics. */
40 typedef int __v2si __attribute__ ((__vector_size__ (8)));
41 typedef short __v4hi __attribute__ ((__vector_size__ (8)));
42 typedef char __v8qi __attribute__ ((__vector_size__ (8)));
/* Empty the multimedia state: clears the x87/MMX tag word so x87
   floating point can be used again after MMX code (emits EMMS).  */
static __inline void
_mm_empty (void)
{
  __builtin_ia32_emms ();
}

/* Alternate (Intel-compatible) name for _mm_empty.  */
static __inline void
_m_empty (void)
{
  _mm_empty ();
}
57 /* Convert I to a __m64 object. The integer is zero-extended to 64-bits. */
58 static __inline __m64
59 _mm_cvtsi32_si64 (int __i)
61 return (__m64) __builtin_ia32_vec_init_v2si (__i, 0);
64 static __inline __m64
65 _m_from_int (int __i)
67 return _mm_cvtsi32_si64 (__i);
70 #ifdef __x86_64__
71 /* Convert I to a __m64 object. */
72 static __inline __m64
73 _mm_cvtsi64x_si64 (long long __i)
75 return (__m64) __i;
78 /* Convert I to a __m64 object. */
79 static __inline __m64
80 _mm_set_pi64x (long long __i)
82 return (__m64) __i;
84 #endif
86 /* Convert the lower 32 bits of the __m64 object into an integer. */
87 static __inline int
88 _mm_cvtsi64_si32 (__m64 __i)
90 return __builtin_ia32_vec_ext_v2si ((__v2si)__i, 0);
93 static __inline int
94 _m_to_int (__m64 __i)
96 return _mm_cvtsi64_si32 (__i);
99 #ifdef __x86_64__
100 /* Convert the lower 32 bits of the __m64 object into an integer. */
101 static __inline long long
102 _mm_cvtsi64_si64x (__m64 __i)
104 return (long long)__i;
106 #endif
108 /* Pack the four 16-bit values from M1 into the lower four 8-bit values of
109 the result, and the four 16-bit values from M2 into the upper four 8-bit
110 values of the result, all with signed saturation. */
111 static __inline __m64
112 _mm_packs_pi16 (__m64 __m1, __m64 __m2)
114 return (__m64) __builtin_ia32_packsswb ((__v4hi)__m1, (__v4hi)__m2);
117 static __inline __m64
118 _m_packsswb (__m64 __m1, __m64 __m2)
120 return _mm_packs_pi16 (__m1, __m2);
123 /* Pack the two 32-bit values from M1 in to the lower two 16-bit values of
124 the result, and the two 32-bit values from M2 into the upper two 16-bit
125 values of the result, all with signed saturation. */
126 static __inline __m64
127 _mm_packs_pi32 (__m64 __m1, __m64 __m2)
129 return (__m64) __builtin_ia32_packssdw ((__v2si)__m1, (__v2si)__m2);
132 static __inline __m64
133 _m_packssdw (__m64 __m1, __m64 __m2)
135 return _mm_packs_pi32 (__m1, __m2);
138 /* Pack the four 16-bit values from M1 into the lower four 8-bit values of
139 the result, and the four 16-bit values from M2 into the upper four 8-bit
140 values of the result, all with unsigned saturation. */
141 static __inline __m64
142 _mm_packs_pu16 (__m64 __m1, __m64 __m2)
144 return (__m64) __builtin_ia32_packuswb ((__v4hi)__m1, (__v4hi)__m2);
147 static __inline __m64
148 _m_packuswb (__m64 __m1, __m64 __m2)
150 return _mm_packs_pu16 (__m1, __m2);
153 /* Interleave the four 8-bit values from the high half of M1 with the four
154 8-bit values from the high half of M2. */
155 static __inline __m64
156 _mm_unpackhi_pi8 (__m64 __m1, __m64 __m2)
158 return (__m64) __builtin_ia32_punpckhbw ((__v8qi)__m1, (__v8qi)__m2);
161 static __inline __m64
162 _m_punpckhbw (__m64 __m1, __m64 __m2)
164 return _mm_unpackhi_pi8 (__m1, __m2);
167 /* Interleave the two 16-bit values from the high half of M1 with the two
168 16-bit values from the high half of M2. */
169 static __inline __m64
170 _mm_unpackhi_pi16 (__m64 __m1, __m64 __m2)
172 return (__m64) __builtin_ia32_punpckhwd ((__v4hi)__m1, (__v4hi)__m2);
175 static __inline __m64
176 _m_punpckhwd (__m64 __m1, __m64 __m2)
178 return _mm_unpackhi_pi16 (__m1, __m2);
181 /* Interleave the 32-bit value from the high half of M1 with the 32-bit
182 value from the high half of M2. */
183 static __inline __m64
184 _mm_unpackhi_pi32 (__m64 __m1, __m64 __m2)
186 return (__m64) __builtin_ia32_punpckhdq ((__v2si)__m1, (__v2si)__m2);
189 static __inline __m64
190 _m_punpckhdq (__m64 __m1, __m64 __m2)
192 return _mm_unpackhi_pi32 (__m1, __m2);
195 /* Interleave the four 8-bit values from the low half of M1 with the four
196 8-bit values from the low half of M2. */
197 static __inline __m64
198 _mm_unpacklo_pi8 (__m64 __m1, __m64 __m2)
200 return (__m64) __builtin_ia32_punpcklbw ((__v8qi)__m1, (__v8qi)__m2);
203 static __inline __m64
204 _m_punpcklbw (__m64 __m1, __m64 __m2)
206 return _mm_unpacklo_pi8 (__m1, __m2);
209 /* Interleave the two 16-bit values from the low half of M1 with the two
210 16-bit values from the low half of M2. */
211 static __inline __m64
212 _mm_unpacklo_pi16 (__m64 __m1, __m64 __m2)
214 return (__m64) __builtin_ia32_punpcklwd ((__v4hi)__m1, (__v4hi)__m2);
217 static __inline __m64
218 _m_punpcklwd (__m64 __m1, __m64 __m2)
220 return _mm_unpacklo_pi16 (__m1, __m2);
223 /* Interleave the 32-bit value from the low half of M1 with the 32-bit
224 value from the low half of M2. */
225 static __inline __m64
226 _mm_unpacklo_pi32 (__m64 __m1, __m64 __m2)
228 return (__m64) __builtin_ia32_punpckldq ((__v2si)__m1, (__v2si)__m2);
231 static __inline __m64
232 _m_punpckldq (__m64 __m1, __m64 __m2)
234 return _mm_unpacklo_pi32 (__m1, __m2);
237 /* Add the 8-bit values in M1 to the 8-bit values in M2. */
238 static __inline __m64
239 _mm_add_pi8 (__m64 __m1, __m64 __m2)
241 return (__m64) __builtin_ia32_paddb ((__v8qi)__m1, (__v8qi)__m2);
244 static __inline __m64
245 _m_paddb (__m64 __m1, __m64 __m2)
247 return _mm_add_pi8 (__m1, __m2);
250 /* Add the 16-bit values in M1 to the 16-bit values in M2. */
251 static __inline __m64
252 _mm_add_pi16 (__m64 __m1, __m64 __m2)
254 return (__m64) __builtin_ia32_paddw ((__v4hi)__m1, (__v4hi)__m2);
257 static __inline __m64
258 _m_paddw (__m64 __m1, __m64 __m2)
260 return _mm_add_pi16 (__m1, __m2);
263 /* Add the 32-bit values in M1 to the 32-bit values in M2. */
264 static __inline __m64
265 _mm_add_pi32 (__m64 __m1, __m64 __m2)
267 return (__m64) __builtin_ia32_paddd ((__v2si)__m1, (__v2si)__m2);
270 static __inline __m64
271 _m_paddd (__m64 __m1, __m64 __m2)
273 return _mm_add_pi32 (__m1, __m2);
276 /* Add the 64-bit values in M1 to the 64-bit values in M2. */
277 static __inline __m64
278 _mm_add_si64 (__m64 __m1, __m64 __m2)
280 return (__m64) __builtin_ia32_paddq ((long long)__m1, (long long)__m2);
283 /* Add the 8-bit values in M1 to the 8-bit values in M2 using signed
284 saturated arithmetic. */
285 static __inline __m64
286 _mm_adds_pi8 (__m64 __m1, __m64 __m2)
288 return (__m64) __builtin_ia32_paddsb ((__v8qi)__m1, (__v8qi)__m2);
291 static __inline __m64
292 _m_paddsb (__m64 __m1, __m64 __m2)
294 return _mm_adds_pi8 (__m1, __m2);
297 /* Add the 16-bit values in M1 to the 16-bit values in M2 using signed
298 saturated arithmetic. */
299 static __inline __m64
300 _mm_adds_pi16 (__m64 __m1, __m64 __m2)
302 return (__m64) __builtin_ia32_paddsw ((__v4hi)__m1, (__v4hi)__m2);
305 static __inline __m64
306 _m_paddsw (__m64 __m1, __m64 __m2)
308 return _mm_adds_pi16 (__m1, __m2);
311 /* Add the 8-bit values in M1 to the 8-bit values in M2 using unsigned
312 saturated arithmetic. */
313 static __inline __m64
314 _mm_adds_pu8 (__m64 __m1, __m64 __m2)
316 return (__m64) __builtin_ia32_paddusb ((__v8qi)__m1, (__v8qi)__m2);
319 static __inline __m64
320 _m_paddusb (__m64 __m1, __m64 __m2)
322 return _mm_adds_pu8 (__m1, __m2);
325 /* Add the 16-bit values in M1 to the 16-bit values in M2 using unsigned
326 saturated arithmetic. */
327 static __inline __m64
328 _mm_adds_pu16 (__m64 __m1, __m64 __m2)
330 return (__m64) __builtin_ia32_paddusw ((__v4hi)__m1, (__v4hi)__m2);
333 static __inline __m64
334 _m_paddusw (__m64 __m1, __m64 __m2)
336 return _mm_adds_pu16 (__m1, __m2);
339 /* Subtract the 8-bit values in M2 from the 8-bit values in M1. */
340 static __inline __m64
341 _mm_sub_pi8 (__m64 __m1, __m64 __m2)
343 return (__m64) __builtin_ia32_psubb ((__v8qi)__m1, (__v8qi)__m2);
346 static __inline __m64
347 _m_psubb (__m64 __m1, __m64 __m2)
349 return _mm_sub_pi8 (__m1, __m2);
352 /* Subtract the 16-bit values in M2 from the 16-bit values in M1. */
353 static __inline __m64
354 _mm_sub_pi16 (__m64 __m1, __m64 __m2)
356 return (__m64) __builtin_ia32_psubw ((__v4hi)__m1, (__v4hi)__m2);
359 static __inline __m64
360 _m_psubw (__m64 __m1, __m64 __m2)
362 return _mm_sub_pi16 (__m1, __m2);
365 /* Subtract the 32-bit values in M2 from the 32-bit values in M1. */
366 static __inline __m64
367 _mm_sub_pi32 (__m64 __m1, __m64 __m2)
369 return (__m64) __builtin_ia32_psubd ((__v2si)__m1, (__v2si)__m2);
372 static __inline __m64
373 _m_psubd (__m64 __m1, __m64 __m2)
375 return _mm_sub_pi32 (__m1, __m2);
378 /* Add the 64-bit values in M1 to the 64-bit values in M2. */
379 static __inline __m64
380 _mm_sub_si64 (__m64 __m1, __m64 __m2)
382 return (__m64) __builtin_ia32_psubq ((long long)__m1, (long long)__m2);
385 /* Subtract the 8-bit values in M2 from the 8-bit values in M1 using signed
386 saturating arithmetic. */
387 static __inline __m64
388 _mm_subs_pi8 (__m64 __m1, __m64 __m2)
390 return (__m64) __builtin_ia32_psubsb ((__v8qi)__m1, (__v8qi)__m2);
393 static __inline __m64
394 _m_psubsb (__m64 __m1, __m64 __m2)
396 return _mm_subs_pi8 (__m1, __m2);
399 /* Subtract the 16-bit values in M2 from the 16-bit values in M1 using
400 signed saturating arithmetic. */
401 static __inline __m64
402 _mm_subs_pi16 (__m64 __m1, __m64 __m2)
404 return (__m64) __builtin_ia32_psubsw ((__v4hi)__m1, (__v4hi)__m2);
407 static __inline __m64
408 _m_psubsw (__m64 __m1, __m64 __m2)
410 return _mm_subs_pi16 (__m1, __m2);
413 /* Subtract the 8-bit values in M2 from the 8-bit values in M1 using
414 unsigned saturating arithmetic. */
415 static __inline __m64
416 _mm_subs_pu8 (__m64 __m1, __m64 __m2)
418 return (__m64) __builtin_ia32_psubusb ((__v8qi)__m1, (__v8qi)__m2);
421 static __inline __m64
422 _m_psubusb (__m64 __m1, __m64 __m2)
424 return _mm_subs_pu8 (__m1, __m2);
427 /* Subtract the 16-bit values in M2 from the 16-bit values in M1 using
428 unsigned saturating arithmetic. */
429 static __inline __m64
430 _mm_subs_pu16 (__m64 __m1, __m64 __m2)
432 return (__m64) __builtin_ia32_psubusw ((__v4hi)__m1, (__v4hi)__m2);
435 static __inline __m64
436 _m_psubusw (__m64 __m1, __m64 __m2)
438 return _mm_subs_pu16 (__m1, __m2);
441 /* Multiply four 16-bit values in M1 by four 16-bit values in M2 producing
442 four 32-bit intermediate results, which are then summed by pairs to
443 produce two 32-bit results. */
444 static __inline __m64
445 _mm_madd_pi16 (__m64 __m1, __m64 __m2)
447 return (__m64) __builtin_ia32_pmaddwd ((__v4hi)__m1, (__v4hi)__m2);
450 static __inline __m64
451 _m_pmaddwd (__m64 __m1, __m64 __m2)
453 return _mm_madd_pi16 (__m1, __m2);
456 /* Multiply four signed 16-bit values in M1 by four signed 16-bit values in
457 M2 and produce the high 16 bits of the 32-bit results. */
458 static __inline __m64
459 _mm_mulhi_pi16 (__m64 __m1, __m64 __m2)
461 return (__m64) __builtin_ia32_pmulhw ((__v4hi)__m1, (__v4hi)__m2);
464 static __inline __m64
465 _m_pmulhw (__m64 __m1, __m64 __m2)
467 return _mm_mulhi_pi16 (__m1, __m2);
470 /* Multiply four 16-bit values in M1 by four 16-bit values in M2 and produce
471 the low 16 bits of the results. */
472 static __inline __m64
473 _mm_mullo_pi16 (__m64 __m1, __m64 __m2)
475 return (__m64) __builtin_ia32_pmullw ((__v4hi)__m1, (__v4hi)__m2);
478 static __inline __m64
479 _m_pmullw (__m64 __m1, __m64 __m2)
481 return _mm_mullo_pi16 (__m1, __m2);
484 /* Shift four 16-bit values in M left by COUNT. */
485 static __inline __m64
486 _mm_sll_pi16 (__m64 __m, __m64 __count)
488 return (__m64) __builtin_ia32_psllw ((__v4hi)__m, (long long)__count);
491 static __inline __m64
492 _m_psllw (__m64 __m, __m64 __count)
494 return _mm_sll_pi16 (__m, __count);
497 static __inline __m64
498 _mm_slli_pi16 (__m64 __m, int __count)
500 return (__m64) __builtin_ia32_psllw ((__v4hi)__m, __count);
503 static __inline __m64
504 _m_psllwi (__m64 __m, int __count)
506 return _mm_slli_pi16 (__m, __count);
509 /* Shift two 32-bit values in M left by COUNT. */
510 static __inline __m64
511 _mm_sll_pi32 (__m64 __m, __m64 __count)
513 return (__m64) __builtin_ia32_pslld ((__v2si)__m, (long long)__count);
516 static __inline __m64
517 _m_pslld (__m64 __m, __m64 __count)
519 return _mm_sll_pi32 (__m, __count);
522 static __inline __m64
523 _mm_slli_pi32 (__m64 __m, int __count)
525 return (__m64) __builtin_ia32_pslld ((__v2si)__m, __count);
528 static __inline __m64
529 _m_pslldi (__m64 __m, int __count)
531 return _mm_slli_pi32 (__m, __count);
534 /* Shift the 64-bit value in M left by COUNT. */
535 static __inline __m64
536 _mm_sll_si64 (__m64 __m, __m64 __count)
538 return (__m64) __builtin_ia32_psllq ((long long)__m, (long long)__count);
541 static __inline __m64
542 _m_psllq (__m64 __m, __m64 __count)
544 return _mm_sll_si64 (__m, __count);
547 static __inline __m64
548 _mm_slli_si64 (__m64 __m, int __count)
550 return (__m64) __builtin_ia32_psllq ((long long)__m, (long long)__count);
553 static __inline __m64
554 _m_psllqi (__m64 __m, int __count)
556 return _mm_slli_si64 (__m, __count);
559 /* Shift four 16-bit values in M right by COUNT; shift in the sign bit. */
560 static __inline __m64
561 _mm_sra_pi16 (__m64 __m, __m64 __count)
563 return (__m64) __builtin_ia32_psraw ((__v4hi)__m, (long long)__count);
566 static __inline __m64
567 _m_psraw (__m64 __m, __m64 __count)
569 return _mm_sra_pi16 (__m, __count);
572 static __inline __m64
573 _mm_srai_pi16 (__m64 __m, int __count)
575 return (__m64) __builtin_ia32_psraw ((__v4hi)__m, __count);
578 static __inline __m64
579 _m_psrawi (__m64 __m, int __count)
581 return _mm_srai_pi16 (__m, __count);
584 /* Shift two 32-bit values in M right by COUNT; shift in the sign bit. */
585 static __inline __m64
586 _mm_sra_pi32 (__m64 __m, __m64 __count)
588 return (__m64) __builtin_ia32_psrad ((__v2si)__m, (long long)__count);
591 static __inline __m64
592 _m_psrad (__m64 __m, __m64 __count)
594 return _mm_sra_pi32 (__m, __count);
597 static __inline __m64
598 _mm_srai_pi32 (__m64 __m, int __count)
600 return (__m64) __builtin_ia32_psrad ((__v2si)__m, __count);
603 static __inline __m64
604 _m_psradi (__m64 __m, int __count)
606 return _mm_srai_pi32 (__m, __count);
609 /* Shift four 16-bit values in M right by COUNT; shift in zeros. */
610 static __inline __m64
611 _mm_srl_pi16 (__m64 __m, __m64 __count)
613 return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, (long long)__count);
616 static __inline __m64
617 _m_psrlw (__m64 __m, __m64 __count)
619 return _mm_srl_pi16 (__m, __count);
622 static __inline __m64
623 _mm_srli_pi16 (__m64 __m, int __count)
625 return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, __count);
628 static __inline __m64
629 _m_psrlwi (__m64 __m, int __count)
631 return _mm_srli_pi16 (__m, __count);
634 /* Shift two 32-bit values in M right by COUNT; shift in zeros. */
635 static __inline __m64
636 _mm_srl_pi32 (__m64 __m, __m64 __count)
638 return (__m64) __builtin_ia32_psrld ((__v2si)__m, (long long)__count);
641 static __inline __m64
642 _m_psrld (__m64 __m, __m64 __count)
644 return _mm_srl_pi32 (__m, __count);
647 static __inline __m64
648 _mm_srli_pi32 (__m64 __m, int __count)
650 return (__m64) __builtin_ia32_psrld ((__v2si)__m, __count);
653 static __inline __m64
654 _m_psrldi (__m64 __m, int __count)
656 return _mm_srli_pi32 (__m, __count);
659 /* Shift the 64-bit value in M left by COUNT; shift in zeros. */
660 static __inline __m64
661 _mm_srl_si64 (__m64 __m, __m64 __count)
663 return (__m64) __builtin_ia32_psrlq ((long long)__m, (long long)__count);
666 static __inline __m64
667 _m_psrlq (__m64 __m, __m64 __count)
669 return _mm_srl_si64 (__m, __count);
672 static __inline __m64
673 _mm_srli_si64 (__m64 __m, int __count)
675 return (__m64) __builtin_ia32_psrlq ((long long)__m, (long long)__count);
678 static __inline __m64
679 _m_psrlqi (__m64 __m, int __count)
681 return _mm_srli_si64 (__m, __count);
684 /* Bit-wise AND the 64-bit values in M1 and M2. */
685 static __inline __m64
686 _mm_and_si64 (__m64 __m1, __m64 __m2)
688 return __builtin_ia32_pand (__m1, __m2);
691 static __inline __m64
692 _m_pand (__m64 __m1, __m64 __m2)
694 return _mm_and_si64 (__m1, __m2);
697 /* Bit-wise complement the 64-bit value in M1 and bit-wise AND it with the
698 64-bit value in M2. */
699 static __inline __m64
700 _mm_andnot_si64 (__m64 __m1, __m64 __m2)
702 return __builtin_ia32_pandn (__m1, __m2);
705 static __inline __m64
706 _m_pandn (__m64 __m1, __m64 __m2)
708 return _mm_andnot_si64 (__m1, __m2);
711 /* Bit-wise inclusive OR the 64-bit values in M1 and M2. */
712 static __inline __m64
713 _mm_or_si64 (__m64 __m1, __m64 __m2)
715 return __builtin_ia32_por (__m1, __m2);
718 static __inline __m64
719 _m_por (__m64 __m1, __m64 __m2)
721 return _mm_or_si64 (__m1, __m2);
724 /* Bit-wise exclusive OR the 64-bit values in M1 and M2. */
725 static __inline __m64
726 _mm_xor_si64 (__m64 __m1, __m64 __m2)
728 return __builtin_ia32_pxor (__m1, __m2);
731 static __inline __m64
732 _m_pxor (__m64 __m1, __m64 __m2)
734 return _mm_xor_si64 (__m1, __m2);
737 /* Compare eight 8-bit values. The result of the comparison is 0xFF if the
738 test is true and zero if false. */
739 static __inline __m64
740 _mm_cmpeq_pi8 (__m64 __m1, __m64 __m2)
742 return (__m64) __builtin_ia32_pcmpeqb ((__v8qi)__m1, (__v8qi)__m2);
745 static __inline __m64
746 _m_pcmpeqb (__m64 __m1, __m64 __m2)
748 return _mm_cmpeq_pi8 (__m1, __m2);
751 static __inline __m64
752 _mm_cmpgt_pi8 (__m64 __m1, __m64 __m2)
754 return (__m64) __builtin_ia32_pcmpgtb ((__v8qi)__m1, (__v8qi)__m2);
757 static __inline __m64
758 _m_pcmpgtb (__m64 __m1, __m64 __m2)
760 return _mm_cmpgt_pi8 (__m1, __m2);
763 /* Compare four 16-bit values. The result of the comparison is 0xFFFF if
764 the test is true and zero if false. */
765 static __inline __m64
766 _mm_cmpeq_pi16 (__m64 __m1, __m64 __m2)
768 return (__m64) __builtin_ia32_pcmpeqw ((__v4hi)__m1, (__v4hi)__m2);
771 static __inline __m64
772 _m_pcmpeqw (__m64 __m1, __m64 __m2)
774 return _mm_cmpeq_pi16 (__m1, __m2);
777 static __inline __m64
778 _mm_cmpgt_pi16 (__m64 __m1, __m64 __m2)
780 return (__m64) __builtin_ia32_pcmpgtw ((__v4hi)__m1, (__v4hi)__m2);
783 static __inline __m64
784 _m_pcmpgtw (__m64 __m1, __m64 __m2)
786 return _mm_cmpgt_pi16 (__m1, __m2);
789 /* Compare two 32-bit values. The result of the comparison is 0xFFFFFFFF if
790 the test is true and zero if false. */
791 static __inline __m64
792 _mm_cmpeq_pi32 (__m64 __m1, __m64 __m2)
794 return (__m64) __builtin_ia32_pcmpeqd ((__v2si)__m1, (__v2si)__m2);
797 static __inline __m64
798 _m_pcmpeqd (__m64 __m1, __m64 __m2)
800 return _mm_cmpeq_pi32 (__m1, __m2);
803 static __inline __m64
804 _mm_cmpgt_pi32 (__m64 __m1, __m64 __m2)
806 return (__m64) __builtin_ia32_pcmpgtd ((__v2si)__m1, (__v2si)__m2);
809 static __inline __m64
810 _m_pcmpgtd (__m64 __m1, __m64 __m2)
812 return _mm_cmpgt_pi32 (__m1, __m2);
815 /* Creates a 64-bit zero. */
816 static __inline __m64
817 _mm_setzero_si64 (void)
819 return (__m64)0LL;
822 /* Creates a vector of two 32-bit values; I0 is least significant. */
823 static __inline __m64
824 _mm_set_pi32 (int __i1, int __i0)
826 return (__m64) __builtin_ia32_vec_init_v2si (__i0, __i1);
829 /* Creates a vector of four 16-bit values; W0 is least significant. */
830 static __inline __m64
831 _mm_set_pi16 (short __w3, short __w2, short __w1, short __w0)
833 return (__m64) __builtin_ia32_vec_init_v4hi (__w0, __w1, __w2, __w3);
836 /* Creates a vector of eight 8-bit values; B0 is least significant. */
837 static __inline __m64
838 _mm_set_pi8 (char __b7, char __b6, char __b5, char __b4,
839 char __b3, char __b2, char __b1, char __b0)
841 return (__m64) __builtin_ia32_vec_init_v8qi (__b0, __b1, __b2, __b3,
842 __b4, __b5, __b6, __b7);
845 /* Similar, but with the arguments in reverse order. */
846 static __inline __m64
847 _mm_setr_pi32 (int __i0, int __i1)
849 return _mm_set_pi32 (__i1, __i0);
852 static __inline __m64
853 _mm_setr_pi16 (short __w0, short __w1, short __w2, short __w3)
855 return _mm_set_pi16 (__w3, __w2, __w1, __w0);
858 static __inline __m64
859 _mm_setr_pi8 (char __b0, char __b1, char __b2, char __b3,
860 char __b4, char __b5, char __b6, char __b7)
862 return _mm_set_pi8 (__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
865 /* Creates a vector of two 32-bit values, both elements containing I. */
866 static __inline __m64
867 _mm_set1_pi32 (int __i)
869 return _mm_set_pi32 (__i, __i);
872 /* Creates a vector of four 16-bit values, all elements containing W. */
873 static __inline __m64
874 _mm_set1_pi16 (short __w)
876 return _mm_set_pi16 (__w, __w, __w, __w);
879 /* Creates a vector of eight 8-bit values, all elements containing B. */
880 static __inline __m64
881 _mm_set1_pi8 (char __b)
883 return _mm_set_pi8 (__b, __b, __b, __b, __b, __b, __b, __b);
886 #endif /* __MMX__ */
887 #endif /* _MMINTRIN_H_INCLUDED */