/* gcc/config/i386/mmintrin.h — merge from GCC mainline
   (official-gcc.git, blob b98caf0cac2231646e68b7ffc7f9e6b160acdabe).  */

/* Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING.  If not, write to
   the Free Software Foundation, 51 Franklin Street, Fifth Floor,
   Boston, MA 02110-1301, USA.  */

/* As a special exception, if you include this header file into source
   files compiled by GCC, this header file does not by itself cause
   the resulting executable to be covered by the GNU General Public
   License.  This exception does not however invalidate any other
   reasons why the executable file might be covered by the GNU General
   Public License.  */

/* Implemented from the specification included in the Intel C++ Compiler
   User Guide and Reference, version 8.0.  */
30 #ifndef _MMINTRIN_H_INCLUDED
31 #define _MMINTRIN_H_INCLUDED
33 #ifndef __MMX__
34 # error "MMX instruction set not enabled"
35 #else
36 /* The data type intended for user use. */
37 typedef int __m64 __attribute__ ((__vector_size__ (8)));
39 /* Internal data types for implementing the intrinsics. */
40 typedef int __v2si __attribute__ ((__vector_size__ (8)));
41 typedef short __v4hi __attribute__ ((__vector_size__ (8)));
42 typedef char __v8qi __attribute__ ((__vector_size__ (8)));
/* Empty the multimedia state: clear the x87/MMX tag word (EMMS) so the
   FPU can be used again after MMX code.  */
static __inline void __attribute__((__always_inline__))
_mm_empty (void)
{
  __builtin_ia32_emms ();
}

/* Official Intel mnemonic-style alias for _mm_empty.  */
static __inline void __attribute__((__always_inline__))
_m_empty (void)
{
  _mm_empty ();
}
57 /* Convert I to a __m64 object. The integer is zero-extended to 64-bits. */
58 static __inline __m64 __attribute__((__always_inline__))
59 _mm_cvtsi32_si64 (int __i)
61 return (__m64) __builtin_ia32_vec_init_v2si (__i, 0);
64 static __inline __m64 __attribute__((__always_inline__))
65 _m_from_int (int __i)
67 return _mm_cvtsi32_si64 (__i);
70 #ifdef __x86_64__
71 /* Convert I to a __m64 object. */
72 static __inline __m64 __attribute__((__always_inline__))
73 _mm_cvtsi64x_si64 (long long __i)
75 return (__m64) __i;
78 /* Convert I to a __m64 object. */
79 static __inline __m64 __attribute__((__always_inline__))
80 _mm_set_pi64x (long long __i)
82 return (__m64) __i;
84 #endif
86 /* Convert the lower 32 bits of the __m64 object into an integer. */
87 static __inline int __attribute__((__always_inline__))
88 _mm_cvtsi64_si32 (__m64 __i)
90 return __builtin_ia32_vec_ext_v2si ((__v2si)__i, 0);
93 static __inline int __attribute__((__always_inline__))
94 _m_to_int (__m64 __i)
96 return _mm_cvtsi64_si32 (__i);
99 #ifdef __x86_64__
100 /* Convert the lower 32 bits of the __m64 object into an integer. */
101 static __inline long long __attribute__((__always_inline__))
102 _mm_cvtsi64_si64x (__m64 __i)
104 return (long long)__i;
106 #endif
108 /* Pack the four 16-bit values from M1 into the lower four 8-bit values of
109 the result, and the four 16-bit values from M2 into the upper four 8-bit
110 values of the result, all with signed saturation. */
111 static __inline __m64 __attribute__((__always_inline__))
112 _mm_packs_pi16 (__m64 __m1, __m64 __m2)
114 return (__m64) __builtin_ia32_packsswb ((__v4hi)__m1, (__v4hi)__m2);
117 static __inline __m64 __attribute__((__always_inline__))
118 _m_packsswb (__m64 __m1, __m64 __m2)
120 return _mm_packs_pi16 (__m1, __m2);
123 /* Pack the two 32-bit values from M1 in to the lower two 16-bit values of
124 the result, and the two 32-bit values from M2 into the upper two 16-bit
125 values of the result, all with signed saturation. */
126 static __inline __m64 __attribute__((__always_inline__))
127 _mm_packs_pi32 (__m64 __m1, __m64 __m2)
129 return (__m64) __builtin_ia32_packssdw ((__v2si)__m1, (__v2si)__m2);
132 static __inline __m64 __attribute__((__always_inline__))
133 _m_packssdw (__m64 __m1, __m64 __m2)
135 return _mm_packs_pi32 (__m1, __m2);
138 /* Pack the four 16-bit values from M1 into the lower four 8-bit values of
139 the result, and the four 16-bit values from M2 into the upper four 8-bit
140 values of the result, all with unsigned saturation. */
141 static __inline __m64 __attribute__((__always_inline__))
142 _mm_packs_pu16 (__m64 __m1, __m64 __m2)
144 return (__m64) __builtin_ia32_packuswb ((__v4hi)__m1, (__v4hi)__m2);
147 static __inline __m64 __attribute__((__always_inline__))
148 _m_packuswb (__m64 __m1, __m64 __m2)
150 return _mm_packs_pu16 (__m1, __m2);
153 /* Interleave the four 8-bit values from the high half of M1 with the four
154 8-bit values from the high half of M2. */
155 static __inline __m64 __attribute__((__always_inline__))
156 _mm_unpackhi_pi8 (__m64 __m1, __m64 __m2)
158 return (__m64) __builtin_ia32_punpckhbw ((__v8qi)__m1, (__v8qi)__m2);
161 static __inline __m64 __attribute__((__always_inline__))
162 _m_punpckhbw (__m64 __m1, __m64 __m2)
164 return _mm_unpackhi_pi8 (__m1, __m2);
167 /* Interleave the two 16-bit values from the high half of M1 with the two
168 16-bit values from the high half of M2. */
169 static __inline __m64 __attribute__((__always_inline__))
170 _mm_unpackhi_pi16 (__m64 __m1, __m64 __m2)
172 return (__m64) __builtin_ia32_punpckhwd ((__v4hi)__m1, (__v4hi)__m2);
175 static __inline __m64 __attribute__((__always_inline__))
176 _m_punpckhwd (__m64 __m1, __m64 __m2)
178 return _mm_unpackhi_pi16 (__m1, __m2);
181 /* Interleave the 32-bit value from the high half of M1 with the 32-bit
182 value from the high half of M2. */
183 static __inline __m64 __attribute__((__always_inline__))
184 _mm_unpackhi_pi32 (__m64 __m1, __m64 __m2)
186 return (__m64) __builtin_ia32_punpckhdq ((__v2si)__m1, (__v2si)__m2);
189 static __inline __m64 __attribute__((__always_inline__))
190 _m_punpckhdq (__m64 __m1, __m64 __m2)
192 return _mm_unpackhi_pi32 (__m1, __m2);
195 /* Interleave the four 8-bit values from the low half of M1 with the four
196 8-bit values from the low half of M2. */
197 static __inline __m64 __attribute__((__always_inline__))
198 _mm_unpacklo_pi8 (__m64 __m1, __m64 __m2)
200 return (__m64) __builtin_ia32_punpcklbw ((__v8qi)__m1, (__v8qi)__m2);
203 static __inline __m64 __attribute__((__always_inline__))
204 _m_punpcklbw (__m64 __m1, __m64 __m2)
206 return _mm_unpacklo_pi8 (__m1, __m2);
209 /* Interleave the two 16-bit values from the low half of M1 with the two
210 16-bit values from the low half of M2. */
211 static __inline __m64 __attribute__((__always_inline__))
212 _mm_unpacklo_pi16 (__m64 __m1, __m64 __m2)
214 return (__m64) __builtin_ia32_punpcklwd ((__v4hi)__m1, (__v4hi)__m2);
217 static __inline __m64 __attribute__((__always_inline__))
218 _m_punpcklwd (__m64 __m1, __m64 __m2)
220 return _mm_unpacklo_pi16 (__m1, __m2);
223 /* Interleave the 32-bit value from the low half of M1 with the 32-bit
224 value from the low half of M2. */
225 static __inline __m64 __attribute__((__always_inline__))
226 _mm_unpacklo_pi32 (__m64 __m1, __m64 __m2)
228 return (__m64) __builtin_ia32_punpckldq ((__v2si)__m1, (__v2si)__m2);
231 static __inline __m64 __attribute__((__always_inline__))
232 _m_punpckldq (__m64 __m1, __m64 __m2)
234 return _mm_unpacklo_pi32 (__m1, __m2);
237 /* Add the 8-bit values in M1 to the 8-bit values in M2. */
238 static __inline __m64 __attribute__((__always_inline__))
239 _mm_add_pi8 (__m64 __m1, __m64 __m2)
241 return (__m64) __builtin_ia32_paddb ((__v8qi)__m1, (__v8qi)__m2);
244 static __inline __m64 __attribute__((__always_inline__))
245 _m_paddb (__m64 __m1, __m64 __m2)
247 return _mm_add_pi8 (__m1, __m2);
250 /* Add the 16-bit values in M1 to the 16-bit values in M2. */
251 static __inline __m64 __attribute__((__always_inline__))
252 _mm_add_pi16 (__m64 __m1, __m64 __m2)
254 return (__m64) __builtin_ia32_paddw ((__v4hi)__m1, (__v4hi)__m2);
257 static __inline __m64 __attribute__((__always_inline__))
258 _m_paddw (__m64 __m1, __m64 __m2)
260 return _mm_add_pi16 (__m1, __m2);
263 /* Add the 32-bit values in M1 to the 32-bit values in M2. */
264 static __inline __m64 __attribute__((__always_inline__))
265 _mm_add_pi32 (__m64 __m1, __m64 __m2)
267 return (__m64) __builtin_ia32_paddd ((__v2si)__m1, (__v2si)__m2);
270 static __inline __m64 __attribute__((__always_inline__))
271 _m_paddd (__m64 __m1, __m64 __m2)
273 return _mm_add_pi32 (__m1, __m2);
276 /* Add the 64-bit values in M1 to the 64-bit values in M2. */
277 #ifdef __SSE2__
278 static __inline __m64 __attribute__((__always_inline__))
279 _mm_add_si64 (__m64 __m1, __m64 __m2)
281 return (__m64) __builtin_ia32_paddq ((long long)__m1, (long long)__m2);
283 #endif
285 /* Add the 8-bit values in M1 to the 8-bit values in M2 using signed
286 saturated arithmetic. */
287 static __inline __m64 __attribute__((__always_inline__))
288 _mm_adds_pi8 (__m64 __m1, __m64 __m2)
290 return (__m64) __builtin_ia32_paddsb ((__v8qi)__m1, (__v8qi)__m2);
293 static __inline __m64 __attribute__((__always_inline__))
294 _m_paddsb (__m64 __m1, __m64 __m2)
296 return _mm_adds_pi8 (__m1, __m2);
299 /* Add the 16-bit values in M1 to the 16-bit values in M2 using signed
300 saturated arithmetic. */
301 static __inline __m64 __attribute__((__always_inline__))
302 _mm_adds_pi16 (__m64 __m1, __m64 __m2)
304 return (__m64) __builtin_ia32_paddsw ((__v4hi)__m1, (__v4hi)__m2);
307 static __inline __m64 __attribute__((__always_inline__))
308 _m_paddsw (__m64 __m1, __m64 __m2)
310 return _mm_adds_pi16 (__m1, __m2);
313 /* Add the 8-bit values in M1 to the 8-bit values in M2 using unsigned
314 saturated arithmetic. */
315 static __inline __m64 __attribute__((__always_inline__))
316 _mm_adds_pu8 (__m64 __m1, __m64 __m2)
318 return (__m64) __builtin_ia32_paddusb ((__v8qi)__m1, (__v8qi)__m2);
321 static __inline __m64 __attribute__((__always_inline__))
322 _m_paddusb (__m64 __m1, __m64 __m2)
324 return _mm_adds_pu8 (__m1, __m2);
327 /* Add the 16-bit values in M1 to the 16-bit values in M2 using unsigned
328 saturated arithmetic. */
329 static __inline __m64 __attribute__((__always_inline__))
330 _mm_adds_pu16 (__m64 __m1, __m64 __m2)
332 return (__m64) __builtin_ia32_paddusw ((__v4hi)__m1, (__v4hi)__m2);
335 static __inline __m64 __attribute__((__always_inline__))
336 _m_paddusw (__m64 __m1, __m64 __m2)
338 return _mm_adds_pu16 (__m1, __m2);
341 /* Subtract the 8-bit values in M2 from the 8-bit values in M1. */
342 static __inline __m64 __attribute__((__always_inline__))
343 _mm_sub_pi8 (__m64 __m1, __m64 __m2)
345 return (__m64) __builtin_ia32_psubb ((__v8qi)__m1, (__v8qi)__m2);
348 static __inline __m64 __attribute__((__always_inline__))
349 _m_psubb (__m64 __m1, __m64 __m2)
351 return _mm_sub_pi8 (__m1, __m2);
354 /* Subtract the 16-bit values in M2 from the 16-bit values in M1. */
355 static __inline __m64 __attribute__((__always_inline__))
356 _mm_sub_pi16 (__m64 __m1, __m64 __m2)
358 return (__m64) __builtin_ia32_psubw ((__v4hi)__m1, (__v4hi)__m2);
361 static __inline __m64 __attribute__((__always_inline__))
362 _m_psubw (__m64 __m1, __m64 __m2)
364 return _mm_sub_pi16 (__m1, __m2);
367 /* Subtract the 32-bit values in M2 from the 32-bit values in M1. */
368 static __inline __m64 __attribute__((__always_inline__))
369 _mm_sub_pi32 (__m64 __m1, __m64 __m2)
371 return (__m64) __builtin_ia32_psubd ((__v2si)__m1, (__v2si)__m2);
374 static __inline __m64 __attribute__((__always_inline__))
375 _m_psubd (__m64 __m1, __m64 __m2)
377 return _mm_sub_pi32 (__m1, __m2);
380 /* Add the 64-bit values in M1 to the 64-bit values in M2. */
381 #ifdef __SSE2__
382 static __inline __m64 __attribute__((__always_inline__))
383 _mm_sub_si64 (__m64 __m1, __m64 __m2)
385 return (__m64) __builtin_ia32_psubq ((long long)__m1, (long long)__m2);
387 #endif
389 /* Subtract the 8-bit values in M2 from the 8-bit values in M1 using signed
390 saturating arithmetic. */
391 static __inline __m64 __attribute__((__always_inline__))
392 _mm_subs_pi8 (__m64 __m1, __m64 __m2)
394 return (__m64) __builtin_ia32_psubsb ((__v8qi)__m1, (__v8qi)__m2);
397 static __inline __m64 __attribute__((__always_inline__))
398 _m_psubsb (__m64 __m1, __m64 __m2)
400 return _mm_subs_pi8 (__m1, __m2);
403 /* Subtract the 16-bit values in M2 from the 16-bit values in M1 using
404 signed saturating arithmetic. */
405 static __inline __m64 __attribute__((__always_inline__))
406 _mm_subs_pi16 (__m64 __m1, __m64 __m2)
408 return (__m64) __builtin_ia32_psubsw ((__v4hi)__m1, (__v4hi)__m2);
411 static __inline __m64 __attribute__((__always_inline__))
412 _m_psubsw (__m64 __m1, __m64 __m2)
414 return _mm_subs_pi16 (__m1, __m2);
417 /* Subtract the 8-bit values in M2 from the 8-bit values in M1 using
418 unsigned saturating arithmetic. */
419 static __inline __m64 __attribute__((__always_inline__))
420 _mm_subs_pu8 (__m64 __m1, __m64 __m2)
422 return (__m64) __builtin_ia32_psubusb ((__v8qi)__m1, (__v8qi)__m2);
425 static __inline __m64 __attribute__((__always_inline__))
426 _m_psubusb (__m64 __m1, __m64 __m2)
428 return _mm_subs_pu8 (__m1, __m2);
431 /* Subtract the 16-bit values in M2 from the 16-bit values in M1 using
432 unsigned saturating arithmetic. */
433 static __inline __m64 __attribute__((__always_inline__))
434 _mm_subs_pu16 (__m64 __m1, __m64 __m2)
436 return (__m64) __builtin_ia32_psubusw ((__v4hi)__m1, (__v4hi)__m2);
439 static __inline __m64 __attribute__((__always_inline__))
440 _m_psubusw (__m64 __m1, __m64 __m2)
442 return _mm_subs_pu16 (__m1, __m2);
445 /* Multiply four 16-bit values in M1 by four 16-bit values in M2 producing
446 four 32-bit intermediate results, which are then summed by pairs to
447 produce two 32-bit results. */
448 static __inline __m64 __attribute__((__always_inline__))
449 _mm_madd_pi16 (__m64 __m1, __m64 __m2)
451 return (__m64) __builtin_ia32_pmaddwd ((__v4hi)__m1, (__v4hi)__m2);
454 static __inline __m64 __attribute__((__always_inline__))
455 _m_pmaddwd (__m64 __m1, __m64 __m2)
457 return _mm_madd_pi16 (__m1, __m2);
460 /* Multiply four signed 16-bit values in M1 by four signed 16-bit values in
461 M2 and produce the high 16 bits of the 32-bit results. */
462 static __inline __m64 __attribute__((__always_inline__))
463 _mm_mulhi_pi16 (__m64 __m1, __m64 __m2)
465 return (__m64) __builtin_ia32_pmulhw ((__v4hi)__m1, (__v4hi)__m2);
468 static __inline __m64 __attribute__((__always_inline__))
469 _m_pmulhw (__m64 __m1, __m64 __m2)
471 return _mm_mulhi_pi16 (__m1, __m2);
474 /* Multiply four 16-bit values in M1 by four 16-bit values in M2 and produce
475 the low 16 bits of the results. */
476 static __inline __m64 __attribute__((__always_inline__))
477 _mm_mullo_pi16 (__m64 __m1, __m64 __m2)
479 return (__m64) __builtin_ia32_pmullw ((__v4hi)__m1, (__v4hi)__m2);
482 static __inline __m64 __attribute__((__always_inline__))
483 _m_pmullw (__m64 __m1, __m64 __m2)
485 return _mm_mullo_pi16 (__m1, __m2);
488 /* Shift four 16-bit values in M left by COUNT. */
489 static __inline __m64 __attribute__((__always_inline__))
490 _mm_sll_pi16 (__m64 __m, __m64 __count)
492 return (__m64) __builtin_ia32_psllw ((__v4hi)__m, (long long)__count);
495 static __inline __m64 __attribute__((__always_inline__))
496 _m_psllw (__m64 __m, __m64 __count)
498 return _mm_sll_pi16 (__m, __count);
501 static __inline __m64 __attribute__((__always_inline__))
502 _mm_slli_pi16 (__m64 __m, int __count)
504 return (__m64) __builtin_ia32_psllw ((__v4hi)__m, __count);
507 static __inline __m64 __attribute__((__always_inline__))
508 _m_psllwi (__m64 __m, int __count)
510 return _mm_slli_pi16 (__m, __count);
513 /* Shift two 32-bit values in M left by COUNT. */
514 static __inline __m64 __attribute__((__always_inline__))
515 _mm_sll_pi32 (__m64 __m, __m64 __count)
517 return (__m64) __builtin_ia32_pslld ((__v2si)__m, (long long)__count);
520 static __inline __m64 __attribute__((__always_inline__))
521 _m_pslld (__m64 __m, __m64 __count)
523 return _mm_sll_pi32 (__m, __count);
526 static __inline __m64 __attribute__((__always_inline__))
527 _mm_slli_pi32 (__m64 __m, int __count)
529 return (__m64) __builtin_ia32_pslld ((__v2si)__m, __count);
532 static __inline __m64 __attribute__((__always_inline__))
533 _m_pslldi (__m64 __m, int __count)
535 return _mm_slli_pi32 (__m, __count);
538 /* Shift the 64-bit value in M left by COUNT. */
539 static __inline __m64 __attribute__((__always_inline__))
540 _mm_sll_si64 (__m64 __m, __m64 __count)
542 return (__m64) __builtin_ia32_psllq ((long long)__m, (long long)__count);
545 static __inline __m64 __attribute__((__always_inline__))
546 _m_psllq (__m64 __m, __m64 __count)
548 return _mm_sll_si64 (__m, __count);
551 static __inline __m64 __attribute__((__always_inline__))
552 _mm_slli_si64 (__m64 __m, int __count)
554 return (__m64) __builtin_ia32_psllq ((long long)__m, (long long)__count);
557 static __inline __m64 __attribute__((__always_inline__))
558 _m_psllqi (__m64 __m, int __count)
560 return _mm_slli_si64 (__m, __count);
563 /* Shift four 16-bit values in M right by COUNT; shift in the sign bit. */
564 static __inline __m64 __attribute__((__always_inline__))
565 _mm_sra_pi16 (__m64 __m, __m64 __count)
567 return (__m64) __builtin_ia32_psraw ((__v4hi)__m, (long long)__count);
570 static __inline __m64 __attribute__((__always_inline__))
571 _m_psraw (__m64 __m, __m64 __count)
573 return _mm_sra_pi16 (__m, __count);
576 static __inline __m64 __attribute__((__always_inline__))
577 _mm_srai_pi16 (__m64 __m, int __count)
579 return (__m64) __builtin_ia32_psraw ((__v4hi)__m, __count);
582 static __inline __m64 __attribute__((__always_inline__))
583 _m_psrawi (__m64 __m, int __count)
585 return _mm_srai_pi16 (__m, __count);
588 /* Shift two 32-bit values in M right by COUNT; shift in the sign bit. */
589 static __inline __m64 __attribute__((__always_inline__))
590 _mm_sra_pi32 (__m64 __m, __m64 __count)
592 return (__m64) __builtin_ia32_psrad ((__v2si)__m, (long long)__count);
595 static __inline __m64 __attribute__((__always_inline__))
596 _m_psrad (__m64 __m, __m64 __count)
598 return _mm_sra_pi32 (__m, __count);
601 static __inline __m64 __attribute__((__always_inline__))
602 _mm_srai_pi32 (__m64 __m, int __count)
604 return (__m64) __builtin_ia32_psrad ((__v2si)__m, __count);
607 static __inline __m64 __attribute__((__always_inline__))
608 _m_psradi (__m64 __m, int __count)
610 return _mm_srai_pi32 (__m, __count);
613 /* Shift four 16-bit values in M right by COUNT; shift in zeros. */
614 static __inline __m64 __attribute__((__always_inline__))
615 _mm_srl_pi16 (__m64 __m, __m64 __count)
617 return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, (long long)__count);
620 static __inline __m64 __attribute__((__always_inline__))
621 _m_psrlw (__m64 __m, __m64 __count)
623 return _mm_srl_pi16 (__m, __count);
626 static __inline __m64 __attribute__((__always_inline__))
627 _mm_srli_pi16 (__m64 __m, int __count)
629 return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, __count);
632 static __inline __m64 __attribute__((__always_inline__))
633 _m_psrlwi (__m64 __m, int __count)
635 return _mm_srli_pi16 (__m, __count);
638 /* Shift two 32-bit values in M right by COUNT; shift in zeros. */
639 static __inline __m64 __attribute__((__always_inline__))
640 _mm_srl_pi32 (__m64 __m, __m64 __count)
642 return (__m64) __builtin_ia32_psrld ((__v2si)__m, (long long)__count);
645 static __inline __m64 __attribute__((__always_inline__))
646 _m_psrld (__m64 __m, __m64 __count)
648 return _mm_srl_pi32 (__m, __count);
651 static __inline __m64 __attribute__((__always_inline__))
652 _mm_srli_pi32 (__m64 __m, int __count)
654 return (__m64) __builtin_ia32_psrld ((__v2si)__m, __count);
657 static __inline __m64 __attribute__((__always_inline__))
658 _m_psrldi (__m64 __m, int __count)
660 return _mm_srli_pi32 (__m, __count);
663 /* Shift the 64-bit value in M left by COUNT; shift in zeros. */
664 static __inline __m64 __attribute__((__always_inline__))
665 _mm_srl_si64 (__m64 __m, __m64 __count)
667 return (__m64) __builtin_ia32_psrlq ((long long)__m, (long long)__count);
670 static __inline __m64 __attribute__((__always_inline__))
671 _m_psrlq (__m64 __m, __m64 __count)
673 return _mm_srl_si64 (__m, __count);
676 static __inline __m64 __attribute__((__always_inline__))
677 _mm_srli_si64 (__m64 __m, int __count)
679 return (__m64) __builtin_ia32_psrlq ((long long)__m, (long long)__count);
682 static __inline __m64 __attribute__((__always_inline__))
683 _m_psrlqi (__m64 __m, int __count)
685 return _mm_srli_si64 (__m, __count);
688 /* Bit-wise AND the 64-bit values in M1 and M2. */
689 static __inline __m64 __attribute__((__always_inline__))
690 _mm_and_si64 (__m64 __m1, __m64 __m2)
692 return __builtin_ia32_pand (__m1, __m2);
695 static __inline __m64 __attribute__((__always_inline__))
696 _m_pand (__m64 __m1, __m64 __m2)
698 return _mm_and_si64 (__m1, __m2);
701 /* Bit-wise complement the 64-bit value in M1 and bit-wise AND it with the
702 64-bit value in M2. */
703 static __inline __m64 __attribute__((__always_inline__))
704 _mm_andnot_si64 (__m64 __m1, __m64 __m2)
706 return __builtin_ia32_pandn (__m1, __m2);
709 static __inline __m64 __attribute__((__always_inline__))
710 _m_pandn (__m64 __m1, __m64 __m2)
712 return _mm_andnot_si64 (__m1, __m2);
715 /* Bit-wise inclusive OR the 64-bit values in M1 and M2. */
716 static __inline __m64 __attribute__((__always_inline__))
717 _mm_or_si64 (__m64 __m1, __m64 __m2)
719 return __builtin_ia32_por (__m1, __m2);
722 static __inline __m64 __attribute__((__always_inline__))
723 _m_por (__m64 __m1, __m64 __m2)
725 return _mm_or_si64 (__m1, __m2);
728 /* Bit-wise exclusive OR the 64-bit values in M1 and M2. */
729 static __inline __m64 __attribute__((__always_inline__))
730 _mm_xor_si64 (__m64 __m1, __m64 __m2)
732 return __builtin_ia32_pxor (__m1, __m2);
735 static __inline __m64 __attribute__((__always_inline__))
736 _m_pxor (__m64 __m1, __m64 __m2)
738 return _mm_xor_si64 (__m1, __m2);
741 /* Compare eight 8-bit values. The result of the comparison is 0xFF if the
742 test is true and zero if false. */
743 static __inline __m64 __attribute__((__always_inline__))
744 _mm_cmpeq_pi8 (__m64 __m1, __m64 __m2)
746 return (__m64) __builtin_ia32_pcmpeqb ((__v8qi)__m1, (__v8qi)__m2);
749 static __inline __m64 __attribute__((__always_inline__))
750 _m_pcmpeqb (__m64 __m1, __m64 __m2)
752 return _mm_cmpeq_pi8 (__m1, __m2);
755 static __inline __m64 __attribute__((__always_inline__))
756 _mm_cmpgt_pi8 (__m64 __m1, __m64 __m2)
758 return (__m64) __builtin_ia32_pcmpgtb ((__v8qi)__m1, (__v8qi)__m2);
761 static __inline __m64 __attribute__((__always_inline__))
762 _m_pcmpgtb (__m64 __m1, __m64 __m2)
764 return _mm_cmpgt_pi8 (__m1, __m2);
767 /* Compare four 16-bit values. The result of the comparison is 0xFFFF if
768 the test is true and zero if false. */
769 static __inline __m64 __attribute__((__always_inline__))
770 _mm_cmpeq_pi16 (__m64 __m1, __m64 __m2)
772 return (__m64) __builtin_ia32_pcmpeqw ((__v4hi)__m1, (__v4hi)__m2);
775 static __inline __m64 __attribute__((__always_inline__))
776 _m_pcmpeqw (__m64 __m1, __m64 __m2)
778 return _mm_cmpeq_pi16 (__m1, __m2);
781 static __inline __m64 __attribute__((__always_inline__))
782 _mm_cmpgt_pi16 (__m64 __m1, __m64 __m2)
784 return (__m64) __builtin_ia32_pcmpgtw ((__v4hi)__m1, (__v4hi)__m2);
787 static __inline __m64 __attribute__((__always_inline__))
788 _m_pcmpgtw (__m64 __m1, __m64 __m2)
790 return _mm_cmpgt_pi16 (__m1, __m2);
793 /* Compare two 32-bit values. The result of the comparison is 0xFFFFFFFF if
794 the test is true and zero if false. */
795 static __inline __m64 __attribute__((__always_inline__))
796 _mm_cmpeq_pi32 (__m64 __m1, __m64 __m2)
798 return (__m64) __builtin_ia32_pcmpeqd ((__v2si)__m1, (__v2si)__m2);
801 static __inline __m64 __attribute__((__always_inline__))
802 _m_pcmpeqd (__m64 __m1, __m64 __m2)
804 return _mm_cmpeq_pi32 (__m1, __m2);
807 static __inline __m64 __attribute__((__always_inline__))
808 _mm_cmpgt_pi32 (__m64 __m1, __m64 __m2)
810 return (__m64) __builtin_ia32_pcmpgtd ((__v2si)__m1, (__v2si)__m2);
813 static __inline __m64 __attribute__((__always_inline__))
814 _m_pcmpgtd (__m64 __m1, __m64 __m2)
816 return _mm_cmpgt_pi32 (__m1, __m2);
819 /* Creates a 64-bit zero. */
820 static __inline __m64 __attribute__((__always_inline__))
821 _mm_setzero_si64 (void)
823 return (__m64)0LL;
826 /* Creates a vector of two 32-bit values; I0 is least significant. */
827 static __inline __m64 __attribute__((__always_inline__))
828 _mm_set_pi32 (int __i1, int __i0)
830 return (__m64) __builtin_ia32_vec_init_v2si (__i0, __i1);
833 /* Creates a vector of four 16-bit values; W0 is least significant. */
834 static __inline __m64 __attribute__((__always_inline__))
835 _mm_set_pi16 (short __w3, short __w2, short __w1, short __w0)
837 return (__m64) __builtin_ia32_vec_init_v4hi (__w0, __w1, __w2, __w3);
840 /* Creates a vector of eight 8-bit values; B0 is least significant. */
841 static __inline __m64 __attribute__((__always_inline__))
842 _mm_set_pi8 (char __b7, char __b6, char __b5, char __b4,
843 char __b3, char __b2, char __b1, char __b0)
845 return (__m64) __builtin_ia32_vec_init_v8qi (__b0, __b1, __b2, __b3,
846 __b4, __b5, __b6, __b7);
849 /* Similar, but with the arguments in reverse order. */
850 static __inline __m64 __attribute__((__always_inline__))
851 _mm_setr_pi32 (int __i0, int __i1)
853 return _mm_set_pi32 (__i1, __i0);
856 static __inline __m64 __attribute__((__always_inline__))
857 _mm_setr_pi16 (short __w0, short __w1, short __w2, short __w3)
859 return _mm_set_pi16 (__w3, __w2, __w1, __w0);
862 static __inline __m64 __attribute__((__always_inline__))
863 _mm_setr_pi8 (char __b0, char __b1, char __b2, char __b3,
864 char __b4, char __b5, char __b6, char __b7)
866 return _mm_set_pi8 (__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
869 /* Creates a vector of two 32-bit values, both elements containing I. */
870 static __inline __m64 __attribute__((__always_inline__))
871 _mm_set1_pi32 (int __i)
873 return _mm_set_pi32 (__i, __i);
876 /* Creates a vector of four 16-bit values, all elements containing W. */
877 static __inline __m64 __attribute__((__always_inline__))
878 _mm_set1_pi16 (short __w)
880 return _mm_set_pi16 (__w, __w, __w, __w);
883 /* Creates a vector of eight 8-bit values, all elements containing B. */
884 static __inline __m64 __attribute__((__always_inline__))
885 _mm_set1_pi8 (char __b)
887 return _mm_set_pi8 (__b, __b, __b, __b, __b, __b, __b, __b);
890 #endif /* __MMX__ */
891 #endif /* _MMINTRIN_H_INCLUDED */