Add AMD SSE5 support; Add iterator over function arguments; Add stdarg_p, prototype_p...
[official-gcc.git] / gcc / config / i386 / bmmintrin.h
blobf321cee0d3631718653976291168153911676d7d
1 /* Copyright (C) 2007 Free Software Foundation, Inc.
3 This file is part of GCC.
5 GCC is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
8 any later version.
10 GCC is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with GCC; see the file COPYING. If not, write to
17 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
18 Boston, MA 02110-1301, USA. */
20 /* As a special exception, if you include this header file into source
21 files compiled by GCC, this header file does not by itself cause
22 the resulting executable to be covered by the GNU General Public
23 License. This exception does not however invalidate any other
24 reasons why the executable file might be covered by the GNU General
25 Public License. */
27 #ifndef _BMMINTRIN_H_INCLUDED
28 #define _BMMINTRIN_H_INCLUDED
30 #ifndef __SSE5__
31 # error "SSE5 instruction set not enabled"
32 #else
34 /* We need definitions from the SSE4A, SSE3, SSE2 and SSE header files. */
35 #include <ammintrin.h>
36 #include <mmintrin-common.h>
38 /* Floating point multiply/add type instructions */
39 static __inline __m128 __attribute__((__always_inline__))
40 _mm_macc_ps(__m128 __A, __m128 __B, __m128 __C)
42 return (__m128) __builtin_ia32_fmaddps ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
45 static __inline __m128d __attribute__((__always_inline__))
46 _mm_macc_pd(__m128d __A, __m128d __B, __m128d __C)
48 return (__m128d) __builtin_ia32_fmaddpd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
51 static __inline __m128 __attribute__((__always_inline__))
52 _mm_macc_ss(__m128 __A, __m128 __B, __m128 __C)
54 return (__m128) __builtin_ia32_fmaddss ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
57 static __inline __m128d __attribute__((__always_inline__))
58 _mm_macc_sd(__m128d __A, __m128d __B, __m128d __C)
60 return (__m128d) __builtin_ia32_fmaddsd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
63 static __inline __m128 __attribute__((__always_inline__))
64 _mm_msub_ps(__m128 __A, __m128 __B, __m128 __C)
66 return (__m128) __builtin_ia32_fmsubps ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
69 static __inline __m128d __attribute__((__always_inline__))
70 _mm_msub_pd(__m128d __A, __m128d __B, __m128d __C)
72 return (__m128d) __builtin_ia32_fmsubpd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
75 static __inline __m128 __attribute__((__always_inline__))
76 _mm_msub_ss(__m128 __A, __m128 __B, __m128 __C)
78 return (__m128) __builtin_ia32_fmsubss ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
81 static __inline __m128d __attribute__((__always_inline__))
82 _mm_msub_sd(__m128d __A, __m128d __B, __m128d __C)
84 return (__m128d) __builtin_ia32_fmsubsd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
87 static __inline __m128 __attribute__((__always_inline__))
88 _mm_nmacc_ps(__m128 __A, __m128 __B, __m128 __C)
90 return (__m128) __builtin_ia32_fnmaddps ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
93 static __inline __m128d __attribute__((__always_inline__))
94 _mm_nmacc_pd(__m128d __A, __m128d __B, __m128d __C)
96 return (__m128d) __builtin_ia32_fnmaddpd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
99 static __inline __m128 __attribute__((__always_inline__))
100 _mm_nmacc_ss(__m128 __A, __m128 __B, __m128 __C)
102 return (__m128) __builtin_ia32_fnmaddss ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
105 static __inline __m128d __attribute__((__always_inline__))
106 _mm_nmacc_sd(__m128d __A, __m128d __B, __m128d __C)
108 return (__m128d) __builtin_ia32_fnmaddsd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
111 static __inline __m128 __attribute__((__always_inline__))
112 _mm_nmsub_ps(__m128 __A, __m128 __B, __m128 __C)
114 return (__m128) __builtin_ia32_fnmsubps ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
117 static __inline __m128d __attribute__((__always_inline__))
118 _mm_nmsub_pd(__m128d __A, __m128d __B, __m128d __C)
120 return (__m128d) __builtin_ia32_fnmsubpd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
123 static __inline __m128 __attribute__((__always_inline__))
124 _mm_nmsub_ss(__m128 __A, __m128 __B, __m128 __C)
126 return (__m128) __builtin_ia32_fnmsubss ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
129 static __inline __m128d __attribute__((__always_inline__))
130 _mm_nmsub_sd(__m128d __A, __m128d __B, __m128d __C)
132 return (__m128d) __builtin_ia32_fnmsubsd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
135 /* Integer multiply/add instructions. */
136 static __inline __m128i __attribute__((__always_inline__))
137 _mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C)
139 return (__m128i) __builtin_ia32_pmacssww ((__v8hi)__A,(__v8hi)__B, (__v8hi)__C);
142 static __inline __m128i __attribute__((__always_inline__))
143 _mm_macc_epi16(__m128i __A, __m128i __B, __m128i __C)
145 return (__m128i) __builtin_ia32_pmacsww ((__v8hi)__A, (__v8hi)__B, (__v8hi)__C);
148 static __inline __m128i __attribute__((__always_inline__))
149 _mm_maccsd_epi16(__m128i __A, __m128i __B, __m128i __C)
151 return (__m128i) __builtin_ia32_pmacsswd ((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
154 static __inline __m128i __attribute__((__always_inline__))
155 _mm_maccd_epi16(__m128i __A, __m128i __B, __m128i __C)
157 return (__m128i) __builtin_ia32_pmacswd ((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
160 static __inline __m128i __attribute__((__always_inline__))
161 _mm_maccs_epi32(__m128i __A, __m128i __B, __m128i __C)
163 return (__m128i) __builtin_ia32_pmacssdd ((__v4si)__A, (__v4si)__B, (__v4si)__C);
166 static __inline __m128i __attribute__((__always_inline__))
167 _mm_macc_epi32(__m128i __A, __m128i __B, __m128i __C)
169 return (__m128i) __builtin_ia32_pmacsdd ((__v4si)__A, (__v4si)__B, (__v4si)__C);
172 static __inline __m128i __attribute__((__always_inline__))
173 _mm_maccslo_epi32(__m128i __A, __m128i __B, __m128i __C)
175 return (__m128i) __builtin_ia32_pmacssdql ((__v4si)__A, (__v4si)__B, (__v2di)__C);
178 static __inline __m128i __attribute__((__always_inline__))
179 _mm_macclo_epi32(__m128i __A, __m128i __B, __m128i __C)
181 return (__m128i) __builtin_ia32_pmacsdql ((__v4si)__A, (__v4si)__B, (__v2di)__C);
184 static __inline __m128i __attribute__((__always_inline__))
185 _mm_maccshi_epi32(__m128i __A, __m128i __B, __m128i __C)
187 return (__m128i) __builtin_ia32_pmacssdqh ((__v4si)__A, (__v4si)__B, (__v2di)__C);
190 static __inline __m128i __attribute__((__always_inline__))
191 _mm_macchi_epi32(__m128i __A, __m128i __B, __m128i __C)
193 return (__m128i) __builtin_ia32_pmacsdqh ((__v4si)__A, (__v4si)__B, (__v2di)__C);
196 static __inline __m128i __attribute__((__always_inline__))
197 _mm_maddsd_epi16(__m128i __A, __m128i __B, __m128i __C)
199 return (__m128i) __builtin_ia32_pmadcsswd ((__v8hi)__A,(__v8hi)__B,(__v4si)__C);
202 static __inline __m128i __attribute__((__always_inline__))
203 _mm_maddd_epi16(__m128i __A, __m128i __B, __m128i __C)
205 return (__m128i) __builtin_ia32_pmadcswd ((__v8hi)__A,(__v8hi)__B,(__v4si)__C);
208 /* Packed Integer Horizontal Add and Subtract */
209 static __inline __m128i __attribute__((__always_inline__))
210 _mm_haddw_epi8(__m128i __A)
212 return (__m128i) __builtin_ia32_phaddbw ((__v16qi)__A);
215 static __inline __m128i __attribute__((__always_inline__))
216 _mm_haddd_epi8(__m128i __A)
218 return (__m128i) __builtin_ia32_phaddbd ((__v16qi)__A);
221 static __inline __m128i __attribute__((__always_inline__))
222 _mm_haddq_epi8(__m128i __A)
224 return (__m128i) __builtin_ia32_phaddbq ((__v16qi)__A);
227 static __inline __m128i __attribute__((__always_inline__))
228 _mm_haddd_epi16(__m128i __A)
230 return (__m128i) __builtin_ia32_phaddwd ((__v8hi)__A);
233 static __inline __m128i __attribute__((__always_inline__))
234 _mm_haddq_epi16(__m128i __A)
236 return (__m128i) __builtin_ia32_phaddwq ((__v8hi)__A);
239 static __inline __m128i __attribute__((__always_inline__))
240 _mm_haddq_epi32(__m128i __A)
242 return (__m128i) __builtin_ia32_phadddq ((__v4si)__A);
245 static __inline __m128i __attribute__((__always_inline__))
246 _mm_haddw_epu8(__m128i __A)
248 return (__m128i) __builtin_ia32_phaddubw ((__v16qi)__A);
251 static __inline __m128i __attribute__((__always_inline__))
252 _mm_haddd_epu8(__m128i __A)
254 return (__m128i) __builtin_ia32_phaddubd ((__v16qi)__A);
257 static __inline __m128i __attribute__((__always_inline__))
258 _mm_haddq_epu8(__m128i __A)
260 return (__m128i) __builtin_ia32_phaddubq ((__v16qi)__A);
263 static __inline __m128i __attribute__((__always_inline__))
264 _mm_haddd_epu16(__m128i __A)
266 return (__m128i) __builtin_ia32_phadduwd ((__v8hi)__A);
269 static __inline __m128i __attribute__((__always_inline__))
270 _mm_haddq_epu16(__m128i __A)
272 return (__m128i) __builtin_ia32_phadduwq ((__v8hi)__A);
275 static __inline __m128i __attribute__((__always_inline__))
276 _mm_haddq_epu32(__m128i __A)
278 return (__m128i) __builtin_ia32_phaddudq ((__v4si)__A);
281 static __inline __m128i __attribute__((__always_inline__))
282 _mm_hsubw_epi8(__m128i __A)
284 return (__m128i) __builtin_ia32_phsubbw ((__v16qi)__A);
287 static __inline __m128i __attribute__((__always_inline__))
288 _mm_hsubd_epi16(__m128i __A)
290 return (__m128i) __builtin_ia32_phsubwd ((__v8hi)__A);
293 static __inline __m128i __attribute__((__always_inline__))
294 _mm_hsubq_epi32(__m128i __A)
296 return (__m128i) __builtin_ia32_phsubdq ((__v4si)__A);
299 /* Vector conditional move and permute */
300 static __inline __m128i __attribute__((__always_inline__))
301 _mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C)
303 return (__m128i) __builtin_ia32_pcmov (__A, __B, __C);
306 static __inline __m128i __attribute__((__always_inline__))
307 _mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C)
309 return (__m128i) __builtin_ia32_pperm ((__v16qi)__A, (__v16qi)__B, (__v16qi)__C);
312 static __inline __m128 __attribute__((__always_inline__))
313 _mm_perm_ps(__m128 __A, __m128 __B, __m128i __C)
315 return (__m128) __builtin_ia32_permps ((__m128)__A, (__m128)__B, (__v16qi)__C);
318 static __inline __m128d __attribute__((__always_inline__))
319 _mm_perm_pd(__m128d __A, __m128d __B, __m128i __C)
321 return (__m128d) __builtin_ia32_permpd ((__m128d)__A, (__m128d)__B, (__v16qi)__C);
324 /* Packed Integer Rotates and Shifts */
326 /* Rotates - Non-Immediate form */
327 static __inline __m128i __attribute__((__always_inline__))
328 _mm_rot_epi8(__m128i __A, __m128i __B)
330 return (__m128i) __builtin_ia32_protb ((__v16qi)__A, (__v16qi)__B);
333 static __inline __m128i __attribute__((__always_inline__))
334 _mm_rot_epi16(__m128i __A, __m128i __B)
336 return (__m128i) __builtin_ia32_protw ((__v8hi)__A, (__v8hi)__B);
339 static __inline __m128i __attribute__((__always_inline__))
340 _mm_rot_epi32(__m128i __A, __m128i __B)
342 return (__m128i) __builtin_ia32_protd ((__v4si)__A, (__v4si)__B);
345 static __inline __m128i __attribute__((__always_inline__))
346 _mm_rot_epi64(__m128i __A, __m128i __B)
348 return (__m128i) __builtin_ia32_protq ((__v2di)__A, (__v2di)__B);
352 /* Rotates - Immediate form */
/* Packed-integer rotates, immediate form: the count (__B / B) is a plain
   int handed straight to the builtin.  When __OPTIMIZE__ is defined the
   intrinsics are always-inline functions; otherwise they are macros that
   paste the count expression directly into the builtin call.
   NOTE(review): presumably the macro form exists because the builtin
   needs a compile-time constant count, which an un-inlined function
   parameter cannot provide -- confirm against the GCC builtin docs.  */
#ifdef __OPTIMIZE__
/* Forwards to __builtin_ia32_protbi; __A as __v16qi.  */
static __inline __m128i __attribute__((__always_inline__))
_mm_roti_epi8(__m128i __A, int __B)
{
  return (__m128i) __builtin_ia32_protbi ((__v16qi)__A, __B);
}

/* Forwards to __builtin_ia32_protwi; __A as __v8hi.  */
static __inline __m128i __attribute__((__always_inline__))
_mm_roti_epi16(__m128i __A, int __B)
{
  return (__m128i) __builtin_ia32_protwi ((__v8hi)__A, __B);
}

/* Forwards to __builtin_ia32_protdi; __A as __v4si.  */
static __inline __m128i __attribute__((__always_inline__))
_mm_roti_epi32(__m128i __A, int __B)
{
  return (__m128i) __builtin_ia32_protdi ((__v4si)__A, __B);
}

/* Forwards to __builtin_ia32_protqi; __A as __v2di.  */
static __inline __m128i __attribute__((__always_inline__))
_mm_roti_epi64(__m128i __A, int __B)
{
  return (__m128i) __builtin_ia32_protqi ((__v2di)__A, __B);
}
#else
/* Macro equivalents of the functions above.  The result cast is to
   __m128i (the former spelling _m128i names no type), the parentheses
   are balanced, and both arguments are parenthesized so that arbitrary
   expressions can be passed.  */
#define _mm_roti_epi8(A, B) \
  ((__m128i) __builtin_ia32_protbi ((__v16qi)(A), (B)))
#define _mm_roti_epi16(A, B) \
  ((__m128i) __builtin_ia32_protwi ((__v8hi)(A), (B)))
#define _mm_roti_epi32(A, B) \
  ((__m128i) __builtin_ia32_protdi ((__v4si)(A), (B)))
#define _mm_roti_epi64(A, B) \
  ((__m128i) __builtin_ia32_protqi ((__v2di)(A), (B)))
#endif
384 /* pshl */
386 static __inline __m128i __attribute__((__always_inline__))
387 _mm_shl_epi8(__m128i __A, __m128i __B)
389 return (__m128i) __builtin_ia32_pshlb ((__v16qi)__A, (__v16qi)__B);
392 static __inline __m128i __attribute__((__always_inline__))
393 _mm_shl_epi16(__m128i __A, __m128i __B)
395 return (__m128i) __builtin_ia32_pshlw ((__v8hi)__A, (__v8hi)__B);
398 static __inline __m128i __attribute__((__always_inline__))
399 _mm_shl_epi32(__m128i __A, __m128i __B)
401 return (__m128i) __builtin_ia32_pshld ((__v4si)__A, (__v4si)__B);
404 static __inline __m128i __attribute__((__always_inline__))
405 _mm_shl_epi64(__m128i __A, __m128i __B)
407 return (__m128i) __builtin_ia32_pshlq ((__v2di)__A, (__v2di)__B);
410 /* psha */
411 static __inline __m128i __attribute__((__always_inline__))
412 _mm_sha_epi8(__m128i __A, __m128i __B)
414 return (__m128i) __builtin_ia32_pshab ((__v16qi)__A, (__v16qi)__B);
417 static __inline __m128i __attribute__((__always_inline__))
418 _mm_sha_epi16(__m128i __A, __m128i __B)
420 return (__m128i) __builtin_ia32_pshaw ((__v8hi)__A, (__v8hi)__B);
423 static __inline __m128i __attribute__((__always_inline__))
424 _mm_sha_epi32(__m128i __A, __m128i __B)
426 return (__m128i) __builtin_ia32_pshad ((__v4si)__A, (__v4si)__B);
429 static __inline __m128i __attribute__((__always_inline__))
430 _mm_sha_epi64(__m128i __A, __m128i __B)
432 return (__m128i) __builtin_ia32_pshaq ((__v2di)__A, (__v2di)__B);
435 /* Compare and Predicate Generation */
437 /* com (floating point, packed single) */
438 static __inline __m128 __attribute__((__always_inline__))
439 _mm_comeq_ps(__m128 __A, __m128 __B)
441 return (__m128) __builtin_ia32_comeqps ((__v4sf)__A, (__v4sf)__B);
444 static __inline __m128 __attribute__((__always_inline__))
445 _mm_comlt_ps(__m128 __A, __m128 __B)
447 return (__m128) __builtin_ia32_comltps ((__v4sf)__A, (__v4sf)__B);
450 static __inline __m128 __attribute__((__always_inline__))
451 _mm_comle_ps(__m128 __A, __m128 __B)
453 return (__m128) __builtin_ia32_comleps ((__v4sf)__A, (__v4sf)__B);
456 static __inline __m128 __attribute__((__always_inline__))
457 _mm_comunord_ps(__m128 __A, __m128 __B)
459 return (__m128) __builtin_ia32_comunordps ((__v4sf)__A, (__v4sf)__B);
462 static __inline __m128 __attribute__((__always_inline__))
463 _mm_comneq_ps(__m128 __A, __m128 __B)
465 return (__m128) __builtin_ia32_comuneqps ((__v4sf)__A, (__v4sf)__B);
468 static __inline __m128 __attribute__((__always_inline__))
469 _mm_comnlt_ps(__m128 __A, __m128 __B)
471 return (__m128) __builtin_ia32_comunltps ((__v4sf)__A, (__v4sf)__B);
474 static __inline __m128 __attribute__((__always_inline__))
475 _mm_comnle_ps(__m128 __A, __m128 __B)
477 return (__m128) __builtin_ia32_comunleps ((__v4sf)__A, (__v4sf)__B);
481 static __inline __m128 __attribute__((__always_inline__))
482 _mm_comord_ps(__m128 __A, __m128 __B)
484 return (__m128) __builtin_ia32_comordps ((__v4sf)__A, (__v4sf)__B);
488 static __inline __m128 __attribute__((__always_inline__))
489 _mm_comueq_ps(__m128 __A, __m128 __B)
491 return (__m128) __builtin_ia32_comueqps ((__v4sf)__A, (__v4sf)__B);
494 static __inline __m128 __attribute__((__always_inline__))
495 _mm_comnge_ps(__m128 __A, __m128 __B)
497 return (__m128) __builtin_ia32_comungeps ((__v4sf)__A, (__v4sf)__B);
500 static __inline __m128 __attribute__((__always_inline__))
501 _mm_comngt_ps(__m128 __A, __m128 __B)
503 return (__m128) __builtin_ia32_comungtps ((__v4sf)__A, (__v4sf)__B);
506 static __inline __m128 __attribute__((__always_inline__))
507 _mm_comfalse_ps(__m128 __A, __m128 __B)
509 return (__m128) __builtin_ia32_comfalseps ((__v4sf)__A, (__v4sf)__B);
512 static __inline __m128 __attribute__((__always_inline__))
513 _mm_comoneq_ps(__m128 __A, __m128 __B)
515 return (__m128) __builtin_ia32_comneqps ((__v4sf)__A, (__v4sf)__B);
518 static __inline __m128 __attribute__((__always_inline__))
519 _mm_comge_ps(__m128 __A, __m128 __B)
521 return (__m128) __builtin_ia32_comgeps ((__v4sf)__A, (__v4sf)__B);
524 static __inline __m128 __attribute__((__always_inline__))
525 _mm_comgt_ps(__m128 __A, __m128 __B)
527 return (__m128) __builtin_ia32_comgtps ((__v4sf)__A, (__v4sf)__B);
530 static __inline __m128 __attribute__((__always_inline__))
531 _mm_comtrue_ps(__m128 __A, __m128 __B)
533 return (__m128) __builtin_ia32_comtrueps ((__v4sf)__A, (__v4sf)__B);
536 /* com (floating point, packed double) */
538 static __inline __m128d __attribute__((__always_inline__))
539 _mm_comeq_pd(__m128d __A, __m128d __B)
541 return (__m128d) __builtin_ia32_comeqpd ((__v2df)__A, (__v2df)__B);
544 static __inline __m128d __attribute__((__always_inline__))
545 _mm_comlt_pd(__m128d __A, __m128d __B)
547 return (__m128d) __builtin_ia32_comltpd ((__v2df)__A, (__v2df)__B);
550 static __inline __m128d __attribute__((__always_inline__))
551 _mm_comle_pd(__m128d __A, __m128d __B)
553 return (__m128d) __builtin_ia32_comlepd ((__v2df)__A, (__v2df)__B);
556 static __inline __m128d __attribute__((__always_inline__))
557 _mm_comunord_pd(__m128d __A, __m128d __B)
559 return (__m128d) __builtin_ia32_comunordpd ((__v2df)__A, (__v2df)__B);
562 static __inline __m128d __attribute__((__always_inline__))
563 _mm_comneq_pd(__m128d __A, __m128d __B)
565 return (__m128d) __builtin_ia32_comuneqpd ((__v2df)__A, (__v2df)__B);
568 static __inline __m128d __attribute__((__always_inline__))
569 _mm_comnlt_pd(__m128d __A, __m128d __B)
571 return (__m128d) __builtin_ia32_comunltpd ((__v2df)__A, (__v2df)__B);
574 static __inline __m128d __attribute__((__always_inline__))
575 _mm_comnle_pd(__m128d __A, __m128d __B)
577 return (__m128d) __builtin_ia32_comunlepd ((__v2df)__A, (__v2df)__B);
581 static __inline __m128d __attribute__((__always_inline__))
582 _mm_comord_pd(__m128d __A, __m128d __B)
584 return (__m128d) __builtin_ia32_comordpd ((__v2df)__A, (__v2df)__B);
587 static __inline __m128d __attribute__((__always_inline__))
588 _mm_comueq_pd(__m128d __A, __m128d __B)
590 return (__m128d) __builtin_ia32_comueqpd ((__v2df)__A, (__v2df)__B);
593 static __inline __m128d __attribute__((__always_inline__))
594 _mm_comnge_pd(__m128d __A, __m128d __B)
596 return (__m128d) __builtin_ia32_comungepd ((__v2df)__A, (__v2df)__B);
599 static __inline __m128d __attribute__((__always_inline__))
600 _mm_comngt_pd(__m128d __A, __m128d __B)
602 return (__m128d) __builtin_ia32_comungtpd ((__v2df)__A, (__v2df)__B);
605 static __inline __m128d __attribute__((__always_inline__))
606 _mm_comfalse_pd(__m128d __A, __m128d __B)
608 return (__m128d) __builtin_ia32_comfalsepd ((__v2df)__A, (__v2df)__B);
611 static __inline __m128d __attribute__((__always_inline__))
612 _mm_comoneq_pd(__m128d __A, __m128d __B)
614 return (__m128d) __builtin_ia32_comneqpd ((__v2df)__A, (__v2df)__B);
617 static __inline __m128d __attribute__((__always_inline__))
618 _mm_comge_pd(__m128d __A, __m128d __B)
620 return (__m128d) __builtin_ia32_comgepd ((__v2df)__A, (__v2df)__B);
623 static __inline __m128d __attribute__((__always_inline__))
624 _mm_comgt_pd(__m128d __A, __m128d __B)
626 return (__m128d) __builtin_ia32_comgtpd ((__v2df)__A, (__v2df)__B);
629 static __inline __m128d __attribute__((__always_inline__))
630 _mm_comtrue_pd(__m128d __A, __m128d __B)
632 return (__m128d) __builtin_ia32_comtruepd ((__v2df)__A, (__v2df)__B);
635 /* com (floating point, scalar single) */
636 static __inline __m128 __attribute__((__always_inline__))
637 _mm_comeq_ss(__m128 __A, __m128 __B)
639 return (__m128) __builtin_ia32_comeqss ((__v4sf)__A, (__v4sf)__B);
642 static __inline __m128 __attribute__((__always_inline__))
643 _mm_comlt_ss(__m128 __A, __m128 __B)
645 return (__m128) __builtin_ia32_comltss ((__v4sf)__A, (__v4sf)__B);
648 static __inline __m128 __attribute__((__always_inline__))
649 _mm_comle_ss(__m128 __A, __m128 __B)
651 return (__m128) __builtin_ia32_comless ((__v4sf)__A, (__v4sf)__B);
654 static __inline __m128 __attribute__((__always_inline__))
655 _mm_comunord_ss(__m128 __A, __m128 __B)
657 return (__m128) __builtin_ia32_comunordss ((__v4sf)__A, (__v4sf)__B);
660 static __inline __m128 __attribute__((__always_inline__))
661 _mm_comneq_ss(__m128 __A, __m128 __B)
663 return (__m128) __builtin_ia32_comuneqss ((__v4sf)__A, (__v4sf)__B);
666 static __inline __m128 __attribute__((__always_inline__))
667 _mm_comnlt_ss(__m128 __A, __m128 __B)
669 return (__m128) __builtin_ia32_comunltss ((__v4sf)__A, (__v4sf)__B);
672 static __inline __m128 __attribute__((__always_inline__))
673 _mm_comnle_ss(__m128 __A, __m128 __B)
675 return (__m128) __builtin_ia32_comunless ((__v4sf)__A, (__v4sf)__B);
679 static __inline __m128 __attribute__((__always_inline__))
680 _mm_comord_ss(__m128 __A, __m128 __B)
682 return (__m128) __builtin_ia32_comordss ((__v4sf)__A, (__v4sf)__B);
685 static __inline __m128 __attribute__((__always_inline__))
686 _mm_comueq_ss(__m128 __A, __m128 __B)
688 return (__m128) __builtin_ia32_comueqss ((__v4sf)__A, (__v4sf)__B);
691 static __inline __m128 __attribute__((__always_inline__))
692 _mm_comnge_ss(__m128 __A, __m128 __B)
694 return (__m128) __builtin_ia32_comungess ((__v4sf)__A, (__v4sf)__B);
697 static __inline __m128 __attribute__((__always_inline__))
698 _mm_comngt_ss(__m128 __A, __m128 __B)
700 return (__m128) __builtin_ia32_comungtss ((__v4sf)__A, (__v4sf)__B);
703 static __inline __m128 __attribute__((__always_inline__))
704 _mm_comfalse_ss(__m128 __A, __m128 __B)
706 return (__m128) __builtin_ia32_comfalsess ((__v4sf)__A, (__v4sf)__B);
709 static __inline __m128 __attribute__((__always_inline__))
710 _mm_comoneq_ss(__m128 __A, __m128 __B)
712 return (__m128) __builtin_ia32_comneqss ((__v4sf)__A, (__v4sf)__B);
715 static __inline __m128 __attribute__((__always_inline__))
716 _mm_comge_ss(__m128 __A, __m128 __B)
718 return (__m128) __builtin_ia32_comgess ((__v4sf)__A, (__v4sf)__B);
721 static __inline __m128 __attribute__((__always_inline__))
722 _mm_comgt_ss(__m128 __A, __m128 __B)
724 return (__m128) __builtin_ia32_comgtss ((__v4sf)__A, (__v4sf)__B);
727 static __inline __m128 __attribute__((__always_inline__))
728 _mm_comtrue_ss(__m128 __A, __m128 __B)
730 return (__m128) __builtin_ia32_comtruess ((__v4sf)__A, (__v4sf)__B);
733 /* com (floating point, scalar double) */
735 static __inline __m128d __attribute__((__always_inline__))
736 _mm_comeq_sd(__m128d __A, __m128d __B)
738 return (__m128d) __builtin_ia32_comeqsd ((__v2df)__A, (__v2df)__B);
741 static __inline __m128d __attribute__((__always_inline__))
742 _mm_comlt_sd(__m128d __A, __m128d __B)
744 return (__m128d) __builtin_ia32_comltsd ((__v2df)__A, (__v2df)__B);
747 static __inline __m128d __attribute__((__always_inline__))
748 _mm_comle_sd(__m128d __A, __m128d __B)
750 return (__m128d) __builtin_ia32_comlesd ((__v2df)__A, (__v2df)__B);
753 static __inline __m128d __attribute__((__always_inline__))
754 _mm_comunord_sd(__m128d __A, __m128d __B)
756 return (__m128d) __builtin_ia32_comunordsd ((__v2df)__A, (__v2df)__B);
759 static __inline __m128d __attribute__((__always_inline__))
760 _mm_comneq_sd(__m128d __A, __m128d __B)
762 return (__m128d) __builtin_ia32_comuneqsd ((__v2df)__A, (__v2df)__B);
765 static __inline __m128d __attribute__((__always_inline__))
766 _mm_comnlt_sd(__m128d __A, __m128d __B)
768 return (__m128d) __builtin_ia32_comunltsd ((__v2df)__A, (__v2df)__B);
771 static __inline __m128d __attribute__((__always_inline__))
772 _mm_comnle_sd(__m128d __A, __m128d __B)
774 return (__m128d) __builtin_ia32_comunlesd ((__v2df)__A, (__v2df)__B);
778 static __inline __m128d __attribute__((__always_inline__))
779 _mm_comord_sd(__m128d __A, __m128d __B)
781 return (__m128d) __builtin_ia32_comordsd ((__v2df)__A, (__v2df)__B);
784 static __inline __m128d __attribute__((__always_inline__))
785 _mm_comueq_sd(__m128d __A, __m128d __B)
787 return (__m128d) __builtin_ia32_comueqsd ((__v2df)__A, (__v2df)__B);
790 static __inline __m128d __attribute__((__always_inline__))
791 _mm_comnge_sd(__m128d __A, __m128d __B)
793 return (__m128d) __builtin_ia32_comungesd ((__v2df)__A, (__v2df)__B);
796 static __inline __m128d __attribute__((__always_inline__))
797 _mm_comngt_sd(__m128d __A, __m128d __B)
799 return (__m128d) __builtin_ia32_comungtsd ((__v2df)__A, (__v2df)__B);
802 static __inline __m128d __attribute__((__always_inline__))
803 _mm_comfalse_sd(__m128d __A, __m128d __B)
805 return (__m128d) __builtin_ia32_comfalsesd ((__v2df)__A, (__v2df)__B);
808 static __inline __m128d __attribute__((__always_inline__))
809 _mm_comoneq_sd(__m128d __A, __m128d __B)
811 return (__m128d) __builtin_ia32_comneqsd ((__v2df)__A, (__v2df)__B);
814 static __inline __m128d __attribute__((__always_inline__))
815 _mm_comge_sd(__m128d __A, __m128d __B)
817 return (__m128d) __builtin_ia32_comgesd ((__v2df)__A, (__v2df)__B);
820 static __inline __m128d __attribute__((__always_inline__))
821 _mm_comgt_sd(__m128d __A, __m128d __B)
823 return (__m128d) __builtin_ia32_comgtsd ((__v2df)__A, (__v2df)__B);
826 static __inline __m128d __attribute__((__always_inline__))
827 _mm_comtrue_sd(__m128d __A, __m128d __B)
829 return (__m128d) __builtin_ia32_comtruesd ((__v2df)__A, (__v2df)__B);
833 /* pcom (integer, unsigned bytes) */
835 static __inline __m128i __attribute__((__always_inline__))
836 _mm_comlt_epu8(__m128i __A, __m128i __B)
838 return (__m128i) __builtin_ia32_pcomltub ((__v16qi)__A, (__v16qi)__B);
841 static __inline __m128i __attribute__((__always_inline__))
842 _mm_comle_epu8(__m128i __A, __m128i __B)
844 return (__m128i) __builtin_ia32_pcomleub ((__v16qi)__A, (__v16qi)__B);
847 static __inline __m128i __attribute__((__always_inline__))
848 _mm_comgt_epu8(__m128i __A, __m128i __B)
850 return (__m128i) __builtin_ia32_pcomgtub ((__v16qi)__A, (__v16qi)__B);
853 static __inline __m128i __attribute__((__always_inline__))
854 _mm_comge_epu8(__m128i __A, __m128i __B)
856 return (__m128i) __builtin_ia32_pcomgeub ((__v16qi)__A, (__v16qi)__B);
859 static __inline __m128i __attribute__((__always_inline__))
860 _mm_comeq_epu8(__m128i __A, __m128i __B)
862 return (__m128i) __builtin_ia32_pcomequb ((__v16qi)__A, (__v16qi)__B);
865 static __inline __m128i __attribute__((__always_inline__))
866 _mm_comneq_epu8(__m128i __A, __m128i __B)
868 return (__m128i) __builtin_ia32_pcomnequb ((__v16qi)__A, (__v16qi)__B);
871 static __inline __m128i __attribute__((__always_inline__))
872 _mm_comfalse_epu8(__m128i __A, __m128i __B)
874 return (__m128i) __builtin_ia32_pcomfalseub ((__v16qi)__A, (__v16qi)__B);
877 static __inline __m128i __attribute__((__always_inline__))
878 _mm_comtrue_epu8(__m128i __A, __m128i __B)
880 return (__m128i) __builtin_ia32_pcomtrueub ((__v16qi)__A, (__v16qi)__B);
883 /* pcom (integer, unsigned words) */
885 static __inline __m128i __attribute__((__always_inline__))
886 _mm_comlt_epu16(__m128i __A, __m128i __B)
888 return (__m128i) __builtin_ia32_pcomltuw ((__v8hi)__A, (__v8hi)__B);
891 static __inline __m128i __attribute__((__always_inline__))
892 _mm_comle_epu16(__m128i __A, __m128i __B)
894 return (__m128i) __builtin_ia32_pcomleuw ((__v8hi)__A, (__v8hi)__B);
897 static __inline __m128i __attribute__((__always_inline__))
898 _mm_comgt_epu16(__m128i __A, __m128i __B)
900 return (__m128i) __builtin_ia32_pcomgtuw ((__v8hi)__A, (__v8hi)__B);
903 static __inline __m128i __attribute__((__always_inline__))
904 _mm_comge_epu16(__m128i __A, __m128i __B)
906 return (__m128i) __builtin_ia32_pcomgeuw ((__v8hi)__A, (__v8hi)__B);
909 static __inline __m128i __attribute__((__always_inline__))
910 _mm_comeq_epu16(__m128i __A, __m128i __B)
912 return (__m128i) __builtin_ia32_pcomequw ((__v8hi)__A, (__v8hi)__B);
915 static __inline __m128i __attribute__((__always_inline__))
916 _mm_comneq_epu16(__m128i __A, __m128i __B)
918 return (__m128i) __builtin_ia32_pcomnequw ((__v8hi)__A, (__v8hi)__B);
921 static __inline __m128i __attribute__((__always_inline__))
922 _mm_comfalse_epu16(__m128i __A, __m128i __B)
924 return (__m128i) __builtin_ia32_pcomfalseuw ((__v8hi)__A, (__v8hi)__B);
927 static __inline __m128i __attribute__((__always_inline__))
928 _mm_comtrue_epu16(__m128i __A, __m128i __B)
930 return (__m128i) __builtin_ia32_pcomtrueuw ((__v8hi)__A, (__v8hi)__B);
933 /* pcom (integer, unsigned double words) */
935 static __inline __m128i __attribute__((__always_inline__))
936 _mm_comlt_epu32(__m128i __A, __m128i __B)
938 return (__m128i) __builtin_ia32_pcomltud ((__v4si)__A, (__v4si)__B);
941 static __inline __m128i __attribute__((__always_inline__))
942 _mm_comle_epu32(__m128i __A, __m128i __B)
944 return (__m128i) __builtin_ia32_pcomleud ((__v4si)__A, (__v4si)__B);
947 static __inline __m128i __attribute__((__always_inline__))
948 _mm_comgt_epu32(__m128i __A, __m128i __B)
950 return (__m128i) __builtin_ia32_pcomgtud ((__v4si)__A, (__v4si)__B);
953 static __inline __m128i __attribute__((__always_inline__))
954 _mm_comge_epu32(__m128i __A, __m128i __B)
956 return (__m128i) __builtin_ia32_pcomgeud ((__v4si)__A, (__v4si)__B);
959 static __inline __m128i __attribute__((__always_inline__))
960 _mm_comeq_epu32(__m128i __A, __m128i __B)
962 return (__m128i) __builtin_ia32_pcomequd ((__v4si)__A, (__v4si)__B);
965 static __inline __m128i __attribute__((__always_inline__))
966 _mm_comneq_epu32(__m128i __A, __m128i __B)
968 return (__m128i) __builtin_ia32_pcomnequd ((__v4si)__A, (__v4si)__B);
971 static __inline __m128i __attribute__((__always_inline__))
972 _mm_comfalse_epu32(__m128i __A, __m128i __B)
974 return (__m128i) __builtin_ia32_pcomfalseud ((__v4si)__A, (__v4si)__B);
977 static __inline __m128i __attribute__((__always_inline__))
978 _mm_comtrue_epu32(__m128i __A, __m128i __B)
980 return (__m128i) __builtin_ia32_pcomtrueud ((__v4si)__A, (__v4si)__B);
983 /* pcom (integer, unsigned quad words) */
985 static __inline __m128i __attribute__((__always_inline__))
986 _mm_comlt_epu64(__m128i __A, __m128i __B)
988 return (__m128i) __builtin_ia32_pcomltuq ((__v2di)__A, (__v2di)__B);
991 static __inline __m128i __attribute__((__always_inline__))
992 _mm_comle_epu64(__m128i __A, __m128i __B)
994 return (__m128i) __builtin_ia32_pcomleuq ((__v2di)__A, (__v2di)__B);
997 static __inline __m128i __attribute__((__always_inline__))
998 _mm_comgt_epu64(__m128i __A, __m128i __B)
1000 return (__m128i) __builtin_ia32_pcomgtuq ((__v2di)__A, (__v2di)__B);
1003 static __inline __m128i __attribute__((__always_inline__))
1004 _mm_comge_epu64(__m128i __A, __m128i __B)
1006 return (__m128i) __builtin_ia32_pcomgeuq ((__v2di)__A, (__v2di)__B);
1009 static __inline __m128i __attribute__((__always_inline__))
1010 _mm_comeq_epu64(__m128i __A, __m128i __B)
1012 return (__m128i) __builtin_ia32_pcomequq ((__v2di)__A, (__v2di)__B);
1015 static __inline __m128i __attribute__((__always_inline__))
1016 _mm_comneq_epu64(__m128i __A, __m128i __B)
1018 return (__m128i) __builtin_ia32_pcomnequq ((__v2di)__A, (__v2di)__B);
1021 static __inline __m128i __attribute__((__always_inline__))
1022 _mm_comfalse_epu64(__m128i __A, __m128i __B)
1024 return (__m128i) __builtin_ia32_pcomfalseuq ((__v2di)__A, (__v2di)__B);
1027 static __inline __m128i __attribute__((__always_inline__))
1028 _mm_comtrue_epu64(__m128i __A, __m128i __B)
1030 return (__m128i) __builtin_ia32_pcomtrueuq ((__v2di)__A, (__v2di)__B);
1033 /*pcom (integer, signed bytes) */
1035 static __inline __m128i __attribute__((__always_inline__))
1036 _mm_comlt_epi8(__m128i __A, __m128i __B)
1038 return (__m128i) __builtin_ia32_pcomltb ((__v16qi)__A, (__v16qi)__B);
1041 static __inline __m128i __attribute__((__always_inline__))
1042 _mm_comle_epi8(__m128i __A, __m128i __B)
1044 return (__m128i) __builtin_ia32_pcomleb ((__v16qi)__A, (__v16qi)__B);
1047 static __inline __m128i __attribute__((__always_inline__))
1048 _mm_comgt_epi8(__m128i __A, __m128i __B)
1050 return (__m128i) __builtin_ia32_pcomgtb ((__v16qi)__A, (__v16qi)__B);
1053 static __inline __m128i __attribute__((__always_inline__))
1054 _mm_comge_epi8(__m128i __A, __m128i __B)
1056 return (__m128i) __builtin_ia32_pcomgeb ((__v16qi)__A, (__v16qi)__B);
1059 static __inline __m128i __attribute__((__always_inline__))
1060 _mm_comeq_epi8(__m128i __A, __m128i __B)
1062 return (__m128i) __builtin_ia32_pcomeqb ((__v16qi)__A, (__v16qi)__B);
1065 static __inline __m128i __attribute__((__always_inline__))
1066 _mm_comneq_epi8(__m128i __A, __m128i __B)
1068 return (__m128i) __builtin_ia32_pcomneqb ((__v16qi)__A, (__v16qi)__B);
1071 static __inline __m128i __attribute__((__always_inline__))
1072 _mm_comfalse_epi8(__m128i __A, __m128i __B)
1074 return (__m128i) __builtin_ia32_pcomfalseb ((__v16qi)__A, (__v16qi)__B);
1077 static __inline __m128i __attribute__((__always_inline__))
1078 _mm_comtrue_epi8(__m128i __A, __m128i __B)
1080 return (__m128i) __builtin_ia32_pcomtrueb ((__v16qi)__A, (__v16qi)__B);
1083 /*pcom (integer, signed words) */
1085 static __inline __m128i __attribute__((__always_inline__))
1086 _mm_comlt_epi16(__m128i __A, __m128i __B)
1088 return (__m128i) __builtin_ia32_pcomltw ((__v8hi)__A, (__v8hi)__B);
1091 static __inline __m128i __attribute__((__always_inline__))
1092 _mm_comle_epi16(__m128i __A, __m128i __B)
1094 return (__m128i) __builtin_ia32_pcomlew ((__v8hi)__A, (__v8hi)__B);
1097 static __inline __m128i __attribute__((__always_inline__))
1098 _mm_comgt_epi16(__m128i __A, __m128i __B)
1100 return (__m128i) __builtin_ia32_pcomgtw ((__v8hi)__A, (__v8hi)__B);
1103 static __inline __m128i __attribute__((__always_inline__))
1104 _mm_comge_epi16(__m128i __A, __m128i __B)
1106 return (__m128i) __builtin_ia32_pcomgew ((__v8hi)__A, (__v8hi)__B);
1109 static __inline __m128i __attribute__((__always_inline__))
1110 _mm_comeq_epi16(__m128i __A, __m128i __B)
1112 return (__m128i) __builtin_ia32_pcomeqw ((__v8hi)__A, (__v8hi)__B);
1115 static __inline __m128i __attribute__((__always_inline__))
1116 _mm_comneq_epi16(__m128i __A, __m128i __B)
1118 return (__m128i) __builtin_ia32_pcomneqw ((__v8hi)__A, (__v8hi)__B);
1121 static __inline __m128i __attribute__((__always_inline__))
1122 _mm_comfalse_epi16(__m128i __A, __m128i __B)
1124 return (__m128i) __builtin_ia32_pcomfalsew ((__v8hi)__A, (__v8hi)__B);
1127 static __inline __m128i __attribute__((__always_inline__))
1128 _mm_comtrue_epi16(__m128i __A, __m128i __B)
1130 return (__m128i) __builtin_ia32_pcomtruew ((__v8hi)__A, (__v8hi)__B);
1133 /*pcom (integer, signed double words) */
1135 static __inline __m128i __attribute__((__always_inline__))
1136 _mm_comlt_epi32(__m128i __A, __m128i __B)
1138 return (__m128i) __builtin_ia32_pcomltd ((__v4si)__A, (__v4si)__B);
1141 static __inline __m128i __attribute__((__always_inline__))
1142 _mm_comle_epi32(__m128i __A, __m128i __B)
1144 return (__m128i) __builtin_ia32_pcomled ((__v4si)__A, (__v4si)__B);
1147 static __inline __m128i __attribute__((__always_inline__))
1148 _mm_comgt_epi32(__m128i __A, __m128i __B)
1150 return (__m128i) __builtin_ia32_pcomgtd ((__v4si)__A, (__v4si)__B);
1153 static __inline __m128i __attribute__((__always_inline__))
1154 _mm_comge_epi32(__m128i __A, __m128i __B)
1156 return (__m128i) __builtin_ia32_pcomged ((__v4si)__A, (__v4si)__B);
1159 static __inline __m128i __attribute__((__always_inline__))
1160 _mm_comeq_epi32(__m128i __A, __m128i __B)
1162 return (__m128i) __builtin_ia32_pcomeqd ((__v4si)__A, (__v4si)__B);
1165 static __inline __m128i __attribute__((__always_inline__))
1166 _mm_comneq_epi32(__m128i __A, __m128i __B)
1168 return (__m128i) __builtin_ia32_pcomneqd ((__v4si)__A, (__v4si)__B);
1171 static __inline __m128i __attribute__((__always_inline__))
1172 _mm_comfalse_epi32(__m128i __A, __m128i __B)
1174 return (__m128i) __builtin_ia32_pcomfalsed ((__v4si)__A, (__v4si)__B);
1177 static __inline __m128i __attribute__((__always_inline__))
1178 _mm_comtrue_epi32(__m128i __A, __m128i __B)
1180 return (__m128i) __builtin_ia32_pcomtrued ((__v4si)__A, (__v4si)__B);
1183 /*pcom (integer, signed quad words) */
1185 static __inline __m128i __attribute__((__always_inline__))
1186 _mm_comlt_epi64(__m128i __A, __m128i __B)
1188 return (__m128i) __builtin_ia32_pcomltq ((__v2di)__A, (__v2di)__B);
1191 static __inline __m128i __attribute__((__always_inline__))
1192 _mm_comle_epi64(__m128i __A, __m128i __B)
1194 return (__m128i) __builtin_ia32_pcomleq ((__v2di)__A, (__v2di)__B);
1197 static __inline __m128i __attribute__((__always_inline__))
1198 _mm_comgt_epi64(__m128i __A, __m128i __B)
1200 return (__m128i) __builtin_ia32_pcomgtq ((__v2di)__A, (__v2di)__B);
1203 static __inline __m128i __attribute__((__always_inline__))
1204 _mm_comge_epi64(__m128i __A, __m128i __B)
1206 return (__m128i) __builtin_ia32_pcomgeq ((__v2di)__A, (__v2di)__B);
1209 static __inline __m128i __attribute__((__always_inline__))
1210 _mm_comeq_epi64(__m128i __A, __m128i __B)
1212 return (__m128i) __builtin_ia32_pcomeqq ((__v2di)__A, (__v2di)__B);
1215 static __inline __m128i __attribute__((__always_inline__))
1216 _mm_comneq_epi64(__m128i __A, __m128i __B)
1218 return (__m128i) __builtin_ia32_pcomneqq ((__v2di)__A, (__v2di)__B);
1221 static __inline __m128i __attribute__((__always_inline__))
1222 _mm_comfalse_epi64(__m128i __A, __m128i __B)
1224 return (__m128i) __builtin_ia32_pcomfalseq ((__v2di)__A, (__v2di)__B);
1227 static __inline __m128i __attribute__((__always_inline__))
1228 _mm_comtrue_epi64(__m128i __A, __m128i __B)
1230 return (__m128i) __builtin_ia32_pcomtrueq ((__v2di)__A, (__v2di)__B);
1233 /* FRCZ */
1234 static __inline __m128 __attribute__((__always_inline__))
1235 _mm_frcz_ps (__m128 __A)
1237 return (__m128) __builtin_ia32_frczps ((__v4sf)__A);
1240 static __inline __m128d __attribute__((__always_inline__))
1241 _mm_frcz_pd (__m128d __A)
1243 return (__m128d) __builtin_ia32_frczpd ((__v2df)__A);
1246 static __inline __m128 __attribute__((__always_inline__))
1247 _mm_frcz_ss (__m128 __A, __m128 __B)
1249 return (__m128) __builtin_ia32_frczss ((__v4sf)__A, (__v4sf)__B);
1252 static __inline __m128d __attribute__((__always_inline__))
1253 _mm_frcz_sd (__m128d __A, __m128d __B)
1255 return (__m128d) __builtin_ia32_frczsd ((__v2df)__A, (__v2df)__B);
1258 #endif /* __SSE5__ */
1260 #endif /* _BMMINTRIN_H_INCLUDED */