Android O SDK.
[android_tools.git] / sdk / build-tools / 26.0.0 / renderscript / clang-include / tmmintrin.h
blob a72796ba4a68b8b1c05ce97a1ca51186eee4c0e4
/*===---- tmmintrin.h - SSSE3 intrinsics -----------------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
24 #ifndef __TMMINTRIN_H
25 #define __TMMINTRIN_H
27 #include <pmmintrin.h>
29 /* Define the default attributes for the functions in this file. */
30 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("ssse3")))
32 /// \brief Computes the absolute value of each of the packed 8-bit signed
33 /// integers in the source operand and stores the 8-bit unsigned integer
34 /// results in the destination.
35 ///
36 /// \headerfile <x86intrin.h>
37 ///
38 /// This intrinsic corresponds to the \c PABSB instruction.
39 ///
40 /// \param __a
41 /// A 64-bit vector of [8 x i8].
42 /// \returns A 64-bit integer vector containing the absolute values of the
43 /// elements in the operand.
44 static __inline__ __m64 __DEFAULT_FN_ATTRS
45 _mm_abs_pi8(__m64 __a)
47 return (__m64)__builtin_ia32_pabsb((__v8qi)__a);
50 /// \brief Computes the absolute value of each of the packed 8-bit signed
51 /// integers in the source operand and stores the 8-bit unsigned integer
52 /// results in the destination.
53 ///
54 /// \headerfile <x86intrin.h>
55 ///
56 /// This intrinsic corresponds to the \c VPABSB instruction.
57 ///
58 /// \param __a
59 /// A 128-bit vector of [16 x i8].
60 /// \returns A 128-bit integer vector containing the absolute values of the
61 /// elements in the operand.
62 static __inline__ __m128i __DEFAULT_FN_ATTRS
63 _mm_abs_epi8(__m128i __a)
65 return (__m128i)__builtin_ia32_pabsb128((__v16qi)__a);
68 /// \brief Computes the absolute value of each of the packed 16-bit signed
69 /// integers in the source operand and stores the 16-bit unsigned integer
70 /// results in the destination.
71 ///
72 /// \headerfile <x86intrin.h>
73 ///
74 /// This intrinsic corresponds to the \c PABSW instruction.
75 ///
76 /// \param __a
77 /// A 64-bit vector of [4 x i16].
78 /// \returns A 64-bit integer vector containing the absolute values of the
79 /// elements in the operand.
80 static __inline__ __m64 __DEFAULT_FN_ATTRS
81 _mm_abs_pi16(__m64 __a)
83 return (__m64)__builtin_ia32_pabsw((__v4hi)__a);
86 /// \brief Computes the absolute value of each of the packed 16-bit signed
87 /// integers in the source operand and stores the 16-bit unsigned integer
88 /// results in the destination.
89 ///
90 /// \headerfile <x86intrin.h>
91 ///
92 /// This intrinsic corresponds to the \c VPABSW instruction.
93 ///
94 /// \param __a
95 /// A 128-bit vector of [8 x i16].
96 /// \returns A 128-bit integer vector containing the absolute values of the
97 /// elements in the operand.
98 static __inline__ __m128i __DEFAULT_FN_ATTRS
99 _mm_abs_epi16(__m128i __a)
101 return (__m128i)__builtin_ia32_pabsw128((__v8hi)__a);
104 /// \brief Computes the absolute value of each of the packed 32-bit signed
105 /// integers in the source operand and stores the 32-bit unsigned integer
106 /// results in the destination.
108 /// \headerfile <x86intrin.h>
110 /// This intrinsic corresponds to the \c PABSD instruction.
112 /// \param __a
113 /// A 64-bit vector of [2 x i32].
114 /// \returns A 64-bit integer vector containing the absolute values of the
115 /// elements in the operand.
116 static __inline__ __m64 __DEFAULT_FN_ATTRS
117 _mm_abs_pi32(__m64 __a)
119 return (__m64)__builtin_ia32_pabsd((__v2si)__a);
122 /// \brief Computes the absolute value of each of the packed 32-bit signed
123 /// integers in the source operand and stores the 32-bit unsigned integer
124 /// results in the destination.
126 /// \headerfile <x86intrin.h>
128 /// This intrinsic corresponds to the \c VPABSD instruction.
130 /// \param __a
131 /// A 128-bit vector of [4 x i32].
132 /// \returns A 128-bit integer vector containing the absolute values of the
133 /// elements in the operand.
134 static __inline__ __m128i __DEFAULT_FN_ATTRS
135 _mm_abs_epi32(__m128i __a)
137 return (__m128i)__builtin_ia32_pabsd128((__v4si)__a);
/// \brief Concatenates the two 128-bit integer vector operands, and
///    right-shifts the result by the number of bytes specified in the
///    immediate operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_alignr_epi8(__m128i a, __m128i b, const int n);
/// \endcode
///
/// This intrinsic corresponds to the \c PALIGNR instruction.
///
/// \param a
///    A 128-bit vector of [16 x i8] containing one of the source operands.
/// \param b
///    A 128-bit vector of [16 x i8] containing one of the source operands.
/// \param n
///    An immediate operand specifying how many bytes to right-shift the result.
/// \returns A 128-bit integer vector containing the concatenated right-shifted
///    value.
// Expands to a plain parenthesized expression instead of a GNU statement
// expression, so the macro also works where statement expressions are
// rejected (for example outside a function body). It remains a macro because
// n is forwarded directly to the builtin as its immediate argument.
#define _mm_alignr_epi8(a, b, n) \
  ((__m128i)__builtin_ia32_palignr128((__v16qi)(__m128i)(a), \
                                      (__v16qi)(__m128i)(b), (n)))
/// \brief Concatenates the two 64-bit integer vector operands, and
///    right-shifts the result by the number of bytes specified in the
///    immediate operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m64 _mm_alignr_pi8(__m64 a, __m64 b, const int n);
/// \endcode
///
/// This intrinsic corresponds to the \c PALIGNR instruction.
///
/// \param a
///    A 64-bit vector of [8 x i8] containing one of the source operands.
/// \param b
///    A 64-bit vector of [8 x i8] containing one of the source operands.
/// \param n
///    An immediate operand specifying how many bytes to right-shift the result.
/// \returns A 64-bit integer vector containing the concatenated right-shifted
///    value.
// Expands to a plain parenthesized expression instead of a GNU statement
// expression, so the macro also works where statement expressions are
// rejected (for example outside a function body). It remains a macro because
// n is forwarded directly to the builtin as its immediate argument.
#define _mm_alignr_pi8(a, b, n) \
  ((__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), \
                                 (__v8qi)(__m64)(b), (n)))
186 /// \brief Horizontally adds the adjacent pairs of values contained in 2 packed
187 /// 128-bit vectors of [8 x i16].
189 /// \headerfile <x86intrin.h>
191 /// This intrinsic corresponds to the \c VPHADDW instruction.
193 /// \param __a
194 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
195 /// horizontal sums of the values are stored in the lower bits of the
196 /// destination.
197 /// \param __b
198 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
199 /// horizontal sums of the values are stored in the upper bits of the
200 /// destination.
201 /// \returns A 128-bit vector of [8 x i16] containing the horizontal sums of
202 /// both operands.
203 static __inline__ __m128i __DEFAULT_FN_ATTRS
204 _mm_hadd_epi16(__m128i __a, __m128i __b)
206 return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b);
209 /// \brief Horizontally adds the adjacent pairs of values contained in 2 packed
210 /// 128-bit vectors of [4 x i32].
212 /// \headerfile <x86intrin.h>
214 /// This intrinsic corresponds to the \c VPHADDD instruction.
216 /// \param __a
217 /// A 128-bit vector of [4 x i32] containing one of the source operands. The
218 /// horizontal sums of the values are stored in the lower bits of the
219 /// destination.
220 /// \param __b
221 /// A 128-bit vector of [4 x i32] containing one of the source operands. The
222 /// horizontal sums of the values are stored in the upper bits of the
223 /// destination.
224 /// \returns A 128-bit vector of [4 x i32] containing the horizontal sums of
225 /// both operands.
226 static __inline__ __m128i __DEFAULT_FN_ATTRS
227 _mm_hadd_epi32(__m128i __a, __m128i __b)
229 return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b);
232 /// \brief Horizontally adds the adjacent pairs of values contained in 2 packed
233 /// 64-bit vectors of [4 x i16].
235 /// \headerfile <x86intrin.h>
237 /// This intrinsic corresponds to the \c PHADDW instruction.
239 /// \param __a
240 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
241 /// horizontal sums of the values are stored in the lower bits of the
242 /// destination.
243 /// \param __b
244 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
245 /// horizontal sums of the values are stored in the upper bits of the
246 /// destination.
247 /// \returns A 64-bit vector of [4 x i16] containing the horizontal sums of both
248 /// operands.
249 static __inline__ __m64 __DEFAULT_FN_ATTRS
250 _mm_hadd_pi16(__m64 __a, __m64 __b)
252 return (__m64)__builtin_ia32_phaddw((__v4hi)__a, (__v4hi)__b);
255 /// \brief Horizontally adds the adjacent pairs of values contained in 2 packed
256 /// 64-bit vectors of [2 x i32].
258 /// \headerfile <x86intrin.h>
260 /// This intrinsic corresponds to the \c PHADDD instruction.
262 /// \param __a
263 /// A 64-bit vector of [2 x i32] containing one of the source operands. The
264 /// horizontal sums of the values are stored in the lower bits of the
265 /// destination.
266 /// \param __b
267 /// A 64-bit vector of [2 x i32] containing one of the source operands. The
268 /// horizontal sums of the values are stored in the upper bits of the
269 /// destination.
270 /// \returns A 64-bit vector of [2 x i32] containing the horizontal sums of both
271 /// operands.
272 static __inline__ __m64 __DEFAULT_FN_ATTRS
273 _mm_hadd_pi32(__m64 __a, __m64 __b)
275 return (__m64)__builtin_ia32_phaddd((__v2si)__a, (__v2si)__b);
278 /// \brief Horizontally adds the adjacent pairs of values contained in 2 packed
279 /// 128-bit vectors of [8 x i16]. Positive sums greater than 7FFFh are
280 /// saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h.
282 /// \headerfile <x86intrin.h>
284 /// This intrinsic corresponds to the \c VPHADDSW instruction.
286 /// \param __a
287 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
288 /// horizontal sums of the values are stored in the lower bits of the
289 /// destination.
290 /// \param __b
291 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
292 /// horizontal sums of the values are stored in the upper bits of the
293 /// destination.
294 /// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated
295 /// sums of both operands.
296 static __inline__ __m128i __DEFAULT_FN_ATTRS
297 _mm_hadds_epi16(__m128i __a, __m128i __b)
299 return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b);
302 /// \brief Horizontally adds the adjacent pairs of values contained in 2 packed
303 /// 64-bit vectors of [4 x i16]. Positive sums greater than 7FFFh are
304 /// saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h.
306 /// \headerfile <x86intrin.h>
308 /// This intrinsic corresponds to the \c PHADDSW instruction.
310 /// \param __a
311 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
312 /// horizontal sums of the values are stored in the lower bits of the
313 /// destination.
314 /// \param __b
315 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
316 /// horizontal sums of the values are stored in the upper bits of the
317 /// destination.
318 /// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
319 /// sums of both operands.
320 static __inline__ __m64 __DEFAULT_FN_ATTRS
321 _mm_hadds_pi16(__m64 __a, __m64 __b)
323 return (__m64)__builtin_ia32_phaddsw((__v4hi)__a, (__v4hi)__b);
326 /// \brief Horizontally subtracts the adjacent pairs of values contained in 2
327 /// packed 128-bit vectors of [8 x i16].
329 /// \headerfile <x86intrin.h>
331 /// This intrinsic corresponds to the \c VPHSUBW instruction.
333 /// \param __a
334 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
335 /// horizontal differences between the values are stored in the lower bits of
336 /// the destination.
337 /// \param __b
338 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
339 /// horizontal differences between the values are stored in the upper bits of
340 /// the destination.
341 /// \returns A 128-bit vector of [8 x i16] containing the horizontal differences
342 /// of both operands.
343 static __inline__ __m128i __DEFAULT_FN_ATTRS
344 _mm_hsub_epi16(__m128i __a, __m128i __b)
346 return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b);
349 /// \brief Horizontally subtracts the adjacent pairs of values contained in 2
350 /// packed 128-bit vectors of [4 x i32].
352 /// \headerfile <x86intrin.h>
354 /// This intrinsic corresponds to the \c VPHSUBD instruction.
356 /// \param __a
357 /// A 128-bit vector of [4 x i32] containing one of the source operands. The
358 /// horizontal differences between the values are stored in the lower bits of
359 /// the destination.
360 /// \param __b
361 /// A 128-bit vector of [4 x i32] containing one of the source operands. The
362 /// horizontal differences between the values are stored in the upper bits of
363 /// the destination.
364 /// \returns A 128-bit vector of [4 x i32] containing the horizontal differences
365 /// of both operands.
366 static __inline__ __m128i __DEFAULT_FN_ATTRS
367 _mm_hsub_epi32(__m128i __a, __m128i __b)
369 return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b);
372 /// \brief Horizontally subtracts the adjacent pairs of values contained in 2
373 /// packed 64-bit vectors of [4 x i16].
375 /// \headerfile <x86intrin.h>
377 /// This intrinsic corresponds to the \c PHSUBW instruction.
379 /// \param __a
380 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
381 /// horizontal differences between the values are stored in the lower bits of
382 /// the destination.
383 /// \param __b
384 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
385 /// horizontal differences between the values are stored in the upper bits of
386 /// the destination.
387 /// \returns A 64-bit vector of [4 x i16] containing the horizontal differences
388 /// of both operands.
389 static __inline__ __m64 __DEFAULT_FN_ATTRS
390 _mm_hsub_pi16(__m64 __a, __m64 __b)
392 return (__m64)__builtin_ia32_phsubw((__v4hi)__a, (__v4hi)__b);
395 /// \brief Horizontally subtracts the adjacent pairs of values contained in 2
396 /// packed 64-bit vectors of [2 x i32].
398 /// \headerfile <x86intrin.h>
400 /// This intrinsic corresponds to the \c PHSUBD instruction.
402 /// \param __a
403 /// A 64-bit vector of [2 x i32] containing one of the source operands. The
404 /// horizontal differences between the values are stored in the lower bits of
405 /// the destination.
406 /// \param __b
407 /// A 64-bit vector of [2 x i32] containing one of the source operands. The
408 /// horizontal differences between the values are stored in the upper bits of
409 /// the destination.
410 /// \returns A 64-bit vector of [2 x i32] containing the horizontal differences
411 /// of both operands.
412 static __inline__ __m64 __DEFAULT_FN_ATTRS
413 _mm_hsub_pi32(__m64 __a, __m64 __b)
415 return (__m64)__builtin_ia32_phsubd((__v2si)__a, (__v2si)__b);
418 /// \brief Horizontally subtracts the adjacent pairs of values contained in 2
419 /// packed 128-bit vectors of [8 x i16]. Positive differences greater than
420 /// 7FFFh are saturated to 7FFFh. Negative differences less than 8000h are
421 /// saturated to 8000h.
423 /// \headerfile <x86intrin.h>
425 /// This intrinsic corresponds to the \c VPHSUBSW instruction.
427 /// \param __a
428 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
429 /// horizontal differences between the values are stored in the lower bits of
430 /// the destination.
431 /// \param __b
432 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
433 /// horizontal differences between the values are stored in the upper bits of
434 /// the destination.
435 /// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated
436 /// differences of both operands.
437 static __inline__ __m128i __DEFAULT_FN_ATTRS
438 _mm_hsubs_epi16(__m128i __a, __m128i __b)
440 return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b);
443 /// \brief Horizontally subtracts the adjacent pairs of values contained in 2
444 /// packed 64-bit vectors of [4 x i16]. Positive differences greater than
445 /// 7FFFh are saturated to 7FFFh. Negative differences less than 8000h are
446 /// saturated to 8000h.
448 /// \headerfile <x86intrin.h>
450 /// This intrinsic corresponds to the \c PHSUBSW instruction.
452 /// \param __a
453 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
454 /// horizontal differences between the values are stored in the lower bits of
455 /// the destination.
456 /// \param __b
457 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
458 /// horizontal differences between the values are stored in the upper bits of
459 /// the destination.
460 /// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
461 /// differences of both operands.
462 static __inline__ __m64 __DEFAULT_FN_ATTRS
463 _mm_hsubs_pi16(__m64 __a, __m64 __b)
465 return (__m64)__builtin_ia32_phsubsw((__v4hi)__a, (__v4hi)__b);
468 /// \brief Multiplies corresponding pairs of packed 8-bit unsigned integer
469 /// values contained in the first source operand and packed 8-bit signed
470 /// integer values contained in the second source operand, adds pairs of
471 /// contiguous products with signed saturation, and writes the 16-bit sums to
472 /// the corresponding bits in the destination. For example, bits [7:0] of
473 /// both operands are multiplied, bits [15:8] of both operands are
474 /// multiplied, and the sum of both results is written to bits [15:0] of the
475 /// destination.
477 /// \headerfile <x86intrin.h>
479 /// This intrinsic corresponds to the \c VPMADDUBSW instruction.
481 /// \param __a
482 /// A 128-bit integer vector containing the first source operand.
483 /// \param __b
484 /// A 128-bit integer vector containing the second source operand.
485 /// \returns A 128-bit integer vector containing the sums of products of both
486 /// operands:
487 /// R0 := (__a0 * __b0) + (__a1 * __b1)
488 /// R1 := (__a2 * __b2) + (__a3 * __b3)
489 /// R2 := (__a4 * __b4) + (__a5 * __b5)
490 /// R3 := (__a6 * __b6) + (__a7 * __b7)
491 /// R4 := (__a8 * __b8) + (__a9 * __b9)
492 /// R5 := (__a10 * __b10) + (__a11 * __b11)
493 /// R6 := (__a12 * __b12) + (__a13 * __b13)
494 /// R7 := (__a14 * __b14) + (__a15 * __b15)
495 static __inline__ __m128i __DEFAULT_FN_ATTRS
496 _mm_maddubs_epi16(__m128i __a, __m128i __b)
498 return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b);
501 /// \brief Multiplies corresponding pairs of packed 8-bit unsigned integer
502 /// values contained in the first source operand and packed 8-bit signed
503 /// integer values contained in the second source operand, adds pairs of
504 /// contiguous products with signed saturation, and writes the 16-bit sums to
505 /// the corresponding bits in the destination. For example, bits [7:0] of
506 /// both operands are multiplied, bits [15:8] of both operands are
507 /// multiplied, and the sum of both results is written to bits [15:0] of the
508 /// destination.
510 /// \headerfile <x86intrin.h>
512 /// This intrinsic corresponds to the \c PMADDUBSW instruction.
514 /// \param __a
515 /// A 64-bit integer vector containing the first source operand.
516 /// \param __b
517 /// A 64-bit integer vector containing the second source operand.
518 /// \returns A 64-bit integer vector containing the sums of products of both
519 /// operands:
520 /// R0 := (__a0 * __b0) + (__a1 * __b1)
521 /// R1 := (__a2 * __b2) + (__a3 * __b3)
522 /// R2 := (__a4 * __b4) + (__a5 * __b5)
523 /// R3 := (__a6 * __b6) + (__a7 * __b7)
524 static __inline__ __m64 __DEFAULT_FN_ATTRS
525 _mm_maddubs_pi16(__m64 __a, __m64 __b)
527 return (__m64)__builtin_ia32_pmaddubsw((__v8qi)__a, (__v8qi)__b);
530 /// \brief Multiplies packed 16-bit signed integer values, truncates the 32-bit
531 /// products to the 18 most significant bits by right-shifting, rounds the
532 /// truncated value by adding 1, and writes bits [16:1] to the destination.
534 /// \headerfile <x86intrin.h>
536 /// This intrinsic corresponds to the \c VPMULHRSW instruction.
538 /// \param __a
539 /// A 128-bit vector of [8 x i16] containing one of the source operands.
540 /// \param __b
541 /// A 128-bit vector of [8 x i16] containing one of the source operands.
542 /// \returns A 128-bit vector of [8 x i16] containing the rounded and scaled
543 /// products of both operands.
544 static __inline__ __m128i __DEFAULT_FN_ATTRS
545 _mm_mulhrs_epi16(__m128i __a, __m128i __b)
547 return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b);
550 /// \brief Multiplies packed 16-bit signed integer values, truncates the 32-bit
551 /// products to the 18 most significant bits by right-shifting, rounds the
552 /// truncated value by adding 1, and writes bits [16:1] to the destination.
554 /// \headerfile <x86intrin.h>
556 /// This intrinsic corresponds to the \c PMULHRSW instruction.
558 /// \param __a
559 /// A 64-bit vector of [4 x i16] containing one of the source operands.
560 /// \param __b
561 /// A 64-bit vector of [4 x i16] containing one of the source operands.
562 /// \returns A 64-bit vector of [4 x i16] containing the rounded and scaled
563 /// products of both operands.
564 static __inline__ __m64 __DEFAULT_FN_ATTRS
565 _mm_mulhrs_pi16(__m64 __a, __m64 __b)
567 return (__m64)__builtin_ia32_pmulhrsw((__v4hi)__a, (__v4hi)__b);
570 /// \brief Copies the 8-bit integers from a 128-bit integer vector to the
571 /// destination or clears 8-bit values in the destination, as specified by
572 /// the second source operand.
574 /// \headerfile <x86intrin.h>
576 /// This intrinsic corresponds to the \c VPSHUFB instruction.
578 /// \param __a
579 /// A 128-bit integer vector containing the values to be copied.
580 /// \param __b
581 /// A 128-bit integer vector containing control bytes corresponding to
582 /// positions in the destination:
583 /// Bit 7:
584 /// 1: Clear the corresponding byte in the destination.
585 /// 0: Copy the selected source byte to the corresponding byte in the
586 /// destination.
587 /// Bits [6:4] Reserved.
588 /// Bits [3:0] select the source byte to be copied.
589 /// \returns A 128-bit integer vector containing the copied or cleared values.
590 static __inline__ __m128i __DEFAULT_FN_ATTRS
591 _mm_shuffle_epi8(__m128i __a, __m128i __b)
593 return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b);
596 /// \brief Copies the 8-bit integers from a 64-bit integer vector to the
597 /// destination or clears 8-bit values in the destination, as specified by
598 /// the second source operand.
600 /// \headerfile <x86intrin.h>
602 /// This intrinsic corresponds to the \c PSHUFB instruction.
604 /// \param __a
605 /// A 64-bit integer vector containing the values to be copied.
606 /// \param __b
607 /// A 64-bit integer vector containing control bytes corresponding to
608 /// positions in the destination:
609 /// Bit 7:
610 /// 1: Clear the corresponding byte in the destination.
611 /// 0: Copy the selected source byte to the corresponding byte in the
612 /// destination.
613 /// Bits [3:0] select the source byte to be copied.
614 /// \returns A 64-bit integer vector containing the copied or cleared values.
615 static __inline__ __m64 __DEFAULT_FN_ATTRS
616 _mm_shuffle_pi8(__m64 __a, __m64 __b)
618 return (__m64)__builtin_ia32_pshufb((__v8qi)__a, (__v8qi)__b);
621 /// \brief For each 8-bit integer in the first source operand, perform one of
622 /// the following actions as specified by the second source operand: If the
623 /// byte in the second source is negative, calculate the two's complement of
624 /// the corresponding byte in the first source, and write that value to the
625 /// destination. If the byte in the second source is positive, copy the
626 /// corresponding byte from the first source to the destination. If the byte
627 /// in the second source is zero, clear the corresponding byte in the
628 /// destination.
630 /// \headerfile <x86intrin.h>
632 /// This intrinsic corresponds to the \c VPSIGNB instruction.
634 /// \param __a
635 /// A 128-bit integer vector containing the values to be copied.
636 /// \param __b
637 /// A 128-bit integer vector containing control bytes corresponding to
638 /// positions in the destination.
639 /// \returns A 128-bit integer vector containing the resultant values.
640 static __inline__ __m128i __DEFAULT_FN_ATTRS
641 _mm_sign_epi8(__m128i __a, __m128i __b)
643 return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b);
646 /// \brief For each 16-bit integer in the first source operand, perform one of
647 /// the following actions as specified by the second source operand: If the
648 /// word in the second source is negative, calculate the two's complement of
649 /// the corresponding word in the first source, and write that value to the
650 /// destination. If the word in the second source is positive, copy the
651 /// corresponding word from the first source to the destination. If the word
652 /// in the second source is zero, clear the corresponding word in the
653 /// destination.
655 /// \headerfile <x86intrin.h>
657 /// This intrinsic corresponds to the \c VPSIGNW instruction.
659 /// \param __a
660 /// A 128-bit integer vector containing the values to be copied.
661 /// \param __b
662 /// A 128-bit integer vector containing control words corresponding to
663 /// positions in the destination.
664 /// \returns A 128-bit integer vector containing the resultant values.
665 static __inline__ __m128i __DEFAULT_FN_ATTRS
666 _mm_sign_epi16(__m128i __a, __m128i __b)
668 return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b);
671 /// \brief For each 32-bit integer in the first source operand, perform one of
672 /// the following actions as specified by the second source operand: If the
673 /// doubleword in the second source is negative, calculate the two's
674 /// complement of the corresponding word in the first source, and write that
675 /// value to the destination. If the doubleword in the second source is
676 /// positive, copy the corresponding word from the first source to the
677 /// destination. If the doubleword in the second source is zero, clear the
678 /// corresponding word in the destination.
680 /// \headerfile <x86intrin.h>
682 /// This intrinsic corresponds to the \c VPSIGND instruction.
684 /// \param __a
685 /// A 128-bit integer vector containing the values to be copied.
686 /// \param __b
687 /// A 128-bit integer vector containing control doublewords corresponding to
688 /// positions in the destination.
689 /// \returns A 128-bit integer vector containing the resultant values.
690 static __inline__ __m128i __DEFAULT_FN_ATTRS
691 _mm_sign_epi32(__m128i __a, __m128i __b)
693 return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b);
696 /// \brief For each 8-bit integer in the first source operand, perform one of
697 /// the following actions as specified by the second source operand: If the
698 /// byte in the second source is negative, calculate the two's complement of
699 /// the corresponding byte in the first source, and write that value to the
700 /// destination. If the byte in the second source is positive, copy the
701 /// corresponding byte from the first source to the destination. If the byte
702 /// in the second source is zero, clear the corresponding byte in the
703 /// destination.
705 /// \headerfile <x86intrin.h>
707 /// This intrinsic corresponds to the \c PSIGNB instruction.
709 /// \param __a
710 /// A 64-bit integer vector containing the values to be copied.
711 /// \param __b
712 /// A 64-bit integer vector containing control bytes corresponding to
713 /// positions in the destination.
714 /// \returns A 64-bit integer vector containing the resultant values.
715 static __inline__ __m64 __DEFAULT_FN_ATTRS
716 _mm_sign_pi8(__m64 __a, __m64 __b)
718 return (__m64)__builtin_ia32_psignb((__v8qi)__a, (__v8qi)__b);
721 /// \brief For each 16-bit integer in the first source operand, perform one of
722 /// the following actions as specified by the second source operand: If the
723 /// word in the second source is negative, calculate the two's complement of
724 /// the corresponding word in the first source, and write that value to the
725 /// destination. If the word in the second source is positive, copy the
726 /// corresponding word from the first source to the destination. If the word
727 /// in the second source is zero, clear the corresponding word in the
728 /// destination.
730 /// \headerfile <x86intrin.h>
732 /// This intrinsic corresponds to the \c PSIGNW instruction.
734 /// \param __a
735 /// A 64-bit integer vector containing the values to be copied.
736 /// \param __b
737 /// A 64-bit integer vector containing control words corresponding to
738 /// positions in the destination.
739 /// \returns A 64-bit integer vector containing the resultant values.
740 static __inline__ __m64 __DEFAULT_FN_ATTRS
741 _mm_sign_pi16(__m64 __a, __m64 __b)
743 return (__m64)__builtin_ia32_psignw((__v4hi)__a, (__v4hi)__b);
746 /// \brief For each 32-bit integer in the first source operand, perform one of
747 /// the following actions as specified by the second source operand: If the
748 /// doubleword in the second source is negative, calculate the two's
749 /// complement of the corresponding doubleword in the first source, and
750 /// write that value to the destination. If the doubleword in the second
751 /// source is positive, copy the corresponding doubleword from the first
752 /// source to the destination. If the doubleword in the second source is
753 /// zero, clear the corresponding doubleword in the destination.
755 /// \headerfile <x86intrin.h>
757 /// This intrinsic corresponds to the \c PSIGND instruction.
759 /// \param __a
760 /// A 64-bit integer vector containing the values to be copied.
761 /// \param __b
762 /// A 64-bit integer vector containing two control doublewords corresponding
763 /// to positions in the destination.
764 /// \returns A 64-bit integer vector containing the resultant values.
765 static __inline__ __m64 __DEFAULT_FN_ATTRS
766 _mm_sign_pi32(__m64 __a, __m64 __b)
768 return (__m64)__builtin_ia32_psignd((__v2si)__a, (__v2si)__b);
771 #undef __DEFAULT_FN_ATTRS
773 #endif /* __TMMINTRIN_H */