1 /*===---- avx512vlbwintrin.h - AVX512VL and AVX512BW intrinsics ------------===
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
4 * of this software and associated documentation files (the "Software"), to deal
5 * in the Software without restriction, including without limitation the rights
6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 * copies of the Software, and to permit persons to whom the Software is
8 * furnished to do so, subject to the following conditions:
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 *===-----------------------------------------------------------------------===
25 #error "Never use <avx512vlbwintrin.h> directly; include <immintrin.h> instead."
28 #ifndef __AVX512VLBWINTRIN_H
29 #define __AVX512VLBWINTRIN_H
31 /* Define the default attributes for the functions in this file. */
/* Expands to a single __attribute__ list containing __always_inline__,
 * __nodebug__ and __target__("avx512vl,avx512bw"). In the definitions that
 * follow it is written after the return type and before the function name. */
32 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bw")))
34 static __inline __m128i __DEFAULT_FN_ATTRS
36 return (__m128i
)(__v8hi
){ 0, 0, 0, 0, 0, 0, 0, 0 };
41 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
42 _mm_cmpeq_epi8_mask(__m128i __a
, __m128i __b
) {
43 return (__mmask16
)__builtin_ia32_pcmpeqb128_mask((__v16qi
)__a
, (__v16qi
)__b
,
47 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
48 _mm_mask_cmpeq_epi8_mask(__mmask16 __u
, __m128i __a
, __m128i __b
) {
49 return (__mmask16
)__builtin_ia32_pcmpeqb128_mask((__v16qi
)__a
, (__v16qi
)__b
,
53 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
54 _mm_cmpeq_epu8_mask(__m128i __a
, __m128i __b
) {
55 return (__mmask16
)__builtin_ia32_ucmpb128_mask((__v16qi
)__a
, (__v16qi
)__b
, 0,
59 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
60 _mm_mask_cmpeq_epu8_mask(__mmask16 __u
, __m128i __a
, __m128i __b
) {
61 return (__mmask16
)__builtin_ia32_ucmpb128_mask((__v16qi
)__a
, (__v16qi
)__b
, 0,
65 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
66 _mm256_cmpeq_epi8_mask(__m256i __a
, __m256i __b
) {
67 return (__mmask32
)__builtin_ia32_pcmpeqb256_mask((__v32qi
)__a
, (__v32qi
)__b
,
71 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
72 _mm256_mask_cmpeq_epi8_mask(__mmask32 __u
, __m256i __a
, __m256i __b
) {
73 return (__mmask32
)__builtin_ia32_pcmpeqb256_mask((__v32qi
)__a
, (__v32qi
)__b
,
77 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
78 _mm256_cmpeq_epu8_mask(__m256i __a
, __m256i __b
) {
79 return (__mmask32
)__builtin_ia32_ucmpb256_mask((__v32qi
)__a
, (__v32qi
)__b
, 0,
83 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
84 _mm256_mask_cmpeq_epu8_mask(__mmask32 __u
, __m256i __a
, __m256i __b
) {
85 return (__mmask32
)__builtin_ia32_ucmpb256_mask((__v32qi
)__a
, (__v32qi
)__b
, 0,
89 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
90 _mm_cmpeq_epi16_mask(__m128i __a
, __m128i __b
) {
91 return (__mmask8
)__builtin_ia32_pcmpeqw128_mask((__v8hi
)__a
, (__v8hi
)__b
,
95 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
96 _mm_mask_cmpeq_epi16_mask(__mmask8 __u
, __m128i __a
, __m128i __b
) {
97 return (__mmask8
)__builtin_ia32_pcmpeqw128_mask((__v8hi
)__a
, (__v8hi
)__b
,
101 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
102 _mm_cmpeq_epu16_mask(__m128i __a
, __m128i __b
) {
103 return (__mmask8
)__builtin_ia32_ucmpw128_mask((__v8hi
)__a
, (__v8hi
)__b
, 0,
107 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
108 _mm_mask_cmpeq_epu16_mask(__mmask8 __u
, __m128i __a
, __m128i __b
) {
109 return (__mmask8
)__builtin_ia32_ucmpw128_mask((__v8hi
)__a
, (__v8hi
)__b
, 0,
113 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
114 _mm256_cmpeq_epi16_mask(__m256i __a
, __m256i __b
) {
115 return (__mmask16
)__builtin_ia32_pcmpeqw256_mask((__v16hi
)__a
, (__v16hi
)__b
,
119 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
120 _mm256_mask_cmpeq_epi16_mask(__mmask16 __u
, __m256i __a
, __m256i __b
) {
121 return (__mmask16
)__builtin_ia32_pcmpeqw256_mask((__v16hi
)__a
, (__v16hi
)__b
,
125 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
126 _mm256_cmpeq_epu16_mask(__m256i __a
, __m256i __b
) {
127 return (__mmask16
)__builtin_ia32_ucmpw256_mask((__v16hi
)__a
, (__v16hi
)__b
, 0,
131 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
132 _mm256_mask_cmpeq_epu16_mask(__mmask16 __u
, __m256i __a
, __m256i __b
) {
133 return (__mmask16
)__builtin_ia32_ucmpw256_mask((__v16hi
)__a
, (__v16hi
)__b
, 0,
137 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
138 _mm_cmpge_epi8_mask(__m128i __a
, __m128i __b
) {
139 return (__mmask16
)__builtin_ia32_cmpb128_mask((__v16qi
)__a
, (__v16qi
)__b
, 5,
143 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
144 _mm_mask_cmpge_epi8_mask(__mmask16 __u
, __m128i __a
, __m128i __b
) {
145 return (__mmask16
)__builtin_ia32_cmpb128_mask((__v16qi
)__a
, (__v16qi
)__b
, 5,
149 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
150 _mm_cmpge_epu8_mask(__m128i __a
, __m128i __b
) {
151 return (__mmask16
)__builtin_ia32_ucmpb128_mask((__v16qi
)__a
, (__v16qi
)__b
, 5,
155 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
156 _mm_mask_cmpge_epu8_mask(__mmask16 __u
, __m128i __a
, __m128i __b
) {
157 return (__mmask16
)__builtin_ia32_ucmpb128_mask((__v16qi
)__a
, (__v16qi
)__b
, 5,
161 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
162 _mm256_cmpge_epi8_mask(__m256i __a
, __m256i __b
) {
163 return (__mmask32
)__builtin_ia32_cmpb256_mask((__v32qi
)__a
, (__v32qi
)__b
, 5,
167 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
168 _mm256_mask_cmpge_epi8_mask(__mmask32 __u
, __m256i __a
, __m256i __b
) {
169 return (__mmask32
)__builtin_ia32_cmpb256_mask((__v32qi
)__a
, (__v32qi
)__b
, 5,
173 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
174 _mm256_cmpge_epu8_mask(__m256i __a
, __m256i __b
) {
175 return (__mmask32
)__builtin_ia32_ucmpb256_mask((__v32qi
)__a
, (__v32qi
)__b
, 5,
179 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
180 _mm256_mask_cmpge_epu8_mask(__mmask32 __u
, __m256i __a
, __m256i __b
) {
181 return (__mmask32
)__builtin_ia32_ucmpb256_mask((__v32qi
)__a
, (__v32qi
)__b
, 5,
185 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
186 _mm_cmpge_epi16_mask(__m128i __a
, __m128i __b
) {
187 return (__mmask8
)__builtin_ia32_cmpw128_mask((__v8hi
)__a
, (__v8hi
)__b
, 5,
191 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
192 _mm_mask_cmpge_epi16_mask(__mmask8 __u
, __m128i __a
, __m128i __b
) {
193 return (__mmask8
)__builtin_ia32_cmpw128_mask((__v8hi
)__a
, (__v8hi
)__b
, 5,
197 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
198 _mm_cmpge_epu16_mask(__m128i __a
, __m128i __b
) {
199 return (__mmask8
)__builtin_ia32_ucmpw128_mask((__v8hi
)__a
, (__v8hi
)__b
, 5,
203 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
204 _mm_mask_cmpge_epu16_mask(__mmask8 __u
, __m128i __a
, __m128i __b
) {
205 return (__mmask8
)__builtin_ia32_ucmpw128_mask((__v8hi
)__a
, (__v8hi
)__b
, 5,
209 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
210 _mm256_cmpge_epi16_mask(__m256i __a
, __m256i __b
) {
211 return (__mmask16
)__builtin_ia32_cmpw256_mask((__v16hi
)__a
, (__v16hi
)__b
, 5,
215 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
216 _mm256_mask_cmpge_epi16_mask(__mmask16 __u
, __m256i __a
, __m256i __b
) {
217 return (__mmask16
)__builtin_ia32_cmpw256_mask((__v16hi
)__a
, (__v16hi
)__b
, 5,
221 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
222 _mm256_cmpge_epu16_mask(__m256i __a
, __m256i __b
) {
223 return (__mmask16
)__builtin_ia32_ucmpw256_mask((__v16hi
)__a
, (__v16hi
)__b
, 5,
227 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
228 _mm256_mask_cmpge_epu16_mask(__mmask16 __u
, __m256i __a
, __m256i __b
) {
229 return (__mmask16
)__builtin_ia32_ucmpw256_mask((__v16hi
)__a
, (__v16hi
)__b
, 5,
233 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
234 _mm_cmpgt_epi8_mask(__m128i __a
, __m128i __b
) {
235 return (__mmask16
)__builtin_ia32_pcmpgtb128_mask((__v16qi
)__a
, (__v16qi
)__b
,
239 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
240 _mm_mask_cmpgt_epi8_mask(__mmask16 __u
, __m128i __a
, __m128i __b
) {
241 return (__mmask16
)__builtin_ia32_pcmpgtb128_mask((__v16qi
)__a
, (__v16qi
)__b
,
245 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
246 _mm_cmpgt_epu8_mask(__m128i __a
, __m128i __b
) {
247 return (__mmask16
)__builtin_ia32_ucmpb128_mask((__v16qi
)__a
, (__v16qi
)__b
, 6,
251 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
252 _mm_mask_cmpgt_epu8_mask(__mmask16 __u
, __m128i __a
, __m128i __b
) {
253 return (__mmask16
)__builtin_ia32_ucmpb128_mask((__v16qi
)__a
, (__v16qi
)__b
, 6,
257 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
258 _mm256_cmpgt_epi8_mask(__m256i __a
, __m256i __b
) {
259 return (__mmask32
)__builtin_ia32_pcmpgtb256_mask((__v32qi
)__a
, (__v32qi
)__b
,
263 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
264 _mm256_mask_cmpgt_epi8_mask(__mmask32 __u
, __m256i __a
, __m256i __b
) {
265 return (__mmask32
)__builtin_ia32_pcmpgtb256_mask((__v32qi
)__a
, (__v32qi
)__b
,
269 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
270 _mm256_cmpgt_epu8_mask(__m256i __a
, __m256i __b
) {
271 return (__mmask32
)__builtin_ia32_ucmpb256_mask((__v32qi
)__a
, (__v32qi
)__b
, 6,
275 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
276 _mm256_mask_cmpgt_epu8_mask(__mmask32 __u
, __m256i __a
, __m256i __b
) {
277 return (__mmask32
)__builtin_ia32_ucmpb256_mask((__v32qi
)__a
, (__v32qi
)__b
, 6,
281 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
282 _mm_cmpgt_epi16_mask(__m128i __a
, __m128i __b
) {
283 return (__mmask8
)__builtin_ia32_pcmpgtw128_mask((__v8hi
)__a
, (__v8hi
)__b
,
287 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
288 _mm_mask_cmpgt_epi16_mask(__mmask8 __u
, __m128i __a
, __m128i __b
) {
289 return (__mmask8
)__builtin_ia32_pcmpgtw128_mask((__v8hi
)__a
, (__v8hi
)__b
,
293 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
294 _mm_cmpgt_epu16_mask(__m128i __a
, __m128i __b
) {
295 return (__mmask8
)__builtin_ia32_ucmpw128_mask((__v8hi
)__a
, (__v8hi
)__b
, 6,
299 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
300 _mm_mask_cmpgt_epu16_mask(__mmask8 __u
, __m128i __a
, __m128i __b
) {
301 return (__mmask8
)__builtin_ia32_ucmpw128_mask((__v8hi
)__a
, (__v8hi
)__b
, 6,
305 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
306 _mm256_cmpgt_epi16_mask(__m256i __a
, __m256i __b
) {
307 return (__mmask16
)__builtin_ia32_pcmpgtw256_mask((__v16hi
)__a
, (__v16hi
)__b
,
311 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
312 _mm256_mask_cmpgt_epi16_mask(__mmask16 __u
, __m256i __a
, __m256i __b
) {
313 return (__mmask16
)__builtin_ia32_pcmpgtw256_mask((__v16hi
)__a
, (__v16hi
)__b
,
317 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
318 _mm256_cmpgt_epu16_mask(__m256i __a
, __m256i __b
) {
319 return (__mmask16
)__builtin_ia32_ucmpw256_mask((__v16hi
)__a
, (__v16hi
)__b
, 6,
323 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
324 _mm256_mask_cmpgt_epu16_mask(__mmask16 __u
, __m256i __a
, __m256i __b
) {
325 return (__mmask16
)__builtin_ia32_ucmpw256_mask((__v16hi
)__a
, (__v16hi
)__b
, 6,
329 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
330 _mm_cmple_epi8_mask(__m128i __a
, __m128i __b
) {
331 return (__mmask16
)__builtin_ia32_cmpb128_mask((__v16qi
)__a
, (__v16qi
)__b
, 2,
335 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
336 _mm_mask_cmple_epi8_mask(__mmask16 __u
, __m128i __a
, __m128i __b
) {
337 return (__mmask16
)__builtin_ia32_cmpb128_mask((__v16qi
)__a
, (__v16qi
)__b
, 2,
341 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
342 _mm_cmple_epu8_mask(__m128i __a
, __m128i __b
) {
343 return (__mmask16
)__builtin_ia32_ucmpb128_mask((__v16qi
)__a
, (__v16qi
)__b
, 2,
347 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
348 _mm_mask_cmple_epu8_mask(__mmask16 __u
, __m128i __a
, __m128i __b
) {
349 return (__mmask16
)__builtin_ia32_ucmpb128_mask((__v16qi
)__a
, (__v16qi
)__b
, 2,
353 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
354 _mm256_cmple_epi8_mask(__m256i __a
, __m256i __b
) {
355 return (__mmask32
)__builtin_ia32_cmpb256_mask((__v32qi
)__a
, (__v32qi
)__b
, 2,
359 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
360 _mm256_mask_cmple_epi8_mask(__mmask32 __u
, __m256i __a
, __m256i __b
) {
361 return (__mmask32
)__builtin_ia32_cmpb256_mask((__v32qi
)__a
, (__v32qi
)__b
, 2,
365 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
366 _mm256_cmple_epu8_mask(__m256i __a
, __m256i __b
) {
367 return (__mmask32
)__builtin_ia32_ucmpb256_mask((__v32qi
)__a
, (__v32qi
)__b
, 2,
371 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
372 _mm256_mask_cmple_epu8_mask(__mmask32 __u
, __m256i __a
, __m256i __b
) {
373 return (__mmask32
)__builtin_ia32_ucmpb256_mask((__v32qi
)__a
, (__v32qi
)__b
, 2,
377 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
378 _mm_cmple_epi16_mask(__m128i __a
, __m128i __b
) {
379 return (__mmask8
)__builtin_ia32_cmpw128_mask((__v8hi
)__a
, (__v8hi
)__b
, 2,
383 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
384 _mm_mask_cmple_epi16_mask(__mmask8 __u
, __m128i __a
, __m128i __b
) {
385 return (__mmask8
)__builtin_ia32_cmpw128_mask((__v8hi
)__a
, (__v8hi
)__b
, 2,
389 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
390 _mm_cmple_epu16_mask(__m128i __a
, __m128i __b
) {
391 return (__mmask8
)__builtin_ia32_ucmpw128_mask((__v8hi
)__a
, (__v8hi
)__b
, 2,
395 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
396 _mm_mask_cmple_epu16_mask(__mmask8 __u
, __m128i __a
, __m128i __b
) {
397 return (__mmask8
)__builtin_ia32_ucmpw128_mask((__v8hi
)__a
, (__v8hi
)__b
, 2,
401 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
402 _mm256_cmple_epi16_mask(__m256i __a
, __m256i __b
) {
403 return (__mmask16
)__builtin_ia32_cmpw256_mask((__v16hi
)__a
, (__v16hi
)__b
, 2,
407 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
408 _mm256_mask_cmple_epi16_mask(__mmask16 __u
, __m256i __a
, __m256i __b
) {
409 return (__mmask16
)__builtin_ia32_cmpw256_mask((__v16hi
)__a
, (__v16hi
)__b
, 2,
413 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
414 _mm256_cmple_epu16_mask(__m256i __a
, __m256i __b
) {
415 return (__mmask16
)__builtin_ia32_ucmpw256_mask((__v16hi
)__a
, (__v16hi
)__b
, 2,
419 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
420 _mm256_mask_cmple_epu16_mask(__mmask16 __u
, __m256i __a
, __m256i __b
) {
421 return (__mmask16
)__builtin_ia32_ucmpw256_mask((__v16hi
)__a
, (__v16hi
)__b
, 2,
425 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
426 _mm_cmplt_epi8_mask(__m128i __a
, __m128i __b
) {
427 return (__mmask16
)__builtin_ia32_cmpb128_mask((__v16qi
)__a
, (__v16qi
)__b
, 1,
431 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
432 _mm_mask_cmplt_epi8_mask(__mmask16 __u
, __m128i __a
, __m128i __b
) {
433 return (__mmask16
)__builtin_ia32_cmpb128_mask((__v16qi
)__a
, (__v16qi
)__b
, 1,
437 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
438 _mm_cmplt_epu8_mask(__m128i __a
, __m128i __b
) {
439 return (__mmask16
)__builtin_ia32_ucmpb128_mask((__v16qi
)__a
, (__v16qi
)__b
, 1,
443 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
444 _mm_mask_cmplt_epu8_mask(__mmask16 __u
, __m128i __a
, __m128i __b
) {
445 return (__mmask16
)__builtin_ia32_ucmpb128_mask((__v16qi
)__a
, (__v16qi
)__b
, 1,
449 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
450 _mm256_cmplt_epi8_mask(__m256i __a
, __m256i __b
) {
451 return (__mmask32
)__builtin_ia32_cmpb256_mask((__v32qi
)__a
, (__v32qi
)__b
, 1,
455 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
456 _mm256_mask_cmplt_epi8_mask(__mmask32 __u
, __m256i __a
, __m256i __b
) {
457 return (__mmask32
)__builtin_ia32_cmpb256_mask((__v32qi
)__a
, (__v32qi
)__b
, 1,
461 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
462 _mm256_cmplt_epu8_mask(__m256i __a
, __m256i __b
) {
463 return (__mmask32
)__builtin_ia32_ucmpb256_mask((__v32qi
)__a
, (__v32qi
)__b
, 1,
467 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
468 _mm256_mask_cmplt_epu8_mask(__mmask32 __u
, __m256i __a
, __m256i __b
) {
469 return (__mmask32
)__builtin_ia32_ucmpb256_mask((__v32qi
)__a
, (__v32qi
)__b
, 1,
473 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
474 _mm_cmplt_epi16_mask(__m128i __a
, __m128i __b
) {
475 return (__mmask8
)__builtin_ia32_cmpw128_mask((__v8hi
)__a
, (__v8hi
)__b
, 1,
479 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
480 _mm_mask_cmplt_epi16_mask(__mmask8 __u
, __m128i __a
, __m128i __b
) {
481 return (__mmask8
)__builtin_ia32_cmpw128_mask((__v8hi
)__a
, (__v8hi
)__b
, 1,
485 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
486 _mm_cmplt_epu16_mask(__m128i __a
, __m128i __b
) {
487 return (__mmask8
)__builtin_ia32_ucmpw128_mask((__v8hi
)__a
, (__v8hi
)__b
, 1,
491 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
492 _mm_mask_cmplt_epu16_mask(__mmask8 __u
, __m128i __a
, __m128i __b
) {
493 return (__mmask8
)__builtin_ia32_ucmpw128_mask((__v8hi
)__a
, (__v8hi
)__b
, 1,
497 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
498 _mm256_cmplt_epi16_mask(__m256i __a
, __m256i __b
) {
499 return (__mmask16
)__builtin_ia32_cmpw256_mask((__v16hi
)__a
, (__v16hi
)__b
, 1,
503 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
504 _mm256_mask_cmplt_epi16_mask(__mmask16 __u
, __m256i __a
, __m256i __b
) {
505 return (__mmask16
)__builtin_ia32_cmpw256_mask((__v16hi
)__a
, (__v16hi
)__b
, 1,
509 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
510 _mm256_cmplt_epu16_mask(__m256i __a
, __m256i __b
) {
511 return (__mmask16
)__builtin_ia32_ucmpw256_mask((__v16hi
)__a
, (__v16hi
)__b
, 1,
515 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
516 _mm256_mask_cmplt_epu16_mask(__mmask16 __u
, __m256i __a
, __m256i __b
) {
517 return (__mmask16
)__builtin_ia32_ucmpw256_mask((__v16hi
)__a
, (__v16hi
)__b
, 1,
521 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
522 _mm_cmpneq_epi8_mask(__m128i __a
, __m128i __b
) {
523 return (__mmask16
)__builtin_ia32_cmpb128_mask((__v16qi
)__a
, (__v16qi
)__b
, 4,
527 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
528 _mm_mask_cmpneq_epi8_mask(__mmask16 __u
, __m128i __a
, __m128i __b
) {
529 return (__mmask16
)__builtin_ia32_cmpb128_mask((__v16qi
)__a
, (__v16qi
)__b
, 4,
533 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
534 _mm_cmpneq_epu8_mask(__m128i __a
, __m128i __b
) {
535 return (__mmask16
)__builtin_ia32_ucmpb128_mask((__v16qi
)__a
, (__v16qi
)__b
, 4,
539 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
540 _mm_mask_cmpneq_epu8_mask(__mmask16 __u
, __m128i __a
, __m128i __b
) {
541 return (__mmask16
)__builtin_ia32_ucmpb128_mask((__v16qi
)__a
, (__v16qi
)__b
, 4,
545 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
546 _mm256_cmpneq_epi8_mask(__m256i __a
, __m256i __b
) {
547 return (__mmask32
)__builtin_ia32_cmpb256_mask((__v32qi
)__a
, (__v32qi
)__b
, 4,
551 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
552 _mm256_mask_cmpneq_epi8_mask(__mmask32 __u
, __m256i __a
, __m256i __b
) {
553 return (__mmask32
)__builtin_ia32_cmpb256_mask((__v32qi
)__a
, (__v32qi
)__b
, 4,
557 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
558 _mm256_cmpneq_epu8_mask(__m256i __a
, __m256i __b
) {
559 return (__mmask32
)__builtin_ia32_ucmpb256_mask((__v32qi
)__a
, (__v32qi
)__b
, 4,
563 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
564 _mm256_mask_cmpneq_epu8_mask(__mmask32 __u
, __m256i __a
, __m256i __b
) {
565 return (__mmask32
)__builtin_ia32_ucmpb256_mask((__v32qi
)__a
, (__v32qi
)__b
, 4,
569 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
570 _mm_cmpneq_epi16_mask(__m128i __a
, __m128i __b
) {
571 return (__mmask8
)__builtin_ia32_cmpw128_mask((__v8hi
)__a
, (__v8hi
)__b
, 4,
575 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
576 _mm_mask_cmpneq_epi16_mask(__mmask8 __u
, __m128i __a
, __m128i __b
) {
577 return (__mmask8
)__builtin_ia32_cmpw128_mask((__v8hi
)__a
, (__v8hi
)__b
, 4,
581 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
582 _mm_cmpneq_epu16_mask(__m128i __a
, __m128i __b
) {
583 return (__mmask8
)__builtin_ia32_ucmpw128_mask((__v8hi
)__a
, (__v8hi
)__b
, 4,
587 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
588 _mm_mask_cmpneq_epu16_mask(__mmask8 __u
, __m128i __a
, __m128i __b
) {
589 return (__mmask8
)__builtin_ia32_ucmpw128_mask((__v8hi
)__a
, (__v8hi
)__b
, 4,
593 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
594 _mm256_cmpneq_epi16_mask(__m256i __a
, __m256i __b
) {
595 return (__mmask16
)__builtin_ia32_cmpw256_mask((__v16hi
)__a
, (__v16hi
)__b
, 4,
599 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
600 _mm256_mask_cmpneq_epi16_mask(__mmask16 __u
, __m256i __a
, __m256i __b
) {
601 return (__mmask16
)__builtin_ia32_cmpw256_mask((__v16hi
)__a
, (__v16hi
)__b
, 4,
605 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
606 _mm256_cmpneq_epu16_mask(__m256i __a
, __m256i __b
) {
607 return (__mmask16
)__builtin_ia32_ucmpw256_mask((__v16hi
)__a
, (__v16hi
)__b
, 4,
611 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
612 _mm256_mask_cmpneq_epu16_mask(__mmask16 __u
, __m256i __a
, __m256i __b
) {
613 return (__mmask16
)__builtin_ia32_ucmpw256_mask((__v16hi
)__a
, (__v16hi
)__b
, 4,
617 static __inline__ __m256i __DEFAULT_FN_ATTRS
618 _mm256_mask_add_epi8 (__m256i __W
, __mmask32 __U
, __m256i __A
, __m256i __B
){
619 return (__m256i
) __builtin_ia32_paddb256_mask ((__v32qi
) __A
,
625 static __inline__ __m256i __DEFAULT_FN_ATTRS
626 _mm256_maskz_add_epi8 (__mmask32 __U
, __m256i __A
, __m256i __B
) {
627 return (__m256i
) __builtin_ia32_paddb256_mask ((__v32qi
) __A
,
630 _mm256_setzero_si256 (),
634 static __inline__ __m256i __DEFAULT_FN_ATTRS
635 _mm256_mask_add_epi16 (__m256i __W
, __mmask16 __U
, __m256i __A
, __m256i __B
) {
636 return (__m256i
) __builtin_ia32_paddw256_mask ((__v16hi
) __A
,
642 static __inline__ __m256i __DEFAULT_FN_ATTRS
643 _mm256_maskz_add_epi16 (__mmask16 __U
, __m256i __A
, __m256i __B
) {
644 return (__m256i
) __builtin_ia32_paddw256_mask ((__v16hi
) __A
,
647 _mm256_setzero_si256 (),
651 static __inline__ __m256i __DEFAULT_FN_ATTRS
652 _mm256_mask_sub_epi8 (__m256i __W
, __mmask32 __U
, __m256i __A
, __m256i __B
) {
653 return (__m256i
) __builtin_ia32_psubb256_mask ((__v32qi
) __A
,
659 static __inline__ __m256i __DEFAULT_FN_ATTRS
660 _mm256_maskz_sub_epi8 (__mmask32 __U
, __m256i __A
, __m256i __B
) {
661 return (__m256i
) __builtin_ia32_psubb256_mask ((__v32qi
) __A
,
664 _mm256_setzero_si256 (),
668 static __inline__ __m256i __DEFAULT_FN_ATTRS
669 _mm256_mask_sub_epi16 (__m256i __W
, __mmask16 __U
, __m256i __A
, __m256i __B
) {
670 return (__m256i
) __builtin_ia32_psubw256_mask ((__v16hi
) __A
,
676 static __inline__ __m256i __DEFAULT_FN_ATTRS
677 _mm256_maskz_sub_epi16 (__mmask16 __U
, __m256i __A
, __m256i __B
) {
678 return (__m256i
) __builtin_ia32_psubw256_mask ((__v16hi
) __A
,
681 _mm256_setzero_si256 (),
684 static __inline__ __m128i __DEFAULT_FN_ATTRS
685 _mm_mask_add_epi8 (__m128i __W
, __mmask16 __U
, __m128i __A
, __m128i __B
) {
686 return (__m128i
) __builtin_ia32_paddb128_mask ((__v16qi
) __A
,
692 static __inline__ __m128i __DEFAULT_FN_ATTRS
693 _mm_maskz_add_epi8 (__mmask16 __U
, __m128i __A
, __m128i __B
) {
694 return (__m128i
) __builtin_ia32_paddb128_mask ((__v16qi
) __A
,
697 _mm_setzero_si128 (),
701 static __inline__ __m128i __DEFAULT_FN_ATTRS
702 _mm_mask_add_epi16 (__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
) {
703 return (__m128i
) __builtin_ia32_paddw128_mask ((__v8hi
) __A
,
709 static __inline__ __m128i __DEFAULT_FN_ATTRS
710 _mm_maskz_add_epi16 (__mmask8 __U
, __m128i __A
, __m128i __B
) {
711 return (__m128i
) __builtin_ia32_paddw128_mask ((__v8hi
) __A
,
714 _mm_setzero_si128 (),
718 static __inline__ __m128i __DEFAULT_FN_ATTRS
719 _mm_mask_sub_epi8 (__m128i __W
, __mmask16 __U
, __m128i __A
, __m128i __B
) {
720 return (__m128i
) __builtin_ia32_psubb128_mask ((__v16qi
) __A
,
726 static __inline__ __m128i __DEFAULT_FN_ATTRS
727 _mm_maskz_sub_epi8 (__mmask16 __U
, __m128i __A
, __m128i __B
) {
728 return (__m128i
) __builtin_ia32_psubb128_mask ((__v16qi
) __A
,
731 _mm_setzero_si128 (),
735 static __inline__ __m128i __DEFAULT_FN_ATTRS
736 _mm_mask_sub_epi16 (__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
) {
737 return (__m128i
) __builtin_ia32_psubw128_mask ((__v8hi
) __A
,
743 static __inline__ __m128i __DEFAULT_FN_ATTRS
744 _mm_maskz_sub_epi16 (__mmask8 __U
, __m128i __A
, __m128i __B
) {
745 return (__m128i
) __builtin_ia32_psubw128_mask ((__v8hi
) __A
,
748 _mm_setzero_si128 (),
752 static __inline__ __m256i __DEFAULT_FN_ATTRS
753 _mm256_mask_mullo_epi16 (__m256i __W
, __mmask16 __U
, __m256i __A
, __m256i __B
) {
754 return (__m256i
) __builtin_ia32_pmullw256_mask ((__v16hi
) __A
,
760 static __inline__ __m256i __DEFAULT_FN_ATTRS
761 _mm256_maskz_mullo_epi16 (__mmask16 __U
, __m256i __A
, __m256i __B
) {
762 return (__m256i
) __builtin_ia32_pmullw256_mask ((__v16hi
) __A
,
765 _mm256_setzero_si256 (),
769 static __inline__ __m128i __DEFAULT_FN_ATTRS
770 _mm_mask_mullo_epi16 (__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
) {
771 return (__m128i
) __builtin_ia32_pmullw128_mask ((__v8hi
) __A
,
777 static __inline__ __m128i __DEFAULT_FN_ATTRS
778 _mm_maskz_mullo_epi16 (__mmask8 __U
, __m128i __A
, __m128i __B
) {
779 return (__m128i
) __builtin_ia32_pmullw128_mask ((__v8hi
) __A
,
782 _mm_setzero_si128 (),
786 static __inline__ __m128i __DEFAULT_FN_ATTRS
787 _mm_mask_blend_epi8 (__mmask16 __U
, __m128i __A
, __m128i __W
)
789 return (__m128i
) __builtin_ia32_selectb_128 ((__mmask16
) __U
,
794 static __inline__ __m256i __DEFAULT_FN_ATTRS
795 _mm256_mask_blend_epi8 (__mmask32 __U
, __m256i __A
, __m256i __W
)
797 return (__m256i
) __builtin_ia32_selectb_256 ((__mmask32
) __U
,
802 static __inline__ __m128i __DEFAULT_FN_ATTRS
803 _mm_mask_blend_epi16 (__mmask8 __U
, __m128i __A
, __m128i __W
)
805 return (__m128i
) __builtin_ia32_selectw_128 ((__mmask8
) __U
,
810 static __inline__ __m256i __DEFAULT_FN_ATTRS
811 _mm256_mask_blend_epi16 (__mmask16 __U
, __m256i __A
, __m256i __W
)
813 return (__m256i
) __builtin_ia32_selectw_256 ((__mmask16
) __U
,
818 static __inline__ __m128i __DEFAULT_FN_ATTRS
819 _mm_mask_abs_epi8 (__m128i __W
, __mmask16 __U
, __m128i __A
)
821 return (__m128i
) __builtin_ia32_pabsb128_mask ((__v16qi
) __A
,
826 static __inline__ __m128i __DEFAULT_FN_ATTRS
827 _mm_maskz_abs_epi8 (__mmask16 __U
, __m128i __A
)
829 return (__m128i
) __builtin_ia32_pabsb128_mask ((__v16qi
) __A
,
830 (__v16qi
) _mm_setzero_si128 (),
834 static __inline__ __m256i __DEFAULT_FN_ATTRS
835 _mm256_mask_abs_epi8 (__m256i __W
, __mmask32 __U
, __m256i __A
)
837 return (__m256i
) __builtin_ia32_pabsb256_mask ((__v32qi
) __A
,
842 static __inline__ __m256i __DEFAULT_FN_ATTRS
843 _mm256_maskz_abs_epi8 (__mmask32 __U
, __m256i __A
)
845 return (__m256i
) __builtin_ia32_pabsb256_mask ((__v32qi
) __A
,
846 (__v32qi
) _mm256_setzero_si256 (),
850 static __inline__ __m128i __DEFAULT_FN_ATTRS
851 _mm_mask_abs_epi16 (__m128i __W
, __mmask8 __U
, __m128i __A
)
853 return (__m128i
) __builtin_ia32_pabsw128_mask ((__v8hi
) __A
,
858 static __inline__ __m128i __DEFAULT_FN_ATTRS
859 _mm_maskz_abs_epi16 (__mmask8 __U
, __m128i __A
)
861 return (__m128i
) __builtin_ia32_pabsw128_mask ((__v8hi
) __A
,
862 (__v8hi
) _mm_setzero_si128 (),
866 static __inline__ __m256i __DEFAULT_FN_ATTRS
867 _mm256_mask_abs_epi16 (__m256i __W
, __mmask16 __U
, __m256i __A
)
869 return (__m256i
) __builtin_ia32_pabsw256_mask ((__v16hi
) __A
,
874 static __inline__ __m256i __DEFAULT_FN_ATTRS
875 _mm256_maskz_abs_epi16 (__mmask16 __U
, __m256i __A
)
877 return (__m256i
) __builtin_ia32_pabsw256_mask ((__v16hi
) __A
,
878 (__v16hi
) _mm256_setzero_si256 (),
882 static __inline__ __m128i __DEFAULT_FN_ATTRS
883 _mm_maskz_packs_epi32 (__mmask8 __M
, __m128i __A
, __m128i __B
)
885 return (__m128i
) __builtin_ia32_packssdw128_mask ((__v4si
) __A
,
887 (__v8hi
) _mm_setzero_si128 (), __M
);
890 static __inline__ __m128i __DEFAULT_FN_ATTRS
891 _mm_mask_packs_epi32 (__m128i __W
, __mmask16 __M
, __m128i __A
,
894 return (__m128i
) __builtin_ia32_packssdw128_mask ((__v4si
) __A
,
899 static __inline__ __m256i __DEFAULT_FN_ATTRS
900 _mm256_maskz_packs_epi32 (__mmask16 __M
, __m256i __A
, __m256i __B
)
902 return (__m256i
) __builtin_ia32_packssdw256_mask ((__v8si
) __A
,
904 (__v16hi
) _mm256_setzero_si256 (),
908 static __inline__ __m256i __DEFAULT_FN_ATTRS
909 _mm256_mask_packs_epi32 (__m256i __W
, __mmask16 __M
, __m256i __A
,
912 return (__m256i
) __builtin_ia32_packssdw256_mask ((__v8si
) __A
,
917 static __inline__ __m128i __DEFAULT_FN_ATTRS
918 _mm_maskz_packs_epi16 (__mmask16 __M
, __m128i __A
, __m128i __B
)
920 return (__m128i
) __builtin_ia32_packsswb128_mask ((__v8hi
) __A
,
922 (__v16qi
) _mm_setzero_si128 (),
926 static __inline__ __m128i __DEFAULT_FN_ATTRS
927 _mm_mask_packs_epi16 (__m128i __W
, __mmask16 __M
, __m128i __A
,
930 return (__m128i
) __builtin_ia32_packsswb128_mask ((__v8hi
) __A
,
936 static __inline__ __m256i __DEFAULT_FN_ATTRS
937 _mm256_maskz_packs_epi16 (__mmask32 __M
, __m256i __A
, __m256i __B
)
939 return (__m256i
) __builtin_ia32_packsswb256_mask ((__v16hi
) __A
,
941 (__v32qi
) _mm256_setzero_si256 (),
945 static __inline__ __m256i __DEFAULT_FN_ATTRS
946 _mm256_mask_packs_epi16 (__m256i __W
, __mmask32 __M
, __m256i __A
,
949 return (__m256i
) __builtin_ia32_packsswb256_mask ((__v16hi
) __A
,
955 static __inline__ __m128i __DEFAULT_FN_ATTRS
956 _mm_maskz_packus_epi32 (__mmask8 __M
, __m128i __A
, __m128i __B
)
958 return (__m128i
) __builtin_ia32_packusdw128_mask ((__v4si
) __A
,
960 (__v8hi
) _mm_setzero_si128 (),
964 static __inline__ __m128i __DEFAULT_FN_ATTRS
965 _mm_mask_packus_epi32 (__m128i __W
, __mmask16 __M
, __m128i __A
,
968 return (__m128i
) __builtin_ia32_packusdw128_mask ((__v4si
) __A
,
973 static __inline__ __m256i __DEFAULT_FN_ATTRS
974 _mm256_maskz_packus_epi32 (__mmask16 __M
, __m256i __A
, __m256i __B
)
976 return (__m256i
) __builtin_ia32_packusdw256_mask ((__v8si
) __A
,
978 (__v16hi
) _mm256_setzero_si256 (),
982 static __inline__ __m256i __DEFAULT_FN_ATTRS
983 _mm256_mask_packus_epi32 (__m256i __W
, __mmask16 __M
, __m256i __A
,
986 return (__m256i
) __builtin_ia32_packusdw256_mask ((__v8si
) __A
,
992 static __inline__ __m128i __DEFAULT_FN_ATTRS
993 _mm_maskz_packus_epi16 (__mmask16 __M
, __m128i __A
, __m128i __B
)
995 return (__m128i
) __builtin_ia32_packuswb128_mask ((__v8hi
) __A
,
997 (__v16qi
) _mm_setzero_si128 (),
1001 static __inline__ __m128i __DEFAULT_FN_ATTRS
1002 _mm_mask_packus_epi16 (__m128i __W
, __mmask16 __M
, __m128i __A
,
1005 return (__m128i
) __builtin_ia32_packuswb128_mask ((__v8hi
) __A
,
1011 static __inline__ __m256i __DEFAULT_FN_ATTRS
1012 _mm256_maskz_packus_epi16 (__mmask32 __M
, __m256i __A
, __m256i __B
)
1014 return (__m256i
) __builtin_ia32_packuswb256_mask ((__v16hi
) __A
,
1016 (__v32qi
) _mm256_setzero_si256 (),
1020 static __inline__ __m256i __DEFAULT_FN_ATTRS
1021 _mm256_mask_packus_epi16 (__m256i __W
, __mmask32 __M
, __m256i __A
,
1024 return (__m256i
) __builtin_ia32_packuswb256_mask ((__v16hi
) __A
,
1030 static __inline__ __m128i __DEFAULT_FN_ATTRS
1031 _mm_mask_adds_epi8 (__m128i __W
, __mmask16 __U
, __m128i __A
,
1034 return (__m128i
) __builtin_ia32_paddsb128_mask ((__v16qi
) __A
,
1040 static __inline__ __m128i __DEFAULT_FN_ATTRS
1041 _mm_maskz_adds_epi8 (__mmask16 __U
, __m128i __A
, __m128i __B
)
1043 return (__m128i
) __builtin_ia32_paddsb128_mask ((__v16qi
) __A
,
1045 (__v16qi
) _mm_setzero_si128 (),
1049 static __inline__ __m256i __DEFAULT_FN_ATTRS
1050 _mm256_mask_adds_epi8 (__m256i __W
, __mmask32 __U
, __m256i __A
,
1053 return (__m256i
) __builtin_ia32_paddsb256_mask ((__v32qi
) __A
,
1059 static __inline__ __m256i __DEFAULT_FN_ATTRS
1060 _mm256_maskz_adds_epi8 (__mmask32 __U
, __m256i __A
, __m256i __B
)
1062 return (__m256i
) __builtin_ia32_paddsb256_mask ((__v32qi
) __A
,
1064 (__v32qi
) _mm256_setzero_si256 (),
1068 static __inline__ __m128i __DEFAULT_FN_ATTRS
1069 _mm_mask_adds_epi16 (__m128i __W
, __mmask8 __U
, __m128i __A
,
1072 return (__m128i
) __builtin_ia32_paddsw128_mask ((__v8hi
) __A
,
1078 static __inline__ __m128i __DEFAULT_FN_ATTRS
1079 _mm_maskz_adds_epi16 (__mmask8 __U
, __m128i __A
, __m128i __B
)
1081 return (__m128i
) __builtin_ia32_paddsw128_mask ((__v8hi
) __A
,
1083 (__v8hi
) _mm_setzero_si128 (),
1087 static __inline__ __m256i __DEFAULT_FN_ATTRS
1088 _mm256_mask_adds_epi16 (__m256i __W
, __mmask16 __U
, __m256i __A
,
1091 return (__m256i
) __builtin_ia32_paddsw256_mask ((__v16hi
) __A
,
1097 static __inline__ __m256i __DEFAULT_FN_ATTRS
1098 _mm256_maskz_adds_epi16 (__mmask16 __U
, __m256i __A
, __m256i __B
)
1100 return (__m256i
) __builtin_ia32_paddsw256_mask ((__v16hi
) __A
,
1102 (__v16hi
) _mm256_setzero_si256 (),
1106 static __inline__ __m128i __DEFAULT_FN_ATTRS
1107 _mm_mask_adds_epu8 (__m128i __W
, __mmask16 __U
, __m128i __A
,
1110 return (__m128i
) __builtin_ia32_paddusb128_mask ((__v16qi
) __A
,
1116 static __inline__ __m128i __DEFAULT_FN_ATTRS
1117 _mm_maskz_adds_epu8 (__mmask16 __U
, __m128i __A
, __m128i __B
)
1119 return (__m128i
) __builtin_ia32_paddusb128_mask ((__v16qi
) __A
,
1121 (__v16qi
) _mm_setzero_si128 (),
1125 static __inline__ __m256i __DEFAULT_FN_ATTRS
1126 _mm256_mask_adds_epu8 (__m256i __W
, __mmask32 __U
, __m256i __A
,
1129 return (__m256i
) __builtin_ia32_paddusb256_mask ((__v32qi
) __A
,
1135 static __inline__ __m256i __DEFAULT_FN_ATTRS
1136 _mm256_maskz_adds_epu8 (__mmask32 __U
, __m256i __A
, __m256i __B
)
1138 return (__m256i
) __builtin_ia32_paddusb256_mask ((__v32qi
) __A
,
1140 (__v32qi
) _mm256_setzero_si256 (),
1144 static __inline__ __m128i __DEFAULT_FN_ATTRS
1145 _mm_mask_adds_epu16 (__m128i __W
, __mmask8 __U
, __m128i __A
,
1148 return (__m128i
) __builtin_ia32_paddusw128_mask ((__v8hi
) __A
,
1154 static __inline__ __m128i __DEFAULT_FN_ATTRS
1155 _mm_maskz_adds_epu16 (__mmask8 __U
, __m128i __A
, __m128i __B
)
1157 return (__m128i
) __builtin_ia32_paddusw128_mask ((__v8hi
) __A
,
1159 (__v8hi
) _mm_setzero_si128 (),
1163 static __inline__ __m256i __DEFAULT_FN_ATTRS
1164 _mm256_mask_adds_epu16 (__m256i __W
, __mmask16 __U
, __m256i __A
,
1167 return (__m256i
) __builtin_ia32_paddusw256_mask ((__v16hi
) __A
,
1173 static __inline__ __m256i __DEFAULT_FN_ATTRS
1174 _mm256_maskz_adds_epu16 (__mmask16 __U
, __m256i __A
, __m256i __B
)
1176 return (__m256i
) __builtin_ia32_paddusw256_mask ((__v16hi
) __A
,
1178 (__v16hi
) _mm256_setzero_si256 (),
1182 static __inline__ __m128i __DEFAULT_FN_ATTRS
1183 _mm_mask_avg_epu8 (__m128i __W
, __mmask16 __U
, __m128i __A
,
1186 return (__m128i
) __builtin_ia32_pavgb128_mask ((__v16qi
) __A
,
1192 static __inline__ __m128i __DEFAULT_FN_ATTRS
1193 _mm_maskz_avg_epu8 (__mmask16 __U
, __m128i __A
, __m128i __B
)
1195 return (__m128i
) __builtin_ia32_pavgb128_mask ((__v16qi
) __A
,
1197 (__v16qi
) _mm_setzero_si128 (),
1201 static __inline__ __m256i __DEFAULT_FN_ATTRS
1202 _mm256_mask_avg_epu8 (__m256i __W
, __mmask32 __U
, __m256i __A
,
1205 return (__m256i
) __builtin_ia32_pavgb256_mask ((__v32qi
) __A
,
1211 static __inline__ __m256i __DEFAULT_FN_ATTRS
1212 _mm256_maskz_avg_epu8 (__mmask32 __U
, __m256i __A
, __m256i __B
)
1214 return (__m256i
) __builtin_ia32_pavgb256_mask ((__v32qi
) __A
,
1216 (__v32qi
) _mm256_setzero_si256 (),
1220 static __inline__ __m128i __DEFAULT_FN_ATTRS
1221 _mm_mask_avg_epu16 (__m128i __W
, __mmask8 __U
, __m128i __A
,
1224 return (__m128i
) __builtin_ia32_pavgw128_mask ((__v8hi
) __A
,
1230 static __inline__ __m128i __DEFAULT_FN_ATTRS
1231 _mm_maskz_avg_epu16 (__mmask8 __U
, __m128i __A
, __m128i __B
)
1233 return (__m128i
) __builtin_ia32_pavgw128_mask ((__v8hi
) __A
,
1235 (__v8hi
) _mm_setzero_si128 (),
1239 static __inline__ __m256i __DEFAULT_FN_ATTRS
1240 _mm256_mask_avg_epu16 (__m256i __W
, __mmask16 __U
, __m256i __A
,
1243 return (__m256i
) __builtin_ia32_pavgw256_mask ((__v16hi
) __A
,
1249 static __inline__ __m256i __DEFAULT_FN_ATTRS
1250 _mm256_maskz_avg_epu16 (__mmask16 __U
, __m256i __A
, __m256i __B
)
1252 return (__m256i
) __builtin_ia32_pavgw256_mask ((__v16hi
) __A
,
1254 (__v16hi
) _mm256_setzero_si256 (),
1258 static __inline__ __m128i __DEFAULT_FN_ATTRS
1259 _mm_maskz_max_epi8 (__mmask16 __M
, __m128i __A
, __m128i __B
)
1261 return (__m128i
) __builtin_ia32_pmaxsb128_mask ((__v16qi
) __A
,
1263 (__v16qi
) _mm_setzero_si128 (),
1267 static __inline__ __m128i __DEFAULT_FN_ATTRS
1268 _mm_mask_max_epi8 (__m128i __W
, __mmask16 __M
, __m128i __A
,
1271 return (__m128i
) __builtin_ia32_pmaxsb128_mask ((__v16qi
) __A
,
1277 static __inline__ __m256i __DEFAULT_FN_ATTRS
1278 _mm256_maskz_max_epi8 (__mmask32 __M
, __m256i __A
, __m256i __B
)
1280 return (__m256i
) __builtin_ia32_pmaxsb256_mask ((__v32qi
) __A
,
1282 (__v32qi
) _mm256_setzero_si256 (),
1286 static __inline__ __m256i __DEFAULT_FN_ATTRS
1287 _mm256_mask_max_epi8 (__m256i __W
, __mmask32 __M
, __m256i __A
,
1290 return (__m256i
) __builtin_ia32_pmaxsb256_mask ((__v32qi
) __A
,
1296 static __inline__ __m128i __DEFAULT_FN_ATTRS
1297 _mm_maskz_max_epi16 (__mmask8 __M
, __m128i __A
, __m128i __B
)
1299 return (__m128i
) __builtin_ia32_pmaxsw128_mask ((__v8hi
) __A
,
1301 (__v8hi
) _mm_setzero_si128 (),
1305 static __inline__ __m128i __DEFAULT_FN_ATTRS
1306 _mm_mask_max_epi16 (__m128i __W
, __mmask8 __M
, __m128i __A
,
1309 return (__m128i
) __builtin_ia32_pmaxsw128_mask ((__v8hi
) __A
,
1315 static __inline__ __m256i __DEFAULT_FN_ATTRS
1316 _mm256_maskz_max_epi16 (__mmask16 __M
, __m256i __A
, __m256i __B
)
1318 return (__m256i
) __builtin_ia32_pmaxsw256_mask ((__v16hi
) __A
,
1320 (__v16hi
) _mm256_setzero_si256 (),
1324 static __inline__ __m256i __DEFAULT_FN_ATTRS
1325 _mm256_mask_max_epi16 (__m256i __W
, __mmask16 __M
, __m256i __A
,
1328 return (__m256i
) __builtin_ia32_pmaxsw256_mask ((__v16hi
) __A
,
1334 static __inline__ __m128i __DEFAULT_FN_ATTRS
1335 _mm_maskz_max_epu8 (__mmask16 __M
, __m128i __A
, __m128i __B
)
1337 return (__m128i
) __builtin_ia32_pmaxub128_mask ((__v16qi
) __A
,
1339 (__v16qi
) _mm_setzero_si128 (),
1343 static __inline__ __m128i __DEFAULT_FN_ATTRS
1344 _mm_mask_max_epu8 (__m128i __W
, __mmask16 __M
, __m128i __A
,
1347 return (__m128i
) __builtin_ia32_pmaxub128_mask ((__v16qi
) __A
,
1353 static __inline__ __m256i __DEFAULT_FN_ATTRS
1354 _mm256_maskz_max_epu8 (__mmask32 __M
, __m256i __A
, __m256i __B
)
1356 return (__m256i
) __builtin_ia32_pmaxub256_mask ((__v32qi
) __A
,
1358 (__v32qi
) _mm256_setzero_si256 (),
1362 static __inline__ __m256i __DEFAULT_FN_ATTRS
1363 _mm256_mask_max_epu8 (__m256i __W
, __mmask32 __M
, __m256i __A
,
1366 return (__m256i
) __builtin_ia32_pmaxub256_mask ((__v32qi
) __A
,
1372 static __inline__ __m128i __DEFAULT_FN_ATTRS
1373 _mm_maskz_max_epu16 (__mmask8 __M
, __m128i __A
, __m128i __B
)
1375 return (__m128i
) __builtin_ia32_pmaxuw128_mask ((__v8hi
) __A
,
1377 (__v8hi
) _mm_setzero_si128 (),
1381 static __inline__ __m128i __DEFAULT_FN_ATTRS
1382 _mm_mask_max_epu16 (__m128i __W
, __mmask8 __M
, __m128i __A
,
1385 return (__m128i
) __builtin_ia32_pmaxuw128_mask ((__v8hi
) __A
,
1391 static __inline__ __m256i __DEFAULT_FN_ATTRS
1392 _mm256_maskz_max_epu16 (__mmask16 __M
, __m256i __A
, __m256i __B
)
1394 return (__m256i
) __builtin_ia32_pmaxuw256_mask ((__v16hi
) __A
,
1396 (__v16hi
) _mm256_setzero_si256 (),
1400 static __inline__ __m256i __DEFAULT_FN_ATTRS
1401 _mm256_mask_max_epu16 (__m256i __W
, __mmask16 __M
, __m256i __A
,
1404 return (__m256i
) __builtin_ia32_pmaxuw256_mask ((__v16hi
) __A
,
1410 static __inline__ __m128i __DEFAULT_FN_ATTRS
1411 _mm_maskz_min_epi8 (__mmask16 __M
, __m128i __A
, __m128i __B
)
1413 return (__m128i
) __builtin_ia32_pminsb128_mask ((__v16qi
) __A
,
1415 (__v16qi
) _mm_setzero_si128 (),
1419 static __inline__ __m128i __DEFAULT_FN_ATTRS
1420 _mm_mask_min_epi8 (__m128i __W
, __mmask16 __M
, __m128i __A
,
1423 return (__m128i
) __builtin_ia32_pminsb128_mask ((__v16qi
) __A
,
1429 static __inline__ __m256i __DEFAULT_FN_ATTRS
1430 _mm256_maskz_min_epi8 (__mmask32 __M
, __m256i __A
, __m256i __B
)
1432 return (__m256i
) __builtin_ia32_pminsb256_mask ((__v32qi
) __A
,
1434 (__v32qi
) _mm256_setzero_si256 (),
1438 static __inline__ __m256i __DEFAULT_FN_ATTRS
1439 _mm256_mask_min_epi8 (__m256i __W
, __mmask32 __M
, __m256i __A
,
1442 return (__m256i
) __builtin_ia32_pminsb256_mask ((__v32qi
) __A
,
1448 static __inline__ __m128i __DEFAULT_FN_ATTRS
1449 _mm_maskz_min_epi16 (__mmask8 __M
, __m128i __A
, __m128i __B
)
1451 return (__m128i
) __builtin_ia32_pminsw128_mask ((__v8hi
) __A
,
1453 (__v8hi
) _mm_setzero_si128 (),
1457 static __inline__ __m128i __DEFAULT_FN_ATTRS
1458 _mm_mask_min_epi16 (__m128i __W
, __mmask8 __M
, __m128i __A
,
1461 return (__m128i
) __builtin_ia32_pminsw128_mask ((__v8hi
) __A
,
1467 static __inline__ __m256i __DEFAULT_FN_ATTRS
1468 _mm256_maskz_min_epi16 (__mmask16 __M
, __m256i __A
, __m256i __B
)
1470 return (__m256i
) __builtin_ia32_pminsw256_mask ((__v16hi
) __A
,
1472 (__v16hi
) _mm256_setzero_si256 (),
1476 static __inline__ __m256i __DEFAULT_FN_ATTRS
1477 _mm256_mask_min_epi16 (__m256i __W
, __mmask16 __M
, __m256i __A
,
1480 return (__m256i
) __builtin_ia32_pminsw256_mask ((__v16hi
) __A
,
1486 static __inline__ __m128i __DEFAULT_FN_ATTRS
1487 _mm_maskz_min_epu8 (__mmask16 __M
, __m128i __A
, __m128i __B
)
1489 return (__m128i
) __builtin_ia32_pminub128_mask ((__v16qi
) __A
,
1491 (__v16qi
) _mm_setzero_si128 (),
1495 static __inline__ __m128i __DEFAULT_FN_ATTRS
1496 _mm_mask_min_epu8 (__m128i __W
, __mmask16 __M
, __m128i __A
,
1499 return (__m128i
) __builtin_ia32_pminub128_mask ((__v16qi
) __A
,
1505 static __inline__ __m256i __DEFAULT_FN_ATTRS
1506 _mm256_maskz_min_epu8 (__mmask32 __M
, __m256i __A
, __m256i __B
)
1508 return (__m256i
) __builtin_ia32_pminub256_mask ((__v32qi
) __A
,
1510 (__v32qi
) _mm256_setzero_si256 (),
1514 static __inline__ __m256i __DEFAULT_FN_ATTRS
1515 _mm256_mask_min_epu8 (__m256i __W
, __mmask32 __M
, __m256i __A
,
1518 return (__m256i
) __builtin_ia32_pminub256_mask ((__v32qi
) __A
,
1524 static __inline__ __m128i __DEFAULT_FN_ATTRS
1525 _mm_maskz_min_epu16 (__mmask8 __M
, __m128i __A
, __m128i __B
)
1527 return (__m128i
) __builtin_ia32_pminuw128_mask ((__v8hi
) __A
,
1529 (__v8hi
) _mm_setzero_si128 (),
1533 static __inline__ __m128i __DEFAULT_FN_ATTRS
1534 _mm_mask_min_epu16 (__m128i __W
, __mmask8 __M
, __m128i __A
,
1537 return (__m128i
) __builtin_ia32_pminuw128_mask ((__v8hi
) __A
,
1543 static __inline__ __m256i __DEFAULT_FN_ATTRS
1544 _mm256_maskz_min_epu16 (__mmask16 __M
, __m256i __A
, __m256i __B
)
1546 return (__m256i
) __builtin_ia32_pminuw256_mask ((__v16hi
) __A
,
1548 (__v16hi
) _mm256_setzero_si256 (),
1552 static __inline__ __m256i __DEFAULT_FN_ATTRS
1553 _mm256_mask_min_epu16 (__m256i __W
, __mmask16 __M
, __m256i __A
,
1556 return (__m256i
) __builtin_ia32_pminuw256_mask ((__v16hi
) __A
,
1562 static __inline__ __m128i __DEFAULT_FN_ATTRS
1563 _mm_mask_shuffle_epi8 (__m128i __W
, __mmask16 __U
, __m128i __A
,
1566 return (__m128i
) __builtin_ia32_pshufb128_mask ((__v16qi
) __A
,
1572 static __inline__ __m128i __DEFAULT_FN_ATTRS
1573 _mm_maskz_shuffle_epi8 (__mmask16 __U
, __m128i __A
, __m128i __B
)
1575 return (__m128i
) __builtin_ia32_pshufb128_mask ((__v16qi
) __A
,
1577 (__v16qi
) _mm_setzero_si128 (),
1581 static __inline__ __m256i __DEFAULT_FN_ATTRS
1582 _mm256_mask_shuffle_epi8 (__m256i __W
, __mmask32 __U
, __m256i __A
,
1585 return (__m256i
) __builtin_ia32_pshufb256_mask ((__v32qi
) __A
,
1591 static __inline__ __m256i __DEFAULT_FN_ATTRS
1592 _mm256_maskz_shuffle_epi8 (__mmask32 __U
, __m256i __A
, __m256i __B
)
1594 return (__m256i
) __builtin_ia32_pshufb256_mask ((__v32qi
) __A
,
1596 (__v32qi
) _mm256_setzero_si256 (),
1600 static __inline__ __m128i __DEFAULT_FN_ATTRS
1601 _mm_mask_subs_epi8 (__m128i __W
, __mmask16 __U
, __m128i __A
,
1604 return (__m128i
) __builtin_ia32_psubsb128_mask ((__v16qi
) __A
,
1610 static __inline__ __m128i __DEFAULT_FN_ATTRS
1611 _mm_maskz_subs_epi8 (__mmask16 __U
, __m128i __A
, __m128i __B
)
1613 return (__m128i
) __builtin_ia32_psubsb128_mask ((__v16qi
) __A
,
1615 (__v16qi
) _mm_setzero_si128 (),
1619 static __inline__ __m256i __DEFAULT_FN_ATTRS
1620 _mm256_mask_subs_epi8 (__m256i __W
, __mmask32 __U
, __m256i __A
,
1623 return (__m256i
) __builtin_ia32_psubsb256_mask ((__v32qi
) __A
,
1629 static __inline__ __m256i __DEFAULT_FN_ATTRS
1630 _mm256_maskz_subs_epi8 (__mmask32 __U
, __m256i __A
, __m256i __B
)
1632 return (__m256i
) __builtin_ia32_psubsb256_mask ((__v32qi
) __A
,
1634 (__v32qi
) _mm256_setzero_si256 (),
1638 static __inline__ __m128i __DEFAULT_FN_ATTRS
1639 _mm_mask_subs_epi16 (__m128i __W
, __mmask8 __U
, __m128i __A
,
1642 return (__m128i
) __builtin_ia32_psubsw128_mask ((__v8hi
) __A
,
1648 static __inline__ __m128i __DEFAULT_FN_ATTRS
1649 _mm_maskz_subs_epi16 (__mmask8 __U
, __m128i __A
, __m128i __B
)
1651 return (__m128i
) __builtin_ia32_psubsw128_mask ((__v8hi
) __A
,
1653 (__v8hi
) _mm_setzero_si128 (),
1657 static __inline__ __m256i __DEFAULT_FN_ATTRS
1658 _mm256_mask_subs_epi16 (__m256i __W
, __mmask16 __U
, __m256i __A
,
1661 return (__m256i
) __builtin_ia32_psubsw256_mask ((__v16hi
) __A
,
1667 static __inline__ __m256i __DEFAULT_FN_ATTRS
1668 _mm256_maskz_subs_epi16 (__mmask16 __U
, __m256i __A
, __m256i __B
)
1670 return (__m256i
) __builtin_ia32_psubsw256_mask ((__v16hi
) __A
,
1672 (__v16hi
) _mm256_setzero_si256 (),
1676 static __inline__ __m128i __DEFAULT_FN_ATTRS
1677 _mm_mask_subs_epu8 (__m128i __W
, __mmask16 __U
, __m128i __A
,
1680 return (__m128i
) __builtin_ia32_psubusb128_mask ((__v16qi
) __A
,
1686 static __inline__ __m128i __DEFAULT_FN_ATTRS
1687 _mm_maskz_subs_epu8 (__mmask16 __U
, __m128i __A
, __m128i __B
)
1689 return (__m128i
) __builtin_ia32_psubusb128_mask ((__v16qi
) __A
,
1691 (__v16qi
) _mm_setzero_si128 (),
1695 static __inline__ __m256i __DEFAULT_FN_ATTRS
1696 _mm256_mask_subs_epu8 (__m256i __W
, __mmask32 __U
, __m256i __A
,
1699 return (__m256i
) __builtin_ia32_psubusb256_mask ((__v32qi
) __A
,
1705 static __inline__ __m256i __DEFAULT_FN_ATTRS
1706 _mm256_maskz_subs_epu8 (__mmask32 __U
, __m256i __A
, __m256i __B
)
1708 return (__m256i
) __builtin_ia32_psubusb256_mask ((__v32qi
) __A
,
1710 (__v32qi
) _mm256_setzero_si256 (),
1714 static __inline__ __m128i __DEFAULT_FN_ATTRS
1715 _mm_mask_subs_epu16 (__m128i __W
, __mmask8 __U
, __m128i __A
,
1718 return (__m128i
) __builtin_ia32_psubusw128_mask ((__v8hi
) __A
,
1724 static __inline__ __m128i __DEFAULT_FN_ATTRS
1725 _mm_maskz_subs_epu16 (__mmask8 __U
, __m128i __A
, __m128i __B
)
1727 return (__m128i
) __builtin_ia32_psubusw128_mask ((__v8hi
) __A
,
1729 (__v8hi
) _mm_setzero_si128 (),
1733 static __inline__ __m256i __DEFAULT_FN_ATTRS
1734 _mm256_mask_subs_epu16 (__m256i __W
, __mmask16 __U
, __m256i __A
,
1737 return (__m256i
) __builtin_ia32_psubusw256_mask ((__v16hi
) __A
,
1743 static __inline__ __m256i __DEFAULT_FN_ATTRS
1744 _mm256_maskz_subs_epu16 (__mmask16 __U
, __m256i __A
, __m256i __B
)
1746 return (__m256i
) __builtin_ia32_psubusw256_mask ((__v16hi
) __A
,
1748 (__v16hi
) _mm256_setzero_si256 (),
1752 static __inline__ __m128i __DEFAULT_FN_ATTRS
1753 _mm_mask2_permutex2var_epi16 (__m128i __A
, __m128i __I
, __mmask8 __U
,
1756 return (__m128i
) __builtin_ia32_vpermi2varhi128_mask ((__v8hi
) __A
,
1757 (__v8hi
) __I
/* idx */ ,
1762 static __inline__ __m256i __DEFAULT_FN_ATTRS
1763 _mm256_mask2_permutex2var_epi16 (__m256i __A
, __m256i __I
,
1764 __mmask16 __U
, __m256i __B
)
1766 return (__m256i
) __builtin_ia32_vpermi2varhi256_mask ((__v16hi
) __A
,
1767 (__v16hi
) __I
/* idx */ ,
1772 static __inline__ __m128i __DEFAULT_FN_ATTRS
1773 _mm_permutex2var_epi16 (__m128i __A
, __m128i __I
, __m128i __B
)
1775 return (__m128i
) __builtin_ia32_vpermt2varhi128_mask ((__v8hi
) __I
/* idx */,
1781 static __inline__ __m128i __DEFAULT_FN_ATTRS
1782 _mm_mask_permutex2var_epi16 (__m128i __A
, __mmask8 __U
, __m128i __I
,
1785 return (__m128i
) __builtin_ia32_vpermt2varhi128_mask ((__v8hi
) __I
/* idx */,
1791 static __inline__ __m128i __DEFAULT_FN_ATTRS
1792 _mm_maskz_permutex2var_epi16 (__mmask8 __U
, __m128i __A
, __m128i __I
,
1795 return (__m128i
) __builtin_ia32_vpermt2varhi128_maskz ((__v8hi
) __I
/* idx */,
1801 static __inline__ __m256i __DEFAULT_FN_ATTRS
1802 _mm256_permutex2var_epi16 (__m256i __A
, __m256i __I
, __m256i __B
)
1804 return (__m256i
) __builtin_ia32_vpermt2varhi256_mask ((__v16hi
) __I
/* idx */,
1810 static __inline__ __m256i __DEFAULT_FN_ATTRS
1811 _mm256_mask_permutex2var_epi16 (__m256i __A
, __mmask16 __U
,
1812 __m256i __I
, __m256i __B
)
1814 return (__m256i
) __builtin_ia32_vpermt2varhi256_mask ((__v16hi
) __I
/* idx */,
1820 static __inline__ __m256i __DEFAULT_FN_ATTRS
1821 _mm256_maskz_permutex2var_epi16 (__mmask16 __U
, __m256i __A
,
1822 __m256i __I
, __m256i __B
)
1824 return (__m256i
) __builtin_ia32_vpermt2varhi256_maskz ((__v16hi
) __I
/* idx */,
1830 static __inline__ __m128i __DEFAULT_FN_ATTRS
1831 _mm_mask_maddubs_epi16 (__m128i __W
, __mmask8 __U
, __m128i __X
, __m128i __Y
) {
1832 return (__m128i
) __builtin_ia32_pmaddubsw128_mask ((__v16qi
) __X
,
1838 static __inline__ __m128i __DEFAULT_FN_ATTRS
1839 _mm_maskz_maddubs_epi16 (__mmask8 __U
, __m128i __X
, __m128i __Y
) {
1840 return (__m128i
) __builtin_ia32_pmaddubsw128_mask ((__v16qi
) __X
,
1842 (__v8hi
) _mm_setzero_si128(),
1846 static __inline__ __m256i __DEFAULT_FN_ATTRS
1847 _mm256_mask_maddubs_epi16 (__m256i __W
, __mmask16 __U
, __m256i __X
,
1849 return (__m256i
) __builtin_ia32_pmaddubsw256_mask ((__v32qi
) __X
,
1855 static __inline__ __m256i __DEFAULT_FN_ATTRS
1856 _mm256_maskz_maddubs_epi16 (__mmask16 __U
, __m256i __X
, __m256i __Y
) {
1857 return (__m256i
) __builtin_ia32_pmaddubsw256_mask ((__v32qi
) __X
,
1859 (__v16hi
) _mm256_setzero_si256(),
1863 static __inline__ __m128i __DEFAULT_FN_ATTRS
1864 _mm_mask_madd_epi16 (__m128i __W
, __mmask8 __U
, __m128i __A
,
1866 return (__m128i
) __builtin_ia32_pmaddwd128_mask ((__v8hi
) __A
,
1872 static __inline__ __m128i __DEFAULT_FN_ATTRS
1873 _mm_maskz_madd_epi16 (__mmask8 __U
, __m128i __A
, __m128i __B
) {
1874 return (__m128i
) __builtin_ia32_pmaddwd128_mask ((__v8hi
) __A
,
1876 (__v4si
) _mm_setzero_si128(),
1880 static __inline__ __m256i __DEFAULT_FN_ATTRS
1881 _mm256_mask_madd_epi16 (__m256i __W
, __mmask8 __U
, __m256i __A
, __m256i __B
) {
1882 return (__m256i
) __builtin_ia32_pmaddwd256_mask ((__v16hi
) __A
,
1888 static __inline__ __m256i __DEFAULT_FN_ATTRS
1889 _mm256_maskz_madd_epi16 (__mmask8 __U
, __m256i __A
, __m256i __B
) {
1890 return (__m256i
) __builtin_ia32_pmaddwd256_mask ((__v16hi
) __A
,
1892 (__v8si
) _mm256_setzero_si256(),
1896 static __inline__ __m128i __DEFAULT_FN_ATTRS
1897 _mm_cvtsepi16_epi8 (__m128i __A
) {
1898 return (__m128i
) __builtin_ia32_pmovswb128_mask ((__v8hi
) __A
,
1899 (__v16qi
) _mm_setzero_si128(),
1903 static __inline__ __m128i __DEFAULT_FN_ATTRS
1904 _mm_mask_cvtsepi16_epi8 (__m128i __O
, __mmask8 __M
, __m128i __A
) {
1905 return (__m128i
) __builtin_ia32_pmovswb128_mask ((__v8hi
) __A
,
1910 static __inline__ __m128i __DEFAULT_FN_ATTRS
1911 _mm_maskz_cvtsepi16_epi8 (__mmask8 __M
, __m128i __A
) {
1912 return (__m128i
) __builtin_ia32_pmovswb128_mask ((__v8hi
) __A
,
1913 (__v16qi
) _mm_setzero_si128(),
1917 static __inline__ __m128i __DEFAULT_FN_ATTRS
1918 _mm256_cvtsepi16_epi8 (__m256i __A
) {
1919 return (__m128i
) __builtin_ia32_pmovswb256_mask ((__v16hi
) __A
,
1920 (__v16qi
) _mm_setzero_si128(),
1924 static __inline__ __m128i __DEFAULT_FN_ATTRS
1925 _mm256_mask_cvtsepi16_epi8 (__m128i __O
, __mmask16 __M
, __m256i __A
) {
1926 return (__m128i
) __builtin_ia32_pmovswb256_mask ((__v16hi
) __A
,
1931 static __inline__ __m128i __DEFAULT_FN_ATTRS
1932 _mm256_maskz_cvtsepi16_epi8 (__mmask16 __M
, __m256i __A
) {
1933 return (__m128i
) __builtin_ia32_pmovswb256_mask ((__v16hi
) __A
,
1934 (__v16qi
) _mm_setzero_si128(),
1938 static __inline__ __m128i __DEFAULT_FN_ATTRS
1939 _mm_cvtusepi16_epi8 (__m128i __A
) {
1940 return (__m128i
) __builtin_ia32_pmovuswb128_mask ((__v8hi
) __A
,
1941 (__v16qi
) _mm_setzero_si128(),
1945 static __inline__ __m128i __DEFAULT_FN_ATTRS
1946 _mm_mask_cvtusepi16_epi8 (__m128i __O
, __mmask8 __M
, __m128i __A
) {
1947 return (__m128i
) __builtin_ia32_pmovuswb128_mask ((__v8hi
) __A
,
1952 static __inline__ __m128i __DEFAULT_FN_ATTRS
1953 _mm_maskz_cvtusepi16_epi8 (__mmask8 __M
, __m128i __A
) {
1954 return (__m128i
) __builtin_ia32_pmovuswb128_mask ((__v8hi
) __A
,
1955 (__v16qi
) _mm_setzero_si128(),
1959 static __inline__ __m128i __DEFAULT_FN_ATTRS
1960 _mm256_cvtusepi16_epi8 (__m256i __A
) {
1961 return (__m128i
) __builtin_ia32_pmovuswb256_mask ((__v16hi
) __A
,
1962 (__v16qi
) _mm_setzero_si128(),
1966 static __inline__ __m128i __DEFAULT_FN_ATTRS
1967 _mm256_mask_cvtusepi16_epi8 (__m128i __O
, __mmask16 __M
, __m256i __A
) {
1968 return (__m128i
) __builtin_ia32_pmovuswb256_mask ((__v16hi
) __A
,
1973 static __inline__ __m128i __DEFAULT_FN_ATTRS
1974 _mm256_maskz_cvtusepi16_epi8 (__mmask16 __M
, __m256i __A
) {
1975 return (__m128i
) __builtin_ia32_pmovuswb256_mask ((__v16hi
) __A
,
1976 (__v16qi
) _mm_setzero_si128(),
1980 static __inline__ __m128i __DEFAULT_FN_ATTRS
1981 _mm_cvtepi16_epi8 (__m128i __A
) {
1983 return (__m128i
) __builtin_ia32_pmovwb128_mask ((__v8hi
) __A
,
1984 (__v16qi
) _mm_setzero_si128(),
1988 static __inline__ __m128i __DEFAULT_FN_ATTRS
1989 _mm_mask_cvtepi16_epi8 (__m128i __O
, __mmask8 __M
, __m128i __A
) {
1990 return (__m128i
) __builtin_ia32_pmovwb128_mask ((__v8hi
) __A
,
1995 static __inline__ __m128i __DEFAULT_FN_ATTRS
1996 _mm_maskz_cvtepi16_epi8 (__mmask8 __M
, __m128i __A
) {
1997 return (__m128i
) __builtin_ia32_pmovwb128_mask ((__v8hi
) __A
,
1998 (__v16qi
) _mm_setzero_si128(),
2002 static __inline__
void __DEFAULT_FN_ATTRS
2003 _mm_mask_cvtepi16_storeu_epi8 (void * __P
, __mmask8 __M
, __m128i __A
)
2005 __builtin_ia32_pmovwb128mem_mask ((__v16qi
*) __P
, (__v8hi
) __A
, __M
);
2009 static __inline__
void __DEFAULT_FN_ATTRS
2010 _mm_mask_cvtsepi16_storeu_epi8 (void * __P
, __mmask8 __M
, __m128i __A
)
2012 __builtin_ia32_pmovswb128mem_mask ((__v16qi
*) __P
, (__v8hi
) __A
, __M
);
2015 static __inline__
void __DEFAULT_FN_ATTRS
2016 _mm_mask_cvtusepi16_storeu_epi8 (void * __P
, __mmask8 __M
, __m128i __A
)
2018 __builtin_ia32_pmovuswb128mem_mask ((__v16qi
*) __P
, (__v8hi
) __A
, __M
);
2021 static __inline__ __m128i __DEFAULT_FN_ATTRS
2022 _mm256_cvtepi16_epi8 (__m256i __A
) {
2023 return (__m128i
) __builtin_ia32_pmovwb256_mask ((__v16hi
) __A
,
2024 (__v16qi
) _mm_setzero_si128(),
2028 static __inline__ __m128i __DEFAULT_FN_ATTRS
2029 _mm256_mask_cvtepi16_epi8 (__m128i __O
, __mmask16 __M
, __m256i __A
) {
2030 return (__m128i
) __builtin_ia32_pmovwb256_mask ((__v16hi
) __A
,
2035 static __inline__ __m128i __DEFAULT_FN_ATTRS
2036 _mm256_maskz_cvtepi16_epi8 (__mmask16 __M
, __m256i __A
) {
2037 return (__m128i
) __builtin_ia32_pmovwb256_mask ((__v16hi
) __A
,
2038 (__v16qi
) _mm_setzero_si128(),
2042 static __inline__
void __DEFAULT_FN_ATTRS
2043 _mm256_mask_cvtepi16_storeu_epi8 (void * __P
, __mmask16 __M
, __m256i __A
)
2045 __builtin_ia32_pmovwb256mem_mask ((__v16qi
*) __P
, (__v16hi
) __A
, __M
);
2048 static __inline__
void __DEFAULT_FN_ATTRS
2049 _mm256_mask_cvtsepi16_storeu_epi8 (void * __P
, __mmask16 __M
, __m256i __A
)
2051 __builtin_ia32_pmovswb256mem_mask ((__v16qi
*) __P
, (__v16hi
) __A
, __M
);
2054 static __inline__
void __DEFAULT_FN_ATTRS
2055 _mm256_mask_cvtusepi16_storeu_epi8 (void * __P
, __mmask8 __M
, __m256i __A
)
2057 __builtin_ia32_pmovuswb256mem_mask ((__v16qi
*) __P
, (__v16hi
) __A
, __M
);
2059 static __inline__ __m128i __DEFAULT_FN_ATTRS
2060 _mm_mask_mulhrs_epi16 (__m128i __W
, __mmask8 __U
, __m128i __X
, __m128i __Y
) {
2061 return (__m128i
) __builtin_ia32_pmulhrsw128_mask ((__v8hi
) __X
,
2067 static __inline__ __m128i __DEFAULT_FN_ATTRS
2068 _mm_maskz_mulhrs_epi16 (__mmask8 __U
, __m128i __X
, __m128i __Y
) {
2069 return (__m128i
) __builtin_ia32_pmulhrsw128_mask ((__v8hi
) __X
,
2071 (__v8hi
) _mm_setzero_si128(),
2075 static __inline__ __m256i __DEFAULT_FN_ATTRS
2076 _mm256_mask_mulhrs_epi16 (__m256i __W
, __mmask16 __U
, __m256i __X
, __m256i __Y
) {
2077 return (__m256i
) __builtin_ia32_pmulhrsw256_mask ((__v16hi
) __X
,
2083 static __inline__ __m256i __DEFAULT_FN_ATTRS
2084 _mm256_maskz_mulhrs_epi16 (__mmask16 __U
, __m256i __X
, __m256i __Y
) {
2085 return (__m256i
) __builtin_ia32_pmulhrsw256_mask ((__v16hi
) __X
,
2087 (__v16hi
) _mm256_setzero_si256(),
2091 static __inline__ __m128i __DEFAULT_FN_ATTRS
2092 _mm_mask_mulhi_epu16 (__m128i __W
, __mmask8 __U
, __m128i __A
,
2094 return (__m128i
) __builtin_ia32_pmulhuw128_mask ((__v8hi
) __A
,
2100 static __inline__ __m128i __DEFAULT_FN_ATTRS
2101 _mm_maskz_mulhi_epu16 (__mmask8 __U
, __m128i __A
, __m128i __B
) {
2102 return (__m128i
) __builtin_ia32_pmulhuw128_mask ((__v8hi
) __A
,
2104 (__v8hi
) _mm_setzero_si128(),
2108 static __inline__ __m256i __DEFAULT_FN_ATTRS
2109 _mm256_mask_mulhi_epu16 (__m256i __W
, __mmask16 __U
, __m256i __A
,
2111 return (__m256i
) __builtin_ia32_pmulhuw256_mask ((__v16hi
) __A
,
2117 static __inline__ __m256i __DEFAULT_FN_ATTRS
2118 _mm256_maskz_mulhi_epu16 (__mmask16 __U
, __m256i __A
, __m256i __B
) {
2119 return (__m256i
) __builtin_ia32_pmulhuw256_mask ((__v16hi
) __A
,
2121 (__v16hi
) _mm256_setzero_si256(),
2125 static __inline__ __m128i __DEFAULT_FN_ATTRS
2126 _mm_mask_mulhi_epi16 (__m128i __W
, __mmask8 __U
, __m128i __A
,
2128 return (__m128i
) __builtin_ia32_pmulhw128_mask ((__v8hi
) __A
,
2134 static __inline__ __m128i __DEFAULT_FN_ATTRS
2135 _mm_maskz_mulhi_epi16 (__mmask8 __U
, __m128i __A
, __m128i __B
) {
2136 return (__m128i
) __builtin_ia32_pmulhw128_mask ((__v8hi
) __A
,
2138 (__v8hi
) _mm_setzero_si128(),
2142 static __inline__ __m256i __DEFAULT_FN_ATTRS
2143 _mm256_mask_mulhi_epi16 (__m256i __W
, __mmask16 __U
, __m256i __A
,
2145 return (__m256i
) __builtin_ia32_pmulhw256_mask ((__v16hi
) __A
,
2151 static __inline__ __m256i __DEFAULT_FN_ATTRS
2152 _mm256_maskz_mulhi_epi16 (__mmask16 __U
, __m256i __A
, __m256i __B
) {
2153 return (__m256i
) __builtin_ia32_pmulhw256_mask ((__v16hi
) __A
,
2155 (__v16hi
) _mm256_setzero_si256(),
2159 static __inline__ __m128i __DEFAULT_FN_ATTRS
2160 _mm_mask_unpackhi_epi8(__m128i __W
, __mmask16 __U
, __m128i __A
, __m128i __B
) {
2161 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__U
,
2162 (__v16qi
)_mm_unpackhi_epi8(__A
, __B
),
2166 static __inline__ __m128i __DEFAULT_FN_ATTRS
2167 _mm_maskz_unpackhi_epi8(__mmask16 __U
, __m128i __A
, __m128i __B
) {
2168 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__U
,
2169 (__v16qi
)_mm_unpackhi_epi8(__A
, __B
),
2170 (__v16qi
)_mm_setzero_si128());
2173 static __inline__ __m256i __DEFAULT_FN_ATTRS
2174 _mm256_mask_unpackhi_epi8(__m256i __W
, __mmask32 __U
, __m256i __A
, __m256i __B
) {
2175 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__U
,
2176 (__v32qi
)_mm256_unpackhi_epi8(__A
, __B
),
2180 static __inline__ __m256i __DEFAULT_FN_ATTRS
2181 _mm256_maskz_unpackhi_epi8(__mmask32 __U
, __m256i __A
, __m256i __B
) {
2182 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__U
,
2183 (__v32qi
)_mm256_unpackhi_epi8(__A
, __B
),
2184 (__v32qi
)_mm256_setzero_si256());
2187 static __inline__ __m128i __DEFAULT_FN_ATTRS
2188 _mm_mask_unpackhi_epi16(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
) {
2189 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
2190 (__v8hi
)_mm_unpackhi_epi16(__A
, __B
),
2194 static __inline__ __m128i __DEFAULT_FN_ATTRS
2195 _mm_maskz_unpackhi_epi16(__mmask8 __U
, __m128i __A
, __m128i __B
) {
2196 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
2197 (__v8hi
)_mm_unpackhi_epi16(__A
, __B
),
2198 (__v8hi
) _mm_setzero_si128());
2201 static __inline__ __m256i __DEFAULT_FN_ATTRS
2202 _mm256_mask_unpackhi_epi16(__m256i __W
, __mmask16 __U
, __m256i __A
, __m256i __B
) {
2203 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
2204 (__v16hi
)_mm256_unpackhi_epi16(__A
, __B
),
2208 static __inline__ __m256i __DEFAULT_FN_ATTRS
2209 _mm256_maskz_unpackhi_epi16(__mmask16 __U
, __m256i __A
, __m256i __B
) {
2210 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
2211 (__v16hi
)_mm256_unpackhi_epi16(__A
, __B
),
2212 (__v16hi
)_mm256_setzero_si256());
2215 static __inline__ __m128i __DEFAULT_FN_ATTRS
2216 _mm_mask_unpacklo_epi8(__m128i __W
, __mmask16 __U
, __m128i __A
, __m128i __B
) {
2217 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__U
,
2218 (__v16qi
)_mm_unpacklo_epi8(__A
, __B
),
2222 static __inline__ __m128i __DEFAULT_FN_ATTRS
2223 _mm_maskz_unpacklo_epi8(__mmask16 __U
, __m128i __A
, __m128i __B
) {
2224 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__U
,
2225 (__v16qi
)_mm_unpacklo_epi8(__A
, __B
),
2226 (__v16qi
)_mm_setzero_si128());
2229 static __inline__ __m256i __DEFAULT_FN_ATTRS
2230 _mm256_mask_unpacklo_epi8(__m256i __W
, __mmask32 __U
, __m256i __A
, __m256i __B
) {
2231 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__U
,
2232 (__v32qi
)_mm256_unpacklo_epi8(__A
, __B
),
2236 static __inline__ __m256i __DEFAULT_FN_ATTRS
2237 _mm256_maskz_unpacklo_epi8(__mmask32 __U
, __m256i __A
, __m256i __B
) {
2238 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__U
,
2239 (__v32qi
)_mm256_unpacklo_epi8(__A
, __B
),
2240 (__v32qi
)_mm256_setzero_si256());
2243 static __inline__ __m128i __DEFAULT_FN_ATTRS
2244 _mm_mask_unpacklo_epi16(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
) {
2245 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
2246 (__v8hi
)_mm_unpacklo_epi16(__A
, __B
),
2250 static __inline__ __m128i __DEFAULT_FN_ATTRS
2251 _mm_maskz_unpacklo_epi16(__mmask8 __U
, __m128i __A
, __m128i __B
) {
2252 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
2253 (__v8hi
)_mm_unpacklo_epi16(__A
, __B
),
2254 (__v8hi
) _mm_setzero_si128());
2257 static __inline__ __m256i __DEFAULT_FN_ATTRS
2258 _mm256_mask_unpacklo_epi16(__m256i __W
, __mmask16 __U
, __m256i __A
, __m256i __B
) {
2259 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
2260 (__v16hi
)_mm256_unpacklo_epi16(__A
, __B
),
2264 static __inline__ __m256i __DEFAULT_FN_ATTRS
2265 _mm256_maskz_unpacklo_epi16(__mmask16 __U
, __m256i __A
, __m256i __B
) {
2266 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
2267 (__v16hi
)_mm256_unpacklo_epi16(__A
, __B
),
2268 (__v16hi
)_mm256_setzero_si256());
2271 static __inline__ __m128i __DEFAULT_FN_ATTRS
2272 _mm_mask_cvtepi8_epi16 (__m128i __W
, __mmask32 __U
, __m128i __A
)
2274 return (__m128i
) __builtin_ia32_pmovsxbw128_mask ((__v16qi
) __A
,
2279 static __inline__ __m128i __DEFAULT_FN_ATTRS
2280 _mm_maskz_cvtepi8_epi16 (__mmask8 __U
, __m128i __A
)
2282 return (__m128i
) __builtin_ia32_pmovsxbw128_mask ((__v16qi
) __A
,
2284 _mm_setzero_si128 (),
2288 static __inline__ __m256i __DEFAULT_FN_ATTRS
2289 _mm256_mask_cvtepi8_epi16 (__m256i __W
, __mmask32 __U
, __m128i __A
)
2291 return (__m256i
) __builtin_ia32_pmovsxbw256_mask ((__v16qi
) __A
,
2296 static __inline__ __m256i __DEFAULT_FN_ATTRS
2297 _mm256_maskz_cvtepi8_epi16 (__mmask16 __U
, __m128i __A
)
2299 return (__m256i
) __builtin_ia32_pmovsxbw256_mask ((__v16qi
) __A
,
2301 _mm256_setzero_si256 (),
2306 static __inline__ __m128i __DEFAULT_FN_ATTRS
2307 _mm_mask_cvtepu8_epi16 (__m128i __W
, __mmask32 __U
, __m128i __A
)
2309 return (__m128i
) __builtin_ia32_pmovzxbw128_mask ((__v16qi
) __A
,
2314 static __inline__ __m128i __DEFAULT_FN_ATTRS
2315 _mm_maskz_cvtepu8_epi16 (__mmask8 __U
, __m128i __A
)
2317 return (__m128i
) __builtin_ia32_pmovzxbw128_mask ((__v16qi
) __A
,
2319 _mm_setzero_si128 (),
2323 static __inline__ __m256i __DEFAULT_FN_ATTRS
2324 _mm256_mask_cvtepu8_epi16 (__m256i __W
, __mmask32 __U
, __m128i __A
)
2326 return (__m256i
) __builtin_ia32_pmovzxbw256_mask ((__v16qi
) __A
,
2331 static __inline__ __m256i __DEFAULT_FN_ATTRS
2332 _mm256_maskz_cvtepu8_epi16 (__mmask16 __U
, __m128i __A
)
2334 return (__m256i
) __builtin_ia32_pmovzxbw256_mask ((__v16qi
) __A
,
2336 _mm256_setzero_si256 (),
2341 #define _mm_cmp_epi8_mask(a, b, p) __extension__ ({ \
2342 (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)(__m128i)(a), \
2343 (__v16qi)(__m128i)(b), (int)(p), \
2346 #define _mm_mask_cmp_epi8_mask(m, a, b, p) __extension__ ({ \
2347 (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)(__m128i)(a), \
2348 (__v16qi)(__m128i)(b), (int)(p), \
2351 #define _mm_cmp_epu8_mask(a, b, p) __extension__ ({ \
2352 (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)(__m128i)(a), \
2353 (__v16qi)(__m128i)(b), (int)(p), \
2356 #define _mm_mask_cmp_epu8_mask(m, a, b, p) __extension__ ({ \
2357 (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)(__m128i)(a), \
2358 (__v16qi)(__m128i)(b), (int)(p), \
2361 #define _mm256_cmp_epi8_mask(a, b, p) __extension__ ({ \
2362 (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)(__m256i)(a), \
2363 (__v32qi)(__m256i)(b), (int)(p), \
2366 #define _mm256_mask_cmp_epi8_mask(m, a, b, p) __extension__ ({ \
2367 (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)(__m256i)(a), \
2368 (__v32qi)(__m256i)(b), (int)(p), \
2371 #define _mm256_cmp_epu8_mask(a, b, p) __extension__ ({ \
2372 (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)(__m256i)(a), \
2373 (__v32qi)(__m256i)(b), (int)(p), \
2376 #define _mm256_mask_cmp_epu8_mask(m, a, b, p) __extension__ ({ \
2377 (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)(__m256i)(a), \
2378 (__v32qi)(__m256i)(b), (int)(p), \
2381 #define _mm_cmp_epi16_mask(a, b, p) __extension__ ({ \
2382 (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)(__m128i)(a), \
2383 (__v8hi)(__m128i)(b), (int)(p), \
2386 #define _mm_mask_cmp_epi16_mask(m, a, b, p) __extension__ ({ \
2387 (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)(__m128i)(a), \
2388 (__v8hi)(__m128i)(b), (int)(p), \
2391 #define _mm_cmp_epu16_mask(a, b, p) __extension__ ({ \
2392 (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)(__m128i)(a), \
2393 (__v8hi)(__m128i)(b), (int)(p), \
2396 #define _mm_mask_cmp_epu16_mask(m, a, b, p) __extension__ ({ \
2397 (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)(__m128i)(a), \
2398 (__v8hi)(__m128i)(b), (int)(p), \
2401 #define _mm256_cmp_epi16_mask(a, b, p) __extension__ ({ \
2402 (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)(__m256i)(a), \
2403 (__v16hi)(__m256i)(b), (int)(p), \
2406 #define _mm256_mask_cmp_epi16_mask(m, a, b, p) __extension__ ({ \
2407 (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)(__m256i)(a), \
2408 (__v16hi)(__m256i)(b), (int)(p), \
2411 #define _mm256_cmp_epu16_mask(a, b, p) __extension__ ({ \
2412 (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)(__m256i)(a), \
2413 (__v16hi)(__m256i)(b), (int)(p), \
2416 #define _mm256_mask_cmp_epu16_mask(m, a, b, p) __extension__ ({ \
2417 (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)(__m256i)(a), \
2418 (__v16hi)(__m256i)(b), (int)(p), \
/* Merge-masked _mm_shufflehi_epi16: the shuffle of A by imm and the
 * pass-through vector W are handed, with mask U, to
 * __builtin_ia32_selectw_128.  imm is forwarded untouched to
 * _mm_shufflehi_epi16. */
#define _mm_mask_shufflehi_epi16(W, U, A, imm) __extension__ ({ \
  __v8hi __shuffled = (__v8hi)_mm_shufflehi_epi16((A), (imm)); \
  __v8hi __passthru = (__v8hi)(__m128i)(W); \
  (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), __shuffled, \
                                      __passthru); })
/* Zero-masked _mm_shufflehi_epi16: the shuffle of A by imm and the vector
 * returned by _mm_setzero_hi() are handed, with mask U, to
 * __builtin_ia32_selectw_128.  imm is forwarded untouched to
 * _mm_shufflehi_epi16. */
#define _mm_maskz_shufflehi_epi16(U, A, imm) __extension__ ({ \
  __v8hi __shuffled = (__v8hi)_mm_shufflehi_epi16((A), (imm)); \
  __v8hi __zeros = (__v8hi)_mm_setzero_hi(); \
  (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), __shuffled, \
                                      __zeros); })
/* Merge-masked _mm256_shufflehi_epi16: the shuffle of A by imm and the
 * pass-through vector W are handed, with mask U, to
 * __builtin_ia32_selectw_256.  imm is forwarded untouched to
 * _mm256_shufflehi_epi16. */
#define _mm256_mask_shufflehi_epi16(W, U, A, imm) __extension__ ({ \
  __v16hi __shuffled = (__v16hi)_mm256_shufflehi_epi16((A), (imm)); \
  __v16hi __passthru = (__v16hi)(__m256i)(W); \
  (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), __shuffled, \
                                      __passthru); })
/* Zero-masked _mm256_shufflehi_epi16: the shuffle of A by imm and an
 * all-zero vector are handed, with mask U, to __builtin_ia32_selectw_256.
 * imm is forwarded untouched to _mm256_shufflehi_epi16. */
#define _mm256_maskz_shufflehi_epi16(U, A, imm) __extension__ ({ \
  __v16hi __shuffled = (__v16hi)_mm256_shufflehi_epi16((A), (imm)); \
  __v16hi __zeros = (__v16hi)_mm256_setzero_si256(); \
  (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), __shuffled, \
                                      __zeros); })
/* Merge-masked _mm_shufflelo_epi16: the shuffle of A by imm and the
 * pass-through vector W are handed, with mask U, to
 * __builtin_ia32_selectw_128.  imm is forwarded untouched to
 * _mm_shufflelo_epi16. */
#define _mm_mask_shufflelo_epi16(W, U, A, imm) __extension__ ({ \
  __v8hi __shuffled = (__v8hi)_mm_shufflelo_epi16((A), (imm)); \
  __v8hi __passthru = (__v8hi)(__m128i)(W); \
  (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), __shuffled, \
                                      __passthru); })
/* Zero-masked _mm_shufflelo_epi16: the shuffle of A by imm and the vector
 * returned by _mm_setzero_hi() are handed, with mask U, to
 * __builtin_ia32_selectw_128.  imm is forwarded untouched to
 * _mm_shufflelo_epi16. */
#define _mm_maskz_shufflelo_epi16(U, A, imm) __extension__ ({ \
  __v8hi __shuffled = (__v8hi)_mm_shufflelo_epi16((A), (imm)); \
  __v8hi __zeros = (__v8hi)_mm_setzero_hi(); \
  (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), __shuffled, \
                                      __zeros); })
2451 #define _mm256_mask_shufflelo_epi16(W, U, A, imm) __extension__ ({ \
2452 (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
2453 (__v16hi)_mm256_shufflelo_epi16((A), \
2455 (__v16hi)(__m256i)(W)); })
2457 #define _mm256_maskz_shufflelo_epi16(U, A, imm) __extension__ ({ \
2458 (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
2459 (__v16hi)_mm256_shufflelo_epi16((A), \
2461 (__v16hi)_mm256_setzero_si256()); })
2463 static __inline__ __m256i __DEFAULT_FN_ATTRS
2464 _mm256_sllv_epi16 (__m256i __A
, __m256i __B
)
2466 return (__m256i
) __builtin_ia32_psllv16hi_mask ((__v16hi
) __A
,
2469 _mm256_setzero_si256 (),
2473 static __inline__ __m256i __DEFAULT_FN_ATTRS
2474 _mm256_mask_sllv_epi16 (__m256i __W
, __mmask16 __U
, __m256i __A
,
2477 return (__m256i
) __builtin_ia32_psllv16hi_mask ((__v16hi
) __A
,
2483 static __inline__ __m256i __DEFAULT_FN_ATTRS
2484 _mm256_maskz_sllv_epi16 (__mmask16 __U
, __m256i __A
, __m256i __B
)
2486 return (__m256i
) __builtin_ia32_psllv16hi_mask ((__v16hi
) __A
,
2489 _mm256_setzero_si256 (),
2493 static __inline__ __m128i __DEFAULT_FN_ATTRS
2494 _mm_sllv_epi16 (__m128i __A
, __m128i __B
)
2496 return (__m128i
) __builtin_ia32_psllv8hi_mask ((__v8hi
) __A
,
2503 static __inline__ __m128i __DEFAULT_FN_ATTRS
2504 _mm_mask_sllv_epi16 (__m128i __W
, __mmask8 __U
, __m128i __A
,
2507 return (__m128i
) __builtin_ia32_psllv8hi_mask ((__v8hi
) __A
,
2513 static __inline__ __m128i __DEFAULT_FN_ATTRS
2514 _mm_maskz_sllv_epi16 (__mmask8 __U
, __m128i __A
, __m128i __B
)
2516 return (__m128i
) __builtin_ia32_psllv8hi_mask ((__v8hi
) __A
,
2519 _mm_setzero_si128 (),
2523 static __inline__ __m128i __DEFAULT_FN_ATTRS
2524 _mm_mask_sll_epi16 (__m128i __W
, __mmask8 __U
, __m128i __A
,
2527 return (__m128i
) __builtin_ia32_psllw128_mask ((__v8hi
) __A
,
2533 static __inline__ __m128i __DEFAULT_FN_ATTRS
2534 _mm_maskz_sll_epi16 (__mmask8 __U
, __m128i __A
, __m128i __B
)
2536 return (__m128i
) __builtin_ia32_psllw128_mask ((__v8hi
) __A
,
2539 _mm_setzero_si128 (),
2543 static __inline__ __m256i __DEFAULT_FN_ATTRS
2544 _mm256_mask_sll_epi16 (__m256i __W
, __mmask16 __U
, __m256i __A
,
2547 return (__m256i
) __builtin_ia32_psllw256_mask ((__v16hi
) __A
,
2553 static __inline__ __m256i __DEFAULT_FN_ATTRS
2554 _mm256_maskz_sll_epi16 (__mmask16 __U
, __m256i __A
, __m128i __B
)
2556 return (__m256i
) __builtin_ia32_psllw256_mask ((__v16hi
) __A
,
2559 _mm256_setzero_si256 (),
2563 #define _mm_mask_slli_epi16(W, U, A, B) __extension__ ({ \
2564 (__m128i)__builtin_ia32_psllwi128_mask((__v8hi)(__m128i)(A), (int)(B), \
2565 (__v8hi)(__m128i)(W), \
2568 #define _mm_maskz_slli_epi16(U, A, B) __extension__ ({ \
2569 (__m128i)__builtin_ia32_psllwi128_mask((__v8hi)(__m128i)(A), (int)(B), \
2570 (__v8hi)_mm_setzero_si128(), \
2573 #define _mm256_mask_slli_epi16(W, U, A, B) __extension__ ({ \
2574 (__m256i)__builtin_ia32_psllwi256_mask((__v16hi)(__m256i)(A), (int)(B), \
2575 (__v16hi)(__m256i)(W), \
2578 #define _mm256_maskz_slli_epi16(U, A, B) __extension__ ({ \
2579 (__m256i)__builtin_ia32_psllwi256_mask((__v16hi)(__m256i)(A), (int)(B), \
2580 (__v16hi)_mm256_setzero_si256(), \
2585 static __inline__ __m256i __DEFAULT_FN_ATTRS
2586 _mm256_srlv_epi16 (__m256i __A
, __m256i __B
)
2588 return (__m256i
) __builtin_ia32_psrlv16hi_mask ((__v16hi
) __A
,
2591 _mm256_setzero_si256 (),
2595 static __inline__ __m256i __DEFAULT_FN_ATTRS
2596 _mm256_mask_srlv_epi16 (__m256i __W
, __mmask16 __U
, __m256i __A
,
2599 return (__m256i
) __builtin_ia32_psrlv16hi_mask ((__v16hi
) __A
,
2605 static __inline__ __m256i __DEFAULT_FN_ATTRS
2606 _mm256_maskz_srlv_epi16 (__mmask16 __U
, __m256i __A
, __m256i __B
)
2608 return (__m256i
) __builtin_ia32_psrlv16hi_mask ((__v16hi
) __A
,
2611 _mm256_setzero_si256 (),
2615 static __inline__ __m128i __DEFAULT_FN_ATTRS
2616 _mm_srlv_epi16 (__m128i __A
, __m128i __B
)
2618 return (__m128i
) __builtin_ia32_psrlv8hi_mask ((__v8hi
) __A
,
2625 static __inline__ __m128i __DEFAULT_FN_ATTRS
2626 _mm_mask_srlv_epi16 (__m128i __W
, __mmask8 __U
, __m128i __A
,
2629 return (__m128i
) __builtin_ia32_psrlv8hi_mask ((__v8hi
) __A
,
2635 static __inline__ __m128i __DEFAULT_FN_ATTRS
2636 _mm_maskz_srlv_epi16 (__mmask8 __U
, __m128i __A
, __m128i __B
)
2638 return (__m128i
) __builtin_ia32_psrlv8hi_mask ((__v8hi
) __A
,
2641 _mm_setzero_si128 (),
2645 static __inline__ __m256i __DEFAULT_FN_ATTRS
2646 _mm256_srav_epi16 (__m256i __A
, __m256i __B
)
2648 return (__m256i
) __builtin_ia32_psrav16hi_mask ((__v16hi
) __A
,
2651 _mm256_setzero_si256 (),
2655 static __inline__ __m256i __DEFAULT_FN_ATTRS
2656 _mm256_mask_srav_epi16 (__m256i __W
, __mmask16 __U
, __m256i __A
,
2659 return (__m256i
) __builtin_ia32_psrav16hi_mask ((__v16hi
) __A
,
2665 static __inline__ __m256i __DEFAULT_FN_ATTRS
2666 _mm256_maskz_srav_epi16 (__mmask16 __U
, __m256i __A
, __m256i __B
)
2668 return (__m256i
) __builtin_ia32_psrav16hi_mask ((__v16hi
) __A
,
2671 _mm256_setzero_si256 (),
2675 static __inline__ __m128i __DEFAULT_FN_ATTRS
2676 _mm_srav_epi16 (__m128i __A
, __m128i __B
)
2678 return (__m128i
) __builtin_ia32_psrav8hi_mask ((__v8hi
) __A
,
2685 static __inline__ __m128i __DEFAULT_FN_ATTRS
2686 _mm_mask_srav_epi16 (__m128i __W
, __mmask8 __U
, __m128i __A
,
2689 return (__m128i
) __builtin_ia32_psrav8hi_mask ((__v8hi
) __A
,
2695 static __inline__ __m128i __DEFAULT_FN_ATTRS
2696 _mm_maskz_srav_epi16 (__mmask8 __U
, __m128i __A
, __m128i __B
)
2698 return (__m128i
) __builtin_ia32_psrav8hi_mask ((__v8hi
) __A
,
2701 _mm_setzero_si128 (),
2705 static __inline__ __m128i __DEFAULT_FN_ATTRS
2706 _mm_mask_sra_epi16 (__m128i __W
, __mmask8 __U
, __m128i __A
,
2709 return (__m128i
) __builtin_ia32_psraw128_mask ((__v8hi
) __A
,
2715 static __inline__ __m128i __DEFAULT_FN_ATTRS
2716 _mm_maskz_sra_epi16 (__mmask8 __U
, __m128i __A
, __m128i __B
)
2718 return (__m128i
) __builtin_ia32_psraw128_mask ((__v8hi
) __A
,
2721 _mm_setzero_si128 (),
2725 static __inline__ __m256i __DEFAULT_FN_ATTRS
2726 _mm256_mask_sra_epi16 (__m256i __W
, __mmask16 __U
, __m256i __A
,
2729 return (__m256i
) __builtin_ia32_psraw256_mask ((__v16hi
) __A
,
2735 static __inline__ __m256i __DEFAULT_FN_ATTRS
2736 _mm256_maskz_sra_epi16 (__mmask16 __U
, __m256i __A
, __m128i __B
)
2738 return (__m256i
) __builtin_ia32_psraw256_mask ((__v16hi
) __A
,
2741 _mm256_setzero_si256 (),
2745 #define _mm_mask_srai_epi16(W, U, A, imm) __extension__ ({ \
2746 (__m128i)__builtin_ia32_psrawi128_mask((__v8hi)(__m128i)(A), (int)(imm), \
2747 (__v8hi)(__m128i)(W), \
2750 #define _mm_maskz_srai_epi16(U, A, imm) __extension__ ({ \
2751 (__m128i)__builtin_ia32_psrawi128_mask((__v8hi)(__m128i)(A), (int)(imm), \
2752 (__v8hi)_mm_setzero_si128(), \
2755 #define _mm256_mask_srai_epi16(W, U, A, imm) __extension__ ({ \
2756 (__m256i)__builtin_ia32_psrawi256_mask((__v16hi)(__m256i)(A), (int)(imm), \
2757 (__v16hi)(__m256i)(W), \
2760 #define _mm256_maskz_srai_epi16(U, A, imm) __extension__ ({ \
2761 (__m256i)__builtin_ia32_psrawi256_mask((__v16hi)(__m256i)(A), (int)(imm), \
2762 (__v16hi)_mm256_setzero_si256(), \
2765 static __inline__ __m128i __DEFAULT_FN_ATTRS
2766 _mm_mask_srl_epi16 (__m128i __W
, __mmask8 __U
, __m128i __A
,
2769 return (__m128i
) __builtin_ia32_psrlw128_mask ((__v8hi
) __A
,
2775 static __inline__ __m128i __DEFAULT_FN_ATTRS
2776 _mm_maskz_srl_epi16 (__mmask8 __U
, __m128i __A
, __m128i __B
)
2778 return (__m128i
) __builtin_ia32_psrlw128_mask ((__v8hi
) __A
,
2781 _mm_setzero_si128 (),
2785 static __inline__ __m256i __DEFAULT_FN_ATTRS
2786 _mm256_mask_srl_epi16 (__m256i __W
, __mmask16 __U
, __m256i __A
,
2789 return (__m256i
) __builtin_ia32_psrlw256_mask ((__v16hi
) __A
,
2795 static __inline__ __m256i __DEFAULT_FN_ATTRS
2796 _mm256_maskz_srl_epi16 (__mmask16 __U
, __m256i __A
, __m128i __B
)
2798 return (__m256i
) __builtin_ia32_psrlw256_mask ((__v16hi
) __A
,
2801 _mm256_setzero_si256 (),
2805 #define _mm_mask_srli_epi16(W, U, A, imm) __extension__ ({ \
2806 (__m128i)__builtin_ia32_psrlwi128_mask((__v8hi)(__m128i)(A), (int)(imm), \
2807 (__v8hi)(__m128i)(W), \
2810 #define _mm_maskz_srli_epi16(U, A, imm) __extension__ ({ \
2811 (__m128i)__builtin_ia32_psrlwi128_mask((__v8hi)(__m128i)(A), (int)(imm), \
2812 (__v8hi)_mm_setzero_si128(), \
2815 #define _mm256_mask_srli_epi16(W, U, A, imm) __extension__ ({ \
2816 (__m256i)__builtin_ia32_psrlwi256_mask((__v16hi)(__m256i)(A), (int)(imm), \
2817 (__v16hi)(__m256i)(W), \
2820 #define _mm256_maskz_srli_epi16(U, A, imm) __extension__ ({ \
2821 (__m256i)__builtin_ia32_psrlwi256_mask((__v16hi)(__m256i)(A), (int)(imm), \
2822 (__v16hi)_mm256_setzero_si256(), \
2825 static __inline__ __m128i __DEFAULT_FN_ATTRS
2826 _mm_mask_mov_epi16 (__m128i __W
, __mmask8 __U
, __m128i __A
)
2828 return (__m128i
) __builtin_ia32_selectw_128 ((__mmask8
) __U
,
2833 static __inline__ __m128i __DEFAULT_FN_ATTRS
2834 _mm_maskz_mov_epi16 (__mmask8 __U
, __m128i __A
)
2836 return (__m128i
) __builtin_ia32_selectw_128 ((__mmask8
) __U
,
2838 (__v8hi
) _mm_setzero_hi ());
2841 static __inline__ __m256i __DEFAULT_FN_ATTRS
2842 _mm256_mask_mov_epi16 (__m256i __W
, __mmask16 __U
, __m256i __A
)
2844 return (__m256i
) __builtin_ia32_selectw_256 ((__mmask16
) __U
,
2849 static __inline__ __m256i __DEFAULT_FN_ATTRS
2850 _mm256_maskz_mov_epi16 (__mmask16 __U
, __m256i __A
)
2852 return (__m256i
) __builtin_ia32_selectw_256 ((__mmask16
) __U
,
2854 (__v16hi
) _mm256_setzero_si256 ());
2857 static __inline__ __m128i __DEFAULT_FN_ATTRS
2858 _mm_mask_mov_epi8 (__m128i __W
, __mmask16 __U
, __m128i __A
)
2860 return (__m128i
) __builtin_ia32_selectb_128 ((__mmask16
) __U
,
2865 static __inline__ __m128i __DEFAULT_FN_ATTRS
2866 _mm_maskz_mov_epi8 (__mmask16 __U
, __m128i __A
)
2868 return (__m128i
) __builtin_ia32_selectb_128 ((__mmask16
) __U
,
2870 (__v16qi
) _mm_setzero_hi ());
2873 static __inline__ __m256i __DEFAULT_FN_ATTRS
2874 _mm256_mask_mov_epi8 (__m256i __W
, __mmask32 __U
, __m256i __A
)
2876 return (__m256i
) __builtin_ia32_selectb_256 ((__mmask32
) __U
,
2881 static __inline__ __m256i __DEFAULT_FN_ATTRS
2882 _mm256_maskz_mov_epi8 (__mmask32 __U
, __m256i __A
)
2884 return (__m256i
) __builtin_ia32_selectb_256 ((__mmask32
) __U
,
2886 (__v32qi
) _mm256_setzero_si256 ());
2890 static __inline__ __m128i __DEFAULT_FN_ATTRS
2891 _mm_mask_set1_epi8 (__m128i __O
, __mmask16 __M
, char __A
)
2893 return (__m128i
) __builtin_ia32_pbroadcastb128_gpr_mask (__A
,
2898 static __inline__ __m128i __DEFAULT_FN_ATTRS
2899 _mm_maskz_set1_epi8 (__mmask16 __M
, char __A
)
2901 return (__m128i
) __builtin_ia32_pbroadcastb128_gpr_mask (__A
,
2903 _mm_setzero_si128 (),
2907 static __inline__ __m256i __DEFAULT_FN_ATTRS
2908 _mm256_mask_set1_epi8 (__m256i __O
, __mmask32 __M
, char __A
)
2910 return (__m256i
) __builtin_ia32_pbroadcastb256_gpr_mask (__A
,
2915 static __inline__ __m256i __DEFAULT_FN_ATTRS
2916 _mm256_maskz_set1_epi8 (__mmask32 __M
, char __A
)
2918 return (__m256i
) __builtin_ia32_pbroadcastb256_gpr_mask (__A
,
2920 _mm256_setzero_si256 (),
2924 static __inline__ __m128i __DEFAULT_FN_ATTRS
2925 _mm_mask_loadu_epi16 (__m128i __W
, __mmask8 __U
, void const *__P
)
2927 return (__m128i
) __builtin_ia32_loaddquhi128_mask ((__v8hi
*) __P
,
2932 static __inline__ __m128i __DEFAULT_FN_ATTRS
2933 _mm_maskz_loadu_epi16 (__mmask8 __U
, void const *__P
)
2935 return (__m128i
) __builtin_ia32_loaddquhi128_mask ((__v8hi
*) __P
,
2941 static __inline__ __m256i __DEFAULT_FN_ATTRS
2942 _mm256_mask_loadu_epi16 (__m256i __W
, __mmask16 __U
, void const *__P
)
2944 return (__m256i
) __builtin_ia32_loaddquhi256_mask ((__v16hi
*) __P
,
2949 static __inline__ __m256i __DEFAULT_FN_ATTRS
2950 _mm256_maskz_loadu_epi16 (__mmask16 __U
, void const *__P
)
2952 return (__m256i
) __builtin_ia32_loaddquhi256_mask ((__v16hi
*) __P
,
2954 _mm256_setzero_si256 (),
2958 static __inline__ __m128i __DEFAULT_FN_ATTRS
2959 _mm_mask_loadu_epi8 (__m128i __W
, __mmask16 __U
, void const *__P
)
2961 return (__m128i
) __builtin_ia32_loaddquqi128_mask ((__v16qi
*) __P
,
2966 static __inline__ __m128i __DEFAULT_FN_ATTRS
2967 _mm_maskz_loadu_epi8 (__mmask16 __U
, void const *__P
)
2969 return (__m128i
) __builtin_ia32_loaddquqi128_mask ((__v16qi
*) __P
,
2971 _mm_setzero_si128 (),
2975 static __inline__ __m256i __DEFAULT_FN_ATTRS
2976 _mm256_mask_loadu_epi8 (__m256i __W
, __mmask32 __U
, void const *__P
)
2978 return (__m256i
) __builtin_ia32_loaddquqi256_mask ((__v32qi
*) __P
,
2983 static __inline__ __m256i __DEFAULT_FN_ATTRS
2984 _mm256_maskz_loadu_epi8 (__mmask32 __U
, void const *__P
)
2986 return (__m256i
) __builtin_ia32_loaddquqi256_mask ((__v32qi
*) __P
,
2988 _mm256_setzero_si256 (),
2991 static __inline__
void __DEFAULT_FN_ATTRS
2992 _mm_mask_storeu_epi16 (void *__P
, __mmask8 __U
, __m128i __A
)
2994 __builtin_ia32_storedquhi128_mask ((__v8hi
*) __P
,
2999 static __inline__
void __DEFAULT_FN_ATTRS
3000 _mm256_mask_storeu_epi16 (void *__P
, __mmask16 __U
, __m256i __A
)
3002 __builtin_ia32_storedquhi256_mask ((__v16hi
*) __P
,
3007 static __inline__
void __DEFAULT_FN_ATTRS
3008 _mm_mask_storeu_epi8 (void *__P
, __mmask16 __U
, __m128i __A
)
3010 __builtin_ia32_storedquqi128_mask ((__v16qi
*) __P
,
3015 static __inline__
void __DEFAULT_FN_ATTRS
3016 _mm256_mask_storeu_epi8 (void *__P
, __mmask32 __U
, __m256i __A
)
3018 __builtin_ia32_storedquqi256_mask ((__v32qi
*) __P
,
3023 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
3024 _mm_test_epi8_mask (__m128i __A
, __m128i __B
)
3026 return (__mmask16
) __builtin_ia32_ptestmb128 ((__v16qi
) __A
,
3031 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
3032 _mm_mask_test_epi8_mask (__mmask16 __U
, __m128i __A
, __m128i __B
)
3034 return (__mmask16
) __builtin_ia32_ptestmb128 ((__v16qi
) __A
,
3035 (__v16qi
) __B
, __U
);
3038 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
3039 _mm256_test_epi8_mask (__m256i __A
, __m256i __B
)
3041 return (__mmask32
) __builtin_ia32_ptestmb256 ((__v32qi
) __A
,
3046 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
3047 _mm256_mask_test_epi8_mask (__mmask32 __U
, __m256i __A
, __m256i __B
)
3049 return (__mmask32
) __builtin_ia32_ptestmb256 ((__v32qi
) __A
,
3050 (__v32qi
) __B
, __U
);
3053 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
3054 _mm_test_epi16_mask (__m128i __A
, __m128i __B
)
3056 return (__mmask8
) __builtin_ia32_ptestmw128 ((__v8hi
) __A
,
3061 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
3062 _mm_mask_test_epi16_mask (__mmask8 __U
, __m128i __A
, __m128i __B
)
3064 return (__mmask8
) __builtin_ia32_ptestmw128 ((__v8hi
) __A
,
3068 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
3069 _mm256_test_epi16_mask (__m256i __A
, __m256i __B
)
3071 return (__mmask16
) __builtin_ia32_ptestmw256 ((__v16hi
) __A
,
3076 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
3077 _mm256_mask_test_epi16_mask (__mmask16 __U
, __m256i __A
, __m256i __B
)
3079 return (__mmask16
) __builtin_ia32_ptestmw256 ((__v16hi
) __A
,
3080 (__v16hi
) __B
, __U
);
3083 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
3084 _mm_testn_epi8_mask (__m128i __A
, __m128i __B
)
3086 return (__mmask16
) __builtin_ia32_ptestnmb128 ((__v16qi
) __A
,
3091 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
3092 _mm_mask_testn_epi8_mask (__mmask16 __U
, __m128i __A
, __m128i __B
)
3094 return (__mmask16
) __builtin_ia32_ptestnmb128 ((__v16qi
) __A
,
3095 (__v16qi
) __B
, __U
);
3098 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
3099 _mm256_testn_epi8_mask (__m256i __A
, __m256i __B
)
3101 return (__mmask32
) __builtin_ia32_ptestnmb256 ((__v32qi
) __A
,
3106 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
3107 _mm256_mask_testn_epi8_mask (__mmask32 __U
, __m256i __A
, __m256i __B
)
3109 return (__mmask32
) __builtin_ia32_ptestnmb256 ((__v32qi
) __A
,
3110 (__v32qi
) __B
, __U
);
3113 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
3114 _mm_testn_epi16_mask (__m128i __A
, __m128i __B
)
3116 return (__mmask8
) __builtin_ia32_ptestnmw128 ((__v8hi
) __A
,
3121 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
3122 _mm_mask_testn_epi16_mask (__mmask8 __U
, __m128i __A
, __m128i __B
)
3124 return (__mmask8
) __builtin_ia32_ptestnmw128 ((__v8hi
) __A
,
3128 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
3129 _mm256_testn_epi16_mask (__m256i __A
, __m256i __B
)
3131 return (__mmask16
) __builtin_ia32_ptestnmw256 ((__v16hi
) __A
,
3136 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
3137 _mm256_mask_testn_epi16_mask (__mmask16 __U
, __m256i __A
, __m256i __B
)
3139 return (__mmask16
) __builtin_ia32_ptestnmw256 ((__v16hi
) __A
,
3140 (__v16hi
) __B
, __U
);
3143 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
3144 _mm_movepi8_mask (__m128i __A
)
3146 return (__mmask16
) __builtin_ia32_cvtb2mask128 ((__v16qi
) __A
);
3149 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
3150 _mm256_movepi8_mask (__m256i __A
)
3152 return (__mmask32
) __builtin_ia32_cvtb2mask256 ((__v32qi
) __A
);
3155 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
3156 _mm_movepi16_mask (__m128i __A
)
3158 return (__mmask8
) __builtin_ia32_cvtw2mask128 ((__v8hi
) __A
);
3161 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
3162 _mm256_movepi16_mask (__m256i __A
)
3164 return (__mmask16
) __builtin_ia32_cvtw2mask256 ((__v16hi
) __A
);
3167 static __inline__ __m128i __DEFAULT_FN_ATTRS
3168 _mm_movm_epi8 (__mmask16 __A
)
3170 return (__m128i
) __builtin_ia32_cvtmask2b128 (__A
);
3173 static __inline__ __m256i __DEFAULT_FN_ATTRS
3174 _mm256_movm_epi8 (__mmask32 __A
)
3176 return (__m256i
) __builtin_ia32_cvtmask2b256 (__A
);
3179 static __inline__ __m128i __DEFAULT_FN_ATTRS
3180 _mm_movm_epi16 (__mmask8 __A
)
3182 return (__m128i
) __builtin_ia32_cvtmask2w128 (__A
);
3185 static __inline__ __m256i __DEFAULT_FN_ATTRS
3186 _mm256_movm_epi16 (__mmask16 __A
)
3188 return (__m256i
) __builtin_ia32_cvtmask2w256 (__A
);
3191 static __inline__ __m128i __DEFAULT_FN_ATTRS
3192 _mm_mask_broadcastb_epi8 (__m128i __O
, __mmask16 __M
, __m128i __A
)
3194 return (__m128i
)__builtin_ia32_selectb_128(__M
,
3195 (__v16qi
) _mm_broadcastb_epi8(__A
),
3199 static __inline__ __m128i __DEFAULT_FN_ATTRS
3200 _mm_maskz_broadcastb_epi8 (__mmask16 __M
, __m128i __A
)
3202 return (__m128i
)__builtin_ia32_selectb_128(__M
,
3203 (__v16qi
) _mm_broadcastb_epi8(__A
),
3204 (__v16qi
) _mm_setzero_si128());
3207 static __inline__ __m256i __DEFAULT_FN_ATTRS
3208 _mm256_mask_broadcastb_epi8 (__m256i __O
, __mmask32 __M
, __m128i __A
)
3210 return (__m256i
)__builtin_ia32_selectb_256(__M
,
3211 (__v32qi
) _mm256_broadcastb_epi8(__A
),
3215 static __inline__ __m256i __DEFAULT_FN_ATTRS
3216 _mm256_maskz_broadcastb_epi8 (__mmask32 __M
, __m128i __A
)
3218 return (__m256i
)__builtin_ia32_selectb_256(__M
,
3219 (__v32qi
) _mm256_broadcastb_epi8(__A
),
3220 (__v32qi
) _mm256_setzero_si256());
3223 static __inline__ __m128i __DEFAULT_FN_ATTRS
3224 _mm_mask_broadcastw_epi16 (__m128i __O
, __mmask8 __M
, __m128i __A
)
3226 return (__m128i
)__builtin_ia32_selectw_128(__M
,
3227 (__v8hi
) _mm_broadcastw_epi16(__A
),
3231 static __inline__ __m128i __DEFAULT_FN_ATTRS
3232 _mm_maskz_broadcastw_epi16 (__mmask8 __M
, __m128i __A
)
3234 return (__m128i
)__builtin_ia32_selectw_128(__M
,
3235 (__v8hi
) _mm_broadcastw_epi16(__A
),
3236 (__v8hi
) _mm_setzero_si128());
3239 static __inline__ __m256i __DEFAULT_FN_ATTRS
3240 _mm256_mask_broadcastw_epi16 (__m256i __O
, __mmask16 __M
, __m128i __A
)
3242 return (__m256i
)__builtin_ia32_selectw_256(__M
,
3243 (__v16hi
) _mm256_broadcastw_epi16(__A
),
3247 static __inline__ __m256i __DEFAULT_FN_ATTRS
3248 _mm256_maskz_broadcastw_epi16 (__mmask16 __M
, __m128i __A
)
3250 return (__m256i
)__builtin_ia32_selectw_256(__M
,
3251 (__v16hi
) _mm256_broadcastw_epi16(__A
),
3252 (__v16hi
) _mm256_setzero_si256());
3255 static __inline__ __m256i __DEFAULT_FN_ATTRS
3256 _mm256_mask_set1_epi16 (__m256i __O
, __mmask16 __M
, short __A
)
3258 return (__m256i
) __builtin_ia32_pbroadcastw256_gpr_mask (__A
,
3263 static __inline__ __m256i __DEFAULT_FN_ATTRS
3264 _mm256_maskz_set1_epi16 (__mmask16 __M
, short __A
)
3266 return (__m256i
) __builtin_ia32_pbroadcastw256_gpr_mask (__A
,
3267 (__v16hi
) _mm256_setzero_si256 (),
3271 static __inline__ __m128i __DEFAULT_FN_ATTRS
3272 _mm_mask_set1_epi16 (__m128i __O
, __mmask8 __M
, short __A
)
3274 return (__m128i
) __builtin_ia32_pbroadcastw128_gpr_mask (__A
,
3279 static __inline__ __m128i __DEFAULT_FN_ATTRS
3280 _mm_maskz_set1_epi16 (__mmask8 __M
, short __A
)
3282 return (__m128i
) __builtin_ia32_pbroadcastw128_gpr_mask (__A
,
3283 (__v8hi
) _mm_setzero_si128 (),
3287 static __inline__ __m128i __DEFAULT_FN_ATTRS
3288 _mm_permutexvar_epi16 (__m128i __A
, __m128i __B
)
3290 return (__m128i
) __builtin_ia32_permvarhi128_mask ((__v8hi
) __B
,
3292 (__v8hi
) _mm_undefined_si128 (),
3296 static __inline__ __m128i __DEFAULT_FN_ATTRS
3297 _mm_maskz_permutexvar_epi16 (__mmask8 __M
, __m128i __A
, __m128i __B
)
3299 return (__m128i
) __builtin_ia32_permvarhi128_mask ((__v8hi
) __B
,
3301 (__v8hi
) _mm_setzero_si128 (),
3305 static __inline__ __m128i __DEFAULT_FN_ATTRS
3306 _mm_mask_permutexvar_epi16 (__m128i __W
, __mmask8 __M
, __m128i __A
,
3309 return (__m128i
) __builtin_ia32_permvarhi128_mask ((__v8hi
) __B
,
3315 static __inline__ __m256i __DEFAULT_FN_ATTRS
3316 _mm256_permutexvar_epi16 (__m256i __A
, __m256i __B
)
3318 return (__m256i
) __builtin_ia32_permvarhi256_mask ((__v16hi
) __B
,
3320 (__v16hi
) _mm256_undefined_si256 (),
3324 static __inline__ __m256i __DEFAULT_FN_ATTRS
3325 _mm256_maskz_permutexvar_epi16 (__mmask16 __M
, __m256i __A
,
3328 return (__m256i
) __builtin_ia32_permvarhi256_mask ((__v16hi
) __B
,
3330 (__v16hi
) _mm256_setzero_si256 (),
3334 static __inline__ __m256i __DEFAULT_FN_ATTRS
3335 _mm256_mask_permutexvar_epi16 (__m256i __W
, __mmask16 __M
, __m256i __A
,
3338 return (__m256i
) __builtin_ia32_permvarhi256_mask ((__v16hi
) __B
,
3344 #define _mm_mask_alignr_epi8(W, U, A, B, N) __extension__ ({ \
3345 (__m128i)__builtin_ia32_palignr128_mask((__v16qi)(__m128i)(A), \
3346 (__v16qi)(__m128i)(B), (int)(N), \
3347 (__v16qi)(__m128i)(W), \
3350 #define _mm_maskz_alignr_epi8(U, A, B, N) __extension__ ({ \
3351 (__m128i)__builtin_ia32_palignr128_mask((__v16qi)(__m128i)(A), \
3352 (__v16qi)(__m128i)(B), (int)(N), \
3353 (__v16qi)_mm_setzero_si128(), \
3356 #define _mm256_mask_alignr_epi8(W, U, A, B, N) __extension__ ({ \
3357 (__m256i)__builtin_ia32_palignr256_mask((__v32qi)(__m256i)(A), \
3358 (__v32qi)(__m256i)(B), (int)(N), \
3359 (__v32qi)(__m256i)(W), \
3362 #define _mm256_maskz_alignr_epi8(U, A, B, N) __extension__ ({ \
3363 (__m256i)__builtin_ia32_palignr256_mask((__v32qi)(__m256i)(A), \
3364 (__v32qi)(__m256i)(B), (int)(N), \
3365 (__v32qi)_mm256_setzero_si256(), \
3368 #define _mm_dbsad_epu8(A, B, imm) __extension__ ({ \
3369 (__m128i)__builtin_ia32_dbpsadbw128_mask((__v16qi)(__m128i)(A), \
3370 (__v16qi)(__m128i)(B), (int)(imm), \
3371 (__v8hi)_mm_setzero_hi(), \
3374 #define _mm_mask_dbsad_epu8(W, U, A, B, imm) __extension__ ({ \
3375 (__m128i)__builtin_ia32_dbpsadbw128_mask((__v16qi)(__m128i)(A), \
3376 (__v16qi)(__m128i)(B), (int)(imm), \
3377 (__v8hi)(__m128i)(W), \
3380 #define _mm_maskz_dbsad_epu8(U, A, B, imm) __extension__ ({ \
3381 (__m128i)__builtin_ia32_dbpsadbw128_mask((__v16qi)(__m128i)(A), \
3382 (__v16qi)(__m128i)(B), (int)(imm), \
3383 (__v8hi)_mm_setzero_si128(), \
3386 #define _mm256_dbsad_epu8(A, B, imm) __extension__ ({ \
3387 (__m256i)__builtin_ia32_dbpsadbw256_mask((__v32qi)(__m256i)(A), \
3388 (__v32qi)(__m256i)(B), (int)(imm), \
3389 (__v16hi)_mm256_setzero_si256(), \
3392 #define _mm256_mask_dbsad_epu8(W, U, A, B, imm) __extension__ ({ \
3393 (__m256i)__builtin_ia32_dbpsadbw256_mask((__v32qi)(__m256i)(A), \
3394 (__v32qi)(__m256i)(B), (int)(imm), \
3395 (__v16hi)(__m256i)(W), \
3398 #define _mm256_maskz_dbsad_epu8(U, A, B, imm) __extension__ ({ \
3399 (__m256i)__builtin_ia32_dbpsadbw256_mask((__v32qi)(__m256i)(A), \
3400 (__v32qi)(__m256i)(B), (int)(imm), \
3401 (__v16hi)_mm256_setzero_si256(), \
3404 #undef __DEFAULT_FN_ATTRS
3406 #endif /* __AVX512VLBWINTRIN_H */