/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */
#include <assert.h>
#include <string.h>

#include <string>
#include <typeinfo>

#include "config/aom_dsp_rtcd.h"

#include "test/acm_random.h"

// Inlining is not forced upon the compiler, since some tests call
// SIMD_INLINE functions via function pointers.
#undef SIMD_INLINE
#define SIMD_INLINE static inline
#include "aom_dsp/aom_simd.h"
#include "aom_dsp/simd/v256_intrinsics_c.h"
// Machine tuned code goes into this file. This file is included from
// simd_cmp_sse2.cc, simd_cmp_ssse3.cc, etc., which define the macros
// ARCH (=neon, sse2, ssse3, etc.), SIMD_NAMESPACE and ARCH_POSTFIX().
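//
// For illustration only (the exact contents of the including files may
// differ), an including file is expected to look roughly like:
//
//   #define ARCH SSE2
//   #define ARCH_POSTFIX(name) name##_sse2
//   #define SIMD_NAMESPACE simd_test_sse2
//   #include "test/simd_cmp_impl.h"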
#if defined(_MSC_VER)
// Disable "value of intrinsic immediate argument 'value' is out of range
// 'lowerbound - upperbound'" warning. Visual Studio emits this warning though
// the parameters are conditionally checked in, e.g., v256_shr_n_byte. Adding a
// mask doesn't always appear to be sufficient.
#pragma warning(disable : 4556)
#endif
using libaom_test::ACMRandom;

namespace SIMD_NAMESPACE {
// Wrap templates around intrinsics using immediate values
template <int shift>
v64 imm_v64_shl_n_byte(v64 a) {
  return v64_shl_n_byte(a, shift);
}
template <int shift>
v64 imm_v64_shr_n_byte(v64 a) {
  return v64_shr_n_byte(a, shift);
}
template <int shift>
v64 imm_v64_shl_n_8(v64 a) {
  return v64_shl_n_8(a, shift);
}
template <int shift>
v64 imm_v64_shr_n_u8(v64 a) {
  return v64_shr_n_u8(a, shift);
}
template <int shift>
v64 imm_v64_shr_n_s8(v64 a) {
  return v64_shr_n_s8(a, shift);
}
template <int shift>
v64 imm_v64_shl_n_16(v64 a) {
  return v64_shl_n_16(a, shift);
}
template <int shift>
v64 imm_v64_shr_n_u16(v64 a) {
  return v64_shr_n_u16(a, shift);
}
template <int shift>
v64 imm_v64_shr_n_s16(v64 a) {
  return v64_shr_n_s16(a, shift);
}
template <int shift>
v64 imm_v64_shl_n_32(v64 a) {
  return v64_shl_n_32(a, shift);
}
template <int shift>
v64 imm_v64_shr_n_u32(v64 a) {
  return v64_shr_n_u32(a, shift);
}
template <int shift>
v64 imm_v64_shr_n_s32(v64 a) {
  return v64_shr_n_s32(a, shift);
}
template <int shift>
v64 imm_v64_align(v64 a, v64 b) {
  return v64_align(a, b, shift);
}
// Wrap templates around corresponding C implementations of the above
template <int shift>
c_v64 c_imm_v64_shl_n_byte(c_v64 a) {
  return c_v64_shl_n_byte(a, shift);
}
template <int shift>
c_v64 c_imm_v64_shr_n_byte(c_v64 a) {
  return c_v64_shr_n_byte(a, shift);
}
template <int shift>
c_v64 c_imm_v64_shl_n_8(c_v64 a) {
  return c_v64_shl_n_8(a, shift);
}
template <int shift>
c_v64 c_imm_v64_shr_n_u8(c_v64 a) {
  return c_v64_shr_n_u8(a, shift);
}
template <int shift>
c_v64 c_imm_v64_shr_n_s8(c_v64 a) {
  return c_v64_shr_n_s8(a, shift);
}
template <int shift>
c_v64 c_imm_v64_shl_n_16(c_v64 a) {
  return c_v64_shl_n_16(a, shift);
}
template <int shift>
c_v64 c_imm_v64_shr_n_u16(c_v64 a) {
  return c_v64_shr_n_u16(a, shift);
}
template <int shift>
c_v64 c_imm_v64_shr_n_s16(c_v64 a) {
  return c_v64_shr_n_s16(a, shift);
}
template <int shift>
c_v64 c_imm_v64_shl_n_32(c_v64 a) {
  return c_v64_shl_n_32(a, shift);
}
template <int shift>
c_v64 c_imm_v64_shr_n_u32(c_v64 a) {
  return c_v64_shr_n_u32(a, shift);
}
template <int shift>
c_v64 c_imm_v64_shr_n_s32(c_v64 a) {
  return c_v64_shr_n_s32(a, shift);
}
template <int shift>
c_v64 c_imm_v64_align(c_v64 a, c_v64 b) {
  return c_v64_align(a, b, shift);
}
template <int shift>
v128 imm_v128_shl_n_byte(v128 a) {
  return v128_shl_n_byte(a, shift);
}
template <int shift>
v128 imm_v128_shr_n_byte(v128 a) {
  return v128_shr_n_byte(a, shift);
}
template <int shift>
v128 imm_v128_shl_n_8(v128 a) {
  return v128_shl_n_8(a, shift);
}
template <int shift>
v128 imm_v128_shr_n_u8(v128 a) {
  return v128_shr_n_u8(a, shift);
}
template <int shift>
v128 imm_v128_shr_n_s8(v128 a) {
  return v128_shr_n_s8(a, shift);
}
template <int shift>
v128 imm_v128_shl_n_16(v128 a) {
  return v128_shl_n_16(a, shift);
}
template <int shift>
v128 imm_v128_shr_n_u16(v128 a) {
  return v128_shr_n_u16(a, shift);
}
template <int shift>
v128 imm_v128_shr_n_s16(v128 a) {
  return v128_shr_n_s16(a, shift);
}
template <int shift>
v128 imm_v128_shl_n_32(v128 a) {
  return v128_shl_n_32(a, shift);
}
template <int shift>
v128 imm_v128_shr_n_u32(v128 a) {
  return v128_shr_n_u32(a, shift);
}
template <int shift>
v128 imm_v128_shr_n_s32(v128 a) {
  return v128_shr_n_s32(a, shift);
}
template <int shift>
v128 imm_v128_shl_n_64(v128 a) {
  return v128_shl_n_64(a, shift);
}
template <int shift>
v128 imm_v128_shr_n_u64(v128 a) {
  return v128_shr_n_u64(a, shift);
}
template <int shift>
v128 imm_v128_shr_n_s64(v128 a) {
  return v128_shr_n_s64(a, shift);
}
template <int shift>
v128 imm_v128_align(v128 a, v128 b) {
  return v128_align(a, b, shift);
}
template <int shift>
c_v128 c_imm_v128_shl_n_byte(c_v128 a) {
  return c_v128_shl_n_byte(a, shift);
}
template <int shift>
c_v128 c_imm_v128_shr_n_byte(c_v128 a) {
  return c_v128_shr_n_byte(a, shift);
}
template <int shift>
c_v128 c_imm_v128_shl_n_8(c_v128 a) {
  return c_v128_shl_n_8(a, shift);
}
template <int shift>
c_v128 c_imm_v128_shr_n_u8(c_v128 a) {
  return c_v128_shr_n_u8(a, shift);
}
template <int shift>
c_v128 c_imm_v128_shr_n_s8(c_v128 a) {
  return c_v128_shr_n_s8(a, shift);
}
template <int shift>
c_v128 c_imm_v128_shl_n_16(c_v128 a) {
  return c_v128_shl_n_16(a, shift);
}
template <int shift>
c_v128 c_imm_v128_shr_n_u16(c_v128 a) {
  return c_v128_shr_n_u16(a, shift);
}
template <int shift>
c_v128 c_imm_v128_shr_n_s16(c_v128 a) {
  return c_v128_shr_n_s16(a, shift);
}
template <int shift>
c_v128 c_imm_v128_shl_n_32(c_v128 a) {
  return c_v128_shl_n_32(a, shift);
}
template <int shift>
c_v128 c_imm_v128_shr_n_u32(c_v128 a) {
  return c_v128_shr_n_u32(a, shift);
}
template <int shift>
c_v128 c_imm_v128_shr_n_s32(c_v128 a) {
  return c_v128_shr_n_s32(a, shift);
}
template <int shift>
c_v128 c_imm_v128_shl_n_64(c_v128 a) {
  return c_v128_shl_n_64(a, shift);
}
template <int shift>
c_v128 c_imm_v128_shr_n_u64(c_v128 a) {
  return c_v128_shr_n_u64(a, shift);
}
template <int shift>
c_v128 c_imm_v128_shr_n_s64(c_v128 a) {
  return c_v128_shr_n_s64(a, shift);
}
template <int shift>
c_v128 c_imm_v128_align(c_v128 a, c_v128 b) {
  return c_v128_align(a, b, shift);
}
template <int shift>
v256 imm_v256_shl_n_word(v256 a) {
  return v256_shl_n_word(a, shift);
}
template <int shift>
v256 imm_v256_shr_n_word(v256 a) {
  return v256_shr_n_word(a, shift);
}
template <int shift>
v256 imm_v256_shl_n_byte(v256 a) {
  return v256_shl_n_byte(a, shift);
}
template <int shift>
v256 imm_v256_shr_n_byte(v256 a) {
  return v256_shr_n_byte(a, shift);
}
template <int shift>
v256 imm_v256_shl_n_8(v256 a) {
  return v256_shl_n_8(a, shift);
}
template <int shift>
v256 imm_v256_shr_n_u8(v256 a) {
  return v256_shr_n_u8(a, shift);
}
template <int shift>
v256 imm_v256_shr_n_s8(v256 a) {
  return v256_shr_n_s8(a, shift);
}
template <int shift>
v256 imm_v256_shl_n_16(v256 a) {
  return v256_shl_n_16(a, shift);
}
template <int shift>
v256 imm_v256_shr_n_u16(v256 a) {
  return v256_shr_n_u16(a, shift);
}
template <int shift>
v256 imm_v256_shr_n_s16(v256 a) {
  return v256_shr_n_s16(a, shift);
}
template <int shift>
v256 imm_v256_shl_n_32(v256 a) {
  return v256_shl_n_32(a, shift);
}
template <int shift>
v256 imm_v256_shr_n_u32(v256 a) {
  return v256_shr_n_u32(a, shift);
}
template <int shift>
v256 imm_v256_shr_n_s32(v256 a) {
  return v256_shr_n_s32(a, shift);
}
template <int shift>
v256 imm_v256_shl_n_64(v256 a) {
  return v256_shl_n_64(a, shift);
}
template <int shift>
v256 imm_v256_shr_n_u64(v256 a) {
  return v256_shr_n_u64(a, shift);
}
template <int shift>
v256 imm_v256_shr_n_s64(v256 a) {
  return v256_shr_n_s64(a, shift);
}
template <int shift>
v256 imm_v256_align(v256 a, v256 b) {
  return v256_align(a, b, shift);
}
template <int shift>
c_v256 c_imm_v256_shl_n_word(c_v256 a) {
  return c_v256_shl_n_word(a, shift);
}
template <int shift>
c_v256 c_imm_v256_shr_n_word(c_v256 a) {
  return c_v256_shr_n_word(a, shift);
}
template <int shift>
c_v256 c_imm_v256_shl_n_byte(c_v256 a) {
  return c_v256_shl_n_byte(a, shift);
}
template <int shift>
c_v256 c_imm_v256_shr_n_byte(c_v256 a) {
  return c_v256_shr_n_byte(a, shift);
}
template <int shift>
c_v256 c_imm_v256_shl_n_8(c_v256 a) {
  return c_v256_shl_n_8(a, shift);
}
template <int shift>
c_v256 c_imm_v256_shr_n_u8(c_v256 a) {
  return c_v256_shr_n_u8(a, shift);
}
template <int shift>
c_v256 c_imm_v256_shr_n_s8(c_v256 a) {
  return c_v256_shr_n_s8(a, shift);
}
template <int shift>
c_v256 c_imm_v256_shl_n_16(c_v256 a) {
  return c_v256_shl_n_16(a, shift);
}
template <int shift>
c_v256 c_imm_v256_shr_n_u16(c_v256 a) {
  return c_v256_shr_n_u16(a, shift);
}
template <int shift>
c_v256 c_imm_v256_shr_n_s16(c_v256 a) {
  return c_v256_shr_n_s16(a, shift);
}
template <int shift>
c_v256 c_imm_v256_shl_n_32(c_v256 a) {
  return c_v256_shl_n_32(a, shift);
}
template <int shift>
c_v256 c_imm_v256_shr_n_u32(c_v256 a) {
  return c_v256_shr_n_u32(a, shift);
}
template <int shift>
c_v256 c_imm_v256_shr_n_s32(c_v256 a) {
  return c_v256_shr_n_s32(a, shift);
}
template <int shift>
c_v256 c_imm_v256_shl_n_64(c_v256 a) {
  return c_v256_shl_n_64(a, shift);
}
template <int shift>
c_v256 c_imm_v256_shr_n_u64(c_v256 a) {
  return c_v256_shr_n_u64(a, shift);
}
template <int shift>
c_v256 c_imm_v256_shr_n_s64(c_v256 a) {
  return c_v256_shr_n_s64(a, shift);
}
template <int shift>
c_v256 c_imm_v256_align(c_v256 a, c_v256 b) {
  return c_v256_align(a, b, shift);
}
// Wrappers around the SAD and SSD functions
uint32_t v64_sad_u8(v64 a, v64 b) {
  return v64_sad_u8_sum(::v64_sad_u8(v64_sad_u8_init(), a, b));
}
uint32_t v64_ssd_u8(v64 a, v64 b) {
  return v64_ssd_u8_sum(::v64_ssd_u8(v64_ssd_u8_init(), a, b));
}
uint32_t c_v64_sad_u8(c_v64 a, c_v64 b) {
  return c_v64_sad_u8_sum(::c_v64_sad_u8(c_v64_sad_u8_init(), a, b));
}
uint32_t c_v64_ssd_u8(c_v64 a, c_v64 b) {
  return c_v64_ssd_u8_sum(::c_v64_ssd_u8(c_v64_ssd_u8_init(), a, b));
}
uint32_t v128_sad_u8(v128 a, v128 b) {
  return v128_sad_u8_sum(::v128_sad_u8(v128_sad_u8_init(), a, b));
}
uint32_t v128_ssd_u8(v128 a, v128 b) {
  return v128_ssd_u8_sum(::v128_ssd_u8(v128_ssd_u8_init(), a, b));
}
uint32_t c_v128_sad_u8(c_v128 a, c_v128 b) {
  return c_v128_sad_u8_sum(::c_v128_sad_u8(c_v128_sad_u8_init(), a, b));
}
uint32_t c_v128_ssd_u8(c_v128 a, c_v128 b) {
  return c_v128_ssd_u8_sum(::c_v128_ssd_u8(c_v128_ssd_u8_init(), a, b));
}
uint32_t v128_sad_u16(v128 a, v128 b) {
  return v128_sad_u16_sum(::v128_sad_u16(v128_sad_u16_init(), a, b));
}
uint64_t v128_ssd_s16(v128 a, v128 b) {
  return v128_ssd_s16_sum(::v128_ssd_s16(v128_ssd_s16_init(), a, b));
}
uint32_t c_v128_sad_u16(c_v128 a, c_v128 b) {
  return c_v128_sad_u16_sum(::c_v128_sad_u16(c_v128_sad_u16_init(), a, b));
}
uint64_t c_v128_ssd_s16(c_v128 a, c_v128 b) {
  return c_v128_ssd_s16_sum(::c_v128_ssd_s16(c_v128_ssd_s16_init(), a, b));
}
uint32_t v256_sad_u8(v256 a, v256 b) {
  return v256_sad_u8_sum(::v256_sad_u8(v256_sad_u8_init(), a, b));
}
uint32_t v256_ssd_u8(v256 a, v256 b) {
  return v256_ssd_u8_sum(::v256_ssd_u8(v256_ssd_u8_init(), a, b));
}
uint32_t c_v256_sad_u8(c_v256 a, c_v256 b) {
  return c_v256_sad_u8_sum(::c_v256_sad_u8(c_v256_sad_u8_init(), a, b));
}
uint32_t c_v256_ssd_u8(c_v256 a, c_v256 b) {
  return c_v256_ssd_u8_sum(::c_v256_ssd_u8(c_v256_ssd_u8_init(), a, b));
}
uint32_t v256_sad_u16(v256 a, v256 b) {
  return v256_sad_u16_sum(::v256_sad_u16(v256_sad_u16_init(), a, b));
}
uint64_t v256_ssd_s16(v256 a, v256 b) {
  return v256_ssd_s16_sum(::v256_ssd_s16(v256_ssd_s16_init(), a, b));
}
uint32_t c_v256_sad_u16(c_v256 a, c_v256 b) {
  return c_v256_sad_u16_sum(::c_v256_sad_u16(c_v256_sad_u16_init(), a, b));
}
uint64_t c_v256_ssd_s16(c_v256 a, c_v256 b) {
  return c_v256_ssd_s16_sum(::c_v256_ssd_s16(c_v256_ssd_s16_init(), a, b));
}
typedef void (*fptr)();

typedef struct {
  const char *name;
  fptr ref;
  fptr simd;
} mapping;

#define MAP(name)                                                          \
  { #name, reinterpret_cast<fptr>(c_##name), reinterpret_cast<fptr>(name) }
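
// As an illustration, MAP(v64_sad_u8) expands to the initializer
//   { "v64_sad_u8", reinterpret_cast<fptr>(c_v64_sad_u8),
//     reinterpret_cast<fptr>(v64_sad_u8) },
// pairing the C reference implementation with the machine tuned one under
// the intrinsic's name.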
const mapping m[] = { MAP(v64_sad_u8),
                      MAP(v64_pack_s32_u16),
                      MAP(v64_pack_s32_s16),
                      MAP(v64_pack_s16_u8),
                      MAP(v64_pack_s16_s8),
                      MAP(imm_v64_align<1>),
                      MAP(imm_v64_align<2>),
                      MAP(imm_v64_align<3>),
                      MAP(imm_v64_align<4>),
                      MAP(imm_v64_align<5>),
                      MAP(imm_v64_align<6>),
                      MAP(imm_v64_align<7>),
                      MAP(v64_unpacklo_u8_s16),
                      MAP(v64_unpackhi_u8_s16),
                      MAP(v64_unpacklo_s8_s16),
                      MAP(v64_unpackhi_s8_s16),
                      MAP(v64_unpacklo_u16_s32),
                      MAP(v64_unpacklo_s16_s32),
                      MAP(v64_unpackhi_u16_s32),
                      MAP(v64_unpackhi_s16_s32),
                      MAP(imm_v64_shr_n_byte<1>),
                      MAP(imm_v64_shr_n_byte<2>),
                      MAP(imm_v64_shr_n_byte<3>),
                      MAP(imm_v64_shr_n_byte<4>),
                      MAP(imm_v64_shr_n_byte<5>),
                      MAP(imm_v64_shr_n_byte<6>),
                      MAP(imm_v64_shr_n_byte<7>),
                      MAP(imm_v64_shl_n_byte<1>),
                      MAP(imm_v64_shl_n_byte<2>),
                      MAP(imm_v64_shl_n_byte<3>),
                      MAP(imm_v64_shl_n_byte<4>),
                      MAP(imm_v64_shl_n_byte<5>),
                      MAP(imm_v64_shl_n_byte<6>),
                      MAP(imm_v64_shl_n_byte<7>),
                      MAP(imm_v64_shl_n_8<1>),
                      MAP(imm_v64_shl_n_8<2>),
                      MAP(imm_v64_shl_n_8<3>),
                      MAP(imm_v64_shl_n_8<4>),
                      MAP(imm_v64_shl_n_8<5>),
                      MAP(imm_v64_shl_n_8<6>),
                      MAP(imm_v64_shl_n_8<7>),
                      MAP(imm_v64_shr_n_u8<1>),
                      MAP(imm_v64_shr_n_u8<2>),
                      MAP(imm_v64_shr_n_u8<3>),
                      MAP(imm_v64_shr_n_u8<4>),
                      MAP(imm_v64_shr_n_u8<5>),
                      MAP(imm_v64_shr_n_u8<6>),
                      MAP(imm_v64_shr_n_u8<7>),
                      MAP(imm_v64_shr_n_s8<1>),
                      MAP(imm_v64_shr_n_s8<2>),
                      MAP(imm_v64_shr_n_s8<3>),
                      MAP(imm_v64_shr_n_s8<4>),
                      MAP(imm_v64_shr_n_s8<5>),
                      MAP(imm_v64_shr_n_s8<6>),
                      MAP(imm_v64_shr_n_s8<7>),
                      MAP(imm_v64_shl_n_16<1>),
                      MAP(imm_v64_shl_n_16<2>),
                      MAP(imm_v64_shl_n_16<4>),
                      MAP(imm_v64_shl_n_16<6>),
                      MAP(imm_v64_shl_n_16<8>),
                      MAP(imm_v64_shl_n_16<10>),
                      MAP(imm_v64_shl_n_16<12>),
                      MAP(imm_v64_shl_n_16<14>),
                      MAP(imm_v64_shr_n_u16<1>),
                      MAP(imm_v64_shr_n_u16<2>),
                      MAP(imm_v64_shr_n_u16<4>),
                      MAP(imm_v64_shr_n_u16<6>),
                      MAP(imm_v64_shr_n_u16<8>),
                      MAP(imm_v64_shr_n_u16<10>),
                      MAP(imm_v64_shr_n_u16<12>),
                      MAP(imm_v64_shr_n_u16<14>),
                      MAP(imm_v64_shr_n_s16<1>),
                      MAP(imm_v64_shr_n_s16<2>),
                      MAP(imm_v64_shr_n_s16<4>),
                      MAP(imm_v64_shr_n_s16<6>),
                      MAP(imm_v64_shr_n_s16<8>),
                      MAP(imm_v64_shr_n_s16<10>),
                      MAP(imm_v64_shr_n_s16<12>),
                      MAP(imm_v64_shr_n_s16<14>),
                      MAP(imm_v64_shl_n_32<1>),
                      MAP(imm_v64_shl_n_32<4>),
                      MAP(imm_v64_shl_n_32<8>),
                      MAP(imm_v64_shl_n_32<12>),
                      MAP(imm_v64_shl_n_32<16>),
                      MAP(imm_v64_shl_n_32<20>),
                      MAP(imm_v64_shl_n_32<24>),
                      MAP(imm_v64_shl_n_32<28>),
                      MAP(imm_v64_shr_n_u32<1>),
                      MAP(imm_v64_shr_n_u32<4>),
                      MAP(imm_v64_shr_n_u32<8>),
                      MAP(imm_v64_shr_n_u32<12>),
                      MAP(imm_v64_shr_n_u32<16>),
                      MAP(imm_v64_shr_n_u32<20>),
                      MAP(imm_v64_shr_n_u32<24>),
                      MAP(imm_v64_shr_n_u32<28>),
                      MAP(imm_v64_shr_n_s32<1>),
                      MAP(imm_v64_shr_n_s32<4>),
                      MAP(imm_v64_shr_n_s32<8>),
                      MAP(imm_v64_shr_n_s32<12>),
                      MAP(imm_v64_shr_n_s32<16>),
                      MAP(imm_v64_shr_n_s32<20>),
                      MAP(imm_v64_shr_n_s32<24>),
                      MAP(imm_v64_shr_n_s32<28>),
                      MAP(v128_unziphi_16),
                      MAP(v128_unziplo_16),
                      MAP(v128_unziphi_32),
                      MAP(v128_unziplo_32),
                      MAP(v128_pack_s32_u16),
                      MAP(v128_pack_s32_s16),
                      MAP(v128_pack_s16_u8),
                      MAP(v128_pack_s16_s8),
                      MAP(imm_v128_align<1>),
                      MAP(imm_v128_align<2>),
                      MAP(imm_v128_align<3>),
                      MAP(imm_v128_align<4>),
                      MAP(imm_v128_align<5>),
                      MAP(imm_v128_align<6>),
                      MAP(imm_v128_align<7>),
                      MAP(imm_v128_align<8>),
                      MAP(imm_v128_align<9>),
                      MAP(imm_v128_align<10>),
                      MAP(imm_v128_align<11>),
                      MAP(imm_v128_align<12>),
                      MAP(imm_v128_align<13>),
                      MAP(imm_v128_align<14>),
                      MAP(imm_v128_align<15>),
                      MAP(v128_unpacklo_u16_s32),
                      MAP(v128_unpacklo_s16_s32),
                      MAP(v128_unpackhi_u16_s32),
                      MAP(v128_unpackhi_s16_s32),
                      MAP(imm_v128_shr_n_byte<1>),
                      MAP(imm_v128_shr_n_byte<2>),
                      MAP(imm_v128_shr_n_byte<3>),
                      MAP(imm_v128_shr_n_byte<4>),
                      MAP(imm_v128_shr_n_byte<5>),
                      MAP(imm_v128_shr_n_byte<6>),
                      MAP(imm_v128_shr_n_byte<7>),
                      MAP(imm_v128_shr_n_byte<8>),
                      MAP(imm_v128_shr_n_byte<9>),
                      MAP(imm_v128_shr_n_byte<10>),
                      MAP(imm_v128_shr_n_byte<11>),
                      MAP(imm_v128_shr_n_byte<12>),
                      MAP(imm_v128_shr_n_byte<13>),
                      MAP(imm_v128_shr_n_byte<14>),
                      MAP(imm_v128_shr_n_byte<15>),
                      MAP(imm_v128_shl_n_byte<1>),
                      MAP(imm_v128_shl_n_byte<2>),
                      MAP(imm_v128_shl_n_byte<3>),
                      MAP(imm_v128_shl_n_byte<4>),
                      MAP(imm_v128_shl_n_byte<5>),
                      MAP(imm_v128_shl_n_byte<6>),
                      MAP(imm_v128_shl_n_byte<7>),
                      MAP(imm_v128_shl_n_byte<8>),
                      MAP(imm_v128_shl_n_byte<9>),
                      MAP(imm_v128_shl_n_byte<10>),
                      MAP(imm_v128_shl_n_byte<11>),
                      MAP(imm_v128_shl_n_byte<12>),
                      MAP(imm_v128_shl_n_byte<13>),
                      MAP(imm_v128_shl_n_byte<14>),
                      MAP(imm_v128_shl_n_byte<15>),
                      MAP(imm_v128_shl_n_8<1>),
                      MAP(imm_v128_shl_n_8<2>),
                      MAP(imm_v128_shl_n_8<3>),
                      MAP(imm_v128_shl_n_8<4>),
                      MAP(imm_v128_shl_n_8<5>),
                      MAP(imm_v128_shl_n_8<6>),
                      MAP(imm_v128_shl_n_8<7>),
                      MAP(imm_v128_shr_n_u8<1>),
                      MAP(imm_v128_shr_n_u8<2>),
                      MAP(imm_v128_shr_n_u8<3>),
                      MAP(imm_v128_shr_n_u8<4>),
                      MAP(imm_v128_shr_n_u8<5>),
                      MAP(imm_v128_shr_n_u8<6>),
                      MAP(imm_v128_shr_n_u8<7>),
                      MAP(imm_v128_shr_n_s8<1>),
                      MAP(imm_v128_shr_n_s8<2>),
                      MAP(imm_v128_shr_n_s8<3>),
                      MAP(imm_v128_shr_n_s8<4>),
                      MAP(imm_v128_shr_n_s8<5>),
                      MAP(imm_v128_shr_n_s8<6>),
                      MAP(imm_v128_shr_n_s8<7>),
                      MAP(imm_v128_shl_n_16<1>),
                      MAP(imm_v128_shl_n_16<2>),
                      MAP(imm_v128_shl_n_16<4>),
                      MAP(imm_v128_shl_n_16<6>),
                      MAP(imm_v128_shl_n_16<8>),
                      MAP(imm_v128_shl_n_16<10>),
                      MAP(imm_v128_shl_n_16<12>),
                      MAP(imm_v128_shl_n_16<14>),
                      MAP(imm_v128_shr_n_u16<1>),
                      MAP(imm_v128_shr_n_u16<2>),
                      MAP(imm_v128_shr_n_u16<4>),
                      MAP(imm_v128_shr_n_u16<6>),
                      MAP(imm_v128_shr_n_u16<8>),
                      MAP(imm_v128_shr_n_u16<10>),
                      MAP(imm_v128_shr_n_u16<12>),
                      MAP(imm_v128_shr_n_u16<14>),
                      MAP(imm_v128_shr_n_s16<1>),
                      MAP(imm_v128_shr_n_s16<2>),
                      MAP(imm_v128_shr_n_s16<4>),
                      MAP(imm_v128_shr_n_s16<6>),
                      MAP(imm_v128_shr_n_s16<8>),
                      MAP(imm_v128_shr_n_s16<10>),
                      MAP(imm_v128_shr_n_s16<12>),
                      MAP(imm_v128_shr_n_s16<14>),
                      MAP(imm_v128_shl_n_32<1>),
                      MAP(imm_v128_shl_n_32<4>),
                      MAP(imm_v128_shl_n_32<8>),
                      MAP(imm_v128_shl_n_32<12>),
                      MAP(imm_v128_shl_n_32<16>),
                      MAP(imm_v128_shl_n_32<20>),
                      MAP(imm_v128_shl_n_32<24>),
                      MAP(imm_v128_shl_n_32<28>),
                      MAP(imm_v128_shr_n_u32<1>),
                      MAP(imm_v128_shr_n_u32<4>),
                      MAP(imm_v128_shr_n_u32<8>),
                      MAP(imm_v128_shr_n_u32<12>),
                      MAP(imm_v128_shr_n_u32<16>),
                      MAP(imm_v128_shr_n_u32<20>),
                      MAP(imm_v128_shr_n_u32<24>),
                      MAP(imm_v128_shr_n_u32<28>),
                      MAP(imm_v128_shr_n_s32<1>),
                      MAP(imm_v128_shr_n_s32<4>),
                      MAP(imm_v128_shr_n_s32<8>),
                      MAP(imm_v128_shr_n_s32<12>),
                      MAP(imm_v128_shr_n_s32<16>),
                      MAP(imm_v128_shr_n_s32<20>),
                      MAP(imm_v128_shr_n_s32<24>),
                      MAP(imm_v128_shr_n_s32<28>),
                      MAP(imm_v128_shl_n_64<1>),
                      MAP(imm_v128_shl_n_64<4>),
                      MAP(imm_v128_shl_n_64<8>),
                      MAP(imm_v128_shl_n_64<12>),
                      MAP(imm_v128_shl_n_64<16>),
                      MAP(imm_v128_shl_n_64<20>),
                      MAP(imm_v128_shl_n_64<24>),
                      MAP(imm_v128_shl_n_64<28>),
                      MAP(imm_v128_shl_n_64<32>),
                      MAP(imm_v128_shl_n_64<36>),
                      MAP(imm_v128_shl_n_64<40>),
                      MAP(imm_v128_shl_n_64<44>),
                      MAP(imm_v128_shl_n_64<48>),
                      MAP(imm_v128_shl_n_64<52>),
                      MAP(imm_v128_shl_n_64<56>),
                      MAP(imm_v128_shl_n_64<60>),
                      MAP(imm_v128_shr_n_u64<1>),
                      MAP(imm_v128_shr_n_u64<4>),
                      MAP(imm_v128_shr_n_u64<8>),
                      MAP(imm_v128_shr_n_u64<12>),
                      MAP(imm_v128_shr_n_u64<16>),
                      MAP(imm_v128_shr_n_u64<20>),
                      MAP(imm_v128_shr_n_u64<24>),
                      MAP(imm_v128_shr_n_u64<28>),
                      MAP(imm_v128_shr_n_u64<32>),
                      MAP(imm_v128_shr_n_u64<36>),
                      MAP(imm_v128_shr_n_u64<40>),
                      MAP(imm_v128_shr_n_u64<44>),
                      MAP(imm_v128_shr_n_u64<48>),
                      MAP(imm_v128_shr_n_u64<52>),
                      MAP(imm_v128_shr_n_u64<56>),
                      MAP(imm_v128_shr_n_u64<60>),
                      MAP(imm_v128_shr_n_s64<1>),
                      MAP(imm_v128_shr_n_s64<4>),
                      MAP(imm_v128_shr_n_s64<8>),
                      MAP(imm_v128_shr_n_s64<12>),
                      MAP(imm_v128_shr_n_s64<16>),
                      MAP(imm_v128_shr_n_s64<20>),
                      MAP(imm_v128_shr_n_s64<24>),
                      MAP(imm_v128_shr_n_s64<28>),
                      MAP(imm_v128_shr_n_s64<32>),
                      MAP(imm_v128_shr_n_s64<36>),
                      MAP(imm_v128_shr_n_s64<40>),
                      MAP(imm_v128_shr_n_s64<44>),
                      MAP(imm_v128_shr_n_s64<48>),
                      MAP(imm_v128_shr_n_s64<52>),
                      MAP(imm_v128_shr_n_s64<56>),
                      MAP(imm_v128_shr_n_s64<60>),
                      MAP(v128_unpack_u8_s16),
                      MAP(v128_unpack_s8_s16),
                      MAP(v128_unpack_u16_s32),
                      MAP(v128_unpack_s16_s32),
                      MAP(v128_movemask_8),
                      MAP(v128_unpacklo_u8_s16),
                      MAP(v128_unpackhi_u8_s16),
                      MAP(v128_unpacklo_s8_s16),
                      MAP(v128_unpackhi_s8_s16),
                      MAP(u32_load_unaligned),
                      MAP(u32_store_unaligned),
                      MAP(v64_load_unaligned),
                      MAP(v64_store_unaligned),
                      MAP(v128_load_unaligned),
                      MAP(v128_store_unaligned),
                      MAP(v256_unziphi_16),
                      MAP(v256_unziplo_16),
                      MAP(v256_unziphi_32),
                      MAP(v256_unziplo_32),
                      MAP(v256_unziphi_64),
                      MAP(v256_unziplo_64),
                      MAP(v256_pack_s32_u16),
                      MAP(v256_pack_s32_s16),
                      MAP(v256_pack_s16_u8),
                      MAP(v256_pack_s16_s8),
                      MAP(v256_cmpgt_s16),
                      MAP(v256_cmplt_s16),
                      MAP(v256_cmpgt_s32),
                      MAP(v256_cmplt_s32),
                      MAP(v256_shuffle_8),
                      MAP(v256_pshuffle_8),
                      MAP(v256_wideshuffle_8),
                      MAP(imm_v256_align<1>),
                      MAP(imm_v256_align<2>),
                      MAP(imm_v256_align<3>),
                      MAP(imm_v256_align<4>),
                      MAP(imm_v256_align<5>),
                      MAP(imm_v256_align<6>),
                      MAP(imm_v256_align<7>),
                      MAP(imm_v256_align<8>),
                      MAP(imm_v256_align<9>),
                      MAP(imm_v256_align<10>),
                      MAP(imm_v256_align<11>),
                      MAP(imm_v256_align<12>),
                      MAP(imm_v256_align<13>),
                      MAP(imm_v256_align<14>),
                      MAP(imm_v256_align<15>),
                      MAP(imm_v256_align<16>),
                      MAP(imm_v256_align<17>),
                      MAP(imm_v256_align<18>),
                      MAP(imm_v256_align<19>),
                      MAP(imm_v256_align<20>),
                      MAP(imm_v256_align<21>),
                      MAP(imm_v256_align<22>),
                      MAP(imm_v256_align<23>),
                      MAP(imm_v256_align<24>),
                      MAP(imm_v256_align<25>),
                      MAP(imm_v256_align<26>),
                      MAP(imm_v256_align<27>),
                      MAP(imm_v256_align<28>),
                      MAP(imm_v256_align<29>),
                      MAP(imm_v256_align<30>),
                      MAP(imm_v256_align<31>),
                      MAP(v256_from_v128),
                      MAP(v256_unpack_u8_s16),
                      MAP(v256_unpack_s8_s16),
                      MAP(v256_unpack_u16_s32),
                      MAP(v256_unpack_s16_s32),
                      MAP(v256_unpacklo_u16_s32),
                      MAP(v256_unpacklo_s16_s32),
                      MAP(v256_unpackhi_u16_s32),
                      MAP(v256_unpackhi_s16_s32),
                      MAP(imm_v256_shr_n_word<1>),
                      MAP(imm_v256_shr_n_word<2>),
                      MAP(imm_v256_shr_n_word<3>),
                      MAP(imm_v256_shr_n_word<4>),
                      MAP(imm_v256_shr_n_word<5>),
                      MAP(imm_v256_shr_n_word<6>),
                      MAP(imm_v256_shr_n_word<7>),
                      MAP(imm_v256_shr_n_word<8>),
                      MAP(imm_v256_shr_n_word<9>),
                      MAP(imm_v256_shr_n_word<10>),
                      MAP(imm_v256_shr_n_word<11>),
                      MAP(imm_v256_shr_n_word<12>),
                      MAP(imm_v256_shr_n_word<13>),
                      MAP(imm_v256_shr_n_word<14>),
                      MAP(imm_v256_shr_n_word<15>),
                      MAP(imm_v256_shl_n_word<1>),
                      MAP(imm_v256_shl_n_word<2>),
                      MAP(imm_v256_shl_n_word<3>),
                      MAP(imm_v256_shl_n_word<4>),
                      MAP(imm_v256_shl_n_word<5>),
                      MAP(imm_v256_shl_n_word<6>),
                      MAP(imm_v256_shl_n_word<7>),
                      MAP(imm_v256_shl_n_word<8>),
                      MAP(imm_v256_shl_n_word<9>),
                      MAP(imm_v256_shl_n_word<10>),
                      MAP(imm_v256_shl_n_word<11>),
                      MAP(imm_v256_shl_n_word<12>),
                      MAP(imm_v256_shl_n_word<13>),
                      MAP(imm_v256_shl_n_word<14>),
                      MAP(imm_v256_shl_n_word<15>),
                      MAP(imm_v256_shr_n_byte<1>),
                      MAP(imm_v256_shr_n_byte<2>),
                      MAP(imm_v256_shr_n_byte<3>),
                      MAP(imm_v256_shr_n_byte<4>),
                      MAP(imm_v256_shr_n_byte<5>),
                      MAP(imm_v256_shr_n_byte<6>),
                      MAP(imm_v256_shr_n_byte<7>),
                      MAP(imm_v256_shr_n_byte<8>),
                      MAP(imm_v256_shr_n_byte<9>),
                      MAP(imm_v256_shr_n_byte<10>),
                      MAP(imm_v256_shr_n_byte<11>),
                      MAP(imm_v256_shr_n_byte<12>),
                      MAP(imm_v256_shr_n_byte<13>),
                      MAP(imm_v256_shr_n_byte<14>),
                      MAP(imm_v256_shr_n_byte<15>),
                      MAP(imm_v256_shr_n_byte<16>),
                      MAP(imm_v256_shr_n_byte<17>),
                      MAP(imm_v256_shr_n_byte<18>),
                      MAP(imm_v256_shr_n_byte<19>),
                      MAP(imm_v256_shr_n_byte<20>),
                      MAP(imm_v256_shr_n_byte<21>),
                      MAP(imm_v256_shr_n_byte<22>),
                      MAP(imm_v256_shr_n_byte<23>),
                      MAP(imm_v256_shr_n_byte<24>),
                      MAP(imm_v256_shr_n_byte<25>),
                      MAP(imm_v256_shr_n_byte<26>),
                      MAP(imm_v256_shr_n_byte<27>),
                      MAP(imm_v256_shr_n_byte<28>),
                      MAP(imm_v256_shr_n_byte<29>),
                      MAP(imm_v256_shr_n_byte<30>),
                      MAP(imm_v256_shr_n_byte<31>),
                      MAP(imm_v256_shl_n_byte<1>),
                      MAP(imm_v256_shl_n_byte<2>),
                      MAP(imm_v256_shl_n_byte<3>),
                      MAP(imm_v256_shl_n_byte<4>),
                      MAP(imm_v256_shl_n_byte<5>),
                      MAP(imm_v256_shl_n_byte<6>),
                      MAP(imm_v256_shl_n_byte<7>),
                      MAP(imm_v256_shl_n_byte<8>),
                      MAP(imm_v256_shl_n_byte<9>),
                      MAP(imm_v256_shl_n_byte<10>),
                      MAP(imm_v256_shl_n_byte<11>),
                      MAP(imm_v256_shl_n_byte<12>),
                      MAP(imm_v256_shl_n_byte<13>),
                      MAP(imm_v256_shl_n_byte<14>),
                      MAP(imm_v256_shl_n_byte<15>),
                      MAP(imm_v256_shl_n_byte<16>),
                      MAP(imm_v256_shl_n_byte<17>),
                      MAP(imm_v256_shl_n_byte<18>),
                      MAP(imm_v256_shl_n_byte<19>),
                      MAP(imm_v256_shl_n_byte<20>),
                      MAP(imm_v256_shl_n_byte<21>),
                      MAP(imm_v256_shl_n_byte<22>),
                      MAP(imm_v256_shl_n_byte<23>),
                      MAP(imm_v256_shl_n_byte<24>),
                      MAP(imm_v256_shl_n_byte<25>),
                      MAP(imm_v256_shl_n_byte<26>),
                      MAP(imm_v256_shl_n_byte<27>),
                      MAP(imm_v256_shl_n_byte<28>),
                      MAP(imm_v256_shl_n_byte<29>),
                      MAP(imm_v256_shl_n_byte<30>),
                      MAP(imm_v256_shl_n_byte<31>),
                      MAP(imm_v256_shl_n_8<1>),
                      MAP(imm_v256_shl_n_8<2>),
                      MAP(imm_v256_shl_n_8<3>),
                      MAP(imm_v256_shl_n_8<4>),
                      MAP(imm_v256_shl_n_8<5>),
                      MAP(imm_v256_shl_n_8<6>),
                      MAP(imm_v256_shl_n_8<7>),
                      MAP(imm_v256_shr_n_u8<1>),
                      MAP(imm_v256_shr_n_u8<2>),
                      MAP(imm_v256_shr_n_u8<3>),
                      MAP(imm_v256_shr_n_u8<4>),
                      MAP(imm_v256_shr_n_u8<5>),
                      MAP(imm_v256_shr_n_u8<6>),
                      MAP(imm_v256_shr_n_u8<7>),
                      MAP(imm_v256_shr_n_s8<1>),
                      MAP(imm_v256_shr_n_s8<2>),
                      MAP(imm_v256_shr_n_s8<3>),
                      MAP(imm_v256_shr_n_s8<4>),
                      MAP(imm_v256_shr_n_s8<5>),
                      MAP(imm_v256_shr_n_s8<6>),
                      MAP(imm_v256_shr_n_s8<7>),
                      MAP(imm_v256_shl_n_16<1>),
                      MAP(imm_v256_shl_n_16<2>),
                      MAP(imm_v256_shl_n_16<4>),
                      MAP(imm_v256_shl_n_16<6>),
                      MAP(imm_v256_shl_n_16<8>),
                      MAP(imm_v256_shl_n_16<10>),
                      MAP(imm_v256_shl_n_16<12>),
                      MAP(imm_v256_shl_n_16<14>),
                      MAP(imm_v256_shr_n_u16<1>),
                      MAP(imm_v256_shr_n_u16<2>),
                      MAP(imm_v256_shr_n_u16<4>),
                      MAP(imm_v256_shr_n_u16<6>),
                      MAP(imm_v256_shr_n_u16<8>),
                      MAP(imm_v256_shr_n_u16<10>),
                      MAP(imm_v256_shr_n_u16<12>),
                      MAP(imm_v256_shr_n_u16<14>),
                      MAP(imm_v256_shr_n_s16<1>),
                      MAP(imm_v256_shr_n_s16<2>),
                      MAP(imm_v256_shr_n_s16<4>),
                      MAP(imm_v256_shr_n_s16<6>),
                      MAP(imm_v256_shr_n_s16<8>),
                      MAP(imm_v256_shr_n_s16<10>),
                      MAP(imm_v256_shr_n_s16<12>),
                      MAP(imm_v256_shr_n_s16<14>),
                      MAP(imm_v256_shl_n_32<1>),
                      MAP(imm_v256_shl_n_32<4>),
                      MAP(imm_v256_shl_n_32<8>),
                      MAP(imm_v256_shl_n_32<12>),
                      MAP(imm_v256_shl_n_32<16>),
                      MAP(imm_v256_shl_n_32<20>),
                      MAP(imm_v256_shl_n_32<24>),
                      MAP(imm_v256_shl_n_32<28>),
                      MAP(imm_v256_shr_n_u32<1>),
                      MAP(imm_v256_shr_n_u32<4>),
                      MAP(imm_v256_shr_n_u32<8>),
                      MAP(imm_v256_shr_n_u32<12>),
                      MAP(imm_v256_shr_n_u32<16>),
                      MAP(imm_v256_shr_n_u32<20>),
                      MAP(imm_v256_shr_n_u32<24>),
                      MAP(imm_v256_shr_n_u32<28>),
                      MAP(imm_v256_shr_n_s32<1>),
                      MAP(imm_v256_shr_n_s32<4>),
                      MAP(imm_v256_shr_n_s32<8>),
                      MAP(imm_v256_shr_n_s32<12>),
                      MAP(imm_v256_shr_n_s32<16>),
                      MAP(imm_v256_shr_n_s32<20>),
                      MAP(imm_v256_shr_n_s32<24>),
                      MAP(imm_v256_shr_n_s32<28>),
                      MAP(imm_v256_shl_n_64<1>),
                      MAP(imm_v256_shl_n_64<4>),
                      MAP(imm_v256_shl_n_64<8>),
                      MAP(imm_v256_shl_n_64<12>),
                      MAP(imm_v256_shl_n_64<16>),
                      MAP(imm_v256_shl_n_64<20>),
                      MAP(imm_v256_shl_n_64<24>),
                      MAP(imm_v256_shl_n_64<28>),
                      MAP(imm_v256_shl_n_64<32>),
                      MAP(imm_v256_shl_n_64<36>),
                      MAP(imm_v256_shl_n_64<40>),
                      MAP(imm_v256_shl_n_64<44>),
                      MAP(imm_v256_shl_n_64<48>),
                      MAP(imm_v256_shl_n_64<52>),
                      MAP(imm_v256_shl_n_64<56>),
                      MAP(imm_v256_shl_n_64<60>),
                      MAP(imm_v256_shr_n_u64<1>),
                      MAP(imm_v256_shr_n_u64<4>),
                      MAP(imm_v256_shr_n_u64<8>),
                      MAP(imm_v256_shr_n_u64<12>),
                      MAP(imm_v256_shr_n_u64<16>),
                      MAP(imm_v256_shr_n_u64<20>),
                      MAP(imm_v256_shr_n_u64<24>),
                      MAP(imm_v256_shr_n_u64<28>),
                      MAP(imm_v256_shr_n_u64<32>),
                      MAP(imm_v256_shr_n_u64<36>),
                      MAP(imm_v256_shr_n_u64<40>),
                      MAP(imm_v256_shr_n_u64<44>),
                      MAP(imm_v256_shr_n_u64<48>),
                      MAP(imm_v256_shr_n_u64<52>),
                      MAP(imm_v256_shr_n_u64<56>),
                      MAP(imm_v256_shr_n_u64<60>),
                      MAP(imm_v256_shr_n_s64<1>),
                      MAP(imm_v256_shr_n_s64<4>),
                      MAP(imm_v256_shr_n_s64<8>),
                      MAP(imm_v256_shr_n_s64<12>),
                      MAP(imm_v256_shr_n_s64<16>),
                      MAP(imm_v256_shr_n_s64<20>),
                      MAP(imm_v256_shr_n_s64<24>),
                      MAP(imm_v256_shr_n_s64<28>),
                      MAP(imm_v256_shr_n_s64<32>),
                      MAP(imm_v256_shr_n_s64<36>),
                      MAP(imm_v256_shr_n_s64<40>),
                      MAP(imm_v256_shr_n_s64<44>),
                      MAP(imm_v256_shr_n_s64<48>),
                      MAP(imm_v256_shr_n_s64<52>),
                      MAP(imm_v256_shr_n_s64<56>),
                      MAP(imm_v256_shr_n_s64<60>),
                      MAP(v256_movemask_8),
                      MAP(v256_ziplo_128),
                      MAP(v256_ziphi_128),
                      MAP(v256_unpacklo_u8_s16),
                      MAP(v256_unpackhi_u8_s16),
                      MAP(v256_unpacklo_s8_s16),
                      MAP(v256_unpackhi_s8_s16),
                      { nullptr, nullptr, nullptr } };
// Map reference functions to machine tuned functions. Since the
// functions depend on machine tuned types, the non-machine tuned
// instantiations of the test can't refer to these functions directly,
// so we refer to them by name and do the mapping here.
void Map(const char *name, fptr *ref, fptr *simd) {
  unsigned int i;
  for (i = 0; m[i].name && strcmp(name, m[i].name); i++) {
  }

  *ref = m[i].ref;
  *simd = m[i].simd;
}
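
// For example, Map("v64_sad_u8", &ref_simd, &simd) leaves ref_simd pointing
// at c_v64_sad_u8 and simd at v64_sad_u8. An unknown name stops the loop at
// the { nullptr, nullptr, nullptr } sentinel, so both outputs become nullptr
// and the callers below can report the error.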
// Used for printing errors in TestSimd1Arg, TestSimd2Args and TestSimd3Args
std::string Print(const uint8_t *a, int size) {
  std::string text = "0x";
  for (int i = 0; i < size; i++) {
    const uint8_t c = a[!CONFIG_BIG_ENDIAN ? size - 1 - i : i];
    // Same as snprintf(..., ..., "%02x", c)
    text += (c >> 4) + '0' + ((c >> 4) > 9) * ('a' - '0' - 10);
    text += (c & 15) + '0' + ((c & 15) > 9) * ('a' - '0' - 10);
  }

  return text;
}
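
// For instance, on a little-endian target the two bytes { 0x34, 0x12 } print
// as "0x1234": bytes are emitted starting from the most significant end, so
// the string reads as a single number regardless of byte order.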
// Used in TestSimd1Arg, TestSimd2Args and TestSimd3Args to restrict argument
// ranges
void SetMask(uint8_t *s, int size, uint32_t mask, uint32_t maskwidth) {
  switch (maskwidth) {
    case 0: {
      break;
    }
    case 8: {
      for (int i = 0; i < size; i++) s[i] &= mask;
      break;
    }
    case 16: {
      uint16_t *t = reinterpret_cast<uint16_t *>(s);
      assert(!(reinterpret_cast<uintptr_t>(s) & 1));
      for (int i = 0; i < size / 2; i++) t[i] &= mask;
      break;
    }
    case 32: {
      uint32_t *t = reinterpret_cast<uint32_t *>(s);
      assert(!(reinterpret_cast<uintptr_t>(s) & 3));
      for (int i = 0; i < size / 4; i++) t[i] &= mask;
      break;
    }
    case 64: {
      uint64_t *t = reinterpret_cast<uint64_t *>(s);
      assert(!(reinterpret_cast<uintptr_t>(s) & 7));
      for (int i = 0; i < size / 8; i++) t[i] &= mask;
      break;
    }
    default: {
      FAIL() << "Unsupported mask width";
      break;
    }
  }
}
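
// For example, SetMask(s, 32, 0x1f, 8) restricts every byte of s to the
// range [0, 31], while SetMask(s, 32, 0xffff, 32) clears the upper 16 bits
// of each 32-bit lane. This keeps, e.g., shift amounts within legal bounds.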
// We need some extra load/store functions
void u64_store_aligned(void *p, uint64_t a) {
  v64_store_aligned(p, v64_from_64(a));
}
void s32_store_aligned(void *p, int32_t a) {
  u32_store_aligned(p, static_cast<uint32_t>(a));
}
void s64_store_aligned(void *p, int64_t a) {
  v64_store_aligned(p, v64_from_64(static_cast<uint64_t>(a)));
}

void c_u64_store_aligned(void *p, uint64_t a) {
  c_v64_store_aligned(p, c_v64_from_64(a));
}

void c_s32_store_aligned(void *p, int32_t a) {
  c_u32_store_aligned(p, static_cast<uint32_t>(a));
}

void c_s64_store_aligned(void *p, int64_t a) {
  c_v64_store_aligned(p, c_v64_from_64(static_cast<uint64_t>(a)));
}

uint64_t u64_load_aligned(const void *p) {
  return v64_u64(v64_load_aligned(p));
}
uint16_t u16_load_aligned(const void *p) {
  return *(reinterpret_cast<const uint16_t *>(p));
}
uint8_t u8_load_aligned(const void *p) {
  return *(reinterpret_cast<const uint8_t *>(p));
}

uint64_t c_u64_load_aligned(const void *p) {
  return c_v64_u64(c_v64_load_aligned(p));
}
uint16_t c_u16_load_aligned(const void *p) {
  return *(reinterpret_cast<const uint16_t *>(p));
}
uint8_t c_u8_load_aligned(const void *p) {
  return *(reinterpret_cast<const uint8_t *>(p));
}
// CompareSimd1Arg, CompareSimd2Args and CompareSimd3Args compare
// intrinsics taking 1, 2 or 3 arguments respectively with their
// corresponding C reference. Ideally, the loads and stores should
// have gone into the template parameter list, but v64 and v128 could
// be typedef'ed to the same type (which is the case on x86) and then
// we can't instantiate both v64 and v128, so the function return and
// argument types, including the always differing types in the C
// equivalent, are used instead. The function arguments must be void
// pointers and then go through a cast to avoid matching errors in the
// branches eliminated by the typeid tests in the calling function.
template <typename Ret, typename Arg, typename CRet, typename CArg>
int CompareSimd1Arg(fptr store, fptr load, fptr simd, void *d, fptr c_store,
                    fptr c_load, fptr c_simd, void *ref_d, const void *a) {
  void (*const my_store)(void *, Ret) = (void (*const)(void *, Ret))store;
  Arg (*const my_load)(const void *) = (Arg(*const)(const void *))load;
  Ret (*const my_simd)(Arg) = (Ret(*const)(Arg))simd;
  void (*const my_c_store)(void *, CRet) = (void (*const)(void *, CRet))c_store;
  CArg (*const my_c_load)(const void *) = (CArg(*const)(const void *))c_load;
  CRet (*const my_c_simd)(CArg) = (CRet(*const)(CArg))c_simd;

  // Call reference and intrinsic
  my_c_store(ref_d, my_c_simd(my_c_load(a)));
  my_store(d, my_simd(my_load(a)));

  // Compare results
  return memcmp(ref_d, d, sizeof(CRet));
}
template <typename Ret, typename Arg1, typename Arg2, typename CRet,
          typename CArg1, typename CArg2>
int CompareSimd2Args(fptr store, fptr load1, fptr load2, fptr simd, void *d,
                     fptr c_store, fptr c_load1, fptr c_load2, fptr c_simd,
                     void *ref_d, const void *a, const void *b) {
  void (*const my_store)(void *, Ret) = (void (*const)(void *, Ret))store;
  Arg1 (*const my_load1)(const void *) = (Arg1(*const)(const void *))load1;
  Arg2 (*const my_load2)(const void *) = (Arg2(*const)(const void *))load2;
  Ret (*const my_simd)(Arg1, Arg2) = (Ret(*const)(Arg1, Arg2))simd;
  void (*const my_c_store)(void *, CRet) = (void (*const)(void *, CRet))c_store;
  CArg1 (*const my_c_load1)(const void *) =
      (CArg1(*const)(const void *))c_load1;
  CArg2 (*const my_c_load2)(const void *) =
      (CArg2(*const)(const void *))c_load2;
  CRet (*const my_c_simd)(CArg1, CArg2) = (CRet(*const)(CArg1, CArg2))c_simd;

  // Call reference and intrinsic
  my_c_store(ref_d, my_c_simd(my_c_load1(a), my_c_load2(b)));
  my_store(d, my_simd(my_load1(a), my_load2(b)));

  // Compare results
  return memcmp(ref_d, d, sizeof(CRet));
}
template <typename Ret, typename Arg1, typename Arg2, typename Arg3,
          typename CRet, typename CArg1, typename CArg2, typename CArg3>
int CompareSimd3Args(fptr store, fptr load1, fptr load2, fptr load3, fptr simd,
                     void *d, fptr c_store, fptr c_load1, fptr c_load2,
                     fptr c_load3, fptr c_simd, void *ref_d, const void *a,
                     const void *b, const void *c) {
  void (*const my_store)(void *, Ret) = (void (*const)(void *, Ret))store;
  Arg1 (*const my_load1)(const void *) = (Arg1(*const)(const void *))load1;
  Arg2 (*const my_load2)(const void *) = (Arg2(*const)(const void *))load2;
  Arg3 (*const my_load3)(const void *) = (Arg3(*const)(const void *))load3;
  Ret (*const my_simd)(Arg1, Arg2, Arg3) = (Ret(*const)(Arg1, Arg2, Arg3))simd;
  void (*const my_c_store)(void *, CRet) = (void (*const)(void *, CRet))c_store;
  CArg1 (*const my_c_load1)(const void *) =
      (CArg1(*const)(const void *))c_load1;
  CArg2 (*const my_c_load2)(const void *) =
      (CArg2(*const)(const void *))c_load2;
  CArg3 (*const my_c_load3)(const void *) =
      (CArg3(*const)(const void *))c_load3;
  CRet (*const my_c_simd)(CArg1, CArg2, CArg3) =
      (CRet(*const)(CArg1, CArg2, CArg3))c_simd;

  // Call reference and intrinsic
  my_c_store(ref_d, my_c_simd(my_c_load1(a), my_c_load2(b), my_c_load3(c)));
  my_store(d, my_simd(my_load1(a), my_load2(b), my_load3(c)));

  // Compare results
  return memcmp(ref_d, d, sizeof(CRet));
}
template <typename CRet, typename CArg>
void TestSimd1Arg(uint32_t iterations, uint32_t mask, uint32_t maskwidth,
                  const char *name) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  fptr ref_simd;
  fptr simd;
  int error = 0;
  DECLARE_ALIGNED(32, uint8_t, s[32]);
  DECLARE_ALIGNED(32, uint8_t, d[32]);
  DECLARE_ALIGNED(32, uint8_t, ref_d[32]);
  assert(sizeof(CArg) <= 32 && sizeof(CRet) <= 32);
  memset(ref_d, 0, sizeof(ref_d));
  memset(d, 0, sizeof(d));

  Map(name, &ref_simd, &simd);
  if (simd == nullptr || ref_simd == nullptr) {
    FAIL() << "Internal error: Unknown intrinsic function " << name;
  }

  for (unsigned int count = 0;
       count < iterations && !error && !testing::Test::HasFailure(); count++) {
    for (unsigned int c = 0; c < sizeof(CArg); c++) s[c] = rnd.Rand8();

    if (maskwidth) {
      SetMask(s, sizeof(CArg), mask, maskwidth);
    }

    if (typeid(CRet) == typeid(c_v64) && typeid(CArg) == typeid(c_v64)) {
      error = CompareSimd1Arg<v64, v64, c_v64, c_v64>(
          reinterpret_cast<fptr>(v64_store_aligned),
          reinterpret_cast<fptr>(v64_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v64_store_aligned),
          reinterpret_cast<fptr>(c_v64_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(c_v64) &&
               typeid(CArg) == typeid(uint8_t)) {
      error = CompareSimd1Arg<v64, uint8_t, c_v64, uint8_t>(
          reinterpret_cast<fptr>(v64_store_aligned),
          reinterpret_cast<fptr>(u8_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v64_store_aligned),
          reinterpret_cast<fptr>(c_u8_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(c_v64) &&
               typeid(CArg) == typeid(uint16_t)) {
      error = CompareSimd1Arg<v64, uint16_t, c_v64, uint16_t>(
          reinterpret_cast<fptr>(v64_store_aligned),
          reinterpret_cast<fptr>(u16_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v64_store_aligned),
          reinterpret_cast<fptr>(c_u16_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(c_v64) &&
               typeid(CArg) == typeid(uint32_t)) {
      error = CompareSimd1Arg<v64, uint32_t, c_v64, uint32_t>(
          reinterpret_cast<fptr>(v64_store_aligned),
          reinterpret_cast<fptr>(u32_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v64_store_aligned),
          reinterpret_cast<fptr>(c_u32_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(uint64_t) &&
               typeid(CArg) == typeid(c_v64)) {
      error = CompareSimd1Arg<uint64_t, v64, uint64_t, c_v64>(
          reinterpret_cast<fptr>(u64_store_aligned),
          reinterpret_cast<fptr>(v64_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_u64_store_aligned),
          reinterpret_cast<fptr>(c_v64_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(int64_t) &&
               typeid(CArg) == typeid(c_v64)) {
      error = CompareSimd1Arg<int64_t, v64, int64_t, c_v64>(
          reinterpret_cast<fptr>(s64_store_aligned),
          reinterpret_cast<fptr>(v64_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_s64_store_aligned),
          reinterpret_cast<fptr>(c_v64_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(uint32_t) &&
               typeid(CArg) == typeid(c_v64)) {
      error = CompareSimd1Arg<uint32_t, v64, uint32_t, c_v64>(
          reinterpret_cast<fptr>(u32_store_aligned),
          reinterpret_cast<fptr>(v64_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_u32_store_aligned),
          reinterpret_cast<fptr>(c_v64_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(int32_t) &&
               typeid(CArg) == typeid(c_v64)) {
      error = CompareSimd1Arg<int32_t, v64, int32_t, c_v64>(
          reinterpret_cast<fptr>(s32_store_aligned),
          reinterpret_cast<fptr>(v64_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_s32_store_aligned),
          reinterpret_cast<fptr>(c_v64_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(uint32_t) &&
               typeid(CArg) == typeid(c_v128)) {
      error = CompareSimd1Arg<uint32_t, v128, uint32_t, c_v128>(
          reinterpret_cast<fptr>(u32_store_aligned),
          reinterpret_cast<fptr>(v128_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_u32_store_aligned),
          reinterpret_cast<fptr>(c_v128_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(uint64_t) &&
               typeid(CArg) == typeid(c_v128)) {
      error = CompareSimd1Arg<uint64_t, v128, uint64_t, c_v128>(
          reinterpret_cast<fptr>(u64_store_aligned),
          reinterpret_cast<fptr>(v128_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_u64_store_aligned),
          reinterpret_cast<fptr>(c_v128_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(uint64_t) &&
               typeid(CArg) == typeid(c_v256)) {
      error = CompareSimd1Arg<uint64_t, v256, uint64_t, c_v256>(
          reinterpret_cast<fptr>(u64_store_aligned),
          reinterpret_cast<fptr>(v256_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_u64_store_aligned),
          reinterpret_cast<fptr>(c_v256_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(c_v64) &&
               typeid(CArg) == typeid(c_v128)) {
      error = CompareSimd1Arg<v64, v128, c_v64, c_v128>(
          reinterpret_cast<fptr>(v64_store_aligned),
          reinterpret_cast<fptr>(v128_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v64_store_aligned),
          reinterpret_cast<fptr>(c_v128_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(c_v128) &&
               typeid(CArg) == typeid(c_v128)) {
      error = CompareSimd1Arg<v128, v128, c_v128, c_v128>(
          reinterpret_cast<fptr>(v128_store_aligned),
          reinterpret_cast<fptr>(v128_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v128_store_aligned),
          reinterpret_cast<fptr>(c_v128_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(c_v128) &&
               typeid(CArg) == typeid(c_v64)) {
      error = CompareSimd1Arg<v128, v64, c_v128, c_v64>(
          reinterpret_cast<fptr>(v128_store_aligned),
          reinterpret_cast<fptr>(v64_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v128_store_aligned),
          reinterpret_cast<fptr>(c_v64_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(c_v128) &&
               typeid(CArg) == typeid(uint8_t)) {
      error = CompareSimd1Arg<v128, uint8_t, c_v128, uint8_t>(
          reinterpret_cast<fptr>(v128_store_aligned),
          reinterpret_cast<fptr>(u8_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v128_store_aligned),
          reinterpret_cast<fptr>(c_u8_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(c_v128) &&
               typeid(CArg) == typeid(uint16_t)) {
      error = CompareSimd1Arg<v128, uint16_t, c_v128, uint16_t>(
          reinterpret_cast<fptr>(v128_store_aligned),
          reinterpret_cast<fptr>(u16_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v128_store_aligned),
          reinterpret_cast<fptr>(c_u16_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(c_v128) &&
               typeid(CArg) == typeid(uint32_t)) {
      error = CompareSimd1Arg<v128, uint32_t, c_v128, uint32_t>(
          reinterpret_cast<fptr>(v128_store_aligned),
          reinterpret_cast<fptr>(u32_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v128_store_aligned),
          reinterpret_cast<fptr>(c_u32_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(c_v128) &&
               typeid(CArg) == typeid(uint64_t)) {
      error = CompareSimd1Arg<v128, uint64_t, c_v128, uint64_t>(
          reinterpret_cast<fptr>(v128_store_aligned),
          reinterpret_cast<fptr>(u64_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v128_store_aligned),
          reinterpret_cast<fptr>(c_u64_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(c_v256) &&
               typeid(CArg) == typeid(c_v256)) {
      error = CompareSimd1Arg<v256, v256, c_v256, c_v256>(
          reinterpret_cast<fptr>(v256_store_aligned),
          reinterpret_cast<fptr>(v256_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v256_store_aligned),
          reinterpret_cast<fptr>(c_v256_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(c_v256) &&
               typeid(CArg) == typeid(c_v128)) {
      error = CompareSimd1Arg<v256, v128, c_v256, c_v128>(
          reinterpret_cast<fptr>(v256_store_aligned),
          reinterpret_cast<fptr>(v128_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v256_store_aligned),
          reinterpret_cast<fptr>(c_v128_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(c_v256) &&
               typeid(CArg) == typeid(uint8_t)) {
      error = CompareSimd1Arg<v256, uint8_t, c_v256, uint8_t>(
          reinterpret_cast<fptr>(v256_store_aligned),
          reinterpret_cast<fptr>(u8_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v256_store_aligned),
          reinterpret_cast<fptr>(c_u8_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(c_v256) &&
               typeid(CArg) == typeid(uint16_t)) {
      error = CompareSimd1Arg<v256, uint16_t, c_v256, uint16_t>(
          reinterpret_cast<fptr>(v256_store_aligned),
          reinterpret_cast<fptr>(u16_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v256_store_aligned),
          reinterpret_cast<fptr>(c_u16_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(c_v256) &&
               typeid(CArg) == typeid(uint32_t)) {
      error = CompareSimd1Arg<v256, uint32_t, c_v256, uint32_t>(
          reinterpret_cast<fptr>(v256_store_aligned),
          reinterpret_cast<fptr>(u32_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v256_store_aligned),
          reinterpret_cast<fptr>(c_u32_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(c_v256) &&
               typeid(CArg) == typeid(uint64_t)) {
      error = CompareSimd1Arg<v256, uint64_t, c_v256, uint64_t>(
          reinterpret_cast<fptr>(v256_store_aligned),
          reinterpret_cast<fptr>(u64_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v256_store_aligned),
          reinterpret_cast<fptr>(c_u64_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(uint32_t) &&
               typeid(CArg) == typeid(c_v256)) {
      error = CompareSimd1Arg<uint32_t, v256, uint32_t, c_v256>(
          reinterpret_cast<fptr>(u32_store_aligned),
          reinterpret_cast<fptr>(v256_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_u32_store_aligned),
          reinterpret_cast<fptr>(c_v256_load_aligned), ref_simd, ref_d, s);
    } else if (typeid(CRet) == typeid(c_v64) &&
               typeid(CArg) == typeid(c_v256)) {
      error = CompareSimd1Arg<v64, v256, c_v64, c_v256>(
          reinterpret_cast<fptr>(v64_store_aligned),
          reinterpret_cast<fptr>(v256_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v64_store_aligned),
          reinterpret_cast<fptr>(c_v256_load_aligned), ref_simd, ref_d, s);
    } else {
      FAIL() << "Internal error: Unknown intrinsic function "
             << typeid(CRet).name() << " " << name << "("
             << typeid(CArg).name() << ")";
    }
  }

  EXPECT_EQ(0, error) << "Error: mismatch for " << name << "("
                      << Print(s, sizeof(CArg)) << ") -> "
                      << Print(d, sizeof(CRet)) << " (simd), "
                      << Print(ref_d, sizeof(CRet)) << " (ref)";
}
1741 template <typename CRet
, typename CArg1
, typename CArg2
>
1742 void TestSimd2Args(uint32_t iterations
, uint32_t mask
, uint32_t maskwidth
,
1744 ACMRandom
rnd(ACMRandom::DeterministicSeed());
1748 DECLARE_ALIGNED(32, uint8_t, s1
[32]);
1749 DECLARE_ALIGNED(32, uint8_t, s2
[32]);
1750 DECLARE_ALIGNED(32, uint8_t, d
[32]);
1751 DECLARE_ALIGNED(32, uint8_t, ref_d
[32]);
1752 assert(sizeof(CArg1
) <= 32 && sizeof(CArg2
) <= 32 && sizeof(CRet
) <= 32);
1753 memset(ref_d
, 0, sizeof(ref_d
));
1754 memset(d
, 0, sizeof(d
));
1756 Map(name
, &ref_simd
, &simd
);
1757 if (simd
== nullptr || ref_simd
== nullptr) {
1758 FAIL() << "Internal error: Unknown intrinsic function " << name
;
1761 for (unsigned int count
= 0;
1762 count
< iterations
&& !error
&& !testing::Test::HasFailure(); count
++) {
1763 for (unsigned int c
= 0; c
< sizeof(CArg1
); c
++) s1
[c
] = rnd
.Rand8();
1765 for (unsigned int c
= 0; c
< sizeof(CArg2
); c
++) s2
[c
] = rnd
.Rand8();
1767 if (maskwidth
) SetMask(s2
, sizeof(CArg2
), mask
, maskwidth
);
1769 if (typeid(CRet
) == typeid(c_v64
) && typeid(CArg1
) == typeid(c_v64
) &&
1770 typeid(CArg2
) == typeid(c_v64
)) {
1772 error
= CompareSimd2Args
<v64
, v64
, v64
, c_v64
, c_v64
, c_v64
>(
1773 reinterpret_cast<fptr
>(v64_store_aligned
),
1774 reinterpret_cast<fptr
>(v64_load_aligned
),
1775 reinterpret_cast<fptr
>(v64_load_aligned
), simd
, d
,
1776 reinterpret_cast<fptr
>(c_v64_store_aligned
),
1777 reinterpret_cast<fptr
>(c_v64_load_aligned
),
1778 reinterpret_cast<fptr
>(c_v64_load_aligned
),
1779 reinterpret_cast<fptr
>(ref_simd
), ref_d
, s1
, s2
);
1780 } else if (typeid(CRet
) == typeid(c_v64
) &&
1781 typeid(CArg1
) == typeid(uint32_t) &&
1782 typeid(CArg2
) == typeid(uint32_t)) {
1785 CompareSimd2Args
<v64
, uint32_t, uint32_t, c_v64
, uint32_t, uint32_t>(
1786 reinterpret_cast<fptr
>(v64_store_aligned
),
1787 reinterpret_cast<fptr
>(u32_load_aligned
),
1788 reinterpret_cast<fptr
>(u32_load_aligned
), simd
, d
,
1789 reinterpret_cast<fptr
>(c_v64_store_aligned
),
1790 reinterpret_cast<fptr
>(c_u32_load_aligned
),
1791 reinterpret_cast<fptr
>(c_u32_load_aligned
),
1792 reinterpret_cast<fptr
>(ref_simd
), ref_d
, s1
, s2
);
1793 } else if (typeid(CRet
) == typeid(uint32_t) &&
1794 typeid(CArg1
) == typeid(c_v64
) &&
1795 typeid(CArg2
) == typeid(c_v64
)) {
1797 error
= CompareSimd2Args
<uint32_t, v64
, v64
, uint32_t, c_v64
, c_v64
>(
1798 reinterpret_cast<fptr
>(u32_store_aligned
),
1799 reinterpret_cast<fptr
>(v64_load_aligned
),
1800 reinterpret_cast<fptr
>(v64_load_aligned
), simd
, d
,
1801 reinterpret_cast<fptr
>(c_u32_store_aligned
),
1802 reinterpret_cast<fptr
>(c_v64_load_aligned
),
1803 reinterpret_cast<fptr
>(c_v64_load_aligned
),
1804 reinterpret_cast<fptr
>(ref_simd
), ref_d
, s1
, s2
);
1805 } else if (typeid(CRet
) == typeid(int64_t) &&
1806 typeid(CArg1
) == typeid(c_v64
) &&
1807 typeid(CArg2
) == typeid(c_v64
)) {
1809 error
= CompareSimd2Args
<int64_t, v64
, v64
, int64_t, c_v64
, c_v64
>(
1810 reinterpret_cast<fptr
>(s64_store_aligned
),
1811 reinterpret_cast<fptr
>(v64_load_aligned
),
1812 reinterpret_cast<fptr
>(v64_load_aligned
), simd
, d
,
1813 reinterpret_cast<fptr
>(c_s64_store_aligned
),
1814 reinterpret_cast<fptr
>(c_v64_load_aligned
),
1815 reinterpret_cast<fptr
>(c_v64_load_aligned
),
1816 reinterpret_cast<fptr
>(ref_simd
), ref_d
, s1
, s2
);
1817 } else if (typeid(CRet
) == typeid(c_v64
) &&
1818 typeid(CArg1
) == typeid(c_v64
) &&
1819 typeid(CArg2
) == typeid(uint32_t)) {
1821 error
= CompareSimd2Args
<v64
, v64
, uint32_t, c_v64
, c_v64
, uint32_t>(
1822 reinterpret_cast<fptr
>(v64_store_aligned
),
1823 reinterpret_cast<fptr
>(v64_load_aligned
),
1824 reinterpret_cast<fptr
>(u32_load_aligned
), simd
, d
,
1825 reinterpret_cast<fptr
>(c_v64_store_aligned
),
1826 reinterpret_cast<fptr
>(c_v64_load_aligned
),
1827 reinterpret_cast<fptr
>(c_u32_load_aligned
),
1828 reinterpret_cast<fptr
>(ref_simd
), ref_d
, s1
, s2
);
1829 } else if (typeid(CRet
) == typeid(c_v128
) &&
1830 typeid(CArg1
) == typeid(c_v128
) &&
1831 typeid(CArg2
) == typeid(c_v128
)) {
1833 error
= CompareSimd2Args
<v128
, v128
, v128
, c_v128
, c_v128
, c_v128
>(
1834 reinterpret_cast<fptr
>(v128_store_aligned
),
1835 reinterpret_cast<fptr
>(v128_load_aligned
),
1836 reinterpret_cast<fptr
>(v128_load_aligned
), simd
, d
,
1837 reinterpret_cast<fptr
>(c_v128_store_aligned
),
1838 reinterpret_cast<fptr
>(c_v128_load_aligned
),
1839 reinterpret_cast<fptr
>(c_v128_load_aligned
),
1840 reinterpret_cast<fptr
>(ref_simd
), ref_d
, s1
, s2
);
1841 } else if (typeid(CRet
) == typeid(uint32_t) &&
1842 typeid(CArg1
) == typeid(c_v128
) &&
1843 typeid(CArg2
) == typeid(c_v128
)) {
1845 error
= CompareSimd2Args
<uint32_t, v128
, v128
, uint32_t, c_v128
, c_v128
>(
1846 reinterpret_cast<fptr
>(u32_store_aligned
),
1847 reinterpret_cast<fptr
>(v128_load_aligned
),
1848 reinterpret_cast<fptr
>(v128_load_aligned
), simd
, d
,
1849 reinterpret_cast<fptr
>(c_u32_store_aligned
),
1850 reinterpret_cast<fptr
>(c_v128_load_aligned
),
1851 reinterpret_cast<fptr
>(c_v128_load_aligned
),
1852 reinterpret_cast<fptr
>(ref_simd
), ref_d
, s1
, s2
);
1853 } else if (typeid(CRet
) == typeid(uint64_t) &&
1854 typeid(CArg1
) == typeid(c_v128
) &&
1855 typeid(CArg2
) == typeid(c_v128
)) {
1857 error
= CompareSimd2Args
<uint64_t, v128
, v128
, uint64_t, c_v128
, c_v128
>(
1858 reinterpret_cast<fptr
>(u64_store_aligned
),
1859 reinterpret_cast<fptr
>(v128_load_aligned
),
1860 reinterpret_cast<fptr
>(v128_load_aligned
), simd
, d
,
1861 reinterpret_cast<fptr
>(c_u64_store_aligned
),
1862 reinterpret_cast<fptr
>(c_v128_load_aligned
),
1863 reinterpret_cast<fptr
>(c_v128_load_aligned
),
1864 reinterpret_cast<fptr
>(ref_simd
), ref_d
, s1
, s2
);
1865 } else if (typeid(CRet
) == typeid(int64_t) &&
1866 typeid(CArg1
) == typeid(c_v128
) &&
1867 typeid(CArg2
) == typeid(c_v128
)) {
1869 error
= CompareSimd2Args
<int64_t, v128
, v128
, int64_t, c_v128
, c_v128
>(
1870 reinterpret_cast<fptr
>(s64_store_aligned
),
1871 reinterpret_cast<fptr
>(v128_load_aligned
),
1872 reinterpret_cast<fptr
>(v128_load_aligned
), simd
, d
,
1873 reinterpret_cast<fptr
>(c_s64_store_aligned
),
1874 reinterpret_cast<fptr
>(c_v128_load_aligned
),
1875 reinterpret_cast<fptr
>(c_v128_load_aligned
),
1876 reinterpret_cast<fptr
>(ref_simd
), ref_d
, s1
, s2
);
1877 } else if (typeid(CRet
) == typeid(c_v128
) &&
1878 typeid(CArg1
) == typeid(uint64_t) &&
1879 typeid(CArg2
) == typeid(uint64_t)) {
1881 error
= CompareSimd2Args
<v128
, uint64_t, uint64_t, c_v128
, uint64_t,
1883 reinterpret_cast<fptr
>(v128_store_aligned
),
1884 reinterpret_cast<fptr
>(u64_load_aligned
),
1885 reinterpret_cast<fptr
>(u64_load_aligned
), simd
, d
,
1886 reinterpret_cast<fptr
>(c_v128_store_aligned
),
1887 reinterpret_cast<fptr
>(c_u64_load_aligned
),
1888 reinterpret_cast<fptr
>(c_u64_load_aligned
),
1889 reinterpret_cast<fptr
>(ref_simd
), ref_d
, s1
, s2
);
1890 } else if (typeid(CRet
) == typeid(c_v128
) &&
1891 typeid(CArg1
) == typeid(c_v64
) &&
1892 typeid(CArg2
) == typeid(c_v64
)) {
1894 error
= CompareSimd2Args
<v128
, v64
, v64
, c_v128
, c_v64
, c_v64
>(
1895 reinterpret_cast<fptr
>(v128_store_aligned
),
1896 reinterpret_cast<fptr
>(v64_load_aligned
),
1897 reinterpret_cast<fptr
>(v64_load_aligned
), simd
, d
,
1898 reinterpret_cast<fptr
>(c_v128_store_aligned
),
1899 reinterpret_cast<fptr
>(c_v64_load_aligned
),
1900 reinterpret_cast<fptr
>(c_v64_load_aligned
),
1901 reinterpret_cast<fptr
>(ref_simd
), ref_d
, s1
, s2
);
1902 } else if (typeid(CRet
) == typeid(c_v128
) &&
1903 typeid(CArg1
) == typeid(c_v128
) &&
1904 typeid(CArg2
) == typeid(uint32_t)) {
      error = CompareSimd2Args<v128, v128, uint32_t, c_v128, c_v128, uint32_t>(
          reinterpret_cast<fptr>(v128_store_aligned),
          reinterpret_cast<fptr>(v128_load_aligned),
          reinterpret_cast<fptr>(u32_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v128_store_aligned),
          reinterpret_cast<fptr>(c_v128_load_aligned),
          reinterpret_cast<fptr>(c_u32_load_aligned),
          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
    } else if (typeid(CRet) == typeid(c_v256) &&
               typeid(CArg1) == typeid(c_v256) &&
               typeid(CArg2) == typeid(c_v256)) {
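      // V256_V256V256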
      error = CompareSimd2Args<v256, v256, v256, c_v256, c_v256, c_v256>(
          reinterpret_cast<fptr>(v256_store_aligned),
          reinterpret_cast<fptr>(v256_load_aligned),
          reinterpret_cast<fptr>(v256_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v256_store_aligned),
          reinterpret_cast<fptr>(c_v256_load_aligned),
          reinterpret_cast<fptr>(c_v256_load_aligned),
          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
    } else if (typeid(CRet) == typeid(uint64_t) &&
               typeid(CArg1) == typeid(c_v256) &&
               typeid(CArg2) == typeid(c_v256)) {
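      // U64_V256V256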
      error = CompareSimd2Args<uint64_t, v256, v256, uint64_t, c_v256, c_v256>(
          reinterpret_cast<fptr>(u64_store_aligned),
          reinterpret_cast<fptr>(v256_load_aligned),
          reinterpret_cast<fptr>(v256_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_u64_store_aligned),
          reinterpret_cast<fptr>(c_v256_load_aligned),
          reinterpret_cast<fptr>(c_v256_load_aligned),
          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
    } else if (typeid(CRet) == typeid(int64_t) &&
               typeid(CArg1) == typeid(c_v256) &&
               typeid(CArg2) == typeid(c_v256)) {
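      // S64_V256V256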
      error = CompareSimd2Args<int64_t, v256, v256, int64_t, c_v256, c_v256>(
          reinterpret_cast<fptr>(s64_store_aligned),
          reinterpret_cast<fptr>(v256_load_aligned),
          reinterpret_cast<fptr>(v256_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_s64_store_aligned),
          reinterpret_cast<fptr>(c_v256_load_aligned),
          reinterpret_cast<fptr>(c_v256_load_aligned),
          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
    } else if (typeid(CRet) == typeid(uint32_t) &&
               typeid(CArg1) == typeid(c_v256) &&
               typeid(CArg2) == typeid(c_v256)) {
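      // U32_V256V256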
      error = CompareSimd2Args<uint32_t, v256, v256, uint32_t, c_v256, c_v256>(
          reinterpret_cast<fptr>(u32_store_aligned),
          reinterpret_cast<fptr>(v256_load_aligned),
          reinterpret_cast<fptr>(v256_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_u32_store_aligned),
          reinterpret_cast<fptr>(c_v256_load_aligned),
          reinterpret_cast<fptr>(c_v256_load_aligned),
          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
    } else if (typeid(CRet) == typeid(c_v256) &&
               typeid(CArg1) == typeid(c_v128) &&
               typeid(CArg2) == typeid(c_v128)) {
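      // V256_V128V128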
      error = CompareSimd2Args<v256, v128, v128, c_v256, c_v128, c_v128>(
          reinterpret_cast<fptr>(v256_store_aligned),
          reinterpret_cast<fptr>(v128_load_aligned),
          reinterpret_cast<fptr>(v128_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v256_store_aligned),
          reinterpret_cast<fptr>(c_v128_load_aligned),
          reinterpret_cast<fptr>(c_v128_load_aligned),
          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
    } else if (typeid(CRet) == typeid(c_v256) &&
               typeid(CArg1) == typeid(c_v256) &&
               typeid(CArg2) == typeid(uint32_t)) {
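      // V256_V256U32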
      error = CompareSimd2Args<v256, v256, uint32_t, c_v256, c_v256, uint32_t>(
          reinterpret_cast<fptr>(v256_store_aligned),
          reinterpret_cast<fptr>(v256_load_aligned),
          reinterpret_cast<fptr>(u32_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v256_store_aligned),
          reinterpret_cast<fptr>(c_v256_load_aligned),
          reinterpret_cast<fptr>(c_u32_load_aligned),
          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
    } else {
      FAIL() << "Internal error: Unknown intrinsic function "
             << typeid(CRet).name() << " " << name << "("
             << typeid(CArg1).name() << ", " << typeid(CArg2).name() << ")";
    }
  }

  EXPECT_EQ(0, error) << "Error: mismatch for " << name << "("
                      << Print(s1, sizeof(CArg1)) << ", "
                      << Print(s2, sizeof(CArg2)) << ") -> "
                      << Print(d, sizeof(CRet)) << " (simd), "
                      << Print(ref_d, sizeof(CRet)) << " (ref)";
}
template <typename CRet, typename CArg1, typename CArg2, typename CArg3>
void TestSimd3Args(uint32_t iterations, uint32_t mask, uint32_t maskwidth,
                   const char *name) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  fptr ref_simd;
  fptr simd;
  int error = 0;
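  // 32-byte aligned buffers, large enough for the widest vector type under
  // test: s1-s3 hold the randomized inputs, d and ref_d the SIMD and
  // reference outputs.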
  DECLARE_ALIGNED(32, uint8_t, s1[32]);
  DECLARE_ALIGNED(32, uint8_t, s2[32]);
  DECLARE_ALIGNED(32, uint8_t, s3[32]);
  DECLARE_ALIGNED(32, uint8_t, d[32]);
  DECLARE_ALIGNED(32, uint8_t, ref_d[32]);
  assert(sizeof(CArg1) <= 32 && sizeof(CArg2) <= 32 && sizeof(CArg3) <= 32 &&
         sizeof(CRet) <= 32);
  memset(ref_d, 0, sizeof(ref_d));
  memset(d, 0, sizeof(d));

  Map(name, &ref_simd, &simd);
  if (simd == nullptr || ref_simd == nullptr) {
    FAIL() << "Internal error: Unknown intrinsic function " << name;
  }

  for (unsigned int count = 0;
       count < iterations && !error && !testing::Test::HasFailure(); count++) {
    for (unsigned int c = 0; c < sizeof(CArg1); c++) s1[c] = rnd.Rand8();

    for (unsigned int c = 0; c < sizeof(CArg2); c++) s2[c] = rnd.Rand8();

    for (unsigned int c = 0; c < sizeof(CArg3); c++) s3[c] = rnd.Rand8();

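    // If requested, constrain the third argument to the range the intrinsic
    // accepts (e.g. a valid shift amount).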
    if (maskwidth) SetMask(s3, sizeof(CArg3), mask, maskwidth);

    if (typeid(CRet) == typeid(c_v128) && typeid(CArg1) == typeid(c_v128) &&
        typeid(CArg2) == typeid(c_v128) && typeid(CArg3) == typeid(c_v128)) {
      // V128_V128V128V128
      error = CompareSimd3Args<v128, v128, v128, v128, c_v128, c_v128, c_v128,
                               c_v128>(
          reinterpret_cast<fptr>(v128_store_aligned),
          reinterpret_cast<fptr>(v128_load_aligned),
          reinterpret_cast<fptr>(v128_load_aligned),
          reinterpret_cast<fptr>(v128_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v128_store_aligned),
          reinterpret_cast<fptr>(c_v128_load_aligned),
          reinterpret_cast<fptr>(c_v128_load_aligned),
          reinterpret_cast<fptr>(c_v128_load_aligned),
          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2, s3);
    } else if (typeid(CRet) == typeid(c_v256) &&
               typeid(CArg1) == typeid(c_v256) &&
               typeid(CArg2) == typeid(c_v256) &&
               typeid(CArg3) == typeid(c_v256)) {
      // V256_V256V256V256
      error = CompareSimd3Args<v256, v256, v256, v256, c_v256, c_v256, c_v256,
                               c_v256>(
          reinterpret_cast<fptr>(v256_store_aligned),
          reinterpret_cast<fptr>(v256_load_aligned),
          reinterpret_cast<fptr>(v256_load_aligned),
          reinterpret_cast<fptr>(v256_load_aligned), simd, d,
          reinterpret_cast<fptr>(c_v256_store_aligned),
          reinterpret_cast<fptr>(c_v256_load_aligned),
          reinterpret_cast<fptr>(c_v256_load_aligned),
          reinterpret_cast<fptr>(c_v256_load_aligned),
          reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2, s3);
    } else {
      FAIL() << "Internal error: Unknown intrinsic function "
             << typeid(CRet).name() << " " << name << "("
             << typeid(CArg1).name() << ", " << typeid(CArg2).name() << ", "
             << typeid(CArg3).name() << ")";
    }
  }

  EXPECT_EQ(0, error) << "Error: mismatch for " << name << "("
                      << Print(s1, sizeof(CArg1)) << ", "
                      << Print(s2, sizeof(CArg2)) << ", "
                      << Print(s3, sizeof(CArg3)) << ") -> "
                      << Print(d, sizeof(CRet)) << " (simd), "
                      << Print(ref_d, sizeof(CRet)) << " (ref)";
}
// Instantiations to make the functions callable from other files
template void TestSimd1Arg<c_v64, uint8_t>(uint32_t, uint32_t, uint32_t,
                                           const char *);
template void TestSimd1Arg<c_v64, uint16_t>(uint32_t, uint32_t, uint32_t,
                                            const char *);
template void TestSimd1Arg<c_v64, uint32_t>(uint32_t, uint32_t, uint32_t,
                                            const char *);
template void TestSimd1Arg<c_v64, c_v64>(uint32_t, uint32_t, uint32_t,
                                         const char *);
template void TestSimd1Arg<uint32_t, c_v64>(uint32_t, uint32_t, uint32_t,
                                            const char *);
template void TestSimd1Arg<int32_t, c_v64>(uint32_t, uint32_t, uint32_t,
                                           const char *);
template void TestSimd1Arg<uint64_t, c_v64>(uint32_t, uint32_t, uint32_t,
                                            const char *);
template void TestSimd1Arg<int64_t, c_v64>(uint32_t, uint32_t, uint32_t,
                                           const char *);
template void TestSimd2Args<c_v64, uint32_t, uint32_t>(uint32_t, uint32_t,
                                                       uint32_t, const char *);
template void TestSimd2Args<c_v64, c_v64, c_v64>(uint32_t, uint32_t, uint32_t,
                                                 const char *);
template void TestSimd2Args<c_v64, c_v64, uint32_t>(uint32_t, uint32_t,
                                                    uint32_t, const char *);
template void TestSimd2Args<int64_t, c_v64, c_v64>(uint32_t, uint32_t,
                                                   uint32_t, const char *);
template void TestSimd2Args<uint32_t, c_v64, c_v64>(uint32_t, uint32_t,
                                                    uint32_t, const char *);
template void TestSimd1Arg<c_v128, c_v128>(uint32_t, uint32_t, uint32_t,
                                           const char *);
template void TestSimd1Arg<c_v128, uint8_t>(uint32_t, uint32_t, uint32_t,
                                            const char *);
template void TestSimd1Arg<c_v128, uint16_t>(uint32_t, uint32_t, uint32_t,
                                             const char *);
template void TestSimd1Arg<c_v128, uint32_t>(uint32_t, uint32_t, uint32_t,
                                             const char *);
template void TestSimd1Arg<c_v128, uint64_t>(uint32_t, uint32_t, uint32_t,
                                             const char *);
template void TestSimd1Arg<c_v128, c_v64>(uint32_t, uint32_t, uint32_t,
                                          const char *);
template void TestSimd1Arg<uint32_t, c_v128>(uint32_t, uint32_t, uint32_t,
                                             const char *);
template void TestSimd1Arg<uint64_t, c_v128>(uint32_t, uint32_t, uint32_t,
                                             const char *);
template void TestSimd1Arg<c_v64, c_v128>(uint32_t, uint32_t, uint32_t,
                                          const char *);
template void TestSimd2Args<c_v128, c_v128, c_v128>(uint32_t, uint32_t,
                                                    uint32_t, const char *);
template void TestSimd2Args<c_v128, c_v128, uint32_t>(uint32_t, uint32_t,
                                                      uint32_t, const char *);
template void TestSimd2Args<c_v128, uint64_t, uint64_t>(uint32_t, uint32_t,
                                                        uint32_t,
                                                        const char *);
template void TestSimd2Args<c_v128, c_v64, c_v64>(uint32_t, uint32_t, uint32_t,
                                                  const char *);
template void TestSimd2Args<uint64_t, c_v128, c_v128>(uint32_t, uint32_t,
                                                      uint32_t, const char *);
template void TestSimd2Args<int64_t, c_v128, c_v128>(uint32_t, uint32_t,
                                                     uint32_t, const char *);
template void TestSimd2Args<uint32_t, c_v128, c_v128>(uint32_t, uint32_t,
                                                      uint32_t, const char *);
template void TestSimd3Args<c_v128, c_v128, c_v128, c_v128>(uint32_t, uint32_t,
                                                            uint32_t,
                                                            const char *);
template void TestSimd1Arg<c_v256, c_v128>(uint32_t, uint32_t, uint32_t,
                                           const char *);
template void TestSimd1Arg<c_v256, c_v256>(uint32_t, uint32_t, uint32_t,
                                           const char *);
template void TestSimd1Arg<uint64_t, c_v256>(uint32_t, uint32_t, uint32_t,
                                             const char *);
template void TestSimd1Arg<c_v256, uint8_t>(uint32_t, uint32_t, uint32_t,
                                            const char *);
template void TestSimd1Arg<c_v256, uint16_t>(uint32_t, uint32_t, uint32_t,
                                             const char *);
template void TestSimd1Arg<c_v256, uint32_t>(uint32_t, uint32_t, uint32_t,
                                             const char *);
template void TestSimd1Arg<c_v256, uint64_t>(uint32_t, uint32_t, uint32_t,
                                             const char *);
template void TestSimd1Arg<uint32_t, c_v256>(uint32_t, uint32_t, uint32_t,
                                             const char *);
template void TestSimd1Arg<c_v64, c_v256>(uint32_t, uint32_t, uint32_t,
                                          const char *);
template void TestSimd2Args<c_v256, c_v128, c_v128>(uint32_t, uint32_t,
                                                    uint32_t, const char *);
template void TestSimd2Args<c_v256, c_v256, c_v256>(uint32_t, uint32_t,
                                                    uint32_t, const char *);
template void TestSimd2Args<c_v256, c_v256, uint32_t>(uint32_t, uint32_t,
                                                      uint32_t, const char *);
template void TestSimd2Args<uint64_t, c_v256, c_v256>(uint32_t, uint32_t,
                                                      uint32_t, const char *);
template void TestSimd2Args<int64_t, c_v256, c_v256>(uint32_t, uint32_t,
                                                     uint32_t, const char *);
template void TestSimd2Args<uint32_t, c_v256, c_v256>(uint32_t, uint32_t,
                                                      uint32_t, const char *);
template void TestSimd3Args<c_v256, c_v256, c_v256, c_v256>(uint32_t, uint32_t,
                                                            uint32_t,
                                                            const char *);

}  // namespace SIMD_NAMESPACE