2 /* { dg-options "-O2 -mavx512f -mavx512vnni" } */
3 /* { dg-require-effective-target avx512f } */
4 /* { dg-require-effective-target avx512vnni } */
9 #include "avx512f-helper.h"
/* Element count used as the bound for the loops that index the `short`
   inputs (s1/s2, src1/src2) and the per-element product buffer tempres.
   Presumably AVX512F_LEN is the vector width in bits, which would make this
   the number of 16-bit lanes -- TODO confirm against avx512f-helper.h.  */
11 #define SIZE (AVX512F_LEN / 16)
/* Element count used as the bound for the loops that index the `int`
   accumulator/result data (dst, res1/res2/res3) and as the length of the
   res_ref/res_ref2 reference arrays.  Each result index i consumes the two
   products tempres[i*2] and tempres[i*2 + 1], so SIZE is twice SIZE_RES.  */
12 #define SIZE_RES (AVX512F_LEN / 32)
14 #include "avx512f-mask-type.h"
17 CALC (int *r
, int *dst
, short *s1
, short *s2
)
20 for (int i
= 0; i
< SIZE
; i
++) {
21 tempres
[i
] = ((int)(s1
[i
]) * (int)(s2
[i
]));
23 for (int i
= 0; i
< SIZE_RES
; i
++) {
24 long long test
= (long long)dst
[i
] + tempres
[i
*2] + tempres
[i
*2 + 1];
25 long long max_int
= 0x7FFFFFFF;
36 UNION_TYPE (AVX512F_LEN
, i_d
) res1
, res2
, res3
;
37 UNION_TYPE (AVX512F_LEN
, i_w
) src1
, src2
;
38 MASK_TYPE mask
= MASK_VALUE
;
39 int res_ref
[SIZE_RES
];
40 int res_ref2
[SIZE_RES
];
42 for (i
= 0; i
< SIZE
; i
++)
48 for (i
= 0; i
< SIZE_RES
; i
++)
50 res1
.a
[i
] = 0x7fffffff;
51 res2
.a
[i
] = DEFAULT_VALUE
;
52 res3
.a
[i
] = DEFAULT_VALUE
;
55 CALC (res_ref
, res1
.a
, src1
.a
, src2
.a
);
56 CALC (res_ref2
, res2
.a
, src1
.a
, src2
.a
);
58 res1
.x
= INTRINSIC (_dpwssds_epi32
) (res1
.x
, src1
.x
, src2
.x
);
59 res2
.x
= INTRINSIC (_mask_dpwssds_epi32
) (res2
.x
, mask
, src1
.x
, src2
.x
);
60 res3
.x
= INTRINSIC (_maskz_dpwssds_epi32
) (mask
, res3
.x
, src1
.x
, src2
.x
);
62 if (UNION_CHECK (AVX512F_LEN
, i_d
) (res1
, res_ref
))
65 MASK_MERGE (i_d
) (res_ref2
, mask
, SIZE_RES
);
66 if (UNION_CHECK (AVX512F_LEN
, i_d
) (res2
, res_ref2
))
69 MASK_ZERO (i_d
) (res_ref2
, mask
, SIZE_RES
);
70 if (UNION_CHECK (AVX512F_LEN
, i_d
) (res3
, res_ref2
))