2 /* { dg-options "-O2 -mavx512f -mavx512vnni" } */
3 /* { dg-require-effective-target avx512f } */
4 /* { dg-require-effective-target avx512vnni } */
9 #include "avx512f-helper.h"
11 #define SIZE (AVX512F_LEN / 16)
12 #define SIZE_RES (AVX512F_LEN / 32)
14 #include "avx512f-mask-type.h"
17 CALC (int *r
, int *dst
, short *s1
, short *s2
)
20 for (int i
= 0; i
< SIZE
; i
++) {
21 tempres
[i
] = ((int)(s1
[i
]) * (int)(s2
[i
]));
23 for (int i
= 0; i
< SIZE_RES
; i
++) {
24 long long test
= (long long)dst
[i
] + tempres
[i
*2] + tempres
[i
*2 + 1];
33 UNION_TYPE (AVX512F_LEN
, i_d
) res1
, res2
, res3
;
34 UNION_TYPE (AVX512F_LEN
, i_w
) src1
, src2
;
35 MASK_TYPE mask
= MASK_VALUE
;
36 int res_ref
[SIZE_RES
];
37 int res_ref2
[SIZE_RES
];
39 for (i
= 0; i
< SIZE
; i
++)
45 for (i
= 0; i
< SIZE_RES
; i
++)
47 res1
.a
[i
] = 0x7fffffff;
48 res2
.a
[i
] = DEFAULT_VALUE
;
49 res3
.a
[i
] = DEFAULT_VALUE
;
52 CALC (res_ref
, res1
.a
, src1
.a
, src2
.a
);
53 CALC (res_ref2
, res2
.a
, src1
.a
, src2
.a
);
55 res1
.x
= INTRINSIC (_dpwssd_epi32
) (res1
.x
, src1
.x
, src2
.x
);
56 res2
.x
= INTRINSIC (_mask_dpwssd_epi32
) (res2
.x
, mask
, src1
.x
, src2
.x
);
57 res3
.x
= INTRINSIC (_maskz_dpwssd_epi32
) (mask
, res3
.x
, src1
.x
, src2
.x
);
59 if (UNION_CHECK (AVX512F_LEN
, i_d
) (res1
, res_ref
))
62 MASK_MERGE (i_d
) (res_ref2
, mask
, SIZE_RES
);
63 if (UNION_CHECK (AVX512F_LEN
, i_d
) (res2
, res_ref2
))
66 MASK_ZERO (i_d
) (res_ref2
, mask
, SIZE_RES
);
67 if (UNION_CHECK (AVX512F_LEN
, i_d
) (res3
, res_ref2
))