2 /* { dg-options "-O2 -mavx5124vnniw" } */
3 /* { dg-require-effective-target avx5124vnniw } */
/* Value written into every element of src5.a before the reference
   computation and the intrinsics are run.  0x7ffffffe is one less than
   0x7fffffff.  */
5 #define DEFAULT_VALUE 0x7ffffffe
8 #include "avx512f-helper.h"
/* Loop bound used for the int-element arrays (src5.a, and dst inside CALC);
   the short-element inputs are filled over SIZE * 2 entries.
   NOTE(review): presumably the number of 32-bit lanes in an AVX512F_LEN-bit
   vector -- confirm against the helper header.  */
10 #define SIZE (AVX512F_LEN / 32)
12 #include "avx512f-mask-type.h"
/* Scalar computation whose output (res_ref at the call site) is compared
   against the results of the _4dpwssds_epi32 intrinsics.

   For each i in [0, SIZE) the statements shown here process the four source
   arrays in order.  For source N (1..4) they form two int products,
       srcN[2*i]     * mult[2*(N-1)]
       srcN[2*i + 1] * mult[2*(N-1) + 1]
   then compute dst[i] plus both products in long long (tmp), and add both
   products into dst[i].

   Parameters:
     src1..src4  short arrays, read at indices 2*i and 2*i+1.
     prev_dst    int array; not referenced on any line shown here.
     mult        short array, read at indices 0..7.
     dst         int array, read and updated at index i.

   NOTE(review): the bare leading integers look like line numbers from the
   text this was extracted from, and they skip (16 -> 20, 27 -> 31, ...).
   The return type, braces, local declarations, any use of prev_dst and any
   use of tmp are therefore not visible here; presumably tmp feeds an
   overflow/saturation check before each "dst[i] +=" -- confirm against the
   complete file.  */
15 CALC (short *src1
, short* src2
, short *src3
,
16 short *src4
, int* prev_dst
, short *mult
, int *dst
)
20 for (i
= 0; i
< SIZE
; i
++)
/* Pair from src1, weighted by mult[0] and mult[1].  */
25 p1dword
= (int)(src1
[2*i
]) * (int)(mult
[0]);
26 p2dword
= (int)(src1
[2*i
+1]) * (int)(mult
[1]);
/* Same sum as the update below, but evaluated in long long.  */
27 tmp
= (long long)dst
[i
] + p1dword
+ p2dword
;
31 dst
[i
] += p1dword
+ p2dword
;
/* Pair from src2, weighted by mult[2] and mult[3].  */
33 p1dword
= (int)(src2
[2*i
]) * (int)(mult
[2]);
34 p2dword
= (int)(src2
[2*i
+1]) * (int)(mult
[3]);
35 tmp
= (long long)dst
[i
] + p1dword
+ p2dword
;
39 dst
[i
] += p1dword
+ p2dword
;
/* Pair from src3, weighted by mult[4] and mult[5].  */
41 p1dword
= (int)(src3
[2*i
]) * (int)(mult
[4]);
42 p2dword
= (int)(src3
[2*i
+1]) * (int)(mult
[5]);
43 tmp
= (long long)dst
[i
] + p1dword
+ p2dword
;
47 dst
[i
] += p1dword
+ p2dword
;
/* Pair from src4, weighted by mult[6] and mult[7].  */
49 p1dword
= (int)(src4
[2*i
]) * (int)(mult
[6]);
50 p2dword
= (int)(src4
[2*i
+1]) * (int)(mult
[7]);
51 tmp
= (long long)dst
[i
] + p1dword
+ p2dword
;
55 dst
[i
] += p1dword
+ p2dword
;
/* Test driver body: builds the inputs, runs CALC to obtain res_ref, runs the
   plain, _mask_ and _maskz_ forms of the _4dpwssds_epi32 intrinsic on the
   same inputs, and compares each result with res_ref.

   NOTE(review): the enclosing function header, the declarations of i and
   res_ref, and the bodies of the three "if (UNION_CHECK ...)" statements are
   not visible in this text (the embedded numbering skips them) -- confirm
   against the complete file.  The union named dst is declared but not used
   on any line shown here.  */
63 UNION_TYPE (AVX512F_LEN
, i_w
) src1
, src2
, src3
, src4
;
64 UNION_TYPE (AVX512F_LEN
, i_d
) src5
, dst
, res1
, res2
, res3
;
65 UNION_TYPE (128, i_w
) mult
;
66 MASK_TYPE mask
= MASK_VALUE
;
/* Fill the four i_w inputs over SIZE * 2 elements.  */
69 for (i
= 0; i
< SIZE
* 2; i
++)
/* By C precedence this is 2 + ((7 * i) % 291).  */
71 src1
.a
[i
] = 2 + 7 * i
% 291;
72 src2
.a
[i
] = 3 + 11 * (i
% 377) * i
;
/* src3 and src4 hold the element-wise squares of src1 and src2.
   NOTE(review): presumably the .a elements are 16-bit, so the stored product
   may be truncated -- confirm against UNION_TYPE.  */
73 src3
.a
[i
] = src1
.a
[i
] * src1
.a
[i
];
74 src4
.a
[i
] = src2
.a
[i
] * src2
.a
[i
];
/* The eight multipliers are 3, 5, 7, ..., 17.  */
76 for (i
= 0; i
< 8; i
++)
77 mult
.a
[i
] = 3 + i
* 2;
/* Every element of src5 starts at DEFAULT_VALUE.  */
79 for (i
= 0; i
< SIZE
; i
++)
80 src5
.a
[i
] = DEFAULT_VALUE
;
/* Scalar result goes into res_ref; src5.a is passed as prev_dst.  */
82 CALC (src1
.a
, src2
.a
, src3
.a
, src4
.a
, src5
.a
, mult
.a
, res_ref
);
/* Unmasked form: src5 first, then the four sources and the address of
   mult.x.  */
84 res1
.x
= INTRINSIC (_4dpwssds_epi32
) ( src5
.x
, src1
.x
, src2
.x
, src3
.x
, src4
.x
, &mult
.x
);
/* _mask_ form: note the argument order is src5.x, then mask.  */
85 res2
.x
= INTRINSIC (_mask_4dpwssds_epi32
) (src5
.x
, mask
, src1
.x
, src2
.x
, src3
.x
, src4
.x
, &mult
.x
);
/* _maskz_ form: here mask comes first, then src5.x.  */
86 res3
.x
= INTRINSIC (_maskz_4dpwssds_epi32
) (mask
, src5
.x
, src1
.x
, src2
.x
, src3
.x
, src4
.x
, &mult
.x
);
/* res1 is compared with res_ref as produced by CALC.  */
88 if (UNION_CHECK (AVX512F_LEN
, i_d
) (res1
, res_ref
))
/* res_ref is passed through MASK_MERGE before the res2 comparison.  */
91 MASK_MERGE (i_d
) (res_ref
, mask
, SIZE
);
92 if (UNION_CHECK (AVX512F_LEN
, i_d
) (res2
, res_ref
))
/* MASK_ZERO is then applied to the same res_ref (after MASK_MERGE) before
   the res3 comparison.  */
95 MASK_ZERO (i_d
) (res_ref
, mask
, SIZE
);
96 if (UNION_CHECK (AVX512F_LEN
, i_d
) (res3
, res_ref
))