2 /* { dg-require-effective-target fma4 } */
3 /* { dg-options "-O2 -mfma4" } */
5 #include "fma4-check.h"
18 } dst
, res
, src1
, src2
, src3
;
20 /* Note that in macc*, msub*, nmacc* and nmsub* instructions, the intermediate
21 product is not rounded, only the addition is rounded. */
27 for (i
= 0; i
< NUM
* 8; i
++)
39 for (i
= 0; i
< NUM
* 4; i
++)
50 int i
, j
, check_fails
= 0;
51 for (i
= 0; i
< NUM
* 8; i
= i
+ 8)
52 for (j
= 0; j
< 8; j
++)
54 res
.f
[i
+ j
] = (src1
.f
[i
+ j
] * src2
.f
[i
+ j
]) - src3
.f
[i
+ j
];
55 if (dst
.f
[i
+ j
] != res
.f
[i
+ j
])
64 int i
, j
, check_fails
= 0;
65 for (i
= 0; i
< NUM
* 4; i
= i
+ 4)
66 for (j
= 0; j
< 4; j
++)
68 res
.d
[i
+ j
] = (src1
.d
[i
+ j
] * src2
.d
[i
+ j
]) - src3
.d
[i
+ j
];
69 if (dst
.d
[i
+ j
] != res
.d
[i
+ j
])
82 for (i
= 0; i
< NUM
; i
++)
83 dst
.x
[i
] = _mm256_msub_ps (src1
.x
[i
], src2
.x
[i
], src3
.x
[i
]);
90 for (i
= 0; i
< NUM
; i
++)
91 dst
.y
[i
] = _mm256_msub_pd (src1
.y
[i
], src2
.y
[i
], src3
.y
[i
]);