/* { dg-do compile } */
/* { dg-options "-O3 -mdejagnu-cpu=power9 -mvsx" } */
/* { dg-require-effective-target powerpc_vsx } */

/* Verify that we vectorize this SAD loop using vabsduh. */
/* Hand-written declaration of abs, so the test does not depend on
   <stdlib.h>.  */
extern int abs (int __x) __attribute__ ((__nothrow__, __leaf__))
  __attribute__ ((__const__));
/* Return the sum of absolute differences between two blocks of
   16 rows by 8 columns of unsigned shorts.  W and X point at the
   first row of each block; I and J are the distances, in elements,
   from one row to the next in W and X respectively.

   NOTE(review): the return type, the braces, the declaration of
   `tot', the per-row pointer advances and the return statement were
   absent from the damaged listing.  They have been restored from the
   signature and the surviving loop headers; confirm against the
   upstream test.  The function is presumably static so that only the
   copy inlined into bar is emitted, keeping the scan-assembler-times
   counts at 16.  */

static int
foo (unsigned short *w, int i, unsigned short *x, int j)
{
  int tot = 0;
  for (int a = 0; a < 16; a++)
    {
      /* One row: eight halfword differences, i.e. one vector's worth.  */
      for (int b = 0; b < 8; b++)
        tot += abs (w[b] - x[b]);
      /* Step both operands to their next row.  */
      w += i;
      x += j;
    }
  return tot;
}

/* Externally visible entry point.  Store in *RESULT the value foo
   returns for W and X, passing the constant 8 as foo's second
   argument and I as its fourth.

   NOTE(review): the `void' return type and the braces were absent
   from the damaged listing and have been restored; confirm against
   the upstream test.  */

void
bar (unsigned short *w, unsigned short *x, int i, int *result)
{
  *result = foo (w, 8, x, i);
}

/* { dg-final { scan-assembler-times "vabsduh" 16 } } */
/* { dg-final { scan-assembler-times "vsum4shs" 16 } } */
/* { dg-final { scan-assembler-times "vadduwm" 17 } } */

/* Note: One of the 16 adds is optimized out (add with zero),
   leaving 15.  The extra two adds are for the final reduction.  */