* gcc.target/powerpc/builtins-1-be.c <vclzb>: Rename duplicate test
[official-gcc.git] / gcc / testsuite / gcc.target / powerpc / sad-vectorize-2.c
blob b1b6de9ddea98528c598a4d1b286cf62bf3e88b2
1 /* { dg-do compile { target { powerpc*-*-* } } } */
2 /* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
3 /* { dg-require-effective-target powerpc_p9vector_ok } */
4 /* { dg-skip-if "" { powerpc*-*-aix* } } */
5 /* { dg-options "-O3 -mcpu=power9" } */
7 /* Verify that we vectorize this SAD loop using vabsduh. */
/* Local prototype for abs so the test does not need to include
   <stdlib.h>.  */
extern int abs (int __x) __attribute__ ((__nothrow__, __leaf__)) __attribute__ ((__const__));

/* Return the sum of absolute differences between two blocks of
   unsigned shorts, each 16 rows of 8 elements.

   W and X point at the first row of each block.  I and J are the
   distances, in elements, from one row to the next in W and X
   respectively.  The total is accumulated and returned as an int.  */

static int
foo (unsigned short *w, int i, unsigned short *x, int j)
{
  int tot = 0;
  for (int a = 0; a < 16; a++)
    {
      /* Both operands promote to int before the subtraction, so the
	 difference can be negative and abs sees a signed value.  */
      for (int b = 0; b < 8; b++)
	tot += abs (w[b] - x[b]);
      /* Step each pointer to its next row.  */
      w += i;
      x += j;
    }
  return tot;
}
/* Externally visible caller of the static function foo.

   Calls foo with a fixed row distance of 8 elements for W and a
   row distance of I elements for X, and stores the returned total
   in *RESULT.  */

void
bar (unsigned short *w, unsigned short *x, int i, int *result)
{
  *result = foo (w, 8, x, i);
}
31 /* { dg-final { scan-assembler-times "vabsduh" 16 } } */
32 /* { dg-final { scan-assembler-times "vsum4shs" 16 } } */
33 /* { dg-final { scan-assembler-times "vadduwm" 17 } } */
35 /* Note: One of the 16 adds is optimized out (add with zero),
36 leaving 15. The extra two adds are for the final reduction, giving the 17 vadduwm expected above. */