/* { dg-do compile } */
/* { dg-options "-Ofast -funroll-loops -march=skylake-avx512 -mfpmath=sse" } */
/* { dg-final { scan-assembler-not "vmovdq(a|u)(32|64)" } } */
/* Kernel for a compile-only test: the directives at the top of the file
   only inspect the generated assembly, so the exact shape of the
   arithmetic below is what matters and it is intentionally left as is.

   For each of four rows, the byte differences pix1[k] - pix2[k] for
   k = 0..3 are combined with the differences for k = 4..7 shifted left
   by 16 bits, so every a0..a3 carries two values.  A 4-point butterfly
   is applied along each row, then along each column, and the results
   are accumulated into SUM.

   PIX1 and PIX2 are advanced by I_PIX1 and I_PIX2 after each row, and
   eight bytes are read from each pointer per row.

   Returns (sum + (sum >> 16)) >> 1, with the shifts done on the value
   converted to unsigned int.

   NOTE(review): the text this was restored from had lost its braces,
   the return type, the declaration of SUM and the butterfly statements
   that store into TMP and recompute a0..a3.  They are reconstructed
   here from the surrounding t0..t3 pattern; confirm against the
   pristine copy of the test before relying on them.  */

int
pr83008 (unsigned char *pix1, int i_pix1, unsigned char *pix2, int i_pix2)
{
  unsigned int tmp[4][4];
  unsigned int a0, a1, a2, a3;
  int sum = 0;

  /* Row pass: one packed difference per column pair, then a butterfly.  */
  for (int i = 0; i < 4; i++, pix1 += i_pix1, pix2 += i_pix2)
    {
      a0 = (pix1[0] - pix2[0]) + ((pix1[4] - pix2[4]) << 16);
      a1 = (pix1[1] - pix2[1]) + ((pix1[5] - pix2[5]) << 16);
      a2 = (pix1[2] - pix2[2]) + ((pix1[6] - pix2[6]) << 16);
      a3 = (pix1[3] - pix2[3]) + ((pix1[7] - pix2[7]) << 16);
      int t0 = a0 + a1;
      int t1 = a0 - a1;
      int t2 = a2 + a3;
      int t3 = a2 - a3;
      tmp[i][0] = t0 + t2;
      tmp[i][2] = t0 - t2;
      tmp[i][1] = t1 + t3;
      tmp[i][3] = t1 - t3;
    }

  /* Column pass: same butterfly down each column, accumulated into sum.  */
  for (int i = 0; i < 4; i++)
    {
      int t0 = tmp[0][i] + tmp[1][i];
      int t1 = tmp[0][i] - tmp[1][i];
      int t2 = tmp[2][i] + tmp[3][i];
      int t3 = tmp[2][i] - tmp[3][i];
      a0 = t0 + t2;
      a2 = t0 - t2;
      a1 = t1 + t3;
      a3 = t1 - t3;
      sum += (a0) + (a1) + (a2) + (a3);
    }

  return (sum + ((unsigned int) sum >> 16)) >> 1;
}