Merge from mainline
[official-gcc.git] / gcc / testsuite / gcc.dg / vect / vect-reduc-dot-u8.c
blobad68bc752c5f673c6eab31afdf417b801f5e8b0f
/* { dg-require-effective-target vect_int } */

#include <stdarg.h>
#include <stdlib.h>
#include "tree-vect.h"

/* Number of elements in each input array.  */
#define N 64

/* Expected results of foo1, foo2 and foo3 for the inputs that main
   stores in X and Y (X[i] = i, Y[i] = 64 - i for i in [0, N)):
   the sum of i * (64 - i) over that range is 43680.  */
#define DOT1 43680
#define DOT2 43680
#define DOT3 43680

/* Input vectors for the dot products, aligned on a 16-byte boundary.  */
unsigned char X[N] __attribute__ ((__aligned__(16)));
unsigned char Y[N] __attribute__ ((__aligned__(16)));

15 /* char->short->int dot product.
16 Detected as a dot-product pattern.
17 Should be vectorized on targets that support dot-product for unsigned chars.
19 unsigned int
20 foo1(int len) {
21 int i;
22 unsigned int result = 0;
23 unsigned short prod;
25 for (i=0; i<len; i++) {
26 prod = X[i] * Y[i];
27 result += prod;
29 return result;
32 /* char->short->short dot product.
33 Detected as a dot-product pattern.
34 Should be vectorized on targets that support dot-product for unsigned chars.
35 This test currently fails to vectorize on targets that support dot-product
36 of chars only when the accumulator is int.
38 unsigned short
39 foo2(int len) {
40 int i;
41 unsigned short result = 0;
43 for (i=0; i<len; i++) {
44 result += (unsigned short)(X[i] * Y[i]);
46 return result;
49 /* char->int->int dot product.
50 Not detected as a dot-product.
51 Doesn't get vectorized due to presence of type converisons. */
52 unsigned int
53 foo3(int len) {
54 int i;
55 unsigned int result = 0;
57 for (i=0; i<len; i++) {
58 result += (X[i] * Y[i]);
60 return result;
63 int main (void)
65 unsigned int dot1, dot3;
66 unsigned short dot2;
67 int i;
69 check_vect ();
71 for (i=0; i<N; i++) {
72 X[i] = i;
73 Y[i] = 64-i;
76 dot1 = foo1 (N);
77 if (dot1 != DOT1)
78 abort ();
80 dot2 = foo2 (N);
81 if (dot2 != DOT2)
82 abort ();
84 dot3 = foo3 (N);
85 if (dot3 != DOT3)
86 abort ();
88 return 0;
/* { dg-final { scan-tree-dump-times "vect_recog_dot_prod_pattern: detected" 2 "vect" } } */

/* When the vectorizer is enhanced to vectorize foo2 (accumulation into short) for
   targets that support accumulation into int (powerpc, ia64) we'd have:
dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_udot_qi } }
*/
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail *-*-* } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_udot_qi } } } */

/* { dg-final { cleanup-tree-dump "vect" } } */