PR tree-optimization/84740
[official-gcc.git] / gcc / testsuite / gcc.dg / vect / pr45752.c
blob4ddac7ad5097c72f08b948f64caa54421d4f55d0
/* { dg-require-effective-target vect_int } */
/* { dg-require-effective-target vect_perm } */
/* { dg-additional-options "--param tree-reassoc-width=1" } */
5 #include <stdarg.h>
6 #include "tree-vect.h"
/* Coefficients of the constant 5x5 matrix applied by foo ().  MRC is the
   weight of input element C in output element R of each 5-element group.  */
#define M00 100
#define M10 216
#define M20 23
#define M30 237
#define M40 437

#define M01 1322
#define M11 13
#define M21 27271
#define M31 2280
#define M41 284

#define M02 74
#define M12 191
#define M22 500
#define M32 111
#define M42 1114

#define M03 134
#define M13 117
#define M23 11
#define M33 771
#define M43 71

#define M04 334
#define M14 147
#define M24 115
#define M34 7716
#define M44 16

/* N is the number of elements in each array; foo () handles N / 5 groups.
   VECTOR_BITS is not defined in this file; when it is undefined the
   preprocessor evaluates it as 0 and N falls back to 20.  */
#if VECTOR_BITS > 128
#define N (VECTOR_BITS * 5 / 32)
#else
#define N 20
#endif

/* For each of the N / 5 groups, read five consecutive values from PINPUT
   and write the five weighted sums given by the M coefficients to POUTPUT,
   then do the same from PINPUT2 to POUTPUT2.

   All arithmetic is done in unsigned int, so sums wrap on overflow.  The
   four pointers are __restrict__-qualified, so the arrays must not overlap,
   and each must hold at least 5 * (N / 5) elements.

   The body is kept as straight-line, pointer-bumping code: the dg-final
   directives in this file scan the vectorizer dump, so the shape of this
   loop is part of what is being tested.  */
void foo (unsigned int *__restrict__ pInput,
          unsigned int *__restrict__ pOutput,
          unsigned int *__restrict__ pInput2,
          unsigned int *__restrict__ pOutput2)
{
  unsigned int i, a, b, c, d, e;

  for (i = 0; i < N / 5; i++)
    {
      /* First stream: load one group of five ...  */
      a = *pInput++;
      b = *pInput++;
      c = *pInput++;
      d = *pInput++;
      e = *pInput++;

      /* ... and store its five weighted sums.  */
      *pOutput++ = M00 * a + M01 * b + M02 * c + M03 * d + M04 * e;
      *pOutput++ = M10 * a + M11 * b + M12 * c + M13 * d + M14 * e;
      *pOutput++ = M20 * a + M21 * b + M22 * c + M23 * d + M24 * e;
      *pOutput++ = M30 * a + M31 * b + M32 * c + M33 * d + M34 * e;
      *pOutput++ = M40 * a + M41 * b + M42 * c + M43 * d + M44 * e;

      /* Second stream: same computation on the other pair of arrays.  */
      a = *pInput2++;
      b = *pInput2++;
      c = *pInput2++;
      d = *pInput2++;
      e = *pInput2++;

      *pOutput2++ = M00 * a + M01 * b + M02 * c + M03 * d + M04 * e;
      *pOutput2++ = M10 * a + M11 * b + M12 * c + M13 * d + M14 * e;
      *pOutput2++ = M20 * a + M21 * b + M22 * c + M23 * d + M24 * e;
      *pOutput2++ = M30 * a + M31 * b + M32 * c + M33 * d + M34 * e;
      *pOutput2++ = M40 * a + M41 * b + M42 * c + M43 * d + M44 * e;
    }
}
81 int main (int argc, const char* argv[])
83 unsigned int input[N], output[N], i, input2[N], output2[N];
85 check_vect ();
87 for (i = 0; i < N; i++)
89 input[i] = i%256;
90 input2[i] = i + 2;
91 output[i] = 0;
92 output2[i] = 0;
93 __asm__ volatile ("");
96 #if N == 20
97 unsigned int check_results[N]
98 = { 3208, 1334, 28764, 35679, 2789, 13028, 4754, 168364, 91254, 12399,
99 22848, 8174, 307964, 146829, 22009, 32668, 11594, 447564, 202404,
100 31619 };
101 unsigned int check_results2[N]
102 = { 7136, 2702, 84604, 57909, 6633, 16956, 6122, 224204, 113484, 16243,
103 26776, 9542, 363804, 169059, 25853, 36596, 12962, 503404, 224634,
104 35463 };
105 #else
106 volatile unsigned int check_results[N];
107 volatile unsigned int check_results2[N];
109 for (i = 0; i < N / 5; i++)
111 unsigned int a = input[i * 5];
112 unsigned int b = input[i * 5 + 1];
113 unsigned int c = input[i * 5 + 2];
114 unsigned int d = input[i * 5 + 3];
115 unsigned int e = input[i * 5 + 4];
117 check_results[i * 5] = M00 * a + M01 * b + M02 * c + M03 * d + M04 * e;
118 check_results[i * 5 + 1] = (M10 * a + M11 * b + M12 * c
119 + M13 * d + M14 * e);
120 check_results[i * 5 + 2] = (M20 * a + M21 * b + M22 * c
121 + M23 * d + M24 * e);
122 check_results[i * 5 + 3] = (M30 * a + M31 * b + M32 * c
123 + M33 * d + M34 * e);
124 check_results[i * 5 + 4] = (M40 * a + M41 * b + M42 * c
125 + M43 * d + M44 * e);
127 a = input2[i * 5];
128 b = input2[i * 5 + 1];
129 c = input2[i * 5 + 2];
130 d = input2[i * 5 + 3];
131 e = input2[i * 5 + 4];
133 check_results2[i * 5] = M00 * a + M01 * b + M02 * c + M03 * d + M04 * e;
134 check_results2[i * 5 + 1] = (M10 * a + M11 * b + M12 * c
135 + M13 * d + M14 * e);
136 check_results2[i * 5 + 2] = (M20 * a + M21 * b + M22 * c
137 + M23 * d + M24 * e);
138 check_results2[i * 5 + 3] = (M30 * a + M31 * b + M32 * c
139 + M33 * d + M34 * e);
140 check_results2[i * 5 + 4] = (M40 * a + M41 * b + M42 * c
141 + M43 * d + M44 * e);
143 asm volatile ("" ::: "memory");
145 #endif
147 foo (input, output, input2, output2);
149 for (i = 0; i < N; i++)
150 if (output[i] != check_results[i]
151 || output2[i] != check_results2[i])
152 abort ();
154 return 0;
/* Currently interleaving is not supported for a group-size of 5. */

/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { scan-tree-dump-times "gaps requires scalar epilogue loop" 0 "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" } } */