libgomp: Document 'GOMP_teams4'
[official-gcc.git] / gcc / testsuite / gcc.target / aarch64 / pr110625_1.c
blob0965cac33a006853d654236f62cff68bb7902703
1 /* { dg-do compile } */
2 /* { dg-options "-Ofast -mcpu=neoverse-n2 -fdump-tree-vect-details -fno-tree-slp-vectorize" } */
3 /* { dg-final { scan-tree-dump-not "reduction latency = 8" "vect" } } */
5 /* Do not increase the vector body cost due to the incorrect reduction latency
6 Original vector body cost = 51
7 Scalar issue estimate:
8 ...
9 reduction latency = 2
10 estimated min cycles per iteration = 2.000000
11 estimated cycles per vector iteration (for VF 2) = 4.000000
12 Vector issue estimate:
13 ...
14 reduction latency = 8 <-- Too large
15 estimated min cycles per iteration = 8.000000
16 Increasing body cost to 102 because scalar code would issue more quickly
17 ...
18 missed: cost model: the vector iteration cost = 102 divided by the scalar iteration cost = 44 is greater or equal to the vectorization factor = 2.
19 missed: not vectorized: vectorization not profitable. */
/* Input record: four unsigned short fields.  foo reads an array of these,
   indexing it by the loop counter and multiplying each field by *k.  */
typedef struct
{
  unsigned short m1, m2, m3, m4;
} the_struct_t;
/* Accumulator record: five double fields.  foo adds into m1..m4 and passes
   the whole record to bar; m5 is not written by any code in this file.  */
typedef struct
{
  double m1, m2, m3, m4, m5;
} the_struct2_t;
30 double
31 bar (the_struct2_t *);
33 double
34 foo (double *k, unsigned int n, the_struct_t *the_struct)
36 unsigned int u;
37 the_struct2_t result;
38 for (u = 0; u < n; u++, k--)
40 result.m1 += (*k) * the_struct[u].m1;
41 result.m2 += (*k) * the_struct[u].m2;
42 result.m3 += (*k) * the_struct[u].m3;
43 result.m4 += (*k) * the_struct[u].m4;
45 return bar (&result);