1 /* { dg-do compile } */
2 /* { dg-options "-O2 -ftree-vectorize -fno-inline -msve-vector-bits=256 -fdump-tree-vect-details" } */
10 slp_reduc_plus (int n
)
13 for (int i
= 0; i
< n
; i
++)
15 tmp
= tmp
+ mat
[i
][0];
16 tmp
= tmp
+ mat
[i
][1];
17 tmp
= tmp
+ mat
[i
][2];
18 tmp
= tmp
+ mat
[i
][3];
24 slp_reduc_plus2 (int n
)
27 for (int i
= 0; i
< n
; i
++)
29 tmp
= tmp
+ mat2
[i
][0];
30 tmp
= tmp
+ mat2
[i
][1];
31 tmp
= tmp
+ mat2
[i
][2];
32 tmp
= tmp
+ mat2
[i
][3];
33 tmp
= tmp
+ mat2
[i
][4];
34 tmp
= tmp
+ mat2
[i
][5];
35 tmp
= tmp
+ mat2
[i
][6];
36 tmp
= tmp
+ mat2
[i
][7];
42 slp_reduc_plus3 (int n
)
45 for (int i
= 0; i
< n
; i
++)
47 tmp
= tmp
+ mat3
[i
][0];
48 tmp
= tmp
+ mat3
[i
][1];
49 tmp
= tmp
+ mat3
[i
][2];
50 tmp
= tmp
+ mat3
[i
][3];
51 tmp
= tmp
+ mat3
[i
][4];
52 tmp
= tmp
+ mat3
[i
][5];
53 tmp
= tmp
+ mat3
[i
][6];
54 tmp
= tmp
+ mat3
[i
][7];
55 tmp
= tmp
+ mat3
[i
][8];
56 tmp
= tmp
+ mat3
[i
][9];
57 tmp
= tmp
+ mat3
[i
][10];
58 tmp
= tmp
+ mat3
[i
][11];
64 slp_non_chained_reduc (int n
, double * restrict out
)
66 for (int i
= 0; i
< 3; i
++)
69 for (int i
= 0; i
< n
; i
++)
71 out
[0] = out
[0] + mat4
[i
][0];
72 out
[1] = out
[1] + mat4
[i
][1];
73 out
[2] = out
[2] + mat4
[i
][2];
77 /* Strict FP reductions shouldn't be used for the outer loops, only the inner loops.  */
81 double_reduc1 (float (*restrict i
)[16])
85 for (int a
= 0; a
< 8; a
++)
86 for (int b
= 0; b
< 8; b
++)
92 double_reduc2 (float *restrict i
)
96 for (int a
= 0; a
< 8; a
++)
97 for (int b
= 0; b
< 16; b
++)
108 double_reduc3 (float *restrict i
, float *restrict j
)
112 for (int a
= 0; a
< 8; a
++)
113 for (int b
= 0; b
< 8; b
++)
121 /* We can't yet handle double_reduc1. */
122 /* { dg-final { scan-assembler-times {\tfadda\ts[0-9]+, p[0-7], s[0-9]+, z[0-9]+\.s} 3 } } */
123 /* { dg-final { scan-assembler-times {\tfadda\td[0-9]+, p[0-7], d[0-9]+, z[0-9]+\.d} 9 } } */
124 /* 1 reduction each for double_reduc{1,2} and 2 for double_reduc3. Each one
125 is reported three times, once for SVE, once for 128-bit AdvSIMD and once
126 for 64-bit AdvSIMD. */
127 /* { dg-final { scan-tree-dump-times "Detected double reduction" 12 "vect" } } */
128 /* double_reduc2 has 2 reductions and slp_non_chained_reduc has 3.
129 double_reduc1 is reported 3 times (SVE, 128-bit AdvSIMD, 64-bit AdvSIMD).  */
131 /* { dg-final { scan-tree-dump-times "Detected reduction" 12 "vect" } } */