2 /* { dg-options "-O3 -floop-unroll-and-jam -fno-tree-loop-im --param unroll-jam-min-percent=0 -fdump-tree-unrolljam-details" } */
3 /* { dg-additional-options "--param max-completely-peel-times=16" { target { s390*-*-* } } } */
4 /* { dg-require-effective-target int32plus } */
/* Global data used by the code below: a, b and aa are only declared
   here (extern, sizes left open except aa's 1024 columns); checksum is
   defined here and starts out zero-initialized as a file-scope object.  */
7 extern unsigned int a
[];
8 extern unsigned int b
[];
9 extern unsigned int aa
[][1024];
10 unsigned int checksum
;
15 for (i
= 0; i
< 1024; i
++) {
16 for (j
= 0; j
< 16; j
++) {
17 sum
+= aa
[j
][i
]*31+47;
20 checksum
= checksum
* 27 + sum
;
21 //printf(" %d\n", sum);
28 for (i
= 0; i
< 1024; i
++) {
31 checksum
= checksum
* 27 + sum
;
32 //printf(" %d\n", sum);
35 #define TEST(name, body, test) \
36 static void __attribute__((noinline,noclone)) name (unsigned long n, unsigned long m) \
39 for (i = 1; i < m; i++) { \
40 for (j = 1; j < n; j++) { \
46 static void __attribute__((noinline,noclone,optimize("O1"))) name ## noopt (unsigned long n, unsigned long m) \
49 for (i = 1; i < m; i++) { \
50 for (j = 1; j < n; j++) { \
/* foo1: TEST instance whose body stores aa[i][j] * aa[i][j] / 2 into
   aa[i+1][j+1]; checkaa() is passed as the macro's test argument.
   (Presumably the body runs inside TEST's i/j loop nest -- the macro
   body is not fully visible here.)  */
56 TEST(foo1
, aa
[i
+1][j
+1]=aa
[i
][j
] * aa
[i
][j
] / 2, checkaa()) //ok, -1,-1
/* foo2: TEST instance whose body stores 3 * aa[i+1][j] into aa[i][j+1];
   checkaa() is passed as the macro's test argument.  */
57 TEST(foo2
, aa
[i
][j
+1]=3*aa
[i
+1][j
], checkaa()) //notok, 1,-1
/* foo3: TEST instance whose body stores aa[i][j] * aa[i][j] / 2 into
   aa[i+1][j-1]; checkaa() is passed as the macro's test argument.  */
58 TEST(foo3
, aa
[i
+1][j
-1]=aa
[i
][j
] * aa
[i
][j
] / 2, checkaa()) //notok, -1,1
/* foo4: TEST instance whose body stores aa[i-1][j+1] * aa[i-1][j+1] / 2
   into aa[i][j]; checkaa() is passed as the macro's test argument.  */
59 TEST(foo4
, aa
[i
][j
] = aa
[i
-1][j
+1] * aa
[i
-1][j
+1] / 2, checkaa()) //notok, -1,1
/* foo5: TEST instance whose body stores aa[i+1][j+1] * aa[i+1][j+1] / 2
   into aa[i][j]; checkaa() is passed as the macro's test argument.  */
60 TEST(foo5
, aa
[i
][j
] = aa
[i
+1][j
+1] * aa
[i
+1][j
+1] / 2, checkaa()) //ok, 1,1
/* foo6: TEST instance whose body stores aa[i+1][j] * aa[i+1][j] / 2
   into aa[i][j] (same column, next row read); checkaa() is passed as
   the macro's test argument.  */
61 TEST(foo6
, aa
[i
][j
] = aa
[i
+1][j
] * aa
[i
+1][j
] / 2, checkaa()) //ok, -1,0
/* foo61: like foo6 but with the column index fixed at 0, so the body
   does not use j at all: aa[i][0] = aa[i+1][0] * aa[i+1][0] / 2.
   checkaa() is passed as the macro's test argument.  */
62 TEST(foo61
, aa
[i
][0] = aa
[i
+1][0] * aa
[i
+1][0] / 2, checkaa()) //notok, -1,0
/* foo62: like foo6 but with column index j/2 on both the store and the
   loads: aa[i][j/2] = aa[i+1][j/2] * aa[i+1][j/2] / 2.
   checkaa() is passed as the macro's test argument.  */
63 TEST(foo62
, aa
[i
][j
/2] = aa
[i
+1][j
/2] * aa
[i
+1][j
/2] / 2, checkaa()) //notok, not affine
/* foo63: like foo6 but with column index j%2 on both the store and the
   loads: aa[i][j%2] = aa[i+1][j%2] * aa[i+1][j%2] / 2.
   checkaa() is passed as the macro's test argument.  */
64 TEST(foo63
, aa
[i
][j
%2] = aa
[i
+1][j
%2] * aa
[i
+1][j
%2] / 2, checkaa()) //notok, not affine
/* foo7: TEST instance whose body stores aa[i][j] * aa[i][j] / 2 into
   aa[i+1][j] (same column, next row written); checkaa() is passed as
   the macro's test argument.  */
65 TEST(foo7
, aa
[i
+1][j
] = aa
[i
][j
] * aa
[i
][j
] / 2, checkaa()) //ok, 1,0
/* foo9: TEST instance on the one-dimensional array b; the body stores
   3 * b[j+1] + 1 into b[j] and does not use i.  checkb() is passed as
   the macro's test argument.  */
66 TEST(foo9
, b
[j
] = 3*b
[j
+1] + 1, checkb()) //notok, 0,-1
/* foo10: TEST instance on b whose body reads and writes the same
   element: b[j] = 3 * b[j] + 1.  checkb() is passed as the macro's
   test argument.  */
67 TEST(foo10
, b
[j
] = 3*b
[j
] + 1, checkb()) //ok, 0,0
/* foo11: TEST instance on b indexed by i only; the body stores
   1 + 3 * b[i+1] into b[i-1] and also assigns that value to f.
   NOTE(review): f's declaration is not visible in this part of the
   file -- presumably declared just above; confirm.  checkb() is passed
   as the macro's test argument.  */
69 TEST(foo11
, f
= b
[i
-1] = 1 + 3* b
[i
+1], checkb()) //ok, 2,0 but the unroll factor must be reduced to 2 (unroll-by-3, which the profitability would suggest, would be incorrect)
71 /* foo8 should work as well, but currently doesn't because the distance
72 vectors we compute are too pessimistic. We compute
73 (0,1), (1,1) and (1,-1)
74 and the last one causes us to lose. This is why the dg-final scan
   further down expects 6 rather than 7 "applying unroll and jam"
   messages. The body itself stores 3 * b[j] + 1 into b[j+1] and passes
   checkb() as the macro's test argument. */
75 TEST(foo8
, b
[j
+1] = 3*b
[j
] + 1, checkb()) //ok, 0,1
/* Definition of aa: 16 rows of 1024 unsigned ints, matching the
   1024-column extern declaration earlier in the file.  */
80 unsigned int aa
[16][1024];
84 for (i
= 0; i
< 1024; i
++) {
85 for (j
= 0; j
< 16; j
++) {
86 aa
[j
][i
] = ((j
+1)*2+i
+1) % 17;
88 a
[i
] = ((i
+1)*31) % 19;
89 b
[i
] = ((i
+1)*47) % 23;
95 printf(" %s\n", #name); \
96 init();for(i=0;i<4;i++)name##noopt(32,8); checka = checksum; \
97 init();for(i=0;i<4;i++)name(32,8); \
98 if (checka != checksum) fail = 1; \
99 printf("%sok %s\n", checka != checksum ? "NOT " : "", #name);
125 /* Six loops should be unroll-jammed (actually seven, but see the comment above foo8). */
126 /* { dg-final { scan-tree-dump-times "applying unroll and jam" 6 "unrolljam" } } */