gcc/testsuite/gcc.dg/unroll-and-jam.c

   1 /* { dg-do run } */
   2 /* { dg-options "-O3 -floop-unroll-and-jam -fno-tree-loop-im --param unroll-jam-min-percent=0 -fdump-tree-unrolljam-details" } */
   3 /* { dg-additional-options "--param max-completely-peel-times=16" { target { s390*-*-* } } } */
   4 /* { dg-require-effective-target int32plus } */
   5
   6 #include <stdio.h>
   7 extern unsigned int a[];        /* a, b and aa are only declared here, with incomplete  */
   8 extern unsigned int b[];        /* array types; their sized definitions follow the TEST  */
   9 extern unsigned int aa[][1024]; /* instantiations further down in this file.  */
  10 unsigned int checksum;          /* Accumulator updated by checkaa()/checkb(), reset to 1 by init().  */
  11 void checkaa(void)
  12 {
       /* Folds the contents of aa into the global checksum.  Every element
          aa[j][i] with j < 16 and i < 1024 contributes aa[j][i]*31+47 to an
          unsigned sum that starts at 1; the sum is then mixed in as
          checksum = checksum * 27 + sum.  All arithmetic is unsigned, so
          wrap-around is well defined.  */
  13   unsigned sum = 1;
  14   unsigned long i, j;
  15   for (i = 0; i < 1024; i++) {
  16       for (j = 0; j < 16; j++) {
  17           sum += aa[j][i]*31+47;
  18       }
  19   }
  20   checksum = checksum * 27 + sum;
  21   //printf("  %d\n", sum);
  22 }
  23
  24 void checkb(void)
  25 {
       /* Folds the contents of b into the global checksum.  Every element
          b[i] with i < 1024 contributes b[i]*31+47 to an unsigned sum that
          starts at 1; the sum is then mixed in as
          checksum = checksum * 27 + sum.  All arithmetic is unsigned, so
          wrap-around is well defined.  */
  26   unsigned sum = 1;
  27   unsigned long i;
  28   for (i = 0; i < 1024; i++) {
  29       sum += b[i]*31+47;
  30   }
  31   checksum = checksum * 27 + sum;
  32   //printf("  %d\n", sum);
  33 }
  34
  35 #define TEST(name, body, test) \
     /* Instantiates one loop-nest kernel twice: NAME, built with the options \
        of this file, and NAME##noopt, built with the optimize attribute set \
        to O1.  Both are noinline/noclone and run BODY for 1 <= i < m and \
        1 <= j < n (BODY refers to i and j directly), then evaluate TEST. \
        RUN compares the checksums the two variants produce, so apart from \
        the attribute their source must be identical; in particular both use \
        unsigned long counters, the same type as the bounds n and m, so the \
        counters cannot wrap before reaching a bound.  */ \
  36 static void __attribute__((noinline,noclone)) name (unsigned long n, unsigned long m) \
  37 { \
  38   unsigned long i, j; \
  39   for (i = 1; i < m; i++) { \
  40       for (j = 1; j < n; j++) { \
  41           body; \
  42       } \
  43   } \
  44   test; \
  45 } \
  46 static void __attribute__((noinline,noclone,optimize("O1"))) name ## noopt (unsigned long n, unsigned long m) \
  47 { \
  48   unsigned long i, j; \
  49   for (i = 1; i < m; i++) { \
  50       for (j = 1; j < n; j++) { \
  51           body; \
  52       } \
  53   } \
  54   test; \
  55 }
  56 TEST(foo1, aa[i+1][j+1]=aa[i][j] * aa[i][j] / 2, checkaa()) //ok, -1,-1
     /* NOTE(review): on each TEST line the trailing ok/notok remark presumably
        states whether unroll-and-jam is expected to be valid for that kernel,
        and the number pair looks like a dependence distance -- confirm against
        the pass before relying on it.  */
  57 TEST(foo2, aa[i][j+1]=3*aa[i+1][j], checkaa()) //notok, 1,-1
  58 TEST(foo3, aa[i+1][j-1]=aa[i][j] * aa[i][j] / 2, checkaa()) //notok, -1,1
  59 TEST(foo4, aa[i][j] = aa[i-1][j+1] * aa[i-1][j+1] / 2, checkaa()) //notok, -1,1
  60 TEST(foo5, aa[i][j] = aa[i+1][j+1] * aa[i+1][j+1] / 2, checkaa()) //ok, 1,1
  61 TEST(foo6, aa[i][j] = aa[i+1][j] * aa[i+1][j] / 2, checkaa()) //ok, -1,0
  62 TEST(foo61, aa[i][0] = aa[i+1][0] * aa[i+1][0] / 2, checkaa()) //notok, -1,0
  63 TEST(foo62, aa[i][j/2] = aa[i+1][j/2] * aa[i+1][j/2] / 2, checkaa()) //notok, not affine
  64 TEST(foo63, aa[i][j%2] = aa[i+1][j%2] * aa[i+1][j%2] / 2, checkaa()) //notok, not affine
  65 TEST(foo7, aa[i+1][j] = aa[i][j] * aa[i][j] / 2, checkaa()) //ok, 1,0
  66 TEST(foo9, b[j] = 3*b[j+1] + 1, checkb()) //notok, 0,-1
  67 TEST(foo10, b[j] = 3*b[j] + 1, checkb()) //ok, 0,0
     /* foo11 assigns to f inside its body, so f needs a declaration before that
        instantiation; its definition comes after the kernels.  */
  68 extern int f;
  69 TEST(foo11, f = b[i-1] = 1 + 3* b[i+1], checkb()) //ok, 2,0 but must reduce unroll factor to 2, (it would be incorrect with unroll-by-3, which the profitability would suggest)
  70
  71 /* foo8 should work as well, but currently doesn't because the distance
  72    vectors we compute are too pessimistic.  We compute
  73      (0,1), (1,1) and (1,-1)
  74    and the last one causes us to lose.  */
  75 TEST(foo8, b[j+1] = 3*b[j] + 1, checkb()) //ok, 0,1
  76
  77 int f;                      /* Scalar written by foo11's body.  */
  78 unsigned int a[1024];       /* Filled by init(); no kernel shown in this file reads it.  */
  79 unsigned int b[1024];       /* One-dimensional data for the kernels checked by checkb().  */
  80 unsigned int aa[16][1024];  /* Two-dimensional data (16 rows of 1024) for the kernels checked by checkaa().  */
  81 void init(void)
  82 {
       /* Resets all test data to fixed values.  RUN calls this before each
          variant of a kernel so both start from the same state:
            aa[j][i] = ((j+1)*2+i+1) % 17   for j < 16, i < 1024
            a[i]     = ((i+1)*31) % 19      for i < 1024
            b[i]     = ((i+1)*47) % 23      for i < 1024
          and checksum is set back to 1.  */
  83   unsigned long i,j;
  84   for (i = 0; i < 1024; i++) {
  85       for (j = 0; j < 16; j++) {
  86           aa[j][i] = ((j+1)*2+i+1) % 17;
  87       }
  88       a[i] = ((i+1)*31) % 19;
  89       b[i] = ((i+1)*47) % 23;
  90   }
  91   checksum = 1;
  92 }
  93
  94 #define RUN(name) \
  95     printf(" %s\n", #name); \
  96     init();for(i=0;i<4;i++)name##noopt(32,8); checka = checksum; \
  97     init();for(i=0;i<4;i++)name(32,8); \
  98     if (checka != checksum) fail = 1; \
  99     printf("%sok %s\n", checka != checksum ? "NOT " : "", #name);
 100
 101 int main()
 102 {
       /* Runs every kernel through RUN, which compares the optimised variant
          against its O1 reference, and aborts if any pair disagreed.  The
          locals below are referenced by name from inside the RUN macro, so
          they must keep these names.  */
 103   int fail = 0;       /* Set to 1 by RUN on any checksum mismatch.  */
 104   int i;              /* Repeat counter used inside RUN.  */
 105   unsigned checka;    /* Checksum of the reference run, stored by RUN.  */
 106   RUN(foo1);
 107   RUN(foo2);
 108   RUN(foo3);
 109   RUN(foo4);
 110   RUN(foo5);
 111   RUN(foo6);
 112   RUN(foo61);
 113   RUN(foo62);
 114   RUN(foo63);
 115   RUN(foo7);
 116   RUN(foo8);
 117   RUN(foo9);
 118   RUN(foo10);
 119   RUN(foo11);
 120   if (fail)
 121     __builtin_abort();
       /* Reached only when fail is 0, so this returns success.  */
 122   return fail;
 123 }
 124
 125 /* Six loops should be unroll-jammed (actually seven, but see above).  */
 126 /* { dg-final { scan-tree-dump-times "applying unroll and jam" 6 "unrolljam" } } */