gcc/lambda-code.c

   1 /*  Loop transformation code generation
   2     Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
   3     Contributed by Daniel Berlin <dberlin@dberlin.org>
   4
   5     This file is part of GCC.
   6
   7     GCC is free software; you can redistribute it and/or modify it under
   8     the terms of the GNU General Public License as published by the Free
   9     Software Foundation; either version 3, or (at your option) any later
  10     version.
  11
  12     GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13     WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14     FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15     for more details.
  16
  17     You should have received a copy of the GNU General Public License
  18     along with GCC; see the file COPYING3.  If not see
  19     <http://www.gnu.org/licenses/>.  */
  20
  21 #include "config.h"
  22 #include "system.h"
  23 #include "coretypes.h"
  24 #include "tm.h"
  25 #include "ggc.h"
  26 #include "tree.h"
  27 #include "target.h"
  28 #include "rtl.h"
  29 #include "basic-block.h"
  30 #include "diagnostic.h"
  31 #include "obstack.h"
  32 #include "tree-flow.h"
  33 #include "tree-dump.h"
  34 #include "timevar.h"
  35 #include "cfgloop.h"
  36 #include "expr.h"
  37 #include "optabs.h"
  38 #include "tree-chrec.h"
  39 #include "tree-data-ref.h"
  40 #include "tree-pass.h"
  41 #include "tree-scalar-evolution.h"
  42 #include "vec.h"
  43 #include "lambda.h"
  44 #include "vecprim.h"
  45 #include "pointer-set.h"
  46
  47 /* This loop nest code generation is based on non-singular matrix
  48    math.
  49
  50  A little terminology and a general sketch of the algorithm.  See "A singular
  51  loop transformation framework based on non-singular matrices" by Wei Li and
  52  Keshav Pingali for formal proofs that the various statements below are
  53  correct.
  54
  55  A loop iteration space represents the points traversed by the loop.  A point in the
  56  iteration space can be represented by a vector of size <loop depth>.  You can
  57  therefore represent the iteration space as an integral combinations of a set
  58  of basis vectors.
  59
  60  A loop iteration space is dense if every integer point between the loop
  61  bounds is a point in the iteration space.  Every loop with a step of 1
  62  therefore has a dense iteration space.
  63
  64  for i = 1 to 3, step 1 is a dense iteration space.
  65
  66  A loop iteration space is sparse if it is not dense.  That is, the iteration
  67  space skips integer points that are within the loop bounds.
  68
  69  for i = 1 to 3, step 2 is a sparse iteration space, because the integer point
  70  2 is skipped.
  71
  72  Dense source spaces are easy to transform, because they don't skip any
  73  points to begin with.  Thus we can compute the exact bounds of the target
  74  space using min/max and floor/ceil.
  75
  76  For a dense source space, we take the transformation matrix, decompose it
  77  into a lower triangular part (H) and a unimodular part (U).
  78  We then compute the auxiliary space from the unimodular part (source loop
  79  nest . U = auxiliary space) , which has two important properties:
  80   1. It traverses the iterations in the same lexicographic order as the source
  81   space.
  82   2. It is a dense space when the source is a dense space (even if the target
  83   space is going to be sparse).
  84
  85  Given the auxiliary space, we use the lower triangular part to compute the
  86  bounds in the target space by simple matrix multiplication.
  87  The gaps in the target space (IE the new loop step sizes) will be the
  88  diagonals of the H matrix.
  89
  90  Sparse source spaces require another step, because you can't directly compute
  91  the exact bounds of the auxiliary and target space from the sparse space.
  92  Rather than try to come up with a separate algorithm to handle sparse source
  93  spaces directly, we just find a legal transformation matrix that gives you
  94  the sparse source space, from a dense space, and then transform the dense
  95  space.
  96
  97  For a regular sparse space, you can represent the source space as an integer
  98  lattice, and the base space of that lattice will always be dense.  Thus, we
  99  effectively use the lattice to figure out the transformation from the lattice
 100  base space, to the sparse iteration space (IE what transform was applied to
 101  the dense space to make it sparse).  We then compose this transform with the
 102  transformation matrix specified by the user (since our matrix transformations
 103  are closed under composition, this is okay).  We can then use the base space
 104  (which is dense) plus the composed transformation matrix, to compute the rest
 105  of the transform using the dense space algorithm above.
 106
 107  In other words, our sparse source space (B) is decomposed into a dense base
 108  space (A), and a matrix (L) that transforms A into B, such that A.L = B.
 109  We then compute the composition of L and the user transformation matrix (T),
 110  so that T is now a transform from A to the result, instead of from B to the
 111  result.
 112  IE A.(LT) = result instead of B.T = result
 113  Since A is now a dense source space, we can use the dense source space
 114  algorithm above to compute the result of applying transform (LT) to A.
 115
 116  Fourier-Motzkin elimination is used to compute the bounds of the base space
 117  of the lattice.  */
 118
 119 static bool perfect_nestify (struct loop *, VEC(tree,heap) *,
 120                              VEC(tree,heap) *, VEC(int,heap) *,
 121                              VEC(tree,heap) *);
 122 /* Lattice stuff that is internal to the code generation algorithm.  */
 123
 124 typedef struct lambda_lattice_s
 125 {
 126   /* Lattice base matrix.  */
 127   lambda_matrix base;
 128   /* Lattice dimension.  */
 129   int dimension;
 130   /* Origin vector for the coefficients.  */
 131   lambda_vector origin;
 132   /* Origin matrix for the invariants.  */
 133   lambda_matrix origin_invariants;
 134   /* Number of invariants.  */
 135   int invariants;
 136 } *lambda_lattice;
 137
 138 #define LATTICE_BASE(T) ((T)->base)
 139 #define LATTICE_DIMENSION(T) ((T)->dimension)
 140 #define LATTICE_ORIGIN(T) ((T)->origin)
 141 #define LATTICE_ORIGIN_INVARIANTS(T) ((T)->origin_invariants)
 142 #define LATTICE_INVARIANTS(T) ((T)->invariants)
 143
 144 static bool lle_equal (lambda_linear_expression, lambda_linear_expression,
 145                        int, int);
 146 static lambda_lattice lambda_lattice_new (int, int, struct obstack *);
 147 static lambda_lattice lambda_lattice_compute_base (lambda_loopnest,
 148                                                    struct obstack *);
 149
 150 static tree find_induction_var_from_exit_cond (struct loop *);
 151 static bool can_convert_to_perfect_nest (struct loop *);
 152
 153 /* Create a new lambda body vector.  */
 154
 155 lambda_body_vector
 156 lambda_body_vector_new (int size, struct obstack * lambda_obstack)
 157 {
 158   lambda_body_vector ret;
 159
 160   ret = (lambda_body_vector)obstack_alloc (lambda_obstack, sizeof (*ret));
 161   LBV_COEFFICIENTS (ret) = lambda_vector_new (size);
 162   LBV_SIZE (ret) = size;
 163   LBV_DENOMINATOR (ret) = 1;
 164   return ret;
 165 }
 166
 167 /* Compute the new coefficients for the vector based on the
 168   *inverse* of the transformation matrix.  */
 169
 170 lambda_body_vector
 171 lambda_body_vector_compute_new (lambda_trans_matrix transform,
 172                                 lambda_body_vector vect,
 173                                 struct obstack * lambda_obstack)
 174 {
 175   lambda_body_vector temp;
 176   int depth;
 177
 178   /* Make sure the matrix is square.  */
 179   gcc_assert (LTM_ROWSIZE (transform) == LTM_COLSIZE (transform));
 180
 181   depth = LTM_ROWSIZE (transform);
 182
 183   temp = lambda_body_vector_new (depth, lambda_obstack);
 184   LBV_DENOMINATOR (temp) =
 185     LBV_DENOMINATOR (vect) * LTM_DENOMINATOR (transform);
 186   lambda_vector_matrix_mult (LBV_COEFFICIENTS (vect), depth,
 187                              LTM_MATRIX (transform), depth,
 188                              LBV_COEFFICIENTS (temp));
 189   LBV_SIZE (temp) = LBV_SIZE (vect);
 190   return temp;
 191 }
 192
 193 /* Print out a lambda body vector.  */
 194
 195 void
 196 print_lambda_body_vector (FILE * outfile, lambda_body_vector body)
 197 {
 198   print_lambda_vector (outfile, LBV_COEFFICIENTS (body), LBV_SIZE (body));
 199 }
 200
 201 /* Return TRUE if two linear expressions are equal.  */
 202
 203 static bool
 204 lle_equal (lambda_linear_expression lle1, lambda_linear_expression lle2,
 205            int depth, int invariants)
 206 {
 207   int i;
 208
 209   if (lle1 == NULL || lle2 == NULL)
 210     return false;
 211   if (LLE_CONSTANT (lle1) != LLE_CONSTANT (lle2))
 212     return false;
 213   if (LLE_DENOMINATOR (lle1) != LLE_DENOMINATOR (lle2))
 214     return false;
 215   for (i = 0; i < depth; i++)
 216     if (LLE_COEFFICIENTS (lle1)[i] != LLE_COEFFICIENTS (lle2)[i])
 217       return false;
 218   for (i = 0; i < invariants; i++)
 219     if (LLE_INVARIANT_COEFFICIENTS (lle1)[i] !=
 220         LLE_INVARIANT_COEFFICIENTS (lle2)[i])
 221       return false;
 222   return true;
 223 }
 224
 225 /* Create a new linear expression with dimension DIM, and total number
 226    of invariants INVARIANTS.  */
 227
 228 lambda_linear_expression
 229 lambda_linear_expression_new (int dim, int invariants,
 230                               struct obstack * lambda_obstack)
 231 {
 232   lambda_linear_expression ret;
 233
 234   ret = (lambda_linear_expression)obstack_alloc (lambda_obstack,
 235                                                  sizeof (*ret));
 236   LLE_COEFFICIENTS (ret) = lambda_vector_new (dim);
 237   LLE_CONSTANT (ret) = 0;
 238   LLE_INVARIANT_COEFFICIENTS (ret) = lambda_vector_new (invariants);
 239   LLE_DENOMINATOR (ret) = 1;
 240   LLE_NEXT (ret) = NULL;
 241
 242   return ret;
 243 }
 244
 245 /* Print out a linear expression EXPR, with SIZE coefficients, to OUTFILE.
 246    The starting letter used for variable names is START.  */
 247
 248 static void
 249 print_linear_expression (FILE * outfile, lambda_vector expr, int size,
 250                          char start)
 251 {
 252   int i;
 253   bool first = true;
 254   for (i = 0; i < size; i++)
 255     {
 256       if (expr[i] != 0)
 257         {
 258           if (first)
 259             {
 260               if (expr[i] < 0)
 261                 fprintf (outfile, "-");
 262               first = false;
 263             }
 264           else if (expr[i] > 0)
 265             fprintf (outfile, " + ");
 266           else
 267             fprintf (outfile, " - ");
 268           if (abs (expr[i]) == 1)
 269             fprintf (outfile, "%c", start + i);
 270           else
 271             fprintf (outfile, "%d%c", abs (expr[i]), start + i);
 272         }
 273     }
 274 }
 275
 276 /* Print out a lambda linear expression structure, EXPR, to OUTFILE. The
 277    depth/number of coefficients is given by DEPTH, the number of invariants is
 278    given by INVARIANTS, and the character to start variable names with is given
 279    by START.  */
 280
 281 void
 282 print_lambda_linear_expression (FILE * outfile,
 283                                 lambda_linear_expression expr,
 284                                 int depth, int invariants, char start)
 285 {
 286   fprintf (outfile, "\tLinear expression: ");
 287   print_linear_expression (outfile, LLE_COEFFICIENTS (expr), depth, start);
 288   fprintf (outfile, " constant: %d ", LLE_CONSTANT (expr));
 289   fprintf (outfile, "  invariants: ");
 290   print_linear_expression (outfile, LLE_INVARIANT_COEFFICIENTS (expr),
 291                            invariants, 'A');
 292   fprintf (outfile, "  denominator: %d\n", LLE_DENOMINATOR (expr));
 293 }
 294
 295 /* Print a lambda loop structure LOOP to OUTFILE.  The depth/number of
 296    coefficients is given by DEPTH, the number of invariants is
 297    given by INVARIANTS, and the character to start variable names with is given
 298    by START.  */
 299
 300 void
 301 print_lambda_loop (FILE * outfile, lambda_loop loop, int depth,
 302                    int invariants, char start)
 303 {
 304   int step;
 305   lambda_linear_expression expr;
 306
 307   gcc_assert (loop);
 308
 309   expr = LL_LINEAR_OFFSET (loop);
 310   step = LL_STEP (loop);
 311   fprintf (outfile, "  step size = %d \n", step);
 312
 313   if (expr)
 314     {
 315       fprintf (outfile, "  linear offset: \n");
 316       print_lambda_linear_expression (outfile, expr, depth, invariants,
 317                                       start);
 318     }
 319
 320   fprintf (outfile, "  lower bound: \n");
 321   for (expr = LL_LOWER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr))
 322     print_lambda_linear_expression (outfile, expr, depth, invariants, start);
 323   fprintf (outfile, "  upper bound: \n");
 324   for (expr = LL_UPPER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr))
 325     print_lambda_linear_expression (outfile, expr, depth, invariants, start);
 326 }
 327
 328 /* Create a new loop nest structure with DEPTH loops, and INVARIANTS as the
 329    number of invariants.  */
 330
 331 lambda_loopnest
 332 lambda_loopnest_new (int depth, int invariants,
 333                      struct obstack * lambda_obstack)
 334 {
 335   lambda_loopnest ret;
 336   ret = (lambda_loopnest)obstack_alloc (lambda_obstack, sizeof (*ret));
 337
 338   LN_LOOPS (ret) = (lambda_loop *)
 339       obstack_alloc (lambda_obstack, depth * sizeof(LN_LOOPS(ret)));
 340   LN_DEPTH (ret) = depth;
 341   LN_INVARIANTS (ret) = invariants;
 342
 343   return ret;
 344 }
 345
 346 /* Print a lambda loopnest structure, NEST, to OUTFILE.  The starting
 347    character to use for loop names is given by START.  */
 348
 349 void
 350 print_lambda_loopnest (FILE * outfile, lambda_loopnest nest, char start)
 351 {
 352   int i;
 353   for (i = 0; i < LN_DEPTH (nest); i++)
 354     {
 355       fprintf (outfile, "Loop %c\n", start + i);
 356       print_lambda_loop (outfile, LN_LOOPS (nest)[i], LN_DEPTH (nest),
 357                          LN_INVARIANTS (nest), 'i');
 358       fprintf (outfile, "\n");
 359     }
 360 }
 361
 362 /* Allocate a new lattice structure of DEPTH x DEPTH, with INVARIANTS number
 363    of invariants.  */
 364
 365 static lambda_lattice
 366 lambda_lattice_new (int depth, int invariants, struct obstack * lambda_obstack)
 367 {
 368   lambda_lattice ret
 369       = (lambda_lattice)obstack_alloc (lambda_obstack, sizeof (*ret));
 370   LATTICE_BASE (ret) = lambda_matrix_new (depth, depth);
 371   LATTICE_ORIGIN (ret) = lambda_vector_new (depth);
 372   LATTICE_ORIGIN_INVARIANTS (ret) = lambda_matrix_new (depth, invariants);
 373   LATTICE_DIMENSION (ret) = depth;
 374   LATTICE_INVARIANTS (ret) = invariants;
 375   return ret;
 376 }
 377
 378 /* Compute the lattice base for NEST.  The lattice base is essentially a
 379    non-singular transform from a dense base space to a sparse iteration space.
 380    We use it so that we don't have to specially handle the case of a sparse
 381    iteration space in other parts of the algorithm.  As a result, this routine
 382    only does something interesting (IE produce a matrix that isn't the
 383    identity matrix) if NEST is a sparse space.  */
 384
 385 static lambda_lattice
 386 lambda_lattice_compute_base (lambda_loopnest nest,
 387                              struct obstack * lambda_obstack)
 388 {
 389   lambda_lattice ret;
 390   int depth, invariants;
 391   lambda_matrix base;
 392
 393   int i, j, step;
 394   lambda_loop loop;
 395   lambda_linear_expression expression;
 396
 397   depth = LN_DEPTH (nest);
 398   invariants = LN_INVARIANTS (nest);
 399
 400   ret = lambda_lattice_new (depth, invariants, lambda_obstack);
 401   base = LATTICE_BASE (ret);
 402   for (i = 0; i < depth; i++)
 403     {
 404       loop = LN_LOOPS (nest)[i];
 405       gcc_assert (loop);
 406       step = LL_STEP (loop);
 407       /* If we have a step of 1, then the base is one, and the
 408          origin and invariant coefficients are 0.  */
 409       if (step == 1)
 410         {
 411           for (j = 0; j < depth; j++)
 412             base[i][j] = 0;
 413           base[i][i] = 1;
 414           LATTICE_ORIGIN (ret)[i] = 0;
 415           for (j = 0; j < invariants; j++)
 416             LATTICE_ORIGIN_INVARIANTS (ret)[i][j] = 0;
 417         }
 418       else
 419         {
 420           /* Otherwise, we need the lower bound expression (which must
 421              be an affine function)  to determine the base.  */
 422           expression = LL_LOWER_BOUND (loop);
 423           gcc_assert (expression && !LLE_NEXT (expression)
 424                       && LLE_DENOMINATOR (expression) == 1);
 425
 426           /* The lower triangular portion of the base is going to be the
 427              coefficient times the step */
 428           for (j = 0; j < i; j++)
 429             base[i][j] = LLE_COEFFICIENTS (expression)[j]
 430               * LL_STEP (LN_LOOPS (nest)[j]);
 431           base[i][i] = step;
 432           for (j = i + 1; j < depth; j++)
 433             base[i][j] = 0;
 434
 435           /* Origin for this loop is the constant of the lower bound
 436              expression.  */
 437           LATTICE_ORIGIN (ret)[i] = LLE_CONSTANT (expression);
 438
 439           /* Coefficient for the invariants are equal to the invariant
 440              coefficients in the expression.  */
 441           for (j = 0; j < invariants; j++)
 442             LATTICE_ORIGIN_INVARIANTS (ret)[i][j] =
 443               LLE_INVARIANT_COEFFICIENTS (expression)[j];
 444         }
 445     }
 446   return ret;
 447 }
 448
 449 /* Compute the least common multiple of two numbers A and B .  */
 450
 451 int
 452 least_common_multiple (int a, int b)
 453 {
 454   return (abs (a) * abs (b) / gcd (a, b));
 455 }
 456
 457 /* Perform Fourier-Motzkin elimination to calculate the bounds of the
 458    auxiliary nest.
 459    Fourier-Motzkin is a way of reducing systems of linear inequalities so that
 460    it is easy to calculate the answer and bounds.
 461    A sketch of how it works:
 462    Given a system of linear inequalities, ai * xj >= bk, you can always
 463    rewrite the constraints so they are all of the form
 464    a <= x, or x <= b, or x >= constant for some x in x1 ... xj (and some b
 465    in b1 ... bk, and some a in a1...ai)
 466    You can then eliminate this x from the non-constant inequalities by
 467    rewriting these as a <= b, x >= constant, and delete the x variable.
 468    You can then repeat this for any remaining x variables, and then we have
 469    an easy to use variable <= constant (or no variables at all) form that we
 470    can construct our bounds from.
 471
 472    In our case, each time we eliminate, we construct part of the bound from
 473    the ith variable, then delete the ith variable.
 474
 475    Remember the constant are in our vector a, our coefficient matrix is A,
 476    and our invariant coefficient matrix is B.
 477
 478    SIZE is the size of the matrices being passed.
 479    DEPTH is the loop nest depth.
 480    INVARIANTS is the number of loop invariants.
 481    A, B, and a are the coefficient matrix, invariant coefficient, and a
 482    vector of constants, respectively.  */
 483
 484 static lambda_loopnest
 485 compute_nest_using_fourier_motzkin (int size,
 486                                     int depth,
 487                                     int invariants,
 488                                     lambda_matrix A,
 489                                     lambda_matrix B,
 490                                     lambda_vector a,
 491                                     struct obstack * lambda_obstack)
 492 {
 493
 494   int multiple, f1, f2;
 495   int i, j, k;
 496   lambda_linear_expression expression;
 497   lambda_loop loop;
 498   lambda_loopnest auxillary_nest;
 499   lambda_matrix swapmatrix, A1, B1;
 500   lambda_vector swapvector, a1;
 501   int newsize;
 502
 503   A1 = lambda_matrix_new (128, depth);
 504   B1 = lambda_matrix_new (128, invariants);
 505   a1 = lambda_vector_new (128);
 506
 507   auxillary_nest = lambda_loopnest_new (depth, invariants, lambda_obstack);
 508
 509   for (i = depth - 1; i >= 0; i--)
 510     {
 511       loop = lambda_loop_new ();
 512       LN_LOOPS (auxillary_nest)[i] = loop;
 513       LL_STEP (loop) = 1;
 514
 515       for (j = 0; j < size; j++)
 516         {
 517           if (A[j][i] < 0)
 518             {
 519               /* Any linear expression in the matrix with a coefficient less
 520                  than 0 becomes part of the new lower bound.  */
 521               expression = lambda_linear_expression_new (depth, invariants,
 522                                                          lambda_obstack);
 523
 524               for (k = 0; k < i; k++)
 525                 LLE_COEFFICIENTS (expression)[k] = A[j][k];
 526
 527               for (k = 0; k < invariants; k++)
 528                 LLE_INVARIANT_COEFFICIENTS (expression)[k] = -1 * B[j][k];
 529
 530               LLE_DENOMINATOR (expression) = -1 * A[j][i];
 531               LLE_CONSTANT (expression) = -1 * a[j];
 532
 533               /* Ignore if identical to the existing lower bound.  */
 534               if (!lle_equal (LL_LOWER_BOUND (loop),
 535                               expression, depth, invariants))
 536                 {
 537                   LLE_NEXT (expression) = LL_LOWER_BOUND (loop);
 538                   LL_LOWER_BOUND (loop) = expression;
 539                 }
 540
 541             }
 542           else if (A[j][i] > 0)
 543             {
 544               /* Any linear expression with a coefficient greater than 0
 545                  becomes part of the new upper bound.  */
 546               expression = lambda_linear_expression_new (depth, invariants,
 547                                                          lambda_obstack);
 548               for (k = 0; k < i; k++)
 549                 LLE_COEFFICIENTS (expression)[k] = -1 * A[j][k];
 550
 551               for (k = 0; k < invariants; k++)
 552                 LLE_INVARIANT_COEFFICIENTS (expression)[k] = B[j][k];
 553
 554               LLE_DENOMINATOR (expression) = A[j][i];
 555               LLE_CONSTANT (expression) = a[j];
 556
 557               /* Ignore if identical to the existing upper bound.  */
 558               if (!lle_equal (LL_UPPER_BOUND (loop),
 559                               expression, depth, invariants))
 560                 {
 561                   LLE_NEXT (expression) = LL_UPPER_BOUND (loop);
 562                   LL_UPPER_BOUND (loop) = expression;
 563                 }
 564
 565             }
 566         }
 567
 568       /* This portion creates a new system of linear inequalities by deleting
 569          the i'th variable, reducing the system by one variable.  */
 570       newsize = 0;
 571       for (j = 0; j < size; j++)
 572         {
 573           /* If the coefficient for the i'th variable is 0, then we can just
 574              eliminate the variable straightaway.  Otherwise, we have to
 575              multiply through by the coefficients we are eliminating.  */
 576           if (A[j][i] == 0)
 577             {
 578               lambda_vector_copy (A[j], A1[newsize], depth);
 579               lambda_vector_copy (B[j], B1[newsize], invariants);
 580               a1[newsize] = a[j];
 581               newsize++;
 582             }
 583           else if (A[j][i] > 0)
 584             {
 585               for (k = 0; k < size; k++)
 586                 {
 587                   if (A[k][i] < 0)
 588                     {
 589                       multiple = least_common_multiple (A[j][i], A[k][i]);
 590                       f1 = multiple / A[j][i];
 591                       f2 = -1 * multiple / A[k][i];
 592
 593                       lambda_vector_add_mc (A[j], f1, A[k], f2,
 594                                             A1[newsize], depth);
 595                       lambda_vector_add_mc (B[j], f1, B[k], f2,
 596                                             B1[newsize], invariants);
 597                       a1[newsize] = f1 * a[j] + f2 * a[k];
 598                       newsize++;
 599                     }
 600                 }
 601             }
 602         }
 603
 604       swapmatrix = A;
 605       A = A1;
 606       A1 = swapmatrix;
 607
 608       swapmatrix = B;
 609       B = B1;
 610       B1 = swapmatrix;
 611
 612       swapvector = a;
 613       a = a1;
 614       a1 = swapvector;
 615
 616       size = newsize;
 617     }
 618
 619   return auxillary_nest;
 620 }
 621
 622 /* Compute the loop bounds for the auxiliary space NEST.
 623    Input system used is Ax <= b.  TRANS is the unimodular transformation.
 624    Given the original nest, this function will
 625    1. Convert the nest into matrix form, which consists of a matrix for the
 626    coefficients, a matrix for the
 627    invariant coefficients, and a vector for the constants.
 628    2. Use the matrix form to calculate the lattice base for the nest (which is
 629    a dense space)
 630    3. Compose the dense space transform with the user specified transform, to
 631    get a transform we can easily calculate transformed bounds for.
 632    4. Multiply the composed transformation matrix times the matrix form of the
 633    loop.
 634    5. Transform the newly created matrix (from step 4) back into a loop nest
 635    using Fourier-Motzkin elimination to figure out the bounds.  */
 636
 637 static lambda_loopnest
 638 lambda_compute_auxillary_space (lambda_loopnest nest,
 639                                 lambda_trans_matrix trans,
 640                                 struct obstack * lambda_obstack)
 641 {
 642   lambda_matrix A, B, A1, B1;
 643   lambda_vector a, a1;
 644   lambda_matrix invertedtrans;
 645   int depth, invariants, size;
 646   int i, j;
 647   lambda_loop loop;
 648   lambda_linear_expression expression;
 649   lambda_lattice lattice;
 650
 651   depth = LN_DEPTH (nest);
 652   invariants = LN_INVARIANTS (nest);
 653
 654   /* Unfortunately, we can't know the number of constraints we'll have
 655      ahead of time, but this should be enough even in ridiculous loop nest
 656      cases. We must not go over this limit.  */
 657   A = lambda_matrix_new (128, depth);
 658   B = lambda_matrix_new (128, invariants);
 659   a = lambda_vector_new (128);
 660
 661   A1 = lambda_matrix_new (128, depth);
 662   B1 = lambda_matrix_new (128, invariants);
 663   a1 = lambda_vector_new (128);
 664
 665   /* Store the bounds in the equation matrix A, constant vector a, and
 666      invariant matrix B, so that we have Ax <= a + B.
 667      This requires a little equation rearranging so that everything is on the
 668      correct side of the inequality.  */
 669   size = 0;
 670   for (i = 0; i < depth; i++)
 671     {
 672       loop = LN_LOOPS (nest)[i];
 673
 674       /* First we do the lower bound.  */
 675       if (LL_STEP (loop) > 0)
 676         expression = LL_LOWER_BOUND (loop);
 677       else
 678         expression = LL_UPPER_BOUND (loop);
 679
 680       for (; expression != NULL; expression = LLE_NEXT (expression))
 681         {
 682           /* Fill in the coefficient.  */
 683           for (j = 0; j < i; j++)
 684             A[size][j] = LLE_COEFFICIENTS (expression)[j];
 685
 686           /* And the invariant coefficient.  */
 687           for (j = 0; j < invariants; j++)
 688             B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j];
 689
 690           /* And the constant.  */
 691           a[size] = LLE_CONSTANT (expression);
 692
 693           /* Convert (2x+3y+2+b)/4 <= z to 2x+3y-4z <= -2-b.  IE put all
 694              constants and single variables on   */
 695           A[size][i] = -1 * LLE_DENOMINATOR (expression);
 696           a[size] *= -1;
 697           for (j = 0; j < invariants; j++)
 698             B[size][j] *= -1;
 699
 700           size++;
 701           /* Need to increase matrix sizes above.  */
 702           gcc_assert (size <= 127);
 703
 704         }
 705
 706       /* Then do the exact same thing for the upper bounds.  */
 707       if (LL_STEP (loop) > 0)
 708         expression = LL_UPPER_BOUND (loop);
 709       else
 710         expression = LL_LOWER_BOUND (loop);
 711
 712       for (; expression != NULL; expression = LLE_NEXT (expression))
 713         {
 714           /* Fill in the coefficient.  */
 715           for (j = 0; j < i; j++)
 716             A[size][j] = LLE_COEFFICIENTS (expression)[j];
 717
 718           /* And the invariant coefficient.  */
 719           for (j = 0; j < invariants; j++)
 720             B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j];
 721
 722           /* And the constant.  */
 723           a[size] = LLE_CONSTANT (expression);
 724
 725           /* Convert z <= (2x+3y+2+b)/4 to -2x-3y+4z <= 2+b.  */
 726           for (j = 0; j < i; j++)
 727             A[size][j] *= -1;
 728           A[size][i] = LLE_DENOMINATOR (expression);
 729           size++;
 730           /* Need to increase matrix sizes above.  */
 731           gcc_assert (size <= 127);
 732
 733         }
 734     }
 735
 736   /* Compute the lattice base x = base * y + origin, where y is the
 737      base space.  */
 738   lattice = lambda_lattice_compute_base (nest, lambda_obstack);
 739
 740   /* Ax <= a + B then becomes ALy <= a+B - A*origin.  L is the lattice base  */
 741
 742   /* A1 = A * L */
 743   lambda_matrix_mult (A, LATTICE_BASE (lattice), A1, size, depth, depth);
 744
 745   /* a1 = a - A * origin constant.  */
 746   lambda_matrix_vector_mult (A, size, depth, LATTICE_ORIGIN (lattice), a1);
 747   lambda_vector_add_mc (a, 1, a1, -1, a1, size);
 748
 749   /* B1 = B - A * origin invariant.  */
 750   lambda_matrix_mult (A, LATTICE_ORIGIN_INVARIANTS (lattice), B1, size, depth,
 751                       invariants);
 752   lambda_matrix_add_mc (B, 1, B1, -1, B1, size, invariants);
 753
 754   /* Now compute the auxiliary space bounds by first inverting U, multiplying
 755      it by A1, then performing Fourier-Motzkin.  */
 756
 757   invertedtrans = lambda_matrix_new (depth, depth);
 758
 759   /* Compute the inverse of U.  */
 760   lambda_matrix_inverse (LTM_MATRIX (trans),
 761                          invertedtrans, depth);
 762
 763   /* A = A1 inv(U).  */
 764   lambda_matrix_mult (A1, invertedtrans, A, size, depth, depth);
 765
 766   return compute_nest_using_fourier_motzkin (size, depth, invariants,
 767                                              A, B1, a1, lambda_obstack);
 768 }
 769
 770 /* Compute the loop bounds for the target space, using the bounds of
 771    the auxiliary nest AUXILLARY_NEST, and the triangular matrix H.
 772    The target space loop bounds are computed by multiplying the triangular
 773    matrix H by the auxiliary nest, to get the new loop bounds.  The sign of
 774    the loop steps (positive or negative) is then used to swap the bounds if
 775    the loop counts downwards.
 776    Return the target loopnest.  */
 777
 778 static lambda_loopnest
 779 lambda_compute_target_space (lambda_loopnest auxillary_nest,
 780                              lambda_trans_matrix H, lambda_vector stepsigns,
 781                              struct obstack * lambda_obstack)
 782 {
 783   lambda_matrix inverse, H1;
 784   int determinant, i, j;
 785   int gcd1, gcd2;
 786   int factor;
 787
 788   lambda_loopnest target_nest;
 789   int depth, invariants;
 790   lambda_matrix target;
 791
 792   lambda_loop auxillary_loop, target_loop;
 793   lambda_linear_expression expression, auxillary_expr, target_expr, tmp_expr;
 794
 795   depth = LN_DEPTH (auxillary_nest);
 796   invariants = LN_INVARIANTS (auxillary_nest);
 797
 798   inverse = lambda_matrix_new (depth, depth);
 799   determinant = lambda_matrix_inverse (LTM_MATRIX (H), inverse, depth);
 800
 801   /* H1 is H excluding its diagonal.  */
 802   H1 = lambda_matrix_new (depth, depth);
 803   lambda_matrix_copy (LTM_MATRIX (H), H1, depth, depth);
 804
 805   for (i = 0; i < depth; i++)
 806     H1[i][i] = 0;
 807
 808   /* Computes the linear offsets of the loop bounds.  */
 809   target = lambda_matrix_new (depth, depth);
 810   lambda_matrix_mult (H1, inverse, target, depth, depth, depth);
 811
 812   target_nest = lambda_loopnest_new (depth, invariants, lambda_obstack);
 813
 814   for (i = 0; i < depth; i++)
 815     {
 816
 817       /* Get a new loop structure.  */
 818       target_loop = lambda_loop_new ();
 819       LN_LOOPS (target_nest)[i] = target_loop;
 820
 821       /* Computes the gcd of the coefficients of the linear part.  */
 822       gcd1 = lambda_vector_gcd (target[i], i);
 823
 824       /* Include the denominator in the GCD.  */
 825       gcd1 = gcd (gcd1, determinant);
 826
 827       /* Now divide through by the gcd.  */
 828       for (j = 0; j < i; j++)
 829         target[i][j] = target[i][j] / gcd1;
 830
 831       expression = lambda_linear_expression_new (depth, invariants,
 832                                                  lambda_obstack);
 833       lambda_vector_copy (target[i], LLE_COEFFICIENTS (expression), depth);
 834       LLE_DENOMINATOR (expression) = determinant / gcd1;
 835       LLE_CONSTANT (expression) = 0;
 836       lambda_vector_clear (LLE_INVARIANT_COEFFICIENTS (expression),
 837                            invariants);
 838       LL_LINEAR_OFFSET (target_loop) = expression;
 839     }
 840
 841   /* For each loop, compute the new bounds from H.  */
 842   for (i = 0; i < depth; i++)
 843     {
 844       auxillary_loop = LN_LOOPS (auxillary_nest)[i];
 845       target_loop = LN_LOOPS (target_nest)[i];
 846       LL_STEP (target_loop) = LTM_MATRIX (H)[i][i];
 847       factor = LTM_MATRIX (H)[i][i];
 848
 849       /* First we do the lower bound.  */
 850       auxillary_expr = LL_LOWER_BOUND (auxillary_loop);
 851
 852       for (; auxillary_expr != NULL;
 853            auxillary_expr = LLE_NEXT (auxillary_expr))
 854         {
 855           target_expr = lambda_linear_expression_new (depth, invariants,
 856                                                       lambda_obstack);
 857           lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
 858                                      depth, inverse, depth,
 859                                      LLE_COEFFICIENTS (target_expr));
 860           lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
 861                                     LLE_COEFFICIENTS (target_expr), depth,
 862                                     factor);
 863
 864           LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
 865           lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
 866                               LLE_INVARIANT_COEFFICIENTS (target_expr),
 867                               invariants);
 868           lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
 869                                     LLE_INVARIANT_COEFFICIENTS (target_expr),
 870                                     invariants, factor);
 871           LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);
 872
 873           if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
 874             {
 875               LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
 876                 * determinant;
 877               lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
 878                                         (target_expr),
 879                                         LLE_INVARIANT_COEFFICIENTS
 880                                         (target_expr), invariants,
 881                                         determinant);
 882               LLE_DENOMINATOR (target_expr) =
 883                 LLE_DENOMINATOR (target_expr) * determinant;
 884             }
 885           /* Find the gcd and divide by it here, rather than doing it
 886              at the tree level.  */
 887           gcd1 = lambda_vector_gcd (LLE_COEFFICIENTS (target_expr), depth);
 888           gcd2 = lambda_vector_gcd (LLE_INVARIANT_COEFFICIENTS (target_expr),
 889                                     invariants);
 890           gcd1 = gcd (gcd1, gcd2);
 891           gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
 892           gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
 893           for (j = 0; j < depth; j++)
 894             LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
 895           for (j = 0; j < invariants; j++)
 896             LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
 897           LLE_CONSTANT (target_expr) /= gcd1;
 898           LLE_DENOMINATOR (target_expr) /= gcd1;
 899           /* Ignore if identical to existing bound.  */
 900           if (!lle_equal (LL_LOWER_BOUND (target_loop), target_expr, depth,
 901                           invariants))
 902             {
 903               LLE_NEXT (target_expr) = LL_LOWER_BOUND (target_loop);
 904               LL_LOWER_BOUND (target_loop) = target_expr;
 905             }
 906         }
 907       /* Now do the upper bound.  */
 908       auxillary_expr = LL_UPPER_BOUND (auxillary_loop);
 909
 910       for (; auxillary_expr != NULL;
 911            auxillary_expr = LLE_NEXT (auxillary_expr))
 912         {
 913           target_expr = lambda_linear_expression_new (depth, invariants,
 914                                                       lambda_obstack);
 915           lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
 916                                      depth, inverse, depth,
 917                                      LLE_COEFFICIENTS (target_expr));
 918           lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
 919                                     LLE_COEFFICIENTS (target_expr), depth,
 920                                     factor);
 921           LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
 922           lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
 923                               LLE_INVARIANT_COEFFICIENTS (target_expr),
 924                               invariants);
 925           lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
 926                                     LLE_INVARIANT_COEFFICIENTS (target_expr),
 927                                     invariants, factor);
 928           LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);
 929
 930           if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
 931             {
 932               LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
 933                 * determinant;
 934               lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
 935                                         (target_expr),
 936                                         LLE_INVARIANT_COEFFICIENTS
 937                                         (target_expr), invariants,
 938                                         determinant);
 939               LLE_DENOMINATOR (target_expr) =
 940                 LLE_DENOMINATOR (target_expr) * determinant;
 941             }
 942           /* Find the gcd and divide by it here, instead of at the
 943              tree level.  */
 944           gcd1 = lambda_vector_gcd (LLE_COEFFICIENTS (target_expr), depth);
 945           gcd2 = lambda_vector_gcd (LLE_INVARIANT_COEFFICIENTS (target_expr),
 946                                     invariants);
 947           gcd1 = gcd (gcd1, gcd2);
 948           gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
 949           gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
 950           for (j = 0; j < depth; j++)
 951             LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
 952           for (j = 0; j < invariants; j++)
 953             LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
 954           LLE_CONSTANT (target_expr) /= gcd1;
 955           LLE_DENOMINATOR (target_expr) /= gcd1;
 956           /* Ignore if equal to existing bound.  */
 957           if (!lle_equal (LL_UPPER_BOUND (target_loop), target_expr, depth,
 958                           invariants))
 959             {
 960               LLE_NEXT (target_expr) = LL_UPPER_BOUND (target_loop);
 961               LL_UPPER_BOUND (target_loop) = target_expr;
 962             }
 963         }
 964     }
 965   for (i = 0; i < depth; i++)
 966     {
 967       target_loop = LN_LOOPS (target_nest)[i];
 968       /* If necessary, exchange the upper and lower bounds and negate
 969          the step size.  */
 970       if (stepsigns[i] < 0)
 971         {
 972           LL_STEP (target_loop) *= -1;
 973           tmp_expr = LL_LOWER_BOUND (target_loop);
 974           LL_LOWER_BOUND (target_loop) = LL_UPPER_BOUND (target_loop);
 975           LL_UPPER_BOUND (target_loop) = tmp_expr;
 976         }
 977     }
 978   return target_nest;
 979 }
 980
 981 /* Compute the step signs of TRANS, using TRANS and stepsigns.  Return the new
 982    result.  */
 983
 984 static lambda_vector
 985 lambda_compute_step_signs (lambda_trans_matrix trans, lambda_vector stepsigns)
 986 {
 987   lambda_matrix matrix, H;
 988   int size;
 989   lambda_vector newsteps;
 990   int i, j, factor, minimum_column;
 991   int temp;
 992
 993   matrix = LTM_MATRIX (trans);
 994   size = LTM_ROWSIZE (trans);
 995   H = lambda_matrix_new (size, size);
 996
 997   newsteps = lambda_vector_new (size);
 998   lambda_vector_copy (stepsigns, newsteps, size);
 999
1000   lambda_matrix_copy (matrix, H, size, size);
1001
1002   for (j = 0; j < size; j++)
1003     {
1004       lambda_vector row;
1005       row = H[j];
1006       for (i = j; i < size; i++)
1007         if (row[i] < 0)
1008           lambda_matrix_col_negate (H, size, i);
1009       while (lambda_vector_first_nz (row, size, j + 1) < size)
1010         {
1011           minimum_column = lambda_vector_min_nz (row, size, j);
1012           lambda_matrix_col_exchange (H, size, j, minimum_column);
1013
1014           temp = newsteps[j];
1015           newsteps[j] = newsteps[minimum_column];
1016           newsteps[minimum_column] = temp;
1017
1018           for (i = j + 1; i < size; i++)
1019             {
1020               factor = row[i] / row[j];
1021               lambda_matrix_col_add (H, size, j, i, -1 * factor);
1022             }
1023         }
1024     }
1025   return newsteps;
1026 }
1027
1028 /* Transform NEST according to TRANS, and return the new loopnest.
1029    This involves
1030    1. Computing a lattice base for the transformation
1031    2. Composing the dense base with the specified transformation (TRANS)
1032    3. Decomposing the combined transformation into a lower triangular portion,
1033    and a unimodular portion.
1034    4. Computing the auxiliary nest using the unimodular portion.
1035    5. Computing the target nest using the auxiliary nest and the lower
1036    triangular portion.  */
1037
1038 lambda_loopnest
1039 lambda_loopnest_transform (lambda_loopnest nest, lambda_trans_matrix trans,
1040                            struct obstack * lambda_obstack)
1041 {
1042   lambda_loopnest auxillary_nest, target_nest;
1043
1044   int depth, invariants;
1045   int i, j;
1046   lambda_lattice lattice;
1047   lambda_trans_matrix trans1, H, U;
1048   lambda_loop loop;
1049   lambda_linear_expression expression;
1050   lambda_vector origin;
1051   lambda_matrix origin_invariants;
1052   lambda_vector stepsigns;
1053   int f;
1054
1055   depth = LN_DEPTH (nest);
1056   invariants = LN_INVARIANTS (nest);
1057
1058   /* Keep track of the signs of the loop steps.  */
1059   stepsigns = lambda_vector_new (depth);
1060   for (i = 0; i < depth; i++)
1061     {
1062       if (LL_STEP (LN_LOOPS (nest)[i]) > 0)
1063         stepsigns[i] = 1;
1064       else
1065         stepsigns[i] = -1;
1066     }
1067
1068   /* Compute the lattice base.  */
1069   lattice = lambda_lattice_compute_base (nest, lambda_obstack);
1070   trans1 = lambda_trans_matrix_new (depth, depth);
1071
1072   /* Multiply the transformation matrix by the lattice base.  */
1073
1074   lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_BASE (lattice),
1075                       LTM_MATRIX (trans1), depth, depth, depth);
1076
1077   /* Compute the Hermite normal form for the new transformation matrix.  */
1078   H = lambda_trans_matrix_new (depth, depth);
1079   U = lambda_trans_matrix_new (depth, depth);
1080   lambda_matrix_hermite (LTM_MATRIX (trans1), depth, LTM_MATRIX (H),
1081                          LTM_MATRIX (U));
1082
1083   /* Compute the auxiliary loop nest's space from the unimodular
1084      portion.  */
1085   auxillary_nest = lambda_compute_auxillary_space (nest, U, lambda_obstack);
1086
1087   /* Compute the loop step signs from the old step signs and the
1088      transformation matrix.  */
1089   stepsigns = lambda_compute_step_signs (trans1, stepsigns);
1090
1091   /* Compute the target loop nest space from the auxiliary nest and
1092      the lower triangular matrix H.  */
1093   target_nest = lambda_compute_target_space (auxillary_nest, H, stepsigns,
1094                                              lambda_obstack);
1095   origin = lambda_vector_new (depth);
1096   origin_invariants = lambda_matrix_new (depth, invariants);
1097   lambda_matrix_vector_mult (LTM_MATRIX (trans), depth, depth,
1098                              LATTICE_ORIGIN (lattice), origin);
1099   lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_ORIGIN_INVARIANTS (lattice),
1100                       origin_invariants, depth, depth, invariants);
1101
1102   for (i = 0; i < depth; i++)
1103     {
1104       loop = LN_LOOPS (target_nest)[i];
1105       expression = LL_LINEAR_OFFSET (loop);
1106       if (lambda_vector_zerop (LLE_COEFFICIENTS (expression), depth))
1107         f = 1;
1108       else
1109         f = LLE_DENOMINATOR (expression);
1110
1111       LLE_CONSTANT (expression) += f * origin[i];
1112
1113       for (j = 0; j < invariants; j++)
1114         LLE_INVARIANT_COEFFICIENTS (expression)[j] +=
1115           f * origin_invariants[i][j];
1116     }
1117
1118   return target_nest;
1119
1120 }
1121
1122 /* Convert a gcc tree expression EXPR to a lambda linear expression, and
1123    return the new expression.  DEPTH is the depth of the loopnest.
1124    OUTERINDUCTIONVARS is an array of the induction variables for outer loops
1125    in this nest.  INVARIANTS is the array of invariants for the loop.  EXTRA
1126    is the amount we have to add/subtract from the expression because of the
1127    type of comparison it is used in.  */
1128
1129 static lambda_linear_expression
1130 gcc_tree_to_linear_expression (int depth, tree expr,
1131                                VEC(tree,heap) *outerinductionvars,
1132                                VEC(tree,heap) *invariants, int extra,
1133                                struct obstack * lambda_obstack)
1134 {
1135   lambda_linear_expression lle = NULL;
1136   switch (TREE_CODE (expr))
1137     {
1138     case INTEGER_CST:
1139       {
1140         lle = lambda_linear_expression_new (depth, 2 * depth, lambda_obstack);
1141         LLE_CONSTANT (lle) = TREE_INT_CST_LOW (expr);
1142         if (extra != 0)
1143           LLE_CONSTANT (lle) += extra;
1144
1145         LLE_DENOMINATOR (lle) = 1;
1146       }
1147       break;
1148     case SSA_NAME:
1149       {
1150         tree iv, invar;
1151         size_t i;
1152         for (i = 0; VEC_iterate (tree, outerinductionvars, i, iv); i++)
1153           if (iv != NULL)
1154             {
1155               if (SSA_NAME_VAR (iv) == SSA_NAME_VAR (expr))
1156                 {
1157                   lle = lambda_linear_expression_new (depth, 2 * depth,
1158                                                       lambda_obstack);
1159                   LLE_COEFFICIENTS (lle)[i] = 1;
1160                   if (extra != 0)
1161                     LLE_CONSTANT (lle) = extra;
1162
1163                   LLE_DENOMINATOR (lle) = 1;
1164                 }
1165             }
1166         for (i = 0; VEC_iterate (tree, invariants, i, invar); i++)
1167           if (invar != NULL)
1168             {
1169               if (SSA_NAME_VAR (invar) == SSA_NAME_VAR (expr))
1170                 {
1171                   lle = lambda_linear_expression_new (depth, 2 * depth,
1172                                                       lambda_obstack);
1173                   LLE_INVARIANT_COEFFICIENTS (lle)[i] = 1;
1174                   if (extra != 0)
1175                     LLE_CONSTANT (lle) = extra;
1176                   LLE_DENOMINATOR (lle) = 1;
1177                 }
1178             }
1179       }
1180       break;
1181     default:
1182       return NULL;
1183     }
1184
1185   return lle;
1186 }
1187
1188 /* Return the depth of the loopnest NEST */
1189
1190 static int
1191 depth_of_nest (struct loop *nest)
1192 {
1193   size_t depth = 0;
1194   while (nest)
1195     {
1196       depth++;
1197       nest = nest->inner;
1198     }
1199   return depth;
1200 }
1201
1202
1203 /* Return true if OP is invariant in LOOP and all outer loops.  */
1204
1205 static bool
1206 invariant_in_loop_and_outer_loops (struct loop *loop, tree op)
1207 {
1208   if (is_gimple_min_invariant (op))
1209     return true;
1210   if (loop_depth (loop) == 0)
1211     return true;
1212   if (!expr_invariant_in_loop_p (loop, op))
1213     return false;
1214   if (!invariant_in_loop_and_outer_loops (loop_outer (loop), op))
1215     return false;
1216   return true;
1217 }
1218
1219 /* Generate a lambda loop from a gcc loop LOOP.  Return the new lambda loop,
1220    or NULL if it could not be converted.
1221    DEPTH is the depth of the loop.
1222    INVARIANTS is a pointer to the array of loop invariants.
1223    The induction variable for this loop should be stored in the parameter
1224    OURINDUCTIONVAR.
1225    OUTERINDUCTIONVARS is an array of induction variables for outer loops.  */
1226
1227 static lambda_loop
1228 gcc_loop_to_lambda_loop (struct loop *loop, int depth,
1229                          VEC(tree,heap) ** invariants,
1230                          tree * ourinductionvar,
1231                          VEC(tree,heap) * outerinductionvars,
1232                          VEC(tree,heap) ** lboundvars,
1233                          VEC(tree,heap) ** uboundvars,
1234                          VEC(int,heap) ** steps,
1235                          struct obstack * lambda_obstack)
1236 {
1237   tree phi;
1238   tree exit_cond;
1239   tree access_fn, inductionvar;
1240   tree step;
1241   lambda_loop lloop = NULL;
1242   lambda_linear_expression lbound, ubound;
1243   tree test;
1244   int stepint;
1245   int extra = 0;
1246   tree lboundvar, uboundvar, uboundresult;
1247
1248   /* Find out induction var and exit condition.  */
1249   inductionvar = find_induction_var_from_exit_cond (loop);
1250   exit_cond = get_loop_exit_condition (loop);
1251
1252   if (inductionvar == NULL || exit_cond == NULL)
1253     {
1254       if (dump_file && (dump_flags & TDF_DETAILS))
1255         fprintf (dump_file,
1256                  "Unable to convert loop: Cannot determine exit condition or induction variable for loop.\n");
1257       return NULL;
1258     }
1259
1260   test = TREE_OPERAND (exit_cond, 0);
1261
1262   if (SSA_NAME_DEF_STMT (inductionvar) == NULL_TREE)
1263     {
1264
1265       if (dump_file && (dump_flags & TDF_DETAILS))
1266         fprintf (dump_file,
1267                  "Unable to convert loop: Cannot find PHI node for induction variable\n");
1268
1269       return NULL;
1270     }
1271
1272   phi = SSA_NAME_DEF_STMT (inductionvar);
1273   if (TREE_CODE (phi) != PHI_NODE)
1274     {
1275       phi = SINGLE_SSA_TREE_OPERAND (phi, SSA_OP_USE);
1276       if (!phi)
1277         {
1278
1279           if (dump_file && (dump_flags & TDF_DETAILS))
1280             fprintf (dump_file,
1281                      "Unable to convert loop: Cannot find PHI node for induction variable\n");
1282
1283           return NULL;
1284         }
1285
1286       phi = SSA_NAME_DEF_STMT (phi);
1287       if (TREE_CODE (phi) != PHI_NODE)
1288         {
1289
1290           if (dump_file && (dump_flags & TDF_DETAILS))
1291             fprintf (dump_file,
1292                      "Unable to convert loop: Cannot find PHI node for induction variable\n");
1293           return NULL;
1294         }
1295
1296     }
1297
1298   /* The induction variable name/version we want to put in the array is the
1299      result of the induction variable phi node.  */
1300   *ourinductionvar = PHI_RESULT (phi);
1301   access_fn = instantiate_parameters
1302     (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
1303   if (access_fn == chrec_dont_know)
1304     {
1305       if (dump_file && (dump_flags & TDF_DETAILS))
1306         fprintf (dump_file,
1307                  "Unable to convert loop: Access function for induction variable phi is unknown\n");
1308
1309       return NULL;
1310     }
1311
1312   step = evolution_part_in_loop_num (access_fn, loop->num);
1313   if (!step || step == chrec_dont_know)
1314     {
1315       if (dump_file && (dump_flags & TDF_DETAILS))
1316         fprintf (dump_file,
1317                  "Unable to convert loop: Cannot determine step of loop.\n");
1318
1319       return NULL;
1320     }
1321   if (TREE_CODE (step) != INTEGER_CST)
1322     {
1323
1324       if (dump_file && (dump_flags & TDF_DETAILS))
1325         fprintf (dump_file,
1326                  "Unable to convert loop: Step of loop is not integer.\n");
1327       return NULL;
1328     }
1329
1330   stepint = TREE_INT_CST_LOW (step);
1331
1332   /* Only want phis for induction vars, which will have two
1333      arguments.  */
1334   if (PHI_NUM_ARGS (phi) != 2)
1335     {
1336       if (dump_file && (dump_flags & TDF_DETAILS))
1337         fprintf (dump_file,
1338                  "Unable to convert loop: PHI node for induction variable has >2 arguments\n");
1339       return NULL;
1340     }
1341
1342   /* Another induction variable check. One argument's source should be
1343      in the loop, one outside the loop.  */
1344   if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 0)->src)
1345       && flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 1)->src))
1346     {
1347
1348       if (dump_file && (dump_flags & TDF_DETAILS))
1349         fprintf (dump_file,
1350                  "Unable to convert loop: PHI edges both inside loop, or both outside loop.\n");
1351
1352       return NULL;
1353     }
1354
1355   if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 0)->src))
1356     {
1357       lboundvar = PHI_ARG_DEF (phi, 1);
1358       lbound = gcc_tree_to_linear_expression (depth, lboundvar,
1359                                               outerinductionvars, *invariants,
1360                                               0, lambda_obstack);
1361     }
1362   else
1363     {
1364       lboundvar = PHI_ARG_DEF (phi, 0);
1365       lbound = gcc_tree_to_linear_expression (depth, lboundvar,
1366                                               outerinductionvars, *invariants,
1367                                               0, lambda_obstack);
1368     }
1369
1370   if (!lbound)
1371     {
1372
1373       if (dump_file && (dump_flags & TDF_DETAILS))
1374         fprintf (dump_file,
1375                  "Unable to convert loop: Cannot convert lower bound to linear expression\n");
1376
1377       return NULL;
1378     }
1379   /* One part of the test may be a loop invariant tree.  */
1380   VEC_reserve (tree, heap, *invariants, 1);
1381   if (TREE_CODE (TREE_OPERAND (test, 1)) == SSA_NAME
1382       && invariant_in_loop_and_outer_loops (loop, TREE_OPERAND (test, 1)))
1383     VEC_quick_push (tree, *invariants, TREE_OPERAND (test, 1));
1384   else if (TREE_CODE (TREE_OPERAND (test, 0)) == SSA_NAME
1385            && invariant_in_loop_and_outer_loops (loop, TREE_OPERAND (test, 0)))
1386     VEC_quick_push (tree, *invariants, TREE_OPERAND (test, 0));
1387
1388   /* The non-induction variable part of the test is the upper bound variable.
1389    */
1390   if (TREE_OPERAND (test, 0) == inductionvar)
1391     uboundvar = TREE_OPERAND (test, 1);
1392   else
1393     uboundvar = TREE_OPERAND (test, 0);
1394
1395
1396   /* We only size the vectors assuming we have, at max, 2 times as many
1397      invariants as we do loops (one for each bound).
1398      This is just an arbitrary number, but it has to be matched against the
1399      code below.  */
1400   gcc_assert (VEC_length (tree, *invariants) <= (unsigned int) (2 * depth));
1401
1402
1403   /* We might have some leftover.  */
1404   if (TREE_CODE (test) == LT_EXPR)
1405     extra = -1 * stepint;
1406   else if (TREE_CODE (test) == NE_EXPR)
1407     extra = -1 * stepint;
1408   else if (TREE_CODE (test) == GT_EXPR)
1409     extra = -1 * stepint;
1410   else if (TREE_CODE (test) == EQ_EXPR)
1411     extra = 1 * stepint;
1412
1413   ubound = gcc_tree_to_linear_expression (depth, uboundvar,
1414                                           outerinductionvars,
1415                                           *invariants, extra, lambda_obstack);
1416   uboundresult = build2 (PLUS_EXPR, TREE_TYPE (uboundvar), uboundvar,
1417                          build_int_cst (TREE_TYPE (uboundvar), extra));
1418   VEC_safe_push (tree, heap, *uboundvars, uboundresult);
1419   VEC_safe_push (tree, heap, *lboundvars, lboundvar);
1420   VEC_safe_push (int, heap, *steps, stepint);
1421   if (!ubound)
1422     {
1423       if (dump_file && (dump_flags & TDF_DETAILS))
1424         fprintf (dump_file,
1425                  "Unable to convert loop: Cannot convert upper bound to linear expression\n");
1426       return NULL;
1427     }
1428
1429   lloop = lambda_loop_new ();
1430   LL_STEP (lloop) = stepint;
1431   LL_LOWER_BOUND (lloop) = lbound;
1432   LL_UPPER_BOUND (lloop) = ubound;
1433   return lloop;
1434 }
1435
1436 /* Given a LOOP, find the induction variable it is testing against in the exit
1437    condition.  Return the induction variable if found, NULL otherwise.  */
1438
1439 static tree
1440 find_induction_var_from_exit_cond (struct loop *loop)
1441 {
1442   tree expr = get_loop_exit_condition (loop);
1443   tree ivarop;
1444   tree test;
1445   if (expr == NULL_TREE)
1446     return NULL_TREE;
1447   if (TREE_CODE (expr) != COND_EXPR)
1448     return NULL_TREE;
1449   test = TREE_OPERAND (expr, 0);
1450   if (!COMPARISON_CLASS_P (test))
1451     return NULL_TREE;
1452
1453   /* Find the side that is invariant in this loop. The ivar must be the other
1454      side.  */
1455
1456   if (expr_invariant_in_loop_p (loop, TREE_OPERAND (test, 0)))
1457       ivarop = TREE_OPERAND (test, 1);
1458   else if (expr_invariant_in_loop_p (loop, TREE_OPERAND (test, 1)))
1459       ivarop = TREE_OPERAND (test, 0);
1460   else
1461     return NULL_TREE;
1462
1463   if (TREE_CODE (ivarop) != SSA_NAME)
1464     return NULL_TREE;
1465   return ivarop;
1466 }
1467
1468 DEF_VEC_P(lambda_loop);
1469 DEF_VEC_ALLOC_P(lambda_loop,heap);
1470
1471 /* Generate a lambda loopnest from a gcc loopnest LOOP_NEST.
1472    Return the new loop nest.
1473    INDUCTIONVARS is a pointer to an array of induction variables for the
1474    loopnest that will be filled in during this process.
1475    INVARIANTS is a pointer to an array of invariants that will be filled in
1476    during this process.  */
1477
1478 lambda_loopnest
1479 gcc_loopnest_to_lambda_loopnest (struct loop *loop_nest,
1480                                  VEC(tree,heap) **inductionvars,
1481                                  VEC(tree,heap) **invariants,
1482                                  struct obstack * lambda_obstack)
1483 {
1484   lambda_loopnest ret = NULL;
1485   struct loop *temp = loop_nest;
1486   int depth = depth_of_nest (loop_nest);
1487   size_t i;
1488   VEC(lambda_loop,heap) *loops = NULL;
1489   VEC(tree,heap) *uboundvars = NULL;
1490   VEC(tree,heap) *lboundvars  = NULL;
1491   VEC(int,heap) *steps = NULL;
1492   lambda_loop newloop;
1493   tree inductionvar = NULL;
1494   bool perfect_nest = perfect_nest_p (loop_nest);
1495
1496   if (!perfect_nest && !can_convert_to_perfect_nest (loop_nest))
1497     goto fail;
1498
1499   while (temp)
1500     {
1501       newloop = gcc_loop_to_lambda_loop (temp, depth, invariants,
1502                                          &inductionvar, *inductionvars,
1503                                          &lboundvars, &uboundvars,
1504                                          &steps, lambda_obstack);
1505       if (!newloop)
1506         goto fail;
1507
1508       VEC_safe_push (tree, heap, *inductionvars, inductionvar);
1509       VEC_safe_push (lambda_loop, heap, loops, newloop);
1510       temp = temp->inner;
1511     }
1512
1513   if (!perfect_nest)
1514     {
1515       if (!perfect_nestify (loop_nest, lboundvars, uboundvars, steps,
1516                             *inductionvars))
1517         {
1518           if (dump_file)
1519             fprintf (dump_file,
1520                      "Not a perfect loop nest and couldn't convert to one.\n");
1521           goto fail;
1522         }
1523       else if (dump_file)
1524         fprintf (dump_file,
1525                  "Successfully converted loop nest to perfect loop nest.\n");
1526     }
1527
1528   ret = lambda_loopnest_new (depth, 2 * depth, lambda_obstack);
1529
1530   for (i = 0; VEC_iterate (lambda_loop, loops, i, newloop); i++)
1531     LN_LOOPS (ret)[i] = newloop;
1532
1533  fail:
1534   VEC_free (lambda_loop, heap, loops);
1535   VEC_free (tree, heap, uboundvars);
1536   VEC_free (tree, heap, lboundvars);
1537   VEC_free (int, heap, steps);
1538
1539   return ret;
1540 }
1541
1542 /* Convert a lambda body vector LBV to a gcc tree, and return the new tree.
1543    STMTS_TO_INSERT is a pointer to a tree where the statements we need to be
1544    inserted for us are stored.  INDUCTION_VARS is the array of induction
1545    variables for the loop this LBV is from.  TYPE is the tree type to use for
1546    the variables and trees involved.  */
1547
1548 static tree
1549 lbv_to_gcc_expression (lambda_body_vector lbv,
1550                        tree type, VEC(tree,heap) *induction_vars,
1551                        tree *stmts_to_insert)
1552 {
1553   int k;
1554   tree resvar;
1555   tree expr = build_linear_expr (type, LBV_COEFFICIENTS (lbv), induction_vars);
1556
1557   k = LBV_DENOMINATOR (lbv);
1558   gcc_assert (k != 0);
1559   if (k != 1)
1560     expr = fold_build2 (CEIL_DIV_EXPR, type, expr, build_int_cst (type, k));
1561
1562   resvar = create_tmp_var (type, "lbvtmp");
1563   add_referenced_var (resvar);
1564   return force_gimple_operand (fold (expr), stmts_to_insert, true, resvar);
1565 }
1566
1567 /* Convert a linear expression from coefficient and constant form to a
1568    gcc tree.
1569    Return the tree that represents the final value of the expression.
1570    LLE is the linear expression to convert.
1571    OFFSET is the linear offset to apply to the expression.
1572    TYPE is the tree type to use for the variables and math.
1573    INDUCTION_VARS is a vector of induction variables for the loops.
1574    INVARIANTS is a vector of the loop nest invariants.
1575    WRAP specifies what tree code to wrap the results in, if there is more than
1576    one (it is either MAX_EXPR, or MIN_EXPR).
1577    STMTS_TO_INSERT Is a pointer to the statement list we fill in with
1578    statements that need to be inserted for the linear expression.  */
1579
1580 static tree
1581 lle_to_gcc_expression (lambda_linear_expression lle,
1582                        lambda_linear_expression offset,
1583                        tree type,
1584                        VEC(tree,heap) *induction_vars,
1585                        VEC(tree,heap) *invariants,
1586                        enum tree_code wrap, tree *stmts_to_insert)
1587 {
1588   int k;
1589   tree resvar;
1590   tree expr = NULL_TREE;
1591   VEC(tree,heap) *results = NULL;
1592
1593   gcc_assert (wrap == MAX_EXPR || wrap == MIN_EXPR);
1594
1595   /* Build up the linear expressions.  */
1596   for (; lle != NULL; lle = LLE_NEXT (lle))
1597     {
1598       expr = build_linear_expr (type, LLE_COEFFICIENTS (lle), induction_vars);
1599       expr = fold_build2 (PLUS_EXPR, type, expr,
1600                           build_linear_expr (type,
1601                                              LLE_INVARIANT_COEFFICIENTS (lle),
1602                                              invariants));
1603
1604       k = LLE_CONSTANT (lle);
1605       if (k)
1606         expr = fold_build2 (PLUS_EXPR, type, expr, build_int_cst (type, k));
1607
1608       k = LLE_CONSTANT (offset);
1609       if (k)
1610         expr = fold_build2 (PLUS_EXPR, type, expr, build_int_cst (type, k));
1611
1612       k = LLE_DENOMINATOR (lle);
1613       if (k != 1)
1614         expr = fold_build2 (wrap == MAX_EXPR ? CEIL_DIV_EXPR : FLOOR_DIV_EXPR,
1615                             type, expr, build_int_cst (type, k));
1616
1617       expr = fold (expr);
1618       VEC_safe_push (tree, heap, results, expr);
1619     }
1620
1621   gcc_assert (expr);
1622
1623   /* We may need to wrap the results in a MAX_EXPR or MIN_EXPR.  */
1624   if (VEC_length (tree, results) > 1)
1625     {
1626       size_t i;
1627       tree op;
1628
1629       expr = VEC_index (tree, results, 0);
1630       for (i = 1; VEC_iterate (tree, results, i, op); i++)
1631         expr = fold_build2 (wrap, type, expr, op);
1632     }
1633
1634   VEC_free (tree, heap, results);
1635
1636   resvar = create_tmp_var (type, "lletmp");
1637   add_referenced_var (resvar);
1638   return force_gimple_operand (fold (expr), stmts_to_insert, true, resvar);
1639 }
1640
1641 /* Remove the induction variable defined at IV_STMT.  */
1642
1643 void
1644 remove_iv (tree iv_stmt)
1645 {
1646   if (TREE_CODE (iv_stmt) == PHI_NODE)
1647     {
1648       int i;
1649
1650       for (i = 0; i < PHI_NUM_ARGS (iv_stmt); i++)
1651         {
1652           tree stmt;
1653           imm_use_iterator imm_iter;
1654           tree arg = PHI_ARG_DEF (iv_stmt, i);
1655           bool used = false;
1656
1657           if (TREE_CODE (arg) != SSA_NAME)
1658             continue;
1659
1660           FOR_EACH_IMM_USE_STMT (stmt, imm_iter, arg)
1661             if (stmt != iv_stmt)
1662               used = true;
1663
1664           if (!used)
1665             remove_iv (SSA_NAME_DEF_STMT (arg));
1666         }
1667
1668       remove_phi_node (iv_stmt, NULL_TREE, true);
1669     }
1670   else
1671     {
1672       block_stmt_iterator bsi = bsi_for_stmt (iv_stmt);
1673
1674       bsi_remove (&bsi, true);
1675       release_defs (iv_stmt);
1676     }
1677 }
1678
1679 /* Transform a lambda loopnest NEW_LOOPNEST, which had TRANSFORM applied to
1680    it, back into gcc code.  This changes the
1681    loops, their induction variables, and their bodies, so that they
1682    match the transformed loopnest.
1683    OLD_LOOPNEST is the loopnest before we've replaced it with the new
1684    loopnest.
1685    OLD_IVS is a vector of induction variables from the old loopnest.
1686    INVARIANTS is a vector of loop invariants from the old loopnest.
1687    NEW_LOOPNEST is the new lambda loopnest to replace OLD_LOOPNEST with.
1688    TRANSFORM is the matrix transform that was applied to OLD_LOOPNEST to get
1689    NEW_LOOPNEST.  */
1690
1691 void
1692 lambda_loopnest_to_gcc_loopnest (struct loop *old_loopnest,
1693                                  VEC(tree,heap) *old_ivs,
1694                                  VEC(tree,heap) *invariants,
1695                                  VEC(tree,heap) **remove_ivs,
1696                                  lambda_loopnest new_loopnest,
1697                                  lambda_trans_matrix transform,
1698                                  struct obstack * lambda_obstack)
1699 {
1700   struct loop *temp;
1701   size_t i = 0;
1702   int j;
1703   size_t depth = 0;
1704   VEC(tree,heap) *new_ivs = NULL;
1705   tree oldiv;
1706   block_stmt_iterator bsi;
1707
1708   transform = lambda_trans_matrix_inverse (transform);
1709
1710   if (dump_file)
1711     {
1712       fprintf (dump_file, "Inverse of transformation matrix:\n");
1713       print_lambda_trans_matrix (dump_file, transform);
1714     }
1715   depth = depth_of_nest (old_loopnest);
1716   temp = old_loopnest;
1717
1718   while (temp)
1719     {
1720       lambda_loop newloop;
1721       basic_block bb;
1722       edge exit;
1723       tree ivvar, ivvarinced, exitcond, stmts;
1724       enum tree_code testtype;
1725       tree newupperbound, newlowerbound;
1726       lambda_linear_expression offset;
1727       tree type;
1728       bool insert_after;
1729       tree inc_stmt;
1730
1731       oldiv = VEC_index (tree, old_ivs, i);
1732       type = TREE_TYPE (oldiv);
1733
1734       /* First, build the new induction variable temporary  */
1735
1736       ivvar = create_tmp_var (type, "lnivtmp");
1737       add_referenced_var (ivvar);
1738
1739       VEC_safe_push (tree, heap, new_ivs, ivvar);
1740
1741       newloop = LN_LOOPS (new_loopnest)[i];
1742
1743       /* Linear offset is a bit tricky to handle.  Punt on the unhandled
1744          cases for now.  */
1745       offset = LL_LINEAR_OFFSET (newloop);
1746
1747       gcc_assert (LLE_DENOMINATOR (offset) == 1 &&
1748                   lambda_vector_zerop (LLE_COEFFICIENTS (offset), depth));
1749
1750       /* Now build the  new lower bounds, and insert the statements
1751          necessary to generate it on the loop preheader.  */
1752       newlowerbound = lle_to_gcc_expression (LL_LOWER_BOUND (newloop),
1753                                              LL_LINEAR_OFFSET (newloop),
1754                                              type,
1755                                              new_ivs,
1756                                              invariants, MAX_EXPR, &stmts);
1757
1758       if (stmts)
1759         {
1760           bsi_insert_on_edge (loop_preheader_edge (temp), stmts);
1761           bsi_commit_edge_inserts ();
1762         }
1763       /* Build the new upper bound and insert its statements in the
1764          basic block of the exit condition */
1765       newupperbound = lle_to_gcc_expression (LL_UPPER_BOUND (newloop),
1766                                              LL_LINEAR_OFFSET (newloop),
1767                                              type,
1768                                              new_ivs,
1769                                              invariants, MIN_EXPR, &stmts);
1770       exit = single_exit (temp);
1771       exitcond = get_loop_exit_condition (temp);
1772       bb = bb_for_stmt (exitcond);
1773       bsi = bsi_after_labels (bb);
1774       if (stmts)
1775         bsi_insert_before (&bsi, stmts, BSI_NEW_STMT);
1776
1777       /* Create the new iv.  */
1778
1779       standard_iv_increment_position (temp, &bsi, &insert_after);
1780       create_iv (newlowerbound,
1781                  build_int_cst (type, LL_STEP (newloop)),
1782                  ivvar, temp, &bsi, insert_after, &ivvar,
1783                  NULL);
1784
1785       /* Unfortunately, the incremented ivvar that create_iv inserted may not
1786          dominate the block containing the exit condition.
1787          So we simply create our own incremented iv to use in the new exit
1788          test,  and let redundancy elimination sort it out.  */
1789       inc_stmt = build2 (PLUS_EXPR, type,
1790                          ivvar, build_int_cst (type, LL_STEP (newloop)));
1791       inc_stmt = build_gimple_modify_stmt (SSA_NAME_VAR (ivvar), inc_stmt);
1792       ivvarinced = make_ssa_name (SSA_NAME_VAR (ivvar), inc_stmt);
1793       GIMPLE_STMT_OPERAND (inc_stmt, 0) = ivvarinced;
1794       bsi = bsi_for_stmt (exitcond);
1795       bsi_insert_before (&bsi, inc_stmt, BSI_SAME_STMT);
1796
1797       /* Replace the exit condition with the new upper bound
1798          comparison.  */
1799
1800       testtype = LL_STEP (newloop) >= 0 ? LE_EXPR : GE_EXPR;
1801
1802       /* We want to build a conditional where true means exit the loop, and
1803          false means continue the loop.
1804          So swap the testtype if this isn't the way things are.*/
1805
1806       if (exit->flags & EDGE_FALSE_VALUE)
1807         testtype = swap_tree_comparison (testtype);
1808
1809       COND_EXPR_COND (exitcond) = build2 (testtype,
1810                                           boolean_type_node,
1811                                           newupperbound, ivvarinced);
1812       update_stmt (exitcond);
1813       VEC_replace (tree, new_ivs, i, ivvar);
1814
1815       i++;
1816       temp = temp->inner;
1817     }
1818
1819   /* Rewrite uses of the old ivs so that they are now specified in terms of
1820      the new ivs.  */
1821
1822   for (i = 0; VEC_iterate (tree, old_ivs, i, oldiv); i++)
1823     {
1824       imm_use_iterator imm_iter;
1825       use_operand_p use_p;
1826       tree oldiv_def;
1827       tree oldiv_stmt = SSA_NAME_DEF_STMT (oldiv);
1828       tree stmt;
1829
1830       if (TREE_CODE (oldiv_stmt) == PHI_NODE)
1831         oldiv_def = PHI_RESULT (oldiv_stmt);
1832       else
1833         oldiv_def = SINGLE_SSA_TREE_OPERAND (oldiv_stmt, SSA_OP_DEF);
1834       gcc_assert (oldiv_def != NULL_TREE);
1835
1836       FOR_EACH_IMM_USE_STMT (stmt, imm_iter, oldiv_def)
1837         {
1838           tree newiv, stmts;
1839           lambda_body_vector lbv, newlbv;
1840
1841           /* Compute the new expression for the induction
1842              variable.  */
1843           depth = VEC_length (tree, new_ivs);
1844           lbv = lambda_body_vector_new (depth, lambda_obstack);
1845           LBV_COEFFICIENTS (lbv)[i] = 1;
1846
1847           newlbv = lambda_body_vector_compute_new (transform, lbv,
1848                                                    lambda_obstack);
1849
1850           newiv = lbv_to_gcc_expression (newlbv, TREE_TYPE (oldiv),
1851                                          new_ivs, &stmts);
1852
1853           if (stmts && TREE_CODE (stmt) != PHI_NODE)
1854             {
1855               bsi = bsi_for_stmt (stmt);
1856               bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
1857             }
1858
1859           FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1860             propagate_value (use_p, newiv);
1861
1862           if (stmts && TREE_CODE (stmt) == PHI_NODE)
1863             for (j = 0; j < PHI_NUM_ARGS (stmt); j++)
1864               if (PHI_ARG_DEF (stmt, j) == newiv)
1865                 bsi_insert_on_edge (PHI_ARG_EDGE (stmt, j), stmts);
1866
1867           update_stmt (stmt);
1868         }
1869
1870       /* Remove the now unused induction variable.  */
1871       VEC_safe_push (tree, heap, *remove_ivs, oldiv_stmt);
1872     }
1873   VEC_free (tree, heap, new_ivs);
1874 }
1875
1876 /* Return TRUE if this is not interesting statement from the perspective of
1877    determining if we have a perfect loop nest.  */
1878
1879 static bool
1880 not_interesting_stmt (tree stmt)
1881 {
1882   /* Note that COND_EXPR's aren't interesting because if they were exiting the
1883      loop, we would have already failed the number of exits tests.  */
1884   if (TREE_CODE (stmt) == LABEL_EXPR
1885       || TREE_CODE (stmt) == GOTO_EXPR
1886       || TREE_CODE (stmt) == COND_EXPR)
1887     return true;
1888   return false;
1889 }
1890
1891 /* Return TRUE if PHI uses DEF for it's in-the-loop edge for LOOP.  */
1892
1893 static bool
1894 phi_loop_edge_uses_def (struct loop *loop, tree phi, tree def)
1895 {
1896   int i;
1897   for (i = 0; i < PHI_NUM_ARGS (phi); i++)
1898     if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, i)->src))
1899       if (PHI_ARG_DEF (phi, i) == def)
1900         return true;
1901   return false;
1902 }
1903
1904 /* Return TRUE if STMT is a use of PHI_RESULT.  */
1905
1906 static bool
1907 stmt_uses_phi_result (tree stmt, tree phi_result)
1908 {
1909   tree use = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1910
1911   /* This is conservatively true, because we only want SIMPLE bumpers
1912      of the form x +- constant for our pass.  */
1913   return (use == phi_result);
1914 }
1915
1916 /* STMT is a bumper stmt for LOOP if the version it defines is used in the
1917    in-loop-edge in a phi node, and the operand it uses is the result of that
1918    phi node.
1919    I.E. i_29 = i_3 + 1
1920         i_3 = PHI (0, i_29);  */
1921
1922 static bool
1923 stmt_is_bumper_for_loop (struct loop *loop, tree stmt)
1924 {
1925   tree use;
1926   tree def;
1927   imm_use_iterator iter;
1928   use_operand_p use_p;
1929
1930   def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
1931   if (!def)
1932     return false;
1933
1934   FOR_EACH_IMM_USE_FAST (use_p, iter, def)
1935     {
1936       use = USE_STMT (use_p);
1937       if (TREE_CODE (use) == PHI_NODE)
1938         {
1939           if (phi_loop_edge_uses_def (loop, use, def))
1940             if (stmt_uses_phi_result (stmt, PHI_RESULT (use)))
1941               return true;
1942         }
1943     }
1944   return false;
1945 }
1946
1947
1948 /* Return true if LOOP is a perfect loop nest.
1949    Perfect loop nests are those loop nests where all code occurs in the
1950    innermost loop body.
1951    If S is a program statement, then
1952
1953    i.e.
1954    DO I = 1, 20
1955        S1
1956        DO J = 1, 20
1957        ...
1958        END DO
1959    END DO
1960    is not a perfect loop nest because of S1.
1961
1962    DO I = 1, 20
1963       DO J = 1, 20
1964         S1
1965         ...
1966       END DO
1967    END DO
1968    is a perfect loop nest.
1969
1970    Since we don't have high level loops anymore, we basically have to walk our
1971    statements and ignore those that are there because the loop needs them (IE
1972    the induction variable increment, and jump back to the top of the loop).  */
1973
1974 bool
1975 perfect_nest_p (struct loop *loop)
1976 {
1977   basic_block *bbs;
1978   size_t i;
1979   tree exit_cond;
1980
1981   /* Loops at depth 0 are perfect nests.  */
1982   if (!loop->inner)
1983     return true;
1984
1985   bbs = get_loop_body (loop);
1986   exit_cond = get_loop_exit_condition (loop);
1987
1988   for (i = 0; i < loop->num_nodes; i++)
1989     {
1990       if (bbs[i]->loop_father == loop)
1991         {
1992           block_stmt_iterator bsi;
1993
1994           for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi))
1995             {
1996               tree stmt = bsi_stmt (bsi);
1997
1998               if (TREE_CODE (stmt) == COND_EXPR
1999                   && exit_cond != stmt)
2000                 goto non_perfectly_nested;
2001
2002               if (stmt == exit_cond
2003                   || not_interesting_stmt (stmt)
2004                   || stmt_is_bumper_for_loop (loop, stmt))
2005                 continue;
2006
2007             non_perfectly_nested:
2008               free (bbs);
2009               return false;
2010             }
2011         }
2012     }
2013
2014   free (bbs);
2015
2016   return perfect_nest_p (loop->inner);
2017 }
2018
2019 /* Replace the USES of X in STMT, or uses with the same step as X with Y.
2020    YINIT is the initial value of Y, REPLACEMENTS is a hash table to
2021    avoid creating duplicate temporaries and FIRSTBSI is statement
2022    iterator where new temporaries should be inserted at the beginning
2023    of body basic block.  */
2024
2025 static void
2026 replace_uses_equiv_to_x_with_y (struct loop *loop, tree stmt, tree x,
2027                                 int xstep, tree y, tree yinit,
2028                                 htab_t replacements,
2029                                 block_stmt_iterator *firstbsi)
2030 {
2031   ssa_op_iter iter;
2032   use_operand_p use_p;
2033
2034   FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
2035     {
2036       tree use = USE_FROM_PTR (use_p);
2037       tree step = NULL_TREE;
2038       tree scev, init, val, var, setstmt;
2039       struct tree_map *h, in;
2040       void **loc;
2041
2042       /* Replace uses of X with Y right away.  */
2043       if (use == x)
2044         {
2045           SET_USE (use_p, y);
2046           continue;
2047         }
2048
2049       scev = instantiate_parameters (loop,
2050                                      analyze_scalar_evolution (loop, use));
2051
2052       if (scev == NULL || scev == chrec_dont_know)
2053         continue;
2054
2055       step = evolution_part_in_loop_num (scev, loop->num);
2056       if (step == NULL
2057           || step == chrec_dont_know
2058           || TREE_CODE (step) != INTEGER_CST
2059           || int_cst_value (step) != xstep)
2060         continue;
2061
2062       /* Use REPLACEMENTS hash table to cache already created
2063          temporaries.  */
2064       in.hash = htab_hash_pointer (use);
2065       in.base.from = use;
2066       h = (struct tree_map *) htab_find_with_hash (replacements, &in, in.hash);
2067       if (h != NULL)
2068         {
2069           SET_USE (use_p, h->to);
2070           continue;
2071         }
2072
2073       /* USE which has the same step as X should be replaced
2074          with a temporary set to Y + YINIT - INIT.  */
2075       init = initial_condition_in_loop_num (scev, loop->num);
2076       gcc_assert (init != NULL && init != chrec_dont_know);
2077       if (TREE_TYPE (use) == TREE_TYPE (y))
2078         {
2079           val = fold_build2 (MINUS_EXPR, TREE_TYPE (y), init, yinit);
2080           val = fold_build2 (PLUS_EXPR, TREE_TYPE (y), y, val);
2081           if (val == y)
2082             {
2083               /* If X has the same type as USE, the same step
2084                  and same initial value, it can be replaced by Y.  */
2085               SET_USE (use_p, y);
2086               continue;
2087             }
2088         }
2089       else
2090         {
2091           val = fold_build2 (MINUS_EXPR, TREE_TYPE (y), y, yinit);
2092           val = fold_convert (TREE_TYPE (use), val);
2093           val = fold_build2 (PLUS_EXPR, TREE_TYPE (use), val, init);
2094         }
2095
2096       /* Create a temporary variable and insert it at the beginning
2097          of the loop body basic block, right after the PHI node
2098          which sets Y.  */
2099       var = create_tmp_var (TREE_TYPE (use), "perfecttmp");
2100       add_referenced_var (var);
2101       val = force_gimple_operand_bsi (firstbsi, val, false, NULL,
2102                                       true, BSI_SAME_STMT);
2103       setstmt = build_gimple_modify_stmt (var, val);
2104       var = make_ssa_name (var, setstmt);
2105       GIMPLE_STMT_OPERAND (setstmt, 0) = var;
2106       bsi_insert_before (firstbsi, setstmt, BSI_SAME_STMT);
2107       update_stmt (setstmt);
2108       SET_USE (use_p, var);
2109       h = GGC_NEW (struct tree_map);
2110       h->hash = in.hash;
2111       h->base.from = use;
2112       h->to = var;
2113       loc = htab_find_slot_with_hash (replacements, h, in.hash, INSERT);
2114       gcc_assert ((*(struct tree_map **)loc) == NULL);
2115       *(struct tree_map **) loc = h;
2116     }
2117 }
2118
2119 /* Return true if STMT is an exit PHI for LOOP */
2120
2121 static bool
2122 exit_phi_for_loop_p (struct loop *loop, tree stmt)
2123 {
2124
2125   if (TREE_CODE (stmt) != PHI_NODE
2126       || PHI_NUM_ARGS (stmt) != 1
2127       || bb_for_stmt (stmt) != single_exit (loop)->dest)
2128     return false;
2129
2130   return true;
2131 }
2132
2133 /* Return true if STMT can be put back into the loop INNER, by
2134    copying it to the beginning of that loop and changing the uses.  */
2135
2136 static bool
2137 can_put_in_inner_loop (struct loop *inner, tree stmt)
2138 {
2139   imm_use_iterator imm_iter;
2140   use_operand_p use_p;
2141
2142   gcc_assert (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);
2143   if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS)
2144       || !expr_invariant_in_loop_p (inner, GIMPLE_STMT_OPERAND (stmt, 1)))
2145     return false;
2146
2147   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, GIMPLE_STMT_OPERAND (stmt, 0))
2148     {
2149       if (!exit_phi_for_loop_p (inner, USE_STMT (use_p)))
2150         {
2151           basic_block immbb = bb_for_stmt (USE_STMT (use_p));
2152
2153           if (!flow_bb_inside_loop_p (inner, immbb))
2154             return false;
2155         }
2156     }
2157   return true;
2158 }
2159
2160 /* Return true if STMT can be put *after* the inner loop of LOOP.  */
2161 static bool
2162 can_put_after_inner_loop (struct loop *loop, tree stmt)
2163 {
2164   imm_use_iterator imm_iter;
2165   use_operand_p use_p;
2166
2167   if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS))
2168     return false;
2169
2170   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, GIMPLE_STMT_OPERAND (stmt, 0))
2171     {
2172       if (!exit_phi_for_loop_p (loop, USE_STMT (use_p)))
2173         {
2174           basic_block immbb = bb_for_stmt (USE_STMT (use_p));
2175
2176           if (!dominated_by_p (CDI_DOMINATORS,
2177                                immbb,
2178                                loop->inner->header)
2179               && !can_put_in_inner_loop (loop->inner, stmt))
2180             return false;
2181         }
2182     }
2183   return true;
2184 }
2185
2186 /* Return true when the induction variable IV is simple enough to be
2187    re-synthesized.  */
2188
2189 static bool
2190 can_duplicate_iv (tree iv, struct loop *loop)
2191 {
2192   tree scev = instantiate_parameters
2193     (loop, analyze_scalar_evolution (loop, iv));
2194
2195   if (!automatically_generated_chrec_p (scev))
2196     {
2197       tree step = evolution_part_in_loop_num (scev, loop->num);
2198
2199       if (step && step != chrec_dont_know && TREE_CODE (step) == INTEGER_CST)
2200         return true;
2201     }
2202
2203   return false;
2204 }
2205
2206 /* If this is a scalar operation that can be put back into the inner
2207    loop, or after the inner loop, through copying, then do so. This
2208    works on the theory that any amount of scalar code we have to
2209    reduplicate into or after the loops is less expensive that the win
2210    we get from rearranging the memory walk the loop is doing so that
2211    it has better cache behavior.  */
2212
2213 static bool
2214 cannot_convert_modify_to_perfect_nest (tree stmt, struct loop *loop)
2215 {
2216
2217   use_operand_p use_a, use_b;
2218   imm_use_iterator imm_iter;
2219   ssa_op_iter op_iter, op_iter1;
2220   tree op0 = GIMPLE_STMT_OPERAND (stmt, 0);
2221
2222   /* The statement should not define a variable used in the inner
2223      loop.  */
2224   if (TREE_CODE (op0) == SSA_NAME
2225       && !can_duplicate_iv (op0, loop))
2226     FOR_EACH_IMM_USE_FAST (use_a, imm_iter, op0)
2227       if (bb_for_stmt (USE_STMT (use_a))->loop_father
2228           == loop->inner)
2229         return true;
2230
2231   FOR_EACH_SSA_USE_OPERAND (use_a, stmt, op_iter, SSA_OP_USE)
2232     {
2233       tree node, op = USE_FROM_PTR (use_a);
2234
2235       /* The variables should not be used in both loops.  */
2236       if (!can_duplicate_iv (op, loop))
2237         FOR_EACH_IMM_USE_FAST (use_b, imm_iter, op)
2238           if (bb_for_stmt (USE_STMT (use_b))->loop_father
2239               == loop->inner)
2240             return true;
2241
2242       /* The statement should not use the value of a scalar that was
2243          modified in the loop.  */
2244       node = SSA_NAME_DEF_STMT (op);
2245       if (TREE_CODE (node) == PHI_NODE)
2246         FOR_EACH_PHI_ARG (use_b, node, op_iter1, SSA_OP_USE)
2247         {
2248           tree arg = USE_FROM_PTR (use_b);
2249
2250           if (TREE_CODE (arg) == SSA_NAME)
2251             {
2252               tree arg_stmt = SSA_NAME_DEF_STMT (arg);
2253
2254               if (bb_for_stmt (arg_stmt)
2255                   && (bb_for_stmt (arg_stmt)->loop_father
2256                       == loop->inner))
2257                 return true;
2258             }
2259         }
2260     }
2261
2262   return false;
2263 }
2264
2265 /* Return true when BB contains statements that can harm the transform
2266    to a perfect loop nest.  */
2267
2268 static bool
2269 cannot_convert_bb_to_perfect_nest (basic_block bb, struct loop *loop)
2270 {
2271   block_stmt_iterator bsi;
2272   tree exit_condition = get_loop_exit_condition (loop);
2273
2274   for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
2275     {
2276       tree stmt = bsi_stmt (bsi);
2277
2278       if (stmt == exit_condition
2279           || not_interesting_stmt (stmt)
2280           || stmt_is_bumper_for_loop (loop, stmt))
2281         continue;
2282
2283       if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
2284         {
2285           if (cannot_convert_modify_to_perfect_nest (stmt, loop))
2286             return true;
2287
2288           if (can_duplicate_iv (GIMPLE_STMT_OPERAND (stmt, 0), loop))
2289             continue;
2290
2291           if (can_put_in_inner_loop (loop->inner, stmt)
2292               || can_put_after_inner_loop (loop, stmt))
2293             continue;
2294         }
2295
2296       /* If the bb of a statement we care about isn't dominated by the
2297          header of the inner loop, then we can't handle this case
2298          right now.  This test ensures that the statement comes
2299          completely *after* the inner loop.  */
2300       if (!dominated_by_p (CDI_DOMINATORS,
2301                            bb_for_stmt (stmt),
2302                            loop->inner->header))
2303         return true;
2304     }
2305
2306   return false;
2307 }
2308
2309 /* Return TRUE if LOOP is an imperfect nest that we can convert to a
2310    perfect one.  At the moment, we only handle imperfect nests of
2311    depth 2, where all of the statements occur after the inner loop.  */
2312
2313 static bool
2314 can_convert_to_perfect_nest (struct loop *loop)
2315 {
2316   basic_block *bbs;
2317   tree phi;
2318   size_t i;
2319
2320   /* Can't handle triply nested+ loops yet.  */
2321   if (!loop->inner || loop->inner->inner)
2322     return false;
2323
2324   bbs = get_loop_body (loop);
2325   for (i = 0; i < loop->num_nodes; i++)
2326     if (bbs[i]->loop_father == loop
2327         && cannot_convert_bb_to_perfect_nest (bbs[i], loop))
2328       goto fail;
2329
2330   /* We also need to make sure the loop exit only has simple copy phis in it,
2331      otherwise we don't know how to transform it into a perfect nest.  */
2332   for (phi = phi_nodes (single_exit (loop)->dest); phi; phi = PHI_CHAIN (phi))
2333     if (PHI_NUM_ARGS (phi) != 1)
2334       goto fail;
2335
2336   free (bbs);
2337   return true;
2338
2339  fail:
2340   free (bbs);
2341   return false;
2342 }
2343
2344 /* Transform the loop nest into a perfect nest, if possible.
2345    LOOP is the loop nest to transform into a perfect nest
2346    LBOUNDS are the lower bounds for the loops to transform
2347    UBOUNDS are the upper bounds for the loops to transform
2348    STEPS is the STEPS for the loops to transform.
2349    LOOPIVS is the induction variables for the loops to transform.
2350
2351    Basically, for the case of
2352
2353    FOR (i = 0; i < 50; i++)
2354     {
2355      FOR (j =0; j < 50; j++)
2356      {
2357         <whatever>
2358      }
2359      <some code>
2360     }
2361
2362    This function will transform it into a perfect loop nest by splitting the
2363    outer loop into two loops, like so:
2364
2365    FOR (i = 0; i < 50; i++)
2366    {
2367      FOR (j = 0; j < 50; j++)
2368      {
2369          <whatever>
2370      }
2371    }
2372
2373    FOR (i = 0; i < 50; i ++)
2374    {
2375     <some code>
2376    }
2377
2378    Return FALSE if we can't make this loop into a perfect nest.  */
2379
2380 static bool
2381 perfect_nestify (struct loop *loop,
2382                  VEC(tree,heap) *lbounds,
2383                  VEC(tree,heap) *ubounds,
2384                  VEC(int,heap) *steps,
2385                  VEC(tree,heap) *loopivs)
2386 {
2387   basic_block *bbs;
2388   tree exit_condition;
2389   tree cond_stmt;
2390   basic_block preheaderbb, headerbb, bodybb, latchbb, olddest;
2391   int i;
2392   block_stmt_iterator bsi, firstbsi;
2393   bool insert_after;
2394   edge e;
2395   struct loop *newloop;
2396   tree phi;
2397   tree uboundvar;
2398   tree stmt;
2399   tree oldivvar, ivvar, ivvarinced;
2400   VEC(tree,heap) *phis = NULL;
2401   htab_t replacements = NULL;
2402
2403   /* Create the new loop.  */
2404   olddest = single_exit (loop)->dest;
2405   preheaderbb = split_edge (single_exit (loop));
2406   headerbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2407
2408   /* Push the exit phi nodes that we are moving.  */
2409   for (phi = phi_nodes (olddest); phi; phi = PHI_CHAIN (phi))
2410     {
2411       VEC_reserve (tree, heap, phis, 2);
2412       VEC_quick_push (tree, phis, PHI_RESULT (phi));
2413       VEC_quick_push (tree, phis, PHI_ARG_DEF (phi, 0));
2414     }
2415   e = redirect_edge_and_branch (single_succ_edge (preheaderbb), headerbb);
2416
2417   /* Remove the exit phis from the old basic block.  */
2418   while (phi_nodes (olddest) != NULL)
2419     remove_phi_node (phi_nodes (olddest), NULL, false);
2420
2421   /* and add them back to the new basic block.  */
2422   while (VEC_length (tree, phis) != 0)
2423     {
2424       tree def;
2425       tree phiname;
2426       def = VEC_pop (tree, phis);
2427       phiname = VEC_pop (tree, phis);
2428       phi = create_phi_node (phiname, preheaderbb);
2429       add_phi_arg (phi, def, single_pred_edge (preheaderbb));
2430     }
2431   flush_pending_stmts (e);
2432   VEC_free (tree, heap, phis);
2433
2434   bodybb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2435   latchbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2436   make_edge (headerbb, bodybb, EDGE_FALLTHRU);
2437   cond_stmt = build3 (COND_EXPR, void_type_node,
2438                       build2 (NE_EXPR, boolean_type_node,
2439                               integer_one_node,
2440                               integer_zero_node),
2441                       NULL_TREE, NULL_TREE);
2442   bsi = bsi_start (bodybb);
2443   bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
2444   e = make_edge (bodybb, olddest, EDGE_FALSE_VALUE);
2445   make_edge (bodybb, latchbb, EDGE_TRUE_VALUE);
2446   make_edge (latchbb, headerbb, EDGE_FALLTHRU);
2447
2448   /* Update the loop structures.  */
2449   newloop = duplicate_loop (loop, olddest->loop_father);
2450   newloop->header = headerbb;
2451   newloop->latch = latchbb;
2452   add_bb_to_loop (latchbb, newloop);
2453   add_bb_to_loop (bodybb, newloop);
2454   add_bb_to_loop (headerbb, newloop);
2455   set_immediate_dominator (CDI_DOMINATORS, bodybb, headerbb);
2456   set_immediate_dominator (CDI_DOMINATORS, headerbb, preheaderbb);
2457   set_immediate_dominator (CDI_DOMINATORS, preheaderbb,
2458                            single_exit (loop)->src);
2459   set_immediate_dominator (CDI_DOMINATORS, latchbb, bodybb);
2460   set_immediate_dominator (CDI_DOMINATORS, olddest,
2461                            recompute_dominator (CDI_DOMINATORS, olddest));
2462   /* Create the new iv.  */
2463   oldivvar = VEC_index (tree, loopivs, 0);
2464   ivvar = create_tmp_var (TREE_TYPE (oldivvar), "perfectiv");
2465   add_referenced_var (ivvar);
2466   standard_iv_increment_position (newloop, &bsi, &insert_after);
2467   create_iv (VEC_index (tree, lbounds, 0),
2468              build_int_cst (TREE_TYPE (oldivvar), VEC_index (int, steps, 0)),
2469              ivvar, newloop, &bsi, insert_after, &ivvar, &ivvarinced);
2470
2471   /* Create the new upper bound.  This may be not just a variable, so we copy
2472      it to one just in case.  */
2473
2474   exit_condition = get_loop_exit_condition (newloop);
2475   uboundvar = create_tmp_var (integer_type_node, "uboundvar");
2476   add_referenced_var (uboundvar);
2477   stmt = build_gimple_modify_stmt (uboundvar, VEC_index (tree, ubounds, 0));
2478   uboundvar = make_ssa_name (uboundvar, stmt);
2479   GIMPLE_STMT_OPERAND (stmt, 0) = uboundvar;
2480
2481   if (insert_after)
2482     bsi_insert_after (&bsi, stmt, BSI_SAME_STMT);
2483   else
2484     bsi_insert_before (&bsi, stmt, BSI_SAME_STMT);
2485   update_stmt (stmt);
2486   COND_EXPR_COND (exit_condition) = build2 (GE_EXPR,
2487                                             boolean_type_node,
2488                                             uboundvar,
2489                                             ivvarinced);
2490   update_stmt (exit_condition);
2491   replacements = htab_create_ggc (20, tree_map_hash,
2492                                   tree_map_eq, NULL);
2493   bbs = get_loop_body_in_dom_order (loop);
2494   /* Now move the statements, and replace the induction variable in the moved
2495      statements with the correct loop induction variable.  */
2496   oldivvar = VEC_index (tree, loopivs, 0);
2497   firstbsi = bsi_start (bodybb);
2498   for (i = loop->num_nodes - 1; i >= 0 ; i--)
2499     {
2500       block_stmt_iterator tobsi = bsi_last (bodybb);
2501       if (bbs[i]->loop_father == loop)
2502         {
2503           /* If this is true, we are *before* the inner loop.
2504              If this isn't true, we are *after* it.
2505
2506              The only time can_convert_to_perfect_nest returns true when we
2507              have statements before the inner loop is if they can be moved
2508              into the inner loop.
2509
2510              The only time can_convert_to_perfect_nest returns true when we
2511              have statements after the inner loop is if they can be moved into
2512              the new split loop.  */
2513
2514           if (dominated_by_p (CDI_DOMINATORS, loop->inner->header, bbs[i]))
2515             {
2516               block_stmt_iterator header_bsi
2517                 = bsi_after_labels (loop->inner->header);
2518
2519               for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi);)
2520                 {
2521                   tree stmt = bsi_stmt (bsi);
2522
2523                   if (stmt == exit_condition
2524                       || not_interesting_stmt (stmt)
2525                       || stmt_is_bumper_for_loop (loop, stmt))
2526                     {
2527                       bsi_next (&bsi);
2528                       continue;
2529                     }
2530
2531                   bsi_move_before (&bsi, &header_bsi);
2532                 }
2533             }
2534           else
2535             {
2536               /* Note that the bsi only needs to be explicitly incremented
2537                  when we don't move something, since it is automatically
2538                  incremented when we do.  */
2539               for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi);)
2540                 {
2541                   ssa_op_iter i;
2542                   tree n, stmt = bsi_stmt (bsi);
2543
2544                   if (stmt == exit_condition
2545                       || not_interesting_stmt (stmt)
2546                       || stmt_is_bumper_for_loop (loop, stmt))
2547                     {
2548                       bsi_next (&bsi);
2549                       continue;
2550                     }
2551
2552                   replace_uses_equiv_to_x_with_y
2553                     (loop, stmt, oldivvar, VEC_index (int, steps, 0), ivvar,
2554                      VEC_index (tree, lbounds, 0), replacements, &firstbsi);
2555
2556                   bsi_move_before (&bsi, &tobsi);
2557
2558                   /* If the statement has any virtual operands, they may
2559                      need to be rewired because the original loop may
2560                      still reference them.  */
2561                   FOR_EACH_SSA_TREE_OPERAND (n, stmt, i, SSA_OP_ALL_VIRTUALS)
2562                     mark_sym_for_renaming (SSA_NAME_VAR (n));
2563                 }
2564             }
2565
2566         }
2567     }
2568
2569   free (bbs);
2570   htab_delete (replacements);
2571   return perfect_nest_p (loop);
2572 }
2573
2574 /* Return true if TRANS is a legal transformation matrix that respects
2575    the dependence vectors in DISTS and DIRS.  The conservative answer
2576    is false.
2577
2578    "Wolfe proves that a unimodular transformation represented by the
2579    matrix T is legal when applied to a loop nest with a set of
2580    lexicographically non-negative distance vectors RDG if and only if
2581    for each vector d in RDG, (T.d >= 0) is lexicographically positive.
2582    i.e.: if and only if it transforms the lexicographically positive
2583    distance vectors to lexicographically positive vectors.  Note that
2584    a unimodular matrix must transform the zero vector (and only it) to
2585    the zero vector." S.Muchnick.  */
2586
2587 bool
2588 lambda_transform_legal_p (lambda_trans_matrix trans,
2589                           int nb_loops,
2590                           VEC (ddr_p, heap) *dependence_relations)
2591 {
2592   unsigned int i, j;
2593   lambda_vector distres;
2594   struct data_dependence_relation *ddr;
2595
2596   gcc_assert (LTM_COLSIZE (trans) == nb_loops
2597               && LTM_ROWSIZE (trans) == nb_loops);
2598
2599   /* When there are no dependences, the transformation is correct.  */
2600   if (VEC_length (ddr_p, dependence_relations) == 0)
2601     return true;
2602
2603   ddr = VEC_index (ddr_p, dependence_relations, 0);
2604   if (ddr == NULL)
2605     return true;
2606
2607   /* When there is an unknown relation in the dependence_relations, we
2608      know that it is no worth looking at this loop nest: give up.  */
2609   if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2610     return false;
2611
2612   distres = lambda_vector_new (nb_loops);
2613
2614   /* For each distance vector in the dependence graph.  */
2615   for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
2616     {
2617       /* Don't care about relations for which we know that there is no
2618          dependence, nor about read-read (aka. output-dependences):
2619          these data accesses can happen in any order.  */
2620       if (DDR_ARE_DEPENDENT (ddr) == chrec_known
2621           || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
2622         continue;
2623
2624       /* Conservatively answer: "this transformation is not valid".  */
2625       if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2626         return false;
2627
2628       /* If the dependence could not be captured by a distance vector,
2629          conservatively answer that the transform is not valid.  */
2630       if (DDR_NUM_DIST_VECTS (ddr) == 0)
2631         return false;
2632
2633       /* Compute trans.dist_vect */
2634       for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
2635         {
2636           lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
2637                                      DDR_DIST_VECT (ddr, j), distres);
2638
2639           if (!lambda_vector_lexico_pos (distres, nb_loops))
2640             return false;
2641         }
2642     }
2643   return true;
2644 }
2645
2646
2647 /* Collects parameters from affine function ACCESS_FUNCTION, and push
2648    them in PARAMETERS.  */
2649
2650 static void
2651 lambda_collect_parameters_from_af (tree access_function,
2652                                    struct pointer_set_t *param_set,
2653                                    VEC (tree, heap) **parameters)
2654 {
2655   if (access_function == NULL)
2656     return;
2657
2658   if (TREE_CODE (access_function) == SSA_NAME
2659       && pointer_set_contains (param_set, access_function) == 0)
2660     {
2661       pointer_set_insert (param_set, access_function);
2662       VEC_safe_push (tree, heap, *parameters, access_function);
2663     }
2664   else
2665     {
2666       int i, num_operands = tree_operand_length (access_function);
2667
2668       for (i = 0; i < num_operands; i++)
2669         lambda_collect_parameters_from_af (TREE_OPERAND (access_function, i),
2670                                            param_set, parameters);
2671     }
2672 }
2673
2674 /* Collects parameters from DATAREFS, and push them in PARAMETERS.  */
2675
2676 void
2677 lambda_collect_parameters (VEC (data_reference_p, heap) *datarefs,
2678                            VEC (tree, heap) **parameters)
2679 {
2680   unsigned i, j;
2681   struct pointer_set_t *parameter_set = pointer_set_create ();
2682   data_reference_p data_reference;
2683
2684   for (i = 0; VEC_iterate (data_reference_p, datarefs, i, data_reference); i++)
2685     for (j = 0; j < DR_NUM_DIMENSIONS (data_reference); j++)
2686       lambda_collect_parameters_from_af (DR_ACCESS_FN (data_reference, j),
2687                                          parameter_set, parameters);
2688 }
2689
2690 /* Translates BASE_EXPR to vector CY.  AM is needed for inferring
2691    indexing positions in the data access vector.  CST is the analyzed
2692    integer constant.  */
2693
2694 static bool
2695 av_for_af_base (tree base_expr, lambda_vector cy, struct access_matrix *am,
2696                 int cst)
2697 {
2698   bool result = true;
2699
2700   switch (TREE_CODE (base_expr))
2701     {
2702     case INTEGER_CST:
2703       /* Constant part.  */
2704       cy[AM_CONST_COLUMN_INDEX (am)] += int_cst_value (base_expr) * cst;
2705       return true;
2706
2707     case SSA_NAME:
2708       {
2709         int param_index =
2710           access_matrix_get_index_for_parameter (base_expr, am);
2711
2712         if (param_index >= 0)
2713           {
2714             cy[param_index] = cst + cy[param_index];
2715             return true;
2716           }
2717
2718         return false;
2719       }
2720
2721     case PLUS_EXPR:
2722       return av_for_af_base (TREE_OPERAND (base_expr, 0), cy, am, cst)
2723         && av_for_af_base (TREE_OPERAND (base_expr, 1), cy, am, cst);
2724
2725     case MINUS_EXPR:
2726       return av_for_af_base (TREE_OPERAND (base_expr, 0), cy, am, cst)
2727         && av_for_af_base (TREE_OPERAND (base_expr, 1), cy, am, -1 * cst);
2728
2729     case MULT_EXPR:
2730       if (TREE_CODE (TREE_OPERAND (base_expr, 0)) == INTEGER_CST)
2731         result = av_for_af_base (TREE_OPERAND (base_expr, 1),
2732                                  cy, am, cst *
2733                                  int_cst_value (TREE_OPERAND (base_expr, 0)));
2734       else if (TREE_CODE (TREE_OPERAND (base_expr, 1)) == INTEGER_CST)
2735         result = av_for_af_base (TREE_OPERAND (base_expr, 0),
2736                                  cy, am, cst *
2737                                  int_cst_value (TREE_OPERAND (base_expr, 1)));
2738       else
2739         result = false;
2740
2741       return result;
2742
2743     case NEGATE_EXPR:
2744       return av_for_af_base (TREE_OPERAND (base_expr, 0), cy, am, -1 * cst);
2745
2746     default:
2747       return false;
2748     }
2749
2750   return result;
2751 }
2752
2753 /* Translates ACCESS_FUN to vector CY.  AM is needed for inferring
2754    indexing positions in the data access vector.  */
2755
2756 static bool
2757 av_for_af (tree access_fun, lambda_vector cy, struct access_matrix *am)
2758 {
2759   switch (TREE_CODE (access_fun))
2760     {
2761     case POLYNOMIAL_CHREC:
2762       {
2763         tree left = CHREC_LEFT (access_fun);
2764         tree right = CHREC_RIGHT (access_fun);
2765         unsigned var;
2766
2767         if (TREE_CODE (right) != INTEGER_CST)
2768           return false;
2769
2770         var = am_vector_index_for_loop (am, CHREC_VARIABLE (access_fun));
2771         cy[var] = int_cst_value (right);
2772
2773         if (TREE_CODE (left) == POLYNOMIAL_CHREC)
2774           return av_for_af (left, cy, am);
2775         else
2776           return av_for_af_base (left, cy, am, 1);
2777       }
2778
2779     case INTEGER_CST:
2780       /* Constant part.  */
2781       return av_for_af_base (access_fun, cy, am, 1);
2782
2783     default:
2784       return false;
2785     }
2786 }
2787
2788 /* Initializes the access matrix for DATA_REFERENCE.  */
2789
2790 static bool
2791 build_access_matrix (data_reference_p data_reference,
2792                      VEC (tree, heap) *parameters, int loop_nest_num)
2793 {
2794   struct access_matrix *am = GGC_NEW (struct access_matrix);
2795   unsigned i, ndim = DR_NUM_DIMENSIONS (data_reference);
2796   struct loop *loop = bb_for_stmt (DR_STMT (data_reference))->loop_father;
2797   struct loop *loop_nest = get_loop (loop_nest_num);
2798   unsigned nivs = loop_depth (loop) - loop_depth (loop_nest) + 1;
2799   unsigned lambda_nb_columns;
2800   lambda_vector_vec_p matrix;
2801
2802   AM_LOOP_NEST_NUM (am) = loop_nest_num;
2803   AM_NB_INDUCTION_VARS (am) = nivs;
2804   AM_PARAMETERS (am) = parameters;
2805
2806   lambda_nb_columns = AM_NB_COLUMNS (am);
2807   matrix = VEC_alloc (lambda_vector, heap, lambda_nb_columns);
2808   AM_MATRIX (am) = matrix;
2809
2810   for (i = 0; i < ndim; i++)
2811     {
2812       lambda_vector access_vector = lambda_vector_new (lambda_nb_columns);
2813       tree access_function = DR_ACCESS_FN (data_reference, i);
2814
2815       if (!av_for_af (access_function, access_vector, am))
2816         return false;
2817
2818       VEC_safe_push (lambda_vector, heap, matrix, access_vector);
2819     }
2820
2821   DR_ACCESS_MATRIX (data_reference) = am;
2822   return true;
2823 }
2824
2825 /* Returns false when one of the access matrices cannot be built.  */
2826
2827 bool
2828 lambda_compute_access_matrices (VEC (data_reference_p, heap) *datarefs,
2829                                 VEC (tree, heap) *parameters,
2830                                 int loop_nest_num)
2831 {
2832   data_reference_p dataref;
2833   unsigned ix;
2834
2835   for (ix = 0; VEC_iterate (data_reference_p, datarefs, ix, dataref); ix++)
2836     if (!build_access_matrix (dataref, parameters, loop_nest_num))
2837       return false;
2838
2839   return true;
2840 }