gcc/lambda-code.c

   1 /*  Loop transformation code generation
   2     Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
   3     Contributed by Daniel Berlin <dberlin@dberlin.org>
   4
   5     This file is part of GCC.
   6
   7     GCC is free software; you can redistribute it and/or modify it under
   8     the terms of the GNU General Public License as published by the Free
   9     Software Foundation; either version 3, or (at your option) any later
  10     version.
  11
  12     GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13     WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14     FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15     for more details.
  16
  17     You should have received a copy of the GNU General Public License
  18     along with GCC; see the file COPYING3.  If not see
  19     <http://www.gnu.org/licenses/>.  */
  20
  21 #include "config.h"
  22 #include "system.h"
  23 #include "coretypes.h"
  24 #include "tm.h"
  25 #include "ggc.h"
  26 #include "tree.h"
  27 #include "target.h"
  28 #include "rtl.h"
  29 #include "basic-block.h"
  30 #include "diagnostic.h"
  31 #include "tree-flow.h"
  32 #include "tree-dump.h"
  33 #include "timevar.h"
  34 #include "cfgloop.h"
  35 #include "expr.h"
  36 #include "optabs.h"
  37 #include "tree-chrec.h"
  38 #include "tree-data-ref.h"
  39 #include "tree-pass.h"
  40 #include "tree-scalar-evolution.h"
  41 #include "vec.h"
  42 #include "lambda.h"
  43 #include "vecprim.h"
  44
  45 /* This loop nest code generation is based on non-singular matrix
  46    math.
  47
  48  A little terminology and a general sketch of the algorithm.  See "A singular
  49  loop transformation framework based on non-singular matrices" by Wei Li and
  50  Keshav Pingali for formal proofs that the various statements below are
  51  correct.
  52
  53  A loop iteration space represents the points traversed by the loop.  A point in the
  54  iteration space can be represented by a vector of size <loop depth>.  You can
   55  therefore represent the iteration space as an integral combination of a set
  56  of basis vectors.
  57
  58  A loop iteration space is dense if every integer point between the loop
  59  bounds is a point in the iteration space.  Every loop with a step of 1
  60  therefore has a dense iteration space.
  61
  62  for i = 1 to 3, step 1 is a dense iteration space.
  63
  64  A loop iteration space is sparse if it is not dense.  That is, the iteration
  65  space skips integer points that are within the loop bounds.
  66
  67  for i = 1 to 3, step 2 is a sparse iteration space, because the integer point
  68  2 is skipped.
  69
  70  Dense source spaces are easy to transform, because they don't skip any
  71  points to begin with.  Thus we can compute the exact bounds of the target
  72  space using min/max and floor/ceil.
  73
  74  For a dense source space, we take the transformation matrix, decompose it
  75  into a lower triangular part (H) and a unimodular part (U).
  76  We then compute the auxiliary space from the unimodular part (source loop
   77  nest . U = auxiliary space), which has two important properties:
  78   1. It traverses the iterations in the same lexicographic order as the source
  79   space.
  80   2. It is a dense space when the source is a dense space (even if the target
  81   space is going to be sparse).
  82
  83  Given the auxiliary space, we use the lower triangular part to compute the
  84  bounds in the target space by simple matrix multiplication.
  85  The gaps in the target space (IE the new loop step sizes) will be the
  86  diagonals of the H matrix.
  87
  88  Sparse source spaces require another step, because you can't directly compute
  89  the exact bounds of the auxiliary and target space from the sparse space.
  90  Rather than try to come up with a separate algorithm to handle sparse source
  91  spaces directly, we just find a legal transformation matrix that gives you
  92  the sparse source space, from a dense space, and then transform the dense
  93  space.
  94
  95  For a regular sparse space, you can represent the source space as an integer
  96  lattice, and the base space of that lattice will always be dense.  Thus, we
  97  effectively use the lattice to figure out the transformation from the lattice
  98  base space, to the sparse iteration space (IE what transform was applied to
  99  the dense space to make it sparse).  We then compose this transform with the
 100  transformation matrix specified by the user (since our matrix transformations
 101  are closed under composition, this is okay).  We can then use the base space
 102  (which is dense) plus the composed transformation matrix, to compute the rest
 103  of the transform using the dense space algorithm above.
 104
 105  In other words, our sparse source space (B) is decomposed into a dense base
 106  space (A), and a matrix (L) that transforms A into B, such that A.L = B.
 107  We then compute the composition of L and the user transformation matrix (T),
 108  so that T is now a transform from A to the result, instead of from B to the
 109  result.
 110  IE A.(LT) = result instead of B.T = result
 111  Since A is now a dense source space, we can use the dense source space
 112  algorithm above to compute the result of applying transform (LT) to A.
 113
 114  Fourier-Motzkin elimination is used to compute the bounds of the base space
 115  of the lattice.  */
 116
 117 static bool perfect_nestify (struct loops *,
 118                              struct loop *, VEC(tree,heap) *,
 119                              VEC(tree,heap) *, VEC(int,heap) *,
 120                              VEC(tree,heap) *);
 121 /* Lattice stuff that is internal to the code generation algorithm.  */
 122
 123 typedef struct
 124 {
 125   /* Lattice base matrix.  */
 126   lambda_matrix base;
 127   /* Lattice dimension.  */
 128   int dimension;
 129   /* Origin vector for the coefficients.  */
 130   lambda_vector origin;
 131   /* Origin matrix for the invariants.  */
 132   lambda_matrix origin_invariants;
 133   /* Number of invariants.  */
 134   int invariants;
 135 } *lambda_lattice;
 136
 137 #define LATTICE_BASE(T) ((T)->base)
 138 #define LATTICE_DIMENSION(T) ((T)->dimension)
 139 #define LATTICE_ORIGIN(T) ((T)->origin)
 140 #define LATTICE_ORIGIN_INVARIANTS(T) ((T)->origin_invariants)
 141 #define LATTICE_INVARIANTS(T) ((T)->invariants)
 142
 143 static bool lle_equal (lambda_linear_expression, lambda_linear_expression,
 144                        int, int);
 145 static lambda_lattice lambda_lattice_new (int, int);
 146 static lambda_lattice lambda_lattice_compute_base (lambda_loopnest);
 147
 148 static tree find_induction_var_from_exit_cond (struct loop *);
 149 static bool can_convert_to_perfect_nest (struct loop *);
 150
 151 /* Create a new lambda body vector.  */
 152
 153 lambda_body_vector
 154 lambda_body_vector_new (int size)
 155 {
 156   lambda_body_vector ret;
 157
 158   ret = ggc_alloc (sizeof (*ret));
 159   LBV_COEFFICIENTS (ret) = lambda_vector_new (size);
 160   LBV_SIZE (ret) = size;
 161   LBV_DENOMINATOR (ret) = 1;
 162   return ret;
 163 }
 164
 165 /* Compute the new coefficients for the vector based on the
 166   *inverse* of the transformation matrix.  */
 167
 168 lambda_body_vector
 169 lambda_body_vector_compute_new (lambda_trans_matrix transform,
 170                                 lambda_body_vector vect)
 171 {
 172   lambda_body_vector temp;
 173   int depth;
 174
 175   /* Make sure the matrix is square.  */
 176   gcc_assert (LTM_ROWSIZE (transform) == LTM_COLSIZE (transform));
 177
 178   depth = LTM_ROWSIZE (transform);
 179
 180   temp = lambda_body_vector_new (depth);
 181   LBV_DENOMINATOR (temp) =
 182     LBV_DENOMINATOR (vect) * LTM_DENOMINATOR (transform);
 183   lambda_vector_matrix_mult (LBV_COEFFICIENTS (vect), depth,
 184                              LTM_MATRIX (transform), depth,
 185                              LBV_COEFFICIENTS (temp));
 186   LBV_SIZE (temp) = LBV_SIZE (vect);
 187   return temp;
 188 }
 189
 190 /* Print out a lambda body vector.  */
 191
 192 void
 193 print_lambda_body_vector (FILE * outfile, lambda_body_vector body)
 194 {
 195   print_lambda_vector (outfile, LBV_COEFFICIENTS (body), LBV_SIZE (body));
 196 }
 197
 198 /* Return TRUE if two linear expressions are equal.  */
 199
 200 static bool
 201 lle_equal (lambda_linear_expression lle1, lambda_linear_expression lle2,
 202            int depth, int invariants)
 203 {
 204   int i;
 205
 206   if (lle1 == NULL || lle2 == NULL)
 207     return false;
 208   if (LLE_CONSTANT (lle1) != LLE_CONSTANT (lle2))
 209     return false;
 210   if (LLE_DENOMINATOR (lle1) != LLE_DENOMINATOR (lle2))
 211     return false;
 212   for (i = 0; i < depth; i++)
 213     if (LLE_COEFFICIENTS (lle1)[i] != LLE_COEFFICIENTS (lle2)[i])
 214       return false;
 215   for (i = 0; i < invariants; i++)
 216     if (LLE_INVARIANT_COEFFICIENTS (lle1)[i] !=
 217         LLE_INVARIANT_COEFFICIENTS (lle2)[i])
 218       return false;
 219   return true;
 220 }
 221
 222 /* Create a new linear expression with dimension DIM, and total number
 223    of invariants INVARIANTS.  */
 224
 225 lambda_linear_expression
 226 lambda_linear_expression_new (int dim, int invariants)
 227 {
 228   lambda_linear_expression ret;
 229
 230   ret = ggc_alloc_cleared (sizeof (*ret));
 231
 232   LLE_COEFFICIENTS (ret) = lambda_vector_new (dim);
 233   LLE_CONSTANT (ret) = 0;
 234   LLE_INVARIANT_COEFFICIENTS (ret) = lambda_vector_new (invariants);
 235   LLE_DENOMINATOR (ret) = 1;
 236   LLE_NEXT (ret) = NULL;
 237
 238   return ret;
 239 }
 240
 241 /* Print out a linear expression EXPR, with SIZE coefficients, to OUTFILE.
 242    The starting letter used for variable names is START.  */
 243
 244 static void
 245 print_linear_expression (FILE * outfile, lambda_vector expr, int size,
 246                          char start)
 247 {
 248   int i;
 249   bool first = true;
 250   for (i = 0; i < size; i++)
 251     {
 252       if (expr[i] != 0)
 253         {
 254           if (first)
 255             {
 256               if (expr[i] < 0)
 257                 fprintf (outfile, "-");
 258               first = false;
 259             }
 260           else if (expr[i] > 0)
 261             fprintf (outfile, " + ");
 262           else
 263             fprintf (outfile, " - ");
 264           if (abs (expr[i]) == 1)
 265             fprintf (outfile, "%c", start + i);
 266           else
 267             fprintf (outfile, "%d%c", abs (expr[i]), start + i);
 268         }
 269     }
 270 }
 271
 272 /* Print out a lambda linear expression structure, EXPR, to OUTFILE. The
 273    depth/number of coefficients is given by DEPTH, the number of invariants is
 274    given by INVARIANTS, and the character to start variable names with is given
 275    by START.  */
 276
 277 void
 278 print_lambda_linear_expression (FILE * outfile,
 279                                 lambda_linear_expression expr,
 280                                 int depth, int invariants, char start)
 281 {
 282   fprintf (outfile, "\tLinear expression: ");
 283   print_linear_expression (outfile, LLE_COEFFICIENTS (expr), depth, start);
 284   fprintf (outfile, " constant: %d ", LLE_CONSTANT (expr));
 285   fprintf (outfile, "  invariants: ");
 286   print_linear_expression (outfile, LLE_INVARIANT_COEFFICIENTS (expr),
 287                            invariants, 'A');
 288   fprintf (outfile, "  denominator: %d\n", LLE_DENOMINATOR (expr));
 289 }
 290
 291 /* Print a lambda loop structure LOOP to OUTFILE.  The depth/number of
 292    coefficients is given by DEPTH, the number of invariants is
 293    given by INVARIANTS, and the character to start variable names with is given
 294    by START.  */
 295
 296 void
 297 print_lambda_loop (FILE * outfile, lambda_loop loop, int depth,
 298                    int invariants, char start)
 299 {
 300   int step;
 301   lambda_linear_expression expr;
 302
 303   gcc_assert (loop);
 304
 305   expr = LL_LINEAR_OFFSET (loop);
 306   step = LL_STEP (loop);
 307   fprintf (outfile, "  step size = %d \n", step);
 308
 309   if (expr)
 310     {
 311       fprintf (outfile, "  linear offset: \n");
 312       print_lambda_linear_expression (outfile, expr, depth, invariants,
 313                                       start);
 314     }
 315
 316   fprintf (outfile, "  lower bound: \n");
 317   for (expr = LL_LOWER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr))
 318     print_lambda_linear_expression (outfile, expr, depth, invariants, start);
 319   fprintf (outfile, "  upper bound: \n");
 320   for (expr = LL_UPPER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr))
 321     print_lambda_linear_expression (outfile, expr, depth, invariants, start);
 322 }
 323
 324 /* Create a new loop nest structure with DEPTH loops, and INVARIANTS as the
 325    number of invariants.  */
 326
 327 lambda_loopnest
 328 lambda_loopnest_new (int depth, int invariants)
 329 {
 330   lambda_loopnest ret;
 331   ret = ggc_alloc (sizeof (*ret));
 332
 333   LN_LOOPS (ret) = ggc_alloc_cleared (depth * sizeof (lambda_loop));
 334   LN_DEPTH (ret) = depth;
 335   LN_INVARIANTS (ret) = invariants;
 336
 337   return ret;
 338 }
 339
 340 /* Print a lambda loopnest structure, NEST, to OUTFILE.  The starting
 341    character to use for loop names is given by START.  */
 342
 343 void
 344 print_lambda_loopnest (FILE * outfile, lambda_loopnest nest, char start)
 345 {
 346   int i;
 347   for (i = 0; i < LN_DEPTH (nest); i++)
 348     {
 349       fprintf (outfile, "Loop %c\n", start + i);
 350       print_lambda_loop (outfile, LN_LOOPS (nest)[i], LN_DEPTH (nest),
 351                          LN_INVARIANTS (nest), 'i');
 352       fprintf (outfile, "\n");
 353     }
 354 }
 355
 356 /* Allocate a new lattice structure of DEPTH x DEPTH, with INVARIANTS number
 357    of invariants.  */
 358
 359 static lambda_lattice
 360 lambda_lattice_new (int depth, int invariants)
 361 {
 362   lambda_lattice ret;
 363   ret = ggc_alloc (sizeof (*ret));
 364   LATTICE_BASE (ret) = lambda_matrix_new (depth, depth);
 365   LATTICE_ORIGIN (ret) = lambda_vector_new (depth);
 366   LATTICE_ORIGIN_INVARIANTS (ret) = lambda_matrix_new (depth, invariants);
 367   LATTICE_DIMENSION (ret) = depth;
 368   LATTICE_INVARIANTS (ret) = invariants;
 369   return ret;
 370 }
 371
 372 /* Compute the lattice base for NEST.  The lattice base is essentially a
 373    non-singular transform from a dense base space to a sparse iteration space.
 374    We use it so that we don't have to specially handle the case of a sparse
 375    iteration space in other parts of the algorithm.  As a result, this routine
 376    only does something interesting (IE produce a matrix that isn't the
 377    identity matrix) if NEST is a sparse space.  */
 378
 379 static lambda_lattice
 380 lambda_lattice_compute_base (lambda_loopnest nest)
 381 {
 382   lambda_lattice ret;
 383   int depth, invariants;
 384   lambda_matrix base;
 385
 386   int i, j, step;
 387   lambda_loop loop;
 388   lambda_linear_expression expression;
 389
 390   depth = LN_DEPTH (nest);
 391   invariants = LN_INVARIANTS (nest);
 392
 393   ret = lambda_lattice_new (depth, invariants);
 394   base = LATTICE_BASE (ret);
 395   for (i = 0; i < depth; i++)
 396     {
 397       loop = LN_LOOPS (nest)[i];
 398       gcc_assert (loop);
 399       step = LL_STEP (loop);
 400       /* If we have a step of 1, then the base is one, and the
 401          origin and invariant coefficients are 0.  */
 402       if (step == 1)
 403         {
 404           for (j = 0; j < depth; j++)
 405             base[i][j] = 0;
 406           base[i][i] = 1;
 407           LATTICE_ORIGIN (ret)[i] = 0;
 408           for (j = 0; j < invariants; j++)
 409             LATTICE_ORIGIN_INVARIANTS (ret)[i][j] = 0;
 410         }
 411       else
 412         {
 413           /* Otherwise, we need the lower bound expression (which must
 414              be an affine function)  to determine the base.  */
 415           expression = LL_LOWER_BOUND (loop);
 416           gcc_assert (expression && !LLE_NEXT (expression)
 417                       && LLE_DENOMINATOR (expression) == 1);
 418
 419           /* The lower triangular portion of the base is going to be the
 420              coefficient times the step */
 421           for (j = 0; j < i; j++)
 422             base[i][j] = LLE_COEFFICIENTS (expression)[j]
 423               * LL_STEP (LN_LOOPS (nest)[j]);
 424           base[i][i] = step;
 425           for (j = i + 1; j < depth; j++)
 426             base[i][j] = 0;
 427
 428           /* Origin for this loop is the constant of the lower bound
 429              expression.  */
 430           LATTICE_ORIGIN (ret)[i] = LLE_CONSTANT (expression);
 431
 432           /* Coefficient for the invariants are equal to the invariant
 433              coefficients in the expression.  */
 434           for (j = 0; j < invariants; j++)
 435             LATTICE_ORIGIN_INVARIANTS (ret)[i][j] =
 436               LLE_INVARIANT_COEFFICIENTS (expression)[j];
 437         }
 438     }
 439   return ret;
 440 }
 441
 442 /* Compute the least common multiple of two numbers A and B .  */
 443
 444 static int
 445 lcm (int a, int b)
 446 {
 447   return (abs (a) * abs (b) / gcd (a, b));
 448 }
 449
 450 /* Perform Fourier-Motzkin elimination to calculate the bounds of the
 451    auxiliary nest.
 452    Fourier-Motzkin is a way of reducing systems of linear inequalities so that
 453    it is easy to calculate the answer and bounds.
 454    A sketch of how it works:
 455    Given a system of linear inequalities, ai * xj >= bk, you can always
 456    rewrite the constraints so they are all of the form
 457    a <= x, or x <= b, or x >= constant for some x in x1 ... xj (and some b
 458    in b1 ... bk, and some a in a1...ai)
 459    You can then eliminate this x from the non-constant inequalities by
 460    rewriting these as a <= b, x >= constant, and delete the x variable.
 461    You can then repeat this for any remaining x variables, and then we have
 462    an easy to use variable <= constant (or no variables at all) form that we
 463    can construct our bounds from.
 464
 465    In our case, each time we eliminate, we construct part of the bound from
 466    the ith variable, then delete the ith variable.
 467
 468    Remember the constant are in our vector a, our coefficient matrix is A,
 469    and our invariant coefficient matrix is B.
 470
 471    SIZE is the size of the matrices being passed.
 472    DEPTH is the loop nest depth.
 473    INVARIANTS is the number of loop invariants.
 474    A, B, and a are the coefficient matrix, invariant coefficient, and a
 475    vector of constants, respectively.  */
 476
 477 static lambda_loopnest
 478 compute_nest_using_fourier_motzkin (int size,
 479                                     int depth,
 480                                     int invariants,
 481                                     lambda_matrix A,
 482                                     lambda_matrix B,
 483                                     lambda_vector a)
 484 {
 485
 486   int multiple, f1, f2;
 487   int i, j, k;
 488   lambda_linear_expression expression;
 489   lambda_loop loop;
 490   lambda_loopnest auxillary_nest;
 491   lambda_matrix swapmatrix, A1, B1;
 492   lambda_vector swapvector, a1;
 493   int newsize;
 494
 495   A1 = lambda_matrix_new (128, depth);
 496   B1 = lambda_matrix_new (128, invariants);
 497   a1 = lambda_vector_new (128);
 498
 499   auxillary_nest = lambda_loopnest_new (depth, invariants);
 500
 501   for (i = depth - 1; i >= 0; i--)
 502     {
 503       loop = lambda_loop_new ();
 504       LN_LOOPS (auxillary_nest)[i] = loop;
 505       LL_STEP (loop) = 1;
 506
 507       for (j = 0; j < size; j++)
 508         {
 509           if (A[j][i] < 0)
 510             {
 511               /* Any linear expression in the matrix with a coefficient less
 512                  than 0 becomes part of the new lower bound.  */
 513               expression = lambda_linear_expression_new (depth, invariants);
 514
 515               for (k = 0; k < i; k++)
 516                 LLE_COEFFICIENTS (expression)[k] = A[j][k];
 517
 518               for (k = 0; k < invariants; k++)
 519                 LLE_INVARIANT_COEFFICIENTS (expression)[k] = -1 * B[j][k];
 520
 521               LLE_DENOMINATOR (expression) = -1 * A[j][i];
 522               LLE_CONSTANT (expression) = -1 * a[j];
 523
 524               /* Ignore if identical to the existing lower bound.  */
 525               if (!lle_equal (LL_LOWER_BOUND (loop),
 526                               expression, depth, invariants))
 527                 {
 528                   LLE_NEXT (expression) = LL_LOWER_BOUND (loop);
 529                   LL_LOWER_BOUND (loop) = expression;
 530                 }
 531
 532             }
 533           else if (A[j][i] > 0)
 534             {
 535               /* Any linear expression with a coefficient greater than 0
 536                  becomes part of the new upper bound.  */
 537               expression = lambda_linear_expression_new (depth, invariants);
 538               for (k = 0; k < i; k++)
 539                 LLE_COEFFICIENTS (expression)[k] = -1 * A[j][k];
 540
 541               for (k = 0; k < invariants; k++)
 542                 LLE_INVARIANT_COEFFICIENTS (expression)[k] = B[j][k];
 543
 544               LLE_DENOMINATOR (expression) = A[j][i];
 545               LLE_CONSTANT (expression) = a[j];
 546
 547               /* Ignore if identical to the existing upper bound.  */
 548               if (!lle_equal (LL_UPPER_BOUND (loop),
 549                               expression, depth, invariants))
 550                 {
 551                   LLE_NEXT (expression) = LL_UPPER_BOUND (loop);
 552                   LL_UPPER_BOUND (loop) = expression;
 553                 }
 554
 555             }
 556         }
 557
 558       /* This portion creates a new system of linear inequalities by deleting
 559          the i'th variable, reducing the system by one variable.  */
 560       newsize = 0;
 561       for (j = 0; j < size; j++)
 562         {
 563           /* If the coefficient for the i'th variable is 0, then we can just
 564              eliminate the variable straightaway.  Otherwise, we have to
 565              multiply through by the coefficients we are eliminating.  */
 566           if (A[j][i] == 0)
 567             {
 568               lambda_vector_copy (A[j], A1[newsize], depth);
 569               lambda_vector_copy (B[j], B1[newsize], invariants);
 570               a1[newsize] = a[j];
 571               newsize++;
 572             }
 573           else if (A[j][i] > 0)
 574             {
 575               for (k = 0; k < size; k++)
 576                 {
 577                   if (A[k][i] < 0)
 578                     {
 579                       multiple = lcm (A[j][i], A[k][i]);
 580                       f1 = multiple / A[j][i];
 581                       f2 = -1 * multiple / A[k][i];
 582
 583                       lambda_vector_add_mc (A[j], f1, A[k], f2,
 584                                             A1[newsize], depth);
 585                       lambda_vector_add_mc (B[j], f1, B[k], f2,
 586                                             B1[newsize], invariants);
 587                       a1[newsize] = f1 * a[j] + f2 * a[k];
 588                       newsize++;
 589                     }
 590                 }
 591             }
 592         }
 593
 594       swapmatrix = A;
 595       A = A1;
 596       A1 = swapmatrix;
 597
 598       swapmatrix = B;
 599       B = B1;
 600       B1 = swapmatrix;
 601
 602       swapvector = a;
 603       a = a1;
 604       a1 = swapvector;
 605
 606       size = newsize;
 607     }
 608
 609   return auxillary_nest;
 610 }
 611
 612 /* Compute the loop bounds for the auxiliary space NEST.
 613    Input system used is Ax <= b.  TRANS is the unimodular transformation.
 614    Given the original nest, this function will
 615    1. Convert the nest into matrix form, which consists of a matrix for the
 616    coefficients, a matrix for the
 617    invariant coefficients, and a vector for the constants.
 618    2. Use the matrix form to calculate the lattice base for the nest (which is
 619    a dense space)
 620    3. Compose the dense space transform with the user specified transform, to
 621    get a transform we can easily calculate transformed bounds for.
 622    4. Multiply the composed transformation matrix times the matrix form of the
 623    loop.
 624    5. Transform the newly created matrix (from step 4) back into a loop nest
 625    using Fourier-Motzkin elimination to figure out the bounds.  */
 626
 627 static lambda_loopnest
 628 lambda_compute_auxillary_space (lambda_loopnest nest,
 629                                 lambda_trans_matrix trans)
 630 {
 631   lambda_matrix A, B, A1, B1;
 632   lambda_vector a, a1;
 633   lambda_matrix invertedtrans;
 634   int depth, invariants, size;
 635   int i, j;
 636   lambda_loop loop;
 637   lambda_linear_expression expression;
 638   lambda_lattice lattice;
 639
 640   depth = LN_DEPTH (nest);
 641   invariants = LN_INVARIANTS (nest);
 642
 643   /* Unfortunately, we can't know the number of constraints we'll have
 644      ahead of time, but this should be enough even in ridiculous loop nest
 645      cases. We must not go over this limit.  */
 646   A = lambda_matrix_new (128, depth);
 647   B = lambda_matrix_new (128, invariants);
 648   a = lambda_vector_new (128);
 649
 650   A1 = lambda_matrix_new (128, depth);
 651   B1 = lambda_matrix_new (128, invariants);
 652   a1 = lambda_vector_new (128);
 653
 654   /* Store the bounds in the equation matrix A, constant vector a, and
 655      invariant matrix B, so that we have Ax <= a + B.
 656      This requires a little equation rearranging so that everything is on the
 657      correct side of the inequality.  */
 658   size = 0;
 659   for (i = 0; i < depth; i++)
 660     {
 661       loop = LN_LOOPS (nest)[i];
 662
 663       /* First we do the lower bound.  */
 664       if (LL_STEP (loop) > 0)
 665         expression = LL_LOWER_BOUND (loop);
 666       else
 667         expression = LL_UPPER_BOUND (loop);
 668
 669       for (; expression != NULL; expression = LLE_NEXT (expression))
 670         {
 671           /* Fill in the coefficient.  */
 672           for (j = 0; j < i; j++)
 673             A[size][j] = LLE_COEFFICIENTS (expression)[j];
 674
 675           /* And the invariant coefficient.  */
 676           for (j = 0; j < invariants; j++)
 677             B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j];
 678
 679           /* And the constant.  */
 680           a[size] = LLE_CONSTANT (expression);
 681
 682           /* Convert (2x+3y+2+b)/4 <= z to 2x+3y-4z <= -2-b.  IE put all
 683              constants and single variables on   */
 684           A[size][i] = -1 * LLE_DENOMINATOR (expression);
 685           a[size] *= -1;
 686           for (j = 0; j < invariants; j++)
 687             B[size][j] *= -1;
 688
 689           size++;
 690           /* Need to increase matrix sizes above.  */
 691           gcc_assert (size <= 127);
 692
 693         }
 694
 695       /* Then do the exact same thing for the upper bounds.  */
 696       if (LL_STEP (loop) > 0)
 697         expression = LL_UPPER_BOUND (loop);
 698       else
 699         expression = LL_LOWER_BOUND (loop);
 700
 701       for (; expression != NULL; expression = LLE_NEXT (expression))
 702         {
 703           /* Fill in the coefficient.  */
 704           for (j = 0; j < i; j++)
 705             A[size][j] = LLE_COEFFICIENTS (expression)[j];
 706
 707           /* And the invariant coefficient.  */
 708           for (j = 0; j < invariants; j++)
 709             B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j];
 710
 711           /* And the constant.  */
 712           a[size] = LLE_CONSTANT (expression);
 713
 714           /* Convert z <= (2x+3y+2+b)/4 to -2x-3y+4z <= 2+b.  */
 715           for (j = 0; j < i; j++)
 716             A[size][j] *= -1;
 717           A[size][i] = LLE_DENOMINATOR (expression);
 718           size++;
 719           /* Need to increase matrix sizes above.  */
 720           gcc_assert (size <= 127);
 721
 722         }
 723     }
 724
 725   /* Compute the lattice base x = base * y + origin, where y is the
 726      base space.  */
 727   lattice = lambda_lattice_compute_base (nest);
 728
 729   /* Ax <= a + B then becomes ALy <= a+B - A*origin.  L is the lattice base  */
 730
 731   /* A1 = A * L */
 732   lambda_matrix_mult (A, LATTICE_BASE (lattice), A1, size, depth, depth);
 733
 734   /* a1 = a - A * origin constant.  */
 735   lambda_matrix_vector_mult (A, size, depth, LATTICE_ORIGIN (lattice), a1);
 736   lambda_vector_add_mc (a, 1, a1, -1, a1, size);
 737
 738   /* B1 = B - A * origin invariant.  */
 739   lambda_matrix_mult (A, LATTICE_ORIGIN_INVARIANTS (lattice), B1, size, depth,
 740                       invariants);
 741   lambda_matrix_add_mc (B, 1, B1, -1, B1, size, invariants);
 742
 743   /* Now compute the auxiliary space bounds by first inverting U, multiplying
 744      it by A1, then performing Fourier-Motzkin.  */
 745
 746   invertedtrans = lambda_matrix_new (depth, depth);
 747
 748   /* Compute the inverse of U.  */
 749   lambda_matrix_inverse (LTM_MATRIX (trans),
 750                          invertedtrans, depth);
 751
 752   /* A = A1 inv(U).  */
 753   lambda_matrix_mult (A1, invertedtrans, A, size, depth, depth);
 754
 755   return compute_nest_using_fourier_motzkin (size, depth, invariants,
 756                                              A, B1, a1);
 757 }
 758
 759 /* Compute the loop bounds for the target space, using the bounds of
 760    the auxiliary nest AUXILLARY_NEST, and the triangular matrix H.
 761    The target space loop bounds are computed by multiplying the triangular
 762    matrix H by the auxiliary nest, to get the new loop bounds.  The sign of
 763    the loop steps (positive or negative) is then used to swap the bounds if
 764    the loop counts downwards.
 765    Return the target loopnest.  */
 766
 767 static lambda_loopnest
 768 lambda_compute_target_space (lambda_loopnest auxillary_nest,
 769                              lambda_trans_matrix H, lambda_vector stepsigns)
 770 {
 771   lambda_matrix inverse, H1;
 772   int determinant, i, j;
 773   int gcd1, gcd2;
 774   int factor;
 775
 776   lambda_loopnest target_nest;
 777   int depth, invariants;
 778   lambda_matrix target;
 779
 780   lambda_loop auxillary_loop, target_loop;
 781   lambda_linear_expression expression, auxillary_expr, target_expr, tmp_expr;
 782
 783   depth = LN_DEPTH (auxillary_nest);
 784   invariants = LN_INVARIANTS (auxillary_nest);
 785
 786   inverse = lambda_matrix_new (depth, depth);
 787   determinant = lambda_matrix_inverse (LTM_MATRIX (H), inverse, depth);
 788
 789   /* H1 is H excluding its diagonal.  */
 790   H1 = lambda_matrix_new (depth, depth);
 791   lambda_matrix_copy (LTM_MATRIX (H), H1, depth, depth);
 792
 793   for (i = 0; i < depth; i++)
 794     H1[i][i] = 0;
 795
 796   /* Computes the linear offsets of the loop bounds.  */
 797   target = lambda_matrix_new (depth, depth);
 798   lambda_matrix_mult (H1, inverse, target, depth, depth, depth);
 799
 800   target_nest = lambda_loopnest_new (depth, invariants);
 801
 802   for (i = 0; i < depth; i++)
 803     {
 804
 805       /* Get a new loop structure.  */
 806       target_loop = lambda_loop_new ();
 807       LN_LOOPS (target_nest)[i] = target_loop;
 808
 809       /* Computes the gcd of the coefficients of the linear part.  */
 810       gcd1 = lambda_vector_gcd (target[i], i);
 811
 812       /* Include the denominator in the GCD.  */
 813       gcd1 = gcd (gcd1, determinant);
 814
 815       /* Now divide through by the gcd.  */
 816       for (j = 0; j < i; j++)
 817         target[i][j] = target[i][j] / gcd1;
 818
 819       expression = lambda_linear_expression_new (depth, invariants);
 820       lambda_vector_copy (target[i], LLE_COEFFICIENTS (expression), depth);
 821       LLE_DENOMINATOR (expression) = determinant / gcd1;
 822       LLE_CONSTANT (expression) = 0;
 823       lambda_vector_clear (LLE_INVARIANT_COEFFICIENTS (expression),
 824                            invariants);
 825       LL_LINEAR_OFFSET (target_loop) = expression;
 826     }
 827
 828   /* For each loop, compute the new bounds from H.  */
 829   for (i = 0; i < depth; i++)
 830     {
 831       auxillary_loop = LN_LOOPS (auxillary_nest)[i];
 832       target_loop = LN_LOOPS (target_nest)[i];
 833       LL_STEP (target_loop) = LTM_MATRIX (H)[i][i];
 834       factor = LTM_MATRIX (H)[i][i];
 835
 836       /* First we do the lower bound.  */
 837       auxillary_expr = LL_LOWER_BOUND (auxillary_loop);
 838
 839       for (; auxillary_expr != NULL;
 840            auxillary_expr = LLE_NEXT (auxillary_expr))
 841         {
 842           target_expr = lambda_linear_expression_new (depth, invariants);
 843           lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
 844                                      depth, inverse, depth,
 845                                      LLE_COEFFICIENTS (target_expr));
 846           lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
 847                                     LLE_COEFFICIENTS (target_expr), depth,
 848                                     factor);
 849
 850           LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
 851           lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
 852                               LLE_INVARIANT_COEFFICIENTS (target_expr),
 853                               invariants);
 854           lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
 855                                     LLE_INVARIANT_COEFFICIENTS (target_expr),
 856                                     invariants, factor);
 857           LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);
 858
 859           if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
 860             {
 861               LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
 862                 * determinant;
 863               lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
 864                                         (target_expr),
 865                                         LLE_INVARIANT_COEFFICIENTS
 866                                         (target_expr), invariants,
 867                                         determinant);
 868               LLE_DENOMINATOR (target_expr) =
 869                 LLE_DENOMINATOR (target_expr) * determinant;
 870             }
 871           /* Find the gcd and divide by it here, rather than doing it
 872              at the tree level.  */
 873           gcd1 = lambda_vector_gcd (LLE_COEFFICIENTS (target_expr), depth);
 874           gcd2 = lambda_vector_gcd (LLE_INVARIANT_COEFFICIENTS (target_expr),
 875                                     invariants);
 876           gcd1 = gcd (gcd1, gcd2);
 877           gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
 878           gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
 879           for (j = 0; j < depth; j++)
 880             LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
 881           for (j = 0; j < invariants; j++)
 882             LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
 883           LLE_CONSTANT (target_expr) /= gcd1;
 884           LLE_DENOMINATOR (target_expr) /= gcd1;
 885           /* Ignore if identical to existing bound.  */
 886           if (!lle_equal (LL_LOWER_BOUND (target_loop), target_expr, depth,
 887                           invariants))
 888             {
 889               LLE_NEXT (target_expr) = LL_LOWER_BOUND (target_loop);
 890               LL_LOWER_BOUND (target_loop) = target_expr;
 891             }
 892         }
 893       /* Now do the upper bound.  */
 894       auxillary_expr = LL_UPPER_BOUND (auxillary_loop);
 895
 896       for (; auxillary_expr != NULL;
 897            auxillary_expr = LLE_NEXT (auxillary_expr))
 898         {
 899           target_expr = lambda_linear_expression_new (depth, invariants);
 900           lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
 901                                      depth, inverse, depth,
 902                                      LLE_COEFFICIENTS (target_expr));
 903           lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
 904                                     LLE_COEFFICIENTS (target_expr), depth,
 905                                     factor);
 906           LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
 907           lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
 908                               LLE_INVARIANT_COEFFICIENTS (target_expr),
 909                               invariants);
 910           lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
 911                                     LLE_INVARIANT_COEFFICIENTS (target_expr),
 912                                     invariants, factor);
 913           LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);
 914
 915           if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
 916             {
 917               LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
 918                 * determinant;
 919               lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
 920                                         (target_expr),
 921                                         LLE_INVARIANT_COEFFICIENTS
 922                                         (target_expr), invariants,
 923                                         determinant);
 924               LLE_DENOMINATOR (target_expr) =
 925                 LLE_DENOMINATOR (target_expr) * determinant;
 926             }
 927           /* Find the gcd and divide by it here, instead of at the
 928              tree level.  */
 929           gcd1 = lambda_vector_gcd (LLE_COEFFICIENTS (target_expr), depth);
 930           gcd2 = lambda_vector_gcd (LLE_INVARIANT_COEFFICIENTS (target_expr),
 931                                     invariants);
 932           gcd1 = gcd (gcd1, gcd2);
 933           gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
 934           gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
 935           for (j = 0; j < depth; j++)
 936             LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
 937           for (j = 0; j < invariants; j++)
 938             LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
 939           LLE_CONSTANT (target_expr) /= gcd1;
 940           LLE_DENOMINATOR (target_expr) /= gcd1;
 941           /* Ignore if equal to existing bound.  */
 942           if (!lle_equal (LL_UPPER_BOUND (target_loop), target_expr, depth,
 943                           invariants))
 944             {
 945               LLE_NEXT (target_expr) = LL_UPPER_BOUND (target_loop);
 946               LL_UPPER_BOUND (target_loop) = target_expr;
 947             }
 948         }
 949     }
 950   for (i = 0; i < depth; i++)
 951     {
 952       target_loop = LN_LOOPS (target_nest)[i];
 953       /* If necessary, exchange the upper and lower bounds and negate
 954          the step size.  */
 955       if (stepsigns[i] < 0)
 956         {
 957           LL_STEP (target_loop) *= -1;
 958           tmp_expr = LL_LOWER_BOUND (target_loop);
 959           LL_LOWER_BOUND (target_loop) = LL_UPPER_BOUND (target_loop);
 960           LL_UPPER_BOUND (target_loop) = tmp_expr;
 961         }
 962     }
 963   return target_nest;
 964 }
 965
 966 /* Compute the step signs of TRANS, using TRANS and stepsigns.  Return the new
 967    result.  */
 968
 969 static lambda_vector
 970 lambda_compute_step_signs (lambda_trans_matrix trans, lambda_vector stepsigns)
 971 {
 972   lambda_matrix matrix, H;
 973   int size;
 974   lambda_vector newsteps;
 975   int i, j, factor, minimum_column;
 976   int temp;
 977
 978   matrix = LTM_MATRIX (trans);
 979   size = LTM_ROWSIZE (trans);
 980   H = lambda_matrix_new (size, size);
 981
 982   newsteps = lambda_vector_new (size);
 983   lambda_vector_copy (stepsigns, newsteps, size);
 984
 985   lambda_matrix_copy (matrix, H, size, size);
 986
 987   for (j = 0; j < size; j++)
 988     {
 989       lambda_vector row;
 990       row = H[j];
 991       for (i = j; i < size; i++)
 992         if (row[i] < 0)
 993           lambda_matrix_col_negate (H, size, i);
 994       while (lambda_vector_first_nz (row, size, j + 1) < size)
 995         {
 996           minimum_column = lambda_vector_min_nz (row, size, j);
 997           lambda_matrix_col_exchange (H, size, j, minimum_column);
 998
 999           temp = newsteps[j];
1000           newsteps[j] = newsteps[minimum_column];
1001           newsteps[minimum_column] = temp;
1002
1003           for (i = j + 1; i < size; i++)
1004             {
1005               factor = row[i] / row[j];
1006               lambda_matrix_col_add (H, size, j, i, -1 * factor);
1007             }
1008         }
1009     }
1010   return newsteps;
1011 }
1012
1013 /* Transform NEST according to TRANS, and return the new loopnest.
1014    This involves
1015    1. Computing a lattice base for the transformation
1016    2. Composing the dense base with the specified transformation (TRANS)
1017    3. Decomposing the combined transformation into a lower triangular portion,
1018    and a unimodular portion.
1019    4. Computing the auxiliary nest using the unimodular portion.
1020    5. Computing the target nest using the auxiliary nest and the lower
1021    triangular portion.  */
1022
1023 lambda_loopnest
1024 lambda_loopnest_transform (lambda_loopnest nest, lambda_trans_matrix trans)
1025 {
1026   lambda_loopnest auxillary_nest, target_nest;
1027
1028   int depth, invariants;
1029   int i, j;
1030   lambda_lattice lattice;
1031   lambda_trans_matrix trans1, H, U;
1032   lambda_loop loop;
1033   lambda_linear_expression expression;
1034   lambda_vector origin;
1035   lambda_matrix origin_invariants;
1036   lambda_vector stepsigns;
1037   int f;
1038
1039   depth = LN_DEPTH (nest);
1040   invariants = LN_INVARIANTS (nest);
1041
1042   /* Keep track of the signs of the loop steps.  */
1043   stepsigns = lambda_vector_new (depth);
1044   for (i = 0; i < depth; i++)
1045     {
1046       if (LL_STEP (LN_LOOPS (nest)[i]) > 0)
1047         stepsigns[i] = 1;
1048       else
1049         stepsigns[i] = -1;
1050     }
1051
1052   /* Compute the lattice base.  */
1053   lattice = lambda_lattice_compute_base (nest);
1054   trans1 = lambda_trans_matrix_new (depth, depth);
1055
1056   /* Multiply the transformation matrix by the lattice base.  */
1057
1058   lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_BASE (lattice),
1059                       LTM_MATRIX (trans1), depth, depth, depth);
1060
1061   /* Compute the Hermite normal form for the new transformation matrix.  */
1062   H = lambda_trans_matrix_new (depth, depth);
1063   U = lambda_trans_matrix_new (depth, depth);
1064   lambda_matrix_hermite (LTM_MATRIX (trans1), depth, LTM_MATRIX (H),
1065                          LTM_MATRIX (U));
1066
1067   /* Compute the auxiliary loop nest's space from the unimodular
1068      portion.  */
1069   auxillary_nest = lambda_compute_auxillary_space (nest, U);
1070
1071   /* Compute the loop step signs from the old step signs and the
1072      transformation matrix.  */
1073   stepsigns = lambda_compute_step_signs (trans1, stepsigns);
1074
1075   /* Compute the target loop nest space from the auxiliary nest and
1076      the lower triangular matrix H.  */
1077   target_nest = lambda_compute_target_space (auxillary_nest, H, stepsigns);
1078   origin = lambda_vector_new (depth);
1079   origin_invariants = lambda_matrix_new (depth, invariants);
1080   lambda_matrix_vector_mult (LTM_MATRIX (trans), depth, depth,
1081                              LATTICE_ORIGIN (lattice), origin);
1082   lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_ORIGIN_INVARIANTS (lattice),
1083                       origin_invariants, depth, depth, invariants);
1084
1085   for (i = 0; i < depth; i++)
1086     {
1087       loop = LN_LOOPS (target_nest)[i];
1088       expression = LL_LINEAR_OFFSET (loop);
1089       if (lambda_vector_zerop (LLE_COEFFICIENTS (expression), depth))
1090         f = 1;
1091       else
1092         f = LLE_DENOMINATOR (expression);
1093
1094       LLE_CONSTANT (expression) += f * origin[i];
1095
1096       for (j = 0; j < invariants; j++)
1097         LLE_INVARIANT_COEFFICIENTS (expression)[j] +=
1098           f * origin_invariants[i][j];
1099     }
1100
1101   return target_nest;
1102
1103 }
1104
1105 /* Convert a gcc tree expression EXPR to a lambda linear expression, and
1106    return the new expression.  DEPTH is the depth of the loopnest.
1107    OUTERINDUCTIONVARS is an array of the induction variables for outer loops
1108    in this nest.  INVARIANTS is the array of invariants for the loop.  EXTRA
1109    is the amount we have to add/subtract from the expression because of the
1110    type of comparison it is used in.  */
1111
1112 static lambda_linear_expression
1113 gcc_tree_to_linear_expression (int depth, tree expr,
1114                                VEC(tree,heap) *outerinductionvars,
1115                                VEC(tree,heap) *invariants, int extra)
1116 {
1117   lambda_linear_expression lle = NULL;
1118   switch (TREE_CODE (expr))
1119     {
1120     case INTEGER_CST:
1121       {
1122         lle = lambda_linear_expression_new (depth, 2 * depth);
1123         LLE_CONSTANT (lle) = TREE_INT_CST_LOW (expr);
1124         if (extra != 0)
1125           LLE_CONSTANT (lle) += extra;
1126
1127         LLE_DENOMINATOR (lle) = 1;
1128       }
1129       break;
1130     case SSA_NAME:
1131       {
1132         tree iv, invar;
1133         size_t i;
1134         for (i = 0; VEC_iterate (tree, outerinductionvars, i, iv); i++)
1135           if (iv != NULL)
1136             {
1137               if (SSA_NAME_VAR (iv) == SSA_NAME_VAR (expr))
1138                 {
1139                   lle = lambda_linear_expression_new (depth, 2 * depth);
1140                   LLE_COEFFICIENTS (lle)[i] = 1;
1141                   if (extra != 0)
1142                     LLE_CONSTANT (lle) = extra;
1143
1144                   LLE_DENOMINATOR (lle) = 1;
1145                 }
1146             }
1147         for (i = 0; VEC_iterate (tree, invariants, i, invar); i++)
1148           if (invar != NULL)
1149             {
1150               if (SSA_NAME_VAR (invar) == SSA_NAME_VAR (expr))
1151                 {
1152                   lle = lambda_linear_expression_new (depth, 2 * depth);
1153                   LLE_INVARIANT_COEFFICIENTS (lle)[i] = 1;
1154                   if (extra != 0)
1155                     LLE_CONSTANT (lle) = extra;
1156                   LLE_DENOMINATOR (lle) = 1;
1157                 }
1158             }
1159       }
1160       break;
1161     default:
1162       return NULL;
1163     }
1164
1165   return lle;
1166 }
1167
1168 /* Return the depth of the loopnest NEST */
1169
1170 static int
1171 depth_of_nest (struct loop *nest)
1172 {
1173   size_t depth = 0;
1174   while (nest)
1175     {
1176       depth++;
1177       nest = nest->inner;
1178     }
1179   return depth;
1180 }
1181
1182
1183 /* Return true if OP is invariant in LOOP and all outer loops.  */
1184
1185 static bool
1186 invariant_in_loop_and_outer_loops (struct loop *loop, tree op)
1187 {
1188   if (is_gimple_min_invariant (op))
1189     return true;
1190   if (loop->depth == 0)
1191     return true;
1192   if (!expr_invariant_in_loop_p (loop, op))
1193     return false;
1194   if (loop->outer
1195       && !invariant_in_loop_and_outer_loops (loop->outer, op))
1196     return false;
1197   return true;
1198 }
1199
1200 /* Generate a lambda loop from a gcc loop LOOP.  Return the new lambda loop,
1201    or NULL if it could not be converted.
1202    DEPTH is the depth of the loop.
1203    INVARIANTS is a pointer to the array of loop invariants.
1204    The induction variable for this loop should be stored in the parameter
1205    OURINDUCTIONVAR.
1206    OUTERINDUCTIONVARS is an array of induction variables for outer loops.  */
1207
1208 static lambda_loop
1209 gcc_loop_to_lambda_loop (struct loop *loop, int depth,
1210                          VEC(tree,heap) ** invariants,
1211                          tree * ourinductionvar,
1212                          VEC(tree,heap) * outerinductionvars,
1213                          VEC(tree,heap) ** lboundvars,
1214                          VEC(tree,heap) ** uboundvars,
1215                          VEC(int,heap) ** steps)
1216 {
1217   tree phi;
1218   tree exit_cond;
1219   tree access_fn, inductionvar;
1220   tree step;
1221   lambda_loop lloop = NULL;
1222   lambda_linear_expression lbound, ubound;
1223   tree test;
1224   int stepint;
1225   int extra = 0;
1226   tree lboundvar, uboundvar, uboundresult;
1227
1228   /* Find out induction var and exit condition.  */
1229   inductionvar = find_induction_var_from_exit_cond (loop);
1230   exit_cond = get_loop_exit_condition (loop);
1231
1232   if (inductionvar == NULL || exit_cond == NULL)
1233     {
1234       if (dump_file && (dump_flags & TDF_DETAILS))
1235         fprintf (dump_file,
1236                  "Unable to convert loop: Cannot determine exit condition or induction variable for loop.\n");
1237       return NULL;
1238     }
1239
1240   test = TREE_OPERAND (exit_cond, 0);
1241
1242   if (SSA_NAME_DEF_STMT (inductionvar) == NULL_TREE)
1243     {
1244
1245       if (dump_file && (dump_flags & TDF_DETAILS))
1246         fprintf (dump_file,
1247                  "Unable to convert loop: Cannot find PHI node for induction variable\n");
1248
1249       return NULL;
1250     }
1251
1252   phi = SSA_NAME_DEF_STMT (inductionvar);
1253   if (TREE_CODE (phi) != PHI_NODE)
1254     {
1255       phi = SINGLE_SSA_TREE_OPERAND (phi, SSA_OP_USE);
1256       if (!phi)
1257         {
1258
1259           if (dump_file && (dump_flags & TDF_DETAILS))
1260             fprintf (dump_file,
1261                      "Unable to convert loop: Cannot find PHI node for induction variable\n");
1262
1263           return NULL;
1264         }
1265
1266       phi = SSA_NAME_DEF_STMT (phi);
1267       if (TREE_CODE (phi) != PHI_NODE)
1268         {
1269
1270           if (dump_file && (dump_flags & TDF_DETAILS))
1271             fprintf (dump_file,
1272                      "Unable to convert loop: Cannot find PHI node for induction variable\n");
1273           return NULL;
1274         }
1275
1276     }
1277
1278   /* The induction variable name/version we want to put in the array is the
1279      result of the induction variable phi node.  */
1280   *ourinductionvar = PHI_RESULT (phi);
1281   access_fn = instantiate_parameters
1282     (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
1283   if (access_fn == chrec_dont_know)
1284     {
1285       if (dump_file && (dump_flags & TDF_DETAILS))
1286         fprintf (dump_file,
1287                  "Unable to convert loop: Access function for induction variable phi is unknown\n");
1288
1289       return NULL;
1290     }
1291
1292   step = evolution_part_in_loop_num (access_fn, loop->num);
1293   if (!step || step == chrec_dont_know)
1294     {
1295       if (dump_file && (dump_flags & TDF_DETAILS))
1296         fprintf (dump_file,
1297                  "Unable to convert loop: Cannot determine step of loop.\n");
1298
1299       return NULL;
1300     }
1301   if (TREE_CODE (step) != INTEGER_CST)
1302     {
1303
1304       if (dump_file && (dump_flags & TDF_DETAILS))
1305         fprintf (dump_file,
1306                  "Unable to convert loop: Step of loop is not integer.\n");
1307       return NULL;
1308     }
1309
1310   stepint = TREE_INT_CST_LOW (step);
1311
1312   /* Only want phis for induction vars, which will have two
1313      arguments.  */
1314   if (PHI_NUM_ARGS (phi) != 2)
1315     {
1316       if (dump_file && (dump_flags & TDF_DETAILS))
1317         fprintf (dump_file,
1318                  "Unable to convert loop: PHI node for induction variable has >2 arguments\n");
1319       return NULL;
1320     }
1321
1322   /* Another induction variable check. One argument's source should be
1323      in the loop, one outside the loop.  */
1324   if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 0)->src)
1325       && flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 1)->src))
1326     {
1327
1328       if (dump_file && (dump_flags & TDF_DETAILS))
1329         fprintf (dump_file,
1330                  "Unable to convert loop: PHI edges both inside loop, or both outside loop.\n");
1331
1332       return NULL;
1333     }
1334
1335   if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 0)->src))
1336     {
1337       lboundvar = PHI_ARG_DEF (phi, 1);
1338       lbound = gcc_tree_to_linear_expression (depth, lboundvar,
1339                                               outerinductionvars, *invariants,
1340                                               0);
1341     }
1342   else
1343     {
1344       lboundvar = PHI_ARG_DEF (phi, 0);
1345       lbound = gcc_tree_to_linear_expression (depth, lboundvar,
1346                                               outerinductionvars, *invariants,
1347                                               0);
1348     }
1349
1350   if (!lbound)
1351     {
1352
1353       if (dump_file && (dump_flags & TDF_DETAILS))
1354         fprintf (dump_file,
1355                  "Unable to convert loop: Cannot convert lower bound to linear expression\n");
1356
1357       return NULL;
1358     }
1359   /* One part of the test may be a loop invariant tree.  */
1360   VEC_reserve (tree, heap, *invariants, 1);
1361   if (TREE_CODE (TREE_OPERAND (test, 1)) == SSA_NAME
1362       && invariant_in_loop_and_outer_loops (loop, TREE_OPERAND (test, 1)))
1363     VEC_quick_push (tree, *invariants, TREE_OPERAND (test, 1));
1364   else if (TREE_CODE (TREE_OPERAND (test, 0)) == SSA_NAME
1365            && invariant_in_loop_and_outer_loops (loop, TREE_OPERAND (test, 0)))
1366     VEC_quick_push (tree, *invariants, TREE_OPERAND (test, 0));
1367
1368   /* The non-induction variable part of the test is the upper bound variable.
1369    */
1370   if (TREE_OPERAND (test, 0) == inductionvar)
1371     uboundvar = TREE_OPERAND (test, 1);
1372   else
1373     uboundvar = TREE_OPERAND (test, 0);
1374
1375
1376   /* We only size the vectors assuming we have, at max, 2 times as many
1377      invariants as we do loops (one for each bound).
1378      This is just an arbitrary number, but it has to be matched against the
1379      code below.  */
1380   gcc_assert (VEC_length (tree, *invariants) <= (unsigned int) (2 * depth));
1381
1382
1383   /* We might have some leftover.  */
1384   if (TREE_CODE (test) == LT_EXPR)
1385     extra = -1 * stepint;
1386   else if (TREE_CODE (test) == NE_EXPR)
1387     extra = -1 * stepint;
1388   else if (TREE_CODE (test) == GT_EXPR)
1389     extra = -1 * stepint;
1390   else if (TREE_CODE (test) == EQ_EXPR)
1391     extra = 1 * stepint;
1392
1393   ubound = gcc_tree_to_linear_expression (depth, uboundvar,
1394                                           outerinductionvars,
1395                                           *invariants, extra);
1396   uboundresult = build2 (PLUS_EXPR, TREE_TYPE (uboundvar), uboundvar,
1397                          build_int_cst (TREE_TYPE (uboundvar), extra));
1398   VEC_safe_push (tree, heap, *uboundvars, uboundresult);
1399   VEC_safe_push (tree, heap, *lboundvars, lboundvar);
1400   VEC_safe_push (int, heap, *steps, stepint);
1401   if (!ubound)
1402     {
1403       if (dump_file && (dump_flags & TDF_DETAILS))
1404         fprintf (dump_file,
1405                  "Unable to convert loop: Cannot convert upper bound to linear expression\n");
1406       return NULL;
1407     }
1408
1409   lloop = lambda_loop_new ();
1410   LL_STEP (lloop) = stepint;
1411   LL_LOWER_BOUND (lloop) = lbound;
1412   LL_UPPER_BOUND (lloop) = ubound;
1413   return lloop;
1414 }
1415
1416 /* Given a LOOP, find the induction variable it is testing against in the exit
1417    condition.  Return the induction variable if found, NULL otherwise.  */
1418
1419 static tree
1420 find_induction_var_from_exit_cond (struct loop *loop)
1421 {
1422   tree expr = get_loop_exit_condition (loop);
1423   tree ivarop;
1424   tree test;
1425   if (expr == NULL_TREE)
1426     return NULL_TREE;
1427   if (TREE_CODE (expr) != COND_EXPR)
1428     return NULL_TREE;
1429   test = TREE_OPERAND (expr, 0);
1430   if (!COMPARISON_CLASS_P (test))
1431     return NULL_TREE;
1432
1433   /* Find the side that is invariant in this loop. The ivar must be the other
1434      side.  */
1435
1436   if (expr_invariant_in_loop_p (loop, TREE_OPERAND (test, 0)))
1437       ivarop = TREE_OPERAND (test, 1);
1438   else if (expr_invariant_in_loop_p (loop, TREE_OPERAND (test, 1)))
1439       ivarop = TREE_OPERAND (test, 0);
1440   else
1441     return NULL_TREE;
1442
1443   if (TREE_CODE (ivarop) != SSA_NAME)
1444     return NULL_TREE;
1445   return ivarop;
1446 }
1447
/* Define the vector of lambda_loop pointers and its heap-allocated
   variant, needed for the VEC(lambda_loop,heap) used by
   gcc_loopnest_to_lambda_loopnest.  */
DEF_VEC_P(lambda_loop);
DEF_VEC_ALLOC_P(lambda_loop,heap);
1450
1451 /* Generate a lambda loopnest from a gcc loopnest LOOP_NEST.
1452    Return the new loop nest.
1453    INDUCTIONVARS is a pointer to an array of induction variables for the
1454    loopnest that will be filled in during this process.
1455    INVARIANTS is a pointer to an array of invariants that will be filled in
1456    during this process.  */
1457
1458 lambda_loopnest
1459 gcc_loopnest_to_lambda_loopnest (struct loops *currloops,
1460                                  struct loop *loop_nest,
1461                                  VEC(tree,heap) **inductionvars,
1462                                  VEC(tree,heap) **invariants)
1463 {
1464   lambda_loopnest ret = NULL;
1465   struct loop *temp = loop_nest;
1466   int depth = depth_of_nest (loop_nest);
1467   size_t i;
1468   VEC(lambda_loop,heap) *loops = NULL;
1469   VEC(tree,heap) *uboundvars = NULL;
1470   VEC(tree,heap) *lboundvars  = NULL;
1471   VEC(int,heap) *steps = NULL;
1472   lambda_loop newloop;
1473   tree inductionvar = NULL;
1474   bool perfect_nest = perfect_nest_p (loop_nest);
1475
1476   if (!perfect_nest && !can_convert_to_perfect_nest (loop_nest))
1477     goto fail;
1478
1479   while (temp)
1480     {
1481       newloop = gcc_loop_to_lambda_loop (temp, depth, invariants,
1482                                          &inductionvar, *inductionvars,
1483                                          &lboundvars, &uboundvars,
1484                                          &steps);
1485       if (!newloop)
1486         goto fail;
1487
1488       VEC_safe_push (tree, heap, *inductionvars, inductionvar);
1489       VEC_safe_push (lambda_loop, heap, loops, newloop);
1490       temp = temp->inner;
1491     }
1492
1493   if (!perfect_nest)
1494     {
1495       if (!perfect_nestify (currloops, loop_nest,
1496                             lboundvars, uboundvars, steps, *inductionvars))
1497         {
1498           if (dump_file)
1499             fprintf (dump_file,
1500                      "Not a perfect loop nest and couldn't convert to one.\n");
1501           goto fail;
1502         }
1503       else if (dump_file)
1504         fprintf (dump_file,
1505                  "Successfully converted loop nest to perfect loop nest.\n");
1506     }
1507
1508   ret = lambda_loopnest_new (depth, 2 * depth);
1509
1510   for (i = 0; VEC_iterate (lambda_loop, loops, i, newloop); i++)
1511     LN_LOOPS (ret)[i] = newloop;
1512
1513  fail:
1514   VEC_free (lambda_loop, heap, loops);
1515   VEC_free (tree, heap, uboundvars);
1516   VEC_free (tree, heap, lboundvars);
1517   VEC_free (int, heap, steps);
1518
1519   return ret;
1520 }
1521
1522 /* Convert a lambda body vector LBV to a gcc tree, and return the new tree.
1523    STMTS_TO_INSERT is a pointer to a tree where the statements we need to be
1524    inserted for us are stored.  INDUCTION_VARS is the array of induction
1525    variables for the loop this LBV is from.  TYPE is the tree type to use for
1526    the variables and trees involved.  */
1527
1528 static tree
1529 lbv_to_gcc_expression (lambda_body_vector lbv,
1530                        tree type, VEC(tree,heap) *induction_vars,
1531                        tree *stmts_to_insert)
1532 {
1533   tree stmts, stmt, resvar, name;
1534   tree iv;
1535   size_t i;
1536   tree_stmt_iterator tsi;
1537
1538   /* Create a statement list and a linear expression temporary.  */
1539   stmts = alloc_stmt_list ();
1540   resvar = create_tmp_var (type, "lbvtmp");
1541   add_referenced_var (resvar);
1542
1543   /* Start at 0.  */
1544   stmt = build2 (MODIFY_EXPR, void_type_node, resvar, integer_zero_node);
1545   name = make_ssa_name (resvar, stmt);
1546   TREE_OPERAND (stmt, 0) = name;
1547   tsi = tsi_last (stmts);
1548   tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1549
1550   for (i = 0; VEC_iterate (tree, induction_vars, i, iv); i++)
1551     {
1552       if (LBV_COEFFICIENTS (lbv)[i] != 0)
1553         {
1554           tree newname;
1555           tree coeffmult;
1556
1557           /* newname = coefficient * induction_variable */
1558           coeffmult = build_int_cst (type, LBV_COEFFICIENTS (lbv)[i]);
1559           stmt = build2 (MODIFY_EXPR, void_type_node, resvar,
1560                          fold_build2 (MULT_EXPR, type, iv, coeffmult));
1561
1562           newname = make_ssa_name (resvar, stmt);
1563           TREE_OPERAND (stmt, 0) = newname;
1564           fold_stmt (&stmt);
1565           tsi = tsi_last (stmts);
1566           tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1567
1568           /* name = name + newname */
1569           stmt = build2 (MODIFY_EXPR, void_type_node, resvar,
1570                          build2 (PLUS_EXPR, type, name, newname));
1571           name = make_ssa_name (resvar, stmt);
1572           TREE_OPERAND (stmt, 0) = name;
1573           fold_stmt (&stmt);
1574           tsi = tsi_last (stmts);
1575           tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1576
1577         }
1578     }
1579
1580   /* Handle any denominator that occurs.  */
1581   if (LBV_DENOMINATOR (lbv) != 1)
1582     {
1583       tree denominator = build_int_cst (type, LBV_DENOMINATOR (lbv));
1584       stmt = build2 (MODIFY_EXPR, void_type_node, resvar,
1585                      build2 (CEIL_DIV_EXPR, type, name, denominator));
1586       name = make_ssa_name (resvar, stmt);
1587       TREE_OPERAND (stmt, 0) = name;
1588       fold_stmt (&stmt);
1589       tsi = tsi_last (stmts);
1590       tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1591     }
1592   *stmts_to_insert = stmts;
1593   return name;
1594 }
1595
1596 /* Convert a linear expression from coefficient and constant form to a
1597    gcc tree.
1598    Return the tree that represents the final value of the expression.
1599    LLE is the linear expression to convert.
1600    OFFSET is the linear offset to apply to the expression.
1601    TYPE is the tree type to use for the variables and math.
1602    INDUCTION_VARS is a vector of induction variables for the loops.
1603    INVARIANTS is a vector of the loop nest invariants.
1604    WRAP specifies what tree code to wrap the results in, if there is more than
1605    one (it is either MAX_EXPR, or MIN_EXPR).
   STMTS_TO_INSERT is a pointer to the statement list we fill in with
   statements that need to be inserted for the linear expression.  */
1608
1609 static tree
1610 lle_to_gcc_expression (lambda_linear_expression lle,
1611                        lambda_linear_expression offset,
1612                        tree type,
1613                        VEC(tree,heap) *induction_vars,
1614                        VEC(tree,heap) *invariants,
1615                        enum tree_code wrap, tree *stmts_to_insert)
1616 {
1617   tree stmts, stmt, resvar, name;
1618   size_t i;
1619   tree_stmt_iterator tsi;
1620   tree iv, invar;
1621   VEC(tree,heap) *results = NULL;
1622
1623   gcc_assert (wrap == MAX_EXPR || wrap == MIN_EXPR);
1624   name = NULL_TREE;
1625   /* Create a statement list and a linear expression temporary.  */
1626   stmts = alloc_stmt_list ();
1627   resvar = create_tmp_var (type, "lletmp");
1628   add_referenced_var (resvar);
1629
1630   /* Build up the linear expressions, and put the variable representing the
1631      result in the results array.  */
1632   for (; lle != NULL; lle = LLE_NEXT (lle))
1633     {
1634       /* Start at name = 0.  */
1635       stmt = build2 (MODIFY_EXPR, void_type_node, resvar, integer_zero_node);
1636       name = make_ssa_name (resvar, stmt);
1637       TREE_OPERAND (stmt, 0) = name;
1638       fold_stmt (&stmt);
1639       tsi = tsi_last (stmts);
1640       tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1641
1642       /* First do the induction variables.
1643          at the end, name = name + all the induction variables added
1644          together.  */
1645       for (i = 0; VEC_iterate (tree, induction_vars, i, iv); i++)
1646         {
1647           if (LLE_COEFFICIENTS (lle)[i] != 0)
1648             {
1649               tree newname;
1650               tree mult;
1651               tree coeff;
1652
1653               /* mult = induction variable * coefficient.  */
1654               if (LLE_COEFFICIENTS (lle)[i] == 1)
1655                 {
1656                   mult = VEC_index (tree, induction_vars, i);
1657                 }
1658               else
1659                 {
1660                   coeff = build_int_cst (type,
1661                                          LLE_COEFFICIENTS (lle)[i]);
1662                   mult = fold_build2 (MULT_EXPR, type, iv, coeff);
1663                 }
1664
1665               /* newname = mult */
1666               stmt = build2 (MODIFY_EXPR, void_type_node, resvar, mult);
1667               newname = make_ssa_name (resvar, stmt);
1668               TREE_OPERAND (stmt, 0) = newname;
1669               fold_stmt (&stmt);
1670               tsi = tsi_last (stmts);
1671               tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1672
1673               /* name = name + newname */
1674               stmt = build2 (MODIFY_EXPR, void_type_node, resvar,
1675                              build2 (PLUS_EXPR, type, name, newname));
1676               name = make_ssa_name (resvar, stmt);
1677               TREE_OPERAND (stmt, 0) = name;
1678               fold_stmt (&stmt);
1679               tsi = tsi_last (stmts);
1680               tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1681             }
1682         }
1683
1684       /* Handle our invariants.
1685          At the end, we have name = name + result of adding all multiplied
1686          invariants.  */
1687       for (i = 0; VEC_iterate (tree, invariants, i, invar); i++)
1688         {
1689           if (LLE_INVARIANT_COEFFICIENTS (lle)[i] != 0)
1690             {
1691               tree newname;
1692               tree mult;
1693               tree coeff;
1694               int invcoeff = LLE_INVARIANT_COEFFICIENTS (lle)[i];
1695               /* mult = invariant * coefficient  */
1696               if (invcoeff == 1)
1697                 {
1698                   mult = invar;
1699                 }
1700               else
1701                 {
1702                   coeff = build_int_cst (type, invcoeff);
1703                   mult = fold_build2 (MULT_EXPR, type, invar, coeff);
1704                 }
1705
1706               /* newname = mult */
1707               stmt = build2 (MODIFY_EXPR, void_type_node, resvar, mult);
1708               newname = make_ssa_name (resvar, stmt);
1709               TREE_OPERAND (stmt, 0) = newname;
1710               fold_stmt (&stmt);
1711               tsi = tsi_last (stmts);
1712               tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1713
1714               /* name = name + newname */
1715               stmt = build2 (MODIFY_EXPR, void_type_node, resvar,
1716                              build2 (PLUS_EXPR, type, name, newname));
1717               name = make_ssa_name (resvar, stmt);
1718               TREE_OPERAND (stmt, 0) = name;
1719               fold_stmt (&stmt);
1720               tsi = tsi_last (stmts);
1721               tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1722             }
1723         }
1724
1725       /* Now handle the constant.
1726          name = name + constant.  */
1727       if (LLE_CONSTANT (lle) != 0)
1728         {
1729           stmt = build2 (MODIFY_EXPR, void_type_node, resvar,
1730                          build2 (PLUS_EXPR, type, name,
1731                                  build_int_cst (type, LLE_CONSTANT (lle))));
1732           name = make_ssa_name (resvar, stmt);
1733           TREE_OPERAND (stmt, 0) = name;
1734           fold_stmt (&stmt);
1735           tsi = tsi_last (stmts);
1736           tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1737         }
1738
1739       /* Now handle the offset.
1740          name = name + linear offset.  */
1741       if (LLE_CONSTANT (offset) != 0)
1742         {
1743           stmt = build2 (MODIFY_EXPR, void_type_node, resvar,
1744                          build2 (PLUS_EXPR, type, name,
1745                                  build_int_cst (type, LLE_CONSTANT (offset))));
1746           name = make_ssa_name (resvar, stmt);
1747           TREE_OPERAND (stmt, 0) = name;
1748           fold_stmt (&stmt);
1749           tsi = tsi_last (stmts);
1750           tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1751         }
1752
1753       /* Handle any denominator that occurs.  */
1754       if (LLE_DENOMINATOR (lle) != 1)
1755         {
1756           stmt = build_int_cst (type, LLE_DENOMINATOR (lle));
1757           stmt = build2 (wrap == MAX_EXPR ? CEIL_DIV_EXPR : FLOOR_DIV_EXPR,
1758                          type, name, stmt);
1759           stmt = build2 (MODIFY_EXPR, void_type_node, resvar, stmt);
1760
1761           /* name = {ceil, floor}(name/denominator) */
1762           name = make_ssa_name (resvar, stmt);
1763           TREE_OPERAND (stmt, 0) = name;
1764           tsi = tsi_last (stmts);
1765           tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1766         }
1767       VEC_safe_push (tree, heap, results, name);
1768     }
1769
1770   /* Again, out of laziness, we don't handle this case yet.  It's not
1771      hard, it just hasn't occurred.  */
1772   gcc_assert (VEC_length (tree, results) <= 2);
1773
1774   /* We may need to wrap the results in a MAX_EXPR or MIN_EXPR.  */
1775   if (VEC_length (tree, results) > 1)
1776     {
1777       tree op1 = VEC_index (tree, results, 0);
1778       tree op2 = VEC_index (tree, results, 1);
1779       stmt = build2 (MODIFY_EXPR, void_type_node, resvar,
1780                      build2 (wrap, type, op1, op2));
1781       name = make_ssa_name (resvar, stmt);
1782       TREE_OPERAND (stmt, 0) = name;
1783       tsi = tsi_last (stmts);
1784       tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1785     }
1786
1787   VEC_free (tree, heap, results);
1788
1789   *stmts_to_insert = stmts;
1790   return name;
1791 }
1792
1793 /* Transform a lambda loopnest NEW_LOOPNEST, which had TRANSFORM applied to
1794    it, back into gcc code.  This changes the
1795    loops, their induction variables, and their bodies, so that they
1796    match the transformed loopnest.
1797    OLD_LOOPNEST is the loopnest before we've replaced it with the new
1798    loopnest.
1799    OLD_IVS is a vector of induction variables from the old loopnest.
1800    INVARIANTS is a vector of loop invariants from the old loopnest.
1801    NEW_LOOPNEST is the new lambda loopnest to replace OLD_LOOPNEST with.
1802    TRANSFORM is the matrix transform that was applied to OLD_LOOPNEST to get
1803    NEW_LOOPNEST.  */
1804
1805 void
1806 lambda_loopnest_to_gcc_loopnest (struct loop *old_loopnest,
1807                                  VEC(tree,heap) *old_ivs,
1808                                  VEC(tree,heap) *invariants,
1809                                  lambda_loopnest new_loopnest,
1810                                  lambda_trans_matrix transform)
1811 {
1812   struct loop *temp;
1813   size_t i = 0;
1814   size_t depth = 0;
1815   VEC(tree,heap) *new_ivs = NULL;
1816   tree oldiv;
1817
1818   block_stmt_iterator bsi;
1819
1820   if (dump_file)
1821     {
1822       transform = lambda_trans_matrix_inverse (transform);
1823       fprintf (dump_file, "Inverse of transformation matrix:\n");
1824       print_lambda_trans_matrix (dump_file, transform);
1825     }
1826   depth = depth_of_nest (old_loopnest);
1827   temp = old_loopnest;
1828
1829   while (temp)
1830     {
1831       lambda_loop newloop;
1832       basic_block bb;
1833       edge exit;
1834       tree ivvar, ivvarinced, exitcond, stmts;
1835       enum tree_code testtype;
1836       tree newupperbound, newlowerbound;
1837       lambda_linear_expression offset;
1838       tree type;
1839       bool insert_after;
1840       tree inc_stmt;
1841
1842       oldiv = VEC_index (tree, old_ivs, i);
1843       type = TREE_TYPE (oldiv);
1844
1845       /* First, build the new induction variable temporary  */
1846
1847       ivvar = create_tmp_var (type, "lnivtmp");
1848       add_referenced_var (ivvar);
1849
1850       VEC_safe_push (tree, heap, new_ivs, ivvar);
1851
1852       newloop = LN_LOOPS (new_loopnest)[i];
1853
1854       /* Linear offset is a bit tricky to handle.  Punt on the unhandled
1855          cases for now.  */
1856       offset = LL_LINEAR_OFFSET (newloop);
1857
1858       gcc_assert (LLE_DENOMINATOR (offset) == 1 &&
1859                   lambda_vector_zerop (LLE_COEFFICIENTS (offset), depth));
1860
1861       /* Now build the  new lower bounds, and insert the statements
1862          necessary to generate it on the loop preheader.  */
1863       newlowerbound = lle_to_gcc_expression (LL_LOWER_BOUND (newloop),
1864                                              LL_LINEAR_OFFSET (newloop),
1865                                              type,
1866                                              new_ivs,
1867                                              invariants, MAX_EXPR, &stmts);
1868       bsi_insert_on_edge (loop_preheader_edge (temp), stmts);
1869       bsi_commit_edge_inserts ();
1870       /* Build the new upper bound and insert its statements in the
1871          basic block of the exit condition */
1872       newupperbound = lle_to_gcc_expression (LL_UPPER_BOUND (newloop),
1873                                              LL_LINEAR_OFFSET (newloop),
1874                                              type,
1875                                              new_ivs,
1876                                              invariants, MIN_EXPR, &stmts);
1877       exit = temp->single_exit;
1878       exitcond = get_loop_exit_condition (temp);
1879       bb = bb_for_stmt (exitcond);
1880       bsi = bsi_start (bb);
1881       bsi_insert_after (&bsi, stmts, BSI_NEW_STMT);
1882
1883       /* Create the new iv.  */
1884
1885       standard_iv_increment_position (temp, &bsi, &insert_after);
1886       create_iv (newlowerbound,
1887                  build_int_cst (type, LL_STEP (newloop)),
1888                  ivvar, temp, &bsi, insert_after, &ivvar,
1889                  NULL);
1890
1891       /* Unfortunately, the incremented ivvar that create_iv inserted may not
1892          dominate the block containing the exit condition.
1893          So we simply create our own incremented iv to use in the new exit
1894          test,  and let redundancy elimination sort it out.  */
1895       inc_stmt = build2 (PLUS_EXPR, type,
1896                          ivvar, build_int_cst (type, LL_STEP (newloop)));
1897       inc_stmt = build2 (MODIFY_EXPR, void_type_node, SSA_NAME_VAR (ivvar),
1898                          inc_stmt);
1899       ivvarinced = make_ssa_name (SSA_NAME_VAR (ivvar), inc_stmt);
1900       TREE_OPERAND (inc_stmt, 0) = ivvarinced;
1901       bsi = bsi_for_stmt (exitcond);
1902       bsi_insert_before (&bsi, inc_stmt, BSI_SAME_STMT);
1903
1904       /* Replace the exit condition with the new upper bound
1905          comparison.  */
1906
1907       testtype = LL_STEP (newloop) >= 0 ? LE_EXPR : GE_EXPR;
1908
1909       /* We want to build a conditional where true means exit the loop, and
1910          false means continue the loop.
1911          So swap the testtype if this isn't the way things are.*/
1912
1913       if (exit->flags & EDGE_FALSE_VALUE)
1914         testtype = swap_tree_comparison (testtype);
1915
1916       COND_EXPR_COND (exitcond) = build2 (testtype,
1917                                           boolean_type_node,
1918                                           newupperbound, ivvarinced);
1919       update_stmt (exitcond);
1920       VEC_replace (tree, new_ivs, i, ivvar);
1921
1922       i++;
1923       temp = temp->inner;
1924     }
1925
1926   /* Rewrite uses of the old ivs so that they are now specified in terms of
1927      the new ivs.  */
1928
1929   for (i = 0; VEC_iterate (tree, old_ivs, i, oldiv); i++)
1930     {
1931       imm_use_iterator imm_iter;
1932       use_operand_p use_p;
1933       tree oldiv_def;
1934       tree oldiv_stmt = SSA_NAME_DEF_STMT (oldiv);
1935       tree stmt;
1936
1937       if (TREE_CODE (oldiv_stmt) == PHI_NODE)
1938         oldiv_def = PHI_RESULT (oldiv_stmt);
1939       else
1940         oldiv_def = SINGLE_SSA_TREE_OPERAND (oldiv_stmt, SSA_OP_DEF);
1941       gcc_assert (oldiv_def != NULL_TREE);
1942
1943       FOR_EACH_IMM_USE_STMT (stmt, imm_iter, oldiv_def)
1944         {
1945           tree newiv, stmts;
1946           lambda_body_vector lbv, newlbv;
1947
1948           gcc_assert (TREE_CODE (stmt) != PHI_NODE);
1949
1950           /* Compute the new expression for the induction
1951              variable.  */
1952           depth = VEC_length (tree, new_ivs);
1953           lbv = lambda_body_vector_new (depth);
1954           LBV_COEFFICIENTS (lbv)[i] = 1;
1955
1956           newlbv = lambda_body_vector_compute_new (transform, lbv);
1957
1958           newiv = lbv_to_gcc_expression (newlbv, TREE_TYPE (oldiv),
1959                                          new_ivs, &stmts);
1960           bsi = bsi_for_stmt (stmt);
1961           /* Insert the statements to build that
1962              expression.  */
1963           bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
1964
1965           FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1966             propagate_value (use_p, newiv);
1967           update_stmt (stmt);
1968         }
1969     }
1970   VEC_free (tree, heap, new_ivs);
1971 }
1972
1973 /* Return TRUE if this is not interesting statement from the perspective of
1974    determining if we have a perfect loop nest.  */
1975
1976 static bool
1977 not_interesting_stmt (tree stmt)
1978 {
1979   /* Note that COND_EXPR's aren't interesting because if they were exiting the
1980      loop, we would have already failed the number of exits tests.  */
1981   if (TREE_CODE (stmt) == LABEL_EXPR
1982       || TREE_CODE (stmt) == GOTO_EXPR
1983       || TREE_CODE (stmt) == COND_EXPR)
1984     return true;
1985   return false;
1986 }
1987
1988 /* Return TRUE if PHI uses DEF for it's in-the-loop edge for LOOP.  */
1989
1990 static bool
1991 phi_loop_edge_uses_def (struct loop *loop, tree phi, tree def)
1992 {
1993   int i;
1994   for (i = 0; i < PHI_NUM_ARGS (phi); i++)
1995     if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, i)->src))
1996       if (PHI_ARG_DEF (phi, i) == def)
1997         return true;
1998   return false;
1999 }
2000
2001 /* Return TRUE if STMT is a use of PHI_RESULT.  */
2002
2003 static bool
2004 stmt_uses_phi_result (tree stmt, tree phi_result)
2005 {
2006   tree use = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
2007
2008   /* This is conservatively true, because we only want SIMPLE bumpers
2009      of the form x +- constant for our pass.  */
2010   return (use == phi_result);
2011 }
2012
2013 /* STMT is a bumper stmt for LOOP if the version it defines is used in the
2014    in-loop-edge in a phi node, and the operand it uses is the result of that
2015    phi node.
2016    I.E. i_29 = i_3 + 1
2017         i_3 = PHI (0, i_29);  */
2018
2019 static bool
2020 stmt_is_bumper_for_loop (struct loop *loop, tree stmt)
2021 {
2022   tree use;
2023   tree def;
2024   imm_use_iterator iter;
2025   use_operand_p use_p;
2026
2027   def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
2028   if (!def)
2029     return false;
2030
2031   FOR_EACH_IMM_USE_FAST (use_p, iter, def)
2032     {
2033       use = USE_STMT (use_p);
2034       if (TREE_CODE (use) == PHI_NODE)
2035         {
2036           if (phi_loop_edge_uses_def (loop, use, def))
2037             if (stmt_uses_phi_result (stmt, PHI_RESULT (use)))
2038               return true;
2039         }
2040     }
2041   return false;
2042 }
2043
2044
2045 /* Return true if LOOP is a perfect loop nest.
2046    Perfect loop nests are those loop nests where all code occurs in the
2047    innermost loop body.
2048    If S is a program statement, then
2049
2050    i.e.
2051    DO I = 1, 20
2052        S1
2053        DO J = 1, 20
2054        ...
2055        END DO
2056    END DO
2057    is not a perfect loop nest because of S1.
2058
2059    DO I = 1, 20
2060       DO J = 1, 20
2061         S1
2062         ...
2063       END DO
2064    END DO
2065    is a perfect loop nest.
2066
2067    Since we don't have high level loops anymore, we basically have to walk our
2068    statements and ignore those that are there because the loop needs them (IE
2069    the induction variable increment, and jump back to the top of the loop).  */
2070
2071 bool
2072 perfect_nest_p (struct loop *loop)
2073 {
2074   basic_block *bbs;
2075   size_t i;
2076   tree exit_cond;
2077
2078   if (!loop->inner)
2079     return true;
2080   bbs = get_loop_body (loop);
2081   exit_cond = get_loop_exit_condition (loop);
2082   for (i = 0; i < loop->num_nodes; i++)
2083     {
2084       if (bbs[i]->loop_father == loop)
2085         {
2086           block_stmt_iterator bsi;
2087           for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi))
2088             {
2089               tree stmt = bsi_stmt (bsi);
2090               if (stmt == exit_cond
2091                   || not_interesting_stmt (stmt)
2092                   || stmt_is_bumper_for_loop (loop, stmt))
2093                 continue;
2094               free (bbs);
2095               return false;
2096             }
2097         }
2098     }
2099   free (bbs);
2100   /* See if the inner loops are perfectly nested as well.  */
2101   if (loop->inner)
2102     return perfect_nest_p (loop->inner);
2103   return true;
2104 }
2105
2106 /* Replace the USES of X in STMT, or uses with the same step as X with Y.
2107    YINIT is the initial value of Y, REPLACEMENTS is a hash table to
2108    avoid creating duplicate temporaries and FIRSTBSI is statement
2109    iterator where new temporaries should be inserted at the beginning
2110    of body basic block.  */
2111
2112 static void
2113 replace_uses_equiv_to_x_with_y (struct loop *loop, tree stmt, tree x,
2114                                 int xstep, tree y, tree yinit,
2115                                 htab_t replacements,
2116                                 block_stmt_iterator *firstbsi)
2117 {
2118   ssa_op_iter iter;
2119   use_operand_p use_p;
2120
2121   FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
2122     {
2123       tree use = USE_FROM_PTR (use_p);
2124       tree step = NULL_TREE;
2125       tree scev, init, val, var, setstmt;
2126       struct tree_map *h, in;
2127       void **loc;
2128
2129       /* Replace uses of X with Y right away.  */
2130       if (use == x)
2131         {
2132           SET_USE (use_p, y);
2133           continue;
2134         }
2135
2136       scev = instantiate_parameters (loop,
2137                                      analyze_scalar_evolution (loop, use));
2138
2139       if (scev == NULL || scev == chrec_dont_know)
2140         continue;
2141
2142       step = evolution_part_in_loop_num (scev, loop->num);
2143       if (step == NULL
2144           || step == chrec_dont_know
2145           || TREE_CODE (step) != INTEGER_CST
2146           || int_cst_value (step) != xstep)
2147         continue;
2148
2149       /* Use REPLACEMENTS hash table to cache already created
2150          temporaries.  */
2151       in.hash = htab_hash_pointer (use);
2152       in.from = use;
2153       h = htab_find_with_hash (replacements, &in, in.hash);
2154       if (h != NULL)
2155         {
2156           SET_USE (use_p, h->to);
2157           continue;
2158         }
2159
2160       /* USE which has the same step as X should be replaced
2161          with a temporary set to Y + YINIT - INIT.  */
2162       init = initial_condition_in_loop_num (scev, loop->num);
2163       gcc_assert (init != NULL && init != chrec_dont_know);
2164       if (TREE_TYPE (use) == TREE_TYPE (y))
2165         {
2166           val = fold_build2 (MINUS_EXPR, TREE_TYPE (y), init, yinit);
2167           val = fold_build2 (PLUS_EXPR, TREE_TYPE (y), y, val);
2168           if (val == y)
2169             {
2170               /* If X has the same type as USE, the same step
2171                  and same initial value, it can be replaced by Y.  */
2172               SET_USE (use_p, y);
2173               continue;
2174             }
2175         }
2176       else
2177         {
2178           val = fold_build2 (MINUS_EXPR, TREE_TYPE (y), y, yinit);
2179           val = fold_convert (TREE_TYPE (use), val);
2180           val = fold_build2 (PLUS_EXPR, TREE_TYPE (use), val, init);
2181         }
2182
2183       /* Create a temporary variable and insert it at the beginning
2184          of the loop body basic block, right after the PHI node
2185          which sets Y.  */
2186       var = create_tmp_var (TREE_TYPE (use), "perfecttmp");
2187       add_referenced_var (var);
2188       val = force_gimple_operand_bsi (firstbsi, val, false, NULL);
2189       setstmt = build2 (MODIFY_EXPR, void_type_node, var, val);
2190       var = make_ssa_name (var, setstmt);
2191       TREE_OPERAND (setstmt, 0) = var;
2192       bsi_insert_before (firstbsi, setstmt, BSI_SAME_STMT);
2193       update_stmt (setstmt);
2194       SET_USE (use_p, var);
2195       h = ggc_alloc (sizeof (struct tree_map));
2196       h->hash = in.hash;
2197       h->from = use;
2198       h->to = var;
2199       loc = htab_find_slot_with_hash (replacements, h, in.hash, INSERT);
2200       gcc_assert ((*(struct tree_map **)loc) == NULL);
2201       *(struct tree_map **) loc = h;
2202     }
2203 }
2204
2205 /* Return true if STMT is an exit PHI for LOOP */
2206
2207 static bool
2208 exit_phi_for_loop_p (struct loop *loop, tree stmt)
2209 {
2210
2211   if (TREE_CODE (stmt) != PHI_NODE
2212       || PHI_NUM_ARGS (stmt) != 1
2213       || bb_for_stmt (stmt) != loop->single_exit->dest)
2214     return false;
2215
2216   return true;
2217 }
2218
2219 /* Return true if STMT can be put back into the loop INNER, by
2220    copying it to the beginning of that loop and changing the uses.  */
2221
2222 static bool
2223 can_put_in_inner_loop (struct loop *inner, tree stmt)
2224 {
2225   imm_use_iterator imm_iter;
2226   use_operand_p use_p;
2227
2228   gcc_assert (TREE_CODE (stmt) == MODIFY_EXPR);
2229   if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS)
2230       || !expr_invariant_in_loop_p (inner, TREE_OPERAND (stmt, 1)))
2231     return false;
2232
2233   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, TREE_OPERAND (stmt, 0))
2234     {
2235       if (!exit_phi_for_loop_p (inner, USE_STMT (use_p)))
2236         {
2237           basic_block immbb = bb_for_stmt (USE_STMT (use_p));
2238
2239           if (!flow_bb_inside_loop_p (inner, immbb))
2240             return false;
2241         }
2242     }
2243   return true;
2244 }
2245
2246 /* Return true if STMT can be put *after* the inner loop of LOOP.  */
2247 static bool
2248 can_put_after_inner_loop (struct loop *loop, tree stmt)
2249 {
2250   imm_use_iterator imm_iter;
2251   use_operand_p use_p;
2252
2253   if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS))
2254     return false;
2255
2256   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, TREE_OPERAND (stmt, 0))
2257     {
2258       if (!exit_phi_for_loop_p (loop, USE_STMT (use_p)))
2259         {
2260           basic_block immbb = bb_for_stmt (USE_STMT (use_p));
2261
2262           if (!dominated_by_p (CDI_DOMINATORS,
2263                                immbb,
2264                                loop->inner->header)
2265               && !can_put_in_inner_loop (loop->inner, stmt))
2266             return false;
2267         }
2268     }
2269   return true;
2270 }
2271
2272
2273
2274 /* Return TRUE if LOOP is an imperfect nest that we can convert to a
2275    perfect one.  At the moment, we only handle imperfect nests of
2276    depth 2, where all of the statements occur after the inner loop.  */
2277
2278 static bool
2279 can_convert_to_perfect_nest (struct loop *loop)
2280 {
2281   basic_block *bbs;
2282   tree exit_condition, phi;
2283   size_t i;
2284   block_stmt_iterator bsi;
2285   basic_block exitdest;
2286
2287   /* Can't handle triply nested+ loops yet.  */
2288   if (!loop->inner || loop->inner->inner)
2289     return false;
2290
2291   bbs = get_loop_body (loop);
2292   exit_condition = get_loop_exit_condition (loop);
2293   for (i = 0; i < loop->num_nodes; i++)
2294     {
2295       if (bbs[i]->loop_father == loop)
2296         {
2297           for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi))
2298             {
2299               tree stmt = bsi_stmt (bsi);
2300
2301               if (stmt == exit_condition
2302                   || not_interesting_stmt (stmt)
2303                   || stmt_is_bumper_for_loop (loop, stmt))
2304                 continue;
2305
2306               /* If this is a scalar operation that can be put back
2307                  into the inner loop, or after the inner loop, through
2308                  copying, then do so. This works on the theory that
2309                  any amount of scalar code we have to reduplicate
2310                  into or after the loops is less expensive that the
2311                  win we get from rearranging the memory walk
2312                  the loop is doing so that it has better
2313                  cache behavior.  */
2314               if (TREE_CODE (stmt) == MODIFY_EXPR)
2315                 {
2316                   use_operand_p use_a, use_b;
2317                   imm_use_iterator imm_iter;
2318                   ssa_op_iter op_iter, op_iter1;
2319                   tree op0 = TREE_OPERAND (stmt, 0);
2320                   tree scev = instantiate_parameters
2321                     (loop, analyze_scalar_evolution (loop, op0));
2322
2323                   /* If the IV is simple, it can be duplicated.  */
2324                   if (!automatically_generated_chrec_p (scev))
2325                     {
2326                       tree step = evolution_part_in_loop_num (scev, loop->num);
2327                       if (step && step != chrec_dont_know
2328                           && TREE_CODE (step) == INTEGER_CST)
2329                         continue;
2330                     }
2331
2332                   /* The statement should not define a variable used
2333                      in the inner loop.  */
2334                   if (TREE_CODE (op0) == SSA_NAME)
2335                     FOR_EACH_IMM_USE_FAST (use_a, imm_iter, op0)
2336                       if (bb_for_stmt (USE_STMT (use_a))->loop_father
2337                           == loop->inner)
2338                         goto fail;
2339
2340                   FOR_EACH_SSA_USE_OPERAND (use_a, stmt, op_iter, SSA_OP_USE)
2341                     {
2342                       tree node, op = USE_FROM_PTR (use_a);
2343
2344                       /* The variables should not be used in both loops.  */
2345                       FOR_EACH_IMM_USE_FAST (use_b, imm_iter, op)
2346                       if (bb_for_stmt (USE_STMT (use_b))->loop_father
2347                           == loop->inner)
2348                         goto fail;
2349
2350                       /* The statement should not use the value of a
2351                          scalar that was modified in the loop.  */
2352                       node = SSA_NAME_DEF_STMT (op);
2353                       if (TREE_CODE (node) == PHI_NODE)
2354                         FOR_EACH_PHI_ARG (use_b, node, op_iter1, SSA_OP_USE)
2355                           {
2356                             tree arg = USE_FROM_PTR (use_b);
2357
2358                             if (TREE_CODE (arg) == SSA_NAME)
2359                               {
2360                                 tree arg_stmt = SSA_NAME_DEF_STMT (arg);
2361
2362                                 if (bb_for_stmt (arg_stmt)->loop_father
2363                                     == loop->inner)
2364                                   goto fail;
2365                               }
2366                           }
2367                     }
2368
2369                   if (can_put_in_inner_loop (loop->inner, stmt)
2370                       || can_put_after_inner_loop (loop, stmt))
2371                     continue;
2372                 }
2373
2374               /* Otherwise, if the bb of a statement we care about isn't
2375                  dominated by the header of the inner loop, then we can't
2376                  handle this case right now.  This test ensures that the
2377                  statement comes completely *after* the inner loop.  */
2378               if (!dominated_by_p (CDI_DOMINATORS,
2379                                    bb_for_stmt (stmt),
2380                                    loop->inner->header))
2381                 goto fail;
2382             }
2383         }
2384     }
2385
2386   /* We also need to make sure the loop exit only has simple copy phis in it,
2387      otherwise we don't know how to transform it into a perfect nest right
2388      now.  */
2389   exitdest = loop->single_exit->dest;
2390
2391   for (phi = phi_nodes (exitdest); phi; phi = PHI_CHAIN (phi))
2392     if (PHI_NUM_ARGS (phi) != 1)
2393       goto fail;
2394
2395   free (bbs);
2396   return true;
2397
2398  fail:
2399   free (bbs);
2400   return false;
2401 }
2402
2403 /* Transform the loop nest into a perfect nest, if possible.
2404    LOOPS is the current struct loops *
2405    LOOP is the loop nest to transform into a perfect nest
2406    LBOUNDS are the lower bounds for the loops to transform
2407    UBOUNDS are the upper bounds for the loops to transform
2408    STEPS is the STEPS for the loops to transform.
2409    LOOPIVS is the induction variables for the loops to transform.
2410
2411    Basically, for the case of
2412
2413    FOR (i = 0; i < 50; i++)
2414     {
2415      FOR (j =0; j < 50; j++)
2416      {
2417         <whatever>
2418      }
2419      <some code>
2420     }
2421
2422    This function will transform it into a perfect loop nest by splitting the
2423    outer loop into two loops, like so:
2424
2425    FOR (i = 0; i < 50; i++)
2426    {
2427      FOR (j = 0; j < 50; j++)
2428      {
2429          <whatever>
2430      }
2431    }
2432
2433    FOR (i = 0; i < 50; i ++)
2434    {
2435     <some code>
2436    }
2437
2438    Return FALSE if we can't make this loop into a perfect nest.  */
2439
2440 static bool
2441 perfect_nestify (struct loops *loops,
2442                  struct loop *loop,
2443                  VEC(tree,heap) *lbounds,
2444                  VEC(tree,heap) *ubounds,
2445                  VEC(int,heap) *steps,
2446                  VEC(tree,heap) *loopivs)
2447 {
2448   basic_block *bbs;
2449   tree exit_condition;
2450   tree then_label, else_label, cond_stmt;
2451   basic_block preheaderbb, headerbb, bodybb, latchbb, olddest;
2452   int i;
2453   block_stmt_iterator bsi, firstbsi;
2454   bool insert_after;
2455   edge e;
2456   struct loop *newloop;
2457   tree phi;
2458   tree uboundvar;
2459   tree stmt;
2460   tree oldivvar, ivvar, ivvarinced;
2461   VEC(tree,heap) *phis = NULL;
2462   htab_t replacements = NULL;
2463
2464   /* Create the new loop.  */
2465   olddest = loop->single_exit->dest;
2466   preheaderbb = loop_split_edge_with (loop->single_exit, NULL);
2467   headerbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2468
2469   /* Push the exit phi nodes that we are moving.  */
2470   for (phi = phi_nodes (olddest); phi; phi = PHI_CHAIN (phi))
2471     {
2472       VEC_reserve (tree, heap, phis, 2);
2473       VEC_quick_push (tree, phis, PHI_RESULT (phi));
2474       VEC_quick_push (tree, phis, PHI_ARG_DEF (phi, 0));
2475     }
2476   e = redirect_edge_and_branch (single_succ_edge (preheaderbb), headerbb);
2477
2478   /* Remove the exit phis from the old basic block.  Make sure to set
2479      PHI_RESULT to null so it doesn't get released.  */
2480   while (phi_nodes (olddest) != NULL)
2481     {
2482       SET_PHI_RESULT (phi_nodes (olddest), NULL);
2483       remove_phi_node (phi_nodes (olddest), NULL);
2484     }
2485
2486   /* and add them back to the new basic block.  */
2487   while (VEC_length (tree, phis) != 0)
2488     {
2489       tree def;
2490       tree phiname;
2491       def = VEC_pop (tree, phis);
2492       phiname = VEC_pop (tree, phis);
2493       phi = create_phi_node (phiname, preheaderbb);
2494       add_phi_arg (phi, def, single_pred_edge (preheaderbb));
2495     }
2496   flush_pending_stmts (e);
2497   VEC_free (tree, heap, phis);
2498
2499   bodybb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2500   latchbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2501   make_edge (headerbb, bodybb, EDGE_FALLTHRU);
2502   then_label = build1 (GOTO_EXPR, void_type_node, tree_block_label (latchbb));
2503   else_label = build1 (GOTO_EXPR, void_type_node, tree_block_label (olddest));
2504   cond_stmt = build3 (COND_EXPR, void_type_node,
2505                       build2 (NE_EXPR, boolean_type_node,
2506                               integer_one_node,
2507                               integer_zero_node),
2508                       then_label, else_label);
2509   bsi = bsi_start (bodybb);
2510   bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
2511   e = make_edge (bodybb, olddest, EDGE_FALSE_VALUE);
2512   make_edge (bodybb, latchbb, EDGE_TRUE_VALUE);
2513   make_edge (latchbb, headerbb, EDGE_FALLTHRU);
2514
2515   /* Update the loop structures.  */
2516   newloop = duplicate_loop (loops, loop, olddest->loop_father);
2517   newloop->header = headerbb;
2518   newloop->latch = latchbb;
2519   newloop->single_exit = e;
2520   add_bb_to_loop (latchbb, newloop);
2521   add_bb_to_loop (bodybb, newloop);
2522   add_bb_to_loop (headerbb, newloop);
2523   set_immediate_dominator (CDI_DOMINATORS, bodybb, headerbb);
2524   set_immediate_dominator (CDI_DOMINATORS, headerbb, preheaderbb);
2525   set_immediate_dominator (CDI_DOMINATORS, preheaderbb,
2526                            loop->single_exit->src);
2527   set_immediate_dominator (CDI_DOMINATORS, latchbb, bodybb);
2528   set_immediate_dominator (CDI_DOMINATORS, olddest, bodybb);
2529   /* Create the new iv.  */
2530   oldivvar = VEC_index (tree, loopivs, 0);
2531   ivvar = create_tmp_var (TREE_TYPE (oldivvar), "perfectiv");
2532   add_referenced_var (ivvar);
2533   standard_iv_increment_position (newloop, &bsi, &insert_after);
2534   create_iv (VEC_index (tree, lbounds, 0),
2535              build_int_cst (TREE_TYPE (oldivvar), VEC_index (int, steps, 0)),
2536              ivvar, newloop, &bsi, insert_after, &ivvar, &ivvarinced);
2537
2538   /* Create the new upper bound.  This may be not just a variable, so we copy
2539      it to one just in case.  */
2540
2541   exit_condition = get_loop_exit_condition (newloop);
2542   uboundvar = create_tmp_var (integer_type_node, "uboundvar");
2543   add_referenced_var (uboundvar);
2544   stmt = build2 (MODIFY_EXPR, void_type_node, uboundvar,
2545                  VEC_index (tree, ubounds, 0));
2546   uboundvar = make_ssa_name (uboundvar, stmt);
2547   TREE_OPERAND (stmt, 0) = uboundvar;
2548
2549   if (insert_after)
2550     bsi_insert_after (&bsi, stmt, BSI_SAME_STMT);
2551   else
2552     bsi_insert_before (&bsi, stmt, BSI_SAME_STMT);
2553   update_stmt (stmt);
2554   COND_EXPR_COND (exit_condition) = build2 (GE_EXPR,
2555                                             boolean_type_node,
2556                                             uboundvar,
2557                                             ivvarinced);
2558   update_stmt (exit_condition);
2559   replacements = htab_create_ggc (20, tree_map_hash,
2560                                   tree_map_eq, NULL);
2561   bbs = get_loop_body_in_dom_order (loop);
2562   /* Now move the statements, and replace the induction variable in the moved
2563      statements with the correct loop induction variable.  */
2564   oldivvar = VEC_index (tree, loopivs, 0);
2565   firstbsi = bsi_start (bodybb);
2566   for (i = loop->num_nodes - 1; i >= 0 ; i--)
2567     {
2568       block_stmt_iterator tobsi = bsi_last (bodybb);
2569       if (bbs[i]->loop_father == loop)
2570         {
2571           /* If this is true, we are *before* the inner loop.
2572              If this isn't true, we are *after* it.
2573
2574              The only time can_convert_to_perfect_nest returns true when we
2575              have statements before the inner loop is if they can be moved
2576              into the inner loop.
2577
2578              The only time can_convert_to_perfect_nest returns true when we
2579              have statements after the inner loop is if they can be moved into
2580              the new split loop.  */
2581
2582           if (dominated_by_p (CDI_DOMINATORS, loop->inner->header, bbs[i]))
2583             {
2584               block_stmt_iterator header_bsi
2585                 = bsi_after_labels (loop->inner->header);
2586
2587               for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi);)
2588                 {
2589                   tree stmt = bsi_stmt (bsi);
2590
2591                   if (stmt == exit_condition
2592                       || not_interesting_stmt (stmt)
2593                       || stmt_is_bumper_for_loop (loop, stmt))
2594                     {
2595                       bsi_next (&bsi);
2596                       continue;
2597                     }
2598
2599                   bsi_move_before (&bsi, &header_bsi);
2600                 }
2601             }
2602           else
2603             {
2604               /* Note that the bsi only needs to be explicitly incremented
2605                  when we don't move something, since it is automatically
2606                  incremented when we do.  */
2607               for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi);)
2608                 {
2609                   ssa_op_iter i;
2610                   tree n, stmt = bsi_stmt (bsi);
2611
2612                   if (stmt == exit_condition
2613                       || not_interesting_stmt (stmt)
2614                       || stmt_is_bumper_for_loop (loop, stmt))
2615                     {
2616                       bsi_next (&bsi);
2617                       continue;
2618                     }
2619
2620                   replace_uses_equiv_to_x_with_y
2621                     (loop, stmt, oldivvar, VEC_index (int, steps, 0), ivvar,
2622                      VEC_index (tree, lbounds, 0), replacements, &firstbsi);
2623
2624                   bsi_move_before (&bsi, &tobsi);
2625
2626                   /* If the statement has any virtual operands, they may
2627                      need to be rewired because the original loop may
2628                      still reference them.  */
2629                   FOR_EACH_SSA_TREE_OPERAND (n, stmt, i, SSA_OP_ALL_VIRTUALS)
2630                     mark_sym_for_renaming (SSA_NAME_VAR (n));
2631                 }
2632             }
2633
2634         }
2635     }
2636
2637   free (bbs);
2638   htab_delete (replacements);
2639   return perfect_nest_p (loop);
2640 }
2641
2642 /* Return true if TRANS is a legal transformation matrix that respects
2643    the dependence vectors in DISTS and DIRS.  The conservative answer
2644    is false.
2645
2646    "Wolfe proves that a unimodular transformation represented by the
2647    matrix T is legal when applied to a loop nest with a set of
2648    lexicographically non-negative distance vectors RDG if and only if
2649    for each vector d in RDG, (T.d >= 0) is lexicographically positive.
2650    i.e.: if and only if it transforms the lexicographically positive
2651    distance vectors to lexicographically positive vectors.  Note that
2652    a unimodular matrix must transform the zero vector (and only it) to
2653    the zero vector." S.Muchnick.  */
2654
2655 bool
2656 lambda_transform_legal_p (lambda_trans_matrix trans,
2657                           int nb_loops,
2658                           VEC (ddr_p, heap) *dependence_relations)
2659 {
2660   unsigned int i, j;
2661   lambda_vector distres;
2662   struct data_dependence_relation *ddr;
2663
2664   gcc_assert (LTM_COLSIZE (trans) == nb_loops
2665               && LTM_ROWSIZE (trans) == nb_loops);
2666
2667   /* When there is an unknown relation in the dependence_relations, we
2668      know that it is no worth looking at this loop nest: give up.  */
2669   ddr = VEC_index (ddr_p, dependence_relations, 0);
2670   if (ddr == NULL)
2671     return true;
2672   if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2673     return false;
2674
2675   distres = lambda_vector_new (nb_loops);
2676
2677   /* For each distance vector in the dependence graph.  */
2678   for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
2679     {
2680       /* Don't care about relations for which we know that there is no
2681          dependence, nor about read-read (aka. output-dependences):
2682          these data accesses can happen in any order.  */
2683       if (DDR_ARE_DEPENDENT (ddr) == chrec_known
2684           || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
2685         continue;
2686
2687       /* Conservatively answer: "this transformation is not valid".  */
2688       if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2689         return false;
2690
2691       /* If the dependence could not be captured by a distance vector,
2692          conservatively answer that the transform is not valid.  */
2693       if (DDR_NUM_DIST_VECTS (ddr) == 0)
2694         return false;
2695
2696       /* Compute trans.dist_vect */
2697       for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
2698         {
2699           lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
2700                                      DDR_DIST_VECT (ddr, j), distres);
2701
2702           if (!lambda_vector_lexico_pos (distres, nb_loops))
2703             return false;
2704         }
2705     }
2706   return true;
2707 }