contrib/gcc-4.1/gcc/lambda-code.c

   1 /*  Loop transformation code generation
   2     Copyright (C) 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
   3     Contributed by Daniel Berlin <dberlin@dberlin.org>
   4
   5     This file is part of GCC.
   6
   7     GCC is free software; you can redistribute it and/or modify it under
   8     the terms of the GNU General Public License as published by the Free
   9     Software Foundation; either version 2, or (at your option) any later
  10     version.
  11
  12     GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13     WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14     FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15     for more details.
  16
  17     You should have received a copy of the GNU General Public License
  18     along with GCC; see the file COPYING.  If not, write to the Free
  19     Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
  20     02110-1301, USA.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "tm.h"
  26 #include "ggc.h"
  27 #include "tree.h"
  28 #include "target.h"
  29 #include "rtl.h"
  30 #include "basic-block.h"
  31 #include "diagnostic.h"
  32 #include "tree-flow.h"
  33 #include "tree-dump.h"
  34 #include "timevar.h"
  35 #include "cfgloop.h"
  36 #include "expr.h"
  37 #include "optabs.h"
  38 #include "tree-chrec.h"
  39 #include "tree-data-ref.h"
  40 #include "tree-pass.h"
  41 #include "tree-scalar-evolution.h"
  42 #include "vec.h"
  43 #include "lambda.h"
  44
  45 /* This loop nest code generation is based on non-singular matrix
  46    math.
  47
  48  A little terminology and a general sketch of the algorithm.  See "A singular
  49  loop transformation framework based on non-singular matrices" by Wei Li and
  50  Keshav Pingali for formal proofs that the various statements below are
  51  correct.
  52
  53  A loop iteration space represents the points traversed by the loop.  A point in the
  54  iteration space can be represented by a vector of size <loop depth>.  You can
  55  therefore represent the iteration space as an integral combinations of a set
  56  of basis vectors.
  57
  58  A loop iteration space is dense if every integer point between the loop
  59  bounds is a point in the iteration space.  Every loop with a step of 1
  60  therefore has a dense iteration space.
  61
  62  for i = 1 to 3, step 1 is a dense iteration space.
  63
  64  A loop iteration space is sparse if it is not dense.  That is, the iteration
  65  space skips integer points that are within the loop bounds.
  66
  67  for i = 1 to 3, step 2 is a sparse iteration space, because the integer point
  68  2 is skipped.
  69
  70  Dense source spaces are easy to transform, because they don't skip any
  71  points to begin with.  Thus we can compute the exact bounds of the target
  72  space using min/max and floor/ceil.
  73
  74  For a dense source space, we take the transformation matrix, decompose it
  75  into a lower triangular part (H) and a unimodular part (U).
  76  We then compute the auxiliary space from the unimodular part (source loop
  77  nest . U = auxiliary space) , which has two important properties:
  78   1. It traverses the iterations in the same lexicographic order as the source
  79   space.
  80   2. It is a dense space when the source is a dense space (even if the target
  81   space is going to be sparse).
  82
  83  Given the auxiliary space, we use the lower triangular part to compute the
  84  bounds in the target space by simple matrix multiplication.
  85  The gaps in the target space (IE the new loop step sizes) will be the
  86  diagonals of the H matrix.
  87
  88  Sparse source spaces require another step, because you can't directly compute
  89  the exact bounds of the auxiliary and target space from the sparse space.
  90  Rather than try to come up with a separate algorithm to handle sparse source
  91  spaces directly, we just find a legal transformation matrix that gives you
  92  the sparse source space, from a dense space, and then transform the dense
  93  space.
  94
  95  For a regular sparse space, you can represent the source space as an integer
  96  lattice, and the base space of that lattice will always be dense.  Thus, we
  97  effectively use the lattice to figure out the transformation from the lattice
  98  base space, to the sparse iteration space (IE what transform was applied to
  99  the dense space to make it sparse).  We then compose this transform with the
 100  transformation matrix specified by the user (since our matrix transformations
 101  are closed under composition, this is okay).  We can then use the base space
 102  (which is dense) plus the composed transformation matrix, to compute the rest
 103  of the transform using the dense space algorithm above.
 104
 105  In other words, our sparse source space (B) is decomposed into a dense base
 106  space (A), and a matrix (L) that transforms A into B, such that A.L = B.
 107  We then compute the composition of L and the user transformation matrix (T),
 108  so that T is now a transform from A to the result, instead of from B to the
 109  result.
 110  IE A.(LT) = result instead of B.T = result
 111  Since A is now a dense source space, we can use the dense source space
 112  algorithm above to compute the result of applying transform (LT) to A.
 113
 114  Fourier-Motzkin elimination is used to compute the bounds of the base space
 115  of the lattice.  */
 116
 117 DEF_VEC_I(int);
 118 DEF_VEC_ALLOC_I(int,heap);
 119
 120 static bool perfect_nestify (struct loops *,
 121                              struct loop *, VEC(tree,heap) *,
 122                              VEC(tree,heap) *, VEC(int,heap) *,
 123                              VEC(tree,heap) *);
 124 /* Lattice stuff that is internal to the code generation algorithm.  */
 125
 126 typedef struct
 127 {
 128   /* Lattice base matrix.  */
 129   lambda_matrix base;
 130   /* Lattice dimension.  */
 131   int dimension;
 132   /* Origin vector for the coefficients.  */
 133   lambda_vector origin;
 134   /* Origin matrix for the invariants.  */
 135   lambda_matrix origin_invariants;
 136   /* Number of invariants.  */
 137   int invariants;
 138 } *lambda_lattice;
 139
 140 #define LATTICE_BASE(T) ((T)->base)
 141 #define LATTICE_DIMENSION(T) ((T)->dimension)
 142 #define LATTICE_ORIGIN(T) ((T)->origin)
 143 #define LATTICE_ORIGIN_INVARIANTS(T) ((T)->origin_invariants)
 144 #define LATTICE_INVARIANTS(T) ((T)->invariants)
 145
 146 static bool lle_equal (lambda_linear_expression, lambda_linear_expression,
 147                        int, int);
 148 static lambda_lattice lambda_lattice_new (int, int);
 149 static lambda_lattice lambda_lattice_compute_base (lambda_loopnest);
 150
 151 static tree find_induction_var_from_exit_cond (struct loop *);
 152 static bool can_convert_to_perfect_nest (struct loop *);
 153
 154 /* Create a new lambda body vector.  */
 155
 156 lambda_body_vector
 157 lambda_body_vector_new (int size)
 158 {
 159   lambda_body_vector ret;
 160
 161   ret = ggc_alloc (sizeof (*ret));
 162   LBV_COEFFICIENTS (ret) = lambda_vector_new (size);
 163   LBV_SIZE (ret) = size;
 164   LBV_DENOMINATOR (ret) = 1;
 165   return ret;
 166 }
 167
 168 /* Compute the new coefficients for the vector based on the
 169   *inverse* of the transformation matrix.  */
 170
 171 lambda_body_vector
 172 lambda_body_vector_compute_new (lambda_trans_matrix transform,
 173                                 lambda_body_vector vect)
 174 {
 175   lambda_body_vector temp;
 176   int depth;
 177
 178   /* Make sure the matrix is square.  */
 179   gcc_assert (LTM_ROWSIZE (transform) == LTM_COLSIZE (transform));
 180
 181   depth = LTM_ROWSIZE (transform);
 182
 183   temp = lambda_body_vector_new (depth);
 184   LBV_DENOMINATOR (temp) =
 185     LBV_DENOMINATOR (vect) * LTM_DENOMINATOR (transform);
 186   lambda_vector_matrix_mult (LBV_COEFFICIENTS (vect), depth,
 187                              LTM_MATRIX (transform), depth,
 188                              LBV_COEFFICIENTS (temp));
 189   LBV_SIZE (temp) = LBV_SIZE (vect);
 190   return temp;
 191 }
 192
 193 /* Print out a lambda body vector.  */
 194
 195 void
 196 print_lambda_body_vector (FILE * outfile, lambda_body_vector body)
 197 {
 198   print_lambda_vector (outfile, LBV_COEFFICIENTS (body), LBV_SIZE (body));
 199 }
 200
 201 /* Return TRUE if two linear expressions are equal.  */
 202
 203 static bool
 204 lle_equal (lambda_linear_expression lle1, lambda_linear_expression lle2,
 205            int depth, int invariants)
 206 {
 207   int i;
 208
 209   if (lle1 == NULL || lle2 == NULL)
 210     return false;
 211   if (LLE_CONSTANT (lle1) != LLE_CONSTANT (lle2))
 212     return false;
 213   if (LLE_DENOMINATOR (lle1) != LLE_DENOMINATOR (lle2))
 214     return false;
 215   for (i = 0; i < depth; i++)
 216     if (LLE_COEFFICIENTS (lle1)[i] != LLE_COEFFICIENTS (lle2)[i])
 217       return false;
 218   for (i = 0; i < invariants; i++)
 219     if (LLE_INVARIANT_COEFFICIENTS (lle1)[i] !=
 220         LLE_INVARIANT_COEFFICIENTS (lle2)[i])
 221       return false;
 222   return true;
 223 }
 224
 225 /* Create a new linear expression with dimension DIM, and total number
 226    of invariants INVARIANTS.  */
 227
 228 lambda_linear_expression
 229 lambda_linear_expression_new (int dim, int invariants)
 230 {
 231   lambda_linear_expression ret;
 232
 233   ret = ggc_alloc_cleared (sizeof (*ret));
 234
 235   LLE_COEFFICIENTS (ret) = lambda_vector_new (dim);
 236   LLE_CONSTANT (ret) = 0;
 237   LLE_INVARIANT_COEFFICIENTS (ret) = lambda_vector_new (invariants);
 238   LLE_DENOMINATOR (ret) = 1;
 239   LLE_NEXT (ret) = NULL;
 240
 241   return ret;
 242 }
 243
 244 /* Print out a linear expression EXPR, with SIZE coefficients, to OUTFILE.
 245    The starting letter used for variable names is START.  */
 246
 247 static void
 248 print_linear_expression (FILE * outfile, lambda_vector expr, int size,
 249                          char start)
 250 {
 251   int i;
 252   bool first = true;
 253   for (i = 0; i < size; i++)
 254     {
 255       if (expr[i] != 0)
 256         {
 257           if (first)
 258             {
 259               if (expr[i] < 0)
 260                 fprintf (outfile, "-");
 261               first = false;
 262             }
 263           else if (expr[i] > 0)
 264             fprintf (outfile, " + ");
 265           else
 266             fprintf (outfile, " - ");
 267           if (abs (expr[i]) == 1)
 268             fprintf (outfile, "%c", start + i);
 269           else
 270             fprintf (outfile, "%d%c", abs (expr[i]), start + i);
 271         }
 272     }
 273 }
 274
 275 /* Print out a lambda linear expression structure, EXPR, to OUTFILE. The
 276    depth/number of coefficients is given by DEPTH, the number of invariants is
 277    given by INVARIANTS, and the character to start variable names with is given
 278    by START.  */
 279
 280 void
 281 print_lambda_linear_expression (FILE * outfile,
 282                                 lambda_linear_expression expr,
 283                                 int depth, int invariants, char start)
 284 {
 285   fprintf (outfile, "\tLinear expression: ");
 286   print_linear_expression (outfile, LLE_COEFFICIENTS (expr), depth, start);
 287   fprintf (outfile, " constant: %d ", LLE_CONSTANT (expr));
 288   fprintf (outfile, "  invariants: ");
 289   print_linear_expression (outfile, LLE_INVARIANT_COEFFICIENTS (expr),
 290                            invariants, 'A');
 291   fprintf (outfile, "  denominator: %d\n", LLE_DENOMINATOR (expr));
 292 }
 293
 294 /* Print a lambda loop structure LOOP to OUTFILE.  The depth/number of
 295    coefficients is given by DEPTH, the number of invariants is
 296    given by INVARIANTS, and the character to start variable names with is given
 297    by START.  */
 298
 299 void
 300 print_lambda_loop (FILE * outfile, lambda_loop loop, int depth,
 301                    int invariants, char start)
 302 {
 303   int step;
 304   lambda_linear_expression expr;
 305
 306   gcc_assert (loop);
 307
 308   expr = LL_LINEAR_OFFSET (loop);
 309   step = LL_STEP (loop);
 310   fprintf (outfile, "  step size = %d \n", step);
 311
 312   if (expr)
 313     {
 314       fprintf (outfile, "  linear offset: \n");
 315       print_lambda_linear_expression (outfile, expr, depth, invariants,
 316                                       start);
 317     }
 318
 319   fprintf (outfile, "  lower bound: \n");
 320   for (expr = LL_LOWER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr))
 321     print_lambda_linear_expression (outfile, expr, depth, invariants, start);
 322   fprintf (outfile, "  upper bound: \n");
 323   for (expr = LL_UPPER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr))
 324     print_lambda_linear_expression (outfile, expr, depth, invariants, start);
 325 }
 326
 327 /* Create a new loop nest structure with DEPTH loops, and INVARIANTS as the
 328    number of invariants.  */
 329
 330 lambda_loopnest
 331 lambda_loopnest_new (int depth, int invariants)
 332 {
 333   lambda_loopnest ret;
 334   ret = ggc_alloc (sizeof (*ret));
 335
 336   LN_LOOPS (ret) = ggc_alloc_cleared (depth * sizeof (lambda_loop));
 337   LN_DEPTH (ret) = depth;
 338   LN_INVARIANTS (ret) = invariants;
 339
 340   return ret;
 341 }
 342
 343 /* Print a lambda loopnest structure, NEST, to OUTFILE.  The starting
 344    character to use for loop names is given by START.  */
 345
 346 void
 347 print_lambda_loopnest (FILE * outfile, lambda_loopnest nest, char start)
 348 {
 349   int i;
 350   for (i = 0; i < LN_DEPTH (nest); i++)
 351     {
 352       fprintf (outfile, "Loop %c\n", start + i);
 353       print_lambda_loop (outfile, LN_LOOPS (nest)[i], LN_DEPTH (nest),
 354                          LN_INVARIANTS (nest), 'i');
 355       fprintf (outfile, "\n");
 356     }
 357 }
 358
 359 /* Allocate a new lattice structure of DEPTH x DEPTH, with INVARIANTS number
 360    of invariants.  */
 361
 362 static lambda_lattice
 363 lambda_lattice_new (int depth, int invariants)
 364 {
 365   lambda_lattice ret;
 366   ret = ggc_alloc (sizeof (*ret));
 367   LATTICE_BASE (ret) = lambda_matrix_new (depth, depth);
 368   LATTICE_ORIGIN (ret) = lambda_vector_new (depth);
 369   LATTICE_ORIGIN_INVARIANTS (ret) = lambda_matrix_new (depth, invariants);
 370   LATTICE_DIMENSION (ret) = depth;
 371   LATTICE_INVARIANTS (ret) = invariants;
 372   return ret;
 373 }
 374
 375 /* Compute the lattice base for NEST.  The lattice base is essentially a
 376    non-singular transform from a dense base space to a sparse iteration space.
 377    We use it so that we don't have to specially handle the case of a sparse
 378    iteration space in other parts of the algorithm.  As a result, this routine
 379    only does something interesting (IE produce a matrix that isn't the
 380    identity matrix) if NEST is a sparse space.  */
 381
 382 static lambda_lattice
 383 lambda_lattice_compute_base (lambda_loopnest nest)
 384 {
 385   lambda_lattice ret;
 386   int depth, invariants;
 387   lambda_matrix base;
 388
 389   int i, j, step;
 390   lambda_loop loop;
 391   lambda_linear_expression expression;
 392
 393   depth = LN_DEPTH (nest);
 394   invariants = LN_INVARIANTS (nest);
 395
 396   ret = lambda_lattice_new (depth, invariants);
 397   base = LATTICE_BASE (ret);
 398   for (i = 0; i < depth; i++)
 399     {
 400       loop = LN_LOOPS (nest)[i];
 401       gcc_assert (loop);
 402       step = LL_STEP (loop);
 403       /* If we have a step of 1, then the base is one, and the
 404          origin and invariant coefficients are 0.  */
 405       if (step == 1)
 406         {
 407           for (j = 0; j < depth; j++)
 408             base[i][j] = 0;
 409           base[i][i] = 1;
 410           LATTICE_ORIGIN (ret)[i] = 0;
 411           for (j = 0; j < invariants; j++)
 412             LATTICE_ORIGIN_INVARIANTS (ret)[i][j] = 0;
 413         }
 414       else
 415         {
 416           /* Otherwise, we need the lower bound expression (which must
 417              be an affine function)  to determine the base.  */
 418           expression = LL_LOWER_BOUND (loop);
 419           gcc_assert (expression && !LLE_NEXT (expression)
 420                       && LLE_DENOMINATOR (expression) == 1);
 421
 422           /* The lower triangular portion of the base is going to be the
 423              coefficient times the step */
 424           for (j = 0; j < i; j++)
 425             base[i][j] = LLE_COEFFICIENTS (expression)[j]
 426               * LL_STEP (LN_LOOPS (nest)[j]);
 427           base[i][i] = step;
 428           for (j = i + 1; j < depth; j++)
 429             base[i][j] = 0;
 430
 431           /* Origin for this loop is the constant of the lower bound
 432              expression.  */
 433           LATTICE_ORIGIN (ret)[i] = LLE_CONSTANT (expression);
 434
 435           /* Coefficient for the invariants are equal to the invariant
 436              coefficients in the expression.  */
 437           for (j = 0; j < invariants; j++)
 438             LATTICE_ORIGIN_INVARIANTS (ret)[i][j] =
 439               LLE_INVARIANT_COEFFICIENTS (expression)[j];
 440         }
 441     }
 442   return ret;
 443 }
 444
 445 /* Compute the greatest common denominator of two numbers (A and B) using
 446    Euclid's algorithm.  */
 447
 448 static int
 449 gcd (int a, int b)
 450 {
 451
 452   int x, y, z;
 453
 454   x = abs (a);
 455   y = abs (b);
 456
 457   while (x > 0)
 458     {
 459       z = y % x;
 460       y = x;
 461       x = z;
 462     }
 463
 464   return (y);
 465 }
 466
 467 /* Compute the greatest common denominator of a VECTOR of SIZE numbers.  */
 468
 469 static int
 470 gcd_vector (lambda_vector vector, int size)
 471 {
 472   int i;
 473   int gcd1 = 0;
 474
 475   if (size > 0)
 476     {
 477       gcd1 = vector[0];
 478       for (i = 1; i < size; i++)
 479         gcd1 = gcd (gcd1, vector[i]);
 480     }
 481   return gcd1;
 482 }
 483
 484 /* Compute the least common multiple of two numbers A and B .  */
 485
 486 static int
 487 lcm (int a, int b)
 488 {
 489   return (abs (a) * abs (b) / gcd (a, b));
 490 }
 491
 492 /* Perform Fourier-Motzkin elimination to calculate the bounds of the
 493    auxiliary nest.
 494    Fourier-Motzkin is a way of reducing systems of linear inequalities so that
 495    it is easy to calculate the answer and bounds.
 496    A sketch of how it works:
 497    Given a system of linear inequalities, ai * xj >= bk, you can always
 498    rewrite the constraints so they are all of the form
 499    a <= x, or x <= b, or x >= constant for some x in x1 ... xj (and some b
 500    in b1 ... bk, and some a in a1...ai)
 501    You can then eliminate this x from the non-constant inequalities by
 502    rewriting these as a <= b, x >= constant, and delete the x variable.
 503    You can then repeat this for any remaining x variables, and then we have
 504    an easy to use variable <= constant (or no variables at all) form that we
 505    can construct our bounds from.
 506
 507    In our case, each time we eliminate, we construct part of the bound from
 508    the ith variable, then delete the ith variable.
 509
 510    Remember the constant are in our vector a, our coefficient matrix is A,
 511    and our invariant coefficient matrix is B.
 512
 513    SIZE is the size of the matrices being passed.
 514    DEPTH is the loop nest depth.
 515    INVARIANTS is the number of loop invariants.
 516    A, B, and a are the coefficient matrix, invariant coefficient, and a
 517    vector of constants, respectively.  */
 518
 519 static lambda_loopnest
 520 compute_nest_using_fourier_motzkin (int size,
 521                                     int depth,
 522                                     int invariants,
 523                                     lambda_matrix A,
 524                                     lambda_matrix B,
 525                                     lambda_vector a)
 526 {
 527
 528   int multiple, f1, f2;
 529   int i, j, k;
 530   lambda_linear_expression expression;
 531   lambda_loop loop;
 532   lambda_loopnest auxillary_nest;
 533   lambda_matrix swapmatrix, A1, B1;
 534   lambda_vector swapvector, a1;
 535   int newsize;
 536
 537   A1 = lambda_matrix_new (128, depth);
 538   B1 = lambda_matrix_new (128, invariants);
 539   a1 = lambda_vector_new (128);
 540
 541   auxillary_nest = lambda_loopnest_new (depth, invariants);
 542
 543   for (i = depth - 1; i >= 0; i--)
 544     {
 545       loop = lambda_loop_new ();
 546       LN_LOOPS (auxillary_nest)[i] = loop;
 547       LL_STEP (loop) = 1;
 548
 549       for (j = 0; j < size; j++)
 550         {
 551           if (A[j][i] < 0)
 552             {
 553               /* Any linear expression in the matrix with a coefficient less
 554                  than 0 becomes part of the new lower bound.  */
 555               expression = lambda_linear_expression_new (depth, invariants);
 556
 557               for (k = 0; k < i; k++)
 558                 LLE_COEFFICIENTS (expression)[k] = A[j][k];
 559
 560               for (k = 0; k < invariants; k++)
 561                 LLE_INVARIANT_COEFFICIENTS (expression)[k] = -1 * B[j][k];
 562
 563               LLE_DENOMINATOR (expression) = -1 * A[j][i];
 564               LLE_CONSTANT (expression) = -1 * a[j];
 565
 566               /* Ignore if identical to the existing lower bound.  */
 567               if (!lle_equal (LL_LOWER_BOUND (loop),
 568                               expression, depth, invariants))
 569                 {
 570                   LLE_NEXT (expression) = LL_LOWER_BOUND (loop);
 571                   LL_LOWER_BOUND (loop) = expression;
 572                 }
 573
 574             }
 575           else if (A[j][i] > 0)
 576             {
 577               /* Any linear expression with a coefficient greater than 0
 578                  becomes part of the new upper bound.  */
 579               expression = lambda_linear_expression_new (depth, invariants);
 580               for (k = 0; k < i; k++)
 581                 LLE_COEFFICIENTS (expression)[k] = -1 * A[j][k];
 582
 583               for (k = 0; k < invariants; k++)
 584                 LLE_INVARIANT_COEFFICIENTS (expression)[k] = B[j][k];
 585
 586               LLE_DENOMINATOR (expression) = A[j][i];
 587               LLE_CONSTANT (expression) = a[j];
 588
 589               /* Ignore if identical to the existing upper bound.  */
 590               if (!lle_equal (LL_UPPER_BOUND (loop),
 591                               expression, depth, invariants))
 592                 {
 593                   LLE_NEXT (expression) = LL_UPPER_BOUND (loop);
 594                   LL_UPPER_BOUND (loop) = expression;
 595                 }
 596
 597             }
 598         }
 599
 600       /* This portion creates a new system of linear inequalities by deleting
 601          the i'th variable, reducing the system by one variable.  */
 602       newsize = 0;
 603       for (j = 0; j < size; j++)
 604         {
 605           /* If the coefficient for the i'th variable is 0, then we can just
 606              eliminate the variable straightaway.  Otherwise, we have to
 607              multiply through by the coefficients we are eliminating.  */
 608           if (A[j][i] == 0)
 609             {
 610               lambda_vector_copy (A[j], A1[newsize], depth);
 611               lambda_vector_copy (B[j], B1[newsize], invariants);
 612               a1[newsize] = a[j];
 613               newsize++;
 614             }
 615           else if (A[j][i] > 0)
 616             {
 617               for (k = 0; k < size; k++)
 618                 {
 619                   if (A[k][i] < 0)
 620                     {
 621                       multiple = lcm (A[j][i], A[k][i]);
 622                       f1 = multiple / A[j][i];
 623                       f2 = -1 * multiple / A[k][i];
 624
 625                       lambda_vector_add_mc (A[j], f1, A[k], f2,
 626                                             A1[newsize], depth);
 627                       lambda_vector_add_mc (B[j], f1, B[k], f2,
 628                                             B1[newsize], invariants);
 629                       a1[newsize] = f1 * a[j] + f2 * a[k];
 630                       newsize++;
 631                     }
 632                 }
 633             }
 634         }
 635
 636       swapmatrix = A;
 637       A = A1;
 638       A1 = swapmatrix;
 639
 640       swapmatrix = B;
 641       B = B1;
 642       B1 = swapmatrix;
 643
 644       swapvector = a;
 645       a = a1;
 646       a1 = swapvector;
 647
 648       size = newsize;
 649     }
 650
 651   return auxillary_nest;
 652 }
 653
 654 /* Compute the loop bounds for the auxiliary space NEST.
 655    Input system used is Ax <= b.  TRANS is the unimodular transformation.
 656    Given the original nest, this function will
 657    1. Convert the nest into matrix form, which consists of a matrix for the
 658    coefficients, a matrix for the
 659    invariant coefficients, and a vector for the constants.
 660    2. Use the matrix form to calculate the lattice base for the nest (which is
 661    a dense space)
 662    3. Compose the dense space transform with the user specified transform, to
 663    get a transform we can easily calculate transformed bounds for.
 664    4. Multiply the composed transformation matrix times the matrix form of the
 665    loop.
 666    5. Transform the newly created matrix (from step 4) back into a loop nest
 667    using fourier motzkin elimination to figure out the bounds.  */
 668
 669 static lambda_loopnest
 670 lambda_compute_auxillary_space (lambda_loopnest nest,
 671                                 lambda_trans_matrix trans)
 672 {
 673   lambda_matrix A, B, A1, B1;
 674   lambda_vector a, a1;
 675   lambda_matrix invertedtrans;
 676   int depth, invariants, size;
 677   int i, j;
 678   lambda_loop loop;
 679   lambda_linear_expression expression;
 680   lambda_lattice lattice;
 681
 682   depth = LN_DEPTH (nest);
 683   invariants = LN_INVARIANTS (nest);
 684
 685   /* Unfortunately, we can't know the number of constraints we'll have
 686      ahead of time, but this should be enough even in ridiculous loop nest
 687      cases. We must not go over this limit.  */
 688   A = lambda_matrix_new (128, depth);
 689   B = lambda_matrix_new (128, invariants);
 690   a = lambda_vector_new (128);
 691
 692   A1 = lambda_matrix_new (128, depth);
 693   B1 = lambda_matrix_new (128, invariants);
 694   a1 = lambda_vector_new (128);
 695
 696   /* Store the bounds in the equation matrix A, constant vector a, and
 697      invariant matrix B, so that we have Ax <= a + B.
 698      This requires a little equation rearranging so that everything is on the
 699      correct side of the inequality.  */
 700   size = 0;
 701   for (i = 0; i < depth; i++)
 702     {
 703       loop = LN_LOOPS (nest)[i];
 704
 705       /* First we do the lower bound.  */
 706       if (LL_STEP (loop) > 0)
 707         expression = LL_LOWER_BOUND (loop);
 708       else
 709         expression = LL_UPPER_BOUND (loop);
 710
 711       for (; expression != NULL; expression = LLE_NEXT (expression))
 712         {
 713           /* Fill in the coefficient.  */
 714           for (j = 0; j < i; j++)
 715             A[size][j] = LLE_COEFFICIENTS (expression)[j];
 716
 717           /* And the invariant coefficient.  */
 718           for (j = 0; j < invariants; j++)
 719             B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j];
 720
 721           /* And the constant.  */
 722           a[size] = LLE_CONSTANT (expression);
 723
 724           /* Convert (2x+3y+2+b)/4 <= z to 2x+3y-4z <= -2-b.  IE put all
 725              constants and single variables on   */
 726           A[size][i] = -1 * LLE_DENOMINATOR (expression);
 727           a[size] *= -1;
 728           for (j = 0; j < invariants; j++)
 729             B[size][j] *= -1;
 730
 731           size++;
 732           /* Need to increase matrix sizes above.  */
 733           gcc_assert (size <= 127);
 734
 735         }
 736
 737       /* Then do the exact same thing for the upper bounds.  */
 738       if (LL_STEP (loop) > 0)
 739         expression = LL_UPPER_BOUND (loop);
 740       else
 741         expression = LL_LOWER_BOUND (loop);
 742
 743       for (; expression != NULL; expression = LLE_NEXT (expression))
 744         {
 745           /* Fill in the coefficient.  */
 746           for (j = 0; j < i; j++)
 747             A[size][j] = LLE_COEFFICIENTS (expression)[j];
 748
 749           /* And the invariant coefficient.  */
 750           for (j = 0; j < invariants; j++)
 751             B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j];
 752
 753           /* And the constant.  */
 754           a[size] = LLE_CONSTANT (expression);
 755
 756           /* Convert z <= (2x+3y+2+b)/4 to -2x-3y+4z <= 2+b.  */
 757           for (j = 0; j < i; j++)
 758             A[size][j] *= -1;
 759           A[size][i] = LLE_DENOMINATOR (expression);
 760           size++;
 761           /* Need to increase matrix sizes above.  */
 762           gcc_assert (size <= 127);
 763
 764         }
 765     }
 766
 767   /* Compute the lattice base x = base * y + origin, where y is the
 768      base space.  */
 769   lattice = lambda_lattice_compute_base (nest);
 770
 771   /* Ax <= a + B then becomes ALy <= a+B - A*origin.  L is the lattice base  */
 772
 773   /* A1 = A * L */
 774   lambda_matrix_mult (A, LATTICE_BASE (lattice), A1, size, depth, depth);
 775
 776   /* a1 = a - A * origin constant.  */
 777   lambda_matrix_vector_mult (A, size, depth, LATTICE_ORIGIN (lattice), a1);
 778   lambda_vector_add_mc (a, 1, a1, -1, a1, size);
 779
 780   /* B1 = B - A * origin invariant.  */
 781   lambda_matrix_mult (A, LATTICE_ORIGIN_INVARIANTS (lattice), B1, size, depth,
 782                       invariants);
 783   lambda_matrix_add_mc (B, 1, B1, -1, B1, size, invariants);
 784
 785   /* Now compute the auxiliary space bounds by first inverting U, multiplying
 786      it by A1, then performing fourier motzkin.  */
 787
 788   invertedtrans = lambda_matrix_new (depth, depth);
 789
 790   /* Compute the inverse of U.  */
 791   lambda_matrix_inverse (LTM_MATRIX (trans),
 792                          invertedtrans, depth);
 793
 794   /* A = A1 inv(U).  */
 795   lambda_matrix_mult (A1, invertedtrans, A, size, depth, depth);
 796
 797   return compute_nest_using_fourier_motzkin (size, depth, invariants,
 798                                              A, B1, a1);
 799 }
 800
 801 /* Compute the loop bounds for the target space, using the bounds of
 802    the auxiliary nest AUXILLARY_NEST, and the triangular matrix H.
 803    The target space loop bounds are computed by multiplying the triangular
 804    matrix H by the auxiliary nest, to get the new loop bounds.  The sign of
 805    the loop steps (positive or negative) is then used to swap the bounds if
 806    the loop counts downwards.
 807    Return the target loopnest.  */
 808
 809 static lambda_loopnest
 810 lambda_compute_target_space (lambda_loopnest auxillary_nest,
 811                              lambda_trans_matrix H, lambda_vector stepsigns)
 812 {
 813   lambda_matrix inverse, H1;
 814   int determinant, i, j;
 815   int gcd1, gcd2;
 816   int factor;
 817
 818   lambda_loopnest target_nest;
 819   int depth, invariants;
 820   lambda_matrix target;
 821
 822   lambda_loop auxillary_loop, target_loop;
 823   lambda_linear_expression expression, auxillary_expr, target_expr, tmp_expr;
 824
 825   depth = LN_DEPTH (auxillary_nest);
 826   invariants = LN_INVARIANTS (auxillary_nest);
 827
 828   inverse = lambda_matrix_new (depth, depth);
 829   determinant = lambda_matrix_inverse (LTM_MATRIX (H), inverse, depth);
 830
 831   /* H1 is H excluding its diagonal.  */
 832   H1 = lambda_matrix_new (depth, depth);
 833   lambda_matrix_copy (LTM_MATRIX (H), H1, depth, depth);
 834
 835   for (i = 0; i < depth; i++)
 836     H1[i][i] = 0;
 837
 838   /* Computes the linear offsets of the loop bounds.  */
 839   target = lambda_matrix_new (depth, depth);
 840   lambda_matrix_mult (H1, inverse, target, depth, depth, depth);
 841
 842   target_nest = lambda_loopnest_new (depth, invariants);
 843
 844   for (i = 0; i < depth; i++)
 845     {
 846
 847       /* Get a new loop structure.  */
 848       target_loop = lambda_loop_new ();
 849       LN_LOOPS (target_nest)[i] = target_loop;
 850
 851       /* Computes the gcd of the coefficients of the linear part.  */
 852       gcd1 = gcd_vector (target[i], i);
 853
 854       /* Include the denominator in the GCD.  */
 855       gcd1 = gcd (gcd1, determinant);
 856
 857       /* Now divide through by the gcd.  */
 858       for (j = 0; j < i; j++)
 859         target[i][j] = target[i][j] / gcd1;
 860
 861       expression = lambda_linear_expression_new (depth, invariants);
 862       lambda_vector_copy (target[i], LLE_COEFFICIENTS (expression), depth);
 863       LLE_DENOMINATOR (expression) = determinant / gcd1;
 864       LLE_CONSTANT (expression) = 0;
 865       lambda_vector_clear (LLE_INVARIANT_COEFFICIENTS (expression),
 866                            invariants);
 867       LL_LINEAR_OFFSET (target_loop) = expression;
 868     }
 869
 870   /* For each loop, compute the new bounds from H.  */
 871   for (i = 0; i < depth; i++)
 872     {
 873       auxillary_loop = LN_LOOPS (auxillary_nest)[i];
 874       target_loop = LN_LOOPS (target_nest)[i];
 875       LL_STEP (target_loop) = LTM_MATRIX (H)[i][i];
 876       factor = LTM_MATRIX (H)[i][i];
 877
 878       /* First we do the lower bound.  */
 879       auxillary_expr = LL_LOWER_BOUND (auxillary_loop);
 880
 881       for (; auxillary_expr != NULL;
 882            auxillary_expr = LLE_NEXT (auxillary_expr))
 883         {
 884           target_expr = lambda_linear_expression_new (depth, invariants);
 885           lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
 886                                      depth, inverse, depth,
 887                                      LLE_COEFFICIENTS (target_expr));
 888           lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
 889                                     LLE_COEFFICIENTS (target_expr), depth,
 890                                     factor);
 891
 892           LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
 893           lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
 894                               LLE_INVARIANT_COEFFICIENTS (target_expr),
 895                               invariants);
 896           lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
 897                                     LLE_INVARIANT_COEFFICIENTS (target_expr),
 898                                     invariants, factor);
 899           LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);
 900
 901           if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
 902             {
 903               LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
 904                 * determinant;
 905               lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
 906                                         (target_expr),
 907                                         LLE_INVARIANT_COEFFICIENTS
 908                                         (target_expr), invariants,
 909                                         determinant);
 910               LLE_DENOMINATOR (target_expr) =
 911                 LLE_DENOMINATOR (target_expr) * determinant;
 912             }
 913           /* Find the gcd and divide by it here, rather than doing it
 914              at the tree level.  */
 915           gcd1 = gcd_vector (LLE_COEFFICIENTS (target_expr), depth);
 916           gcd2 = gcd_vector (LLE_INVARIANT_COEFFICIENTS (target_expr),
 917                              invariants);
 918           gcd1 = gcd (gcd1, gcd2);
 919           gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
 920           gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
 921           for (j = 0; j < depth; j++)
 922             LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
 923           for (j = 0; j < invariants; j++)
 924             LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
 925           LLE_CONSTANT (target_expr) /= gcd1;
 926           LLE_DENOMINATOR (target_expr) /= gcd1;
 927           /* Ignore if identical to existing bound.  */
 928           if (!lle_equal (LL_LOWER_BOUND (target_loop), target_expr, depth,
 929                           invariants))
 930             {
 931               LLE_NEXT (target_expr) = LL_LOWER_BOUND (target_loop);
 932               LL_LOWER_BOUND (target_loop) = target_expr;
 933             }
 934         }
 935       /* Now do the upper bound.  */
 936       auxillary_expr = LL_UPPER_BOUND (auxillary_loop);
 937
 938       for (; auxillary_expr != NULL;
 939            auxillary_expr = LLE_NEXT (auxillary_expr))
 940         {
 941           target_expr = lambda_linear_expression_new (depth, invariants);
 942           lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
 943                                      depth, inverse, depth,
 944                                      LLE_COEFFICIENTS (target_expr));
 945           lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
 946                                     LLE_COEFFICIENTS (target_expr), depth,
 947                                     factor);
 948           LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
 949           lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
 950                               LLE_INVARIANT_COEFFICIENTS (target_expr),
 951                               invariants);
 952           lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
 953                                     LLE_INVARIANT_COEFFICIENTS (target_expr),
 954                                     invariants, factor);
 955           LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);
 956
 957           if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
 958             {
 959               LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
 960                 * determinant;
 961               lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
 962                                         (target_expr),
 963                                         LLE_INVARIANT_COEFFICIENTS
 964                                         (target_expr), invariants,
 965                                         determinant);
 966               LLE_DENOMINATOR (target_expr) =
 967                 LLE_DENOMINATOR (target_expr) * determinant;
 968             }
 969           /* Find the gcd and divide by it here, instead of at the
 970              tree level.  */
 971           gcd1 = gcd_vector (LLE_COEFFICIENTS (target_expr), depth);
 972           gcd2 = gcd_vector (LLE_INVARIANT_COEFFICIENTS (target_expr),
 973                              invariants);
 974           gcd1 = gcd (gcd1, gcd2);
 975           gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
 976           gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
 977           for (j = 0; j < depth; j++)
 978             LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
 979           for (j = 0; j < invariants; j++)
 980             LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
 981           LLE_CONSTANT (target_expr) /= gcd1;
 982           LLE_DENOMINATOR (target_expr) /= gcd1;
 983           /* Ignore if equal to existing bound.  */
 984           if (!lle_equal (LL_UPPER_BOUND (target_loop), target_expr, depth,
 985                           invariants))
 986             {
 987               LLE_NEXT (target_expr) = LL_UPPER_BOUND (target_loop);
 988               LL_UPPER_BOUND (target_loop) = target_expr;
 989             }
 990         }
 991     }
 992   for (i = 0; i < depth; i++)
 993     {
 994       target_loop = LN_LOOPS (target_nest)[i];
 995       /* If necessary, exchange the upper and lower bounds and negate
 996          the step size.  */
 997       if (stepsigns[i] < 0)
 998         {
 999           LL_STEP (target_loop) *= -1;
1000           tmp_expr = LL_LOWER_BOUND (target_loop);
1001           LL_LOWER_BOUND (target_loop) = LL_UPPER_BOUND (target_loop);
1002           LL_UPPER_BOUND (target_loop) = tmp_expr;
1003         }
1004     }
1005   return target_nest;
1006 }
1007
1008 /* Compute the step signs of TRANS, using TRANS and stepsigns.  Return the new
1009    result.  */
1010
1011 static lambda_vector
1012 lambda_compute_step_signs (lambda_trans_matrix trans, lambda_vector stepsigns)
1013 {
1014   lambda_matrix matrix, H;
1015   int size;
1016   lambda_vector newsteps;
1017   int i, j, factor, minimum_column;
1018   int temp;
1019
1020   matrix = LTM_MATRIX (trans);
1021   size = LTM_ROWSIZE (trans);
1022   H = lambda_matrix_new (size, size);
1023
1024   newsteps = lambda_vector_new (size);
1025   lambda_vector_copy (stepsigns, newsteps, size);
1026
1027   lambda_matrix_copy (matrix, H, size, size);
1028
1029   for (j = 0; j < size; j++)
1030     {
1031       lambda_vector row;
1032       row = H[j];
1033       for (i = j; i < size; i++)
1034         if (row[i] < 0)
1035           lambda_matrix_col_negate (H, size, i);
1036       while (lambda_vector_first_nz (row, size, j + 1) < size)
1037         {
1038           minimum_column = lambda_vector_min_nz (row, size, j);
1039           lambda_matrix_col_exchange (H, size, j, minimum_column);
1040
1041           temp = newsteps[j];
1042           newsteps[j] = newsteps[minimum_column];
1043           newsteps[minimum_column] = temp;
1044
1045           for (i = j + 1; i < size; i++)
1046             {
1047               factor = row[i] / row[j];
1048               lambda_matrix_col_add (H, size, j, i, -1 * factor);
1049             }
1050         }
1051     }
1052   return newsteps;
1053 }
1054
1055 /* Transform NEST according to TRANS, and return the new loopnest.
1056    This involves
1057    1. Computing a lattice base for the transformation
1058    2. Composing the dense base with the specified transformation (TRANS)
1059    3. Decomposing the combined transformation into a lower triangular portion,
1060    and a unimodular portion.
1061    4. Computing the auxiliary nest using the unimodular portion.
1062    5. Computing the target nest using the auxiliary nest and the lower
1063    triangular portion.  */
1064
1065 lambda_loopnest
1066 lambda_loopnest_transform (lambda_loopnest nest, lambda_trans_matrix trans)
1067 {
1068   lambda_loopnest auxillary_nest, target_nest;
1069
1070   int depth, invariants;
1071   int i, j;
1072   lambda_lattice lattice;
1073   lambda_trans_matrix trans1, H, U;
1074   lambda_loop loop;
1075   lambda_linear_expression expression;
1076   lambda_vector origin;
1077   lambda_matrix origin_invariants;
1078   lambda_vector stepsigns;
1079   int f;
1080
1081   depth = LN_DEPTH (nest);
1082   invariants = LN_INVARIANTS (nest);
1083
1084   /* Keep track of the signs of the loop steps.  */
1085   stepsigns = lambda_vector_new (depth);
1086   for (i = 0; i < depth; i++)
1087     {
1088       if (LL_STEP (LN_LOOPS (nest)[i]) > 0)
1089         stepsigns[i] = 1;
1090       else
1091         stepsigns[i] = -1;
1092     }
1093
1094   /* Compute the lattice base.  */
1095   lattice = lambda_lattice_compute_base (nest);
1096   trans1 = lambda_trans_matrix_new (depth, depth);
1097
1098   /* Multiply the transformation matrix by the lattice base.  */
1099
1100   lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_BASE (lattice),
1101                       LTM_MATRIX (trans1), depth, depth, depth);
1102
1103   /* Compute the Hermite normal form for the new transformation matrix.  */
1104   H = lambda_trans_matrix_new (depth, depth);
1105   U = lambda_trans_matrix_new (depth, depth);
1106   lambda_matrix_hermite (LTM_MATRIX (trans1), depth, LTM_MATRIX (H),
1107                          LTM_MATRIX (U));
1108
1109   /* Compute the auxiliary loop nest's space from the unimodular
1110      portion.  */
1111   auxillary_nest = lambda_compute_auxillary_space (nest, U);
1112
1113   /* Compute the loop step signs from the old step signs and the
1114      transformation matrix.  */
1115   stepsigns = lambda_compute_step_signs (trans1, stepsigns);
1116
1117   /* Compute the target loop nest space from the auxiliary nest and
1118      the lower triangular matrix H.  */
1119   target_nest = lambda_compute_target_space (auxillary_nest, H, stepsigns);
1120   origin = lambda_vector_new (depth);
1121   origin_invariants = lambda_matrix_new (depth, invariants);
1122   lambda_matrix_vector_mult (LTM_MATRIX (trans), depth, depth,
1123                              LATTICE_ORIGIN (lattice), origin);
1124   lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_ORIGIN_INVARIANTS (lattice),
1125                       origin_invariants, depth, depth, invariants);
1126
1127   for (i = 0; i < depth; i++)
1128     {
1129       loop = LN_LOOPS (target_nest)[i];
1130       expression = LL_LINEAR_OFFSET (loop);
1131       if (lambda_vector_zerop (LLE_COEFFICIENTS (expression), depth))
1132         f = 1;
1133       else
1134         f = LLE_DENOMINATOR (expression);
1135
1136       LLE_CONSTANT (expression) += f * origin[i];
1137
1138       for (j = 0; j < invariants; j++)
1139         LLE_INVARIANT_COEFFICIENTS (expression)[j] +=
1140           f * origin_invariants[i][j];
1141     }
1142
1143   return target_nest;
1144
1145 }
1146
1147 /* Convert a gcc tree expression EXPR to a lambda linear expression, and
1148    return the new expression.  DEPTH is the depth of the loopnest.
1149    OUTERINDUCTIONVARS is an array of the induction variables for outer loops
1150    in this nest.  INVARIANTS is the array of invariants for the loop.  EXTRA
1151    is the amount we have to add/subtract from the expression because of the
1152    type of comparison it is used in.  */
1153
1154 static lambda_linear_expression
1155 gcc_tree_to_linear_expression (int depth, tree expr,
1156                                VEC(tree,heap) *outerinductionvars,
1157                                VEC(tree,heap) *invariants, int extra)
1158 {
1159   lambda_linear_expression lle = NULL;
1160   switch (TREE_CODE (expr))
1161     {
1162     case INTEGER_CST:
1163       {
1164         lle = lambda_linear_expression_new (depth, 2 * depth);
1165         LLE_CONSTANT (lle) = TREE_INT_CST_LOW (expr);
1166         if (extra != 0)
1167           LLE_CONSTANT (lle) += extra;
1168
1169         LLE_DENOMINATOR (lle) = 1;
1170       }
1171       break;
1172     case SSA_NAME:
1173       {
1174         tree iv, invar;
1175         size_t i;
1176         for (i = 0; VEC_iterate (tree, outerinductionvars, i, iv); i++)
1177           if (iv != NULL)
1178             {
1179               if (SSA_NAME_VAR (iv) == SSA_NAME_VAR (expr))
1180                 {
1181                   lle = lambda_linear_expression_new (depth, 2 * depth);
1182                   LLE_COEFFICIENTS (lle)[i] = 1;
1183                   if (extra != 0)
1184                     LLE_CONSTANT (lle) = extra;
1185
1186                   LLE_DENOMINATOR (lle) = 1;
1187                 }
1188             }
1189         for (i = 0; VEC_iterate (tree, invariants, i, invar); i++)
1190           if (invar != NULL)
1191             {
1192               if (SSA_NAME_VAR (invar) == SSA_NAME_VAR (expr))
1193                 {
1194                   lle = lambda_linear_expression_new (depth, 2 * depth);
1195                   LLE_INVARIANT_COEFFICIENTS (lle)[i] = 1;
1196                   if (extra != 0)
1197                     LLE_CONSTANT (lle) = extra;
1198                   LLE_DENOMINATOR (lle) = 1;
1199                 }
1200             }
1201       }
1202       break;
1203     default:
1204       return NULL;
1205     }
1206
1207   return lle;
1208 }
1209
1210 /* Return the depth of the loopnest NEST */
1211
1212 static int
1213 depth_of_nest (struct loop *nest)
1214 {
1215   size_t depth = 0;
1216   while (nest)
1217     {
1218       depth++;
1219       nest = nest->inner;
1220     }
1221   return depth;
1222 }
1223
1224
1225 /* Return true if OP is invariant in LOOP and all outer loops.  */
1226
1227 static bool
1228 invariant_in_loop_and_outer_loops (struct loop *loop, tree op)
1229 {
1230   if (is_gimple_min_invariant (op))
1231     return true;
1232   if (loop->depth == 0)
1233     return true;
1234   if (!expr_invariant_in_loop_p (loop, op))
1235     return false;
1236   if (loop->outer
1237       && !invariant_in_loop_and_outer_loops (loop->outer, op))
1238     return false;
1239   return true;
1240 }
1241
1242 /* Generate a lambda loop from a gcc loop LOOP.  Return the new lambda loop,
1243    or NULL if it could not be converted.
1244    DEPTH is the depth of the loop.
1245    INVARIANTS is a pointer to the array of loop invariants.
1246    The induction variable for this loop should be stored in the parameter
1247    OURINDUCTIONVAR.
1248    OUTERINDUCTIONVARS is an array of induction variables for outer loops.  */
1249
1250 static lambda_loop
1251 gcc_loop_to_lambda_loop (struct loop *loop, int depth,
1252                          VEC(tree,heap) ** invariants,
1253                          tree * ourinductionvar,
1254                          VEC(tree,heap) * outerinductionvars,
1255                          VEC(tree,heap) ** lboundvars,
1256                          VEC(tree,heap) ** uboundvars,
1257                          VEC(int,heap) ** steps)
1258 {
1259   tree phi;
1260   tree exit_cond;
1261   tree access_fn, inductionvar;
1262   tree step;
1263   lambda_loop lloop = NULL;
1264   lambda_linear_expression lbound, ubound;
1265   tree test;
1266   int stepint;
1267   int extra = 0;
1268   tree lboundvar, uboundvar, uboundresult;
1269
1270   /* Find out induction var and exit condition.  */
1271   inductionvar = find_induction_var_from_exit_cond (loop);
1272   exit_cond = get_loop_exit_condition (loop);
1273
1274   if (inductionvar == NULL || exit_cond == NULL)
1275     {
1276       if (dump_file && (dump_flags & TDF_DETAILS))
1277         fprintf (dump_file,
1278                  "Unable to convert loop: Cannot determine exit condition or induction variable for loop.\n");
1279       return NULL;
1280     }
1281
1282   test = TREE_OPERAND (exit_cond, 0);
1283
1284   if (SSA_NAME_DEF_STMT (inductionvar) == NULL_TREE)
1285     {
1286
1287       if (dump_file && (dump_flags & TDF_DETAILS))
1288         fprintf (dump_file,
1289                  "Unable to convert loop: Cannot find PHI node for induction variable\n");
1290
1291       return NULL;
1292     }
1293
1294   phi = SSA_NAME_DEF_STMT (inductionvar);
1295   if (TREE_CODE (phi) != PHI_NODE)
1296     {
1297       phi = SINGLE_SSA_TREE_OPERAND (phi, SSA_OP_USE);
1298       if (!phi)
1299         {
1300
1301           if (dump_file && (dump_flags & TDF_DETAILS))
1302             fprintf (dump_file,
1303                      "Unable to convert loop: Cannot find PHI node for induction variable\n");
1304
1305           return NULL;
1306         }
1307
1308       phi = SSA_NAME_DEF_STMT (phi);
1309       if (TREE_CODE (phi) != PHI_NODE)
1310         {
1311
1312           if (dump_file && (dump_flags & TDF_DETAILS))
1313             fprintf (dump_file,
1314                      "Unable to convert loop: Cannot find PHI node for induction variable\n");
1315           return NULL;
1316         }
1317
1318     }
1319
1320   /* The induction variable name/version we want to put in the array is the
1321      result of the induction variable phi node.  */
1322   *ourinductionvar = PHI_RESULT (phi);
1323   access_fn = instantiate_parameters
1324     (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
1325   if (access_fn == chrec_dont_know)
1326     {
1327       if (dump_file && (dump_flags & TDF_DETAILS))
1328         fprintf (dump_file,
1329                  "Unable to convert loop: Access function for induction variable phi is unknown\n");
1330
1331       return NULL;
1332     }
1333
1334   step = evolution_part_in_loop_num (access_fn, loop->num);
1335   if (!step || step == chrec_dont_know)
1336     {
1337       if (dump_file && (dump_flags & TDF_DETAILS))
1338         fprintf (dump_file,
1339                  "Unable to convert loop: Cannot determine step of loop.\n");
1340
1341       return NULL;
1342     }
1343   if (TREE_CODE (step) != INTEGER_CST)
1344     {
1345
1346       if (dump_file && (dump_flags & TDF_DETAILS))
1347         fprintf (dump_file,
1348                  "Unable to convert loop: Step of loop is not integer.\n");
1349       return NULL;
1350     }
1351
1352   stepint = TREE_INT_CST_LOW (step);
1353
1354   /* Only want phis for induction vars, which will have two
1355      arguments.  */
1356   if (PHI_NUM_ARGS (phi) != 2)
1357     {
1358       if (dump_file && (dump_flags & TDF_DETAILS))
1359         fprintf (dump_file,
1360                  "Unable to convert loop: PHI node for induction variable has >2 arguments\n");
1361       return NULL;
1362     }
1363
1364   /* Another induction variable check. One argument's source should be
1365      in the loop, one outside the loop.  */
1366   if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 0)->src)
1367       && flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 1)->src))
1368     {
1369
1370       if (dump_file && (dump_flags & TDF_DETAILS))
1371         fprintf (dump_file,
1372                  "Unable to convert loop: PHI edges both inside loop, or both outside loop.\n");
1373
1374       return NULL;
1375     }
1376
1377   if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 0)->src))
1378     {
1379       lboundvar = PHI_ARG_DEF (phi, 1);
1380       lbound = gcc_tree_to_linear_expression (depth, lboundvar,
1381                                               outerinductionvars, *invariants,
1382                                               0);
1383     }
1384   else
1385     {
1386       lboundvar = PHI_ARG_DEF (phi, 0);
1387       lbound = gcc_tree_to_linear_expression (depth, lboundvar,
1388                                               outerinductionvars, *invariants,
1389                                               0);
1390     }
1391
1392   if (!lbound)
1393     {
1394
1395       if (dump_file && (dump_flags & TDF_DETAILS))
1396         fprintf (dump_file,
1397                  "Unable to convert loop: Cannot convert lower bound to linear expression\n");
1398
1399       return NULL;
1400     }
1401   /* One part of the test may be a loop invariant tree.  */
1402   VEC_reserve (tree, heap, *invariants, 1);
1403   if (TREE_CODE (TREE_OPERAND (test, 1)) == SSA_NAME
1404       && invariant_in_loop_and_outer_loops (loop, TREE_OPERAND (test, 1)))
1405     VEC_quick_push (tree, *invariants, TREE_OPERAND (test, 1));
1406   else if (TREE_CODE (TREE_OPERAND (test, 0)) == SSA_NAME
1407            && invariant_in_loop_and_outer_loops (loop, TREE_OPERAND (test, 0)))
1408     VEC_quick_push (tree, *invariants, TREE_OPERAND (test, 0));
1409
1410   /* The non-induction variable part of the test is the upper bound variable.
1411    */
1412   if (TREE_OPERAND (test, 0) == inductionvar)
1413     uboundvar = TREE_OPERAND (test, 1);
1414   else
1415     uboundvar = TREE_OPERAND (test, 0);
1416
1417
1418   /* We only size the vectors assuming we have, at max, 2 times as many
1419      invariants as we do loops (one for each bound).
1420      This is just an arbitrary number, but it has to be matched against the
1421      code below.  */
1422   gcc_assert (VEC_length (tree, *invariants) <= (unsigned int) (2 * depth));
1423
1424
1425   /* We might have some leftover.  */
1426   if (TREE_CODE (test) == LT_EXPR)
1427     extra = -1 * stepint;
1428   else if (TREE_CODE (test) == NE_EXPR)
1429     extra = -1 * stepint;
1430   else if (TREE_CODE (test) == GT_EXPR)
1431     extra = -1 * stepint;
1432   else if (TREE_CODE (test) == EQ_EXPR)
1433     extra = 1 * stepint;
1434
1435   ubound = gcc_tree_to_linear_expression (depth, uboundvar,
1436                                           outerinductionvars,
1437                                           *invariants, extra);
1438   uboundresult = build (PLUS_EXPR, TREE_TYPE (uboundvar), uboundvar,
1439                         build_int_cst (TREE_TYPE (uboundvar), extra));
1440   VEC_safe_push (tree, heap, *uboundvars, uboundresult);
1441   VEC_safe_push (tree, heap, *lboundvars, lboundvar);
1442   VEC_safe_push (int, heap, *steps, stepint);
1443   if (!ubound)
1444     {
1445       if (dump_file && (dump_flags & TDF_DETAILS))
1446         fprintf (dump_file,
1447                  "Unable to convert loop: Cannot convert upper bound to linear expression\n");
1448       return NULL;
1449     }
1450
1451   lloop = lambda_loop_new ();
1452   LL_STEP (lloop) = stepint;
1453   LL_LOWER_BOUND (lloop) = lbound;
1454   LL_UPPER_BOUND (lloop) = ubound;
1455   return lloop;
1456 }
1457
1458 /* Given a LOOP, find the induction variable it is testing against in the exit
1459    condition.  Return the induction variable if found, NULL otherwise.  */
1460
1461 static tree
1462 find_induction_var_from_exit_cond (struct loop *loop)
1463 {
1464   tree expr = get_loop_exit_condition (loop);
1465   tree ivarop;
1466   tree test;
1467   if (expr == NULL_TREE)
1468     return NULL_TREE;
1469   if (TREE_CODE (expr) != COND_EXPR)
1470     return NULL_TREE;
1471   test = TREE_OPERAND (expr, 0);
1472   if (!COMPARISON_CLASS_P (test))
1473     return NULL_TREE;
1474
1475   /* Find the side that is invariant in this loop. The ivar must be the other
1476      side.  */
1477
1478   if (expr_invariant_in_loop_p (loop, TREE_OPERAND (test, 0)))
1479       ivarop = TREE_OPERAND (test, 1);
1480   else if (expr_invariant_in_loop_p (loop, TREE_OPERAND (test, 1)))
1481       ivarop = TREE_OPERAND (test, 0);
1482   else
1483     return NULL_TREE;
1484
1485   if (TREE_CODE (ivarop) != SSA_NAME)
1486     return NULL_TREE;
1487   return ivarop;
1488 }
1489
1490 DEF_VEC_P(lambda_loop);
1491 DEF_VEC_ALLOC_P(lambda_loop,heap);
1492
1493 /* Generate a lambda loopnest from a gcc loopnest LOOP_NEST.
1494    Return the new loop nest.
1495    INDUCTIONVARS is a pointer to an array of induction variables for the
1496    loopnest that will be filled in during this process.
1497    INVARIANTS is a pointer to an array of invariants that will be filled in
1498    during this process.  */
1499
1500 lambda_loopnest
1501 gcc_loopnest_to_lambda_loopnest (struct loops *currloops,
1502                                  struct loop *loop_nest,
1503                                  VEC(tree,heap) **inductionvars,
1504                                  VEC(tree,heap) **invariants)
1505 {
1506   lambda_loopnest ret = NULL;
1507   struct loop *temp = loop_nest;
1508   int depth = depth_of_nest (loop_nest);
1509   size_t i;
1510   VEC(lambda_loop,heap) *loops = NULL;
1511   VEC(tree,heap) *uboundvars = NULL;
1512   VEC(tree,heap) *lboundvars  = NULL;
1513   VEC(int,heap) *steps = NULL;
1514   lambda_loop newloop;
1515   tree inductionvar = NULL;
1516   bool perfect_nest = perfect_nest_p (loop_nest);
1517
1518   if (!perfect_nest && !can_convert_to_perfect_nest (loop_nest))
1519     goto fail;
1520
1521   while (temp)
1522     {
1523       newloop = gcc_loop_to_lambda_loop (temp, depth, invariants,
1524                                          &inductionvar, *inductionvars,
1525                                          &lboundvars, &uboundvars,
1526                                          &steps);
1527       if (!newloop)
1528         goto fail;
1529
1530       VEC_safe_push (tree, heap, *inductionvars, inductionvar);
1531       VEC_safe_push (lambda_loop, heap, loops, newloop);
1532       temp = temp->inner;
1533     }
1534
1535   if (!perfect_nest)
1536     {
1537       if (!perfect_nestify (currloops, loop_nest,
1538                             lboundvars, uboundvars, steps, *inductionvars))
1539         {
1540           if (dump_file)
1541             fprintf (dump_file,
1542                      "Not a perfect loop nest and couldn't convert to one.\n");
1543           goto fail;
1544         }
1545       else if (dump_file)
1546         fprintf (dump_file,
1547                  "Successfully converted loop nest to perfect loop nest.\n");
1548     }
1549
1550   ret = lambda_loopnest_new (depth, 2 * depth);
1551
1552   for (i = 0; VEC_iterate (lambda_loop, loops, i, newloop); i++)
1553     LN_LOOPS (ret)[i] = newloop;
1554
1555  fail:
1556   VEC_free (lambda_loop, heap, loops);
1557   VEC_free (tree, heap, uboundvars);
1558   VEC_free (tree, heap, lboundvars);
1559   VEC_free (int, heap, steps);
1560
1561   return ret;
1562 }
1563
1564 /* Convert a lambda body vector LBV to a gcc tree, and return the new tree.
1565    STMTS_TO_INSERT is a pointer to a tree where the statements we need to be
1566    inserted for us are stored.  INDUCTION_VARS is the array of induction
1567    variables for the loop this LBV is from.  TYPE is the tree type to use for
1568    the variables and trees involved.  */
1569
1570 static tree
1571 lbv_to_gcc_expression (lambda_body_vector lbv,
1572                        tree type, VEC(tree,heap) *induction_vars,
1573                        tree *stmts_to_insert)
1574 {
1575   tree stmts, stmt, resvar, name;
1576   tree iv;
1577   size_t i;
1578   tree_stmt_iterator tsi;
1579
1580   /* Create a statement list and a linear expression temporary.  */
1581   stmts = alloc_stmt_list ();
1582   resvar = create_tmp_var (type, "lbvtmp");
1583   add_referenced_tmp_var (resvar);
1584
1585   /* Start at 0.  */
1586   stmt = build (MODIFY_EXPR, void_type_node, resvar, integer_zero_node);
1587   name = make_ssa_name (resvar, stmt);
1588   TREE_OPERAND (stmt, 0) = name;
1589   tsi = tsi_last (stmts);
1590   tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1591
1592   for (i = 0; VEC_iterate (tree, induction_vars, i, iv); i++)
1593     {
1594       if (LBV_COEFFICIENTS (lbv)[i] != 0)
1595         {
1596           tree newname;
1597           tree coeffmult;
1598
1599           /* newname = coefficient * induction_variable */
1600           coeffmult = build_int_cst (type, LBV_COEFFICIENTS (lbv)[i]);
1601           stmt = build (MODIFY_EXPR, void_type_node, resvar,
1602                         fold_build2 (MULT_EXPR, type, iv, coeffmult));
1603
1604           newname = make_ssa_name (resvar, stmt);
1605           TREE_OPERAND (stmt, 0) = newname;
1606           fold_stmt (&stmt);
1607           tsi = tsi_last (stmts);
1608           tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1609
1610           /* name = name + newname */
1611           stmt = build (MODIFY_EXPR, void_type_node, resvar,
1612                         build (PLUS_EXPR, type, name, newname));
1613           name = make_ssa_name (resvar, stmt);
1614           TREE_OPERAND (stmt, 0) = name;
1615           fold_stmt (&stmt);
1616           tsi = tsi_last (stmts);
1617           tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1618
1619         }
1620     }
1621
1622   /* Handle any denominator that occurs.  */
1623   if (LBV_DENOMINATOR (lbv) != 1)
1624     {
1625       tree denominator = build_int_cst (type, LBV_DENOMINATOR (lbv));
1626       stmt = build (MODIFY_EXPR, void_type_node, resvar,
1627                     build (CEIL_DIV_EXPR, type, name, denominator));
1628       name = make_ssa_name (resvar, stmt);
1629       TREE_OPERAND (stmt, 0) = name;
1630       fold_stmt (&stmt);
1631       tsi = tsi_last (stmts);
1632       tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1633     }
1634   *stmts_to_insert = stmts;
1635   return name;
1636 }
1637
1638 /* Convert a linear expression from coefficient and constant form to a
1639    gcc tree.
1640    Return the tree that represents the final value of the expression.
1641    LLE is the linear expression to convert.
1642    OFFSET is the linear offset to apply to the expression.
1643    TYPE is the tree type to use for the variables and math.
1644    INDUCTION_VARS is a vector of induction variables for the loops.
1645    INVARIANTS is a vector of the loop nest invariants.
1646    WRAP specifies what tree code to wrap the results in, if there is more than
1647    one (it is either MAX_EXPR, or MIN_EXPR).
1648    STMTS_TO_INSERT Is a pointer to the statement list we fill in with
1649    statements that need to be inserted for the linear expression.  */
1650
1651 static tree
1652 lle_to_gcc_expression (lambda_linear_expression lle,
1653                        lambda_linear_expression offset,
1654                        tree type,
1655                        VEC(tree,heap) *induction_vars,
1656                        VEC(tree,heap) *invariants,
1657                        enum tree_code wrap, tree *stmts_to_insert)
1658 {
1659   tree stmts, stmt, resvar, name;
1660   size_t i;
1661   tree_stmt_iterator tsi;
1662   tree iv, invar;
1663   VEC(tree,heap) *results = NULL;
1664
1665   gcc_assert (wrap == MAX_EXPR || wrap == MIN_EXPR);
1666   name = NULL_TREE;
1667   /* Create a statement list and a linear expression temporary.  */
1668   stmts = alloc_stmt_list ();
1669   resvar = create_tmp_var (type, "lletmp");
1670   add_referenced_tmp_var (resvar);
1671
1672   /* Build up the linear expressions, and put the variable representing the
1673      result in the results array.  */
1674   for (; lle != NULL; lle = LLE_NEXT (lle))
1675     {
1676       /* Start at name = 0.  */
1677       stmt = build (MODIFY_EXPR, void_type_node, resvar, integer_zero_node);
1678       name = make_ssa_name (resvar, stmt);
1679       TREE_OPERAND (stmt, 0) = name;
1680       fold_stmt (&stmt);
1681       tsi = tsi_last (stmts);
1682       tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1683
1684       /* First do the induction variables.
1685          at the end, name = name + all the induction variables added
1686          together.  */
1687       for (i = 0; VEC_iterate (tree, induction_vars, i, iv); i++)
1688         {
1689           if (LLE_COEFFICIENTS (lle)[i] != 0)
1690             {
1691               tree newname;
1692               tree mult;
1693               tree coeff;
1694
1695               /* mult = induction variable * coefficient.  */
1696               if (LLE_COEFFICIENTS (lle)[i] == 1)
1697                 {
1698                   mult = VEC_index (tree, induction_vars, i);
1699                 }
1700               else
1701                 {
1702                   coeff = build_int_cst (type,
1703                                          LLE_COEFFICIENTS (lle)[i]);
1704                   mult = fold_build2 (MULT_EXPR, type, iv, coeff);
1705                 }
1706
1707               /* newname = mult */
1708               stmt = build (MODIFY_EXPR, void_type_node, resvar, mult);
1709               newname = make_ssa_name (resvar, stmt);
1710               TREE_OPERAND (stmt, 0) = newname;
1711               fold_stmt (&stmt);
1712               tsi = tsi_last (stmts);
1713               tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1714
1715               /* name = name + newname */
1716               stmt = build (MODIFY_EXPR, void_type_node, resvar,
1717                             build (PLUS_EXPR, type, name, newname));
1718               name = make_ssa_name (resvar, stmt);
1719               TREE_OPERAND (stmt, 0) = name;
1720               fold_stmt (&stmt);
1721               tsi = tsi_last (stmts);
1722               tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1723             }
1724         }
1725
1726       /* Handle our invariants.
1727          At the end, we have name = name + result of adding all multiplied
1728          invariants.  */
1729       for (i = 0; VEC_iterate (tree, invariants, i, invar); i++)
1730         {
1731           if (LLE_INVARIANT_COEFFICIENTS (lle)[i] != 0)
1732             {
1733               tree newname;
1734               tree mult;
1735               tree coeff;
1736               int invcoeff = LLE_INVARIANT_COEFFICIENTS (lle)[i];
1737               /* mult = invariant * coefficient  */
1738               if (invcoeff == 1)
1739                 {
1740                   mult = invar;
1741                 }
1742               else
1743                 {
1744                   coeff = build_int_cst (type, invcoeff);
1745                   mult = fold_build2 (MULT_EXPR, type, invar, coeff);
1746                 }
1747
1748               /* newname = mult */
1749               stmt = build (MODIFY_EXPR, void_type_node, resvar, mult);
1750               newname = make_ssa_name (resvar, stmt);
1751               TREE_OPERAND (stmt, 0) = newname;
1752               fold_stmt (&stmt);
1753               tsi = tsi_last (stmts);
1754               tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1755
1756               /* name = name + newname */
1757               stmt = build (MODIFY_EXPR, void_type_node, resvar,
1758                             build (PLUS_EXPR, type, name, newname));
1759               name = make_ssa_name (resvar, stmt);
1760               TREE_OPERAND (stmt, 0) = name;
1761               fold_stmt (&stmt);
1762               tsi = tsi_last (stmts);
1763               tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1764             }
1765         }
1766
1767       /* Now handle the constant.
1768          name = name + constant.  */
1769       if (LLE_CONSTANT (lle) != 0)
1770         {
1771           stmt = build (MODIFY_EXPR, void_type_node, resvar,
1772                         build (PLUS_EXPR, type, name,
1773                                build_int_cst (type, LLE_CONSTANT (lle))));
1774           name = make_ssa_name (resvar, stmt);
1775           TREE_OPERAND (stmt, 0) = name;
1776           fold_stmt (&stmt);
1777           tsi = tsi_last (stmts);
1778           tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1779         }
1780
1781       /* Now handle the offset.
1782          name = name + linear offset.  */
1783       if (LLE_CONSTANT (offset) != 0)
1784         {
1785           stmt = build (MODIFY_EXPR, void_type_node, resvar,
1786                         build (PLUS_EXPR, type, name,
1787                                build_int_cst (type, LLE_CONSTANT (offset))));
1788           name = make_ssa_name (resvar, stmt);
1789           TREE_OPERAND (stmt, 0) = name;
1790           fold_stmt (&stmt);
1791           tsi = tsi_last (stmts);
1792           tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1793         }
1794
1795       /* Handle any denominator that occurs.  */
1796       if (LLE_DENOMINATOR (lle) != 1)
1797         {
1798           stmt = build_int_cst (type, LLE_DENOMINATOR (lle));
1799           stmt = build (wrap == MAX_EXPR ? CEIL_DIV_EXPR : FLOOR_DIV_EXPR,
1800                         type, name, stmt);
1801           stmt = build (MODIFY_EXPR, void_type_node, resvar, stmt);
1802
1803           /* name = {ceil, floor}(name/denominator) */
1804           name = make_ssa_name (resvar, stmt);
1805           TREE_OPERAND (stmt, 0) = name;
1806           tsi = tsi_last (stmts);
1807           tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1808         }
1809       VEC_safe_push (tree, heap, results, name);
1810     }
1811
1812   /* Again, out of laziness, we don't handle this case yet.  It's not
1813      hard, it just hasn't occurred.  */
1814   gcc_assert (VEC_length (tree, results) <= 2);
1815
1816   /* We may need to wrap the results in a MAX_EXPR or MIN_EXPR.  */
1817   if (VEC_length (tree, results) > 1)
1818     {
1819       tree op1 = VEC_index (tree, results, 0);
1820       tree op2 = VEC_index (tree, results, 1);
1821       stmt = build (MODIFY_EXPR, void_type_node, resvar,
1822                     build (wrap, type, op1, op2));
1823       name = make_ssa_name (resvar, stmt);
1824       TREE_OPERAND (stmt, 0) = name;
1825       tsi = tsi_last (stmts);
1826       tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1827     }
1828
1829   VEC_free (tree, heap, results);
1830
1831   *stmts_to_insert = stmts;
1832   return name;
1833 }
1834
1835 /* Transform a lambda loopnest NEW_LOOPNEST, which had TRANSFORM applied to
1836    it, back into gcc code.  This changes the
1837    loops, their induction variables, and their bodies, so that they
1838    match the transformed loopnest.
1839    OLD_LOOPNEST is the loopnest before we've replaced it with the new
1840    loopnest.
1841    OLD_IVS is a vector of induction variables from the old loopnest.
1842    INVARIANTS is a vector of loop invariants from the old loopnest.
1843    NEW_LOOPNEST is the new lambda loopnest to replace OLD_LOOPNEST with.
1844    TRANSFORM is the matrix transform that was applied to OLD_LOOPNEST to get
1845    NEW_LOOPNEST.  */
1846
1847 void
1848 lambda_loopnest_to_gcc_loopnest (struct loop *old_loopnest,
1849                                  VEC(tree,heap) *old_ivs,
1850                                  VEC(tree,heap) *invariants,
1851                                  lambda_loopnest new_loopnest,
1852                                  lambda_trans_matrix transform)
1853 {
1854   struct loop *temp;
1855   size_t i = 0;
1856   size_t depth = 0;
1857   VEC(tree,heap) *new_ivs = NULL;
1858   tree oldiv;
1859
1860   block_stmt_iterator bsi;
1861
1862   if (dump_file)
1863     {
1864       transform = lambda_trans_matrix_inverse (transform);
1865       fprintf (dump_file, "Inverse of transformation matrix:\n");
1866       print_lambda_trans_matrix (dump_file, transform);
1867     }
1868   depth = depth_of_nest (old_loopnest);
1869   temp = old_loopnest;
1870
1871   while (temp)
1872     {
1873       lambda_loop newloop;
1874       basic_block bb;
1875       edge exit;
1876       tree ivvar, ivvarinced, exitcond, stmts;
1877       enum tree_code testtype;
1878       tree newupperbound, newlowerbound;
1879       lambda_linear_expression offset;
1880       tree type;
1881       bool insert_after;
1882       tree inc_stmt;
1883
1884       oldiv = VEC_index (tree, old_ivs, i);
1885       type = TREE_TYPE (oldiv);
1886
1887       /* First, build the new induction variable temporary  */
1888
1889       ivvar = create_tmp_var (type, "lnivtmp");
1890       add_referenced_tmp_var (ivvar);
1891
1892       VEC_safe_push (tree, heap, new_ivs, ivvar);
1893
1894       newloop = LN_LOOPS (new_loopnest)[i];
1895
1896       /* Linear offset is a bit tricky to handle.  Punt on the unhandled
1897          cases for now.  */
1898       offset = LL_LINEAR_OFFSET (newloop);
1899
1900       gcc_assert (LLE_DENOMINATOR (offset) == 1 &&
1901                   lambda_vector_zerop (LLE_COEFFICIENTS (offset), depth));
1902
1903       /* Now build the  new lower bounds, and insert the statements
1904          necessary to generate it on the loop preheader.  */
1905       newlowerbound = lle_to_gcc_expression (LL_LOWER_BOUND (newloop),
1906                                              LL_LINEAR_OFFSET (newloop),
1907                                              type,
1908                                              new_ivs,
1909                                              invariants, MAX_EXPR, &stmts);
1910       bsi_insert_on_edge (loop_preheader_edge (temp), stmts);
1911       bsi_commit_edge_inserts ();
1912       /* Build the new upper bound and insert its statements in the
1913          basic block of the exit condition */
1914       newupperbound = lle_to_gcc_expression (LL_UPPER_BOUND (newloop),
1915                                              LL_LINEAR_OFFSET (newloop),
1916                                              type,
1917                                              new_ivs,
1918                                              invariants, MIN_EXPR, &stmts);
1919       exit = temp->single_exit;
1920       exitcond = get_loop_exit_condition (temp);
1921       bb = bb_for_stmt (exitcond);
1922       bsi = bsi_start (bb);
1923       bsi_insert_after (&bsi, stmts, BSI_NEW_STMT);
1924
1925       /* Create the new iv.  */
1926
1927       standard_iv_increment_position (temp, &bsi, &insert_after);
1928       create_iv (newlowerbound,
1929                  build_int_cst (type, LL_STEP (newloop)),
1930                  ivvar, temp, &bsi, insert_after, &ivvar,
1931                  NULL);
1932
1933       /* Unfortunately, the incremented ivvar that create_iv inserted may not
1934          dominate the block containing the exit condition.
1935          So we simply create our own incremented iv to use in the new exit
1936          test,  and let redundancy elimination sort it out.  */
1937       inc_stmt = build (PLUS_EXPR, type,
1938                         ivvar, build_int_cst (type, LL_STEP (newloop)));
1939       inc_stmt = build (MODIFY_EXPR, void_type_node, SSA_NAME_VAR (ivvar),
1940                         inc_stmt);
1941       ivvarinced = make_ssa_name (SSA_NAME_VAR (ivvar), inc_stmt);
1942       TREE_OPERAND (inc_stmt, 0) = ivvarinced;
1943       bsi = bsi_for_stmt (exitcond);
1944       bsi_insert_before (&bsi, inc_stmt, BSI_SAME_STMT);
1945
1946       /* Replace the exit condition with the new upper bound
1947          comparison.  */
1948
1949       testtype = LL_STEP (newloop) >= 0 ? LE_EXPR : GE_EXPR;
1950
1951       /* We want to build a conditional where true means exit the loop, and
1952          false means continue the loop.
1953          So swap the testtype if this isn't the way things are.*/
1954
1955       if (exit->flags & EDGE_FALSE_VALUE)
1956         testtype = swap_tree_comparison (testtype);
1957
1958       COND_EXPR_COND (exitcond) = build (testtype,
1959                                          boolean_type_node,
1960                                          newupperbound, ivvarinced);
1961       update_stmt (exitcond);
1962       VEC_replace (tree, new_ivs, i, ivvar);
1963
1964       i++;
1965       temp = temp->inner;
1966     }
1967
1968   /* Rewrite uses of the old ivs so that they are now specified in terms of
1969      the new ivs.  */
1970
1971   for (i = 0; VEC_iterate (tree, old_ivs, i, oldiv); i++)
1972     {
1973       imm_use_iterator imm_iter;
1974       use_operand_p imm_use;
1975       tree oldiv_def;
1976       tree oldiv_stmt = SSA_NAME_DEF_STMT (oldiv);
1977
1978       if (TREE_CODE (oldiv_stmt) == PHI_NODE)
1979         oldiv_def = PHI_RESULT (oldiv_stmt);
1980       else
1981         oldiv_def = SINGLE_SSA_TREE_OPERAND (oldiv_stmt, SSA_OP_DEF);
1982       gcc_assert (oldiv_def != NULL_TREE);
1983
1984       FOR_EACH_IMM_USE_SAFE (imm_use, imm_iter, oldiv_def)
1985         {
1986           tree stmt = USE_STMT (imm_use);
1987           use_operand_p use_p;
1988           ssa_op_iter iter;
1989           gcc_assert (TREE_CODE (stmt) != PHI_NODE);
1990           FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
1991             {
1992               if (USE_FROM_PTR (use_p) == oldiv)
1993                 {
1994                   tree newiv, stmts;
1995                   lambda_body_vector lbv, newlbv;
1996                   /* Compute the new expression for the induction
1997                      variable.  */
1998                   depth = VEC_length (tree, new_ivs);
1999                   lbv = lambda_body_vector_new (depth);
2000                   LBV_COEFFICIENTS (lbv)[i] = 1;
2001
2002                   newlbv = lambda_body_vector_compute_new (transform, lbv);
2003
2004                   newiv = lbv_to_gcc_expression (newlbv, TREE_TYPE (oldiv),
2005                                                  new_ivs, &stmts);
2006                   bsi = bsi_for_stmt (stmt);
2007                   /* Insert the statements to build that
2008                      expression.  */
2009                   bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
2010                   propagate_value (use_p, newiv);
2011                   update_stmt (stmt);
2012
2013                 }
2014             }
2015         }
2016     }
2017   VEC_free (tree, heap, new_ivs);
2018 }
2019
2020 /* Return TRUE if this is not interesting statement from the perspective of
2021    determining if we have a perfect loop nest.  */
2022
2023 static bool
2024 not_interesting_stmt (tree stmt)
2025 {
2026   /* Note that COND_EXPR's aren't interesting because if they were exiting the
2027      loop, we would have already failed the number of exits tests.  */
2028   if (TREE_CODE (stmt) == LABEL_EXPR
2029       || TREE_CODE (stmt) == GOTO_EXPR
2030       || TREE_CODE (stmt) == COND_EXPR)
2031     return true;
2032   return false;
2033 }
2034
2035 /* Return TRUE if PHI uses DEF for it's in-the-loop edge for LOOP.  */
2036
2037 static bool
2038 phi_loop_edge_uses_def (struct loop *loop, tree phi, tree def)
2039 {
2040   int i;
2041   for (i = 0; i < PHI_NUM_ARGS (phi); i++)
2042     if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, i)->src))
2043       if (PHI_ARG_DEF (phi, i) == def)
2044         return true;
2045   return false;
2046 }
2047
2048 /* Return TRUE if STMT is a use of PHI_RESULT.  */
2049
2050 static bool
2051 stmt_uses_phi_result (tree stmt, tree phi_result)
2052 {
2053   tree use = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
2054
2055   /* This is conservatively true, because we only want SIMPLE bumpers
2056      of the form x +- constant for our pass.  */
2057   return (use == phi_result);
2058 }
2059
2060 /* STMT is a bumper stmt for LOOP if the version it defines is used in the
2061    in-loop-edge in a phi node, and the operand it uses is the result of that
2062    phi node.
2063    I.E. i_29 = i_3 + 1
2064         i_3 = PHI (0, i_29);  */
2065
2066 static bool
2067 stmt_is_bumper_for_loop (struct loop *loop, tree stmt)
2068 {
2069   tree use;
2070   tree def;
2071   imm_use_iterator iter;
2072   use_operand_p use_p;
2073
2074   def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
2075   if (!def)
2076     return false;
2077
2078   FOR_EACH_IMM_USE_FAST (use_p, iter, def)
2079     {
2080       use = USE_STMT (use_p);
2081       if (TREE_CODE (use) == PHI_NODE)
2082         {
2083           if (phi_loop_edge_uses_def (loop, use, def))
2084             if (stmt_uses_phi_result (stmt, PHI_RESULT (use)))
2085               return true;
2086         }
2087     }
2088   return false;
2089 }
2090
2091
2092 /* Return true if LOOP is a perfect loop nest.
2093    Perfect loop nests are those loop nests where all code occurs in the
2094    innermost loop body.
2095    If S is a program statement, then
2096
2097    i.e.
2098    DO I = 1, 20
2099        S1
2100        DO J = 1, 20
2101        ...
2102        END DO
2103    END DO
2104    is not a perfect loop nest because of S1.
2105
2106    DO I = 1, 20
2107       DO J = 1, 20
2108         S1
2109         ...
2110       END DO
2111    END DO
2112    is a perfect loop nest.
2113
2114    Since we don't have high level loops anymore, we basically have to walk our
2115    statements and ignore those that are there because the loop needs them (IE
2116    the induction variable increment, and jump back to the top of the loop).  */
2117
2118 bool
2119 perfect_nest_p (struct loop *loop)
2120 {
2121   basic_block *bbs;
2122   size_t i;
2123   tree exit_cond;
2124
2125   if (!loop->inner)
2126     return true;
2127   bbs = get_loop_body (loop);
2128   exit_cond = get_loop_exit_condition (loop);
2129   for (i = 0; i < loop->num_nodes; i++)
2130     {
2131       if (bbs[i]->loop_father == loop)
2132         {
2133           block_stmt_iterator bsi;
2134           for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi))
2135             {
2136               tree stmt = bsi_stmt (bsi);
2137               if (stmt == exit_cond
2138                   || not_interesting_stmt (stmt)
2139                   || stmt_is_bumper_for_loop (loop, stmt))
2140                 continue;
2141               free (bbs);
2142               return false;
2143             }
2144         }
2145     }
2146   free (bbs);
2147   /* See if the inner loops are perfectly nested as well.  */
2148   if (loop->inner)
2149     return perfect_nest_p (loop->inner);
2150   return true;
2151 }
2152
2153 /* Replace the USES of X in STMT, or uses with the same step as X with Y.
2154    YINIT is the initial value of Y, REPLACEMENTS is a hash table to
2155    avoid creating duplicate temporaries and FIRSTBSI is statement
2156    iterator where new temporaries should be inserted at the beginning
2157    of body basic block.  */
2158
2159 static void
2160 replace_uses_equiv_to_x_with_y (struct loop *loop, tree stmt, tree x,
2161                                 int xstep, tree y, tree yinit,
2162                                 htab_t replacements,
2163                                 block_stmt_iterator *firstbsi)
2164 {
2165   ssa_op_iter iter;
2166   use_operand_p use_p;
2167
2168   FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
2169     {
2170       tree use = USE_FROM_PTR (use_p);
2171       tree step = NULL_TREE;
2172       tree scev, init, val, var, setstmt;
2173       struct tree_map *h, in;
2174       void **loc;
2175
2176       /* Replace uses of X with Y right away.  */
2177       if (use == x)
2178         {
2179           SET_USE (use_p, y);
2180           continue;
2181         }
2182
2183       scev = instantiate_parameters (loop,
2184                                      analyze_scalar_evolution (loop, use));
2185
2186       if (scev == NULL || scev == chrec_dont_know)
2187         continue;
2188
2189       step = evolution_part_in_loop_num (scev, loop->num);
2190       if (step == NULL
2191           || step == chrec_dont_know
2192           || TREE_CODE (step) != INTEGER_CST
2193           || int_cst_value (step) != xstep)
2194         continue;
2195
2196       /* Use REPLACEMENTS hash table to cache already created
2197          temporaries.  */
2198       in.hash = htab_hash_pointer (use);
2199       in.from = use;
2200       h = htab_find_with_hash (replacements, &in, in.hash);
2201       if (h != NULL)
2202         {
2203           SET_USE (use_p, h->to);
2204           continue;
2205         }
2206
2207       /* USE which has the same step as X should be replaced
2208          with a temporary set to Y + YINIT - INIT.  */
2209       init = initial_condition_in_loop_num (scev, loop->num);
2210       gcc_assert (init != NULL && init != chrec_dont_know);
2211       if (TREE_TYPE (use) == TREE_TYPE (y))
2212         {
2213           val = fold_build2 (MINUS_EXPR, TREE_TYPE (y), init, yinit);
2214           val = fold_build2 (PLUS_EXPR, TREE_TYPE (y), y, val);
2215           if (val == y)
2216             {
2217               /* If X has the same type as USE, the same step
2218                  and same initial value, it can be replaced by Y.  */
2219               SET_USE (use_p, y);
2220               continue;
2221             }
2222         }
2223       else
2224         {
2225           val = fold_build2 (MINUS_EXPR, TREE_TYPE (y), y, yinit);
2226           val = fold_convert (TREE_TYPE (use), val);
2227           val = fold_build2 (PLUS_EXPR, TREE_TYPE (use), val, init);
2228         }
2229
2230       /* Create a temporary variable and insert it at the beginning
2231          of the loop body basic block, right after the PHI node
2232          which sets Y.  */
2233       var = create_tmp_var (TREE_TYPE (use), "perfecttmp");
2234       add_referenced_tmp_var (var);
2235       val = force_gimple_operand_bsi (firstbsi, val, false, NULL);
2236       setstmt = build2 (MODIFY_EXPR, void_type_node, var, val);
2237       var = make_ssa_name (var, setstmt);
2238       TREE_OPERAND (setstmt, 0) = var;
2239       bsi_insert_before (firstbsi, setstmt, BSI_SAME_STMT);
2240       update_stmt (setstmt);
2241       SET_USE (use_p, var);
2242       h = ggc_alloc (sizeof (struct tree_map));
2243       h->hash = in.hash;
2244       h->from = use;
2245       h->to = var;
2246       loc = htab_find_slot_with_hash (replacements, h, in.hash, INSERT);
2247       gcc_assert ((*(struct tree_map **)loc) == NULL);
2248       *(struct tree_map **) loc = h;
2249     }
2250 }
2251
2252 /* Return true if STMT is an exit PHI for LOOP */
2253
2254 static bool
2255 exit_phi_for_loop_p (struct loop *loop, tree stmt)
2256 {
2257
2258   if (TREE_CODE (stmt) != PHI_NODE
2259       || PHI_NUM_ARGS (stmt) != 1
2260       || bb_for_stmt (stmt) != loop->single_exit->dest)
2261     return false;
2262
2263   return true;
2264 }
2265
2266 /* Return true if STMT can be put back into INNER, a loop by moving it to the
2267    beginning of that loop.  */
2268
2269 static bool
2270 can_put_in_inner_loop (struct loop *inner, tree stmt)
2271 {
2272   imm_use_iterator imm_iter;
2273   use_operand_p use_p;
2274   basic_block use_bb = NULL;
2275
2276   gcc_assert (TREE_CODE (stmt) == MODIFY_EXPR);
2277   if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS)
2278       || !expr_invariant_in_loop_p (inner, TREE_OPERAND (stmt, 1)))
2279     return false;
2280
2281   /* We require that the basic block of all uses be the same, or the use be an
2282      exit phi.  */
2283   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, TREE_OPERAND (stmt, 0))
2284     {
2285       if (!exit_phi_for_loop_p (inner, USE_STMT (use_p)))
2286         {
2287           basic_block immbb = bb_for_stmt (USE_STMT (use_p));
2288
2289           if (!flow_bb_inside_loop_p (inner, immbb))
2290             return false;
2291           if (use_bb == NULL)
2292             use_bb = immbb;
2293           else if (immbb != use_bb)
2294             return false;
2295         }
2296     }
2297   return true;
2298
2299 }
2300
2301 /* Return true if STMT can be put *after* the inner loop of LOOP.  */
2302
2303 static bool
2304 can_put_after_inner_loop (struct loop *loop, tree stmt)
2305 {
2306   imm_use_iterator imm_iter;
2307   use_operand_p use_p;
2308
2309   if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS))
2310     return false;
2311
2312   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, TREE_OPERAND (stmt, 0))
2313     {
2314       if (!exit_phi_for_loop_p (loop, USE_STMT (use_p)))
2315         {
2316           basic_block immbb = bb_for_stmt (USE_STMT (use_p));
2317
2318           if (!dominated_by_p (CDI_DOMINATORS,
2319                                immbb,
2320                                loop->inner->header)
2321               && !can_put_in_inner_loop (loop->inner, stmt))
2322             return false;
2323         }
2324     }
2325   return true;
2326 }
2327
2328 /* Return TRUE if LOOP is an imperfect nest that we can convert to a
2329    perfect one.  At the moment, we only handle imperfect nests of
2330    depth 2, where all of the statements occur after the inner loop.  */
2331
2332 static bool
2333 can_convert_to_perfect_nest (struct loop *loop)
2334 {
2335   basic_block *bbs;
2336   tree exit_condition, phi;
2337   size_t i;
2338   block_stmt_iterator bsi;
2339   basic_block exitdest;
2340
2341   /* Can't handle triply nested+ loops yet.  */
2342   if (!loop->inner || loop->inner->inner)
2343     return false;
2344
2345   bbs = get_loop_body (loop);
2346   exit_condition = get_loop_exit_condition (loop);
2347   for (i = 0; i < loop->num_nodes; i++)
2348     {
2349       if (bbs[i]->loop_father == loop)
2350         {
2351           for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi))
2352             {
2353               tree stmt = bsi_stmt (bsi);
2354
2355               if (stmt == exit_condition
2356                   || not_interesting_stmt (stmt)
2357                   || stmt_is_bumper_for_loop (loop, stmt))
2358                 continue;
2359
2360               /* If this is a simple operation like a cast that is invariant
2361                  in the inner loop, only used there, and we can place it
2362                  there, then it's not going to hurt us.
2363                  This means that we will propagate casts and other cheap
2364                  invariant operations *back*
2365                  into the inner loop if we can interchange the loop, on the
2366                  theory that we are going to gain a lot more by interchanging
2367                  the loop than we are by leaving some invariant code there for
2368                  some other pass to clean up.  */
2369               if (TREE_CODE (stmt) == MODIFY_EXPR)
2370                 {
2371                   use_operand_p use_a, use_b;
2372                   imm_use_iterator imm_iter;
2373                   ssa_op_iter op_iter, op_iter1;
2374                   tree op0 = TREE_OPERAND (stmt, 0);
2375                   tree scev = instantiate_parameters
2376                     (loop, analyze_scalar_evolution (loop, op0));
2377
2378                   /* If the IV is simple, it can be duplicated.  */
2379                   if (!automatically_generated_chrec_p (scev))
2380                     {
2381                       tree step = evolution_part_in_loop_num (scev, loop->num);
2382                       if (step && step != chrec_dont_know
2383                           && TREE_CODE (step) == INTEGER_CST)
2384                         continue;
2385                     }
2386
2387                   /* The statement should not define a variable used
2388                      in the inner loop.  */
2389                   if (TREE_CODE (op0) == SSA_NAME)
2390                     FOR_EACH_IMM_USE_FAST (use_a, imm_iter, op0)
2391                       if (bb_for_stmt (USE_STMT (use_a))->loop_father
2392                           == loop->inner)
2393                         goto fail;
2394
2395                   FOR_EACH_SSA_USE_OPERAND (use_a, stmt, op_iter, SSA_OP_USE)
2396                     {
2397                       tree node, op = USE_FROM_PTR (use_a);
2398
2399                       /* The variables should not be used in both loops.  */
2400                       FOR_EACH_IMM_USE_FAST (use_b, imm_iter, op)
2401                       if (bb_for_stmt (USE_STMT (use_b))->loop_father
2402                           == loop->inner)
2403                         goto fail;
2404
2405                       /* The statement should not use the value of a
2406                          scalar that was modified in the loop.  */
2407                       node = SSA_NAME_DEF_STMT (op);
2408                       if (TREE_CODE (node) == PHI_NODE)
2409                         FOR_EACH_PHI_ARG (use_b, node, op_iter1, SSA_OP_USE)
2410                           {
2411                             tree arg = USE_FROM_PTR (use_b);
2412
2413                             if (TREE_CODE (arg) == SSA_NAME)
2414                               {
2415                                 tree arg_stmt = SSA_NAME_DEF_STMT (arg);
2416
2417                                 if (bb_for_stmt (arg_stmt)->loop_father
2418                                     == loop->inner)
2419                                   goto fail;
2420                               }
2421                           }
2422                     }
2423
2424                   if (can_put_in_inner_loop (loop->inner, stmt)
2425                       || can_put_after_inner_loop (loop, stmt))
2426                     continue;
2427                 }
2428
2429               /* Otherwise, if the bb of a statement we care about isn't
2430                  dominated by the header of the inner loop, then we can't
2431                  handle this case right now.  This test ensures that the
2432                  statement comes completely *after* the inner loop.  */
2433               if (!dominated_by_p (CDI_DOMINATORS,
2434                                    bb_for_stmt (stmt),
2435                                    loop->inner->header))
2436                 goto fail;
2437             }
2438         }
2439     }
2440
2441   /* We also need to make sure the loop exit only has simple copy phis in it,
2442      otherwise we don't know how to transform it into a perfect nest right
2443      now.  */
2444   exitdest = loop->single_exit->dest;
2445
2446   for (phi = phi_nodes (exitdest); phi; phi = PHI_CHAIN (phi))
2447     if (PHI_NUM_ARGS (phi) != 1)
2448       goto fail;
2449
2450   free (bbs);
2451   return true;
2452
2453  fail:
2454   free (bbs);
2455   return false;
2456 }
2457
2458 /* Transform the loop nest into a perfect nest, if possible.
2459    LOOPS is the current struct loops *
2460    LOOP is the loop nest to transform into a perfect nest
2461    LBOUNDS are the lower bounds for the loops to transform
2462    UBOUNDS are the upper bounds for the loops to transform
2463    STEPS is the STEPS for the loops to transform.
2464    LOOPIVS is the induction variables for the loops to transform.
2465
2466    Basically, for the case of
2467
2468    FOR (i = 0; i < 50; i++)
2469     {
2470      FOR (j =0; j < 50; j++)
2471      {
2472         <whatever>
2473      }
2474      <some code>
2475     }
2476
2477    This function will transform it into a perfect loop nest by splitting the
2478    outer loop into two loops, like so:
2479
2480    FOR (i = 0; i < 50; i++)
2481    {
2482      FOR (j = 0; j < 50; j++)
2483      {
2484          <whatever>
2485      }
2486    }
2487
2488    FOR (i = 0; i < 50; i ++)
2489    {
2490     <some code>
2491    }
2492
2493    Return FALSE if we can't make this loop into a perfect nest.  */
2494
2495 static bool
2496 perfect_nestify (struct loops *loops,
2497                  struct loop *loop,
2498                  VEC(tree,heap) *lbounds,
2499                  VEC(tree,heap) *ubounds,
2500                  VEC(int,heap) *steps,
2501                  VEC(tree,heap) *loopivs)
2502 {
2503   basic_block *bbs;
2504   tree exit_condition;
2505   tree then_label, else_label, cond_stmt;
2506   basic_block preheaderbb, headerbb, bodybb, latchbb, olddest;
2507   int i;
2508   block_stmt_iterator bsi, firstbsi;
2509   bool insert_after;
2510   edge e;
2511   struct loop *newloop;
2512   tree phi;
2513   tree uboundvar;
2514   tree stmt;
2515   tree oldivvar, ivvar, ivvarinced;
2516   VEC(tree,heap) *phis = NULL;
2517   htab_t replacements = NULL;
2518
2519   /* Create the new loop.  */
2520   olddest = loop->single_exit->dest;
2521   preheaderbb = loop_split_edge_with (loop->single_exit, NULL);
2522   headerbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2523
2524   /* Push the exit phi nodes that we are moving.  */
2525   for (phi = phi_nodes (olddest); phi; phi = PHI_CHAIN (phi))
2526     {
2527       VEC_reserve (tree, heap, phis, 2);
2528       VEC_quick_push (tree, phis, PHI_RESULT (phi));
2529       VEC_quick_push (tree, phis, PHI_ARG_DEF (phi, 0));
2530     }
2531   e = redirect_edge_and_branch (single_succ_edge (preheaderbb), headerbb);
2532
2533   /* Remove the exit phis from the old basic block.  Make sure to set
2534      PHI_RESULT to null so it doesn't get released.  */
2535   while (phi_nodes (olddest) != NULL)
2536     {
2537       SET_PHI_RESULT (phi_nodes (olddest), NULL);
2538       remove_phi_node (phi_nodes (olddest), NULL);
2539     }
2540
2541   /* and add them back to the new basic block.  */
2542   while (VEC_length (tree, phis) != 0)
2543     {
2544       tree def;
2545       tree phiname;
2546       def = VEC_pop (tree, phis);
2547       phiname = VEC_pop (tree, phis);
2548       phi = create_phi_node (phiname, preheaderbb);
2549       add_phi_arg (phi, def, single_pred_edge (preheaderbb));
2550     }
2551   flush_pending_stmts (e);
2552   VEC_free (tree, heap, phis);
2553
2554   bodybb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2555   latchbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2556   make_edge (headerbb, bodybb, EDGE_FALLTHRU);
2557   then_label = build1 (GOTO_EXPR, void_type_node, tree_block_label (latchbb));
2558   else_label = build1 (GOTO_EXPR, void_type_node, tree_block_label (olddest));
2559   cond_stmt = build (COND_EXPR, void_type_node,
2560                      build (NE_EXPR, boolean_type_node,
2561                             integer_one_node,
2562                             integer_zero_node),
2563                      then_label, else_label);
2564   bsi = bsi_start (bodybb);
2565   bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
2566   e = make_edge (bodybb, olddest, EDGE_FALSE_VALUE);
2567   make_edge (bodybb, latchbb, EDGE_TRUE_VALUE);
2568   make_edge (latchbb, headerbb, EDGE_FALLTHRU);
2569
2570   /* Update the loop structures.  */
2571   newloop = duplicate_loop (loops, loop, olddest->loop_father);
2572   newloop->header = headerbb;
2573   newloop->latch = latchbb;
2574   newloop->single_exit = e;
2575   add_bb_to_loop (latchbb, newloop);
2576   add_bb_to_loop (bodybb, newloop);
2577   add_bb_to_loop (headerbb, newloop);
2578   set_immediate_dominator (CDI_DOMINATORS, bodybb, headerbb);
2579   set_immediate_dominator (CDI_DOMINATORS, headerbb, preheaderbb);
2580   set_immediate_dominator (CDI_DOMINATORS, preheaderbb,
2581                            loop->single_exit->src);
2582   set_immediate_dominator (CDI_DOMINATORS, latchbb, bodybb);
2583   set_immediate_dominator (CDI_DOMINATORS, olddest, bodybb);
2584   /* Create the new iv.  */
2585   oldivvar = VEC_index (tree, loopivs, 0);
2586   ivvar = create_tmp_var (TREE_TYPE (oldivvar), "perfectiv");
2587   add_referenced_tmp_var (ivvar);
2588   standard_iv_increment_position (newloop, &bsi, &insert_after);
2589   create_iv (VEC_index (tree, lbounds, 0),
2590              build_int_cst (TREE_TYPE (oldivvar), VEC_index (int, steps, 0)),
2591              ivvar, newloop, &bsi, insert_after, &ivvar, &ivvarinced);
2592
2593   /* Create the new upper bound.  This may be not just a variable, so we copy
2594      it to one just in case.  */
2595
2596   exit_condition = get_loop_exit_condition (newloop);
2597   uboundvar = create_tmp_var (integer_type_node, "uboundvar");
2598   add_referenced_tmp_var (uboundvar);
2599   stmt = build (MODIFY_EXPR, void_type_node, uboundvar,
2600                 VEC_index (tree, ubounds, 0));
2601   uboundvar = make_ssa_name (uboundvar, stmt);
2602   TREE_OPERAND (stmt, 0) = uboundvar;
2603
2604   if (insert_after)
2605     bsi_insert_after (&bsi, stmt, BSI_SAME_STMT);
2606   else
2607     bsi_insert_before (&bsi, stmt, BSI_SAME_STMT);
2608   update_stmt (stmt);
2609   COND_EXPR_COND (exit_condition) = build (GE_EXPR,
2610                                            boolean_type_node,
2611                                            uboundvar,
2612                                            ivvarinced);
2613   update_stmt (exit_condition);
2614   replacements = htab_create_ggc (20, tree_map_hash,
2615                                   tree_map_eq, NULL);
2616   bbs = get_loop_body_in_dom_order (loop);
2617   /* Now move the statements, and replace the induction variable in the moved
2618      statements with the correct loop induction variable.  */
2619   oldivvar = VEC_index (tree, loopivs, 0);
2620   firstbsi = bsi_start (bodybb);
2621   for (i = loop->num_nodes - 1; i >= 0 ; i--)
2622     {
2623       block_stmt_iterator tobsi = bsi_last (bodybb);
2624       if (bbs[i]->loop_father == loop)
2625         {
2626           /* If this is true, we are *before* the inner loop.
2627              If this isn't true, we are *after* it.
2628
2629              The only time can_convert_to_perfect_nest returns true when we
2630              have statements before the inner loop is if they can be moved
2631              into the inner loop.
2632
2633              The only time can_convert_to_perfect_nest returns true when we
2634              have statements after the inner loop is if they can be moved into
2635              the new split loop.  */
2636
2637           if (dominated_by_p (CDI_DOMINATORS, loop->inner->header, bbs[i]))
2638             {
2639               block_stmt_iterator header_bsi
2640                 = bsi_after_labels (loop->inner->header);
2641
2642               for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi);)
2643                 {
2644                   tree stmt = bsi_stmt (bsi);
2645
2646                   if (stmt == exit_condition
2647                       || not_interesting_stmt (stmt)
2648                       || stmt_is_bumper_for_loop (loop, stmt))
2649                     {
2650                       bsi_next (&bsi);
2651                       continue;
2652                     }
2653
2654                   bsi_move_before (&bsi, &header_bsi);
2655                 }
2656             }
2657           else
2658             {
2659               /* Note that the bsi only needs to be explicitly incremented
2660                  when we don't move something, since it is automatically
2661                  incremented when we do.  */
2662               for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi);)
2663                 {
2664                   ssa_op_iter i;
2665                   tree n, stmt = bsi_stmt (bsi);
2666
2667                   if (stmt == exit_condition
2668                       || not_interesting_stmt (stmt)
2669                       || stmt_is_bumper_for_loop (loop, stmt))
2670                     {
2671                       bsi_next (&bsi);
2672                       continue;
2673                     }
2674
2675                   replace_uses_equiv_to_x_with_y
2676                     (loop, stmt, oldivvar, VEC_index (int, steps, 0), ivvar,
2677                      VEC_index (tree, lbounds, 0), replacements, &firstbsi);
2678
2679                   bsi_move_before (&bsi, &tobsi);
2680
2681                   /* If the statement has any virtual operands, they may
2682                      need to be rewired because the original loop may
2683                      still reference them.  */
2684                   FOR_EACH_SSA_TREE_OPERAND (n, stmt, i, SSA_OP_ALL_VIRTUALS)
2685                     mark_sym_for_renaming (SSA_NAME_VAR (n));
2686                 }
2687             }
2688
2689         }
2690     }
2691
2692   free (bbs);
2693   htab_delete (replacements);
2694   return perfect_nest_p (loop);
2695 }
2696
2697 /* Return true if TRANS is a legal transformation matrix that respects
2698    the dependence vectors in DISTS and DIRS.  The conservative answer
2699    is false.
2700
2701    "Wolfe proves that a unimodular transformation represented by the
2702    matrix T is legal when applied to a loop nest with a set of
2703    lexicographically non-negative distance vectors RDG if and only if
2704    for each vector d in RDG, (T.d >= 0) is lexicographically positive.
2705    i.e.: if and only if it transforms the lexicographically positive
2706    distance vectors to lexicographically positive vectors.  Note that
2707    a unimodular matrix must transform the zero vector (and only it) to
2708    the zero vector." S.Muchnick.  */
2709
2710 bool
2711 lambda_transform_legal_p (lambda_trans_matrix trans,
2712                           int nb_loops,
2713                           varray_type dependence_relations)
2714 {
2715   unsigned int i, j;
2716   lambda_vector distres;
2717   struct data_dependence_relation *ddr;
2718
2719   gcc_assert (LTM_COLSIZE (trans) == nb_loops
2720               && LTM_ROWSIZE (trans) == nb_loops);
2721
2722   /* When there is an unknown relation in the dependence_relations, we
2723      know that it is no worth looking at this loop nest: give up.  */
2724   ddr = (struct data_dependence_relation *)
2725     VARRAY_GENERIC_PTR (dependence_relations, 0);
2726   if (ddr == NULL)
2727     return true;
2728   if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2729     return false;
2730
2731   distres = lambda_vector_new (nb_loops);
2732
2733   /* For each distance vector in the dependence graph.  */
2734   for (i = 0; i < VARRAY_ACTIVE_SIZE (dependence_relations); i++)
2735     {
2736       ddr = (struct data_dependence_relation *)
2737         VARRAY_GENERIC_PTR (dependence_relations, i);
2738
2739       /* Don't care about relations for which we know that there is no
2740          dependence, nor about read-read (aka. output-dependences):
2741          these data accesses can happen in any order.  */
2742       if (DDR_ARE_DEPENDENT (ddr) == chrec_known
2743           || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
2744         continue;
2745
2746       /* Conservatively answer: "this transformation is not valid".  */
2747       if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2748         return false;
2749
2750       /* If the dependence could not be captured by a distance vector,
2751          conservatively answer that the transform is not valid.  */
2752       if (DDR_NUM_DIST_VECTS (ddr) == 0)
2753         return false;
2754
2755       /* Compute trans.dist_vect */
2756       for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
2757         {
2758           lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
2759                                      DDR_DIST_VECT (ddr, j), distres);
2760
2761           if (!lambda_vector_lexico_pos (distres, nb_loops))
2762             return false;
2763         }
2764     }
2765   return true;
2766 }