gcc/lambda-code.c

   1 /*  Loop transformation code generation
   2     Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   3     Free Software Foundation, Inc.
   4     Contributed by Daniel Berlin <dberlin@dberlin.org>
   5
   6     This file is part of GCC.
   7
   8     GCC is free software; you can redistribute it and/or modify it under
   9     the terms of the GNU General Public License as published by the Free
  10     Software Foundation; either version 3, or (at your option) any later
  11     version.
  12
  13     GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  14     WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15     FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  16     for more details.
  17
  18     You should have received a copy of the GNU General Public License
  19     along with GCC; see the file COPYING3.  If not see
  20     <http://www.gnu.org/licenses/>.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "tm.h"
  26 #include "ggc.h"
  27 #include "tree.h"
  28 #include "target.h"
  29 #include "rtl.h"
  30 #include "basic-block.h"
  31 #include "diagnostic.h"
  32 #include "obstack.h"
  33 #include "tree-flow.h"
  34 #include "tree-dump.h"
  35 #include "timevar.h"
  36 #include "cfgloop.h"
  37 #include "expr.h"
  38 #include "optabs.h"
  39 #include "tree-chrec.h"
  40 #include "tree-data-ref.h"
  41 #include "tree-pass.h"
  42 #include "tree-scalar-evolution.h"
  43 #include "vec.h"
  44 #include "lambda.h"
  45 #include "vecprim.h"
  46 #include "pointer-set.h"
  47
  48 /* This loop nest code generation is based on non-singular matrix
  49    math.
  50
  51  A little terminology and a general sketch of the algorithm.  See "A singular
  52  loop transformation framework based on non-singular matrices" by Wei Li and
  53  Keshav Pingali for formal proofs that the various statements below are
  54  correct.
  55
  56  A loop iteration space represents the points traversed by the loop.  A point in the
  57  iteration space can be represented by a vector of size <loop depth>.  You can
  58  therefore represent the iteration space as an integral combinations of a set
  59  of basis vectors.
  60
  61  A loop iteration space is dense if every integer point between the loop
  62  bounds is a point in the iteration space.  Every loop with a step of 1
  63  therefore has a dense iteration space.
  64
  65  for i = 1 to 3, step 1 is a dense iteration space.
  66
  67  A loop iteration space is sparse if it is not dense.  That is, the iteration
  68  space skips integer points that are within the loop bounds.
  69
  70  for i = 1 to 3, step 2 is a sparse iteration space, because the integer point
  71  2 is skipped.
  72
  73  Dense source spaces are easy to transform, because they don't skip any
  74  points to begin with.  Thus we can compute the exact bounds of the target
  75  space using min/max and floor/ceil.
  76
  77  For a dense source space, we take the transformation matrix, decompose it
  78  into a lower triangular part (H) and a unimodular part (U).
  79  We then compute the auxiliary space from the unimodular part (source loop
  80  nest . U = auxiliary space) , which has two important properties:
  81   1. It traverses the iterations in the same lexicographic order as the source
  82   space.
  83   2. It is a dense space when the source is a dense space (even if the target
  84   space is going to be sparse).
  85
  86  Given the auxiliary space, we use the lower triangular part to compute the
  87  bounds in the target space by simple matrix multiplication.
  88  The gaps in the target space (IE the new loop step sizes) will be the
  89  diagonals of the H matrix.
  90
  91  Sparse source spaces require another step, because you can't directly compute
  92  the exact bounds of the auxiliary and target space from the sparse space.
  93  Rather than try to come up with a separate algorithm to handle sparse source
  94  spaces directly, we just find a legal transformation matrix that gives you
  95  the sparse source space, from a dense space, and then transform the dense
  96  space.
  97
  98  For a regular sparse space, you can represent the source space as an integer
  99  lattice, and the base space of that lattice will always be dense.  Thus, we
 100  effectively use the lattice to figure out the transformation from the lattice
 101  base space, to the sparse iteration space (IE what transform was applied to
 102  the dense space to make it sparse).  We then compose this transform with the
 103  transformation matrix specified by the user (since our matrix transformations
 104  are closed under composition, this is okay).  We can then use the base space
 105  (which is dense) plus the composed transformation matrix, to compute the rest
 106  of the transform using the dense space algorithm above.
 107
 108  In other words, our sparse source space (B) is decomposed into a dense base
 109  space (A), and a matrix (L) that transforms A into B, such that A.L = B.
 110  We then compute the composition of L and the user transformation matrix (T),
 111  so that T is now a transform from A to the result, instead of from B to the
 112  result.
 113  IE A.(LT) = result instead of B.T = result
 114  Since A is now a dense source space, we can use the dense source space
 115  algorithm above to compute the result of applying transform (LT) to A.
 116
 117  Fourier-Motzkin elimination is used to compute the bounds of the base space
 118  of the lattice.  */
 119
 120 static bool perfect_nestify (struct loop *, VEC(tree,heap) *,
 121                              VEC(tree,heap) *, VEC(int,heap) *,
 122                              VEC(tree,heap) *);
 123 /* Lattice stuff that is internal to the code generation algorithm.  */
 124
 125 typedef struct lambda_lattice_s
 126 {
 127   /* Lattice base matrix.  */
 128   lambda_matrix base;
 129   /* Lattice dimension.  */
 130   int dimension;
 131   /* Origin vector for the coefficients.  */
 132   lambda_vector origin;
 133   /* Origin matrix for the invariants.  */
 134   lambda_matrix origin_invariants;
 135   /* Number of invariants.  */
 136   int invariants;
 137 } *lambda_lattice;
 138
 139 #define LATTICE_BASE(T) ((T)->base)
 140 #define LATTICE_DIMENSION(T) ((T)->dimension)
 141 #define LATTICE_ORIGIN(T) ((T)->origin)
 142 #define LATTICE_ORIGIN_INVARIANTS(T) ((T)->origin_invariants)
 143 #define LATTICE_INVARIANTS(T) ((T)->invariants)
 144
 145 static bool lle_equal (lambda_linear_expression, lambda_linear_expression,
 146                        int, int);
 147 static lambda_lattice lambda_lattice_new (int, int, struct obstack *);
 148 static lambda_lattice lambda_lattice_compute_base (lambda_loopnest,
 149                                                    struct obstack *);
 150
 151 static bool can_convert_to_perfect_nest (struct loop *);
 152
 153 /* Create a new lambda loop in LAMBDA_OBSTACK.  */
 154
 155 static lambda_loop
 156 lambda_loop_new (struct obstack * lambda_obstack)
 157 {
 158   lambda_loop result = (lambda_loop)
 159     obstack_alloc (lambda_obstack, sizeof (struct lambda_loop_s));
 160   memset (result, 0, sizeof (struct lambda_loop_s));
 161   return result;
 162 }
 163
 164 /* Create a new lambda body vector.  */
 165
 166 lambda_body_vector
 167 lambda_body_vector_new (int size, struct obstack * lambda_obstack)
 168 {
 169   lambda_body_vector ret;
 170
 171   ret = (lambda_body_vector) obstack_alloc (lambda_obstack,
 172                                             sizeof (*ret));
 173   LBV_COEFFICIENTS (ret) = lambda_vector_new (size);
 174   LBV_SIZE (ret) = size;
 175   LBV_DENOMINATOR (ret) = 1;
 176   return ret;
 177 }
 178
 179 /* Compute the new coefficients for the vector based on the
 180   *inverse* of the transformation matrix.  */
 181
 182 lambda_body_vector
 183 lambda_body_vector_compute_new (lambda_trans_matrix transform,
 184                                 lambda_body_vector vect,
 185                                 struct obstack * lambda_obstack)
 186 {
 187   lambda_body_vector temp;
 188   int depth;
 189
 190   /* Make sure the matrix is square.  */
 191   gcc_assert (LTM_ROWSIZE (transform) == LTM_COLSIZE (transform));
 192
 193   depth = LTM_ROWSIZE (transform);
 194
 195   temp = lambda_body_vector_new (depth, lambda_obstack);
 196   LBV_DENOMINATOR (temp) =
 197     LBV_DENOMINATOR (vect) * LTM_DENOMINATOR (transform);
 198   lambda_vector_matrix_mult (LBV_COEFFICIENTS (vect), depth,
 199                              LTM_MATRIX (transform), depth,
 200                              LBV_COEFFICIENTS (temp));
 201   LBV_SIZE (temp) = LBV_SIZE (vect);
 202   return temp;
 203 }
 204
 205 /* Print out a lambda body vector.  */
 206
 207 void
 208 print_lambda_body_vector (FILE * outfile, lambda_body_vector body)
 209 {
 210   print_lambda_vector (outfile, LBV_COEFFICIENTS (body), LBV_SIZE (body));
 211 }
 212
 213 /* Return TRUE if two linear expressions are equal.  */
 214
 215 static bool
 216 lle_equal (lambda_linear_expression lle1, lambda_linear_expression lle2,
 217            int depth, int invariants)
 218 {
 219   int i;
 220
 221   if (lle1 == NULL || lle2 == NULL)
 222     return false;
 223   if (LLE_CONSTANT (lle1) != LLE_CONSTANT (lle2))
 224     return false;
 225   if (LLE_DENOMINATOR (lle1) != LLE_DENOMINATOR (lle2))
 226     return false;
 227   for (i = 0; i < depth; i++)
 228     if (LLE_COEFFICIENTS (lle1)[i] != LLE_COEFFICIENTS (lle2)[i])
 229       return false;
 230   for (i = 0; i < invariants; i++)
 231     if (LLE_INVARIANT_COEFFICIENTS (lle1)[i] !=
 232         LLE_INVARIANT_COEFFICIENTS (lle2)[i])
 233       return false;
 234   return true;
 235 }
 236
 237 /* Create a new linear expression with dimension DIM, and total number
 238    of invariants INVARIANTS.  */
 239
 240 lambda_linear_expression
 241 lambda_linear_expression_new (int dim, int invariants,
 242                               struct obstack * lambda_obstack)
 243 {
 244   lambda_linear_expression ret;
 245
 246   ret = (lambda_linear_expression)obstack_alloc (lambda_obstack,
 247                                                  sizeof (*ret));
 248   LLE_COEFFICIENTS (ret) = lambda_vector_new (dim);
 249   LLE_CONSTANT (ret) = 0;
 250   LLE_INVARIANT_COEFFICIENTS (ret) = lambda_vector_new (invariants);
 251   LLE_DENOMINATOR (ret) = 1;
 252   LLE_NEXT (ret) = NULL;
 253
 254   return ret;
 255 }
 256
 257 /* Print out a linear expression EXPR, with SIZE coefficients, to OUTFILE.
 258    The starting letter used for variable names is START.  */
 259
 260 static void
 261 print_linear_expression (FILE * outfile, lambda_vector expr, int size,
 262                          char start)
 263 {
 264   int i;
 265   bool first = true;
 266   for (i = 0; i < size; i++)
 267     {
 268       if (expr[i] != 0)
 269         {
 270           if (first)
 271             {
 272               if (expr[i] < 0)
 273                 fprintf (outfile, "-");
 274               first = false;
 275             }
 276           else if (expr[i] > 0)
 277             fprintf (outfile, " + ");
 278           else
 279             fprintf (outfile, " - ");
 280           if (abs (expr[i]) == 1)
 281             fprintf (outfile, "%c", start + i);
 282           else
 283             fprintf (outfile, "%d%c", abs (expr[i]), start + i);
 284         }
 285     }
 286 }
 287
 288 /* Print out a lambda linear expression structure, EXPR, to OUTFILE. The
 289    depth/number of coefficients is given by DEPTH, the number of invariants is
 290    given by INVARIANTS, and the character to start variable names with is given
 291    by START.  */
 292
 293 void
 294 print_lambda_linear_expression (FILE * outfile,
 295                                 lambda_linear_expression expr,
 296                                 int depth, int invariants, char start)
 297 {
 298   fprintf (outfile, "\tLinear expression: ");
 299   print_linear_expression (outfile, LLE_COEFFICIENTS (expr), depth, start);
 300   fprintf (outfile, " constant: %d ", LLE_CONSTANT (expr));
 301   fprintf (outfile, "  invariants: ");
 302   print_linear_expression (outfile, LLE_INVARIANT_COEFFICIENTS (expr),
 303                            invariants, 'A');
 304   fprintf (outfile, "  denominator: %d\n", LLE_DENOMINATOR (expr));
 305 }
 306
 307 /* Print a lambda loop structure LOOP to OUTFILE.  The depth/number of
 308    coefficients is given by DEPTH, the number of invariants is
 309    given by INVARIANTS, and the character to start variable names with is given
 310    by START.  */
 311
 312 void
 313 print_lambda_loop (FILE * outfile, lambda_loop loop, int depth,
 314                    int invariants, char start)
 315 {
 316   int step;
 317   lambda_linear_expression expr;
 318
 319   gcc_assert (loop);
 320
 321   expr = LL_LINEAR_OFFSET (loop);
 322   step = LL_STEP (loop);
 323   fprintf (outfile, "  step size = %d \n", step);
 324
 325   if (expr)
 326     {
 327       fprintf (outfile, "  linear offset: \n");
 328       print_lambda_linear_expression (outfile, expr, depth, invariants,
 329                                       start);
 330     }
 331
 332   fprintf (outfile, "  lower bound: \n");
 333   for (expr = LL_LOWER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr))
 334     print_lambda_linear_expression (outfile, expr, depth, invariants, start);
 335   fprintf (outfile, "  upper bound: \n");
 336   for (expr = LL_UPPER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr))
 337     print_lambda_linear_expression (outfile, expr, depth, invariants, start);
 338 }
 339
 340 /* Create a new loop nest structure with DEPTH loops, and INVARIANTS as the
 341    number of invariants.  */
 342
 343 lambda_loopnest
 344 lambda_loopnest_new (int depth, int invariants,
 345                      struct obstack * lambda_obstack)
 346 {
 347   lambda_loopnest ret;
 348   ret = (lambda_loopnest)obstack_alloc (lambda_obstack, sizeof (*ret));
 349
 350   LN_LOOPS (ret) = (lambda_loop *)
 351       obstack_alloc (lambda_obstack, depth * sizeof(LN_LOOPS(ret)));
 352   LN_DEPTH (ret) = depth;
 353   LN_INVARIANTS (ret) = invariants;
 354
 355   return ret;
 356 }
 357
 358 /* Print a lambda loopnest structure, NEST, to OUTFILE.  The starting
 359    character to use for loop names is given by START.  */
 360
 361 void
 362 print_lambda_loopnest (FILE * outfile, lambda_loopnest nest, char start)
 363 {
 364   int i;
 365   for (i = 0; i < LN_DEPTH (nest); i++)
 366     {
 367       fprintf (outfile, "Loop %c\n", start + i);
 368       print_lambda_loop (outfile, LN_LOOPS (nest)[i], LN_DEPTH (nest),
 369                          LN_INVARIANTS (nest), 'i');
 370       fprintf (outfile, "\n");
 371     }
 372 }
 373
 374 /* Allocate a new lattice structure of DEPTH x DEPTH, with INVARIANTS number
 375    of invariants.  */
 376
 377 static lambda_lattice
 378 lambda_lattice_new (int depth, int invariants, struct obstack * lambda_obstack)
 379 {
 380   lambda_lattice ret
 381       = (lambda_lattice)obstack_alloc (lambda_obstack, sizeof (*ret));
 382   LATTICE_BASE (ret) = lambda_matrix_new (depth, depth, lambda_obstack);
 383   LATTICE_ORIGIN (ret) = lambda_vector_new (depth);
 384   LATTICE_ORIGIN_INVARIANTS (ret) = lambda_matrix_new (depth, invariants,
 385                                                        lambda_obstack);
 386   LATTICE_DIMENSION (ret) = depth;
 387   LATTICE_INVARIANTS (ret) = invariants;
 388   return ret;
 389 }
 390
 391 /* Compute the lattice base for NEST.  The lattice base is essentially a
 392    non-singular transform from a dense base space to a sparse iteration space.
 393    We use it so that we don't have to specially handle the case of a sparse
 394    iteration space in other parts of the algorithm.  As a result, this routine
 395    only does something interesting (IE produce a matrix that isn't the
 396    identity matrix) if NEST is a sparse space.  */
 397
 398 static lambda_lattice
 399 lambda_lattice_compute_base (lambda_loopnest nest,
 400                              struct obstack * lambda_obstack)
 401 {
 402   lambda_lattice ret;
 403   int depth, invariants;
 404   lambda_matrix base;
 405
 406   int i, j, step;
 407   lambda_loop loop;
 408   lambda_linear_expression expression;
 409
 410   depth = LN_DEPTH (nest);
 411   invariants = LN_INVARIANTS (nest);
 412
 413   ret = lambda_lattice_new (depth, invariants, lambda_obstack);
 414   base = LATTICE_BASE (ret);
 415   for (i = 0; i < depth; i++)
 416     {
 417       loop = LN_LOOPS (nest)[i];
 418       gcc_assert (loop);
 419       step = LL_STEP (loop);
 420       /* If we have a step of 1, then the base is one, and the
 421          origin and invariant coefficients are 0.  */
 422       if (step == 1)
 423         {
 424           for (j = 0; j < depth; j++)
 425             base[i][j] = 0;
 426           base[i][i] = 1;
 427           LATTICE_ORIGIN (ret)[i] = 0;
 428           for (j = 0; j < invariants; j++)
 429             LATTICE_ORIGIN_INVARIANTS (ret)[i][j] = 0;
 430         }
 431       else
 432         {
 433           /* Otherwise, we need the lower bound expression (which must
 434              be an affine function)  to determine the base.  */
 435           expression = LL_LOWER_BOUND (loop);
 436           gcc_assert (expression && !LLE_NEXT (expression)
 437                       && LLE_DENOMINATOR (expression) == 1);
 438
 439           /* The lower triangular portion of the base is going to be the
 440              coefficient times the step */
 441           for (j = 0; j < i; j++)
 442             base[i][j] = LLE_COEFFICIENTS (expression)[j]
 443               * LL_STEP (LN_LOOPS (nest)[j]);
 444           base[i][i] = step;
 445           for (j = i + 1; j < depth; j++)
 446             base[i][j] = 0;
 447
 448           /* Origin for this loop is the constant of the lower bound
 449              expression.  */
 450           LATTICE_ORIGIN (ret)[i] = LLE_CONSTANT (expression);
 451
 452           /* Coefficient for the invariants are equal to the invariant
 453              coefficients in the expression.  */
 454           for (j = 0; j < invariants; j++)
 455             LATTICE_ORIGIN_INVARIANTS (ret)[i][j] =
 456               LLE_INVARIANT_COEFFICIENTS (expression)[j];
 457         }
 458     }
 459   return ret;
 460 }
 461
 462 /* Compute the least common multiple of two numbers A and B .  */
 463
 464 int
 465 least_common_multiple (int a, int b)
 466 {
 467   return (abs (a) * abs (b) / gcd (a, b));
 468 }
 469
 470 /* Perform Fourier-Motzkin elimination to calculate the bounds of the
 471    auxiliary nest.
 472    Fourier-Motzkin is a way of reducing systems of linear inequalities so that
 473    it is easy to calculate the answer and bounds.
 474    A sketch of how it works:
 475    Given a system of linear inequalities, ai * xj >= bk, you can always
 476    rewrite the constraints so they are all of the form
 477    a <= x, or x <= b, or x >= constant for some x in x1 ... xj (and some b
 478    in b1 ... bk, and some a in a1...ai)
 479    You can then eliminate this x from the non-constant inequalities by
 480    rewriting these as a <= b, x >= constant, and delete the x variable.
 481    You can then repeat this for any remaining x variables, and then we have
 482    an easy to use variable <= constant (or no variables at all) form that we
 483    can construct our bounds from.
 484
 485    In our case, each time we eliminate, we construct part of the bound from
 486    the ith variable, then delete the ith variable.
 487
 488    Remember the constant are in our vector a, our coefficient matrix is A,
 489    and our invariant coefficient matrix is B.
 490
 491    SIZE is the size of the matrices being passed.
 492    DEPTH is the loop nest depth.
 493    INVARIANTS is the number of loop invariants.
 494    A, B, and a are the coefficient matrix, invariant coefficient, and a
 495    vector of constants, respectively.  */
 496
 497 static lambda_loopnest
 498 compute_nest_using_fourier_motzkin (int size,
 499                                     int depth,
 500                                     int invariants,
 501                                     lambda_matrix A,
 502                                     lambda_matrix B,
 503                                     lambda_vector a,
 504                                     struct obstack * lambda_obstack)
 505 {
 506
 507   int multiple, f1, f2;
 508   int i, j, k;
 509   lambda_linear_expression expression;
 510   lambda_loop loop;
 511   lambda_loopnest auxillary_nest;
 512   lambda_matrix swapmatrix, A1, B1;
 513   lambda_vector swapvector, a1;
 514   int newsize;
 515
 516   A1 = lambda_matrix_new (128, depth, lambda_obstack);
 517   B1 = lambda_matrix_new (128, invariants, lambda_obstack);
 518   a1 = lambda_vector_new (128);
 519
 520   auxillary_nest = lambda_loopnest_new (depth, invariants, lambda_obstack);
 521
 522   for (i = depth - 1; i >= 0; i--)
 523     {
 524       loop = lambda_loop_new (lambda_obstack);
 525       LN_LOOPS (auxillary_nest)[i] = loop;
 526       LL_STEP (loop) = 1;
 527
 528       for (j = 0; j < size; j++)
 529         {
 530           if (A[j][i] < 0)
 531             {
 532               /* Any linear expression in the matrix with a coefficient less
 533                  than 0 becomes part of the new lower bound.  */
 534               expression = lambda_linear_expression_new (depth, invariants,
 535                                                          lambda_obstack);
 536
 537               for (k = 0; k < i; k++)
 538                 LLE_COEFFICIENTS (expression)[k] = A[j][k];
 539
 540               for (k = 0; k < invariants; k++)
 541                 LLE_INVARIANT_COEFFICIENTS (expression)[k] = -1 * B[j][k];
 542
 543               LLE_DENOMINATOR (expression) = -1 * A[j][i];
 544               LLE_CONSTANT (expression) = -1 * a[j];
 545
 546               /* Ignore if identical to the existing lower bound.  */
 547               if (!lle_equal (LL_LOWER_BOUND (loop),
 548                               expression, depth, invariants))
 549                 {
 550                   LLE_NEXT (expression) = LL_LOWER_BOUND (loop);
 551                   LL_LOWER_BOUND (loop) = expression;
 552                 }
 553
 554             }
 555           else if (A[j][i] > 0)
 556             {
 557               /* Any linear expression with a coefficient greater than 0
 558                  becomes part of the new upper bound.  */
 559               expression = lambda_linear_expression_new (depth, invariants,
 560                                                          lambda_obstack);
 561               for (k = 0; k < i; k++)
 562                 LLE_COEFFICIENTS (expression)[k] = -1 * A[j][k];
 563
 564               for (k = 0; k < invariants; k++)
 565                 LLE_INVARIANT_COEFFICIENTS (expression)[k] = B[j][k];
 566
 567               LLE_DENOMINATOR (expression) = A[j][i];
 568               LLE_CONSTANT (expression) = a[j];
 569
 570               /* Ignore if identical to the existing upper bound.  */
 571               if (!lle_equal (LL_UPPER_BOUND (loop),
 572                               expression, depth, invariants))
 573                 {
 574                   LLE_NEXT (expression) = LL_UPPER_BOUND (loop);
 575                   LL_UPPER_BOUND (loop) = expression;
 576                 }
 577
 578             }
 579         }
 580
 581       /* This portion creates a new system of linear inequalities by deleting
 582          the i'th variable, reducing the system by one variable.  */
 583       newsize = 0;
 584       for (j = 0; j < size; j++)
 585         {
 586           /* If the coefficient for the i'th variable is 0, then we can just
 587              eliminate the variable straightaway.  Otherwise, we have to
 588              multiply through by the coefficients we are eliminating.  */
 589           if (A[j][i] == 0)
 590             {
 591               lambda_vector_copy (A[j], A1[newsize], depth);
 592               lambda_vector_copy (B[j], B1[newsize], invariants);
 593               a1[newsize] = a[j];
 594               newsize++;
 595             }
 596           else if (A[j][i] > 0)
 597             {
 598               for (k = 0; k < size; k++)
 599                 {
 600                   if (A[k][i] < 0)
 601                     {
 602                       multiple = least_common_multiple (A[j][i], A[k][i]);
 603                       f1 = multiple / A[j][i];
 604                       f2 = -1 * multiple / A[k][i];
 605
 606                       lambda_vector_add_mc (A[j], f1, A[k], f2,
 607                                             A1[newsize], depth);
 608                       lambda_vector_add_mc (B[j], f1, B[k], f2,
 609                                             B1[newsize], invariants);
 610                       a1[newsize] = f1 * a[j] + f2 * a[k];
 611                       newsize++;
 612                     }
 613                 }
 614             }
 615         }
 616
 617       swapmatrix = A;
 618       A = A1;
 619       A1 = swapmatrix;
 620
 621       swapmatrix = B;
 622       B = B1;
 623       B1 = swapmatrix;
 624
 625       swapvector = a;
 626       a = a1;
 627       a1 = swapvector;
 628
 629       size = newsize;
 630     }
 631
 632   return auxillary_nest;
 633 }
 634
 635 /* Compute the loop bounds for the auxiliary space NEST.
 636    Input system used is Ax <= b.  TRANS is the unimodular transformation.
 637    Given the original nest, this function will
 638    1. Convert the nest into matrix form, which consists of a matrix for the
 639    coefficients, a matrix for the
 640    invariant coefficients, and a vector for the constants.
 641    2. Use the matrix form to calculate the lattice base for the nest (which is
 642    a dense space)
 643    3. Compose the dense space transform with the user specified transform, to
 644    get a transform we can easily calculate transformed bounds for.
 645    4. Multiply the composed transformation matrix times the matrix form of the
 646    loop.
 647    5. Transform the newly created matrix (from step 4) back into a loop nest
 648    using Fourier-Motzkin elimination to figure out the bounds.  */
 649
 650 static lambda_loopnest
 651 lambda_compute_auxillary_space (lambda_loopnest nest,
 652                                 lambda_trans_matrix trans,
 653                                 struct obstack * lambda_obstack)
 654 {
 655   lambda_matrix A, B, A1, B1;
 656   lambda_vector a, a1;
 657   lambda_matrix invertedtrans;
 658   int depth, invariants, size;
 659   int i, j;
 660   lambda_loop loop;
 661   lambda_linear_expression expression;
 662   lambda_lattice lattice;
 663
 664   depth = LN_DEPTH (nest);
 665   invariants = LN_INVARIANTS (nest);
 666
 667   /* Unfortunately, we can't know the number of constraints we'll have
 668      ahead of time, but this should be enough even in ridiculous loop nest
 669      cases. We must not go over this limit.  */
 670   A = lambda_matrix_new (128, depth, lambda_obstack);
 671   B = lambda_matrix_new (128, invariants, lambda_obstack);
 672   a = lambda_vector_new (128);
 673
 674   A1 = lambda_matrix_new (128, depth, lambda_obstack);
 675   B1 = lambda_matrix_new (128, invariants, lambda_obstack);
 676   a1 = lambda_vector_new (128);
 677
 678   /* Store the bounds in the equation matrix A, constant vector a, and
 679      invariant matrix B, so that we have Ax <= a + B.
 680      This requires a little equation rearranging so that everything is on the
 681      correct side of the inequality.  */
 682   size = 0;
 683   for (i = 0; i < depth; i++)
 684     {
 685       loop = LN_LOOPS (nest)[i];
 686
 687       /* First we do the lower bound.  */
 688       if (LL_STEP (loop) > 0)
 689         expression = LL_LOWER_BOUND (loop);
 690       else
 691         expression = LL_UPPER_BOUND (loop);
 692
 693       for (; expression != NULL; expression = LLE_NEXT (expression))
 694         {
 695           /* Fill in the coefficient.  */
 696           for (j = 0; j < i; j++)
 697             A[size][j] = LLE_COEFFICIENTS (expression)[j];
 698
 699           /* And the invariant coefficient.  */
 700           for (j = 0; j < invariants; j++)
 701             B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j];
 702
 703           /* And the constant.  */
 704           a[size] = LLE_CONSTANT (expression);
 705
 706           /* Convert (2x+3y+2+b)/4 <= z to 2x+3y-4z <= -2-b.  IE put all
 707              constants and single variables on   */
 708           A[size][i] = -1 * LLE_DENOMINATOR (expression);
 709           a[size] *= -1;
 710           for (j = 0; j < invariants; j++)
 711             B[size][j] *= -1;
 712
 713           size++;
 714           /* Need to increase matrix sizes above.  */
 715           gcc_assert (size <= 127);
 716
 717         }
 718
 719       /* Then do the exact same thing for the upper bounds.  */
 720       if (LL_STEP (loop) > 0)
 721         expression = LL_UPPER_BOUND (loop);
 722       else
 723         expression = LL_LOWER_BOUND (loop);
 724
 725       for (; expression != NULL; expression = LLE_NEXT (expression))
 726         {
 727           /* Fill in the coefficient.  */
 728           for (j = 0; j < i; j++)
 729             A[size][j] = LLE_COEFFICIENTS (expression)[j];
 730
 731           /* And the invariant coefficient.  */
 732           for (j = 0; j < invariants; j++)
 733             B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j];
 734
 735           /* And the constant.  */
 736           a[size] = LLE_CONSTANT (expression);
 737
 738           /* Convert z <= (2x+3y+2+b)/4 to -2x-3y+4z <= 2+b.  */
 739           for (j = 0; j < i; j++)
 740             A[size][j] *= -1;
 741           A[size][i] = LLE_DENOMINATOR (expression);
 742           size++;
 743           /* Need to increase matrix sizes above.  */
 744           gcc_assert (size <= 127);
 745
 746         }
 747     }
 748
 749   /* Compute the lattice base x = base * y + origin, where y is the
 750      base space.  */
 751   lattice = lambda_lattice_compute_base (nest, lambda_obstack);
 752
 753   /* Ax <= a + B then becomes ALy <= a+B - A*origin.  L is the lattice base  */
 754
 755   /* A1 = A * L */
 756   lambda_matrix_mult (A, LATTICE_BASE (lattice), A1, size, depth, depth);
 757
 758   /* a1 = a - A * origin constant.  */
 759   lambda_matrix_vector_mult (A, size, depth, LATTICE_ORIGIN (lattice), a1);
 760   lambda_vector_add_mc (a, 1, a1, -1, a1, size);
 761
 762   /* B1 = B - A * origin invariant.  */
 763   lambda_matrix_mult (A, LATTICE_ORIGIN_INVARIANTS (lattice), B1, size, depth,
 764                       invariants);
 765   lambda_matrix_add_mc (B, 1, B1, -1, B1, size, invariants);
 766
 767   /* Now compute the auxiliary space bounds by first inverting U, multiplying
 768      it by A1, then performing Fourier-Motzkin.  */
 769
 770   invertedtrans = lambda_matrix_new (depth, depth, lambda_obstack);
 771
 772   /* Compute the inverse of U.  */
 773   lambda_matrix_inverse (LTM_MATRIX (trans),
 774                          invertedtrans, depth, lambda_obstack);
 775
 776   /* A = A1 inv(U).  */
 777   lambda_matrix_mult (A1, invertedtrans, A, size, depth, depth);
 778
 779   return compute_nest_using_fourier_motzkin (size, depth, invariants,
 780                                              A, B1, a1, lambda_obstack);
 781 }
 782
 783 /* Compute the loop bounds for the target space, using the bounds of
 784    the auxiliary nest AUXILLARY_NEST, and the triangular matrix H.
 785    The target space loop bounds are computed by multiplying the triangular
 786    matrix H by the auxiliary nest, to get the new loop bounds.  The sign of
 787    the loop steps (positive or negative) is then used to swap the bounds if
 788    the loop counts downwards.
 789    Return the target loopnest.  */
 790
 791 static lambda_loopnest
 792 lambda_compute_target_space (lambda_loopnest auxillary_nest,
 793                              lambda_trans_matrix H, lambda_vector stepsigns,
 794                              struct obstack * lambda_obstack)
 795 {
 796   lambda_matrix inverse, H1;
 797   int determinant, i, j;
 798   int gcd1, gcd2;
 799   int factor;
 800
 801   lambda_loopnest target_nest;
 802   int depth, invariants;
 803   lambda_matrix target;
 804
 805   lambda_loop auxillary_loop, target_loop;
 806   lambda_linear_expression expression, auxillary_expr, target_expr, tmp_expr;
 807
 808   depth = LN_DEPTH (auxillary_nest);
 809   invariants = LN_INVARIANTS (auxillary_nest);
 810
 811   inverse = lambda_matrix_new (depth, depth, lambda_obstack);
 812   determinant = lambda_matrix_inverse (LTM_MATRIX (H), inverse, depth,
 813                                        lambda_obstack);
 814
 815   /* H1 is H excluding its diagonal.  */
 816   H1 = lambda_matrix_new (depth, depth, lambda_obstack);
 817   lambda_matrix_copy (LTM_MATRIX (H), H1, depth, depth);
 818
 819   for (i = 0; i < depth; i++)
 820     H1[i][i] = 0;
 821
 822   /* Computes the linear offsets of the loop bounds.  */
 823   target = lambda_matrix_new (depth, depth, lambda_obstack);
 824   lambda_matrix_mult (H1, inverse, target, depth, depth, depth);
 825
 826   target_nest = lambda_loopnest_new (depth, invariants, lambda_obstack);
 827
 828   for (i = 0; i < depth; i++)
 829     {
 830
 831       /* Get a new loop structure.  */
 832       target_loop = lambda_loop_new (lambda_obstack);
 833       LN_LOOPS (target_nest)[i] = target_loop;
 834
 835       /* Computes the gcd of the coefficients of the linear part.  */
 836       gcd1 = lambda_vector_gcd (target[i], i);
 837
 838       /* Include the denominator in the GCD.  */
 839       gcd1 = gcd (gcd1, determinant);
 840
 841       /* Now divide through by the gcd.  */
 842       for (j = 0; j < i; j++)
 843         target[i][j] = target[i][j] / gcd1;
 844
 845       expression = lambda_linear_expression_new (depth, invariants,
 846                                                  lambda_obstack);
 847       lambda_vector_copy (target[i], LLE_COEFFICIENTS (expression), depth);
 848       LLE_DENOMINATOR (expression) = determinant / gcd1;
 849       LLE_CONSTANT (expression) = 0;
 850       lambda_vector_clear (LLE_INVARIANT_COEFFICIENTS (expression),
 851                            invariants);
 852       LL_LINEAR_OFFSET (target_loop) = expression;
 853     }
 854
 855   /* For each loop, compute the new bounds from H.  */
 856   for (i = 0; i < depth; i++)
 857     {
 858       auxillary_loop = LN_LOOPS (auxillary_nest)[i];
 859       target_loop = LN_LOOPS (target_nest)[i];
 860       LL_STEP (target_loop) = LTM_MATRIX (H)[i][i];
 861       factor = LTM_MATRIX (H)[i][i];
 862
 863       /* First we do the lower bound.  */
 864       auxillary_expr = LL_LOWER_BOUND (auxillary_loop);
 865
 866       for (; auxillary_expr != NULL;
 867            auxillary_expr = LLE_NEXT (auxillary_expr))
 868         {
 869           target_expr = lambda_linear_expression_new (depth, invariants,
 870                                                       lambda_obstack);
 871           lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
 872                                      depth, inverse, depth,
 873                                      LLE_COEFFICIENTS (target_expr));
 874           lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
 875                                     LLE_COEFFICIENTS (target_expr), depth,
 876                                     factor);
 877
 878           LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
 879           lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
 880                               LLE_INVARIANT_COEFFICIENTS (target_expr),
 881                               invariants);
 882           lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
 883                                     LLE_INVARIANT_COEFFICIENTS (target_expr),
 884                                     invariants, factor);
 885           LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);
 886
 887           if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
 888             {
 889               LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
 890                 * determinant;
 891               lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
 892                                         (target_expr),
 893                                         LLE_INVARIANT_COEFFICIENTS
 894                                         (target_expr), invariants,
 895                                         determinant);
 896               LLE_DENOMINATOR (target_expr) =
 897                 LLE_DENOMINATOR (target_expr) * determinant;
 898             }
 899           /* Find the gcd and divide by it here, rather than doing it
 900              at the tree level.  */
 901           gcd1 = lambda_vector_gcd (LLE_COEFFICIENTS (target_expr), depth);
 902           gcd2 = lambda_vector_gcd (LLE_INVARIANT_COEFFICIENTS (target_expr),
 903                                     invariants);
 904           gcd1 = gcd (gcd1, gcd2);
 905           gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
 906           gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
 907           for (j = 0; j < depth; j++)
 908             LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
 909           for (j = 0; j < invariants; j++)
 910             LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
 911           LLE_CONSTANT (target_expr) /= gcd1;
 912           LLE_DENOMINATOR (target_expr) /= gcd1;
 913           /* Ignore if identical to existing bound.  */
 914           if (!lle_equal (LL_LOWER_BOUND (target_loop), target_expr, depth,
 915                           invariants))
 916             {
 917               LLE_NEXT (target_expr) = LL_LOWER_BOUND (target_loop);
 918               LL_LOWER_BOUND (target_loop) = target_expr;
 919             }
 920         }
 921       /* Now do the upper bound.  */
 922       auxillary_expr = LL_UPPER_BOUND (auxillary_loop);
 923
 924       for (; auxillary_expr != NULL;
 925            auxillary_expr = LLE_NEXT (auxillary_expr))
 926         {
 927           target_expr = lambda_linear_expression_new (depth, invariants,
 928                                                       lambda_obstack);
 929           lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
 930                                      depth, inverse, depth,
 931                                      LLE_COEFFICIENTS (target_expr));
 932           lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
 933                                     LLE_COEFFICIENTS (target_expr), depth,
 934                                     factor);
 935           LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
 936           lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
 937                               LLE_INVARIANT_COEFFICIENTS (target_expr),
 938                               invariants);
 939           lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
 940                                     LLE_INVARIANT_COEFFICIENTS (target_expr),
 941                                     invariants, factor);
 942           LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);
 943
 944           if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
 945             {
 946               LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
 947                 * determinant;
 948               lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
 949                                         (target_expr),
 950                                         LLE_INVARIANT_COEFFICIENTS
 951                                         (target_expr), invariants,
 952                                         determinant);
 953               LLE_DENOMINATOR (target_expr) =
 954                 LLE_DENOMINATOR (target_expr) * determinant;
 955             }
 956           /* Find the gcd and divide by it here, instead of at the
 957              tree level.  */
 958           gcd1 = lambda_vector_gcd (LLE_COEFFICIENTS (target_expr), depth);
 959           gcd2 = lambda_vector_gcd (LLE_INVARIANT_COEFFICIENTS (target_expr),
 960                                     invariants);
 961           gcd1 = gcd (gcd1, gcd2);
 962           gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
 963           gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
 964           for (j = 0; j < depth; j++)
 965             LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
 966           for (j = 0; j < invariants; j++)
 967             LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
 968           LLE_CONSTANT (target_expr) /= gcd1;
 969           LLE_DENOMINATOR (target_expr) /= gcd1;
 970           /* Ignore if equal to existing bound.  */
 971           if (!lle_equal (LL_UPPER_BOUND (target_loop), target_expr, depth,
 972                           invariants))
 973             {
 974               LLE_NEXT (target_expr) = LL_UPPER_BOUND (target_loop);
 975               LL_UPPER_BOUND (target_loop) = target_expr;
 976             }
 977         }
 978     }
 979   for (i = 0; i < depth; i++)
 980     {
 981       target_loop = LN_LOOPS (target_nest)[i];
 982       /* If necessary, exchange the upper and lower bounds and negate
 983          the step size.  */
 984       if (stepsigns[i] < 0)
 985         {
 986           LL_STEP (target_loop) *= -1;
 987           tmp_expr = LL_LOWER_BOUND (target_loop);
 988           LL_LOWER_BOUND (target_loop) = LL_UPPER_BOUND (target_loop);
 989           LL_UPPER_BOUND (target_loop) = tmp_expr;
 990         }
 991     }
 992   return target_nest;
 993 }
 994
 995 /* Compute the step signs of TRANS, using TRANS and stepsigns.  Return the new
 996    result.  */
 997
 998 static lambda_vector
 999 lambda_compute_step_signs (lambda_trans_matrix trans,
1000                            lambda_vector stepsigns,
1001                            struct obstack * lambda_obstack)
1002 {
1003   lambda_matrix matrix, H;
1004   int size;
1005   lambda_vector newsteps;
1006   int i, j, factor, minimum_column;
1007   int temp;
1008
1009   matrix = LTM_MATRIX (trans);
1010   size = LTM_ROWSIZE (trans);
1011   H = lambda_matrix_new (size, size, lambda_obstack);
1012
1013   newsteps = lambda_vector_new (size);
1014   lambda_vector_copy (stepsigns, newsteps, size);
1015
1016   lambda_matrix_copy (matrix, H, size, size);
1017
1018   for (j = 0; j < size; j++)
1019     {
1020       lambda_vector row;
1021       row = H[j];
1022       for (i = j; i < size; i++)
1023         if (row[i] < 0)
1024           lambda_matrix_col_negate (H, size, i);
1025       while (lambda_vector_first_nz (row, size, j + 1) < size)
1026         {
1027           minimum_column = lambda_vector_min_nz (row, size, j);
1028           lambda_matrix_col_exchange (H, size, j, minimum_column);
1029
1030           temp = newsteps[j];
1031           newsteps[j] = newsteps[minimum_column];
1032           newsteps[minimum_column] = temp;
1033
1034           for (i = j + 1; i < size; i++)
1035             {
1036               factor = row[i] / row[j];
1037               lambda_matrix_col_add (H, size, j, i, -1 * factor);
1038             }
1039         }
1040     }
1041   return newsteps;
1042 }
1043
1044 /* Transform NEST according to TRANS, and return the new loopnest.
1045    This involves
1046    1. Computing a lattice base for the transformation
1047    2. Composing the dense base with the specified transformation (TRANS)
1048    3. Decomposing the combined transformation into a lower triangular portion,
1049    and a unimodular portion.
1050    4. Computing the auxiliary nest using the unimodular portion.
1051    5. Computing the target nest using the auxiliary nest and the lower
1052    triangular portion.  */
1053
1054 lambda_loopnest
1055 lambda_loopnest_transform (lambda_loopnest nest, lambda_trans_matrix trans,
1056                            struct obstack * lambda_obstack)
1057 {
1058   lambda_loopnest auxillary_nest, target_nest;
1059
1060   int depth, invariants;
1061   int i, j;
1062   lambda_lattice lattice;
1063   lambda_trans_matrix trans1, H, U;
1064   lambda_loop loop;
1065   lambda_linear_expression expression;
1066   lambda_vector origin;
1067   lambda_matrix origin_invariants;
1068   lambda_vector stepsigns;
1069   int f;
1070
1071   depth = LN_DEPTH (nest);
1072   invariants = LN_INVARIANTS (nest);
1073
1074   /* Keep track of the signs of the loop steps.  */
1075   stepsigns = lambda_vector_new (depth);
1076   for (i = 0; i < depth; i++)
1077     {
1078       if (LL_STEP (LN_LOOPS (nest)[i]) > 0)
1079         stepsigns[i] = 1;
1080       else
1081         stepsigns[i] = -1;
1082     }
1083
1084   /* Compute the lattice base.  */
1085   lattice = lambda_lattice_compute_base (nest, lambda_obstack);
1086   trans1 = lambda_trans_matrix_new (depth, depth, lambda_obstack);
1087
1088   /* Multiply the transformation matrix by the lattice base.  */
1089
1090   lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_BASE (lattice),
1091                       LTM_MATRIX (trans1), depth, depth, depth);
1092
1093   /* Compute the Hermite normal form for the new transformation matrix.  */
1094   H = lambda_trans_matrix_new (depth, depth, lambda_obstack);
1095   U = lambda_trans_matrix_new (depth, depth, lambda_obstack);
1096   lambda_matrix_hermite (LTM_MATRIX (trans1), depth, LTM_MATRIX (H),
1097                          LTM_MATRIX (U));
1098
1099   /* Compute the auxiliary loop nest's space from the unimodular
1100      portion.  */
1101   auxillary_nest = lambda_compute_auxillary_space (nest, U,
1102                                                    lambda_obstack);
1103
1104   /* Compute the loop step signs from the old step signs and the
1105      transformation matrix.  */
1106   stepsigns = lambda_compute_step_signs (trans1, stepsigns,
1107                                          lambda_obstack);
1108
1109   /* Compute the target loop nest space from the auxiliary nest and
1110      the lower triangular matrix H.  */
1111   target_nest = lambda_compute_target_space (auxillary_nest, H, stepsigns,
1112                                              lambda_obstack);
1113   origin = lambda_vector_new (depth);
1114   origin_invariants = lambda_matrix_new (depth, invariants, lambda_obstack);
1115   lambda_matrix_vector_mult (LTM_MATRIX (trans), depth, depth,
1116                              LATTICE_ORIGIN (lattice), origin);
1117   lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_ORIGIN_INVARIANTS (lattice),
1118                       origin_invariants, depth, depth, invariants);
1119
1120   for (i = 0; i < depth; i++)
1121     {
1122       loop = LN_LOOPS (target_nest)[i];
1123       expression = LL_LINEAR_OFFSET (loop);
1124       if (lambda_vector_zerop (LLE_COEFFICIENTS (expression), depth))
1125         f = 1;
1126       else
1127         f = LLE_DENOMINATOR (expression);
1128
1129       LLE_CONSTANT (expression) += f * origin[i];
1130
1131       for (j = 0; j < invariants; j++)
1132         LLE_INVARIANT_COEFFICIENTS (expression)[j] +=
1133           f * origin_invariants[i][j];
1134     }
1135
1136   return target_nest;
1137
1138 }
1139
1140 /* Convert a gcc tree expression EXPR to a lambda linear expression, and
1141    return the new expression.  DEPTH is the depth of the loopnest.
1142    OUTERINDUCTIONVARS is an array of the induction variables for outer loops
1143    in this nest.  INVARIANTS is the array of invariants for the loop.  EXTRA
1144    is the amount we have to add/subtract from the expression because of the
1145    type of comparison it is used in.  */
1146
1147 static lambda_linear_expression
1148 gcc_tree_to_linear_expression (int depth, tree expr,
1149                                VEC(tree,heap) *outerinductionvars,
1150                                VEC(tree,heap) *invariants, int extra,
1151                                struct obstack * lambda_obstack)
1152 {
1153   lambda_linear_expression lle = NULL;
1154   switch (TREE_CODE (expr))
1155     {
1156     case INTEGER_CST:
1157       {
1158         lle = lambda_linear_expression_new (depth, 2 * depth, lambda_obstack);
1159         LLE_CONSTANT (lle) = TREE_INT_CST_LOW (expr);
1160         if (extra != 0)
1161           LLE_CONSTANT (lle) += extra;
1162
1163         LLE_DENOMINATOR (lle) = 1;
1164       }
1165       break;
1166     case SSA_NAME:
1167       {
1168         tree iv, invar;
1169         size_t i;
1170         for (i = 0; VEC_iterate (tree, outerinductionvars, i, iv); i++)
1171           if (iv != NULL)
1172             {
1173               if (SSA_NAME_VAR (iv) == SSA_NAME_VAR (expr))
1174                 {
1175                   lle = lambda_linear_expression_new (depth, 2 * depth,
1176                                                       lambda_obstack);
1177                   LLE_COEFFICIENTS (lle)[i] = 1;
1178                   if (extra != 0)
1179                     LLE_CONSTANT (lle) = extra;
1180
1181                   LLE_DENOMINATOR (lle) = 1;
1182                 }
1183             }
1184         for (i = 0; VEC_iterate (tree, invariants, i, invar); i++)
1185           if (invar != NULL)
1186             {
1187               if (SSA_NAME_VAR (invar) == SSA_NAME_VAR (expr))
1188                 {
1189                   lle = lambda_linear_expression_new (depth, 2 * depth,
1190                                                       lambda_obstack);
1191                   LLE_INVARIANT_COEFFICIENTS (lle)[i] = 1;
1192                   if (extra != 0)
1193                     LLE_CONSTANT (lle) = extra;
1194                   LLE_DENOMINATOR (lle) = 1;
1195                 }
1196             }
1197       }
1198       break;
1199     default:
1200       return NULL;
1201     }
1202
1203   return lle;
1204 }
1205
1206 /* Return the depth of the loopnest NEST */
1207
1208 static int
1209 depth_of_nest (struct loop *nest)
1210 {
1211   size_t depth = 0;
1212   while (nest)
1213     {
1214       depth++;
1215       nest = nest->inner;
1216     }
1217   return depth;
1218 }
1219
1220
1221 /* Return true if OP is invariant in LOOP and all outer loops.  */
1222
1223 static bool
1224 invariant_in_loop_and_outer_loops (struct loop *loop, tree op)
1225 {
1226   if (is_gimple_min_invariant (op))
1227     return true;
1228   if (loop_depth (loop) == 0)
1229     return true;
1230   if (!expr_invariant_in_loop_p (loop, op))
1231     return false;
1232   if (!invariant_in_loop_and_outer_loops (loop_outer (loop), op))
1233     return false;
1234   return true;
1235 }
1236
1237 /* Generate a lambda loop from a gcc loop LOOP.  Return the new lambda loop,
1238    or NULL if it could not be converted.
1239    DEPTH is the depth of the loop.
1240    INVARIANTS is a pointer to the array of loop invariants.
1241    The induction variable for this loop should be stored in the parameter
1242    OURINDUCTIONVAR.
1243    OUTERINDUCTIONVARS is an array of induction variables for outer loops.  */
1244
1245 static lambda_loop
1246 gcc_loop_to_lambda_loop (struct loop *loop, int depth,
1247                          VEC(tree,heap) ** invariants,
1248                          tree * ourinductionvar,
1249                          VEC(tree,heap) * outerinductionvars,
1250                          VEC(tree,heap) ** lboundvars,
1251                          VEC(tree,heap) ** uboundvars,
1252                          VEC(int,heap) ** steps,
1253                          struct obstack * lambda_obstack)
1254 {
1255   gimple phi;
1256   gimple exit_cond;
1257   tree access_fn, inductionvar;
1258   tree step;
1259   lambda_loop lloop = NULL;
1260   lambda_linear_expression lbound, ubound;
1261   tree test_lhs, test_rhs;
1262   int stepint;
1263   int extra = 0;
1264   tree lboundvar, uboundvar, uboundresult;
1265
1266   /* Find out induction var and exit condition.  */
1267   inductionvar = find_induction_var_from_exit_cond (loop);
1268   exit_cond = get_loop_exit_condition (loop);
1269
1270   if (inductionvar == NULL || exit_cond == NULL)
1271     {
1272       if (dump_file && (dump_flags & TDF_DETAILS))
1273         fprintf (dump_file,
1274                  "Unable to convert loop: Cannot determine exit condition or induction variable for loop.\n");
1275       return NULL;
1276     }
1277
1278   if (SSA_NAME_DEF_STMT (inductionvar) == NULL)
1279     {
1280
1281       if (dump_file && (dump_flags & TDF_DETAILS))
1282         fprintf (dump_file,
1283                  "Unable to convert loop: Cannot find PHI node for induction variable\n");
1284
1285       return NULL;
1286     }
1287
1288   phi = SSA_NAME_DEF_STMT (inductionvar);
1289   if (gimple_code (phi) != GIMPLE_PHI)
1290     {
1291       tree op = SINGLE_SSA_TREE_OPERAND (phi, SSA_OP_USE);
1292       if (!op)
1293         {
1294
1295           if (dump_file && (dump_flags & TDF_DETAILS))
1296             fprintf (dump_file,
1297                      "Unable to convert loop: Cannot find PHI node for induction variable\n");
1298
1299           return NULL;
1300         }
1301
1302       phi = SSA_NAME_DEF_STMT (op);
1303       if (gimple_code (phi) != GIMPLE_PHI)
1304         {
1305           if (dump_file && (dump_flags & TDF_DETAILS))
1306             fprintf (dump_file,
1307                      "Unable to convert loop: Cannot find PHI node for induction variable\n");
1308           return NULL;
1309         }
1310     }
1311
1312   /* The induction variable name/version we want to put in the array is the
1313      result of the induction variable phi node.  */
1314   *ourinductionvar = PHI_RESULT (phi);
1315   access_fn = instantiate_parameters
1316     (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
1317   if (access_fn == chrec_dont_know)
1318     {
1319       if (dump_file && (dump_flags & TDF_DETAILS))
1320         fprintf (dump_file,
1321                  "Unable to convert loop: Access function for induction variable phi is unknown\n");
1322
1323       return NULL;
1324     }
1325
1326   step = evolution_part_in_loop_num (access_fn, loop->num);
1327   if (!step || step == chrec_dont_know)
1328     {
1329       if (dump_file && (dump_flags & TDF_DETAILS))
1330         fprintf (dump_file,
1331                  "Unable to convert loop: Cannot determine step of loop.\n");
1332
1333       return NULL;
1334     }
1335   if (TREE_CODE (step) != INTEGER_CST)
1336     {
1337
1338       if (dump_file && (dump_flags & TDF_DETAILS))
1339         fprintf (dump_file,
1340                  "Unable to convert loop: Step of loop is not integer.\n");
1341       return NULL;
1342     }
1343
1344   stepint = TREE_INT_CST_LOW (step);
1345
1346   /* Only want phis for induction vars, which will have two
1347      arguments.  */
1348   if (gimple_phi_num_args (phi) != 2)
1349     {
1350       if (dump_file && (dump_flags & TDF_DETAILS))
1351         fprintf (dump_file,
1352                  "Unable to convert loop: PHI node for induction variable has >2 arguments\n");
1353       return NULL;
1354     }
1355
1356   /* Another induction variable check. One argument's source should be
1357      in the loop, one outside the loop.  */
1358   if (flow_bb_inside_loop_p (loop, gimple_phi_arg_edge (phi, 0)->src)
1359       && flow_bb_inside_loop_p (loop, gimple_phi_arg_edge (phi, 1)->src))
1360     {
1361
1362       if (dump_file && (dump_flags & TDF_DETAILS))
1363         fprintf (dump_file,
1364                  "Unable to convert loop: PHI edges both inside loop, or both outside loop.\n");
1365
1366       return NULL;
1367     }
1368
1369   if (flow_bb_inside_loop_p (loop, gimple_phi_arg_edge (phi, 0)->src))
1370     {
1371       lboundvar = PHI_ARG_DEF (phi, 1);
1372       lbound = gcc_tree_to_linear_expression (depth, lboundvar,
1373                                               outerinductionvars, *invariants,
1374                                               0, lambda_obstack);
1375     }
1376   else
1377     {
1378       lboundvar = PHI_ARG_DEF (phi, 0);
1379       lbound = gcc_tree_to_linear_expression (depth, lboundvar,
1380                                               outerinductionvars, *invariants,
1381                                               0, lambda_obstack);
1382     }
1383
1384   if (!lbound)
1385     {
1386
1387       if (dump_file && (dump_flags & TDF_DETAILS))
1388         fprintf (dump_file,
1389                  "Unable to convert loop: Cannot convert lower bound to linear expression\n");
1390
1391       return NULL;
1392     }
1393   /* One part of the test may be a loop invariant tree.  */
1394   VEC_reserve (tree, heap, *invariants, 1);
1395   test_lhs = gimple_cond_lhs (exit_cond);
1396   test_rhs = gimple_cond_rhs (exit_cond);
1397
1398   if (TREE_CODE (test_rhs) == SSA_NAME
1399       && invariant_in_loop_and_outer_loops (loop, test_rhs))
1400     VEC_quick_push (tree, *invariants, test_rhs);
1401   else if (TREE_CODE (test_lhs) == SSA_NAME
1402            && invariant_in_loop_and_outer_loops (loop, test_lhs))
1403     VEC_quick_push (tree, *invariants, test_lhs);
1404
1405   /* The non-induction variable part of the test is the upper bound variable.
1406    */
1407   if (test_lhs == inductionvar)
1408     uboundvar = test_rhs;
1409   else
1410     uboundvar = test_lhs;
1411
1412   /* We only size the vectors assuming we have, at max, 2 times as many
1413      invariants as we do loops (one for each bound).
1414      This is just an arbitrary number, but it has to be matched against the
1415      code below.  */
1416   gcc_assert (VEC_length (tree, *invariants) <= (unsigned int) (2 * depth));
1417
1418
1419   /* We might have some leftover.  */
1420   if (gimple_cond_code (exit_cond) == LT_EXPR)
1421     extra = -1 * stepint;
1422   else if (gimple_cond_code (exit_cond) == NE_EXPR)
1423     extra = -1 * stepint;
1424   else if (gimple_cond_code (exit_cond) == GT_EXPR)
1425     extra = -1 * stepint;
1426   else if (gimple_cond_code (exit_cond) == EQ_EXPR)
1427     extra = 1 * stepint;
1428
1429   ubound = gcc_tree_to_linear_expression (depth, uboundvar,
1430                                           outerinductionvars,
1431                                           *invariants, extra, lambda_obstack);
1432   uboundresult = build2 (PLUS_EXPR, TREE_TYPE (uboundvar), uboundvar,
1433                          build_int_cst (TREE_TYPE (uboundvar), extra));
1434   VEC_safe_push (tree, heap, *uboundvars, uboundresult);
1435   VEC_safe_push (tree, heap, *lboundvars, lboundvar);
1436   VEC_safe_push (int, heap, *steps, stepint);
1437   if (!ubound)
1438     {
1439       if (dump_file && (dump_flags & TDF_DETAILS))
1440         fprintf (dump_file,
1441                  "Unable to convert loop: Cannot convert upper bound to linear expression\n");
1442       return NULL;
1443     }
1444
1445   lloop = lambda_loop_new (lambda_obstack);
1446   LL_STEP (lloop) = stepint;
1447   LL_LOWER_BOUND (lloop) = lbound;
1448   LL_UPPER_BOUND (lloop) = ubound;
1449   return lloop;
1450 }
1451
1452 /* Given a LOOP, find the induction variable it is testing against in the exit
1453    condition.  Return the induction variable if found, NULL otherwise.  */
1454
1455 tree
1456 find_induction_var_from_exit_cond (struct loop *loop)
1457 {
1458   gimple expr = get_loop_exit_condition (loop);
1459   tree ivarop;
1460   tree test_lhs, test_rhs;
1461   if (expr == NULL)
1462     return NULL_TREE;
1463   if (gimple_code (expr) != GIMPLE_COND)
1464     return NULL_TREE;
1465   test_lhs = gimple_cond_lhs (expr);
1466   test_rhs = gimple_cond_rhs (expr);
1467
1468   /* Find the side that is invariant in this loop. The ivar must be the other
1469      side.  */
1470
1471   if (expr_invariant_in_loop_p (loop, test_lhs))
1472       ivarop = test_rhs;
1473   else if (expr_invariant_in_loop_p (loop, test_rhs))
1474       ivarop = test_lhs;
1475   else
1476     return NULL_TREE;
1477
1478   if (TREE_CODE (ivarop) != SSA_NAME)
1479     return NULL_TREE;
1480   return ivarop;
1481 }
1482
1483 DEF_VEC_P(lambda_loop);
1484 DEF_VEC_ALLOC_P(lambda_loop,heap);
1485
1486 /* Generate a lambda loopnest from a gcc loopnest LOOP_NEST.
1487    Return the new loop nest.
1488    INDUCTIONVARS is a pointer to an array of induction variables for the
1489    loopnest that will be filled in during this process.
1490    INVARIANTS is a pointer to an array of invariants that will be filled in
1491    during this process.  */
1492
1493 lambda_loopnest
1494 gcc_loopnest_to_lambda_loopnest (struct loop *loop_nest,
1495                                  VEC(tree,heap) **inductionvars,
1496                                  VEC(tree,heap) **invariants,
1497                                  struct obstack * lambda_obstack)
1498 {
1499   lambda_loopnest ret = NULL;
1500   struct loop *temp = loop_nest;
1501   int depth = depth_of_nest (loop_nest);
1502   size_t i;
1503   VEC(lambda_loop,heap) *loops = NULL;
1504   VEC(tree,heap) *uboundvars = NULL;
1505   VEC(tree,heap) *lboundvars  = NULL;
1506   VEC(int,heap) *steps = NULL;
1507   lambda_loop newloop;
1508   tree inductionvar = NULL;
1509   bool perfect_nest = perfect_nest_p (loop_nest);
1510
1511   if (!perfect_nest && !can_convert_to_perfect_nest (loop_nest))
1512     goto fail;
1513
1514   while (temp)
1515     {
1516       newloop = gcc_loop_to_lambda_loop (temp, depth, invariants,
1517                                          &inductionvar, *inductionvars,
1518                                          &lboundvars, &uboundvars,
1519                                          &steps, lambda_obstack);
1520       if (!newloop)
1521         goto fail;
1522
1523       VEC_safe_push (tree, heap, *inductionvars, inductionvar);
1524       VEC_safe_push (lambda_loop, heap, loops, newloop);
1525       temp = temp->inner;
1526     }
1527
1528   if (!perfect_nest)
1529     {
1530       if (!perfect_nestify (loop_nest, lboundvars, uboundvars, steps,
1531                             *inductionvars))
1532         {
1533           if (dump_file)
1534             fprintf (dump_file,
1535                      "Not a perfect loop nest and couldn't convert to one.\n");
1536           goto fail;
1537         }
1538       else if (dump_file)
1539         fprintf (dump_file,
1540                  "Successfully converted loop nest to perfect loop nest.\n");
1541     }
1542
1543   ret = lambda_loopnest_new (depth, 2 * depth, lambda_obstack);
1544
1545   for (i = 0; VEC_iterate (lambda_loop, loops, i, newloop); i++)
1546     LN_LOOPS (ret)[i] = newloop;
1547
1548  fail:
1549   VEC_free (lambda_loop, heap, loops);
1550   VEC_free (tree, heap, uboundvars);
1551   VEC_free (tree, heap, lboundvars);
1552   VEC_free (int, heap, steps);
1553
1554   return ret;
1555 }
1556
1557 /* Convert a lambda body vector LBV to a gcc tree, and return the new tree.
1558    STMTS_TO_INSERT is a pointer to a tree where the statements we need to be
1559    inserted for us are stored.  INDUCTION_VARS is the array of induction
1560    variables for the loop this LBV is from.  TYPE is the tree type to use for
1561    the variables and trees involved.  */
1562
1563 static tree
1564 lbv_to_gcc_expression (lambda_body_vector lbv,
1565                        tree type, VEC(tree,heap) *induction_vars,
1566                        gimple_seq *stmts_to_insert)
1567 {
1568   int k;
1569   tree resvar;
1570   tree expr = build_linear_expr (type, LBV_COEFFICIENTS (lbv), induction_vars);
1571
1572   k = LBV_DENOMINATOR (lbv);
1573   gcc_assert (k != 0);
1574   if (k != 1)
1575     expr = fold_build2 (CEIL_DIV_EXPR, type, expr, build_int_cst (type, k));
1576
1577   resvar = create_tmp_var (type, "lbvtmp");
1578   add_referenced_var (resvar);
1579   return force_gimple_operand (fold (expr), stmts_to_insert, true, resvar);
1580 }
1581
1582 /* Convert a linear expression from coefficient and constant form to a
1583    gcc tree.
1584    Return the tree that represents the final value of the expression.
1585    LLE is the linear expression to convert.
1586    OFFSET is the linear offset to apply to the expression.
1587    TYPE is the tree type to use for the variables and math.
1588    INDUCTION_VARS is a vector of induction variables for the loops.
1589    INVARIANTS is a vector of the loop nest invariants.
1590    WRAP specifies what tree code to wrap the results in, if there is more than
1591    one (it is either MAX_EXPR, or MIN_EXPR).
1592    STMTS_TO_INSERT Is a pointer to the statement list we fill in with
1593    statements that need to be inserted for the linear expression.  */
1594
1595 static tree
1596 lle_to_gcc_expression (lambda_linear_expression lle,
1597                        lambda_linear_expression offset,
1598                        tree type,
1599                        VEC(tree,heap) *induction_vars,
1600                        VEC(tree,heap) *invariants,
1601                        enum tree_code wrap, gimple_seq *stmts_to_insert)
1602 {
1603   int k;
1604   tree resvar;
1605   tree expr = NULL_TREE;
1606   VEC(tree,heap) *results = NULL;
1607
1608   gcc_assert (wrap == MAX_EXPR || wrap == MIN_EXPR);
1609
1610   /* Build up the linear expressions.  */
1611   for (; lle != NULL; lle = LLE_NEXT (lle))
1612     {
1613       expr = build_linear_expr (type, LLE_COEFFICIENTS (lle), induction_vars);
1614       expr = fold_build2 (PLUS_EXPR, type, expr,
1615                           build_linear_expr (type,
1616                                              LLE_INVARIANT_COEFFICIENTS (lle),
1617                                              invariants));
1618
1619       k = LLE_CONSTANT (lle);
1620       if (k)
1621         expr = fold_build2 (PLUS_EXPR, type, expr, build_int_cst (type, k));
1622
1623       k = LLE_CONSTANT (offset);
1624       if (k)
1625         expr = fold_build2 (PLUS_EXPR, type, expr, build_int_cst (type, k));
1626
1627       k = LLE_DENOMINATOR (lle);
1628       if (k != 1)
1629         expr = fold_build2 (wrap == MAX_EXPR ? CEIL_DIV_EXPR : FLOOR_DIV_EXPR,
1630                             type, expr, build_int_cst (type, k));
1631
1632       expr = fold (expr);
1633       VEC_safe_push (tree, heap, results, expr);
1634     }
1635
1636   gcc_assert (expr);
1637
1638   /* We may need to wrap the results in a MAX_EXPR or MIN_EXPR.  */
1639   if (VEC_length (tree, results) > 1)
1640     {
1641       size_t i;
1642       tree op;
1643
1644       expr = VEC_index (tree, results, 0);
1645       for (i = 1; VEC_iterate (tree, results, i, op); i++)
1646         expr = fold_build2 (wrap, type, expr, op);
1647     }
1648
1649   VEC_free (tree, heap, results);
1650
1651   resvar = create_tmp_var (type, "lletmp");
1652   add_referenced_var (resvar);
1653   return force_gimple_operand (fold (expr), stmts_to_insert, true, resvar);
1654 }
1655
1656 /* Remove the induction variable defined at IV_STMT.  */
1657
1658 void
1659 remove_iv (gimple iv_stmt)
1660 {
1661   gimple_stmt_iterator si = gsi_for_stmt (iv_stmt);
1662
1663   if (gimple_code (iv_stmt) == GIMPLE_PHI)
1664     {
1665       unsigned i;
1666
1667       for (i = 0; i < gimple_phi_num_args (iv_stmt); i++)
1668         {
1669           gimple stmt;
1670           imm_use_iterator imm_iter;
1671           tree arg = gimple_phi_arg_def (iv_stmt, i);
1672           bool used = false;
1673
1674           if (TREE_CODE (arg) != SSA_NAME)
1675             continue;
1676
1677           FOR_EACH_IMM_USE_STMT (stmt, imm_iter, arg)
1678             if (stmt != iv_stmt && !is_gimple_debug (stmt))
1679               used = true;
1680
1681           if (!used)
1682             remove_iv (SSA_NAME_DEF_STMT (arg));
1683         }
1684
1685       remove_phi_node (&si, true);
1686     }
1687   else
1688     {
1689       gsi_remove (&si, true);
1690       release_defs (iv_stmt);
1691     }
1692 }
1693
1694 /* Transform a lambda loopnest NEW_LOOPNEST, which had TRANSFORM applied to
1695    it, back into gcc code.  This changes the
1696    loops, their induction variables, and their bodies, so that they
1697    match the transformed loopnest.
1698    OLD_LOOPNEST is the loopnest before we've replaced it with the new
1699    loopnest.
1700    OLD_IVS is a vector of induction variables from the old loopnest.
1701    INVARIANTS is a vector of loop invariants from the old loopnest.
1702    NEW_LOOPNEST is the new lambda loopnest to replace OLD_LOOPNEST with.
1703    TRANSFORM is the matrix transform that was applied to OLD_LOOPNEST to get
1704    NEW_LOOPNEST.  */
1705
1706 void
1707 lambda_loopnest_to_gcc_loopnest (struct loop *old_loopnest,
1708                                  VEC(tree,heap) *old_ivs,
1709                                  VEC(tree,heap) *invariants,
1710                                  VEC(gimple,heap) **remove_ivs,
1711                                  lambda_loopnest new_loopnest,
1712                                  lambda_trans_matrix transform,
1713                                  struct obstack * lambda_obstack)
1714 {
1715   struct loop *temp;
1716   size_t i = 0;
1717   unsigned j;
1718   size_t depth = 0;
1719   VEC(tree,heap) *new_ivs = NULL;
1720   tree oldiv;
1721   gimple_stmt_iterator bsi;
1722
1723   transform = lambda_trans_matrix_inverse (transform, lambda_obstack);
1724
1725   if (dump_file)
1726     {
1727       fprintf (dump_file, "Inverse of transformation matrix:\n");
1728       print_lambda_trans_matrix (dump_file, transform);
1729     }
1730   depth = depth_of_nest (old_loopnest);
1731   temp = old_loopnest;
1732
1733   while (temp)
1734     {
1735       lambda_loop newloop;
1736       basic_block bb;
1737       edge exit;
1738       tree ivvar, ivvarinced;
1739       gimple exitcond;
1740       gimple_seq stmts;
1741       enum tree_code testtype;
1742       tree newupperbound, newlowerbound;
1743       lambda_linear_expression offset;
1744       tree type;
1745       bool insert_after;
1746       gimple inc_stmt;
1747
1748       oldiv = VEC_index (tree, old_ivs, i);
1749       type = TREE_TYPE (oldiv);
1750
1751       /* First, build the new induction variable temporary  */
1752
1753       ivvar = create_tmp_var (type, "lnivtmp");
1754       add_referenced_var (ivvar);
1755
1756       VEC_safe_push (tree, heap, new_ivs, ivvar);
1757
1758       newloop = LN_LOOPS (new_loopnest)[i];
1759
1760       /* Linear offset is a bit tricky to handle.  Punt on the unhandled
1761          cases for now.  */
1762       offset = LL_LINEAR_OFFSET (newloop);
1763
1764       gcc_assert (LLE_DENOMINATOR (offset) == 1 &&
1765                   lambda_vector_zerop (LLE_COEFFICIENTS (offset), depth));
1766
1767       /* Now build the  new lower bounds, and insert the statements
1768          necessary to generate it on the loop preheader.  */
1769       stmts = NULL;
1770       newlowerbound = lle_to_gcc_expression (LL_LOWER_BOUND (newloop),
1771                                              LL_LINEAR_OFFSET (newloop),
1772                                              type,
1773                                              new_ivs,
1774                                              invariants, MAX_EXPR, &stmts);
1775
1776       if (stmts)
1777         {
1778           gsi_insert_seq_on_edge (loop_preheader_edge (temp), stmts);
1779           gsi_commit_edge_inserts ();
1780         }
1781       /* Build the new upper bound and insert its statements in the
1782          basic block of the exit condition */
1783       stmts = NULL;
1784       newupperbound = lle_to_gcc_expression (LL_UPPER_BOUND (newloop),
1785                                              LL_LINEAR_OFFSET (newloop),
1786                                              type,
1787                                              new_ivs,
1788                                              invariants, MIN_EXPR, &stmts);
1789       exit = single_exit (temp);
1790       exitcond = get_loop_exit_condition (temp);
1791       bb = gimple_bb (exitcond);
1792       bsi = gsi_after_labels (bb);
1793       if (stmts)
1794         gsi_insert_seq_before (&bsi, stmts, GSI_NEW_STMT);
1795
1796       /* Create the new iv.  */
1797
1798       standard_iv_increment_position (temp, &bsi, &insert_after);
1799       create_iv (newlowerbound,
1800                  build_int_cst (type, LL_STEP (newloop)),
1801                  ivvar, temp, &bsi, insert_after, &ivvar,
1802                  NULL);
1803
1804       /* Unfortunately, the incremented ivvar that create_iv inserted may not
1805          dominate the block containing the exit condition.
1806          So we simply create our own incremented iv to use in the new exit
1807          test,  and let redundancy elimination sort it out.  */
1808       inc_stmt = gimple_build_assign_with_ops (PLUS_EXPR, SSA_NAME_VAR (ivvar),
1809                                                ivvar,
1810                                                build_int_cst (type, LL_STEP (newloop)));
1811
1812       ivvarinced = make_ssa_name (SSA_NAME_VAR (ivvar), inc_stmt);
1813       gimple_assign_set_lhs (inc_stmt, ivvarinced);
1814       bsi = gsi_for_stmt (exitcond);
1815       gsi_insert_before (&bsi, inc_stmt, GSI_SAME_STMT);
1816
1817       /* Replace the exit condition with the new upper bound
1818          comparison.  */
1819
1820       testtype = LL_STEP (newloop) >= 0 ? LE_EXPR : GE_EXPR;
1821
1822       /* We want to build a conditional where true means exit the loop, and
1823          false means continue the loop.
1824          So swap the testtype if this isn't the way things are.*/
1825
1826       if (exit->flags & EDGE_FALSE_VALUE)
1827         testtype = swap_tree_comparison (testtype);
1828
1829       gimple_cond_set_condition (exitcond, testtype, newupperbound, ivvarinced);
1830       update_stmt (exitcond);
1831       VEC_replace (tree, new_ivs, i, ivvar);
1832
1833       i++;
1834       temp = temp->inner;
1835     }
1836
1837   /* Rewrite uses of the old ivs so that they are now specified in terms of
1838      the new ivs.  */
1839
1840   for (i = 0; VEC_iterate (tree, old_ivs, i, oldiv); i++)
1841     {
1842       imm_use_iterator imm_iter;
1843       use_operand_p use_p;
1844       tree oldiv_def;
1845       gimple oldiv_stmt = SSA_NAME_DEF_STMT (oldiv);
1846       gimple stmt;
1847
1848       if (gimple_code (oldiv_stmt) == GIMPLE_PHI)
1849         oldiv_def = PHI_RESULT (oldiv_stmt);
1850       else
1851         oldiv_def = SINGLE_SSA_TREE_OPERAND (oldiv_stmt, SSA_OP_DEF);
1852       gcc_assert (oldiv_def != NULL_TREE);
1853
1854       FOR_EACH_IMM_USE_STMT (stmt, imm_iter, oldiv_def)
1855         {
1856           tree newiv;
1857           gimple_seq stmts;
1858           lambda_body_vector lbv, newlbv;
1859
1860           if (is_gimple_debug (stmt))
1861             continue;
1862
1863           /* Compute the new expression for the induction
1864              variable.  */
1865           depth = VEC_length (tree, new_ivs);
1866           lbv = lambda_body_vector_new (depth, lambda_obstack);
1867           LBV_COEFFICIENTS (lbv)[i] = 1;
1868
1869           newlbv = lambda_body_vector_compute_new (transform, lbv,
1870                                                    lambda_obstack);
1871
1872           stmts = NULL;
1873           newiv = lbv_to_gcc_expression (newlbv, TREE_TYPE (oldiv),
1874                                          new_ivs, &stmts);
1875
1876           if (stmts && gimple_code (stmt) != GIMPLE_PHI)
1877             {
1878               bsi = gsi_for_stmt (stmt);
1879               gsi_insert_seq_before (&bsi, stmts, GSI_SAME_STMT);
1880             }
1881
1882           FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1883             propagate_value (use_p, newiv);
1884
1885           if (stmts && gimple_code (stmt) == GIMPLE_PHI)
1886             for (j = 0; j < gimple_phi_num_args (stmt); j++)
1887               if (gimple_phi_arg_def (stmt, j) == newiv)
1888                 gsi_insert_seq_on_edge (gimple_phi_arg_edge (stmt, j), stmts);
1889
1890           update_stmt (stmt);
1891         }
1892
1893       /* Remove the now unused induction variable.  */
1894       VEC_safe_push (gimple, heap, *remove_ivs, oldiv_stmt);
1895     }
1896   VEC_free (tree, heap, new_ivs);
1897 }
1898
1899 /* Return TRUE if this is not interesting statement from the perspective of
1900    determining if we have a perfect loop nest.  */
1901
1902 static bool
1903 not_interesting_stmt (gimple stmt)
1904 {
1905   /* Note that COND_EXPR's aren't interesting because if they were exiting the
1906      loop, we would have already failed the number of exits tests.  */
1907   if (gimple_code (stmt) == GIMPLE_LABEL
1908       || gimple_code (stmt) == GIMPLE_GOTO
1909       || gimple_code (stmt) == GIMPLE_COND
1910       || is_gimple_debug (stmt))
1911     return true;
1912   return false;
1913 }
1914
1915 /* Return TRUE if PHI uses DEF for it's in-the-loop edge for LOOP.  */
1916
1917 static bool
1918 phi_loop_edge_uses_def (struct loop *loop, gimple phi, tree def)
1919 {
1920   unsigned i;
1921   for (i = 0; i < gimple_phi_num_args (phi); i++)
1922     if (flow_bb_inside_loop_p (loop, gimple_phi_arg_edge (phi, i)->src))
1923       if (PHI_ARG_DEF (phi, i) == def)
1924         return true;
1925   return false;
1926 }
1927
1928 /* Return TRUE if STMT is a use of PHI_RESULT.  */
1929
1930 static bool
1931 stmt_uses_phi_result (gimple stmt, tree phi_result)
1932 {
1933   tree use = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1934
1935   /* This is conservatively true, because we only want SIMPLE bumpers
1936      of the form x +- constant for our pass.  */
1937   return (use == phi_result);
1938 }
1939
1940 /* STMT is a bumper stmt for LOOP if the version it defines is used in the
1941    in-loop-edge in a phi node, and the operand it uses is the result of that
1942    phi node.
1943    I.E. i_29 = i_3 + 1
1944         i_3 = PHI (0, i_29);  */
1945
1946 static bool
1947 stmt_is_bumper_for_loop (struct loop *loop, gimple stmt)
1948 {
1949   gimple use;
1950   tree def;
1951   imm_use_iterator iter;
1952   use_operand_p use_p;
1953
1954   def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
1955   if (!def)
1956     return false;
1957
1958   FOR_EACH_IMM_USE_FAST (use_p, iter, def)
1959     {
1960       use = USE_STMT (use_p);
1961       if (gimple_code (use) == GIMPLE_PHI)
1962         {
1963           if (phi_loop_edge_uses_def (loop, use, def))
1964             if (stmt_uses_phi_result (stmt, PHI_RESULT (use)))
1965               return true;
1966         }
1967     }
1968   return false;
1969 }
1970
1971
1972 /* Return true if LOOP is a perfect loop nest.
1973    Perfect loop nests are those loop nests where all code occurs in the
1974    innermost loop body.
1975    If S is a program statement, then
1976
1977    i.e.
1978    DO I = 1, 20
1979        S1
1980        DO J = 1, 20
1981        ...
1982        END DO
1983    END DO
1984    is not a perfect loop nest because of S1.
1985
1986    DO I = 1, 20
1987       DO J = 1, 20
1988         S1
1989         ...
1990       END DO
1991    END DO
1992    is a perfect loop nest.
1993
1994    Since we don't have high level loops anymore, we basically have to walk our
1995    statements and ignore those that are there because the loop needs them (IE
1996    the induction variable increment, and jump back to the top of the loop).  */
1997
1998 bool
1999 perfect_nest_p (struct loop *loop)
2000 {
2001   basic_block *bbs;
2002   size_t i;
2003   gimple exit_cond;
2004
2005   /* Loops at depth 0 are perfect nests.  */
2006   if (!loop->inner)
2007     return true;
2008
2009   bbs = get_loop_body (loop);
2010   exit_cond = get_loop_exit_condition (loop);
2011
2012   for (i = 0; i < loop->num_nodes; i++)
2013     {
2014       if (bbs[i]->loop_father == loop)
2015         {
2016           gimple_stmt_iterator bsi;
2017
2018           for (bsi = gsi_start_bb (bbs[i]); !gsi_end_p (bsi); gsi_next (&bsi))
2019             {
2020               gimple stmt = gsi_stmt (bsi);
2021
2022               if (gimple_code (stmt) == GIMPLE_COND
2023                   && exit_cond != stmt)
2024                 goto non_perfectly_nested;
2025
2026               if (stmt == exit_cond
2027                   || not_interesting_stmt (stmt)
2028                   || stmt_is_bumper_for_loop (loop, stmt))
2029                 continue;
2030
2031             non_perfectly_nested:
2032               free (bbs);
2033               return false;
2034             }
2035         }
2036     }
2037
2038   free (bbs);
2039
2040   return perfect_nest_p (loop->inner);
2041 }
2042
2043 /* Replace the USES of X in STMT, or uses with the same step as X with Y.
2044    YINIT is the initial value of Y, REPLACEMENTS is a hash table to
2045    avoid creating duplicate temporaries and FIRSTBSI is statement
2046    iterator where new temporaries should be inserted at the beginning
2047    of body basic block.  */
2048
2049 static void
2050 replace_uses_equiv_to_x_with_y (struct loop *loop, gimple stmt, tree x,
2051                                 int xstep, tree y, tree yinit,
2052                                 htab_t replacements,
2053                                 gimple_stmt_iterator *firstbsi)
2054 {
2055   ssa_op_iter iter;
2056   use_operand_p use_p;
2057
2058   FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
2059     {
2060       tree use = USE_FROM_PTR (use_p);
2061       tree step = NULL_TREE;
2062       tree scev, init, val, var;
2063       gimple setstmt;
2064       struct tree_map *h, in;
2065       void **loc;
2066
2067       /* Replace uses of X with Y right away.  */
2068       if (use == x)
2069         {
2070           SET_USE (use_p, y);
2071           continue;
2072         }
2073
2074       scev = instantiate_parameters (loop,
2075                                      analyze_scalar_evolution (loop, use));
2076
2077       if (scev == NULL || scev == chrec_dont_know)
2078         continue;
2079
2080       step = evolution_part_in_loop_num (scev, loop->num);
2081       if (step == NULL
2082           || step == chrec_dont_know
2083           || TREE_CODE (step) != INTEGER_CST
2084           || int_cst_value (step) != xstep)
2085         continue;
2086
2087       /* Use REPLACEMENTS hash table to cache already created
2088          temporaries.  */
2089       in.hash = htab_hash_pointer (use);
2090       in.base.from = use;
2091       h = (struct tree_map *) htab_find_with_hash (replacements, &in, in.hash);
2092       if (h != NULL)
2093         {
2094           SET_USE (use_p, h->to);
2095           continue;
2096         }
2097
2098       /* USE which has the same step as X should be replaced
2099          with a temporary set to Y + YINIT - INIT.  */
2100       init = initial_condition_in_loop_num (scev, loop->num);
2101       gcc_assert (init != NULL && init != chrec_dont_know);
2102       if (TREE_TYPE (use) == TREE_TYPE (y))
2103         {
2104           val = fold_build2 (MINUS_EXPR, TREE_TYPE (y), init, yinit);
2105           val = fold_build2 (PLUS_EXPR, TREE_TYPE (y), y, val);
2106           if (val == y)
2107             {
2108               /* If X has the same type as USE, the same step
2109                  and same initial value, it can be replaced by Y.  */
2110               SET_USE (use_p, y);
2111               continue;
2112             }
2113         }
2114       else
2115         {
2116           val = fold_build2 (MINUS_EXPR, TREE_TYPE (y), y, yinit);
2117           val = fold_convert (TREE_TYPE (use), val);
2118           val = fold_build2 (PLUS_EXPR, TREE_TYPE (use), val, init);
2119         }
2120
2121       /* Create a temporary variable and insert it at the beginning
2122          of the loop body basic block, right after the PHI node
2123          which sets Y.  */
2124       var = create_tmp_var (TREE_TYPE (use), "perfecttmp");
2125       add_referenced_var (var);
2126       val = force_gimple_operand_gsi (firstbsi, val, false, NULL,
2127                                       true, GSI_SAME_STMT);
2128       setstmt = gimple_build_assign (var, val);
2129       var = make_ssa_name (var, setstmt);
2130       gimple_assign_set_lhs (setstmt, var);
2131       gsi_insert_before (firstbsi, setstmt, GSI_SAME_STMT);
2132       update_stmt (setstmt);
2133       SET_USE (use_p, var);
2134       h = GGC_NEW (struct tree_map);
2135       h->hash = in.hash;
2136       h->base.from = use;
2137       h->to = var;
2138       loc = htab_find_slot_with_hash (replacements, h, in.hash, INSERT);
2139       gcc_assert ((*(struct tree_map **)loc) == NULL);
2140       *(struct tree_map **) loc = h;
2141     }
2142 }
2143
2144 /* Return true if STMT is an exit PHI for LOOP */
2145
2146 static bool
2147 exit_phi_for_loop_p (struct loop *loop, gimple stmt)
2148 {
2149   if (gimple_code (stmt) != GIMPLE_PHI
2150       || gimple_phi_num_args (stmt) != 1
2151       || gimple_bb (stmt) != single_exit (loop)->dest)
2152     return false;
2153
2154   return true;
2155 }
2156
2157 /* Return true if STMT can be put back into the loop INNER, by
2158    copying it to the beginning of that loop and changing the uses.  */
2159
2160 static bool
2161 can_put_in_inner_loop (struct loop *inner, gimple stmt)
2162 {
2163   imm_use_iterator imm_iter;
2164   use_operand_p use_p;
2165
2166   gcc_assert (is_gimple_assign (stmt));
2167   if (gimple_vuse (stmt)
2168       || !stmt_invariant_in_loop_p (inner, stmt))
2169     return false;
2170
2171   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_assign_lhs (stmt))
2172     {
2173       if (!exit_phi_for_loop_p (inner, USE_STMT (use_p)))
2174         {
2175           basic_block immbb = gimple_bb (USE_STMT (use_p));
2176
2177           if (!flow_bb_inside_loop_p (inner, immbb))
2178             return false;
2179         }
2180     }
2181   return true;
2182 }
2183
2184 /* Return true if STMT can be put *after* the inner loop of LOOP.  */
2185
2186 static bool
2187 can_put_after_inner_loop (struct loop *loop, gimple stmt)
2188 {
2189   imm_use_iterator imm_iter;
2190   use_operand_p use_p;
2191
2192   if (gimple_vuse (stmt))
2193     return false;
2194
2195   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_assign_lhs (stmt))
2196     {
2197       if (!exit_phi_for_loop_p (loop, USE_STMT (use_p)))
2198         {
2199           basic_block immbb = gimple_bb (USE_STMT (use_p));
2200
2201           if (!dominated_by_p (CDI_DOMINATORS,
2202                                immbb,
2203                                loop->inner->header)
2204               && !can_put_in_inner_loop (loop->inner, stmt))
2205             return false;
2206         }
2207     }
2208   return true;
2209 }
2210
2211 /* Return true when the induction variable IV is simple enough to be
2212    re-synthesized.  */
2213
2214 static bool
2215 can_duplicate_iv (tree iv, struct loop *loop)
2216 {
2217   tree scev = instantiate_parameters
2218     (loop, analyze_scalar_evolution (loop, iv));
2219
2220   if (!automatically_generated_chrec_p (scev))
2221     {
2222       tree step = evolution_part_in_loop_num (scev, loop->num);
2223
2224       if (step && step != chrec_dont_know && TREE_CODE (step) == INTEGER_CST)
2225         return true;
2226     }
2227
2228   return false;
2229 }
2230
2231 /* If this is a scalar operation that can be put back into the inner
2232    loop, or after the inner loop, through copying, then do so. This
2233    works on the theory that any amount of scalar code we have to
2234    reduplicate into or after the loops is less expensive that the win
2235    we get from rearranging the memory walk the loop is doing so that
2236    it has better cache behavior.  */
2237
2238 static bool
2239 cannot_convert_modify_to_perfect_nest (gimple stmt, struct loop *loop)
2240 {
2241   use_operand_p use_a, use_b;
2242   imm_use_iterator imm_iter;
2243   ssa_op_iter op_iter, op_iter1;
2244   tree op0 = gimple_assign_lhs (stmt);
2245
2246   /* The statement should not define a variable used in the inner
2247      loop.  */
2248   if (TREE_CODE (op0) == SSA_NAME
2249       && !can_duplicate_iv (op0, loop))
2250     FOR_EACH_IMM_USE_FAST (use_a, imm_iter, op0)
2251       if (gimple_bb (USE_STMT (use_a))->loop_father == loop->inner)
2252         return true;
2253
2254   FOR_EACH_SSA_USE_OPERAND (use_a, stmt, op_iter, SSA_OP_USE)
2255     {
2256       gimple node;
2257       tree op = USE_FROM_PTR (use_a);
2258
2259       /* The variables should not be used in both loops.  */
2260       if (!can_duplicate_iv (op, loop))
2261         FOR_EACH_IMM_USE_FAST (use_b, imm_iter, op)
2262           if (gimple_bb (USE_STMT (use_b))->loop_father == loop->inner)
2263             return true;
2264
2265       /* The statement should not use the value of a scalar that was
2266          modified in the loop.  */
2267       node = SSA_NAME_DEF_STMT (op);
2268       if (gimple_code (node) == GIMPLE_PHI)
2269         FOR_EACH_PHI_ARG (use_b, node, op_iter1, SSA_OP_USE)
2270           {
2271             tree arg = USE_FROM_PTR (use_b);
2272
2273             if (TREE_CODE (arg) == SSA_NAME)
2274               {
2275                 gimple arg_stmt = SSA_NAME_DEF_STMT (arg);
2276
2277                 if (gimple_bb (arg_stmt)
2278                     && (gimple_bb (arg_stmt)->loop_father == loop->inner))
2279                   return true;
2280               }
2281           }
2282     }
2283
2284   return false;
2285 }
2286 /* Return true when BB contains statements that can harm the transform
2287    to a perfect loop nest.  */
2288
2289 static bool
2290 cannot_convert_bb_to_perfect_nest (basic_block bb, struct loop *loop)
2291 {
2292   gimple_stmt_iterator bsi;
2293   gimple exit_condition = get_loop_exit_condition (loop);
2294
2295   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2296     {
2297       gimple stmt = gsi_stmt (bsi);
2298
2299       if (stmt == exit_condition
2300           || not_interesting_stmt (stmt)
2301           || stmt_is_bumper_for_loop (loop, stmt))
2302         continue;
2303
2304       if (is_gimple_assign (stmt))
2305         {
2306           if (cannot_convert_modify_to_perfect_nest (stmt, loop))
2307             return true;
2308
2309           if (can_duplicate_iv (gimple_assign_lhs (stmt), loop))
2310             continue;
2311
2312           if (can_put_in_inner_loop (loop->inner, stmt)
2313               || can_put_after_inner_loop (loop, stmt))
2314             continue;
2315         }
2316
2317       /* If the bb of a statement we care about isn't dominated by the
2318          header of the inner loop, then we can't handle this case
2319          right now.  This test ensures that the statement comes
2320          completely *after* the inner loop.  */
2321       if (!dominated_by_p (CDI_DOMINATORS,
2322                            gimple_bb (stmt),
2323                            loop->inner->header))
2324         return true;
2325     }
2326
2327   return false;
2328 }
2329
2330
2331 /* Return TRUE if LOOP is an imperfect nest that we can convert to a
2332    perfect one.  At the moment, we only handle imperfect nests of
2333    depth 2, where all of the statements occur after the inner loop.  */
2334
2335 static bool
2336 can_convert_to_perfect_nest (struct loop *loop)
2337 {
2338   basic_block *bbs;
2339   size_t i;
2340   gimple_stmt_iterator si;
2341
2342   /* Can't handle triply nested+ loops yet.  */
2343   if (!loop->inner || loop->inner->inner)
2344     return false;
2345
2346   bbs = get_loop_body (loop);
2347   for (i = 0; i < loop->num_nodes; i++)
2348     if (bbs[i]->loop_father == loop
2349         && cannot_convert_bb_to_perfect_nest (bbs[i], loop))
2350       goto fail;
2351
2352   /* We also need to make sure the loop exit only has simple copy phis in it,
2353      otherwise we don't know how to transform it into a perfect nest.  */
2354   for (si = gsi_start_phis (single_exit (loop)->dest);
2355        !gsi_end_p (si);
2356        gsi_next (&si))
2357     if (gimple_phi_num_args (gsi_stmt (si)) != 1)
2358       goto fail;
2359
2360   free (bbs);
2361   return true;
2362
2363  fail:
2364   free (bbs);
2365   return false;
2366 }
2367
2368
2369 DEF_VEC_I(source_location);
2370 DEF_VEC_ALLOC_I(source_location,heap);
2371
2372 /* Transform the loop nest into a perfect nest, if possible.
2373    LOOP is the loop nest to transform into a perfect nest
2374    LBOUNDS are the lower bounds for the loops to transform
2375    UBOUNDS are the upper bounds for the loops to transform
2376    STEPS is the STEPS for the loops to transform.
2377    LOOPIVS is the induction variables for the loops to transform.
2378
2379    Basically, for the case of
2380
2381    FOR (i = 0; i < 50; i++)
2382     {
2383      FOR (j =0; j < 50; j++)
2384      {
2385         <whatever>
2386      }
2387      <some code>
2388     }
2389
2390    This function will transform it into a perfect loop nest by splitting the
2391    outer loop into two loops, like so:
2392
2393    FOR (i = 0; i < 50; i++)
2394    {
2395      FOR (j = 0; j < 50; j++)
2396      {
2397          <whatever>
2398      }
2399    }
2400
2401    FOR (i = 0; i < 50; i ++)
2402    {
2403     <some code>
2404    }
2405
2406    Return FALSE if we can't make this loop into a perfect nest.  */
2407
2408 static bool
2409 perfect_nestify (struct loop *loop,
2410                  VEC(tree,heap) *lbounds,
2411                  VEC(tree,heap) *ubounds,
2412                  VEC(int,heap) *steps,
2413                  VEC(tree,heap) *loopivs)
2414 {
2415   basic_block *bbs;
2416   gimple exit_condition;
2417   gimple cond_stmt;
2418   basic_block preheaderbb, headerbb, bodybb, latchbb, olddest;
2419   int i;
2420   gimple_stmt_iterator bsi, firstbsi;
2421   bool insert_after;
2422   edge e;
2423   struct loop *newloop;
2424   gimple phi;
2425   tree uboundvar;
2426   gimple stmt;
2427   tree oldivvar, ivvar, ivvarinced;
2428   VEC(tree,heap) *phis = NULL;
2429   VEC(source_location,heap) *locations = NULL;
2430   htab_t replacements = NULL;
2431
2432   /* Create the new loop.  */
2433   olddest = single_exit (loop)->dest;
2434   preheaderbb = split_edge (single_exit (loop));
2435   headerbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2436
2437   /* Push the exit phi nodes that we are moving.  */
2438   for (bsi = gsi_start_phis (olddest); !gsi_end_p (bsi); gsi_next (&bsi))
2439     {
2440       phi = gsi_stmt (bsi);
2441       VEC_reserve (tree, heap, phis, 2);
2442       VEC_reserve (source_location, heap, locations, 1);
2443       VEC_quick_push (tree, phis, PHI_RESULT (phi));
2444       VEC_quick_push (tree, phis, PHI_ARG_DEF (phi, 0));
2445       VEC_quick_push (source_location, locations,
2446                       gimple_phi_arg_location (phi, 0));
2447     }
2448   e = redirect_edge_and_branch (single_succ_edge (preheaderbb), headerbb);
2449
2450   /* Remove the exit phis from the old basic block.  */
2451   for (bsi = gsi_start_phis (olddest); !gsi_end_p (bsi); )
2452     remove_phi_node (&bsi, false);
2453
2454   /* and add them back to the new basic block.  */
2455   while (VEC_length (tree, phis) != 0)
2456     {
2457       tree def;
2458       tree phiname;
2459       source_location locus;
2460       def = VEC_pop (tree, phis);
2461       phiname = VEC_pop (tree, phis);
2462       locus = VEC_pop (source_location, locations);
2463       phi = create_phi_node (phiname, preheaderbb);
2464       add_phi_arg (phi, def, single_pred_edge (preheaderbb), locus);
2465     }
2466   flush_pending_stmts (e);
2467   VEC_free (tree, heap, phis);
2468
2469   bodybb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2470   latchbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2471   make_edge (headerbb, bodybb, EDGE_FALLTHRU);
2472   cond_stmt = gimple_build_cond (NE_EXPR, integer_one_node, integer_zero_node,
2473                                  NULL_TREE, NULL_TREE);
2474   bsi = gsi_start_bb (bodybb);
2475   gsi_insert_after (&bsi, cond_stmt, GSI_NEW_STMT);
2476   e = make_edge (bodybb, olddest, EDGE_FALSE_VALUE);
2477   make_edge (bodybb, latchbb, EDGE_TRUE_VALUE);
2478   make_edge (latchbb, headerbb, EDGE_FALLTHRU);
2479
2480   /* Update the loop structures.  */
2481   newloop = duplicate_loop (loop, olddest->loop_father);
2482   newloop->header = headerbb;
2483   newloop->latch = latchbb;
2484   add_bb_to_loop (latchbb, newloop);
2485   add_bb_to_loop (bodybb, newloop);
2486   add_bb_to_loop (headerbb, newloop);
2487   set_immediate_dominator (CDI_DOMINATORS, bodybb, headerbb);
2488   set_immediate_dominator (CDI_DOMINATORS, headerbb, preheaderbb);
2489   set_immediate_dominator (CDI_DOMINATORS, preheaderbb,
2490                            single_exit (loop)->src);
2491   set_immediate_dominator (CDI_DOMINATORS, latchbb, bodybb);
2492   set_immediate_dominator (CDI_DOMINATORS, olddest,
2493                            recompute_dominator (CDI_DOMINATORS, olddest));
2494   /* Create the new iv.  */
2495   oldivvar = VEC_index (tree, loopivs, 0);
2496   ivvar = create_tmp_var (TREE_TYPE (oldivvar), "perfectiv");
2497   add_referenced_var (ivvar);
2498   standard_iv_increment_position (newloop, &bsi, &insert_after);
2499   create_iv (VEC_index (tree, lbounds, 0),
2500              build_int_cst (TREE_TYPE (oldivvar), VEC_index (int, steps, 0)),
2501              ivvar, newloop, &bsi, insert_after, &ivvar, &ivvarinced);
2502
2503   /* Create the new upper bound.  This may be not just a variable, so we copy
2504      it to one just in case.  */
2505
2506   exit_condition = get_loop_exit_condition (newloop);
2507   uboundvar = create_tmp_var (TREE_TYPE (VEC_index (tree, ubounds, 0)),
2508                               "uboundvar");
2509   add_referenced_var (uboundvar);
2510   stmt = gimple_build_assign (uboundvar, VEC_index (tree, ubounds, 0));
2511   uboundvar = make_ssa_name (uboundvar, stmt);
2512   gimple_assign_set_lhs (stmt, uboundvar);
2513
2514   if (insert_after)
2515     gsi_insert_after (&bsi, stmt, GSI_SAME_STMT);
2516   else
2517     gsi_insert_before (&bsi, stmt, GSI_SAME_STMT);
2518   update_stmt (stmt);
2519   gimple_cond_set_condition (exit_condition, GE_EXPR, uboundvar, ivvarinced);
2520   update_stmt (exit_condition);
2521   replacements = htab_create_ggc (20, tree_map_hash,
2522                                   tree_map_eq, NULL);
2523   bbs = get_loop_body_in_dom_order (loop);
2524   /* Now move the statements, and replace the induction variable in the moved
2525      statements with the correct loop induction variable.  */
2526   oldivvar = VEC_index (tree, loopivs, 0);
2527   firstbsi = gsi_start_bb (bodybb);
2528   for (i = loop->num_nodes - 1; i >= 0 ; i--)
2529     {
2530       gimple_stmt_iterator tobsi = gsi_last_bb (bodybb);
2531       if (bbs[i]->loop_father == loop)
2532         {
2533           /* If this is true, we are *before* the inner loop.
2534              If this isn't true, we are *after* it.
2535
2536              The only time can_convert_to_perfect_nest returns true when we
2537              have statements before the inner loop is if they can be moved
2538              into the inner loop.
2539
2540              The only time can_convert_to_perfect_nest returns true when we
2541              have statements after the inner loop is if they can be moved into
2542              the new split loop.  */
2543
2544           if (dominated_by_p (CDI_DOMINATORS, loop->inner->header, bbs[i]))
2545             {
2546               gimple_stmt_iterator header_bsi
2547                 = gsi_after_labels (loop->inner->header);
2548
2549               for (bsi = gsi_start_bb (bbs[i]); !gsi_end_p (bsi);)
2550                 {
2551                   gimple stmt = gsi_stmt (bsi);
2552
2553                   if (stmt == exit_condition
2554                       || not_interesting_stmt (stmt)
2555                       || stmt_is_bumper_for_loop (loop, stmt))
2556                     {
2557                       gsi_next (&bsi);
2558                       continue;
2559                     }
2560
2561                   gsi_move_before (&bsi, &header_bsi);
2562                 }
2563             }
2564           else
2565             {
2566               /* Note that the bsi only needs to be explicitly incremented
2567                  when we don't move something, since it is automatically
2568                  incremented when we do.  */
2569               for (bsi = gsi_start_bb (bbs[i]); !gsi_end_p (bsi);)
2570                 {
2571                   gimple stmt = gsi_stmt (bsi);
2572
2573                   if (stmt == exit_condition
2574                       || not_interesting_stmt (stmt)
2575                       || stmt_is_bumper_for_loop (loop, stmt))
2576                     {
2577                       gsi_next (&bsi);
2578                       continue;
2579                     }
2580
2581                   replace_uses_equiv_to_x_with_y
2582                     (loop, stmt, oldivvar, VEC_index (int, steps, 0), ivvar,
2583                      VEC_index (tree, lbounds, 0), replacements, &firstbsi);
2584
2585                   gsi_move_before (&bsi, &tobsi);
2586
2587                   /* If the statement has any virtual operands, they may
2588                      need to be rewired because the original loop may
2589                      still reference them.  */
2590                   if (gimple_vuse (stmt))
2591                     mark_sym_for_renaming (gimple_vop (cfun));
2592                 }
2593             }
2594
2595         }
2596     }
2597
2598   free (bbs);
2599   htab_delete (replacements);
2600   return perfect_nest_p (loop);
2601 }
2602
2603 /* Return true if TRANS is a legal transformation matrix that respects
2604    the dependence vectors in DISTS and DIRS.  The conservative answer
2605    is false.
2606
2607    "Wolfe proves that a unimodular transformation represented by the
2608    matrix T is legal when applied to a loop nest with a set of
2609    lexicographically non-negative distance vectors RDG if and only if
2610    for each vector d in RDG, (T.d >= 0) is lexicographically positive.
2611    i.e.: if and only if it transforms the lexicographically positive
2612    distance vectors to lexicographically positive vectors.  Note that
2613    a unimodular matrix must transform the zero vector (and only it) to
2614    the zero vector." S.Muchnick.  */
2615
2616 bool
2617 lambda_transform_legal_p (lambda_trans_matrix trans,
2618                           int nb_loops,
2619                           VEC (ddr_p, heap) *dependence_relations)
2620 {
2621   unsigned int i, j;
2622   lambda_vector distres;
2623   struct data_dependence_relation *ddr;
2624
2625   gcc_assert (LTM_COLSIZE (trans) == nb_loops
2626               && LTM_ROWSIZE (trans) == nb_loops);
2627
2628   /* When there are no dependences, the transformation is correct.  */
2629   if (VEC_length (ddr_p, dependence_relations) == 0)
2630     return true;
2631
2632   ddr = VEC_index (ddr_p, dependence_relations, 0);
2633   if (ddr == NULL)
2634     return true;
2635
2636   /* When there is an unknown relation in the dependence_relations, we
2637      know that it is no worth looking at this loop nest: give up.  */
2638   if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2639     return false;
2640
2641   distres = lambda_vector_new (nb_loops);
2642
2643   /* For each distance vector in the dependence graph.  */
2644   for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
2645     {
2646       /* Don't care about relations for which we know that there is no
2647          dependence, nor about read-read (aka. output-dependences):
2648          these data accesses can happen in any order.  */
2649       if (DDR_ARE_DEPENDENT (ddr) == chrec_known
2650           || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
2651         continue;
2652
2653       /* Conservatively answer: "this transformation is not valid".  */
2654       if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2655         return false;
2656
2657       /* If the dependence could not be captured by a distance vector,
2658          conservatively answer that the transform is not valid.  */
2659       if (DDR_NUM_DIST_VECTS (ddr) == 0)
2660         return false;
2661
2662       /* Compute trans.dist_vect */
2663       for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
2664         {
2665           lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
2666                                      DDR_DIST_VECT (ddr, j), distres);
2667
2668           if (!lambda_vector_lexico_pos (distres, nb_loops))
2669             return false;
2670         }
2671     }
2672   return true;
2673 }
2674
2675
2676 /* Collects parameters from affine function ACCESS_FUNCTION, and push
2677    them in PARAMETERS.  */
2678
2679 static void
2680 lambda_collect_parameters_from_af (tree access_function,
2681                                    struct pointer_set_t *param_set,
2682                                    VEC (tree, heap) **parameters)
2683 {
2684   if (access_function == NULL)
2685     return;
2686
2687   if (TREE_CODE (access_function) == SSA_NAME
2688       && pointer_set_contains (param_set, access_function) == 0)
2689     {
2690       pointer_set_insert (param_set, access_function);
2691       VEC_safe_push (tree, heap, *parameters, access_function);
2692     }
2693   else
2694     {
2695       int i, num_operands = tree_operand_length (access_function);
2696
2697       for (i = 0; i < num_operands; i++)
2698         lambda_collect_parameters_from_af (TREE_OPERAND (access_function, i),
2699                                            param_set, parameters);
2700     }
2701 }
2702
2703 /* Collects parameters from DATAREFS, and push them in PARAMETERS.  */
2704
2705 void
2706 lambda_collect_parameters (VEC (data_reference_p, heap) *datarefs,
2707                            VEC (tree, heap) **parameters)
2708 {
2709   unsigned i, j;
2710   struct pointer_set_t *parameter_set = pointer_set_create ();
2711   data_reference_p data_reference;
2712
2713   for (i = 0; VEC_iterate (data_reference_p, datarefs, i, data_reference); i++)
2714     for (j = 0; j < DR_NUM_DIMENSIONS (data_reference); j++)
2715       lambda_collect_parameters_from_af (DR_ACCESS_FN (data_reference, j),
2716                                          parameter_set, parameters);
2717   pointer_set_destroy (parameter_set);
2718 }
2719
2720 /* Translates BASE_EXPR to vector CY.  AM is needed for inferring
2721    indexing positions in the data access vector.  CST is the analyzed
2722    integer constant.  */
2723
2724 static bool
2725 av_for_af_base (tree base_expr, lambda_vector cy, struct access_matrix *am,
2726                 int cst)
2727 {
2728   bool result = true;
2729
2730   switch (TREE_CODE (base_expr))
2731     {
2732     case INTEGER_CST:
2733       /* Constant part.  */
2734       cy[AM_CONST_COLUMN_INDEX (am)] += int_cst_value (base_expr) * cst;
2735       return true;
2736
2737     case SSA_NAME:
2738       {
2739         int param_index =
2740           access_matrix_get_index_for_parameter (base_expr, am);
2741
2742         if (param_index >= 0)
2743           {
2744             cy[param_index] = cst + cy[param_index];
2745             return true;
2746           }
2747
2748         return false;
2749       }
2750
2751     case PLUS_EXPR:
2752       return av_for_af_base (TREE_OPERAND (base_expr, 0), cy, am, cst)
2753         && av_for_af_base (TREE_OPERAND (base_expr, 1), cy, am, cst);
2754
2755     case MINUS_EXPR:
2756       return av_for_af_base (TREE_OPERAND (base_expr, 0), cy, am, cst)
2757         && av_for_af_base (TREE_OPERAND (base_expr, 1), cy, am, -1 * cst);
2758
2759     case MULT_EXPR:
2760       if (TREE_CODE (TREE_OPERAND (base_expr, 0)) == INTEGER_CST)
2761         result = av_for_af_base (TREE_OPERAND (base_expr, 1),
2762                                  cy, am, cst *
2763                                  int_cst_value (TREE_OPERAND (base_expr, 0)));
2764       else if (TREE_CODE (TREE_OPERAND (base_expr, 1)) == INTEGER_CST)
2765         result = av_for_af_base (TREE_OPERAND (base_expr, 0),
2766                                  cy, am, cst *
2767                                  int_cst_value (TREE_OPERAND (base_expr, 1)));
2768       else
2769         result = false;
2770
2771       return result;
2772
2773     case NEGATE_EXPR:
2774       return av_for_af_base (TREE_OPERAND (base_expr, 0), cy, am, -1 * cst);
2775
2776     default:
2777       return false;
2778     }
2779
2780   return result;
2781 }
2782
2783 /* Translates ACCESS_FUN to vector CY.  AM is needed for inferring
2784    indexing positions in the data access vector.  */
2785
2786 static bool
2787 av_for_af (tree access_fun, lambda_vector cy, struct access_matrix *am)
2788 {
2789   switch (TREE_CODE (access_fun))
2790     {
2791     case POLYNOMIAL_CHREC:
2792       {
2793         tree left = CHREC_LEFT (access_fun);
2794         tree right = CHREC_RIGHT (access_fun);
2795         unsigned var;
2796
2797         if (TREE_CODE (right) != INTEGER_CST)
2798           return false;
2799
2800         var = am_vector_index_for_loop (am, CHREC_VARIABLE (access_fun));
2801         cy[var] = int_cst_value (right);
2802
2803         if (TREE_CODE (left) == POLYNOMIAL_CHREC)
2804           return av_for_af (left, cy, am);
2805         else
2806           return av_for_af_base (left, cy, am, 1);
2807       }
2808
2809     case INTEGER_CST:
2810       /* Constant part.  */
2811       return av_for_af_base (access_fun, cy, am, 1);
2812
2813     default:
2814       return false;
2815     }
2816 }
2817
2818 /* Initializes the access matrix for DATA_REFERENCE.  */
2819
2820 static bool
2821 build_access_matrix (data_reference_p data_reference,
2822                      VEC (tree, heap) *parameters,
2823                      VEC (loop_p, heap) *nest,
2824                      struct obstack * lambda_obstack)
2825 {
2826   struct access_matrix *am = (struct access_matrix *)
2827     obstack_alloc(lambda_obstack, sizeof (struct access_matrix));
2828   unsigned i, ndim = DR_NUM_DIMENSIONS (data_reference);
2829   unsigned nivs = VEC_length (loop_p, nest);
2830   unsigned lambda_nb_columns;
2831
2832   AM_LOOP_NEST (am) = nest;
2833   AM_NB_INDUCTION_VARS (am) = nivs;
2834   AM_PARAMETERS (am) = parameters;
2835
2836   lambda_nb_columns = AM_NB_COLUMNS (am);
2837   AM_MATRIX (am) = VEC_alloc (lambda_vector, gc, ndim);
2838
2839   for (i = 0; i < ndim; i++)
2840     {
2841       lambda_vector access_vector = lambda_vector_new (lambda_nb_columns);
2842       tree access_function = DR_ACCESS_FN (data_reference, i);
2843
2844       if (!av_for_af (access_function, access_vector, am))
2845         return false;
2846
2847       VEC_quick_push (lambda_vector, AM_MATRIX (am), access_vector);
2848     }
2849
2850   DR_ACCESS_MATRIX (data_reference) = am;
2851   return true;
2852 }
2853
2854 /* Returns false when one of the access matrices cannot be built.  */
2855
2856 bool
2857 lambda_compute_access_matrices (VEC (data_reference_p, heap) *datarefs,
2858                                 VEC (tree, heap) *parameters,
2859                                 VEC (loop_p, heap) *nest,
2860                                 struct obstack * lambda_obstack)
2861 {
2862   data_reference_p dataref;
2863   unsigned ix;
2864
2865   for (ix = 0; VEC_iterate (data_reference_p, datarefs, ix, dataref); ix++)
2866     if (!build_access_matrix (dataref, parameters, nest, lambda_obstack))
2867       return false;
2868
2869   return true;
2870 }