/* Loop transformation code generation
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.
   Contributed by Daniel Berlin <dberlin@dberlin.org>

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it under
   the terms of the GNU General Public License as published by the Free
   Software Foundation; either version 3, or (at your option) any later
   version.

   GCC is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "tree-flow.h"
#include "tree-chrec.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "tree-pass.h"
/* This loop nest code generation is based on non-singular matrix
   transformations.

   A little terminology and a general sketch of the algorithm.  See "A
   singular loop transformation framework based on non-singular matrices"
   by Wei Li and Keshav Pingali for formal proofs that the various
   statements below are correct.

   A loop iteration space represents the points traversed by the loop.
   A point in the iteration space can be represented by a vector of size
   <loop depth>.  You can therefore represent the iteration space as an
   integral combination of a set of basis vectors.

   A loop iteration space is dense if every integer point between the loop
   bounds is a point in the iteration space.  Every loop with a step of 1
   therefore has a dense iteration space.

   for i = 1 to 3, step 1 is a dense iteration space.

   A loop iteration space is sparse if it is not dense.  That is, the
   iteration space skips integer points that are within the loop bounds.

   for i = 1 to 3, step 2 is a sparse iteration space, because the integer
   point 2 is skipped.

   Dense source spaces are easy to transform, because they don't skip any
   points to begin with.  Thus we can compute the exact bounds of the target
   space using min/max and floor/ceil.

   For a dense source space, we take the transformation matrix and decompose
   it into a lower triangular part (H) and a unimodular part (U).
   We then compute the auxiliary space from the unimodular part (source loop
   nest . U = auxiliary space), which has two important properties:

   1. It traverses the iterations in the same lexicographic order as the
      source space.
   2. It is a dense space when the source is a dense space (even if the
      target space is going to be sparse).

   Given the auxiliary space, we use the lower triangular part to compute the
   bounds in the target space by simple matrix multiplication.
   The gaps in the target space (i.e. the new loop step sizes) will be the
   diagonals of the H matrix.

   Sparse source spaces require another step, because you can't directly
   compute the exact bounds of the auxiliary and target space from the sparse
   space.  Rather than try to come up with a separate algorithm to handle
   sparse source spaces directly, we just find a legal transformation matrix
   that gives you the sparse source space, from a dense space, and then
   transform the dense space.

   For a regular sparse space, you can represent the source space as an
   integer lattice, and the base space of that lattice will always be dense.
   Thus, we effectively use the lattice to figure out the transformation from
   the lattice base space to the sparse iteration space (i.e. what transform
   was applied to the dense space to make it sparse).  We then compose this
   transform with the transformation matrix specified by the user (since our
   matrix transformations are closed under composition, this is okay).  We
   can then use the base space (which is dense) plus the composed
   transformation matrix to compute the rest of the transform using the dense
   space algorithm above.

   In other words, our sparse source space (B) is decomposed into a dense
   base space (A) and a matrix (L) that transforms A into B, such that
   A.L = B.  We then compute the composition of L and the user transformation
   matrix (T), so that T is now a transform from A to the result, instead of
   from B to the result.
   I.e. A.(LT) = result instead of B.T = result.
   Since A is now a dense source space, we can use the dense source space
   algorithm above to compute the result of applying transform (LT) to A.

   Fourier-Motzkin elimination is used to compute the bounds of the base
   space.  */
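
/* A small worked illustration of the sparse-space handling above (an
   explanatory sketch added here, not an example taken from the paper):
   the loop "for i = 0 to 6 step 2" visits {0, 2, 4, 6}.  It is the image of
   the dense base loop "for y = 0 to 3 step 1" under i = 2 * y, so the
   lattice base L is simply (2) with origin 0.  A user transform T is then
   composed with L, and the dense-space algorithm above is run on the dense
   base loop with the combined matrix LT.  */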
static bool perfect_nestify (struct loop *, VEC(tree,heap) *,
			     VEC(tree,heap) *, VEC(int,heap) *,
			     VEC(tree,heap) *);
/* Lattice stuff that is internal to the code generation algorithm.  */

typedef struct lambda_lattice_s
{
  /* Lattice base matrix.  */
  lambda_matrix base;
  /* Lattice dimension.  */
  int dimension;
  /* Origin vector for the coefficients.  */
  lambda_vector origin;
  /* Origin matrix for the invariants.  */
  lambda_matrix origin_invariants;
  /* Number of invariants.  */
  int invariants;
} *lambda_lattice;

#define LATTICE_BASE(T) ((T)->base)
#define LATTICE_DIMENSION(T) ((T)->dimension)
#define LATTICE_ORIGIN(T) ((T)->origin)
#define LATTICE_ORIGIN_INVARIANTS(T) ((T)->origin_invariants)
#define LATTICE_INVARIANTS(T) ((T)->invariants)
static bool lle_equal (lambda_linear_expression, lambda_linear_expression,
		       int, int);
static lambda_lattice lambda_lattice_new (int, int, struct obstack *);
static lambda_lattice lambda_lattice_compute_base (lambda_loopnest,
						   struct obstack *);

static bool can_convert_to_perfect_nest (struct loop *);
/* Create a new lambda loop in LAMBDA_OBSTACK.  */

static lambda_loop
lambda_loop_new (struct obstack *lambda_obstack)
{
  lambda_loop result = (lambda_loop)
    obstack_alloc (lambda_obstack, sizeof (struct lambda_loop_s));
  memset (result, 0, sizeof (struct lambda_loop_s));
  return result;
}
/* Create a new lambda body vector.  */

lambda_body_vector
lambda_body_vector_new (int size, struct obstack *lambda_obstack)
{
  lambda_body_vector ret;

  ret = (lambda_body_vector) obstack_alloc (lambda_obstack,
					    sizeof (*ret));
  LBV_COEFFICIENTS (ret) = lambda_vector_new (size);
  LBV_SIZE (ret) = size;
  LBV_DENOMINATOR (ret) = 1;
  return ret;
}
/* Compute the new coefficients for the vector based on the
   *inverse* of the transformation matrix.  */

lambda_body_vector
lambda_body_vector_compute_new (lambda_trans_matrix transform,
				lambda_body_vector vect,
				struct obstack *lambda_obstack)
{
  lambda_body_vector temp;
  int depth;

  /* Make sure the matrix is square.  */
  gcc_assert (LTM_ROWSIZE (transform) == LTM_COLSIZE (transform));

  depth = LTM_ROWSIZE (transform);

  temp = lambda_body_vector_new (depth, lambda_obstack);
  LBV_DENOMINATOR (temp) =
    LBV_DENOMINATOR (vect) * LTM_DENOMINATOR (transform);
  lambda_vector_matrix_mult (LBV_COEFFICIENTS (vect), depth,
			     LTM_MATRIX (transform), depth,
			     LBV_COEFFICIENTS (temp));
  LBV_SIZE (temp) = LBV_SIZE (vect);
  return temp;
}
/* Print out a lambda body vector.  */

void
print_lambda_body_vector (FILE *outfile, lambda_body_vector body)
{
  print_lambda_vector (outfile, LBV_COEFFICIENTS (body), LBV_SIZE (body));
}
/* Return TRUE if two linear expressions are equal.  */

static bool
lle_equal (lambda_linear_expression lle1, lambda_linear_expression lle2,
	   int depth, int invariants)
{
  int i;

  if (lle1 == NULL || lle2 == NULL)
    return false;
  if (LLE_CONSTANT (lle1) != LLE_CONSTANT (lle2))
    return false;
  if (LLE_DENOMINATOR (lle1) != LLE_DENOMINATOR (lle2))
    return false;
  for (i = 0; i < depth; i++)
    if (LLE_COEFFICIENTS (lle1)[i] != LLE_COEFFICIENTS (lle2)[i])
      return false;
  for (i = 0; i < invariants; i++)
    if (LLE_INVARIANT_COEFFICIENTS (lle1)[i] !=
	LLE_INVARIANT_COEFFICIENTS (lle2)[i])
      return false;
  return true;
}
/* Create a new linear expression with dimension DIM, and total number
   of invariants INVARIANTS.  */

lambda_linear_expression
lambda_linear_expression_new (int dim, int invariants,
			      struct obstack *lambda_obstack)
{
  lambda_linear_expression ret;

  ret = (lambda_linear_expression) obstack_alloc (lambda_obstack,
						  sizeof (*ret));
  LLE_COEFFICIENTS (ret) = lambda_vector_new (dim);
  LLE_CONSTANT (ret) = 0;
  LLE_INVARIANT_COEFFICIENTS (ret) = lambda_vector_new (invariants);
  LLE_DENOMINATOR (ret) = 1;
  LLE_NEXT (ret) = NULL;
  return ret;
}
/* Print out a linear expression EXPR, with SIZE coefficients, to OUTFILE.
   The starting letter used for variable names is START.  */

static void
print_linear_expression (FILE *outfile, lambda_vector expr, int size,
			 char start)
{
  int i;
  bool first = true;

  for (i = 0; i < size; i++)
    if (expr[i] != 0)
      {
	if (first)
	  {
	    if (expr[i] < 0)
	      fprintf (outfile, "-");
	    first = false;
	  }
	else if (expr[i] > 0)
	  fprintf (outfile, " + ");
	else
	  fprintf (outfile, " - ");

	if (abs (expr[i]) == 1)
	  fprintf (outfile, "%c", start + i);
	else
	  fprintf (outfile, "%d%c", abs (expr[i]), start + i);
      }
}
/* Print out a lambda linear expression structure, EXPR, to OUTFILE.  The
   depth/number of coefficients is given by DEPTH, the number of invariants
   is given by INVARIANTS, and the character to start variable names with is
   given by START.  */

static void
print_lambda_linear_expression (FILE *outfile,
				lambda_linear_expression expr,
				int depth, int invariants, char start)
{
  fprintf (outfile, "\tLinear expression: ");
  print_linear_expression (outfile, LLE_COEFFICIENTS (expr), depth, start);
  fprintf (outfile, " constant: %d ", LLE_CONSTANT (expr));
  fprintf (outfile, " invariants: ");
  print_linear_expression (outfile, LLE_INVARIANT_COEFFICIENTS (expr),
			   invariants, 'A');
  fprintf (outfile, " denominator: %d\n", LLE_DENOMINATOR (expr));
}
/* Print a lambda loop structure LOOP to OUTFILE.  The depth/number of
   coefficients is given by DEPTH, the number of invariants is
   given by INVARIANTS, and the character to start variable names with is
   given by START.  */

void
print_lambda_loop (FILE *outfile, lambda_loop loop, int depth,
		   int invariants, char start)
{
  int step;
  lambda_linear_expression expr;

  expr = LL_LINEAR_OFFSET (loop);
  step = LL_STEP (loop);
  fprintf (outfile, " step size = %d \n", step);

  if (expr)
    {
      fprintf (outfile, " linear offset: \n");
      print_lambda_linear_expression (outfile, expr, depth, invariants,
				      start);
    }

  fprintf (outfile, " lower bound: \n");
  for (expr = LL_LOWER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr))
    print_lambda_linear_expression (outfile, expr, depth, invariants, start);
  fprintf (outfile, " upper bound: \n");
  for (expr = LL_UPPER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr))
    print_lambda_linear_expression (outfile, expr, depth, invariants, start);
}
/* Create a new loop nest structure with DEPTH loops, and INVARIANTS as the
   number of invariants.  */

lambda_loopnest
lambda_loopnest_new (int depth, int invariants,
		     struct obstack *lambda_obstack)
{
  lambda_loopnest ret;

  ret = (lambda_loopnest) obstack_alloc (lambda_obstack, sizeof (*ret));
  LN_LOOPS (ret) = (lambda_loop *)
    obstack_alloc (lambda_obstack, depth * sizeof (LN_LOOPS (ret)));
  LN_DEPTH (ret) = depth;
  LN_INVARIANTS (ret) = invariants;
  return ret;
}
/* Print a lambda loopnest structure, NEST, to OUTFILE.  The starting
   character to use for loop names is given by START.  */

void
print_lambda_loopnest (FILE *outfile, lambda_loopnest nest, char start)
{
  int i;

  for (i = 0; i < LN_DEPTH (nest); i++)
    {
      fprintf (outfile, "Loop %c\n", start + i);
      print_lambda_loop (outfile, LN_LOOPS (nest)[i], LN_DEPTH (nest),
			 LN_INVARIANTS (nest), 'i');
      fprintf (outfile, "\n");
    }
}
/* Allocate a new lattice structure of DEPTH x DEPTH, with INVARIANTS number
   of invariants.  */

static lambda_lattice
lambda_lattice_new (int depth, int invariants, struct obstack *lambda_obstack)
{
  lambda_lattice ret
    = (lambda_lattice) obstack_alloc (lambda_obstack, sizeof (*ret));
  LATTICE_BASE (ret) = lambda_matrix_new (depth, depth, lambda_obstack);
  LATTICE_ORIGIN (ret) = lambda_vector_new (depth);
  LATTICE_ORIGIN_INVARIANTS (ret) = lambda_matrix_new (depth, invariants,
							lambda_obstack);
  LATTICE_DIMENSION (ret) = depth;
  LATTICE_INVARIANTS (ret) = invariants;
  return ret;
}
/* Compute the lattice base for NEST.  The lattice base is essentially a
   non-singular transform from a dense base space to a sparse iteration
   space.
   We use it so that we don't have to specially handle the case of a sparse
   iteration space in other parts of the algorithm.  As a result, this
   routine only does something interesting (i.e. produce a matrix that isn't
   the identity matrix) if NEST is a sparse space.  */

static lambda_lattice
lambda_lattice_compute_base (lambda_loopnest nest,
			     struct obstack *lambda_obstack)
{
  lambda_lattice ret;
  int depth, invariants;
  lambda_matrix base;

  int i, j, step;
  lambda_loop loop;
  lambda_linear_expression expression;

  depth = LN_DEPTH (nest);
  invariants = LN_INVARIANTS (nest);

  ret = lambda_lattice_new (depth, invariants, lambda_obstack);
  base = LATTICE_BASE (ret);
  for (i = 0; i < depth; i++)
    {
      loop = LN_LOOPS (nest)[i];
      step = LL_STEP (loop);
      /* If we have a step of 1, then the base is one, and the
	 origin and invariant coefficients are 0.  */
      if (step == 1)
	{
	  for (j = 0; j < depth; j++)
	    base[i][j] = 0;
	  base[i][i] = 1;
	  LATTICE_ORIGIN (ret)[i] = 0;
	  for (j = 0; j < invariants; j++)
	    LATTICE_ORIGIN_INVARIANTS (ret)[i][j] = 0;
	}
      else
	{
	  /* Otherwise, we need the lower bound expression (which must
	     be an affine function) to determine the base.  */
	  expression = LL_LOWER_BOUND (loop);

	  gcc_assert (expression && !LLE_NEXT (expression)
		      && LLE_DENOMINATOR (expression) == 1);

	  /* The lower triangular portion of the base is going to be the
	     coefficient times the step.  */
	  for (j = 0; j < i; j++)
	    base[i][j] = LLE_COEFFICIENTS (expression)[j]
	      * LL_STEP (LN_LOOPS (nest)[j]);

	  base[i][i] = step;
	  for (j = i + 1; j < depth; j++)
	    base[i][j] = 0;

	  /* Origin for this loop is the constant of the lower bound
	     expression.  */
	  LATTICE_ORIGIN (ret)[i] = LLE_CONSTANT (expression);

	  /* Coefficients for the invariants are equal to the invariant
	     coefficients in the expression.  */
	  for (j = 0; j < invariants; j++)
	    LATTICE_ORIGIN_INVARIANTS (ret)[i][j] =
	      LLE_INVARIANT_COEFFICIENTS (expression)[j];
	}
    }
  return ret;
}
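
/* Illustrative sketch (added here, not part of the original comments): for
   the single loop "for i = 1 to 9 step 2", the routine above produces
   base = (2) and origin = (1), i.e. i = 2 * y + 1 where y runs over a dense
   base space.  For a step-1 loop the base row is the identity row and the
   origin is 0, so dense nests really do get the identity lattice.  */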
/* Compute the least common multiple of two numbers A and B.  */

static int
least_common_multiple (int a, int b)
{
  return (abs (a) * abs (b) / gcd (a, b));
}
/* Perform Fourier-Motzkin elimination to calculate the bounds of the
   auxiliary space.
   Fourier-Motzkin is a way of reducing systems of linear inequalities so
   that it is easy to calculate the answer and bounds.

   A sketch of how it works:
   Given a system of linear inequalities, ai * xj >= bk, you can always
   rewrite the constraints so they are all of the form
   a <= x, or x <= b, or x >= constant for some x in x1 ... xj (and some b
   in b1 ... bk, and some a in a1 ... ai).
   You can then eliminate this x from the non-constant inequalities by
   rewriting these as a <= b and x >= constant, and deleting the x variable.
   You can then repeat this for any remaining x variables, and then we have
   an easy to use variable <= constant (or no variables at all) form that we
   can construct our bounds from.

   In our case, each time we eliminate, we construct part of the bound from
   the ith variable, then delete the ith variable.

   Remember that the constants are in our vector a, our coefficient matrix
   is A, and our invariant coefficient matrix is B.

   SIZE is the size of the matrices being passed.
   DEPTH is the loop nest depth.
   INVARIANTS is the number of loop invariants.
   A, B, and a are the coefficient matrix, invariant coefficient, and a
   vector of constants, respectively.  */
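
/* A minimal worked instance of the elimination step described above
   (illustrative only):

     eliminate y from   x - y <= 2   and   y <= 10.
     Rewrite the first constraint as   x - 2 <= y;
     combining it with   y <= 10   gives   x - 2 <= 10, i.e. x <= 12,
     which no longer mentions y and contributes directly to the bound
     on x.  */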
static lambda_loopnest
compute_nest_using_fourier_motzkin (int size,
				    int depth,
				    int invariants,
				    lambda_matrix A,
				    lambda_matrix B,
				    lambda_vector a,
				    struct obstack *lambda_obstack)
{
  int multiple, f1, f2;
  int i, j, k;
  int newsize;
  lambda_linear_expression expression;
  lambda_loop loop;
  lambda_loopnest auxillary_nest;
  lambda_matrix swapmatrix, A1, B1;
  lambda_vector swapvector, a1;

  A1 = lambda_matrix_new (128, depth, lambda_obstack);
  B1 = lambda_matrix_new (128, invariants, lambda_obstack);
  a1 = lambda_vector_new (128);

  auxillary_nest = lambda_loopnest_new (depth, invariants, lambda_obstack);

  for (i = depth - 1; i >= 0; i--)
    {
      loop = lambda_loop_new (lambda_obstack);
      LN_LOOPS (auxillary_nest)[i] = loop;
      LL_STEP (loop) = 1;

      for (j = 0; j < size; j++)
	{
	  if (A[j][i] < 0)
	    {
	      /* Any linear expression in the matrix with a coefficient less
		 than 0 becomes part of the new lower bound.  */
	      expression = lambda_linear_expression_new (depth, invariants,
							 lambda_obstack);

	      for (k = 0; k < i; k++)
		LLE_COEFFICIENTS (expression)[k] = A[j][k];

	      for (k = 0; k < invariants; k++)
		LLE_INVARIANT_COEFFICIENTS (expression)[k] = -1 * B[j][k];

	      LLE_DENOMINATOR (expression) = -1 * A[j][i];
	      LLE_CONSTANT (expression) = -1 * a[j];

	      /* Ignore if identical to the existing lower bound.  */
	      if (!lle_equal (LL_LOWER_BOUND (loop),
			      expression, depth, invariants))
		{
		  LLE_NEXT (expression) = LL_LOWER_BOUND (loop);
		  LL_LOWER_BOUND (loop) = expression;
		}
	    }
	  else if (A[j][i] > 0)
	    {
	      /* Any linear expression with a coefficient greater than 0
		 becomes part of the new upper bound.  */
	      expression = lambda_linear_expression_new (depth, invariants,
							 lambda_obstack);

	      for (k = 0; k < i; k++)
		LLE_COEFFICIENTS (expression)[k] = -1 * A[j][k];

	      for (k = 0; k < invariants; k++)
		LLE_INVARIANT_COEFFICIENTS (expression)[k] = B[j][k];

	      LLE_DENOMINATOR (expression) = A[j][i];
	      LLE_CONSTANT (expression) = a[j];

	      /* Ignore if identical to the existing upper bound.  */
	      if (!lle_equal (LL_UPPER_BOUND (loop),
			      expression, depth, invariants))
		{
		  LLE_NEXT (expression) = LL_UPPER_BOUND (loop);
		  LL_UPPER_BOUND (loop) = expression;
		}
	    }
	}

      /* This portion creates a new system of linear inequalities by deleting
	 the i'th variable, reducing the system by one variable.  */
      newsize = 0;
      for (j = 0; j < size; j++)
	{
	  /* If the coefficient for the i'th variable is 0, then we can just
	     eliminate the variable straightaway.  Otherwise, we have to
	     multiply through by the coefficients we are eliminating.  */
	  if (A[j][i] == 0)
	    {
	      lambda_vector_copy (A[j], A1[newsize], depth);
	      lambda_vector_copy (B[j], B1[newsize], invariants);
	      a1[newsize] = a[j];
	      newsize++;
	    }
	  else if (A[j][i] > 0)
	    {
	      for (k = 0; k < size; k++)
		if (A[k][i] < 0)
		  {
		    multiple = least_common_multiple (A[j][i], A[k][i]);
		    f1 = multiple / A[j][i];
		    f2 = -1 * multiple / A[k][i];

		    lambda_vector_add_mc (A[j], f1, A[k], f2,
					  A1[newsize], depth);
		    lambda_vector_add_mc (B[j], f1, B[k], f2,
					  B1[newsize], invariants);
		    a1[newsize] = f1 * a[j] + f2 * a[k];
		    newsize++;
		  }
	    }
	}

      /* Swap in the newly reduced system for the next iteration.  */
      swapmatrix = A;
      A = A1;
      A1 = swapmatrix;

      swapmatrix = B;
      B = B1;
      B1 = swapmatrix;

      swapvector = a;
      a = a1;
      a1 = swapvector;

      size = newsize;
    }

  return auxillary_nest;
}
/* Compute the loop bounds for the auxiliary space NEST.
   Input system used is Ax <= b.  TRANS is the unimodular transformation.
   Given the original nest, this function will
   1. Convert the nest into matrix form, which consists of a matrix for the
      coefficients, a matrix for the invariant coefficients, and a vector
      for the constants.
   2. Use the matrix form to calculate the lattice base for the nest (which
      is dense).
   3. Compose the dense space transform with the user specified transform,
      to get a transform we can easily calculate transformed bounds for.
   4. Multiply the composed transformation matrix times the matrix form of
      the loop.
   5. Transform the newly created matrix (from step 4) back into a loop nest
      using Fourier-Motzkin elimination to figure out the bounds.  */
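
/* Sketch of step 1 above for a single loop (an added illustration, using
   the Ax <= a + B convention described in the function body): for
   "for i = 0; i <= n - 1; i++" the lower bound 0 <= i becomes the row
   -i <= 0 (A row [-1], a entry 0, B row [0]), and the upper bound
   i <= n - 1 becomes i <= -1 + n (A row [1], a entry -1, B row [1], where
   the invariant column corresponds to n).  */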
static lambda_loopnest
lambda_compute_auxillary_space (lambda_loopnest nest,
				lambda_trans_matrix trans,
				struct obstack *lambda_obstack)
{
  lambda_matrix A, B, A1, B1;
  lambda_vector a, a1;
  lambda_matrix invertedtrans;
  int depth, invariants, size;
  int i, j;
  lambda_loop loop;
  lambda_linear_expression expression;
  lambda_lattice lattice;

  depth = LN_DEPTH (nest);
  invariants = LN_INVARIANTS (nest);

  /* Unfortunately, we can't know the number of constraints we'll have
     ahead of time, but this should be enough even in ridiculous loop nest
     cases.  We must not go over this limit.  */
  A = lambda_matrix_new (128, depth, lambda_obstack);
  B = lambda_matrix_new (128, invariants, lambda_obstack);
  a = lambda_vector_new (128);

  A1 = lambda_matrix_new (128, depth, lambda_obstack);
  B1 = lambda_matrix_new (128, invariants, lambda_obstack);
  a1 = lambda_vector_new (128);

  /* Store the bounds in the equation matrix A, constant vector a, and
     invariant matrix B, so that we have Ax <= a + B.
     This requires a little equation rearranging so that everything is on the
     correct side of the inequality.  */
  size = 0;
  for (i = 0; i < depth; i++)
    {
      loop = LN_LOOPS (nest)[i];

      /* First we do the lower bound.  */
      if (LL_STEP (loop) > 0)
	expression = LL_LOWER_BOUND (loop);
      else
	expression = LL_UPPER_BOUND (loop);

      for (; expression != NULL; expression = LLE_NEXT (expression))
	{
	  /* Fill in the coefficient.  */
	  for (j = 0; j < i; j++)
	    A[size][j] = LLE_COEFFICIENTS (expression)[j];

	  /* And the invariant coefficient.  */
	  for (j = 0; j < invariants; j++)
	    B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j];

	  /* And the constant.  */
	  a[size] = LLE_CONSTANT (expression);

	  /* Convert (2x+3y+2+b)/4 <= z to 2x+3y-4z <= -2-b.  IE put all
	     constants and single variables on the right hand side.  */
	  A[size][i] = -1 * LLE_DENOMINATOR (expression);
	  a[size] *= -1;
	  for (j = 0; j < invariants; j++)
	    B[size][j] *= -1;

	  size++;
	  /* Need to increase matrix sizes above.  */
	  gcc_assert (size <= 127);
	}

      /* Then do the exact same thing for the upper bounds.  */
      if (LL_STEP (loop) > 0)
	expression = LL_UPPER_BOUND (loop);
      else
	expression = LL_LOWER_BOUND (loop);

      for (; expression != NULL; expression = LLE_NEXT (expression))
	{
	  /* Fill in the coefficient.  */
	  for (j = 0; j < i; j++)
	    A[size][j] = LLE_COEFFICIENTS (expression)[j];

	  /* And the invariant coefficient.  */
	  for (j = 0; j < invariants; j++)
	    B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j];

	  /* And the constant.  */
	  a[size] = LLE_CONSTANT (expression);

	  /* Convert z <= (2x+3y+2+b)/4 to -2x-3y+4z <= 2+b.  */
	  for (j = 0; j < i; j++)
	    A[size][j] *= -1;
	  A[size][i] = LLE_DENOMINATOR (expression);

	  size++;
	  /* Need to increase matrix sizes above.  */
	  gcc_assert (size <= 127);
	}
    }

  /* Compute the lattice base x = base * y + origin, where y is the
     base space.  */
  lattice = lambda_lattice_compute_base (nest, lambda_obstack);

  /* Ax <= a + B then becomes ALy <= a+B - A*origin.  L is the lattice base.  */

  /* A1 = A * L.  */
  lambda_matrix_mult (A, LATTICE_BASE (lattice), A1, size, depth, depth);

  /* a1 = a - A * origin constant.  */
  lambda_matrix_vector_mult (A, size, depth, LATTICE_ORIGIN (lattice), a1);
  lambda_vector_add_mc (a, 1, a1, -1, a1, size);

  /* B1 = B - A * origin invariant.  */
  lambda_matrix_mult (A, LATTICE_ORIGIN_INVARIANTS (lattice), B1, size, depth,
		      invariants);
  lambda_matrix_add_mc (B, 1, B1, -1, B1, size, invariants);

  /* Now compute the auxiliary space bounds by first inverting U, multiplying
     it by A1, then performing Fourier-Motzkin.  */
  invertedtrans = lambda_matrix_new (depth, depth, lambda_obstack);

  /* Compute the inverse of U.  */
  lambda_matrix_inverse (LTM_MATRIX (trans),
			 invertedtrans, depth, lambda_obstack);

  /* A = A1 * inv(U).  */
  lambda_matrix_mult (A1, invertedtrans, A, size, depth, depth);

  return compute_nest_using_fourier_motzkin (size, depth, invariants,
					     A, B1, a1, lambda_obstack);
}
/* Compute the loop bounds for the target space, using the bounds of
   the auxiliary nest AUXILLARY_NEST, and the triangular matrix H.
   The target space loop bounds are computed by multiplying the triangular
   matrix H by the auxiliary nest, to get the new loop bounds.  The sign of
   the loop steps (positive or negative) is then used to swap the bounds if
   the loop counts downwards.
   Return the target loopnest.  */
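
/* For intuition (an added sketch, not from the original comments): if the
   lower triangular matrix is H = { {1, 0}, {1, 2} }, the two target loops
   get steps 1 and 2 (the diagonal entries of H), and since both steps are
   positive no bounds need to be exchanged by the step-sign pass at the end
   of this function.  */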
static lambda_loopnest
lambda_compute_target_space (lambda_loopnest auxillary_nest,
			     lambda_trans_matrix H, lambda_vector stepsigns,
			     struct obstack *lambda_obstack)
{
  lambda_matrix inverse, H1;
  int determinant, i, j;
  int gcd1, gcd2;
  int factor;

  lambda_loopnest target_nest;
  int depth, invariants;
  lambda_matrix target;

  lambda_loop auxillary_loop, target_loop;
  lambda_linear_expression expression, auxillary_expr, target_expr, tmp_expr;

  depth = LN_DEPTH (auxillary_nest);
  invariants = LN_INVARIANTS (auxillary_nest);

  inverse = lambda_matrix_new (depth, depth, lambda_obstack);
  determinant = lambda_matrix_inverse (LTM_MATRIX (H), inverse, depth,
				       lambda_obstack);

  /* H1 is H excluding its diagonal.  */
  H1 = lambda_matrix_new (depth, depth, lambda_obstack);
  lambda_matrix_copy (LTM_MATRIX (H), H1, depth, depth);

  for (i = 0; i < depth; i++)
    H1[i][i] = 0;

  /* Computes the linear offsets of the loop bounds.  */
  target = lambda_matrix_new (depth, depth, lambda_obstack);
  lambda_matrix_mult (H1, inverse, target, depth, depth, depth);

  target_nest = lambda_loopnest_new (depth, invariants, lambda_obstack);

  for (i = 0; i < depth; i++)
    {
      /* Get a new loop structure.  */
      target_loop = lambda_loop_new (lambda_obstack);
      LN_LOOPS (target_nest)[i] = target_loop;

      /* Computes the gcd of the coefficients of the linear part.  */
      gcd1 = lambda_vector_gcd (target[i], i);

      /* Include the denominator in the GCD.  */
      gcd1 = gcd (gcd1, determinant);

      /* Now divide through by the gcd.  */
      for (j = 0; j < i; j++)
	target[i][j] = target[i][j] / gcd1;

      expression = lambda_linear_expression_new (depth, invariants,
						 lambda_obstack);
      lambda_vector_copy (target[i], LLE_COEFFICIENTS (expression), depth);
      LLE_DENOMINATOR (expression) = determinant / gcd1;
      LLE_CONSTANT (expression) = 0;
      lambda_vector_clear (LLE_INVARIANT_COEFFICIENTS (expression),
			   invariants);
      LL_LINEAR_OFFSET (target_loop) = expression;
    }

  /* For each loop, compute the new bounds from H.  */
  for (i = 0; i < depth; i++)
    {
      auxillary_loop = LN_LOOPS (auxillary_nest)[i];
      target_loop = LN_LOOPS (target_nest)[i];
      LL_STEP (target_loop) = LTM_MATRIX (H)[i][i];
      factor = LTM_MATRIX (H)[i][i];

      /* First we do the lower bound.  */
      auxillary_expr = LL_LOWER_BOUND (auxillary_loop);

      for (; auxillary_expr != NULL;
	   auxillary_expr = LLE_NEXT (auxillary_expr))
	{
	  target_expr = lambda_linear_expression_new (depth, invariants,
						      lambda_obstack);
	  lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
				     depth, inverse, depth,
				     LLE_COEFFICIENTS (target_expr));
	  lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
				    LLE_COEFFICIENTS (target_expr), depth,
				    factor);

	  LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
	  lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
			      LLE_INVARIANT_COEFFICIENTS (target_expr),
			      invariants);
	  lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
				    LLE_INVARIANT_COEFFICIENTS (target_expr),
				    invariants, factor);
	  LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);

	  if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
	    {
	      LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
		* determinant;
	      lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
					(target_expr),
					LLE_INVARIANT_COEFFICIENTS
					(target_expr), invariants,
					determinant);
	      LLE_DENOMINATOR (target_expr) =
		LLE_DENOMINATOR (target_expr) * determinant;
	    }
	  /* Find the gcd and divide by it here, rather than doing it
	     at the tree level.  */
	  gcd1 = lambda_vector_gcd (LLE_COEFFICIENTS (target_expr), depth);
	  gcd2 = lambda_vector_gcd (LLE_INVARIANT_COEFFICIENTS (target_expr),
				    invariants);
	  gcd1 = gcd (gcd1, gcd2);
	  gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
	  gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
	  for (j = 0; j < depth; j++)
	    LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
	  for (j = 0; j < invariants; j++)
	    LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
	  LLE_CONSTANT (target_expr) /= gcd1;
	  LLE_DENOMINATOR (target_expr) /= gcd1;
	  /* Ignore if identical to existing bound.  */
	  if (!lle_equal (LL_LOWER_BOUND (target_loop), target_expr, depth,
			  invariants))
	    {
	      LLE_NEXT (target_expr) = LL_LOWER_BOUND (target_loop);
	      LL_LOWER_BOUND (target_loop) = target_expr;
	    }
	}
      /* Now do the upper bound.  */
      auxillary_expr = LL_UPPER_BOUND (auxillary_loop);

      for (; auxillary_expr != NULL;
	   auxillary_expr = LLE_NEXT (auxillary_expr))
	{
	  target_expr = lambda_linear_expression_new (depth, invariants,
						      lambda_obstack);
	  lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
				     depth, inverse, depth,
				     LLE_COEFFICIENTS (target_expr));
	  lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
				    LLE_COEFFICIENTS (target_expr), depth,
				    factor);
	  LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
	  lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
			      LLE_INVARIANT_COEFFICIENTS (target_expr),
			      invariants);
	  lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
				    LLE_INVARIANT_COEFFICIENTS (target_expr),
				    invariants, factor);
	  LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);

	  if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
	    {
	      LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
		* determinant;
	      lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
					(target_expr),
					LLE_INVARIANT_COEFFICIENTS
					(target_expr), invariants,
					determinant);
	      LLE_DENOMINATOR (target_expr) =
		LLE_DENOMINATOR (target_expr) * determinant;
	    }
	  /* Find the gcd and divide by it here, instead of at the
	     tree level.  */
	  gcd1 = lambda_vector_gcd (LLE_COEFFICIENTS (target_expr), depth);
	  gcd2 = lambda_vector_gcd (LLE_INVARIANT_COEFFICIENTS (target_expr),
				    invariants);
	  gcd1 = gcd (gcd1, gcd2);
	  gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
	  gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
	  for (j = 0; j < depth; j++)
	    LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
	  for (j = 0; j < invariants; j++)
	    LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
	  LLE_CONSTANT (target_expr) /= gcd1;
	  LLE_DENOMINATOR (target_expr) /= gcd1;
	  /* Ignore if equal to existing bound.  */
	  if (!lle_equal (LL_UPPER_BOUND (target_loop), target_expr, depth,
			  invariants))
	    {
	      LLE_NEXT (target_expr) = LL_UPPER_BOUND (target_loop);
	      LL_UPPER_BOUND (target_loop) = target_expr;
	    }
	}
    }

  for (i = 0; i < depth; i++)
    {
      target_loop = LN_LOOPS (target_nest)[i];
      /* If necessary, exchange the upper and lower bounds and negate
	 the step size.  */
      if (stepsigns[i] < 0)
	{
	  LL_STEP (target_loop) *= -1;
	  tmp_expr = LL_LOWER_BOUND (target_loop);
	  LL_LOWER_BOUND (target_loop) = LL_UPPER_BOUND (target_loop);
	  LL_UPPER_BOUND (target_loop) = tmp_expr;
	}
    }

  return target_nest;
}
/* Compute the step signs of TRANS, using TRANS and STEPSIGNS.  Return the
   new step signs.  */

static lambda_vector
lambda_compute_step_signs (lambda_trans_matrix trans,
			   lambda_vector stepsigns,
			   struct obstack *lambda_obstack)
{
  lambda_matrix matrix, H;
  int size;
  lambda_vector newsteps;
  int i, j, factor, minimum_column;
  int temp;

  matrix = LTM_MATRIX (trans);
  size = LTM_ROWSIZE (trans);
  H = lambda_matrix_new (size, size, lambda_obstack);

  newsteps = lambda_vector_new (size);
  lambda_vector_copy (stepsigns, newsteps, size);

  lambda_matrix_copy (matrix, H, size, size);

  for (j = 0; j < size; j++)
    {
      lambda_vector row;
      row = H[j];
      for (i = j; i < size; i++)
	if (row[i] < 0)
	  lambda_matrix_col_negate (H, size, i);
      while (lambda_vector_first_nz (row, size, j + 1) < size)
	{
	  minimum_column = lambda_vector_min_nz (row, size, j);
	  lambda_matrix_col_exchange (H, size, j, minimum_column);

	  temp = newsteps[j];
	  newsteps[j] = newsteps[minimum_column];
	  newsteps[minimum_column] = temp;

	  for (i = j + 1; i < size; i++)
	    {
	      factor = row[i] / row[j];
	      lambda_matrix_col_add (H, size, j, i, -1 * factor);
	    }
	}
    }
  return newsteps;
}
/* Transform NEST according to TRANS, and return the new loopnest.
   This involves
   1. Computing a lattice base for the transformation
   2. Composing the dense base with the specified transformation (TRANS)
   3. Decomposing the combined transformation into a lower triangular
      portion, and a unimodular portion.
   4. Computing the auxiliary nest using the unimodular portion.
   5. Computing the target nest using the auxiliary nest and the lower
      triangular portion.  */
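
/* As an added illustration: interchanging a dense 2-deep nest corresponds
   to TRANS = { {0, 1}, {1, 0} }.  Because the nest is dense, the lattice
   base computed in step 1 is the identity, so the composed matrix of step 2
   is TRANS itself; steps 3-5 then split it into H and U and rebuild the
   bounds as described above.  */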
lambda_loopnest
lambda_loopnest_transform (lambda_loopnest nest, lambda_trans_matrix trans,
			   struct obstack *lambda_obstack)
{
  lambda_loopnest auxillary_nest, target_nest;

  int depth, invariants;
  int i, j;
  lambda_lattice lattice;
  lambda_trans_matrix trans1, H, U;
  lambda_loop loop;
  lambda_linear_expression expression;
  lambda_vector origin;
  lambda_matrix origin_invariants;
  lambda_vector stepsigns;
  int f;

  depth = LN_DEPTH (nest);
  invariants = LN_INVARIANTS (nest);

  /* Keep track of the signs of the loop steps.  */
  stepsigns = lambda_vector_new (depth);
  for (i = 0; i < depth; i++)
    {
      if (LL_STEP (LN_LOOPS (nest)[i]) > 0)
	stepsigns[i] = 1;
      else
	stepsigns[i] = -1;
    }

  /* Compute the lattice base.  */
  lattice = lambda_lattice_compute_base (nest, lambda_obstack);
  trans1 = lambda_trans_matrix_new (depth, depth, lambda_obstack);

  /* Multiply the transformation matrix by the lattice base.  */
  lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_BASE (lattice),
		      LTM_MATRIX (trans1), depth, depth, depth);

  /* Compute the Hermite normal form for the new transformation matrix.  */
  H = lambda_trans_matrix_new (depth, depth, lambda_obstack);
  U = lambda_trans_matrix_new (depth, depth, lambda_obstack);
  lambda_matrix_hermite (LTM_MATRIX (trans1), depth, LTM_MATRIX (H),
			 LTM_MATRIX (U));

  /* Compute the auxiliary loop nest's space from the unimodular
     portion.  */
  auxillary_nest = lambda_compute_auxillary_space (nest, U,
						   lambda_obstack);

  /* Compute the loop step signs from the old step signs and the
     transformation matrix.  */
  stepsigns = lambda_compute_step_signs (trans1, stepsigns,
					 lambda_obstack);

  /* Compute the target loop nest space from the auxiliary nest and
     the lower triangular matrix H.  */
  target_nest = lambda_compute_target_space (auxillary_nest, H, stepsigns,
					     lambda_obstack);
  origin = lambda_vector_new (depth);
  origin_invariants = lambda_matrix_new (depth, invariants, lambda_obstack);
  lambda_matrix_vector_mult (LTM_MATRIX (trans), depth, depth,
			     LATTICE_ORIGIN (lattice), origin);
  lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_ORIGIN_INVARIANTS (lattice),
		      origin_invariants, depth, depth, invariants);

  for (i = 0; i < depth; i++)
    {
      loop = LN_LOOPS (target_nest)[i];
      expression = LL_LINEAR_OFFSET (loop);
      if (lambda_vector_zerop (LLE_COEFFICIENTS (expression), depth))
	f = 1;
      else
	f = LLE_DENOMINATOR (expression);

      LLE_CONSTANT (expression) += f * origin[i];

      for (j = 0; j < invariants; j++)
	LLE_INVARIANT_COEFFICIENTS (expression)[j] +=
	  f * origin_invariants[i][j];
    }

  return target_nest;
}
/* Convert a gcc tree expression EXPR to a lambda linear expression, and
   return the new expression.  DEPTH is the depth of the loopnest.
   OUTERINDUCTIONVARS is an array of the induction variables for outer loops
   in this nest.  INVARIANTS is the array of invariants for the loop.  EXTRA
   is the amount we have to add/subtract from the expression because of the
   type of comparison it is used in.  */

static lambda_linear_expression
gcc_tree_to_linear_expression (int depth, tree expr,
			       VEC(tree,heap) *outerinductionvars,
			       VEC(tree,heap) *invariants, int extra,
			       struct obstack *lambda_obstack)
{
  lambda_linear_expression lle = NULL;

  switch (TREE_CODE (expr))
    {
    case INTEGER_CST:
      {
	lle = lambda_linear_expression_new (depth, 2 * depth, lambda_obstack);
	LLE_CONSTANT (lle) = TREE_INT_CST_LOW (expr);
	if (extra != 0)
	  LLE_CONSTANT (lle) += extra;

	LLE_DENOMINATOR (lle) = 1;
      }
      break;
    case SSA_NAME:
      {
	tree iv, invar;
	unsigned i;

	FOR_EACH_VEC_ELT (tree, outerinductionvars, i, iv)
	  if (iv != NULL
	      && SSA_NAME_VAR (iv) == SSA_NAME_VAR (expr))
	    {
	      lle = lambda_linear_expression_new (depth, 2 * depth,
						  lambda_obstack);
	      LLE_COEFFICIENTS (lle)[i] = 1;
	      if (extra != 0)
		LLE_CONSTANT (lle) = extra;

	      LLE_DENOMINATOR (lle) = 1;
	    }
	FOR_EACH_VEC_ELT (tree, invariants, i, invar)
	  if (invar != NULL
	      && SSA_NAME_VAR (invar) == SSA_NAME_VAR (expr))
	    {
	      lle = lambda_linear_expression_new (depth, 2 * depth,
						  lambda_obstack);
	      LLE_INVARIANT_COEFFICIENTS (lle)[i] = 1;
	      if (extra != 0)
		LLE_CONSTANT (lle) = extra;
	      LLE_DENOMINATOR (lle) = 1;
	    }
      }
      break;
    default:
      return NULL;
    }

  return lle;
}
/* Return the depth of the loopnest NEST.  */

static int
depth_of_nest (struct loop *nest)
{
  int depth = 0;
  while (nest)
    {
      depth++;
      nest = nest->inner;
    }
  return depth;
}
/* Return true if OP is invariant in LOOP and all outer loops.  */

static bool
invariant_in_loop_and_outer_loops (struct loop *loop, tree op)
{
  if (is_gimple_min_invariant (op))
    return true;
  if (loop_depth (loop) == 0)
    return true;
  if (!expr_invariant_in_loop_p (loop, op))
    return false;
  if (!invariant_in_loop_and_outer_loops (loop_outer (loop), op))
    return false;
  return true;
}
/* Generate a lambda loop from a gcc loop LOOP.  Return the new lambda loop,
   or NULL if it could not be converted.
   DEPTH is the depth of the loop.
   INVARIANTS is a pointer to the array of loop invariants.
   The induction variable for this loop should be stored in the parameter
   OURINDUCTIONVAR.
   OUTERINDUCTIONVARS is an array of induction variables for outer loops.  */

static lambda_loop
gcc_loop_to_lambda_loop (struct loop *loop, int depth,
			 VEC(tree,heap) **invariants,
			 tree *ourinductionvar,
			 VEC(tree,heap) *outerinductionvars,
			 VEC(tree,heap) **lboundvars,
			 VEC(tree,heap) **uboundvars,
			 VEC(int,heap) **steps,
			 struct obstack *lambda_obstack)
{
  gimple phi;
  gimple exit_cond;
  tree access_fn, inductionvar;
  tree step;
  lambda_loop lloop = NULL;
  lambda_linear_expression lbound, ubound;
  tree test_lhs, test_rhs;
  int stepint;
  int extra = 0;
  tree lboundvar, uboundvar, uboundresult;

  /* Find out induction var and exit condition.  */
  inductionvar = find_induction_var_from_exit_cond (loop);
  exit_cond = get_loop_exit_condition (loop);

  if (inductionvar == NULL || exit_cond == NULL)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Unable to convert loop: Cannot determine exit condition or induction variable for loop.\n");
      return NULL;
    }

  if (SSA_NAME_DEF_STMT (inductionvar) == NULL)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Unable to convert loop: Cannot find PHI node for induction variable\n");
      return NULL;
    }

  phi = SSA_NAME_DEF_STMT (inductionvar);
  if (gimple_code (phi) != GIMPLE_PHI)
    {
      tree op = SINGLE_SSA_TREE_OPERAND (phi, SSA_OP_USE);

      if (!op)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file,
		     "Unable to convert loop: Cannot find PHI node for induction variable\n");
	  return NULL;
	}

      phi = SSA_NAME_DEF_STMT (op);
      if (gimple_code (phi) != GIMPLE_PHI)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file,
		     "Unable to convert loop: Cannot find PHI node for induction variable\n");
	  return NULL;
	}
    }

  /* The induction variable name/version we want to put in the array is the
     result of the induction variable phi node.  */
  *ourinductionvar = PHI_RESULT (phi);
  access_fn = instantiate_parameters
    (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
  if (access_fn == chrec_dont_know)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Unable to convert loop: Access function for induction variable phi is unknown\n");
      return NULL;
    }

  step = evolution_part_in_loop_num (access_fn, loop->num);
  if (!step || step == chrec_dont_know)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Unable to convert loop: Cannot determine step of loop.\n");
      return NULL;
    }
  if (TREE_CODE (step) != INTEGER_CST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Unable to convert loop: Step of loop is not integer.\n");
      return NULL;
    }

  stepint = TREE_INT_CST_LOW (step);

  /* Only want phis for induction vars, which will have two
     arguments.  */
  if (gimple_phi_num_args (phi) != 2)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Unable to convert loop: PHI node for induction variable has >2 arguments\n");
      return NULL;
    }

  /* Another induction variable check.  One argument's source should be
     in the loop, one outside the loop.  */
  if (flow_bb_inside_loop_p (loop, gimple_phi_arg_edge (phi, 0)->src)
      && flow_bb_inside_loop_p (loop, gimple_phi_arg_edge (phi, 1)->src))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Unable to convert loop: PHI edges both inside loop, or both outside loop.\n");
      return NULL;
    }

  if (flow_bb_inside_loop_p (loop, gimple_phi_arg_edge (phi, 0)->src))
    {
      lboundvar = PHI_ARG_DEF (phi, 1);
      lbound = gcc_tree_to_linear_expression (depth, lboundvar,
					      outerinductionvars, *invariants,
					      0, lambda_obstack);
    }
  else
    {
      lboundvar = PHI_ARG_DEF (phi, 0);
      lbound = gcc_tree_to_linear_expression (depth, lboundvar,
					      outerinductionvars, *invariants,
					      0, lambda_obstack);
    }

  if (!lbound)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Unable to convert loop: Cannot convert lower bound to linear expression\n");
      return NULL;
    }

  /* One part of the test may be a loop invariant tree.  */
  VEC_reserve (tree, heap, *invariants, 1);
  test_lhs = gimple_cond_lhs (exit_cond);
  test_rhs = gimple_cond_rhs (exit_cond);

  if (TREE_CODE (test_rhs) == SSA_NAME
      && invariant_in_loop_and_outer_loops (loop, test_rhs))
    VEC_quick_push (tree, *invariants, test_rhs);
  else if (TREE_CODE (test_lhs) == SSA_NAME
	   && invariant_in_loop_and_outer_loops (loop, test_lhs))
    VEC_quick_push (tree, *invariants, test_lhs);

  /* The non-induction variable part of the test is the upper bound
     variable.  */
  if (test_lhs == inductionvar)
    uboundvar = test_rhs;
  else
    uboundvar = test_lhs;

  /* We only size the vectors assuming we have, at max, 2 times as many
     invariants as we do loops (one for each bound).
     This is just an arbitrary number, but it has to be matched against the
     code below.  */
  gcc_assert (VEC_length (tree, *invariants) <= (unsigned int) (2 * depth));

  /* We might have some leftover.  */
  if (gimple_cond_code (exit_cond) == LT_EXPR)
    extra = -1 * stepint;
  else if (gimple_cond_code (exit_cond) == NE_EXPR)
    extra = -1 * stepint;
  else if (gimple_cond_code (exit_cond) == GT_EXPR)
    extra = -1 * stepint;
  else if (gimple_cond_code (exit_cond) == EQ_EXPR)
    extra = 1 * stepint;

  ubound = gcc_tree_to_linear_expression (depth, uboundvar,
					  outerinductionvars,
					  *invariants, extra, lambda_obstack);
  uboundresult = build2 (PLUS_EXPR, TREE_TYPE (uboundvar), uboundvar,
			 build_int_cst (TREE_TYPE (uboundvar), extra));
  VEC_safe_push (tree, heap, *uboundvars, uboundresult);
  VEC_safe_push (tree, heap, *lboundvars, lboundvar);
  VEC_safe_push (int, heap, *steps, stepint);
  if (!ubound)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Unable to convert loop: Cannot convert upper bound to linear expression\n");
      return NULL;
    }

  lloop = lambda_loop_new (lambda_obstack);
  LL_STEP (lloop) = stepint;
  LL_LOWER_BOUND (lloop) = lbound;
  LL_UPPER_BOUND (lloop) = ubound;
  return lloop;
}
/* Given a LOOP, find the induction variable it is testing against in the
   exit condition.  Return the induction variable if found, NULL otherwise.  */

tree
find_induction_var_from_exit_cond (struct loop *loop)
{
  gimple expr = get_loop_exit_condition (loop);
  tree ivarop;
  tree test_lhs, test_rhs;

  if (expr == NULL)
    return NULL_TREE;
  if (gimple_code (expr) != GIMPLE_COND)
    return NULL_TREE;
  test_lhs = gimple_cond_lhs (expr);
  test_rhs = gimple_cond_rhs (expr);

  /* Find the side that is invariant in this loop.  The ivar must be the
     other side.  */
  if (expr_invariant_in_loop_p (loop, test_lhs))
    ivarop = test_rhs;
  else if (expr_invariant_in_loop_p (loop, test_rhs))
    ivarop = test_lhs;
  else
    return NULL_TREE;

  if (TREE_CODE (ivarop) != SSA_NAME)
    return NULL_TREE;
  return ivarop;
}
DEF_VEC_P(lambda_loop);
DEF_VEC_ALLOC_P(lambda_loop,heap);
/* Generate a lambda loopnest from a gcc loopnest LOOP_NEST.
   Return the new loop nest.
   INDUCTIONVARS is a pointer to an array of induction variables for the
   loopnest that will be filled in during this process.
   INVARIANTS is a pointer to an array of invariants that will be filled in
   during this process.  */

lambda_loopnest
gcc_loopnest_to_lambda_loopnest (struct loop *loop_nest,
				 VEC(tree,heap) **inductionvars,
				 VEC(tree,heap) **invariants,
				 struct obstack *lambda_obstack)
{
  lambda_loopnest ret = NULL;
  struct loop *temp = loop_nest;
  int depth = depth_of_nest (loop_nest);
  size_t i;
  VEC(lambda_loop,heap) *loops = NULL;
  VEC(tree,heap) *uboundvars = NULL;
  VEC(tree,heap) *lboundvars = NULL;
  VEC(int,heap) *steps = NULL;
  lambda_loop newloop;
  tree inductionvar = NULL;
  bool perfect_nest = perfect_nest_p (loop_nest);

  if (!perfect_nest && !can_convert_to_perfect_nest (loop_nest))
    goto fail;

  while (temp)
    {
      newloop = gcc_loop_to_lambda_loop (temp, depth, invariants,
					 &inductionvar, *inductionvars,
					 &lboundvars, &uboundvars,
					 &steps, lambda_obstack);
      if (!newloop)
	goto fail;

      VEC_safe_push (tree, heap, *inductionvars, inductionvar);
      VEC_safe_push (lambda_loop, heap, loops, newloop);
      temp = temp->inner;
    }

  if (!perfect_nest)
    {
      if (!perfect_nestify (loop_nest, lboundvars, uboundvars, steps,
			    *inductionvars))
	{
	  if (dump_file)
	    fprintf (dump_file,
		     "Not a perfect loop nest and couldn't convert to one.\n");
	  goto fail;
	}
      else if (dump_file)
	fprintf (dump_file,
		 "Successfully converted loop nest to perfect loop nest.\n");
    }

  ret = lambda_loopnest_new (depth, 2 * depth, lambda_obstack);

  FOR_EACH_VEC_ELT (lambda_loop, loops, i, newloop)
    LN_LOOPS (ret)[i] = newloop;

 fail:
  VEC_free (lambda_loop, heap, loops);
  VEC_free (tree, heap, uboundvars);
  VEC_free (tree, heap, lboundvars);
  VEC_free (int, heap, steps);

  return ret;
}
/* Convert a lambda body vector LBV to a gcc tree, and return the new tree.
   STMTS_TO_INSERT is a pointer to a tree where the statements we need to be
   inserted for us are stored.  INDUCTION_VARS is the array of induction
   variables for the loop this LBV is from.  TYPE is the tree type to use for
   the variables and trees involved.  */

static tree
lbv_to_gcc_expression (lambda_body_vector lbv,
		       tree type, VEC(tree,heap) *induction_vars,
		       gimple_seq *stmts_to_insert)
{
  int k;
  tree resvar;
  tree expr = build_linear_expr (type, LBV_COEFFICIENTS (lbv), induction_vars);

  k = LBV_DENOMINATOR (lbv);
  gcc_assert (k != 0);
  if (k != 1)
    expr = fold_build2 (CEIL_DIV_EXPR, type, expr, build_int_cst (type, k));

  resvar = create_tmp_var (type, "lbvtmp");
  add_referenced_var (resvar);
  return force_gimple_operand (fold (expr), stmts_to_insert, true, resvar);
}
/* Convert a linear expression from coefficient and constant form to a
   gcc tree.
   Return the tree that represents the final value of the expression.
   LLE is the linear expression to convert.
   OFFSET is the linear offset to apply to the expression.
   TYPE is the tree type to use for the variables and math.
   INDUCTION_VARS is a vector of induction variables for the loops.
   INVARIANTS is a vector of the loop nest invariants.
   WRAP specifies what tree code to wrap the results in, if there is more
   than one (it is either MAX_EXPR, or MIN_EXPR).
   STMTS_TO_INSERT is a pointer to the statement list we fill in with
   statements that need to be inserted for the linear expression.  */
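
/* An added example of the conversion performed below: a bound expression
   with coefficients (1, 2) over induction variables i and j, constant 3 and
   denominator 2, wrapped with MAX_EXPR (a lower bound), becomes roughly
   ceil ((i + 2*j + 3) / 2); if several expressions are chained through
   LLE_NEXT, their results are combined with MAX_EXPR.  */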
static tree
lle_to_gcc_expression (lambda_linear_expression lle,
		       lambda_linear_expression offset,
		       tree type,
		       VEC(tree,heap) *induction_vars,
		       VEC(tree,heap) *invariants,
		       enum tree_code wrap, gimple_seq *stmts_to_insert)
{
  int k;
  tree resvar;
  tree expr = NULL_TREE;
  VEC(tree,heap) *results = NULL;

  gcc_assert (wrap == MAX_EXPR || wrap == MIN_EXPR);

  /* Build up the linear expressions.  */
  for (; lle != NULL; lle = LLE_NEXT (lle))
    {
      expr = build_linear_expr (type, LLE_COEFFICIENTS (lle), induction_vars);
      expr = fold_build2 (PLUS_EXPR, type, expr,
			  build_linear_expr (type,
					     LLE_INVARIANT_COEFFICIENTS (lle),
					     invariants));

      k = LLE_CONSTANT (lle);
      if (k)
	expr = fold_build2 (PLUS_EXPR, type, expr, build_int_cst (type, k));

      k = LLE_CONSTANT (offset);
      if (k)
	expr = fold_build2 (PLUS_EXPR, type, expr, build_int_cst (type, k));

      k = LLE_DENOMINATOR (lle);
      if (k != 1)
	expr = fold_build2 (wrap == MAX_EXPR ? CEIL_DIV_EXPR : FLOOR_DIV_EXPR,
			    type, expr, build_int_cst (type, k));

      VEC_safe_push (tree, heap, results, expr);
    }

  /* We may need to wrap the results in a MAX_EXPR or MIN_EXPR.  */
  if (VEC_length (tree, results) > 1)
    {
      size_t i;
      tree op;

      expr = VEC_index (tree, results, 0);
      for (i = 1; VEC_iterate (tree, results, i, op); i++)
	expr = fold_build2 (wrap, type, expr, op);
    }

  VEC_free (tree, heap, results);

  resvar = create_tmp_var (type, "lletmp");
  add_referenced_var (resvar);
  return force_gimple_operand (fold (expr), stmts_to_insert, true, resvar);
}
/* Remove the induction variable defined at IV_STMT.  */

void
remove_iv (gimple iv_stmt)
{
  gimple_stmt_iterator si = gsi_for_stmt (iv_stmt);

  if (gimple_code (iv_stmt) == GIMPLE_PHI)
    {
      unsigned i;

      for (i = 0; i < gimple_phi_num_args (iv_stmt); i++)
	{
	  gimple stmt;
	  imm_use_iterator imm_iter;
	  tree arg = gimple_phi_arg_def (iv_stmt, i);
	  bool used = false;

	  if (TREE_CODE (arg) != SSA_NAME)
	    continue;

	  FOR_EACH_IMM_USE_STMT (stmt, imm_iter, arg)
	    if (stmt != iv_stmt && !is_gimple_debug (stmt))
	      used = true;

	  if (!used)
	    remove_iv (SSA_NAME_DEF_STMT (arg));
	}

      remove_phi_node (&si, true);
    }
  else
    {
      gsi_remove (&si, true);
      release_defs (iv_stmt);
    }
}
/* Transform a lambda loopnest NEW_LOOPNEST, which had TRANSFORM applied to
   it, back into gcc code.  This changes the
   loops, their induction variables, and their bodies, so that they
   match the transformed loopnest.
   OLD_LOOPNEST is the loopnest before we've replaced it with the new
   loopnest.
   OLD_IVS is a vector of induction variables from the old loopnest.
   INVARIANTS is a vector of loop invariants from the old loopnest.
   NEW_LOOPNEST is the new lambda loopnest to replace OLD_LOOPNEST with.
   TRANSFORM is the matrix transform that was applied to OLD_LOOPNEST to get
   NEW_LOOPNEST.  */

void
lambda_loopnest_to_gcc_loopnest (struct loop *old_loopnest,
				 VEC(tree,heap) *old_ivs,
				 VEC(tree,heap) *invariants,
				 VEC(gimple,heap) **remove_ivs,
				 lambda_loopnest new_loopnest,
				 lambda_trans_matrix transform,
				 struct obstack *lambda_obstack)
{
  struct loop *temp;
  size_t i = 0;
  unsigned j;
  size_t depth = 0;
  VEC(tree,heap) *new_ivs = NULL;
  tree oldiv;
  gimple_stmt_iterator bsi;

  transform = lambda_trans_matrix_inverse (transform, lambda_obstack);

  if (dump_file)
    {
      fprintf (dump_file, "Inverse of transformation matrix:\n");
      print_lambda_trans_matrix (dump_file, transform);
    }
  depth = depth_of_nest (old_loopnest);
  temp = old_loopnest;

  while (temp)
    {
      lambda_loop newloop;
      basic_block bb;
      edge exit;
      tree ivvar, ivvarinced;
      gimple exitcond;
      gimple_seq stmts;
      enum tree_code testtype;
      tree newupperbound, newlowerbound;
      lambda_linear_expression offset;
      tree type;
      bool insert_after;
      gimple inc_stmt;

      oldiv = VEC_index (tree, old_ivs, i);
      type = TREE_TYPE (oldiv);

      /* First, build the new induction variable temporary.  */
      ivvar = create_tmp_var (type, "lnivtmp");
      add_referenced_var (ivvar);

      VEC_safe_push (tree, heap, new_ivs, ivvar);

      newloop = LN_LOOPS (new_loopnest)[i];

      /* Linear offset is a bit tricky to handle.  Punt on the unhandled
	 cases.  */
      offset = LL_LINEAR_OFFSET (newloop);

      gcc_assert (LLE_DENOMINATOR (offset) == 1 &&
		  lambda_vector_zerop (LLE_COEFFICIENTS (offset), depth));

      /* Now build the new lower bounds, and insert the statements
	 necessary to generate it on the loop preheader.  */
      stmts = NULL;
      newlowerbound = lle_to_gcc_expression (LL_LOWER_BOUND (newloop),
					     LL_LINEAR_OFFSET (newloop),
					     type, new_ivs,
					     invariants, MAX_EXPR, &stmts);

      if (stmts)
	{
	  gsi_insert_seq_on_edge (loop_preheader_edge (temp), stmts);
	  gsi_commit_edge_inserts ();
	}
      /* Build the new upper bound and insert its statements in the
	 basic block of the exit condition.  */
      stmts = NULL;
      newupperbound = lle_to_gcc_expression (LL_UPPER_BOUND (newloop),
					     LL_LINEAR_OFFSET (newloop),
					     type, new_ivs,
					     invariants, MIN_EXPR, &stmts);
      exit = single_exit (temp);
      exitcond = get_loop_exit_condition (temp);
      bb = gimple_bb (exitcond);
      bsi = gsi_after_labels (bb);
      if (stmts)
	gsi_insert_seq_before (&bsi, stmts, GSI_NEW_STMT);

      /* Create the new iv.  */
      standard_iv_increment_position (temp, &bsi, &insert_after);
      create_iv (newlowerbound,
		 build_int_cst (type, LL_STEP (newloop)),
		 ivvar, temp, &bsi, insert_after, &ivvar,
		 NULL);

      /* Unfortunately, the incremented ivvar that create_iv inserted may not
	 dominate the block containing the exit condition.
	 So we simply create our own incremented iv to use in the new exit
	 test, and let redundancy elimination sort it out.  */
      inc_stmt = gimple_build_assign_with_ops (PLUS_EXPR, SSA_NAME_VAR (ivvar),
					       ivvar,
					       build_int_cst (type, LL_STEP (newloop)));

      ivvarinced = make_ssa_name (SSA_NAME_VAR (ivvar), inc_stmt);
      gimple_assign_set_lhs (inc_stmt, ivvarinced);
      bsi = gsi_for_stmt (exitcond);
      gsi_insert_before (&bsi, inc_stmt, GSI_SAME_STMT);

      /* Replace the exit condition with the new upper bound
	 comparison.  */
      testtype = LL_STEP (newloop) >= 0 ? LE_EXPR : GE_EXPR;

      /* We want to build a conditional where true means exit the loop, and
	 false means continue the loop.
	 So swap the testtype if this isn't the way things are.  */
      if (exit->flags & EDGE_FALSE_VALUE)
	testtype = swap_tree_comparison (testtype);

      gimple_cond_set_condition (exitcond, testtype, newupperbound, ivvarinced);
      update_stmt (exitcond);
      VEC_replace (tree, new_ivs, i, ivvar);

      i++;
      temp = temp->inner;
    }

  /* Rewrite uses of the old ivs so that they are now specified in terms of
     the new ivs.  */
  FOR_EACH_VEC_ELT (tree, old_ivs, i, oldiv)
    {
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      tree oldiv_def;
      gimple oldiv_stmt = SSA_NAME_DEF_STMT (oldiv);
      gimple stmt;

      if (gimple_code (oldiv_stmt) == GIMPLE_PHI)
	oldiv_def = PHI_RESULT (oldiv_stmt);
      else
	oldiv_def = SINGLE_SSA_TREE_OPERAND (oldiv_stmt, SSA_OP_DEF);
      gcc_assert (oldiv_def != NULL_TREE);

      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, oldiv_def)
	{
	  tree newiv;
	  gimple_seq stmts;
	  lambda_body_vector lbv, newlbv;

	  if (is_gimple_debug (stmt))
	    continue;

	  /* Compute the new expression for the induction
	     variable.  */
	  depth = VEC_length (tree, new_ivs);
	  lbv = lambda_body_vector_new (depth, lambda_obstack);
	  LBV_COEFFICIENTS (lbv)[i] = 1;

	  newlbv = lambda_body_vector_compute_new (transform, lbv,
						   lambda_obstack);

	  stmts = NULL;
	  newiv = lbv_to_gcc_expression (newlbv, TREE_TYPE (oldiv),
					 new_ivs, &stmts);

	  if (stmts && gimple_code (stmt) != GIMPLE_PHI)
	    {
	      bsi = gsi_for_stmt (stmt);
	      gsi_insert_seq_before (&bsi, stmts, GSI_SAME_STMT);
	    }

	  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
	    propagate_value (use_p, newiv);

	  if (stmts && gimple_code (stmt) == GIMPLE_PHI)
	    for (j = 0; j < gimple_phi_num_args (stmt); j++)
	      if (gimple_phi_arg_def (stmt, j) == newiv)
		gsi_insert_seq_on_edge (gimple_phi_arg_edge (stmt, j), stmts);
	}

      /* Remove the now unused induction variable.  */
      VEC_safe_push (gimple, heap, *remove_ivs, oldiv_stmt);
    }
  VEC_free (tree, heap, new_ivs);
}
/* Return TRUE if this is not an interesting statement from the perspective
   of determining if we have a perfect loop nest.  */

static bool
not_interesting_stmt (gimple stmt)
{
  /* Note that COND_EXPR's aren't interesting because if they were exiting the
     loop, we would have already failed the number of exits tests.  */
  if (gimple_code (stmt) == GIMPLE_LABEL
      || gimple_code (stmt) == GIMPLE_GOTO
      || gimple_code (stmt) == GIMPLE_COND
      || is_gimple_debug (stmt))
    return true;
  return false;
}
/* Return TRUE if PHI uses DEF for its in-the-loop edge for LOOP.  */

static bool
phi_loop_edge_uses_def (struct loop *loop, gimple phi, tree def)
{
  unsigned i;
  for (i = 0; i < gimple_phi_num_args (phi); i++)
    if (flow_bb_inside_loop_p (loop, gimple_phi_arg_edge (phi, i)->src))
      if (PHI_ARG_DEF (phi, i) == def)
	return true;

  return false;
}
/* Return TRUE if STMT is a use of PHI_RESULT.  */

static bool
stmt_uses_phi_result (gimple stmt, tree phi_result)
{
  tree use = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);

  /* This is conservatively true, because we only want SIMPLE bumpers
     of the form x +- constant for our pass.  */
  return (use == phi_result);
}
/* STMT is a bumper stmt for LOOP if the version it defines is used in the
   in-loop-edge in a phi node, and the operand it uses is the result of that
   phi node.
   I.e.  i_29 = i_3 + 1
         i_3 = PHI (0, i_29);  */

static bool
stmt_is_bumper_for_loop (struct loop *loop, gimple stmt)
{
  gimple use;
  tree def;
  imm_use_iterator iter;
  use_operand_p use_p;

  def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
  if (!def)
    return false;

  FOR_EACH_IMM_USE_FAST (use_p, iter, def)
    {
      use = USE_STMT (use_p);
      if (gimple_code (use) == GIMPLE_PHI)
	{
	  if (phi_loop_edge_uses_def (loop, use, def))
	    if (stmt_uses_phi_result (stmt, PHI_RESULT (use)))
	      return true;
	}
    }
  return false;
}
/* Return true if LOOP is a perfect loop nest.
   Perfect loop nests are those loop nests where all code occurs in the
   innermost loop body.
   If S1 is a program statement, then

   DO I = 1, 20
     S1
     DO J = 1, 20
       ...
     END DO
   END DO
   is not a perfect loop nest because of S1.

   DO I = 1, 20
     DO J = 1, 20
       S1
       ...
     END DO
   END DO
   is a perfect loop nest.

   Since we don't have high level loops anymore, we basically have to walk our
   statements and ignore those that are there because the loop needs them (IE
   the induction variable increment, and jump back to the top of the loop).  */

static bool
perfect_nest_p (struct loop *loop)
{
  basic_block *bbs;
  size_t i;
  gimple exit_cond;

  /* Loops at depth 0 are perfect nests.  */
  if (!loop->inner)
    return true;

  bbs = get_loop_body (loop);
  exit_cond = get_loop_exit_condition (loop);

  for (i = 0; i < loop->num_nodes; i++)
    {
      if (bbs[i]->loop_father == loop)
	{
	  gimple_stmt_iterator bsi;

	  for (bsi = gsi_start_bb (bbs[i]); !gsi_end_p (bsi); gsi_next (&bsi))
	    {
	      gimple stmt = gsi_stmt (bsi);

	      if (gimple_code (stmt) == GIMPLE_COND
		  && exit_cond != stmt)
		goto non_perfectly_nested;

	      if (stmt == exit_cond
		  || not_interesting_stmt (stmt)
		  || stmt_is_bumper_for_loop (loop, stmt))
		continue;

	    non_perfectly_nested:
	      free (bbs);
	      return false;
	    }
	}
    }

  free (bbs);

  return perfect_nest_p (loop->inner);
}

/* Replace the uses of X in STMT, or uses with the same step as X, with Y.
   YINIT is the initial value of Y, REPLACEMENTS is a hash table used to
   avoid creating duplicate temporaries, and FIRSTBSI is a statement
   iterator where new temporaries should be inserted, at the beginning
   of the body basic block.  */

static void
replace_uses_equiv_to_x_with_y (struct loop *loop, gimple stmt, tree x,
                                int xstep, tree y, tree yinit,
                                htab_t replacements,
                                gimple_stmt_iterator *firstbsi)
{
  ssa_op_iter iter;
  use_operand_p use_p;

  FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
    {
      tree use = USE_FROM_PTR (use_p);
      tree step = NULL_TREE;
      tree scev, init, val, var;
      gimple setstmt;
      struct tree_map *h, in;
      void **loc;

      /* Replace uses of X with Y right away.  */
      if (use == x)
        {
          SET_USE (use_p, y);
          continue;
        }

      scev = instantiate_parameters (loop,
                                     analyze_scalar_evolution (loop, use));

      if (scev == NULL || scev == chrec_dont_know)
        continue;

      step = evolution_part_in_loop_num (scev, loop->num);
      if (step == NULL
          || step == chrec_dont_know
          || TREE_CODE (step) != INTEGER_CST
          || int_cst_value (step) != xstep)
        continue;

      /* Use REPLACEMENTS hash table to cache already created
         temporaries.  */
      in.hash = htab_hash_pointer (use);
      in.base.from = use;
      h = (struct tree_map *) htab_find_with_hash (replacements, &in, in.hash);
      if (h != NULL)
        {
          SET_USE (use_p, h->to);
          continue;
        }

      /* USE which has the same step as X should be replaced
         with a temporary set to Y + YINIT - INIT.  */
      init = initial_condition_in_loop_num (scev, loop->num);
      gcc_assert (init != NULL && init != chrec_dont_know);
      if (TREE_TYPE (use) == TREE_TYPE (y))
        {
          val = fold_build2 (MINUS_EXPR, TREE_TYPE (y), init, yinit);
          val = fold_build2 (PLUS_EXPR, TREE_TYPE (y), y, val);
          if (val == y)
            {
              /* If X has the same type as USE, the same step
                 and same initial value, it can be replaced by Y.  */
              SET_USE (use_p, y);
              continue;
            }
        }
      else
        {
          val = fold_build2 (MINUS_EXPR, TREE_TYPE (y), y, yinit);
          val = fold_convert (TREE_TYPE (use), val);
          val = fold_build2 (PLUS_EXPR, TREE_TYPE (use), val, init);
        }

      /* Create a temporary variable and insert it at the beginning
         of the loop body basic block, right after the PHI node
         which sets Y.  */
      var = create_tmp_var (TREE_TYPE (use), "perfecttmp");
      add_referenced_var (var);
      val = force_gimple_operand_gsi (firstbsi, val, false, NULL,
                                      true, GSI_SAME_STMT);
      setstmt = gimple_build_assign (var, val);
      var = make_ssa_name (var, setstmt);
      gimple_assign_set_lhs (setstmt, var);
      gsi_insert_before (firstbsi, setstmt, GSI_SAME_STMT);
      update_stmt (setstmt);
      SET_USE (use_p, var);
      h = ggc_alloc_tree_map ();
      h->hash = in.hash;
      h->base.from = use;
      h->to = var;
      loc = htab_find_slot_with_hash (replacements, h, in.hash, INSERT);
      gcc_assert ((*(struct tree_map **)loc) == NULL);
      *(struct tree_map **) loc = h;
    }
}
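
/* Worked example (editor's sketch, hypothetical names): let X be the old
   outer iv with evolution {0, +, 1}_1 and XSTEP == 1, let Y be the new iv
   of the split loop with YINIT == 0, and let STMT use k_8 whose evolution
   is {5, +, 1}_1.  k_8 has the same step as X but a different initial
   value, so it is replaced by a fresh temporary computing Y + (5 - 0),
   inserted at FIRSTBSI, and the (k_8 -> temporary) pair is cached in
   REPLACEMENTS so that later uses of k_8 reuse the same temporary.  */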

/* Return true if STMT is an exit PHI for LOOP.  */

static bool
exit_phi_for_loop_p (struct loop *loop, gimple stmt)
{
  if (gimple_code (stmt) != GIMPLE_PHI
      || gimple_phi_num_args (stmt) != 1
      || gimple_bb (stmt) != single_exit (loop)->dest)
    return false;

  return true;
}

/* Return true if STMT can be put back into the loop INNER, by
   copying it to the beginning of that loop and changing the uses.  */

static bool
can_put_in_inner_loop (struct loop *inner, gimple stmt)
{
  imm_use_iterator imm_iter;
  use_operand_p use_p;

  gcc_assert (is_gimple_assign (stmt));
  if (gimple_vuse (stmt)
      || !stmt_invariant_in_loop_p (inner, stmt))
    return false;

  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_assign_lhs (stmt))
    {
      if (!exit_phi_for_loop_p (inner, USE_STMT (use_p)))
        {
          basic_block immbb = gimple_bb (USE_STMT (use_p));

          if (!flow_bb_inside_loop_p (inner, immbb))
            return false;
        }
    }
  return true;
}

/* Return true if STMT can be put *after* the inner loop of LOOP.  */

static bool
can_put_after_inner_loop (struct loop *loop, gimple stmt)
{
  imm_use_iterator imm_iter;
  use_operand_p use_p;

  if (gimple_vuse (stmt))
    return false;

  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_assign_lhs (stmt))
    {
      if (!exit_phi_for_loop_p (loop, USE_STMT (use_p)))
        {
          basic_block immbb = gimple_bb (USE_STMT (use_p));

          if (!dominated_by_p (CDI_DOMINATORS,
                               immbb,
                               loop->inner->header)
              && !can_put_in_inner_loop (loop->inner, stmt))
            return false;
        }
    }
  return true;
}

/* Return true when the induction variable IV is simple enough to be
   duplicated.  */

static bool
can_duplicate_iv (tree iv, struct loop *loop)
{
  tree scev = instantiate_parameters
    (loop, analyze_scalar_evolution (loop, iv));

  if (!automatically_generated_chrec_p (scev))
    {
      tree step = evolution_part_in_loop_num (scev, loop->num);

      if (step && step != chrec_dont_know && TREE_CODE (step) == INTEGER_CST)
        return true;
    }

  return false;
}
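
/* For instance (editor's illustration): an iv whose evolution in LOOP is
   {0, +, 4}_2 has the integer constant step 4 and is considered
   duplicable, whereas one with a symbolic step such as {0, +, n_7}_2 is
   not, because TREE_CODE of its step is not INTEGER_CST.  */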

/* If this is a scalar operation that can be put back into the inner
   loop, or after the inner loop, through copying, then do so.  This
   works on the theory that any amount of scalar code we have to
   reduplicate into or after the loops is less expensive than the win
   we get from rearranging the memory walk the loop is doing so that
   it has better cache behavior.  */

static bool
cannot_convert_modify_to_perfect_nest (gimple stmt, struct loop *loop)
{
  use_operand_p use_a, use_b;
  imm_use_iterator imm_iter;
  ssa_op_iter op_iter, op_iter1;
  tree op0 = gimple_assign_lhs (stmt);

  /* The statement should not define a variable used in the inner
     loop.  */
  if (TREE_CODE (op0) == SSA_NAME
      && !can_duplicate_iv (op0, loop))
    FOR_EACH_IMM_USE_FAST (use_a, imm_iter, op0)
      if (gimple_bb (USE_STMT (use_a))->loop_father == loop->inner)
        return true;

  FOR_EACH_SSA_USE_OPERAND (use_a, stmt, op_iter, SSA_OP_USE)
    {
      gimple node;
      tree op = USE_FROM_PTR (use_a);

      /* The variables should not be used in both loops.  */
      if (!can_duplicate_iv (op, loop))
        FOR_EACH_IMM_USE_FAST (use_b, imm_iter, op)
          if (gimple_bb (USE_STMT (use_b))->loop_father == loop->inner)
            return true;

      /* The statement should not use the value of a scalar that was
         modified in the loop.  */
      node = SSA_NAME_DEF_STMT (op);
      if (gimple_code (node) == GIMPLE_PHI)
        FOR_EACH_PHI_ARG (use_b, node, op_iter1, SSA_OP_USE)
          {
            tree arg = USE_FROM_PTR (use_b);

            if (TREE_CODE (arg) == SSA_NAME)
              {
                gimple arg_stmt = SSA_NAME_DEF_STMT (arg);

                if (gimple_bb (arg_stmt)
                    && (gimple_bb (arg_stmt)->loop_father == loop->inner))
                  return true;
              }
          }
    }

  return false;
}
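
/* Editor's illustration (hypothetical names): in

       for (i ...)
         {
           t_5 = a_3 + 1;
           for (j ...)
             ... = ... t_5 ...;
         }

   the assignment defines t_5, t_5 is not a duplicable iv, and it is used
   inside loop->inner, so the first check above returns true and the nest
   is not converted.  */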

/* Return true when BB contains statements that can harm the transform
   to a perfect loop nest.  */

static bool
cannot_convert_bb_to_perfect_nest (basic_block bb, struct loop *loop)
{
  gimple_stmt_iterator bsi;
  gimple exit_condition = get_loop_exit_condition (loop);

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    {
      gimple stmt = gsi_stmt (bsi);

      if (stmt == exit_condition
          || not_interesting_stmt (stmt)
          || stmt_is_bumper_for_loop (loop, stmt))
        continue;

      if (is_gimple_assign (stmt))
        {
          if (cannot_convert_modify_to_perfect_nest (stmt, loop))
            return true;

          if (can_duplicate_iv (gimple_assign_lhs (stmt), loop))
            continue;

          if (can_put_in_inner_loop (loop->inner, stmt)
              || can_put_after_inner_loop (loop, stmt))
            continue;
        }

      /* If the bb of a statement we care about isn't dominated by the
         header of the inner loop, then we can't handle this case
         right now.  This test ensures that the statement comes
         completely *after* the inner loop.  */
      if (!dominated_by_p (CDI_DOMINATORS,
                           gimple_bb (stmt),
                           loop->inner->header))
        return true;
    }

  return false;
}

/* Return TRUE if LOOP is an imperfect nest that we can convert to a
   perfect one.  At the moment, we only handle imperfect nests of
   depth 2, where all of the statements occur after the inner loop.  */

static bool
can_convert_to_perfect_nest (struct loop *loop)
{
  basic_block *bbs;
  size_t i;
  gimple_stmt_iterator si;

  /* Can't handle triply nested+ loops yet.  */
  if (!loop->inner || loop->inner->inner)
    return false;

  bbs = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    if (bbs[i]->loop_father == loop
        && cannot_convert_bb_to_perfect_nest (bbs[i], loop))
      goto fail;

  /* We also need to make sure the loop exit only has simple copy phis in it,
     otherwise we don't know how to transform it into a perfect nest.  */
  for (si = gsi_start_phis (single_exit (loop)->dest);
       !gsi_end_p (si);
       gsi_next (&si))
    if (gimple_phi_num_args (gsi_stmt (si)) != 1)
      goto fail;

  free (bbs);
  return true;

 fail:
  free (bbs);
  return false;
}

DEF_VEC_I(source_location);
DEF_VEC_ALLOC_I(source_location,heap);

/* Transform the loop nest into a perfect nest, if possible.
   LOOP is the loop nest to transform into a perfect nest.
   LBOUNDS are the lower bounds for the loops to transform.
   UBOUNDS are the upper bounds for the loops to transform.
   STEPS are the steps for the loops to transform.
   LOOPIVS are the induction variables for the loops to transform.

   Basically, for the case of

   FOR (i = 0; i < 50; i++)
    {
     FOR (j = 0; j < 50; j++)
     {
        <whatever>
     }
     <some code>
    }

   this function will transform it into a perfect loop nest by splitting the
   outer loop into two loops, like so:

   FOR (i = 0; i < 50; i++)
   {
     FOR (j = 0; j < 50; j++)
     {
        <whatever>
     }
   }

   FOR (i = 0; i < 50; i++)
   {
     <some code>
   }

   Return FALSE if we can't make this loop into a perfect nest.  */

static bool
perfect_nestify (struct loop *loop,
                 VEC(tree,heap) *lbounds,
                 VEC(tree,heap) *ubounds,
                 VEC(int,heap) *steps,
                 VEC(tree,heap) *loopivs)
{
  basic_block *bbs;
  gimple exit_condition;
  gimple cond_stmt;
  basic_block preheaderbb, headerbb, bodybb, latchbb, olddest;
  int i;
  gimple_stmt_iterator bsi, firstbsi;
  bool insert_after;
  edge e;
  struct loop *newloop;
  gimple phi;
  tree uboundvar;
  gimple stmt;
  tree oldivvar, ivvar, ivvarinced;
  VEC(tree,heap) *phis = NULL;
  VEC(source_location,heap) *locations = NULL;
  htab_t replacements = NULL;

  /* Create the new loop.  */
  olddest = single_exit (loop)->dest;
  preheaderbb = split_edge (single_exit (loop));
  headerbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);

  /* Push the exit phi nodes that we are moving.  */
  for (bsi = gsi_start_phis (olddest); !gsi_end_p (bsi); gsi_next (&bsi))
    {
      phi = gsi_stmt (bsi);
      VEC_reserve (tree, heap, phis, 2);
      VEC_reserve (source_location, heap, locations, 1);
      VEC_quick_push (tree, phis, PHI_RESULT (phi));
      VEC_quick_push (tree, phis, PHI_ARG_DEF (phi, 0));
      VEC_quick_push (source_location, locations,
                      gimple_phi_arg_location (phi, 0));
    }
  e = redirect_edge_and_branch (single_succ_edge (preheaderbb), headerbb);

  /* Remove the exit phis from the old basic block.  */
  for (bsi = gsi_start_phis (olddest); !gsi_end_p (bsi); )
    remove_phi_node (&bsi, false);

  /* And add them back to the new basic block.  */
  while (VEC_length (tree, phis) != 0)
    {
      tree def;
      tree phiname;
      source_location locus;

      def = VEC_pop (tree, phis);
      phiname = VEC_pop (tree, phis);
      locus = VEC_pop (source_location, locations);
      phi = create_phi_node (phiname, preheaderbb);
      add_phi_arg (phi, def, single_pred_edge (preheaderbb), locus);
    }
  flush_pending_stmts (e);
  VEC_free (tree, heap, phis);

  bodybb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
  latchbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
  make_edge (headerbb, bodybb, EDGE_FALLTHRU);
  cond_stmt = gimple_build_cond (NE_EXPR, integer_one_node, integer_zero_node,
                                 NULL_TREE, NULL_TREE);
  bsi = gsi_start_bb (bodybb);
  gsi_insert_after (&bsi, cond_stmt, GSI_NEW_STMT);
  e = make_edge (bodybb, olddest, EDGE_FALSE_VALUE);
  make_edge (bodybb, latchbb, EDGE_TRUE_VALUE);
  make_edge (latchbb, headerbb, EDGE_FALLTHRU);

  /* Update the loop structures.  */
  newloop = duplicate_loop (loop, olddest->loop_father);
  newloop->header = headerbb;
  newloop->latch = latchbb;
  add_bb_to_loop (latchbb, newloop);
  add_bb_to_loop (bodybb, newloop);
  add_bb_to_loop (headerbb, newloop);
  set_immediate_dominator (CDI_DOMINATORS, bodybb, headerbb);
  set_immediate_dominator (CDI_DOMINATORS, headerbb, preheaderbb);
  set_immediate_dominator (CDI_DOMINATORS, preheaderbb,
                           single_exit (loop)->src);
  set_immediate_dominator (CDI_DOMINATORS, latchbb, bodybb);
  set_immediate_dominator (CDI_DOMINATORS, olddest,
                           recompute_dominator (CDI_DOMINATORS, olddest));

  /* Create the new iv.  */
  oldivvar = VEC_index (tree, loopivs, 0);
  ivvar = create_tmp_var (TREE_TYPE (oldivvar), "perfectiv");
  add_referenced_var (ivvar);

  standard_iv_increment_position (newloop, &bsi, &insert_after);
  create_iv (VEC_index (tree, lbounds, 0),
             build_int_cst (TREE_TYPE (oldivvar), VEC_index (int, steps, 0)),
             ivvar, newloop, &bsi, insert_after, &ivvar, &ivvarinced);

  /* Create the new upper bound.  This may not be just a variable, so we copy
     it to one just in case.  */

  exit_condition = get_loop_exit_condition (newloop);
  uboundvar = create_tmp_var (TREE_TYPE (VEC_index (tree, ubounds, 0)),
                              "uboundvar");
  add_referenced_var (uboundvar);
  stmt = gimple_build_assign (uboundvar, VEC_index (tree, ubounds, 0));
  uboundvar = make_ssa_name (uboundvar, stmt);
  gimple_assign_set_lhs (stmt, uboundvar);

  if (insert_after)
    gsi_insert_after (&bsi, stmt, GSI_SAME_STMT);
  else
    gsi_insert_before (&bsi, stmt, GSI_SAME_STMT);
  update_stmt (stmt);
  gimple_cond_set_condition (exit_condition, GE_EXPR, uboundvar, ivvarinced);
  update_stmt (exit_condition);
  replacements = htab_create_ggc (20, tree_map_hash,
                                  tree_map_eq, NULL);
  bbs = get_loop_body_in_dom_order (loop);

  /* Now move the statements, and replace the induction variable in the moved
     statements with the correct loop induction variable.  */
  oldivvar = VEC_index (tree, loopivs, 0);
  firstbsi = gsi_start_bb (bodybb);
  for (i = loop->num_nodes - 1; i >= 0; i--)
    {
      gimple_stmt_iterator tobsi = gsi_last_bb (bodybb);

      if (bbs[i]->loop_father == loop)
        {
          /* If this is true, we are *before* the inner loop.
             If this isn't true, we are *after* it.

             The only time can_convert_to_perfect_nest returns true when we
             have statements before the inner loop is if they can be moved
             into the inner loop.

             The only time can_convert_to_perfect_nest returns true when we
             have statements after the inner loop is if they can be moved into
             the new split loop.  */

          if (dominated_by_p (CDI_DOMINATORS, loop->inner->header, bbs[i]))
            {
              gimple_stmt_iterator header_bsi
                = gsi_after_labels (loop->inner->header);

              for (bsi = gsi_start_bb (bbs[i]); !gsi_end_p (bsi);)
                {
                  gimple stmt = gsi_stmt (bsi);

                  if (stmt == exit_condition
                      || not_interesting_stmt (stmt)
                      || stmt_is_bumper_for_loop (loop, stmt))
                    {
                      gsi_next (&bsi);
                      continue;
                    }

                  gsi_move_before (&bsi, &header_bsi);
                }
            }
          else
            {
              /* Note that the bsi only needs to be explicitly incremented
                 when we don't move something, since it is automatically
                 incremented when we do.  */
              for (bsi = gsi_start_bb (bbs[i]); !gsi_end_p (bsi);)
                {
                  gimple stmt = gsi_stmt (bsi);

                  if (stmt == exit_condition
                      || not_interesting_stmt (stmt)
                      || stmt_is_bumper_for_loop (loop, stmt))
                    {
                      gsi_next (&bsi);
                      continue;
                    }

                  replace_uses_equiv_to_x_with_y
                    (loop, stmt, oldivvar, VEC_index (int, steps, 0), ivvar,
                     VEC_index (tree, lbounds, 0), replacements, &firstbsi);

                  gsi_move_before (&bsi, &tobsi);

                  /* If the statement has any virtual operands, they may
                     need to be rewired because the original loop may
                     still reference them.  */
                  if (gimple_vuse (stmt))
                    mark_sym_for_renaming (gimple_vop (cfun));
                }
            }
        }
    }

  free (bbs);
  htab_delete (replacements);
  return perfect_nest_p (loop);
}

/* Return true if TRANS is a legal transformation matrix that respects
   the dependence vectors in DEPENDENCE_RELATIONS.  The conservative
   answer is false.

   "Wolfe proves that a unimodular transformation represented by the
   matrix T is legal when applied to a loop nest with a set of
   lexicographically non-negative distance vectors RDG if and only if
   for each vector d in RDG, (T.d >= 0) is lexicographically positive.
   i.e.: if and only if it transforms the lexicographically positive
   distance vectors to lexicographically positive vectors.  Note that
   a unimodular matrix must transform the zero vector (and only it) to
   the zero vector." S.Muchnick.  */

bool
lambda_transform_legal_p (lambda_trans_matrix trans,
                          int nb_loops,
                          VEC (ddr_p, heap) *dependence_relations)
{
  unsigned int i, j;
  lambda_vector distres;
  struct data_dependence_relation *ddr;

  gcc_assert (LTM_COLSIZE (trans) == nb_loops
              && LTM_ROWSIZE (trans) == nb_loops);

  /* When there are no dependences, the transformation is correct.  */
  if (VEC_length (ddr_p, dependence_relations) == 0)
    return true;

  ddr = VEC_index (ddr_p, dependence_relations, 0);
  if (ddr == NULL)
    return true;

  /* When there is an unknown relation in the dependence_relations, we
     know that it is not worth looking at this loop nest: give up.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    return false;

  distres = lambda_vector_new (nb_loops);

  /* For each distance vector in the dependence graph.  */
  FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
    {
      /* Don't care about relations for which we know that there is no
         dependence, nor about read-read (aka. input dependences):
         these data accesses can happen in any order.  */
      if (DDR_ARE_DEPENDENT (ddr) == chrec_known
          || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
        continue;

      /* Conservatively answer: "this transformation is not valid".  */
      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
        return false;

      /* If the dependence could not be captured by a distance vector,
         conservatively answer that the transform is not valid.  */
      if (DDR_NUM_DIST_VECTS (ddr) == 0)
        return false;

      /* Compute trans.dist_vect.  */
      for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
        {
          lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
                                     DDR_DIST_VECT (ddr, j), distres);

          if (!lambda_vector_lexico_pos (distres, nb_loops))
            return false;
        }
    }
  return true;
}
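
/* A small worked example (editor's addition): for a depth-2 interchange

       T = | 0 1 |
           | 1 0 |

   a distance vector d = (1, -1) is mapped to T.d = (-1, 1), which is not
   lexicographically positive, so the interchange is rejected; d = (1, 1)
   maps to (1, 1) and passes.  This is exactly the per-vector test done by
   lambda_vector_lexico_pos on DISTRES above.  */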

/* Collect parameters from the affine function ACCESS_FUNCTION, and push
   them onto PARAMETERS.  */

static void
lambda_collect_parameters_from_af (tree access_function,
                                   struct pointer_set_t *param_set,
                                   VEC (tree, heap) **parameters)
{
  if (access_function == NULL)
    return;

  if (TREE_CODE (access_function) == SSA_NAME
      && pointer_set_contains (param_set, access_function) == 0)
    {
      pointer_set_insert (param_set, access_function);
      VEC_safe_push (tree, heap, *parameters, access_function);
    }
  else
    {
      int i, num_operands = tree_operand_length (access_function);

      for (i = 0; i < num_operands; i++)
        lambda_collect_parameters_from_af (TREE_OPERAND (access_function, i),
                                           param_set, parameters);
    }
}

/* Collect parameters from DATAREFS, and push them onto PARAMETERS.  */

void
lambda_collect_parameters (VEC (data_reference_p, heap) *datarefs,
                           VEC (tree, heap) **parameters)
{
  unsigned i, j;
  struct pointer_set_t *parameter_set = pointer_set_create ();
  data_reference_p data_reference;

  FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, data_reference)
    for (j = 0; j < DR_NUM_DIMENSIONS (data_reference); j++)
      lambda_collect_parameters_from_af (DR_ACCESS_FN (data_reference, j),
                                         parameter_set, parameters);
  pointer_set_destroy (parameter_set);
}

/* Translate BASE_EXPR into the vector CY.  AM is needed for inferring
   indexing positions in the data access vector.  CST is the analyzed
   integer constant.  */

static bool
av_for_af_base (tree base_expr, lambda_vector cy, struct access_matrix *am,
                int cst)
{
  bool result = true;

  switch (TREE_CODE (base_expr))
    {
    case INTEGER_CST:
      /* Constant part.  */
      cy[AM_CONST_COLUMN_INDEX (am)] += int_cst_value (base_expr) * cst;
      break;

    case SSA_NAME:
      {
        int param_index =
          access_matrix_get_index_for_parameter (base_expr, am);

        if (param_index >= 0)
          {
            cy[param_index] = cst + cy[param_index];
            break;
          }

        return false;
      }

    case PLUS_EXPR:
      return av_for_af_base (TREE_OPERAND (base_expr, 0), cy, am, cst)
        && av_for_af_base (TREE_OPERAND (base_expr, 1), cy, am, cst);

    case MINUS_EXPR:
      return av_for_af_base (TREE_OPERAND (base_expr, 0), cy, am, cst)
        && av_for_af_base (TREE_OPERAND (base_expr, 1), cy, am, -1 * cst);

    case MULT_EXPR:
      if (TREE_CODE (TREE_OPERAND (base_expr, 0)) == INTEGER_CST)
        result = av_for_af_base (TREE_OPERAND (base_expr, 1),
                                 cy, am, cst *
                                 int_cst_value (TREE_OPERAND (base_expr, 0)));
      else if (TREE_CODE (TREE_OPERAND (base_expr, 1)) == INTEGER_CST)
        result = av_for_af_base (TREE_OPERAND (base_expr, 0),
                                 cy, am, cst *
                                 int_cst_value (TREE_OPERAND (base_expr, 1)));
      else
        result = false;
      break;

    case NEGATE_EXPR:
      return av_for_af_base (TREE_OPERAND (base_expr, 0), cy, am, -1 * cst);

    default:
      return false;
    }

  return result;
}
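
/* Worked example (editor's addition, hypothetical names): with CST == 3
   and BASE_EXPR == n_4 + 2, the PLUS_EXPR case recurses into both
   operands, adding 3 to the column of the parameter n_4 and 3 * 2 == 6 to
   the constant column of CY.  */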

/* Translate ACCESS_FUN into the vector CY.  AM is needed for inferring
   indexing positions in the data access vector.  */

static bool
av_for_af (tree access_fun, lambda_vector cy, struct access_matrix *am)
{
  switch (TREE_CODE (access_fun))
    {
    case POLYNOMIAL_CHREC:
      {
        tree left = CHREC_LEFT (access_fun);
        tree right = CHREC_RIGHT (access_fun);
        unsigned var;

        if (TREE_CODE (right) != INTEGER_CST)
          return false;

        var = am_vector_index_for_loop (am, CHREC_VARIABLE (access_fun));
        cy[var] = int_cst_value (right);

        if (TREE_CODE (left) == POLYNOMIAL_CHREC)
          return av_for_af (left, cy, am);
        else
          return av_for_af_base (left, cy, am, 1);
      }

    case INTEGER_CST:
      /* Constant part.  */
      return av_for_af_base (access_fun, cy, am, 1);

    default:
      return false;
    }
}
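
/* Worked example (editor's addition): for an access function such as
   {{0, +, 1}_1, +, 2}_2, i.e. i + 2*j where loop 1 carries i and loop 2
   carries j, the outer POLYNOMIAL_CHREC sets CY's column for loop 2 to 2,
   the recursive call sets the column for loop 1 to 1, and the remaining
   base 0 adds nothing to the constant column.  */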

/* Initialize the access matrix for DATA_REFERENCE.  */

static bool
build_access_matrix (data_reference_p data_reference,
                     VEC (tree, heap) *parameters,
                     VEC (loop_p, heap) *nest,
                     struct obstack * lambda_obstack)
{
  struct access_matrix *am = (struct access_matrix *)
    obstack_alloc (lambda_obstack, sizeof (struct access_matrix));
  unsigned i, ndim = DR_NUM_DIMENSIONS (data_reference);
  unsigned nivs = VEC_length (loop_p, nest);
  unsigned lambda_nb_columns;

  AM_LOOP_NEST (am) = nest;
  AM_NB_INDUCTION_VARS (am) = nivs;
  AM_PARAMETERS (am) = parameters;

  lambda_nb_columns = AM_NB_COLUMNS (am);
  AM_MATRIX (am) = VEC_alloc (lambda_vector, gc, ndim);

  for (i = 0; i < ndim; i++)
    {
      lambda_vector access_vector = lambda_vector_new (lambda_nb_columns);
      tree access_function = DR_ACCESS_FN (data_reference, i);

      if (!av_for_af (access_function, access_vector, am))
        return false;

      VEC_quick_push (lambda_vector, AM_MATRIX (am), access_vector);
    }

  DR_ACCESS_MATRIX (data_reference) = am;
  return true;
}
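
/* Editor's illustration (hypothetical reference): for A[i][i + j + n_5] in
   a two-loop nest, AM_MATRIX gets one row per array dimension.  Assuming
   the columns are laid out as induction variables, then parameters, then
   the constant column, the rows would be

       i            ->  1 0 0 0
       i + j + n_5  ->  1 1 1 0

   The exact column positions come from am_vector_index_for_loop,
   access_matrix_get_index_for_parameter and AM_CONST_COLUMN_INDEX.  */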

/* Return false when one of the access matrices cannot be built.  */

bool
lambda_compute_access_matrices (VEC (data_reference_p, heap) *datarefs,
                                VEC (tree, heap) *parameters,
                                VEC (loop_p, heap) *nest,
                                struct obstack * lambda_obstack)
{
  data_reference_p dataref;
  unsigned ix;

  FOR_EACH_VEC_ELT (data_reference_p, datarefs, ix, dataref)
    if (!build_access_matrix (dataref, parameters, nest, lambda_obstack))
      return false;

  return true;
}