gcc/lambda-code.c

   1 /*  Loop transformation code generation
   2     Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
   3     Contributed by Daniel Berlin <dberlin@dberlin.org>
   4
   5     This file is part of GCC.
   6
   7     GCC is free software; you can redistribute it and/or modify it under
   8     the terms of the GNU General Public License as published by the Free
   9     Software Foundation; either version 3, or (at your option) any later
  10     version.
  11
  12     GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13     WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14     FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15     for more details.
  16
  17     You should have received a copy of the GNU General Public License
  18     along with GCC; see the file COPYING3.  If not see
  19     <http://www.gnu.org/licenses/>.  */
  20
  21 #include "config.h"
  22 #include "system.h"
  23 #include "coretypes.h"
  24 #include "tm.h"
  25 #include "ggc.h"
  26 #include "tree.h"
  27 #include "target.h"
  28 #include "rtl.h"
  29 #include "basic-block.h"
  30 #include "diagnostic.h"
  31 #include "obstack.h"
  32 #include "tree-flow.h"
  33 #include "tree-dump.h"
  34 #include "timevar.h"
  35 #include "cfgloop.h"
  36 #include "expr.h"
  37 #include "optabs.h"
  38 #include "tree-chrec.h"
  39 #include "tree-data-ref.h"
  40 #include "tree-pass.h"
  41 #include "tree-scalar-evolution.h"
  42 #include "vec.h"
  43 #include "lambda.h"
  44 #include "vecprim.h"
  45
  46 /* This loop nest code generation is based on non-singular matrix
  47    math.
  48
  49  A little terminology and a general sketch of the algorithm.  See "A singular
  50  loop transformation framework based on non-singular matrices" by Wei Li and
  51  Keshav Pingali for formal proofs that the various statements below are
  52  correct.
  53
  54  A loop iteration space represents the points traversed by the loop.  A point in the
  55  iteration space can be represented by a vector of size <loop depth>.  You can
  56  therefore represent the iteration space as an integral combinations of a set
  57  of basis vectors.
  58
  59  A loop iteration space is dense if every integer point between the loop
  60  bounds is a point in the iteration space.  Every loop with a step of 1
  61  therefore has a dense iteration space.
  62
  63  for i = 1 to 3, step 1 is a dense iteration space.
  64
  65  A loop iteration space is sparse if it is not dense.  That is, the iteration
  66  space skips integer points that are within the loop bounds.
  67
  68  for i = 1 to 3, step 2 is a sparse iteration space, because the integer point
  69  2 is skipped.
  70
  71  Dense source spaces are easy to transform, because they don't skip any
  72  points to begin with.  Thus we can compute the exact bounds of the target
  73  space using min/max and floor/ceil.
  74
  75  For a dense source space, we take the transformation matrix, decompose it
  76  into a lower triangular part (H) and a unimodular part (U).
  77  We then compute the auxiliary space from the unimodular part (source loop
  78  nest . U = auxiliary space) , which has two important properties:
  79   1. It traverses the iterations in the same lexicographic order as the source
  80   space.
  81   2. It is a dense space when the source is a dense space (even if the target
  82   space is going to be sparse).
  83
  84  Given the auxiliary space, we use the lower triangular part to compute the
  85  bounds in the target space by simple matrix multiplication.
  86  The gaps in the target space (IE the new loop step sizes) will be the
  87  diagonals of the H matrix.
  88
  89  Sparse source spaces require another step, because you can't directly compute
  90  the exact bounds of the auxiliary and target space from the sparse space.
  91  Rather than try to come up with a separate algorithm to handle sparse source
  92  spaces directly, we just find a legal transformation matrix that gives you
  93  the sparse source space, from a dense space, and then transform the dense
  94  space.
  95
  96  For a regular sparse space, you can represent the source space as an integer
  97  lattice, and the base space of that lattice will always be dense.  Thus, we
  98  effectively use the lattice to figure out the transformation from the lattice
  99  base space, to the sparse iteration space (IE what transform was applied to
 100  the dense space to make it sparse).  We then compose this transform with the
 101  transformation matrix specified by the user (since our matrix transformations
 102  are closed under composition, this is okay).  We can then use the base space
 103  (which is dense) plus the composed transformation matrix, to compute the rest
 104  of the transform using the dense space algorithm above.
 105
 106  In other words, our sparse source space (B) is decomposed into a dense base
 107  space (A), and a matrix (L) that transforms A into B, such that A.L = B.
 108  We then compute the composition of L and the user transformation matrix (T),
 109  so that T is now a transform from A to the result, instead of from B to the
 110  result.
 111  IE A.(LT) = result instead of B.T = result
 112  Since A is now a dense source space, we can use the dense source space
 113  algorithm above to compute the result of applying transform (LT) to A.
 114
 115  Fourier-Motzkin elimination is used to compute the bounds of the base space
 116  of the lattice.  */
 117
 118 static bool perfect_nestify (struct loop *, VEC(tree,heap) *,
 119                              VEC(tree,heap) *, VEC(int,heap) *,
 120                              VEC(tree,heap) *);
 121 /* Lattice stuff that is internal to the code generation algorithm.  */
 122
 123 typedef struct lambda_lattice_s
 124 {
 125   /* Lattice base matrix.  */
 126   lambda_matrix base;
 127   /* Lattice dimension.  */
 128   int dimension;
 129   /* Origin vector for the coefficients.  */
 130   lambda_vector origin;
 131   /* Origin matrix for the invariants.  */
 132   lambda_matrix origin_invariants;
 133   /* Number of invariants.  */
 134   int invariants;
 135 } *lambda_lattice;
 136
 137 #define LATTICE_BASE(T) ((T)->base)
 138 #define LATTICE_DIMENSION(T) ((T)->dimension)
 139 #define LATTICE_ORIGIN(T) ((T)->origin)
 140 #define LATTICE_ORIGIN_INVARIANTS(T) ((T)->origin_invariants)
 141 #define LATTICE_INVARIANTS(T) ((T)->invariants)
 142
 143 static bool lle_equal (lambda_linear_expression, lambda_linear_expression,
 144                        int, int);
 145 static lambda_lattice lambda_lattice_new (int, int, struct obstack *);
 146 static lambda_lattice lambda_lattice_compute_base (lambda_loopnest,
 147                                                    struct obstack *);
 148
 149 static tree find_induction_var_from_exit_cond (struct loop *);
 150 static bool can_convert_to_perfect_nest (struct loop *);
 151
 152 /* Create a new lambda body vector.  */
 153
 154 lambda_body_vector
 155 lambda_body_vector_new (int size, struct obstack * lambda_obstack)
 156 {
 157   lambda_body_vector ret;
 158
 159   ret = (lambda_body_vector)obstack_alloc (lambda_obstack, sizeof (*ret));
 160   LBV_COEFFICIENTS (ret) = lambda_vector_new (size);
 161   LBV_SIZE (ret) = size;
 162   LBV_DENOMINATOR (ret) = 1;
 163   return ret;
 164 }
 165
 166 /* Compute the new coefficients for the vector based on the
 167   *inverse* of the transformation matrix.  */
 168
 169 lambda_body_vector
 170 lambda_body_vector_compute_new (lambda_trans_matrix transform,
 171                                 lambda_body_vector vect,
 172                                 struct obstack * lambda_obstack)
 173 {
 174   lambda_body_vector temp;
 175   int depth;
 176
 177   /* Make sure the matrix is square.  */
 178   gcc_assert (LTM_ROWSIZE (transform) == LTM_COLSIZE (transform));
 179
 180   depth = LTM_ROWSIZE (transform);
 181
 182   temp = lambda_body_vector_new (depth, lambda_obstack);
 183   LBV_DENOMINATOR (temp) =
 184     LBV_DENOMINATOR (vect) * LTM_DENOMINATOR (transform);
 185   lambda_vector_matrix_mult (LBV_COEFFICIENTS (vect), depth,
 186                              LTM_MATRIX (transform), depth,
 187                              LBV_COEFFICIENTS (temp));
 188   LBV_SIZE (temp) = LBV_SIZE (vect);
 189   return temp;
 190 }
 191
 192 /* Print out a lambda body vector.  */
 193
 194 void
 195 print_lambda_body_vector (FILE * outfile, lambda_body_vector body)
 196 {
 197   print_lambda_vector (outfile, LBV_COEFFICIENTS (body), LBV_SIZE (body));
 198 }
 199
 200 /* Return TRUE if two linear expressions are equal.  */
 201
 202 static bool
 203 lle_equal (lambda_linear_expression lle1, lambda_linear_expression lle2,
 204            int depth, int invariants)
 205 {
 206   int i;
 207
 208   if (lle1 == NULL || lle2 == NULL)
 209     return false;
 210   if (LLE_CONSTANT (lle1) != LLE_CONSTANT (lle2))
 211     return false;
 212   if (LLE_DENOMINATOR (lle1) != LLE_DENOMINATOR (lle2))
 213     return false;
 214   for (i = 0; i < depth; i++)
 215     if (LLE_COEFFICIENTS (lle1)[i] != LLE_COEFFICIENTS (lle2)[i])
 216       return false;
 217   for (i = 0; i < invariants; i++)
 218     if (LLE_INVARIANT_COEFFICIENTS (lle1)[i] !=
 219         LLE_INVARIANT_COEFFICIENTS (lle2)[i])
 220       return false;
 221   return true;
 222 }
 223
 224 /* Create a new linear expression with dimension DIM, and total number
 225    of invariants INVARIANTS.  */
 226
 227 lambda_linear_expression
 228 lambda_linear_expression_new (int dim, int invariants,
 229                               struct obstack * lambda_obstack)
 230 {
 231   lambda_linear_expression ret;
 232
 233   ret = (lambda_linear_expression)obstack_alloc (lambda_obstack,
 234                                                  sizeof (*ret));
 235   LLE_COEFFICIENTS (ret) = lambda_vector_new (dim);
 236   LLE_CONSTANT (ret) = 0;
 237   LLE_INVARIANT_COEFFICIENTS (ret) = lambda_vector_new (invariants);
 238   LLE_DENOMINATOR (ret) = 1;
 239   LLE_NEXT (ret) = NULL;
 240
 241   return ret;
 242 }
 243
 244 /* Print out a linear expression EXPR, with SIZE coefficients, to OUTFILE.
 245    The starting letter used for variable names is START.  */
 246
 247 static void
 248 print_linear_expression (FILE * outfile, lambda_vector expr, int size,
 249                          char start)
 250 {
 251   int i;
 252   bool first = true;
 253   for (i = 0; i < size; i++)
 254     {
 255       if (expr[i] != 0)
 256         {
 257           if (first)
 258             {
 259               if (expr[i] < 0)
 260                 fprintf (outfile, "-");
 261               first = false;
 262             }
 263           else if (expr[i] > 0)
 264             fprintf (outfile, " + ");
 265           else
 266             fprintf (outfile, " - ");
 267           if (abs (expr[i]) == 1)
 268             fprintf (outfile, "%c", start + i);
 269           else
 270             fprintf (outfile, "%d%c", abs (expr[i]), start + i);
 271         }
 272     }
 273 }
 274
 275 /* Print out a lambda linear expression structure, EXPR, to OUTFILE. The
 276    depth/number of coefficients is given by DEPTH, the number of invariants is
 277    given by INVARIANTS, and the character to start variable names with is given
 278    by START.  */
 279
 280 void
 281 print_lambda_linear_expression (FILE * outfile,
 282                                 lambda_linear_expression expr,
 283                                 int depth, int invariants, char start)
 284 {
 285   fprintf (outfile, "\tLinear expression: ");
 286   print_linear_expression (outfile, LLE_COEFFICIENTS (expr), depth, start);
 287   fprintf (outfile, " constant: %d ", LLE_CONSTANT (expr));
 288   fprintf (outfile, "  invariants: ");
 289   print_linear_expression (outfile, LLE_INVARIANT_COEFFICIENTS (expr),
 290                            invariants, 'A');
 291   fprintf (outfile, "  denominator: %d\n", LLE_DENOMINATOR (expr));
 292 }
 293
 294 /* Print a lambda loop structure LOOP to OUTFILE.  The depth/number of
 295    coefficients is given by DEPTH, the number of invariants is
 296    given by INVARIANTS, and the character to start variable names with is given
 297    by START.  */
 298
 299 void
 300 print_lambda_loop (FILE * outfile, lambda_loop loop, int depth,
 301                    int invariants, char start)
 302 {
 303   int step;
 304   lambda_linear_expression expr;
 305
 306   gcc_assert (loop);
 307
 308   expr = LL_LINEAR_OFFSET (loop);
 309   step = LL_STEP (loop);
 310   fprintf (outfile, "  step size = %d \n", step);
 311
 312   if (expr)
 313     {
 314       fprintf (outfile, "  linear offset: \n");
 315       print_lambda_linear_expression (outfile, expr, depth, invariants,
 316                                       start);
 317     }
 318
 319   fprintf (outfile, "  lower bound: \n");
 320   for (expr = LL_LOWER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr))
 321     print_lambda_linear_expression (outfile, expr, depth, invariants, start);
 322   fprintf (outfile, "  upper bound: \n");
 323   for (expr = LL_UPPER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr))
 324     print_lambda_linear_expression (outfile, expr, depth, invariants, start);
 325 }
 326
 327 /* Create a new loop nest structure with DEPTH loops, and INVARIANTS as the
 328    number of invariants.  */
 329
 330 lambda_loopnest
 331 lambda_loopnest_new (int depth, int invariants,
 332                      struct obstack * lambda_obstack)
 333 {
 334   lambda_loopnest ret;
 335   ret = (lambda_loopnest)obstack_alloc (lambda_obstack, sizeof (*ret));
 336
 337   LN_LOOPS (ret) = (lambda_loop *)
 338       obstack_alloc (lambda_obstack, depth * sizeof(LN_LOOPS(ret)));
 339   LN_DEPTH (ret) = depth;
 340   LN_INVARIANTS (ret) = invariants;
 341
 342   return ret;
 343 }
 344
 345 /* Print a lambda loopnest structure, NEST, to OUTFILE.  The starting
 346    character to use for loop names is given by START.  */
 347
 348 void
 349 print_lambda_loopnest (FILE * outfile, lambda_loopnest nest, char start)
 350 {
 351   int i;
 352   for (i = 0; i < LN_DEPTH (nest); i++)
 353     {
 354       fprintf (outfile, "Loop %c\n", start + i);
 355       print_lambda_loop (outfile, LN_LOOPS (nest)[i], LN_DEPTH (nest),
 356                          LN_INVARIANTS (nest), 'i');
 357       fprintf (outfile, "\n");
 358     }
 359 }
 360
 361 /* Allocate a new lattice structure of DEPTH x DEPTH, with INVARIANTS number
 362    of invariants.  */
 363
 364 static lambda_lattice
 365 lambda_lattice_new (int depth, int invariants, struct obstack * lambda_obstack)
 366 {
 367   lambda_lattice ret
 368       = (lambda_lattice)obstack_alloc (lambda_obstack, sizeof (*ret));
 369   LATTICE_BASE (ret) = lambda_matrix_new (depth, depth);
 370   LATTICE_ORIGIN (ret) = lambda_vector_new (depth);
 371   LATTICE_ORIGIN_INVARIANTS (ret) = lambda_matrix_new (depth, invariants);
 372   LATTICE_DIMENSION (ret) = depth;
 373   LATTICE_INVARIANTS (ret) = invariants;
 374   return ret;
 375 }
 376
 377 /* Compute the lattice base for NEST.  The lattice base is essentially a
 378    non-singular transform from a dense base space to a sparse iteration space.
 379    We use it so that we don't have to specially handle the case of a sparse
 380    iteration space in other parts of the algorithm.  As a result, this routine
 381    only does something interesting (IE produce a matrix that isn't the
 382    identity matrix) if NEST is a sparse space.  */
 383
 384 static lambda_lattice
 385 lambda_lattice_compute_base (lambda_loopnest nest,
 386                              struct obstack * lambda_obstack)
 387 {
 388   lambda_lattice ret;
 389   int depth, invariants;
 390   lambda_matrix base;
 391
 392   int i, j, step;
 393   lambda_loop loop;
 394   lambda_linear_expression expression;
 395
 396   depth = LN_DEPTH (nest);
 397   invariants = LN_INVARIANTS (nest);
 398
 399   ret = lambda_lattice_new (depth, invariants, lambda_obstack);
 400   base = LATTICE_BASE (ret);
 401   for (i = 0; i < depth; i++)
 402     {
 403       loop = LN_LOOPS (nest)[i];
 404       gcc_assert (loop);
 405       step = LL_STEP (loop);
 406       /* If we have a step of 1, then the base is one, and the
 407          origin and invariant coefficients are 0.  */
 408       if (step == 1)
 409         {
 410           for (j = 0; j < depth; j++)
 411             base[i][j] = 0;
 412           base[i][i] = 1;
 413           LATTICE_ORIGIN (ret)[i] = 0;
 414           for (j = 0; j < invariants; j++)
 415             LATTICE_ORIGIN_INVARIANTS (ret)[i][j] = 0;
 416         }
 417       else
 418         {
 419           /* Otherwise, we need the lower bound expression (which must
 420              be an affine function)  to determine the base.  */
 421           expression = LL_LOWER_BOUND (loop);
 422           gcc_assert (expression && !LLE_NEXT (expression)
 423                       && LLE_DENOMINATOR (expression) == 1);
 424
 425           /* The lower triangular portion of the base is going to be the
 426              coefficient times the step */
 427           for (j = 0; j < i; j++)
 428             base[i][j] = LLE_COEFFICIENTS (expression)[j]
 429               * LL_STEP (LN_LOOPS (nest)[j]);
 430           base[i][i] = step;
 431           for (j = i + 1; j < depth; j++)
 432             base[i][j] = 0;
 433
 434           /* Origin for this loop is the constant of the lower bound
 435              expression.  */
 436           LATTICE_ORIGIN (ret)[i] = LLE_CONSTANT (expression);
 437
 438           /* Coefficient for the invariants are equal to the invariant
 439              coefficients in the expression.  */
 440           for (j = 0; j < invariants; j++)
 441             LATTICE_ORIGIN_INVARIANTS (ret)[i][j] =
 442               LLE_INVARIANT_COEFFICIENTS (expression)[j];
 443         }
 444     }
 445   return ret;
 446 }
 447
 448 /* Compute the least common multiple of two numbers A and B .  */
 449
 450 int
 451 least_common_multiple (int a, int b)
 452 {
 453   return (abs (a) * abs (b) / gcd (a, b));
 454 }
 455
 456 /* Perform Fourier-Motzkin elimination to calculate the bounds of the
 457    auxiliary nest.
 458    Fourier-Motzkin is a way of reducing systems of linear inequalities so that
 459    it is easy to calculate the answer and bounds.
 460    A sketch of how it works:
 461    Given a system of linear inequalities, ai * xj >= bk, you can always
 462    rewrite the constraints so they are all of the form
 463    a <= x, or x <= b, or x >= constant for some x in x1 ... xj (and some b
 464    in b1 ... bk, and some a in a1...ai)
 465    You can then eliminate this x from the non-constant inequalities by
 466    rewriting these as a <= b, x >= constant, and delete the x variable.
 467    You can then repeat this for any remaining x variables, and then we have
 468    an easy to use variable <= constant (or no variables at all) form that we
 469    can construct our bounds from.
 470
 471    In our case, each time we eliminate, we construct part of the bound from
 472    the ith variable, then delete the ith variable.
 473
 474    Remember the constant are in our vector a, our coefficient matrix is A,
 475    and our invariant coefficient matrix is B.
 476
 477    SIZE is the size of the matrices being passed.
 478    DEPTH is the loop nest depth.
 479    INVARIANTS is the number of loop invariants.
 480    A, B, and a are the coefficient matrix, invariant coefficient, and a
 481    vector of constants, respectively.  */
 482
 483 static lambda_loopnest
 484 compute_nest_using_fourier_motzkin (int size,
 485                                     int depth,
 486                                     int invariants,
 487                                     lambda_matrix A,
 488                                     lambda_matrix B,
 489                                     lambda_vector a,
 490                                     struct obstack * lambda_obstack)
 491 {
 492
 493   int multiple, f1, f2;
 494   int i, j, k;
 495   lambda_linear_expression expression;
 496   lambda_loop loop;
 497   lambda_loopnest auxillary_nest;
 498   lambda_matrix swapmatrix, A1, B1;
 499   lambda_vector swapvector, a1;
 500   int newsize;
 501
 502   A1 = lambda_matrix_new (128, depth);
 503   B1 = lambda_matrix_new (128, invariants);
 504   a1 = lambda_vector_new (128);
 505
 506   auxillary_nest = lambda_loopnest_new (depth, invariants, lambda_obstack);
 507
 508   for (i = depth - 1; i >= 0; i--)
 509     {
 510       loop = lambda_loop_new ();
 511       LN_LOOPS (auxillary_nest)[i] = loop;
 512       LL_STEP (loop) = 1;
 513
 514       for (j = 0; j < size; j++)
 515         {
 516           if (A[j][i] < 0)
 517             {
 518               /* Any linear expression in the matrix with a coefficient less
 519                  than 0 becomes part of the new lower bound.  */
 520               expression = lambda_linear_expression_new (depth, invariants,
 521                                                          lambda_obstack);
 522
 523               for (k = 0; k < i; k++)
 524                 LLE_COEFFICIENTS (expression)[k] = A[j][k];
 525
 526               for (k = 0; k < invariants; k++)
 527                 LLE_INVARIANT_COEFFICIENTS (expression)[k] = -1 * B[j][k];
 528
 529               LLE_DENOMINATOR (expression) = -1 * A[j][i];
 530               LLE_CONSTANT (expression) = -1 * a[j];
 531
 532               /* Ignore if identical to the existing lower bound.  */
 533               if (!lle_equal (LL_LOWER_BOUND (loop),
 534                               expression, depth, invariants))
 535                 {
 536                   LLE_NEXT (expression) = LL_LOWER_BOUND (loop);
 537                   LL_LOWER_BOUND (loop) = expression;
 538                 }
 539
 540             }
 541           else if (A[j][i] > 0)
 542             {
 543               /* Any linear expression with a coefficient greater than 0
 544                  becomes part of the new upper bound.  */
 545               expression = lambda_linear_expression_new (depth, invariants,
 546                                                          lambda_obstack);
 547               for (k = 0; k < i; k++)
 548                 LLE_COEFFICIENTS (expression)[k] = -1 * A[j][k];
 549
 550               for (k = 0; k < invariants; k++)
 551                 LLE_INVARIANT_COEFFICIENTS (expression)[k] = B[j][k];
 552
 553               LLE_DENOMINATOR (expression) = A[j][i];
 554               LLE_CONSTANT (expression) = a[j];
 555
 556               /* Ignore if identical to the existing upper bound.  */
 557               if (!lle_equal (LL_UPPER_BOUND (loop),
 558                               expression, depth, invariants))
 559                 {
 560                   LLE_NEXT (expression) = LL_UPPER_BOUND (loop);
 561                   LL_UPPER_BOUND (loop) = expression;
 562                 }
 563
 564             }
 565         }
 566
 567       /* This portion creates a new system of linear inequalities by deleting
 568          the i'th variable, reducing the system by one variable.  */
 569       newsize = 0;
 570       for (j = 0; j < size; j++)
 571         {
 572           /* If the coefficient for the i'th variable is 0, then we can just
 573              eliminate the variable straightaway.  Otherwise, we have to
 574              multiply through by the coefficients we are eliminating.  */
 575           if (A[j][i] == 0)
 576             {
 577               lambda_vector_copy (A[j], A1[newsize], depth);
 578               lambda_vector_copy (B[j], B1[newsize], invariants);
 579               a1[newsize] = a[j];
 580               newsize++;
 581             }
 582           else if (A[j][i] > 0)
 583             {
 584               for (k = 0; k < size; k++)
 585                 {
 586                   if (A[k][i] < 0)
 587                     {
 588                       multiple = least_common_multiple (A[j][i], A[k][i]);
 589                       f1 = multiple / A[j][i];
 590                       f2 = -1 * multiple / A[k][i];
 591
 592                       lambda_vector_add_mc (A[j], f1, A[k], f2,
 593                                             A1[newsize], depth);
 594                       lambda_vector_add_mc (B[j], f1, B[k], f2,
 595                                             B1[newsize], invariants);
 596                       a1[newsize] = f1 * a[j] + f2 * a[k];
 597                       newsize++;
 598                     }
 599                 }
 600             }
 601         }
 602
 603       swapmatrix = A;
 604       A = A1;
 605       A1 = swapmatrix;
 606
 607       swapmatrix = B;
 608       B = B1;
 609       B1 = swapmatrix;
 610
 611       swapvector = a;
 612       a = a1;
 613       a1 = swapvector;
 614
 615       size = newsize;
 616     }
 617
 618   return auxillary_nest;
 619 }
 620
 621 /* Compute the loop bounds for the auxiliary space NEST.
 622    Input system used is Ax <= b.  TRANS is the unimodular transformation.
 623    Given the original nest, this function will
 624    1. Convert the nest into matrix form, which consists of a matrix for the
 625    coefficients, a matrix for the
 626    invariant coefficients, and a vector for the constants.
 627    2. Use the matrix form to calculate the lattice base for the nest (which is
 628    a dense space)
 629    3. Compose the dense space transform with the user specified transform, to
 630    get a transform we can easily calculate transformed bounds for.
 631    4. Multiply the composed transformation matrix times the matrix form of the
 632    loop.
 633    5. Transform the newly created matrix (from step 4) back into a loop nest
 634    using Fourier-Motzkin elimination to figure out the bounds.  */
 635
 636 static lambda_loopnest
 637 lambda_compute_auxillary_space (lambda_loopnest nest,
 638                                 lambda_trans_matrix trans,
 639                                 struct obstack * lambda_obstack)
 640 {
 641   lambda_matrix A, B, A1, B1;
 642   lambda_vector a, a1;
 643   lambda_matrix invertedtrans;
 644   int depth, invariants, size;
 645   int i, j;
 646   lambda_loop loop;
 647   lambda_linear_expression expression;
 648   lambda_lattice lattice;
 649
 650   depth = LN_DEPTH (nest);
 651   invariants = LN_INVARIANTS (nest);
 652
 653   /* Unfortunately, we can't know the number of constraints we'll have
 654      ahead of time, but this should be enough even in ridiculous loop nest
 655      cases. We must not go over this limit.  */
 656   A = lambda_matrix_new (128, depth);
 657   B = lambda_matrix_new (128, invariants);
 658   a = lambda_vector_new (128);
 659
 660   A1 = lambda_matrix_new (128, depth);
 661   B1 = lambda_matrix_new (128, invariants);
 662   a1 = lambda_vector_new (128);
 663
 664   /* Store the bounds in the equation matrix A, constant vector a, and
 665      invariant matrix B, so that we have Ax <= a + B.
 666      This requires a little equation rearranging so that everything is on the
 667      correct side of the inequality.  */
 668   size = 0;
 669   for (i = 0; i < depth; i++)
 670     {
 671       loop = LN_LOOPS (nest)[i];
 672
 673       /* First we do the lower bound.  */
 674       if (LL_STEP (loop) > 0)
 675         expression = LL_LOWER_BOUND (loop);
 676       else
 677         expression = LL_UPPER_BOUND (loop);
 678
 679       for (; expression != NULL; expression = LLE_NEXT (expression))
 680         {
 681           /* Fill in the coefficient.  */
 682           for (j = 0; j < i; j++)
 683             A[size][j] = LLE_COEFFICIENTS (expression)[j];
 684
 685           /* And the invariant coefficient.  */
 686           for (j = 0; j < invariants; j++)
 687             B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j];
 688
 689           /* And the constant.  */
 690           a[size] = LLE_CONSTANT (expression);
 691
 692           /* Convert (2x+3y+2+b)/4 <= z to 2x+3y-4z <= -2-b.  IE put all
 693              constants and single variables on   */
 694           A[size][i] = -1 * LLE_DENOMINATOR (expression);
 695           a[size] *= -1;
 696           for (j = 0; j < invariants; j++)
 697             B[size][j] *= -1;
 698
 699           size++;
 700           /* Need to increase matrix sizes above.  */
 701           gcc_assert (size <= 127);
 702
 703         }
 704
 705       /* Then do the exact same thing for the upper bounds.  */
 706       if (LL_STEP (loop) > 0)
 707         expression = LL_UPPER_BOUND (loop);
 708       else
 709         expression = LL_LOWER_BOUND (loop);
 710
 711       for (; expression != NULL; expression = LLE_NEXT (expression))
 712         {
 713           /* Fill in the coefficient.  */
 714           for (j = 0; j < i; j++)
 715             A[size][j] = LLE_COEFFICIENTS (expression)[j];
 716
 717           /* And the invariant coefficient.  */
 718           for (j = 0; j < invariants; j++)
 719             B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j];
 720
 721           /* And the constant.  */
 722           a[size] = LLE_CONSTANT (expression);
 723
 724           /* Convert z <= (2x+3y+2+b)/4 to -2x-3y+4z <= 2+b.  */
 725           for (j = 0; j < i; j++)
 726             A[size][j] *= -1;
 727           A[size][i] = LLE_DENOMINATOR (expression);
 728           size++;
 729           /* Need to increase matrix sizes above.  */
 730           gcc_assert (size <= 127);
 731
 732         }
 733     }
 734
 735   /* Compute the lattice base x = base * y + origin, where y is the
 736      base space.  */
 737   lattice = lambda_lattice_compute_base (nest, lambda_obstack);
 738
 739   /* Ax <= a + B then becomes ALy <= a+B - A*origin.  L is the lattice base  */
 740
 741   /* A1 = A * L */
 742   lambda_matrix_mult (A, LATTICE_BASE (lattice), A1, size, depth, depth);
 743
 744   /* a1 = a - A * origin constant.  */
 745   lambda_matrix_vector_mult (A, size, depth, LATTICE_ORIGIN (lattice), a1);
 746   lambda_vector_add_mc (a, 1, a1, -1, a1, size);
 747
 748   /* B1 = B - A * origin invariant.  */
 749   lambda_matrix_mult (A, LATTICE_ORIGIN_INVARIANTS (lattice), B1, size, depth,
 750                       invariants);
 751   lambda_matrix_add_mc (B, 1, B1, -1, B1, size, invariants);
 752
 753   /* Now compute the auxiliary space bounds by first inverting U, multiplying
 754      it by A1, then performing Fourier-Motzkin.  */
 755
 756   invertedtrans = lambda_matrix_new (depth, depth);
 757
 758   /* Compute the inverse of U.  */
 759   lambda_matrix_inverse (LTM_MATRIX (trans),
 760                          invertedtrans, depth);
 761
 762   /* A = A1 inv(U).  */
 763   lambda_matrix_mult (A1, invertedtrans, A, size, depth, depth);
 764
 765   return compute_nest_using_fourier_motzkin (size, depth, invariants,
 766                                              A, B1, a1, lambda_obstack);
 767 }
 768
 769 /* Compute the loop bounds for the target space, using the bounds of
 770    the auxiliary nest AUXILLARY_NEST, and the triangular matrix H.
 771    The target space loop bounds are computed by multiplying the triangular
 772    matrix H by the auxiliary nest, to get the new loop bounds.  The sign of
 773    the loop steps (positive or negative) is then used to swap the bounds if
 774    the loop counts downwards.
 775    Return the target loopnest.  */
 776
 777 static lambda_loopnest
 778 lambda_compute_target_space (lambda_loopnest auxillary_nest,
 779                              lambda_trans_matrix H, lambda_vector stepsigns,
 780                              struct obstack * lambda_obstack)
 781 {
 782   lambda_matrix inverse, H1;
 783   int determinant, i, j;
 784   int gcd1, gcd2;
 785   int factor;
 786
 787   lambda_loopnest target_nest;
 788   int depth, invariants;
 789   lambda_matrix target;
 790
 791   lambda_loop auxillary_loop, target_loop;
 792   lambda_linear_expression expression, auxillary_expr, target_expr, tmp_expr;
 793
 794   depth = LN_DEPTH (auxillary_nest);
 795   invariants = LN_INVARIANTS (auxillary_nest);
 796
 797   inverse = lambda_matrix_new (depth, depth);
 798   determinant = lambda_matrix_inverse (LTM_MATRIX (H), inverse, depth);
 799
 800   /* H1 is H excluding its diagonal.  */
 801   H1 = lambda_matrix_new (depth, depth);
 802   lambda_matrix_copy (LTM_MATRIX (H), H1, depth, depth);
 803
 804   for (i = 0; i < depth; i++)
 805     H1[i][i] = 0;
 806
 807   /* Computes the linear offsets of the loop bounds.  */
 808   target = lambda_matrix_new (depth, depth);
 809   lambda_matrix_mult (H1, inverse, target, depth, depth, depth);
 810
 811   target_nest = lambda_loopnest_new (depth, invariants, lambda_obstack);
 812
 813   for (i = 0; i < depth; i++)
 814     {
 815
 816       /* Get a new loop structure.  */
 817       target_loop = lambda_loop_new ();
 818       LN_LOOPS (target_nest)[i] = target_loop;
 819
 820       /* Computes the gcd of the coefficients of the linear part.  */
 821       gcd1 = lambda_vector_gcd (target[i], i);
 822
 823       /* Include the denominator in the GCD.  */
 824       gcd1 = gcd (gcd1, determinant);
 825
 826       /* Now divide through by the gcd.  */
 827       for (j = 0; j < i; j++)
 828         target[i][j] = target[i][j] / gcd1;
 829
 830       expression = lambda_linear_expression_new (depth, invariants,
 831                                                  lambda_obstack);
 832       lambda_vector_copy (target[i], LLE_COEFFICIENTS (expression), depth);
 833       LLE_DENOMINATOR (expression) = determinant / gcd1;
 834       LLE_CONSTANT (expression) = 0;
 835       lambda_vector_clear (LLE_INVARIANT_COEFFICIENTS (expression),
 836                            invariants);
 837       LL_LINEAR_OFFSET (target_loop) = expression;
 838     }
 839
 840   /* For each loop, compute the new bounds from H.  */
 841   for (i = 0; i < depth; i++)
 842     {
 843       auxillary_loop = LN_LOOPS (auxillary_nest)[i];
 844       target_loop = LN_LOOPS (target_nest)[i];
 845       LL_STEP (target_loop) = LTM_MATRIX (H)[i][i];
 846       factor = LTM_MATRIX (H)[i][i];
 847
 848       /* First we do the lower bound.  */
 849       auxillary_expr = LL_LOWER_BOUND (auxillary_loop);
 850
 851       for (; auxillary_expr != NULL;
 852            auxillary_expr = LLE_NEXT (auxillary_expr))
 853         {
 854           target_expr = lambda_linear_expression_new (depth, invariants,
 855                                                       lambda_obstack);
 856           lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
 857                                      depth, inverse, depth,
 858                                      LLE_COEFFICIENTS (target_expr));
 859           lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
 860                                     LLE_COEFFICIENTS (target_expr), depth,
 861                                     factor);
 862
 863           LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
 864           lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
 865                               LLE_INVARIANT_COEFFICIENTS (target_expr),
 866                               invariants);
 867           lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
 868                                     LLE_INVARIANT_COEFFICIENTS (target_expr),
 869                                     invariants, factor);
 870           LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);
 871
 872           if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
 873             {
 874               LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
 875                 * determinant;
 876               lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
 877                                         (target_expr),
 878                                         LLE_INVARIANT_COEFFICIENTS
 879                                         (target_expr), invariants,
 880                                         determinant);
 881               LLE_DENOMINATOR (target_expr) =
 882                 LLE_DENOMINATOR (target_expr) * determinant;
 883             }
 884           /* Find the gcd and divide by it here, rather than doing it
 885              at the tree level.  */
 886           gcd1 = lambda_vector_gcd (LLE_COEFFICIENTS (target_expr), depth);
 887           gcd2 = lambda_vector_gcd (LLE_INVARIANT_COEFFICIENTS (target_expr),
 888                                     invariants);
 889           gcd1 = gcd (gcd1, gcd2);
 890           gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
 891           gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
 892           for (j = 0; j < depth; j++)
 893             LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
 894           for (j = 0; j < invariants; j++)
 895             LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
 896           LLE_CONSTANT (target_expr) /= gcd1;
 897           LLE_DENOMINATOR (target_expr) /= gcd1;
 898           /* Ignore if identical to existing bound.  */
 899           if (!lle_equal (LL_LOWER_BOUND (target_loop), target_expr, depth,
 900                           invariants))
 901             {
 902               LLE_NEXT (target_expr) = LL_LOWER_BOUND (target_loop);
 903               LL_LOWER_BOUND (target_loop) = target_expr;
 904             }
 905         }
 906       /* Now do the upper bound.  */
 907       auxillary_expr = LL_UPPER_BOUND (auxillary_loop);
 908
 909       for (; auxillary_expr != NULL;
 910            auxillary_expr = LLE_NEXT (auxillary_expr))
 911         {
 912           target_expr = lambda_linear_expression_new (depth, invariants,
 913                                                       lambda_obstack);
 914           lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
 915                                      depth, inverse, depth,
 916                                      LLE_COEFFICIENTS (target_expr));
 917           lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
 918                                     LLE_COEFFICIENTS (target_expr), depth,
 919                                     factor);
 920           LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
 921           lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
 922                               LLE_INVARIANT_COEFFICIENTS (target_expr),
 923                               invariants);
 924           lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
 925                                     LLE_INVARIANT_COEFFICIENTS (target_expr),
 926                                     invariants, factor);
 927           LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);
 928
 929           if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
 930             {
 931               LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
 932                 * determinant;
 933               lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
 934                                         (target_expr),
 935                                         LLE_INVARIANT_COEFFICIENTS
 936                                         (target_expr), invariants,
 937                                         determinant);
 938               LLE_DENOMINATOR (target_expr) =
 939                 LLE_DENOMINATOR (target_expr) * determinant;
 940             }
 941           /* Find the gcd and divide by it here, instead of at the
 942              tree level.  */
 943           gcd1 = lambda_vector_gcd (LLE_COEFFICIENTS (target_expr), depth);
 944           gcd2 = lambda_vector_gcd (LLE_INVARIANT_COEFFICIENTS (target_expr),
 945                                     invariants);
 946           gcd1 = gcd (gcd1, gcd2);
 947           gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
 948           gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
 949           for (j = 0; j < depth; j++)
 950             LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
 951           for (j = 0; j < invariants; j++)
 952             LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
 953           LLE_CONSTANT (target_expr) /= gcd1;
 954           LLE_DENOMINATOR (target_expr) /= gcd1;
 955           /* Ignore if equal to existing bound.  */
 956           if (!lle_equal (LL_UPPER_BOUND (target_loop), target_expr, depth,
 957                           invariants))
 958             {
 959               LLE_NEXT (target_expr) = LL_UPPER_BOUND (target_loop);
 960               LL_UPPER_BOUND (target_loop) = target_expr;
 961             }
 962         }
 963     }
 964   for (i = 0; i < depth; i++)
 965     {
 966       target_loop = LN_LOOPS (target_nest)[i];
 967       /* If necessary, exchange the upper and lower bounds and negate
 968          the step size.  */
 969       if (stepsigns[i] < 0)
 970         {
 971           LL_STEP (target_loop) *= -1;
 972           tmp_expr = LL_LOWER_BOUND (target_loop);
 973           LL_LOWER_BOUND (target_loop) = LL_UPPER_BOUND (target_loop);
 974           LL_UPPER_BOUND (target_loop) = tmp_expr;
 975         }
 976     }
 977   return target_nest;
 978 }
 979
 980 /* Compute the step signs of TRANS, using TRANS and stepsigns.  Return the new
 981    result.  */
 982
 983 static lambda_vector
 984 lambda_compute_step_signs (lambda_trans_matrix trans, lambda_vector stepsigns)
 985 {
 986   lambda_matrix matrix, H;
 987   int size;
 988   lambda_vector newsteps;
 989   int i, j, factor, minimum_column;
 990   int temp;
 991
 992   matrix = LTM_MATRIX (trans);
 993   size = LTM_ROWSIZE (trans);
 994   H = lambda_matrix_new (size, size);
 995
 996   newsteps = lambda_vector_new (size);
 997   lambda_vector_copy (stepsigns, newsteps, size);
 998
 999   lambda_matrix_copy (matrix, H, size, size);
1000
1001   for (j = 0; j < size; j++)
1002     {
1003       lambda_vector row;
1004       row = H[j];
1005       for (i = j; i < size; i++)
1006         if (row[i] < 0)
1007           lambda_matrix_col_negate (H, size, i);
1008       while (lambda_vector_first_nz (row, size, j + 1) < size)
1009         {
1010           minimum_column = lambda_vector_min_nz (row, size, j);
1011           lambda_matrix_col_exchange (H, size, j, minimum_column);
1012
1013           temp = newsteps[j];
1014           newsteps[j] = newsteps[minimum_column];
1015           newsteps[minimum_column] = temp;
1016
1017           for (i = j + 1; i < size; i++)
1018             {
1019               factor = row[i] / row[j];
1020               lambda_matrix_col_add (H, size, j, i, -1 * factor);
1021             }
1022         }
1023     }
1024   return newsteps;
1025 }
1026
1027 /* Transform NEST according to TRANS, and return the new loopnest.
1028    This involves
1029    1. Computing a lattice base for the transformation
1030    2. Composing the dense base with the specified transformation (TRANS)
1031    3. Decomposing the combined transformation into a lower triangular portion,
1032    and a unimodular portion.
1033    4. Computing the auxiliary nest using the unimodular portion.
1034    5. Computing the target nest using the auxiliary nest and the lower
1035    triangular portion.  */
1036
1037 lambda_loopnest
1038 lambda_loopnest_transform (lambda_loopnest nest, lambda_trans_matrix trans,
1039                            struct obstack * lambda_obstack)
1040 {
1041   lambda_loopnest auxillary_nest, target_nest;
1042
1043   int depth, invariants;
1044   int i, j;
1045   lambda_lattice lattice;
1046   lambda_trans_matrix trans1, H, U;
1047   lambda_loop loop;
1048   lambda_linear_expression expression;
1049   lambda_vector origin;
1050   lambda_matrix origin_invariants;
1051   lambda_vector stepsigns;
1052   int f;
1053
1054   depth = LN_DEPTH (nest);
1055   invariants = LN_INVARIANTS (nest);
1056
1057   /* Keep track of the signs of the loop steps.  */
1058   stepsigns = lambda_vector_new (depth);
1059   for (i = 0; i < depth; i++)
1060     {
1061       if (LL_STEP (LN_LOOPS (nest)[i]) > 0)
1062         stepsigns[i] = 1;
1063       else
1064         stepsigns[i] = -1;
1065     }
1066
1067   /* Compute the lattice base.  */
1068   lattice = lambda_lattice_compute_base (nest, lambda_obstack);
1069   trans1 = lambda_trans_matrix_new (depth, depth);
1070
1071   /* Multiply the transformation matrix by the lattice base.  */
1072
1073   lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_BASE (lattice),
1074                       LTM_MATRIX (trans1), depth, depth, depth);
1075
1076   /* Compute the Hermite normal form for the new transformation matrix.  */
1077   H = lambda_trans_matrix_new (depth, depth);
1078   U = lambda_trans_matrix_new (depth, depth);
1079   lambda_matrix_hermite (LTM_MATRIX (trans1), depth, LTM_MATRIX (H),
1080                          LTM_MATRIX (U));
1081
1082   /* Compute the auxiliary loop nest's space from the unimodular
1083      portion.  */
1084   auxillary_nest = lambda_compute_auxillary_space (nest, U, lambda_obstack);
1085
1086   /* Compute the loop step signs from the old step signs and the
1087      transformation matrix.  */
1088   stepsigns = lambda_compute_step_signs (trans1, stepsigns);
1089
1090   /* Compute the target loop nest space from the auxiliary nest and
1091      the lower triangular matrix H.  */
1092   target_nest = lambda_compute_target_space (auxillary_nest, H, stepsigns,
1093                                              lambda_obstack);
1094   origin = lambda_vector_new (depth);
1095   origin_invariants = lambda_matrix_new (depth, invariants);
1096   lambda_matrix_vector_mult (LTM_MATRIX (trans), depth, depth,
1097                              LATTICE_ORIGIN (lattice), origin);
1098   lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_ORIGIN_INVARIANTS (lattice),
1099                       origin_invariants, depth, depth, invariants);
1100
1101   for (i = 0; i < depth; i++)
1102     {
1103       loop = LN_LOOPS (target_nest)[i];
1104       expression = LL_LINEAR_OFFSET (loop);
1105       if (lambda_vector_zerop (LLE_COEFFICIENTS (expression), depth))
1106         f = 1;
1107       else
1108         f = LLE_DENOMINATOR (expression);
1109
1110       LLE_CONSTANT (expression) += f * origin[i];
1111
1112       for (j = 0; j < invariants; j++)
1113         LLE_INVARIANT_COEFFICIENTS (expression)[j] +=
1114           f * origin_invariants[i][j];
1115     }
1116
1117   return target_nest;
1118
1119 }
1120
1121 /* Convert a gcc tree expression EXPR to a lambda linear expression, and
1122    return the new expression.  DEPTH is the depth of the loopnest.
1123    OUTERINDUCTIONVARS is an array of the induction variables for outer loops
1124    in this nest.  INVARIANTS is the array of invariants for the loop.  EXTRA
1125    is the amount we have to add/subtract from the expression because of the
1126    type of comparison it is used in.  */
1127
1128 static lambda_linear_expression
1129 gcc_tree_to_linear_expression (int depth, tree expr,
1130                                VEC(tree,heap) *outerinductionvars,
1131                                VEC(tree,heap) *invariants, int extra,
1132                                struct obstack * lambda_obstack)
1133 {
1134   lambda_linear_expression lle = NULL;
1135   switch (TREE_CODE (expr))
1136     {
1137     case INTEGER_CST:
1138       {
1139         lle = lambda_linear_expression_new (depth, 2 * depth, lambda_obstack);
1140         LLE_CONSTANT (lle) = TREE_INT_CST_LOW (expr);
1141         if (extra != 0)
1142           LLE_CONSTANT (lle) += extra;
1143
1144         LLE_DENOMINATOR (lle) = 1;
1145       }
1146       break;
1147     case SSA_NAME:
1148       {
1149         tree iv, invar;
1150         size_t i;
1151         for (i = 0; VEC_iterate (tree, outerinductionvars, i, iv); i++)
1152           if (iv != NULL)
1153             {
1154               if (SSA_NAME_VAR (iv) == SSA_NAME_VAR (expr))
1155                 {
1156                   lle = lambda_linear_expression_new (depth, 2 * depth,
1157                                                       lambda_obstack);
1158                   LLE_COEFFICIENTS (lle)[i] = 1;
1159                   if (extra != 0)
1160                     LLE_CONSTANT (lle) = extra;
1161
1162                   LLE_DENOMINATOR (lle) = 1;
1163                 }
1164             }
1165         for (i = 0; VEC_iterate (tree, invariants, i, invar); i++)
1166           if (invar != NULL)
1167             {
1168               if (SSA_NAME_VAR (invar) == SSA_NAME_VAR (expr))
1169                 {
1170                   lle = lambda_linear_expression_new (depth, 2 * depth,
1171                                                       lambda_obstack);
1172                   LLE_INVARIANT_COEFFICIENTS (lle)[i] = 1;
1173                   if (extra != 0)
1174                     LLE_CONSTANT (lle) = extra;
1175                   LLE_DENOMINATOR (lle) = 1;
1176                 }
1177             }
1178       }
1179       break;
1180     default:
1181       return NULL;
1182     }
1183
1184   return lle;
1185 }
1186
1187 /* Return the depth of the loopnest NEST */
1188
1189 static int
1190 depth_of_nest (struct loop *nest)
1191 {
1192   size_t depth = 0;
1193   while (nest)
1194     {
1195       depth++;
1196       nest = nest->inner;
1197     }
1198   return depth;
1199 }
1200
1201
1202 /* Return true if OP is invariant in LOOP and all outer loops.  */
1203
1204 static bool
1205 invariant_in_loop_and_outer_loops (struct loop *loop, tree op)
1206 {
1207   if (is_gimple_min_invariant (op))
1208     return true;
1209   if (loop_depth (loop) == 0)
1210     return true;
1211   if (!expr_invariant_in_loop_p (loop, op))
1212     return false;
1213   if (!invariant_in_loop_and_outer_loops (loop_outer (loop), op))
1214     return false;
1215   return true;
1216 }
1217
1218 /* Generate a lambda loop from a gcc loop LOOP.  Return the new lambda loop,
1219    or NULL if it could not be converted.
1220    DEPTH is the depth of the loop.
1221    INVARIANTS is a pointer to the array of loop invariants.
1222    The induction variable for this loop should be stored in the parameter
1223    OURINDUCTIONVAR.
1224    OUTERINDUCTIONVARS is an array of induction variables for outer loops.  */
1225
1226 static lambda_loop
1227 gcc_loop_to_lambda_loop (struct loop *loop, int depth,
1228                          VEC(tree,heap) ** invariants,
1229                          tree * ourinductionvar,
1230                          VEC(tree,heap) * outerinductionvars,
1231                          VEC(tree,heap) ** lboundvars,
1232                          VEC(tree,heap) ** uboundvars,
1233                          VEC(int,heap) ** steps,
1234                          struct obstack * lambda_obstack)
1235 {
1236   tree phi;
1237   tree exit_cond;
1238   tree access_fn, inductionvar;
1239   tree step;
1240   lambda_loop lloop = NULL;
1241   lambda_linear_expression lbound, ubound;
1242   tree test;
1243   int stepint;
1244   int extra = 0;
1245   tree lboundvar, uboundvar, uboundresult;
1246
1247   /* Find out induction var and exit condition.  */
1248   inductionvar = find_induction_var_from_exit_cond (loop);
1249   exit_cond = get_loop_exit_condition (loop);
1250
1251   if (inductionvar == NULL || exit_cond == NULL)
1252     {
1253       if (dump_file && (dump_flags & TDF_DETAILS))
1254         fprintf (dump_file,
1255                  "Unable to convert loop: Cannot determine exit condition or induction variable for loop.\n");
1256       return NULL;
1257     }
1258
1259   test = TREE_OPERAND (exit_cond, 0);
1260
1261   if (SSA_NAME_DEF_STMT (inductionvar) == NULL_TREE)
1262     {
1263
1264       if (dump_file && (dump_flags & TDF_DETAILS))
1265         fprintf (dump_file,
1266                  "Unable to convert loop: Cannot find PHI node for induction variable\n");
1267
1268       return NULL;
1269     }
1270
1271   phi = SSA_NAME_DEF_STMT (inductionvar);
1272   if (TREE_CODE (phi) != PHI_NODE)
1273     {
1274       phi = SINGLE_SSA_TREE_OPERAND (phi, SSA_OP_USE);
1275       if (!phi)
1276         {
1277
1278           if (dump_file && (dump_flags & TDF_DETAILS))
1279             fprintf (dump_file,
1280                      "Unable to convert loop: Cannot find PHI node for induction variable\n");
1281
1282           return NULL;
1283         }
1284
1285       phi = SSA_NAME_DEF_STMT (phi);
1286       if (TREE_CODE (phi) != PHI_NODE)
1287         {
1288
1289           if (dump_file && (dump_flags & TDF_DETAILS))
1290             fprintf (dump_file,
1291                      "Unable to convert loop: Cannot find PHI node for induction variable\n");
1292           return NULL;
1293         }
1294
1295     }
1296
1297   /* The induction variable name/version we want to put in the array is the
1298      result of the induction variable phi node.  */
1299   *ourinductionvar = PHI_RESULT (phi);
1300   access_fn = instantiate_parameters
1301     (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
1302   if (access_fn == chrec_dont_know)
1303     {
1304       if (dump_file && (dump_flags & TDF_DETAILS))
1305         fprintf (dump_file,
1306                  "Unable to convert loop: Access function for induction variable phi is unknown\n");
1307
1308       return NULL;
1309     }
1310
1311   step = evolution_part_in_loop_num (access_fn, loop->num);
1312   if (!step || step == chrec_dont_know)
1313     {
1314       if (dump_file && (dump_flags & TDF_DETAILS))
1315         fprintf (dump_file,
1316                  "Unable to convert loop: Cannot determine step of loop.\n");
1317
1318       return NULL;
1319     }
1320   if (TREE_CODE (step) != INTEGER_CST)
1321     {
1322
1323       if (dump_file && (dump_flags & TDF_DETAILS))
1324         fprintf (dump_file,
1325                  "Unable to convert loop: Step of loop is not integer.\n");
1326       return NULL;
1327     }
1328
1329   stepint = TREE_INT_CST_LOW (step);
1330
1331   /* Only want phis for induction vars, which will have two
1332      arguments.  */
1333   if (PHI_NUM_ARGS (phi) != 2)
1334     {
1335       if (dump_file && (dump_flags & TDF_DETAILS))
1336         fprintf (dump_file,
1337                  "Unable to convert loop: PHI node for induction variable has >2 arguments\n");
1338       return NULL;
1339     }
1340
1341   /* Another induction variable check. One argument's source should be
1342      in the loop, one outside the loop.  */
1343   if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 0)->src)
1344       && flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 1)->src))
1345     {
1346
1347       if (dump_file && (dump_flags & TDF_DETAILS))
1348         fprintf (dump_file,
1349                  "Unable to convert loop: PHI edges both inside loop, or both outside loop.\n");
1350
1351       return NULL;
1352     }
1353
1354   if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 0)->src))
1355     {
1356       lboundvar = PHI_ARG_DEF (phi, 1);
1357       lbound = gcc_tree_to_linear_expression (depth, lboundvar,
1358                                               outerinductionvars, *invariants,
1359                                               0, lambda_obstack);
1360     }
1361   else
1362     {
1363       lboundvar = PHI_ARG_DEF (phi, 0);
1364       lbound = gcc_tree_to_linear_expression (depth, lboundvar,
1365                                               outerinductionvars, *invariants,
1366                                               0, lambda_obstack);
1367     }
1368
1369   if (!lbound)
1370     {
1371
1372       if (dump_file && (dump_flags & TDF_DETAILS))
1373         fprintf (dump_file,
1374                  "Unable to convert loop: Cannot convert lower bound to linear expression\n");
1375
1376       return NULL;
1377     }
1378   /* One part of the test may be a loop invariant tree.  */
1379   VEC_reserve (tree, heap, *invariants, 1);
1380   if (TREE_CODE (TREE_OPERAND (test, 1)) == SSA_NAME
1381       && invariant_in_loop_and_outer_loops (loop, TREE_OPERAND (test, 1)))
1382     VEC_quick_push (tree, *invariants, TREE_OPERAND (test, 1));
1383   else if (TREE_CODE (TREE_OPERAND (test, 0)) == SSA_NAME
1384            && invariant_in_loop_and_outer_loops (loop, TREE_OPERAND (test, 0)))
1385     VEC_quick_push (tree, *invariants, TREE_OPERAND (test, 0));
1386
1387   /* The non-induction variable part of the test is the upper bound variable.
1388    */
1389   if (TREE_OPERAND (test, 0) == inductionvar)
1390     uboundvar = TREE_OPERAND (test, 1);
1391   else
1392     uboundvar = TREE_OPERAND (test, 0);
1393
1394
1395   /* We only size the vectors assuming we have, at max, 2 times as many
1396      invariants as we do loops (one for each bound).
1397      This is just an arbitrary number, but it has to be matched against the
1398      code below.  */
1399   gcc_assert (VEC_length (tree, *invariants) <= (unsigned int) (2 * depth));
1400
1401
1402   /* We might have some leftover.  */
1403   if (TREE_CODE (test) == LT_EXPR)
1404     extra = -1 * stepint;
1405   else if (TREE_CODE (test) == NE_EXPR)
1406     extra = -1 * stepint;
1407   else if (TREE_CODE (test) == GT_EXPR)
1408     extra = -1 * stepint;
1409   else if (TREE_CODE (test) == EQ_EXPR)
1410     extra = 1 * stepint;
1411
1412   ubound = gcc_tree_to_linear_expression (depth, uboundvar,
1413                                           outerinductionvars,
1414                                           *invariants, extra, lambda_obstack);
1415   uboundresult = build2 (PLUS_EXPR, TREE_TYPE (uboundvar), uboundvar,
1416                          build_int_cst (TREE_TYPE (uboundvar), extra));
1417   VEC_safe_push (tree, heap, *uboundvars, uboundresult);
1418   VEC_safe_push (tree, heap, *lboundvars, lboundvar);
1419   VEC_safe_push (int, heap, *steps, stepint);
1420   if (!ubound)
1421     {
1422       if (dump_file && (dump_flags & TDF_DETAILS))
1423         fprintf (dump_file,
1424                  "Unable to convert loop: Cannot convert upper bound to linear expression\n");
1425       return NULL;
1426     }
1427
1428   lloop = lambda_loop_new ();
1429   LL_STEP (lloop) = stepint;
1430   LL_LOWER_BOUND (lloop) = lbound;
1431   LL_UPPER_BOUND (lloop) = ubound;
1432   return lloop;
1433 }
1434
1435 /* Given a LOOP, find the induction variable it is testing against in the exit
1436    condition.  Return the induction variable if found, NULL otherwise.  */
1437
1438 static tree
1439 find_induction_var_from_exit_cond (struct loop *loop)
1440 {
1441   tree expr = get_loop_exit_condition (loop);
1442   tree ivarop;
1443   tree test;
1444   if (expr == NULL_TREE)
1445     return NULL_TREE;
1446   if (TREE_CODE (expr) != COND_EXPR)
1447     return NULL_TREE;
1448   test = TREE_OPERAND (expr, 0);
1449   if (!COMPARISON_CLASS_P (test))
1450     return NULL_TREE;
1451
1452   /* Find the side that is invariant in this loop. The ivar must be the other
1453      side.  */
1454
1455   if (expr_invariant_in_loop_p (loop, TREE_OPERAND (test, 0)))
1456       ivarop = TREE_OPERAND (test, 1);
1457   else if (expr_invariant_in_loop_p (loop, TREE_OPERAND (test, 1)))
1458       ivarop = TREE_OPERAND (test, 0);
1459   else
1460     return NULL_TREE;
1461
1462   if (TREE_CODE (ivarop) != SSA_NAME)
1463     return NULL_TREE;
1464   return ivarop;
1465 }
1466
1467 DEF_VEC_P(lambda_loop);
1468 DEF_VEC_ALLOC_P(lambda_loop,heap);
1469
1470 /* Generate a lambda loopnest from a gcc loopnest LOOP_NEST.
1471    Return the new loop nest.
1472    INDUCTIONVARS is a pointer to an array of induction variables for the
1473    loopnest that will be filled in during this process.
1474    INVARIANTS is a pointer to an array of invariants that will be filled in
1475    during this process.  */
1476
1477 lambda_loopnest
1478 gcc_loopnest_to_lambda_loopnest (struct loop *loop_nest,
1479                                  VEC(tree,heap) **inductionvars,
1480                                  VEC(tree,heap) **invariants,
1481                                  struct obstack * lambda_obstack)
1482 {
1483   lambda_loopnest ret = NULL;
1484   struct loop *temp = loop_nest;
1485   int depth = depth_of_nest (loop_nest);
1486   size_t i;
1487   VEC(lambda_loop,heap) *loops = NULL;
1488   VEC(tree,heap) *uboundvars = NULL;
1489   VEC(tree,heap) *lboundvars  = NULL;
1490   VEC(int,heap) *steps = NULL;
1491   lambda_loop newloop;
1492   tree inductionvar = NULL;
1493   bool perfect_nest = perfect_nest_p (loop_nest);
1494
1495   if (!perfect_nest && !can_convert_to_perfect_nest (loop_nest))
1496     goto fail;
1497
1498   while (temp)
1499     {
1500       newloop = gcc_loop_to_lambda_loop (temp, depth, invariants,
1501                                          &inductionvar, *inductionvars,
1502                                          &lboundvars, &uboundvars,
1503                                          &steps, lambda_obstack);
1504       if (!newloop)
1505         goto fail;
1506
1507       VEC_safe_push (tree, heap, *inductionvars, inductionvar);
1508       VEC_safe_push (lambda_loop, heap, loops, newloop);
1509       temp = temp->inner;
1510     }
1511
1512   if (!perfect_nest)
1513     {
1514       if (!perfect_nestify (loop_nest, lboundvars, uboundvars, steps,
1515                             *inductionvars))
1516         {
1517           if (dump_file)
1518             fprintf (dump_file,
1519                      "Not a perfect loop nest and couldn't convert to one.\n");
1520           goto fail;
1521         }
1522       else if (dump_file)
1523         fprintf (dump_file,
1524                  "Successfully converted loop nest to perfect loop nest.\n");
1525     }
1526
1527   ret = lambda_loopnest_new (depth, 2 * depth, lambda_obstack);
1528
1529   for (i = 0; VEC_iterate (lambda_loop, loops, i, newloop); i++)
1530     LN_LOOPS (ret)[i] = newloop;
1531
1532  fail:
1533   VEC_free (lambda_loop, heap, loops);
1534   VEC_free (tree, heap, uboundvars);
1535   VEC_free (tree, heap, lboundvars);
1536   VEC_free (int, heap, steps);
1537
1538   return ret;
1539 }
1540
1541 /* Convert a lambda body vector LBV to a gcc tree, and return the new tree.
1542    STMTS_TO_INSERT is a pointer to a tree where the statements we need to be
1543    inserted for us are stored.  INDUCTION_VARS is the array of induction
1544    variables for the loop this LBV is from.  TYPE is the tree type to use for
1545    the variables and trees involved.  */
1546
1547 static tree
1548 lbv_to_gcc_expression (lambda_body_vector lbv,
1549                        tree type, VEC(tree,heap) *induction_vars,
1550                        tree *stmts_to_insert)
1551 {
1552   int k;
1553   tree resvar;
1554   tree expr = build_linear_expr (type, LBV_COEFFICIENTS (lbv), induction_vars);
1555
1556   k = LBV_DENOMINATOR (lbv);
1557   gcc_assert (k != 0);
1558   if (k != 1)
1559     expr = fold_build2 (CEIL_DIV_EXPR, type, expr, build_int_cst (type, k));
1560
1561   resvar = create_tmp_var (type, "lbvtmp");
1562   add_referenced_var (resvar);
1563   return force_gimple_operand (fold (expr), stmts_to_insert, true, resvar);
1564 }
1565
1566 /* Convert a linear expression from coefficient and constant form to a
1567    gcc tree.
1568    Return the tree that represents the final value of the expression.
1569    LLE is the linear expression to convert.
1570    OFFSET is the linear offset to apply to the expression.
1571    TYPE is the tree type to use for the variables and math.
1572    INDUCTION_VARS is a vector of induction variables for the loops.
1573    INVARIANTS is a vector of the loop nest invariants.
1574    WRAP specifies what tree code to wrap the results in, if there is more than
1575    one (it is either MAX_EXPR, or MIN_EXPR).
1576    STMTS_TO_INSERT Is a pointer to the statement list we fill in with
1577    statements that need to be inserted for the linear expression.  */
1578
1579 static tree
1580 lle_to_gcc_expression (lambda_linear_expression lle,
1581                        lambda_linear_expression offset,
1582                        tree type,
1583                        VEC(tree,heap) *induction_vars,
1584                        VEC(tree,heap) *invariants,
1585                        enum tree_code wrap, tree *stmts_to_insert)
1586 {
1587   int k;
1588   tree resvar;
1589   tree expr = NULL_TREE;
1590   VEC(tree,heap) *results = NULL;
1591
1592   gcc_assert (wrap == MAX_EXPR || wrap == MIN_EXPR);
1593
1594   /* Build up the linear expressions.  */
1595   for (; lle != NULL; lle = LLE_NEXT (lle))
1596     {
1597       expr = build_linear_expr (type, LLE_COEFFICIENTS (lle), induction_vars);
1598       expr = fold_build2 (PLUS_EXPR, type, expr,
1599                           build_linear_expr (type,
1600                                              LLE_INVARIANT_COEFFICIENTS (lle),
1601                                              invariants));
1602
1603       k = LLE_CONSTANT (lle);
1604       if (k)
1605         expr = fold_build2 (PLUS_EXPR, type, expr, build_int_cst (type, k));
1606
1607       k = LLE_CONSTANT (offset);
1608       if (k)
1609         expr = fold_build2 (PLUS_EXPR, type, expr, build_int_cst (type, k));
1610
1611       k = LLE_DENOMINATOR (lle);
1612       if (k != 1)
1613         expr = fold_build2 (wrap == MAX_EXPR ? CEIL_DIV_EXPR : FLOOR_DIV_EXPR,
1614                             type, expr, build_int_cst (type, k));
1615
1616       expr = fold (expr);
1617       VEC_safe_push (tree, heap, results, expr);
1618     }
1619
1620   gcc_assert (expr);
1621
1622   /* We may need to wrap the results in a MAX_EXPR or MIN_EXPR.  */
1623   if (VEC_length (tree, results) > 1)
1624     {
1625       size_t i;
1626       tree op;
1627
1628       expr = VEC_index (tree, results, 0);
1629       for (i = 1; VEC_iterate (tree, results, i, op); i++)
1630         expr = fold_build2 (wrap, type, expr, op);
1631     }
1632
1633   VEC_free (tree, heap, results);
1634
1635   resvar = create_tmp_var (type, "lletmp");
1636   add_referenced_var (resvar);
1637   return force_gimple_operand (fold (expr), stmts_to_insert, true, resvar);
1638 }
1639
1640 /* Remove the induction variable defined at IV_STMT.  */
1641
1642 static void
1643 remove_iv (tree iv_stmt)
1644 {
1645   if (TREE_CODE (iv_stmt) == PHI_NODE)
1646     {
1647       int i;
1648
1649       for (i = 0; i < PHI_NUM_ARGS (iv_stmt); i++)
1650         {
1651           tree stmt;
1652           imm_use_iterator imm_iter;
1653           tree arg = PHI_ARG_DEF (iv_stmt, i);
1654           bool used = false;
1655
1656           if (TREE_CODE (arg) != SSA_NAME)
1657             continue;
1658
1659           FOR_EACH_IMM_USE_STMT (stmt, imm_iter, arg)
1660             if (stmt != iv_stmt)
1661               used = true;
1662
1663           if (!used)
1664             remove_iv (SSA_NAME_DEF_STMT (arg));
1665         }
1666
1667       remove_phi_node (iv_stmt, NULL_TREE, true);
1668     }
1669   else
1670     {
1671       block_stmt_iterator bsi = bsi_for_stmt (iv_stmt);
1672
1673       bsi_remove (&bsi, true);
1674       release_defs (iv_stmt);
1675     }
1676 }
1677
1678
1679 /* Transform a lambda loopnest NEW_LOOPNEST, which had TRANSFORM applied to
1680    it, back into gcc code.  This changes the
1681    loops, their induction variables, and their bodies, so that they
1682    match the transformed loopnest.
1683    OLD_LOOPNEST is the loopnest before we've replaced it with the new
1684    loopnest.
1685    OLD_IVS is a vector of induction variables from the old loopnest.
1686    INVARIANTS is a vector of loop invariants from the old loopnest.
1687    NEW_LOOPNEST is the new lambda loopnest to replace OLD_LOOPNEST with.
1688    TRANSFORM is the matrix transform that was applied to OLD_LOOPNEST to get
1689    NEW_LOOPNEST.  */
1690
1691 void
1692 lambda_loopnest_to_gcc_loopnest (struct loop *old_loopnest,
1693                                  VEC(tree,heap) *old_ivs,
1694                                  VEC(tree,heap) *invariants,
1695                                  lambda_loopnest new_loopnest,
1696                                  lambda_trans_matrix transform,
1697                                  struct obstack * lambda_obstack)
1698 {
1699   struct loop *temp;
1700   size_t i = 0;
1701   size_t depth = 0;
1702   VEC(tree,heap) *new_ivs = NULL;
1703   tree oldiv;
1704
1705   block_stmt_iterator bsi;
1706
1707   if (dump_file)
1708     {
1709       transform = lambda_trans_matrix_inverse (transform);
1710       fprintf (dump_file, "Inverse of transformation matrix:\n");
1711       print_lambda_trans_matrix (dump_file, transform);
1712     }
1713   depth = depth_of_nest (old_loopnest);
1714   temp = old_loopnest;
1715
1716   while (temp)
1717     {
1718       lambda_loop newloop;
1719       basic_block bb;
1720       edge exit;
1721       tree ivvar, ivvarinced, exitcond, stmts;
1722       enum tree_code testtype;
1723       tree newupperbound, newlowerbound;
1724       lambda_linear_expression offset;
1725       tree type;
1726       bool insert_after;
1727       tree inc_stmt;
1728
1729       oldiv = VEC_index (tree, old_ivs, i);
1730       type = TREE_TYPE (oldiv);
1731
1732       /* First, build the new induction variable temporary  */
1733
1734       ivvar = create_tmp_var (type, "lnivtmp");
1735       add_referenced_var (ivvar);
1736
1737       VEC_safe_push (tree, heap, new_ivs, ivvar);
1738
1739       newloop = LN_LOOPS (new_loopnest)[i];
1740
1741       /* Linear offset is a bit tricky to handle.  Punt on the unhandled
1742          cases for now.  */
1743       offset = LL_LINEAR_OFFSET (newloop);
1744
1745       gcc_assert (LLE_DENOMINATOR (offset) == 1 &&
1746                   lambda_vector_zerop (LLE_COEFFICIENTS (offset), depth));
1747
1748       /* Now build the  new lower bounds, and insert the statements
1749          necessary to generate it on the loop preheader.  */
1750       newlowerbound = lle_to_gcc_expression (LL_LOWER_BOUND (newloop),
1751                                              LL_LINEAR_OFFSET (newloop),
1752                                              type,
1753                                              new_ivs,
1754                                              invariants, MAX_EXPR, &stmts);
1755
1756       if (stmts)
1757         {
1758           bsi_insert_on_edge (loop_preheader_edge (temp), stmts);
1759           bsi_commit_edge_inserts ();
1760         }
1761       /* Build the new upper bound and insert its statements in the
1762          basic block of the exit condition */
1763       newupperbound = lle_to_gcc_expression (LL_UPPER_BOUND (newloop),
1764                                              LL_LINEAR_OFFSET (newloop),
1765                                              type,
1766                                              new_ivs,
1767                                              invariants, MIN_EXPR, &stmts);
1768       exit = single_exit (temp);
1769       exitcond = get_loop_exit_condition (temp);
1770       bb = bb_for_stmt (exitcond);
1771       bsi = bsi_after_labels (bb);
1772       if (stmts)
1773         bsi_insert_before (&bsi, stmts, BSI_NEW_STMT);
1774
1775       /* Create the new iv.  */
1776
1777       standard_iv_increment_position (temp, &bsi, &insert_after);
1778       create_iv (newlowerbound,
1779                  build_int_cst (type, LL_STEP (newloop)),
1780                  ivvar, temp, &bsi, insert_after, &ivvar,
1781                  NULL);
1782
1783       /* Unfortunately, the incremented ivvar that create_iv inserted may not
1784          dominate the block containing the exit condition.
1785          So we simply create our own incremented iv to use in the new exit
1786          test,  and let redundancy elimination sort it out.  */
1787       inc_stmt = build2 (PLUS_EXPR, type,
1788                          ivvar, build_int_cst (type, LL_STEP (newloop)));
1789       inc_stmt = build_gimple_modify_stmt (SSA_NAME_VAR (ivvar), inc_stmt);
1790       ivvarinced = make_ssa_name (SSA_NAME_VAR (ivvar), inc_stmt);
1791       GIMPLE_STMT_OPERAND (inc_stmt, 0) = ivvarinced;
1792       bsi = bsi_for_stmt (exitcond);
1793       bsi_insert_before (&bsi, inc_stmt, BSI_SAME_STMT);
1794
1795       /* Replace the exit condition with the new upper bound
1796          comparison.  */
1797
1798       testtype = LL_STEP (newloop) >= 0 ? LE_EXPR : GE_EXPR;
1799
1800       /* We want to build a conditional where true means exit the loop, and
1801          false means continue the loop.
1802          So swap the testtype if this isn't the way things are.*/
1803
1804       if (exit->flags & EDGE_FALSE_VALUE)
1805         testtype = swap_tree_comparison (testtype);
1806
1807       COND_EXPR_COND (exitcond) = build2 (testtype,
1808                                           boolean_type_node,
1809                                           newupperbound, ivvarinced);
1810       update_stmt (exitcond);
1811       VEC_replace (tree, new_ivs, i, ivvar);
1812
1813       i++;
1814       temp = temp->inner;
1815     }
1816
1817   /* Rewrite uses of the old ivs so that they are now specified in terms of
1818      the new ivs.  */
1819
1820   for (i = 0; VEC_iterate (tree, old_ivs, i, oldiv); i++)
1821     {
1822       imm_use_iterator imm_iter;
1823       use_operand_p use_p;
1824       tree oldiv_def;
1825       tree oldiv_stmt = SSA_NAME_DEF_STMT (oldiv);
1826       tree stmt;
1827
1828       if (TREE_CODE (oldiv_stmt) == PHI_NODE)
1829         oldiv_def = PHI_RESULT (oldiv_stmt);
1830       else
1831         oldiv_def = SINGLE_SSA_TREE_OPERAND (oldiv_stmt, SSA_OP_DEF);
1832       gcc_assert (oldiv_def != NULL_TREE);
1833
1834       FOR_EACH_IMM_USE_STMT (stmt, imm_iter, oldiv_def)
1835         {
1836           tree newiv, stmts;
1837           lambda_body_vector lbv, newlbv;
1838
1839           gcc_assert (TREE_CODE (stmt) != PHI_NODE);
1840
1841           /* Compute the new expression for the induction
1842              variable.  */
1843           depth = VEC_length (tree, new_ivs);
1844           lbv = lambda_body_vector_new (depth, lambda_obstack);
1845           LBV_COEFFICIENTS (lbv)[i] = 1;
1846
1847           newlbv = lambda_body_vector_compute_new (transform, lbv,
1848                                                    lambda_obstack);
1849
1850           newiv = lbv_to_gcc_expression (newlbv, TREE_TYPE (oldiv),
1851                                          new_ivs, &stmts);
1852           if (stmts)
1853             {
1854               bsi = bsi_for_stmt (stmt);
1855               bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
1856             }
1857
1858           FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1859             propagate_value (use_p, newiv);
1860           update_stmt (stmt);
1861         }
1862
1863       /* Remove the now unused induction variable.  */
1864       remove_iv (oldiv_stmt);
1865     }
1866   VEC_free (tree, heap, new_ivs);
1867 }
1868
1869 /* Return TRUE if this is not interesting statement from the perspective of
1870    determining if we have a perfect loop nest.  */
1871
1872 static bool
1873 not_interesting_stmt (tree stmt)
1874 {
1875   /* Note that COND_EXPR's aren't interesting because if they were exiting the
1876      loop, we would have already failed the number of exits tests.  */
1877   if (TREE_CODE (stmt) == LABEL_EXPR
1878       || TREE_CODE (stmt) == GOTO_EXPR
1879       || TREE_CODE (stmt) == COND_EXPR)
1880     return true;
1881   return false;
1882 }
1883
1884 /* Return TRUE if PHI uses DEF for it's in-the-loop edge for LOOP.  */
1885
1886 static bool
1887 phi_loop_edge_uses_def (struct loop *loop, tree phi, tree def)
1888 {
1889   int i;
1890   for (i = 0; i < PHI_NUM_ARGS (phi); i++)
1891     if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, i)->src))
1892       if (PHI_ARG_DEF (phi, i) == def)
1893         return true;
1894   return false;
1895 }
1896
1897 /* Return TRUE if STMT is a use of PHI_RESULT.  */
1898
1899 static bool
1900 stmt_uses_phi_result (tree stmt, tree phi_result)
1901 {
1902   tree use = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
1903
1904   /* This is conservatively true, because we only want SIMPLE bumpers
1905      of the form x +- constant for our pass.  */
1906   return (use == phi_result);
1907 }
1908
1909 /* STMT is a bumper stmt for LOOP if the version it defines is used in the
1910    in-loop-edge in a phi node, and the operand it uses is the result of that
1911    phi node.
1912    I.E. i_29 = i_3 + 1
1913         i_3 = PHI (0, i_29);  */
1914
1915 static bool
1916 stmt_is_bumper_for_loop (struct loop *loop, tree stmt)
1917 {
1918   tree use;
1919   tree def;
1920   imm_use_iterator iter;
1921   use_operand_p use_p;
1922
1923   def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
1924   if (!def)
1925     return false;
1926
1927   FOR_EACH_IMM_USE_FAST (use_p, iter, def)
1928     {
1929       use = USE_STMT (use_p);
1930       if (TREE_CODE (use) == PHI_NODE)
1931         {
1932           if (phi_loop_edge_uses_def (loop, use, def))
1933             if (stmt_uses_phi_result (stmt, PHI_RESULT (use)))
1934               return true;
1935         }
1936     }
1937   return false;
1938 }
1939
1940
1941 /* Return true if LOOP is a perfect loop nest.
1942    Perfect loop nests are those loop nests where all code occurs in the
1943    innermost loop body.
1944    If S is a program statement, then
1945
1946    i.e.
1947    DO I = 1, 20
1948        S1
1949        DO J = 1, 20
1950        ...
1951        END DO
1952    END DO
1953    is not a perfect loop nest because of S1.
1954
1955    DO I = 1, 20
1956       DO J = 1, 20
1957         S1
1958         ...
1959       END DO
1960    END DO
1961    is a perfect loop nest.
1962
1963    Since we don't have high level loops anymore, we basically have to walk our
1964    statements and ignore those that are there because the loop needs them (IE
1965    the induction variable increment, and jump back to the top of the loop).  */
1966
1967 bool
1968 perfect_nest_p (struct loop *loop)
1969 {
1970   basic_block *bbs;
1971   size_t i;
1972   tree exit_cond;
1973
1974   if (!loop->inner)
1975     return true;
1976   bbs = get_loop_body (loop);
1977   exit_cond = get_loop_exit_condition (loop);
1978   for (i = 0; i < loop->num_nodes; i++)
1979     {
1980       if (bbs[i]->loop_father == loop)
1981         {
1982           block_stmt_iterator bsi;
1983           for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi))
1984             {
1985               tree stmt = bsi_stmt (bsi);
1986               if (stmt == exit_cond
1987                   || not_interesting_stmt (stmt)
1988                   || stmt_is_bumper_for_loop (loop, stmt))
1989                 continue;
1990               free (bbs);
1991               return false;
1992             }
1993         }
1994     }
1995   free (bbs);
1996   /* See if the inner loops are perfectly nested as well.  */
1997   if (loop->inner)
1998     return perfect_nest_p (loop->inner);
1999   return true;
2000 }
2001
2002 /* Replace the USES of X in STMT, or uses with the same step as X with Y.
2003    YINIT is the initial value of Y, REPLACEMENTS is a hash table to
2004    avoid creating duplicate temporaries and FIRSTBSI is statement
2005    iterator where new temporaries should be inserted at the beginning
2006    of body basic block.  */
2007
2008 static void
2009 replace_uses_equiv_to_x_with_y (struct loop *loop, tree stmt, tree x,
2010                                 int xstep, tree y, tree yinit,
2011                                 htab_t replacements,
2012                                 block_stmt_iterator *firstbsi)
2013 {
2014   ssa_op_iter iter;
2015   use_operand_p use_p;
2016
2017   FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
2018     {
2019       tree use = USE_FROM_PTR (use_p);
2020       tree step = NULL_TREE;
2021       tree scev, init, val, var, setstmt;
2022       struct tree_map *h, in;
2023       void **loc;
2024
2025       /* Replace uses of X with Y right away.  */
2026       if (use == x)
2027         {
2028           SET_USE (use_p, y);
2029           continue;
2030         }
2031
2032       scev = instantiate_parameters (loop,
2033                                      analyze_scalar_evolution (loop, use));
2034
2035       if (scev == NULL || scev == chrec_dont_know)
2036         continue;
2037
2038       step = evolution_part_in_loop_num (scev, loop->num);
2039       if (step == NULL
2040           || step == chrec_dont_know
2041           || TREE_CODE (step) != INTEGER_CST
2042           || int_cst_value (step) != xstep)
2043         continue;
2044
2045       /* Use REPLACEMENTS hash table to cache already created
2046          temporaries.  */
2047       in.hash = htab_hash_pointer (use);
2048       in.base.from = use;
2049       h = (struct tree_map *) htab_find_with_hash (replacements, &in, in.hash);
2050       if (h != NULL)
2051         {
2052           SET_USE (use_p, h->to);
2053           continue;
2054         }
2055
2056       /* USE which has the same step as X should be replaced
2057          with a temporary set to Y + YINIT - INIT.  */
2058       init = initial_condition_in_loop_num (scev, loop->num);
2059       gcc_assert (init != NULL && init != chrec_dont_know);
2060       if (TREE_TYPE (use) == TREE_TYPE (y))
2061         {
2062           val = fold_build2 (MINUS_EXPR, TREE_TYPE (y), init, yinit);
2063           val = fold_build2 (PLUS_EXPR, TREE_TYPE (y), y, val);
2064           if (val == y)
2065             {
2066               /* If X has the same type as USE, the same step
2067                  and same initial value, it can be replaced by Y.  */
2068               SET_USE (use_p, y);
2069               continue;
2070             }
2071         }
2072       else
2073         {
2074           val = fold_build2 (MINUS_EXPR, TREE_TYPE (y), y, yinit);
2075           val = fold_convert (TREE_TYPE (use), val);
2076           val = fold_build2 (PLUS_EXPR, TREE_TYPE (use), val, init);
2077         }
2078
2079       /* Create a temporary variable and insert it at the beginning
2080          of the loop body basic block, right after the PHI node
2081          which sets Y.  */
2082       var = create_tmp_var (TREE_TYPE (use), "perfecttmp");
2083       add_referenced_var (var);
2084       val = force_gimple_operand_bsi (firstbsi, val, false, NULL,
2085                                       true, BSI_SAME_STMT);
2086       setstmt = build_gimple_modify_stmt (var, val);
2087       var = make_ssa_name (var, setstmt);
2088       GIMPLE_STMT_OPERAND (setstmt, 0) = var;
2089       bsi_insert_before (firstbsi, setstmt, BSI_SAME_STMT);
2090       update_stmt (setstmt);
2091       SET_USE (use_p, var);
2092       h = GGC_NEW (struct tree_map);
2093       h->hash = in.hash;
2094       h->base.from = use;
2095       h->to = var;
2096       loc = htab_find_slot_with_hash (replacements, h, in.hash, INSERT);
2097       gcc_assert ((*(struct tree_map **)loc) == NULL);
2098       *(struct tree_map **) loc = h;
2099     }
2100 }
2101
2102 /* Return true if STMT is an exit PHI for LOOP */
2103
2104 static bool
2105 exit_phi_for_loop_p (struct loop *loop, tree stmt)
2106 {
2107
2108   if (TREE_CODE (stmt) != PHI_NODE
2109       || PHI_NUM_ARGS (stmt) != 1
2110       || bb_for_stmt (stmt) != single_exit (loop)->dest)
2111     return false;
2112
2113   return true;
2114 }
2115
2116 /* Return true if STMT can be put back into the loop INNER, by
2117    copying it to the beginning of that loop and changing the uses.  */
2118
2119 static bool
2120 can_put_in_inner_loop (struct loop *inner, tree stmt)
2121 {
2122   imm_use_iterator imm_iter;
2123   use_operand_p use_p;
2124
2125   gcc_assert (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);
2126   if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS)
2127       || !expr_invariant_in_loop_p (inner, GIMPLE_STMT_OPERAND (stmt, 1)))
2128     return false;
2129
2130   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, GIMPLE_STMT_OPERAND (stmt, 0))
2131     {
2132       if (!exit_phi_for_loop_p (inner, USE_STMT (use_p)))
2133         {
2134           basic_block immbb = bb_for_stmt (USE_STMT (use_p));
2135
2136           if (!flow_bb_inside_loop_p (inner, immbb))
2137             return false;
2138         }
2139     }
2140   return true;
2141 }
2142
2143 /* Return true if STMT can be put *after* the inner loop of LOOP.  */
2144 static bool
2145 can_put_after_inner_loop (struct loop *loop, tree stmt)
2146 {
2147   imm_use_iterator imm_iter;
2148   use_operand_p use_p;
2149
2150   if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS))
2151     return false;
2152
2153   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, GIMPLE_STMT_OPERAND (stmt, 0))
2154     {
2155       if (!exit_phi_for_loop_p (loop, USE_STMT (use_p)))
2156         {
2157           basic_block immbb = bb_for_stmt (USE_STMT (use_p));
2158
2159           if (!dominated_by_p (CDI_DOMINATORS,
2160                                immbb,
2161                                loop->inner->header)
2162               && !can_put_in_inner_loop (loop->inner, stmt))
2163             return false;
2164         }
2165     }
2166   return true;
2167 }
2168
2169
2170
2171 /* Return TRUE if LOOP is an imperfect nest that we can convert to a
2172    perfect one.  At the moment, we only handle imperfect nests of
2173    depth 2, where all of the statements occur after the inner loop.  */
2174
2175 static bool
2176 can_convert_to_perfect_nest (struct loop *loop)
2177 {
2178   basic_block *bbs;
2179   tree exit_condition, phi;
2180   size_t i;
2181   block_stmt_iterator bsi;
2182   basic_block exitdest;
2183
2184   /* Can't handle triply nested+ loops yet.  */
2185   if (!loop->inner || loop->inner->inner)
2186     return false;
2187
2188   bbs = get_loop_body (loop);
2189   exit_condition = get_loop_exit_condition (loop);
2190   for (i = 0; i < loop->num_nodes; i++)
2191     {
2192       if (bbs[i]->loop_father == loop)
2193         {
2194           for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi))
2195             {
2196               tree stmt = bsi_stmt (bsi);
2197
2198               if (stmt == exit_condition
2199                   || not_interesting_stmt (stmt)
2200                   || stmt_is_bumper_for_loop (loop, stmt))
2201                 continue;
2202
2203               /* If this is a scalar operation that can be put back
2204                  into the inner loop, or after the inner loop, through
2205                  copying, then do so. This works on the theory that
2206                  any amount of scalar code we have to reduplicate
2207                  into or after the loops is less expensive that the
2208                  win we get from rearranging the memory walk
2209                  the loop is doing so that it has better
2210                  cache behavior.  */
2211               if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
2212                 {
2213                   use_operand_p use_a, use_b;
2214                   imm_use_iterator imm_iter;
2215                   ssa_op_iter op_iter, op_iter1;
2216                   tree op0 = GIMPLE_STMT_OPERAND (stmt, 0);
2217                   tree scev = instantiate_parameters
2218                     (loop, analyze_scalar_evolution (loop, op0));
2219
2220                   /* If the IV is simple, it can be duplicated.  */
2221                   if (!automatically_generated_chrec_p (scev))
2222                     {
2223                       tree step = evolution_part_in_loop_num (scev, loop->num);
2224                       if (step && step != chrec_dont_know
2225                           && TREE_CODE (step) == INTEGER_CST)
2226                         continue;
2227                     }
2228
2229                   /* The statement should not define a variable used
2230                      in the inner loop.  */
2231                   if (TREE_CODE (op0) == SSA_NAME)
2232                     FOR_EACH_IMM_USE_FAST (use_a, imm_iter, op0)
2233                       if (bb_for_stmt (USE_STMT (use_a))->loop_father
2234                           == loop->inner)
2235                         goto fail;
2236
2237                   FOR_EACH_SSA_USE_OPERAND (use_a, stmt, op_iter, SSA_OP_USE)
2238                     {
2239                       tree node, op = USE_FROM_PTR (use_a);
2240
2241                       /* The variables should not be used in both loops.  */
2242                       FOR_EACH_IMM_USE_FAST (use_b, imm_iter, op)
2243                       if (bb_for_stmt (USE_STMT (use_b))->loop_father
2244                           == loop->inner)
2245                         goto fail;
2246
2247                       /* The statement should not use the value of a
2248                          scalar that was modified in the loop.  */
2249                       node = SSA_NAME_DEF_STMT (op);
2250                       if (TREE_CODE (node) == PHI_NODE)
2251                         FOR_EACH_PHI_ARG (use_b, node, op_iter1, SSA_OP_USE)
2252                           {
2253                             tree arg = USE_FROM_PTR (use_b);
2254
2255                             if (TREE_CODE (arg) == SSA_NAME)
2256                               {
2257                                 tree arg_stmt = SSA_NAME_DEF_STMT (arg);
2258
2259                                 if (bb_for_stmt (arg_stmt)
2260                                     && (bb_for_stmt (arg_stmt)->loop_father
2261                                         == loop->inner))
2262                                   goto fail;
2263                               }
2264                           }
2265                     }
2266
2267                   if (can_put_in_inner_loop (loop->inner, stmt)
2268                       || can_put_after_inner_loop (loop, stmt))
2269                     continue;
2270                 }
2271
2272               /* Otherwise, if the bb of a statement we care about isn't
2273                  dominated by the header of the inner loop, then we can't
2274                  handle this case right now.  This test ensures that the
2275                  statement comes completely *after* the inner loop.  */
2276               if (!dominated_by_p (CDI_DOMINATORS,
2277                                    bb_for_stmt (stmt),
2278                                    loop->inner->header))
2279                 goto fail;
2280             }
2281         }
2282     }
2283
2284   /* We also need to make sure the loop exit only has simple copy phis in it,
2285      otherwise we don't know how to transform it into a perfect nest right
2286      now.  */
2287   exitdest = single_exit (loop)->dest;
2288
2289   for (phi = phi_nodes (exitdest); phi; phi = PHI_CHAIN (phi))
2290     if (PHI_NUM_ARGS (phi) != 1)
2291       goto fail;
2292
2293   free (bbs);
2294   return true;
2295
2296  fail:
2297   free (bbs);
2298   return false;
2299 }
2300
2301 /* Transform the loop nest into a perfect nest, if possible.
2302    LOOP is the loop nest to transform into a perfect nest
2303    LBOUNDS are the lower bounds for the loops to transform
2304    UBOUNDS are the upper bounds for the loops to transform
2305    STEPS is the STEPS for the loops to transform.
2306    LOOPIVS is the induction variables for the loops to transform.
2307
2308    Basically, for the case of
2309
2310    FOR (i = 0; i < 50; i++)
2311     {
2312      FOR (j =0; j < 50; j++)
2313      {
2314         <whatever>
2315      }
2316      <some code>
2317     }
2318
2319    This function will transform it into a perfect loop nest by splitting the
2320    outer loop into two loops, like so:
2321
2322    FOR (i = 0; i < 50; i++)
2323    {
2324      FOR (j = 0; j < 50; j++)
2325      {
2326          <whatever>
2327      }
2328    }
2329
2330    FOR (i = 0; i < 50; i ++)
2331    {
2332     <some code>
2333    }
2334
2335    Return FALSE if we can't make this loop into a perfect nest.  */
2336
2337 static bool
2338 perfect_nestify (struct loop *loop,
2339                  VEC(tree,heap) *lbounds,
2340                  VEC(tree,heap) *ubounds,
2341                  VEC(int,heap) *steps,
2342                  VEC(tree,heap) *loopivs)
2343 {
2344   basic_block *bbs;
2345   tree exit_condition;
2346   tree cond_stmt;
2347   basic_block preheaderbb, headerbb, bodybb, latchbb, olddest;
2348   int i;
2349   block_stmt_iterator bsi, firstbsi;
2350   bool insert_after;
2351   edge e;
2352   struct loop *newloop;
2353   tree phi;
2354   tree uboundvar;
2355   tree stmt;
2356   tree oldivvar, ivvar, ivvarinced;
2357   VEC(tree,heap) *phis = NULL;
2358   htab_t replacements = NULL;
2359
2360   /* Create the new loop.  */
2361   olddest = single_exit (loop)->dest;
2362   preheaderbb = split_edge (single_exit (loop));
2363   headerbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2364
2365   /* Push the exit phi nodes that we are moving.  */
2366   for (phi = phi_nodes (olddest); phi; phi = PHI_CHAIN (phi))
2367     {
2368       VEC_reserve (tree, heap, phis, 2);
2369       VEC_quick_push (tree, phis, PHI_RESULT (phi));
2370       VEC_quick_push (tree, phis, PHI_ARG_DEF (phi, 0));
2371     }
2372   e = redirect_edge_and_branch (single_succ_edge (preheaderbb), headerbb);
2373
2374   /* Remove the exit phis from the old basic block.  */
2375   while (phi_nodes (olddest) != NULL)
2376     remove_phi_node (phi_nodes (olddest), NULL, false);
2377
2378   /* and add them back to the new basic block.  */
2379   while (VEC_length (tree, phis) != 0)
2380     {
2381       tree def;
2382       tree phiname;
2383       def = VEC_pop (tree, phis);
2384       phiname = VEC_pop (tree, phis);
2385       phi = create_phi_node (phiname, preheaderbb);
2386       add_phi_arg (phi, def, single_pred_edge (preheaderbb));
2387     }
2388   flush_pending_stmts (e);
2389   VEC_free (tree, heap, phis);
2390
2391   bodybb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2392   latchbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2393   make_edge (headerbb, bodybb, EDGE_FALLTHRU);
2394   cond_stmt = build3 (COND_EXPR, void_type_node,
2395                       build2 (NE_EXPR, boolean_type_node,
2396                               integer_one_node,
2397                               integer_zero_node),
2398                       NULL_TREE, NULL_TREE);
2399   bsi = bsi_start (bodybb);
2400   bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
2401   e = make_edge (bodybb, olddest, EDGE_FALSE_VALUE);
2402   make_edge (bodybb, latchbb, EDGE_TRUE_VALUE);
2403   make_edge (latchbb, headerbb, EDGE_FALLTHRU);
2404
2405   /* Update the loop structures.  */
2406   newloop = duplicate_loop (loop, olddest->loop_father);
2407   newloop->header = headerbb;
2408   newloop->latch = latchbb;
2409   add_bb_to_loop (latchbb, newloop);
2410   add_bb_to_loop (bodybb, newloop);
2411   add_bb_to_loop (headerbb, newloop);
2412   set_immediate_dominator (CDI_DOMINATORS, bodybb, headerbb);
2413   set_immediate_dominator (CDI_DOMINATORS, headerbb, preheaderbb);
2414   set_immediate_dominator (CDI_DOMINATORS, preheaderbb,
2415                            single_exit (loop)->src);
2416   set_immediate_dominator (CDI_DOMINATORS, latchbb, bodybb);
2417   set_immediate_dominator (CDI_DOMINATORS, olddest,
2418                            recompute_dominator (CDI_DOMINATORS, olddest));
2419   /* Create the new iv.  */
2420   oldivvar = VEC_index (tree, loopivs, 0);
2421   ivvar = create_tmp_var (TREE_TYPE (oldivvar), "perfectiv");
2422   add_referenced_var (ivvar);
2423   standard_iv_increment_position (newloop, &bsi, &insert_after);
2424   create_iv (VEC_index (tree, lbounds, 0),
2425              build_int_cst (TREE_TYPE (oldivvar), VEC_index (int, steps, 0)),
2426              ivvar, newloop, &bsi, insert_after, &ivvar, &ivvarinced);
2427
2428   /* Create the new upper bound.  This may be not just a variable, so we copy
2429      it to one just in case.  */
2430
2431   exit_condition = get_loop_exit_condition (newloop);
2432   uboundvar = create_tmp_var (integer_type_node, "uboundvar");
2433   add_referenced_var (uboundvar);
2434   stmt = build_gimple_modify_stmt (uboundvar, VEC_index (tree, ubounds, 0));
2435   uboundvar = make_ssa_name (uboundvar, stmt);
2436   GIMPLE_STMT_OPERAND (stmt, 0) = uboundvar;
2437
2438   if (insert_after)
2439     bsi_insert_after (&bsi, stmt, BSI_SAME_STMT);
2440   else
2441     bsi_insert_before (&bsi, stmt, BSI_SAME_STMT);
2442   update_stmt (stmt);
2443   COND_EXPR_COND (exit_condition) = build2 (GE_EXPR,
2444                                             boolean_type_node,
2445                                             uboundvar,
2446                                             ivvarinced);
2447   update_stmt (exit_condition);
2448   replacements = htab_create_ggc (20, tree_map_hash,
2449                                   tree_map_eq, NULL);
2450   bbs = get_loop_body_in_dom_order (loop);
2451   /* Now move the statements, and replace the induction variable in the moved
2452      statements with the correct loop induction variable.  */
2453   oldivvar = VEC_index (tree, loopivs, 0);
2454   firstbsi = bsi_start (bodybb);
2455   for (i = loop->num_nodes - 1; i >= 0 ; i--)
2456     {
2457       block_stmt_iterator tobsi = bsi_last (bodybb);
2458       if (bbs[i]->loop_father == loop)
2459         {
2460           /* If this is true, we are *before* the inner loop.
2461              If this isn't true, we are *after* it.
2462
2463              The only time can_convert_to_perfect_nest returns true when we
2464              have statements before the inner loop is if they can be moved
2465              into the inner loop.
2466
2467              The only time can_convert_to_perfect_nest returns true when we
2468              have statements after the inner loop is if they can be moved into
2469              the new split loop.  */
2470
2471           if (dominated_by_p (CDI_DOMINATORS, loop->inner->header, bbs[i]))
2472             {
2473               block_stmt_iterator header_bsi
2474                 = bsi_after_labels (loop->inner->header);
2475
2476               for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi);)
2477                 {
2478                   tree stmt = bsi_stmt (bsi);
2479
2480                   if (stmt == exit_condition
2481                       || not_interesting_stmt (stmt)
2482                       || stmt_is_bumper_for_loop (loop, stmt))
2483                     {
2484                       bsi_next (&bsi);
2485                       continue;
2486                     }
2487
2488                   bsi_move_before (&bsi, &header_bsi);
2489                 }
2490             }
2491           else
2492             {
2493               /* Note that the bsi only needs to be explicitly incremented
2494                  when we don't move something, since it is automatically
2495                  incremented when we do.  */
2496               for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi);)
2497                 {
2498                   ssa_op_iter i;
2499                   tree n, stmt = bsi_stmt (bsi);
2500
2501                   if (stmt == exit_condition
2502                       || not_interesting_stmt (stmt)
2503                       || stmt_is_bumper_for_loop (loop, stmt))
2504                     {
2505                       bsi_next (&bsi);
2506                       continue;
2507                     }
2508
2509                   replace_uses_equiv_to_x_with_y
2510                     (loop, stmt, oldivvar, VEC_index (int, steps, 0), ivvar,
2511                      VEC_index (tree, lbounds, 0), replacements, &firstbsi);
2512
2513                   bsi_move_before (&bsi, &tobsi);
2514
2515                   /* If the statement has any virtual operands, they may
2516                      need to be rewired because the original loop may
2517                      still reference them.  */
2518                   FOR_EACH_SSA_TREE_OPERAND (n, stmt, i, SSA_OP_ALL_VIRTUALS)
2519                     mark_sym_for_renaming (SSA_NAME_VAR (n));
2520                 }
2521             }
2522
2523         }
2524     }
2525
2526   free (bbs);
2527   htab_delete (replacements);
2528   return perfect_nest_p (loop);
2529 }
2530
2531 /* Return true if TRANS is a legal transformation matrix that respects
2532    the dependence vectors in DISTS and DIRS.  The conservative answer
2533    is false.
2534
2535    "Wolfe proves that a unimodular transformation represented by the
2536    matrix T is legal when applied to a loop nest with a set of
2537    lexicographically non-negative distance vectors RDG if and only if
2538    for each vector d in RDG, (T.d >= 0) is lexicographically positive.
2539    i.e.: if and only if it transforms the lexicographically positive
2540    distance vectors to lexicographically positive vectors.  Note that
2541    a unimodular matrix must transform the zero vector (and only it) to
2542    the zero vector." S.Muchnick.  */
2543
2544 bool
2545 lambda_transform_legal_p (lambda_trans_matrix trans,
2546                           int nb_loops,
2547                           VEC (ddr_p, heap) *dependence_relations)
2548 {
2549   unsigned int i, j;
2550   lambda_vector distres;
2551   struct data_dependence_relation *ddr;
2552
2553   gcc_assert (LTM_COLSIZE (trans) == nb_loops
2554               && LTM_ROWSIZE (trans) == nb_loops);
2555
2556   /* When there is an unknown relation in the dependence_relations, we
2557      know that it is no worth looking at this loop nest: give up.  */
2558   ddr = VEC_index (ddr_p, dependence_relations, 0);
2559   if (ddr == NULL)
2560     return true;
2561   if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2562     return false;
2563
2564   distres = lambda_vector_new (nb_loops);
2565
2566   /* For each distance vector in the dependence graph.  */
2567   for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
2568     {
2569       /* Don't care about relations for which we know that there is no
2570          dependence, nor about read-read (aka. output-dependences):
2571          these data accesses can happen in any order.  */
2572       if (DDR_ARE_DEPENDENT (ddr) == chrec_known
2573           || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
2574         continue;
2575
2576       /* Conservatively answer: "this transformation is not valid".  */
2577       if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2578         return false;
2579
2580       /* If the dependence could not be captured by a distance vector,
2581          conservatively answer that the transform is not valid.  */
2582       if (DDR_NUM_DIST_VECTS (ddr) == 0)
2583         return false;
2584
2585       /* Compute trans.dist_vect */
2586       for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
2587         {
2588           lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
2589                                      DDR_DIST_VECT (ddr, j), distres);
2590
2591           if (!lambda_vector_lexico_pos (distres, nb_loops))
2592             return false;
2593         }
2594     }
2595   return true;
2596 }