gcc/lambda-code.c

   1 /*  Loop transformation code generation
   2     Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
   3     Contributed by Daniel Berlin <dberlin@dberlin.org>
   4
   5     This file is part of GCC.
   6
   7     GCC is free software; you can redistribute it and/or modify it under
   8     the terms of the GNU General Public License as published by the Free
   9     Software Foundation; either version 2, or (at your option) any later
  10     version.
  11
  12     GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13     WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14     FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15     for more details.
  16
  17     You should have received a copy of the GNU General Public License
  18     along with GCC; see the file COPYING.  If not, write to the Free
  19     Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  20     02111-1307, USA.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "tm.h"
  26 #include "errors.h"
  27 #include "ggc.h"
  28 #include "tree.h"
  29 #include "target.h"
  30 #include "rtl.h"
  31 #include "basic-block.h"
  32 #include "diagnostic.h"
  33 #include "tree-flow.h"
  34 #include "tree-dump.h"
  35 #include "timevar.h"
  36 #include "cfgloop.h"
  37 #include "expr.h"
  38 #include "optabs.h"
  39 #include "tree-chrec.h"
  40 #include "tree-data-ref.h"
  41 #include "tree-pass.h"
  42 #include "tree-scalar-evolution.h"
  43 #include "vec.h"
  44 #include "lambda.h"
  45
  46 /* This loop nest code generation is based on non-singular matrix
  47    math.
  48
  49  A little terminology and a general sketch of the algorithm.  See "A singular
  50  loop transformation framework based on non-singular matrices" by Wei Li and
  51  Keshav Pingali for formal proofs that the various statements below are
  52  correct.
  53
  54  A loop iteration space represents the points traversed by the loop.  A point in the
  55  iteration space can be represented by a vector of size <loop depth>.  You can
   56  therefore represent the iteration space as integral combinations of a set
  57  of basis vectors.
  58
  59  A loop iteration space is dense if every integer point between the loop
  60  bounds is a point in the iteration space.  Every loop with a step of 1
  61  therefore has a dense iteration space.
  62
  63  for i = 1 to 3, step 1 is a dense iteration space.
  64
  65  A loop iteration space is sparse if it is not dense.  That is, the iteration
  66  space skips integer points that are within the loop bounds.
  67
  68  for i = 1 to 3, step 2 is a sparse iteration space, because the integer point
  69  2 is skipped.
  70
  71  Dense source spaces are easy to transform, because they don't skip any
  72  points to begin with.  Thus we can compute the exact bounds of the target
  73  space using min/max and floor/ceil.
  74
  75  For a dense source space, we take the transformation matrix, decompose it
  76  into a lower triangular part (H) and a unimodular part (U).
  77  We then compute the auxiliary space from the unimodular part (source loop
   78  nest . U = auxiliary space), which has two important properties:
  79   1. It traverses the iterations in the same lexicographic order as the source
  80   space.
  81   2. It is a dense space when the source is a dense space (even if the target
  82   space is going to be sparse).
  83
  84  Given the auxiliary space, we use the lower triangular part to compute the
  85  bounds in the target space by simple matrix multiplication.
  86  The gaps in the target space (IE the new loop step sizes) will be the
  87  diagonals of the H matrix.
  88
  89  Sparse source spaces require another step, because you can't directly compute
  90  the exact bounds of the auxiliary and target space from the sparse space.
  91  Rather than try to come up with a separate algorithm to handle sparse source
  92  spaces directly, we just find a legal transformation matrix that gives you
  93  the sparse source space, from a dense space, and then transform the dense
  94  space.
  95
  96  For a regular sparse space, you can represent the source space as an integer
  97  lattice, and the base space of that lattice will always be dense.  Thus, we
  98  effectively use the lattice to figure out the transformation from the lattice
  99  base space, to the sparse iteration space (IE what transform was applied to
 100  the dense space to make it sparse).  We then compose this transform with the
 101  transformation matrix specified by the user (since our matrix transformations
 102  are closed under composition, this is okay).  We can then use the base space
 103  (which is dense) plus the composed transformation matrix, to compute the rest
 104  of the transform using the dense space algorithm above.
 105
 106  In other words, our sparse source space (B) is decomposed into a dense base
 107  space (A), and a matrix (L) that transforms A into B, such that A.L = B.
 108  We then compute the composition of L and the user transformation matrix (T),
 109  so that T is now a transform from A to the result, instead of from B to the
 110  result.
 111  IE A.(LT) = result instead of B.T = result
 112  Since A is now a dense source space, we can use the dense source space
 113  algorithm above to compute the result of applying transform (LT) to A.
 114
 115  Fourier-Motzkin elimination is used to compute the bounds of the base space
 116  of the lattice.  */
 117
 118
/* Vector-of-int declaration; the VEC (int) type is used by the
   perfect_nestify prototype below.
   NOTE(review): DEF_VEC_GC_P is defined outside this file (presumably in
   vec.h) -- confirm its exact expansion there.  */
DEF_VEC_GC_P(int);

/* Forward declaration of perfect_nestify, so that it can be referenced
   before its definition.  */
static bool perfect_nestify (struct loops *,
                             struct loop *, VEC (tree) *,
                             VEC (tree) *, VEC (int) *, VEC (tree) *);
/* Lattice stuff that is internal to the code generation algorithm.  */

/* Description of an integer lattice: a base matrix together with an
   origin that is split into a constant part and a part that depends on
   the loop invariants.  Note that lambda_lattice names a *pointer* to
   this structure, not the structure itself.  */
typedef struct
{
  /* Lattice base matrix (allocated as DIMENSION x DIMENSION by
     lambda_lattice_new).  */
  lambda_matrix base;
  /* Lattice dimension.  */
  int dimension;
  /* Origin vector for the coefficients (DIMENSION entries).  */
  lambda_vector origin;
  /* Origin matrix for the invariants (DIMENSION rows, INVARIANTS
     columns).  */
  lambda_matrix origin_invariants;
  /* Number of invariants.  */
  int invariants;
} *lambda_lattice;
 139
/* Field accessors for a lambda_lattice T.  Each one expands to a plain
   member access, so it can be used both to read and to assign the
   field.  */
#define LATTICE_BASE(T) ((T)->base)
#define LATTICE_DIMENSION(T) ((T)->dimension)
#define LATTICE_ORIGIN(T) ((T)->origin)
#define LATTICE_ORIGIN_INVARIANTS(T) ((T)->origin_invariants)
#define LATTICE_INVARIANTS(T) ((T)->invariants)
 145
/* Forward declarations of file-local helpers.  */

/* Equality test for two linear expressions; the int arguments are the
   depth and the number of invariants to compare.  */
static bool lle_equal (lambda_linear_expression, lambda_linear_expression,
                       int, int);
/* Lattice allocation and lattice base computation.  */
static lambda_lattice lambda_lattice_new (int, int);
static lambda_lattice lambda_lattice_compute_base (lambda_loopnest);

static tree find_induction_var_from_exit_cond (struct loop *);
 152
 153 /* Create a new lambda body vector.  */
 154
 155 lambda_body_vector
 156 lambda_body_vector_new (int size)
 157 {
 158   lambda_body_vector ret;
 159
 160   ret = ggc_alloc (sizeof (*ret));
 161   LBV_COEFFICIENTS (ret) = lambda_vector_new (size);
 162   LBV_SIZE (ret) = size;
 163   LBV_DENOMINATOR (ret) = 1;
 164   return ret;
 165 }
 166
 167 /* Compute the new coefficients for the vector based on the
 168   *inverse* of the transformation matrix.  */
 169
 170 lambda_body_vector
 171 lambda_body_vector_compute_new (lambda_trans_matrix transform,
 172                                 lambda_body_vector vect)
 173 {
 174   lambda_body_vector temp;
 175   int depth;
 176
 177   /* Make sure the matrix is square.  */
 178   gcc_assert (LTM_ROWSIZE (transform) == LTM_COLSIZE (transform));
 179
 180   depth = LTM_ROWSIZE (transform);
 181
 182   temp = lambda_body_vector_new (depth);
 183   LBV_DENOMINATOR (temp) =
 184     LBV_DENOMINATOR (vect) * LTM_DENOMINATOR (transform);
 185   lambda_vector_matrix_mult (LBV_COEFFICIENTS (vect), depth,
 186                              LTM_MATRIX (transform), depth,
 187                              LBV_COEFFICIENTS (temp));
 188   LBV_SIZE (temp) = LBV_SIZE (vect);
 189   return temp;
 190 }
 191
 192 /* Print out a lambda body vector.  */
 193
 194 void
 195 print_lambda_body_vector (FILE * outfile, lambda_body_vector body)
 196 {
 197   print_lambda_vector (outfile, LBV_COEFFICIENTS (body), LBV_SIZE (body));
 198 }
 199
 200 /* Return TRUE if two linear expressions are equal.  */
 201
 202 static bool
 203 lle_equal (lambda_linear_expression lle1, lambda_linear_expression lle2,
 204            int depth, int invariants)
 205 {
 206   int i;
 207
 208   if (lle1 == NULL || lle2 == NULL)
 209     return false;
 210   if (LLE_CONSTANT (lle1) != LLE_CONSTANT (lle2))
 211     return false;
 212   if (LLE_DENOMINATOR (lle1) != LLE_DENOMINATOR (lle2))
 213     return false;
 214   for (i = 0; i < depth; i++)
 215     if (LLE_COEFFICIENTS (lle1)[i] != LLE_COEFFICIENTS (lle2)[i])
 216       return false;
 217   for (i = 0; i < invariants; i++)
 218     if (LLE_INVARIANT_COEFFICIENTS (lle1)[i] !=
 219         LLE_INVARIANT_COEFFICIENTS (lle2)[i])
 220       return false;
 221   return true;
 222 }
 223
 224 /* Create a new linear expression with dimension DIM, and total number
 225    of invariants INVARIANTS.  */
 226
 227 lambda_linear_expression
 228 lambda_linear_expression_new (int dim, int invariants)
 229 {
 230   lambda_linear_expression ret;
 231
 232   ret = ggc_alloc_cleared (sizeof (*ret));
 233
 234   LLE_COEFFICIENTS (ret) = lambda_vector_new (dim);
 235   LLE_CONSTANT (ret) = 0;
 236   LLE_INVARIANT_COEFFICIENTS (ret) = lambda_vector_new (invariants);
 237   LLE_DENOMINATOR (ret) = 1;
 238   LLE_NEXT (ret) = NULL;
 239
 240   return ret;
 241 }
 242
 243 /* Print out a linear expression EXPR, with SIZE coefficients, to OUTFILE.
 244    The starting letter used for variable names is START.  */
 245
 246 static void
 247 print_linear_expression (FILE * outfile, lambda_vector expr, int size,
 248                          char start)
 249 {
 250   int i;
 251   bool first = true;
 252   for (i = 0; i < size; i++)
 253     {
 254       if (expr[i] != 0)
 255         {
 256           if (first)
 257             {
 258               if (expr[i] < 0)
 259                 fprintf (outfile, "-");
 260               first = false;
 261             }
 262           else if (expr[i] > 0)
 263             fprintf (outfile, " + ");
 264           else
 265             fprintf (outfile, " - ");
 266           if (abs (expr[i]) == 1)
 267             fprintf (outfile, "%c", start + i);
 268           else
 269             fprintf (outfile, "%d%c", abs (expr[i]), start + i);
 270         }
 271     }
 272 }
 273
 274 /* Print out a lambda linear expression structure, EXPR, to OUTFILE. The
 275    depth/number of coefficients is given by DEPTH, the number of invariants is
 276    given by INVARIANTS, and the character to start variable names with is given
 277    by START.  */
 278
 279 void
 280 print_lambda_linear_expression (FILE * outfile,
 281                                 lambda_linear_expression expr,
 282                                 int depth, int invariants, char start)
 283 {
 284   fprintf (outfile, "\tLinear expression: ");
 285   print_linear_expression (outfile, LLE_COEFFICIENTS (expr), depth, start);
 286   fprintf (outfile, " constant: %d ", LLE_CONSTANT (expr));
 287   fprintf (outfile, "  invariants: ");
 288   print_linear_expression (outfile, LLE_INVARIANT_COEFFICIENTS (expr),
 289                            invariants, 'A');
 290   fprintf (outfile, "  denominator: %d\n", LLE_DENOMINATOR (expr));
 291 }
 292
 293 /* Print a lambda loop structure LOOP to OUTFILE.  The depth/number of
 294    coefficients is given by DEPTH, the number of invariants is
 295    given by INVARIANTS, and the character to start variable names with is given
 296    by START.  */
 297
 298 void
 299 print_lambda_loop (FILE * outfile, lambda_loop loop, int depth,
 300                    int invariants, char start)
 301 {
 302   int step;
 303   lambda_linear_expression expr;
 304
 305   gcc_assert (loop);
 306
 307   expr = LL_LINEAR_OFFSET (loop);
 308   step = LL_STEP (loop);
 309   fprintf (outfile, "  step size = %d \n", step);
 310
 311   if (expr)
 312     {
 313       fprintf (outfile, "  linear offset: \n");
 314       print_lambda_linear_expression (outfile, expr, depth, invariants,
 315                                       start);
 316     }
 317
 318   fprintf (outfile, "  lower bound: \n");
 319   for (expr = LL_LOWER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr))
 320     print_lambda_linear_expression (outfile, expr, depth, invariants, start);
 321   fprintf (outfile, "  upper bound: \n");
 322   for (expr = LL_UPPER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr))
 323     print_lambda_linear_expression (outfile, expr, depth, invariants, start);
 324 }
 325
 326 /* Create a new loop nest structure with DEPTH loops, and INVARIANTS as the
 327    number of invariants.  */
 328
 329 lambda_loopnest
 330 lambda_loopnest_new (int depth, int invariants)
 331 {
 332   lambda_loopnest ret;
 333   ret = ggc_alloc (sizeof (*ret));
 334
 335   LN_LOOPS (ret) = ggc_alloc_cleared (depth * sizeof (lambda_loop));
 336   LN_DEPTH (ret) = depth;
 337   LN_INVARIANTS (ret) = invariants;
 338
 339   return ret;
 340 }
 341
 342 /* Print a lambda loopnest structure, NEST, to OUTFILE.  The starting
 343    character to use for loop names is given by START.  */
 344
 345 void
 346 print_lambda_loopnest (FILE * outfile, lambda_loopnest nest, char start)
 347 {
 348   int i;
 349   for (i = 0; i < LN_DEPTH (nest); i++)
 350     {
 351       fprintf (outfile, "Loop %c\n", start + i);
 352       print_lambda_loop (outfile, LN_LOOPS (nest)[i], LN_DEPTH (nest),
 353                          LN_INVARIANTS (nest), 'i');
 354       fprintf (outfile, "\n");
 355     }
 356 }
 357
 358 /* Allocate a new lattice structure of DEPTH x DEPTH, with INVARIANTS number
 359    of invariants.  */
 360
 361 static lambda_lattice
 362 lambda_lattice_new (int depth, int invariants)
 363 {
 364   lambda_lattice ret;
 365   ret = ggc_alloc (sizeof (*ret));
 366   LATTICE_BASE (ret) = lambda_matrix_new (depth, depth);
 367   LATTICE_ORIGIN (ret) = lambda_vector_new (depth);
 368   LATTICE_ORIGIN_INVARIANTS (ret) = lambda_matrix_new (depth, invariants);
 369   LATTICE_DIMENSION (ret) = depth;
 370   LATTICE_INVARIANTS (ret) = invariants;
 371   return ret;
 372 }
 373
 374 /* Compute the lattice base for NEST.  The lattice base is essentially a
 375    non-singular transform from a dense base space to a sparse iteration space.
 376    We use it so that we don't have to specially handle the case of a sparse
 377    iteration space in other parts of the algorithm.  As a result, this routine
 378    only does something interesting (IE produce a matrix that isn't the
 379    identity matrix) if NEST is a sparse space.  */
 380
 381 static lambda_lattice
 382 lambda_lattice_compute_base (lambda_loopnest nest)
 383 {
 384   lambda_lattice ret;
 385   int depth, invariants;
 386   lambda_matrix base;
 387
 388   int i, j, step;
 389   lambda_loop loop;
 390   lambda_linear_expression expression;
 391
 392   depth = LN_DEPTH (nest);
 393   invariants = LN_INVARIANTS (nest);
 394
 395   ret = lambda_lattice_new (depth, invariants);
 396   base = LATTICE_BASE (ret);
 397   for (i = 0; i < depth; i++)
 398     {
 399       loop = LN_LOOPS (nest)[i];
 400       gcc_assert (loop);
 401       step = LL_STEP (loop);
 402       /* If we have a step of 1, then the base is one, and the
 403          origin and invariant coefficients are 0.  */
 404       if (step == 1)
 405         {
 406           for (j = 0; j < depth; j++)
 407             base[i][j] = 0;
 408           base[i][i] = 1;
 409           LATTICE_ORIGIN (ret)[i] = 0;
 410           for (j = 0; j < invariants; j++)
 411             LATTICE_ORIGIN_INVARIANTS (ret)[i][j] = 0;
 412         }
 413       else
 414         {
 415           /* Otherwise, we need the lower bound expression (which must
 416              be an affine function)  to determine the base.  */
 417           expression = LL_LOWER_BOUND (loop);
 418           gcc_assert (expression && !LLE_NEXT (expression)
 419                       && LLE_DENOMINATOR (expression) == 1);
 420
 421           /* The lower triangular portion of the base is going to be the
 422              coefficient times the step */
 423           for (j = 0; j < i; j++)
 424             base[i][j] = LLE_COEFFICIENTS (expression)[j]
 425               * LL_STEP (LN_LOOPS (nest)[j]);
 426           base[i][i] = step;
 427           for (j = i + 1; j < depth; j++)
 428             base[i][j] = 0;
 429
 430           /* Origin for this loop is the constant of the lower bound
 431              expression.  */
 432           LATTICE_ORIGIN (ret)[i] = LLE_CONSTANT (expression);
 433
 434           /* Coefficient for the invariants are equal to the invariant
 435              coefficients in the expression.  */
 436           for (j = 0; j < invariants; j++)
 437             LATTICE_ORIGIN_INVARIANTS (ret)[i][j] =
 438               LLE_INVARIANT_COEFFICIENTS (expression)[j];
 439         }
 440     }
 441   return ret;
 442 }
 443
 444 /* Compute the greatest common denominator of two numbers (A and B) using
 445    Euclid's algorithm.  */
 446
 447 static int
 448 gcd (int a, int b)
 449 {
 450
 451   int x, y, z;
 452
 453   x = abs (a);
 454   y = abs (b);
 455
 456   while (x > 0)
 457     {
 458       z = y % x;
 459       y = x;
 460       x = z;
 461     }
 462
 463   return (y);
 464 }
 465
 466 /* Compute the greatest common denominator of a VECTOR of SIZE numbers.  */
 467
 468 static int
 469 gcd_vector (lambda_vector vector, int size)
 470 {
 471   int i;
 472   int gcd1 = 0;
 473
 474   if (size > 0)
 475     {
 476       gcd1 = vector[0];
 477       for (i = 1; i < size; i++)
 478         gcd1 = gcd (gcd1, vector[i]);
 479     }
 480   return gcd1;
 481 }
 482
 483 /* Compute the least common multiple of two numbers A and B .  */
 484
 485 static int
 486 lcm (int a, int b)
 487 {
 488   return (abs (a) * abs (b) / gcd (a, b));
 489 }
 490
 491 /* Perform Fourier-Motzkin elimination to calculate the bounds of the
 492    auxillary nest.
 493    Fourier-Motzkin is a way of reducing systems of linear inequalities so that
 494    it is easy to calculate the answer and bounds.
 495    A sketch of how it works:
 496    Given a system of linear inequalities, ai * xj >= bk, you can always
 497    rewrite the constraints so they are all of the form
 498    a <= x, or x <= b, or x >= constant for some x in x1 ... xj (and some b
 499    in b1 ... bk, and some a in a1...ai)
 500    You can then eliminate this x from the non-constant inequalities by
 501    rewriting these as a <= b, x >= constant, and delete the x variable.
 502    You can then repeat this for any remaining x variables, and then we have
 503    an easy to use variable <= constant (or no variables at all) form that we
 504    can construct our bounds from.
 505
 506    In our case, each time we eliminate, we construct part of the bound from
 507    the ith variable, then delete the ith variable.
 508
 509    Remember the constant are in our vector a, our coefficient matrix is A,
 510    and our invariant coefficient matrix is B.
 511
 512    SIZE is the size of the matrices being passed.
 513    DEPTH is the loop nest depth.
 514    INVARIANTS is the number of loop invariants.
 515    A, B, and a are the coefficient matrix, invariant coefficient, and a
 516    vector of constants, respectively.  */
 517
 518 static lambda_loopnest
 519 compute_nest_using_fourier_motzkin (int size,
 520                                     int depth,
 521                                     int invariants,
 522                                     lambda_matrix A,
 523                                     lambda_matrix B,
 524                                     lambda_vector a)
 525 {
 526
 527   int multiple, f1, f2;
 528   int i, j, k;
 529   lambda_linear_expression expression;
 530   lambda_loop loop;
 531   lambda_loopnest auxillary_nest;
 532   lambda_matrix swapmatrix, A1, B1;
 533   lambda_vector swapvector, a1;
 534   int newsize;
 535
 536   A1 = lambda_matrix_new (128, depth);
 537   B1 = lambda_matrix_new (128, invariants);
 538   a1 = lambda_vector_new (128);
 539
 540   auxillary_nest = lambda_loopnest_new (depth, invariants);
 541
 542   for (i = depth - 1; i >= 0; i--)
 543     {
 544       loop = lambda_loop_new ();
 545       LN_LOOPS (auxillary_nest)[i] = loop;
 546       LL_STEP (loop) = 1;
 547
 548       for (j = 0; j < size; j++)
 549         {
 550           if (A[j][i] < 0)
 551             {
 552               /* Any linear expression in the matrix with a coefficient less
 553                  than 0 becomes part of the new lower bound.  */
 554               expression = lambda_linear_expression_new (depth, invariants);
 555
 556               for (k = 0; k < i; k++)
 557                 LLE_COEFFICIENTS (expression)[k] = A[j][k];
 558
 559               for (k = 0; k < invariants; k++)
 560                 LLE_INVARIANT_COEFFICIENTS (expression)[k] = -1 * B[j][k];
 561
 562               LLE_DENOMINATOR (expression) = -1 * A[j][i];
 563               LLE_CONSTANT (expression) = -1 * a[j];
 564
 565               /* Ignore if identical to the existing lower bound.  */
 566               if (!lle_equal (LL_LOWER_BOUND (loop),
 567                               expression, depth, invariants))
 568                 {
 569                   LLE_NEXT (expression) = LL_LOWER_BOUND (loop);
 570                   LL_LOWER_BOUND (loop) = expression;
 571                 }
 572
 573             }
 574           else if (A[j][i] > 0)
 575             {
 576               /* Any linear expression with a coefficient greater than 0
 577                  becomes part of the new upper bound.  */
 578               expression = lambda_linear_expression_new (depth, invariants);
 579               for (k = 0; k < i; k++)
 580                 LLE_COEFFICIENTS (expression)[k] = -1 * A[j][k];
 581
 582               for (k = 0; k < invariants; k++)
 583                 LLE_INVARIANT_COEFFICIENTS (expression)[k] = B[j][k];
 584
 585               LLE_DENOMINATOR (expression) = A[j][i];
 586               LLE_CONSTANT (expression) = a[j];
 587
 588               /* Ignore if identical to the existing upper bound.  */
 589               if (!lle_equal (LL_UPPER_BOUND (loop),
 590                               expression, depth, invariants))
 591                 {
 592                   LLE_NEXT (expression) = LL_UPPER_BOUND (loop);
 593                   LL_UPPER_BOUND (loop) = expression;
 594                 }
 595
 596             }
 597         }
 598
 599       /* This portion creates a new system of linear inequalities by deleting
 600          the i'th variable, reducing the system by one variable.  */
 601       newsize = 0;
 602       for (j = 0; j < size; j++)
 603         {
 604           /* If the coefficient for the i'th variable is 0, then we can just
 605              eliminate the variable straightaway.  Otherwise, we have to
 606              multiply through by the coefficients we are eliminating.  */
 607           if (A[j][i] == 0)
 608             {
 609               lambda_vector_copy (A[j], A1[newsize], depth);
 610               lambda_vector_copy (B[j], B1[newsize], invariants);
 611               a1[newsize] = a[j];
 612               newsize++;
 613             }
 614           else if (A[j][i] > 0)
 615             {
 616               for (k = 0; k < size; k++)
 617                 {
 618                   if (A[k][i] < 0)
 619                     {
 620                       multiple = lcm (A[j][i], A[k][i]);
 621                       f1 = multiple / A[j][i];
 622                       f2 = -1 * multiple / A[k][i];
 623
 624                       lambda_vector_add_mc (A[j], f1, A[k], f2,
 625                                             A1[newsize], depth);
 626                       lambda_vector_add_mc (B[j], f1, B[k], f2,
 627                                             B1[newsize], invariants);
 628                       a1[newsize] = f1 * a[j] + f2 * a[k];
 629                       newsize++;
 630                     }
 631                 }
 632             }
 633         }
 634
 635       swapmatrix = A;
 636       A = A1;
 637       A1 = swapmatrix;
 638
 639       swapmatrix = B;
 640       B = B1;
 641       B1 = swapmatrix;
 642
 643       swapvector = a;
 644       a = a1;
 645       a1 = swapvector;
 646
 647       size = newsize;
 648     }
 649
 650   return auxillary_nest;
 651 }
 652
 653 /* Compute the loop bounds for the auxiliary space NEST.
 654    Input system used is Ax <= b.  TRANS is the unimodular transformation.
 655    Given the original nest, this function will
 656    1. Convert the nest into matrix form, which consists of a matrix for the
 657    coefficients, a matrix for the
 658    invariant coefficients, and a vector for the constants.
 659    2. Use the matrix form to calculate the lattice base for the nest (which is
 660    a dense space)
 661    3. Compose the dense space transform with the user specified transform, to
 662    get a transform we can easily calculate transformed bounds for.
 663    4. Multiply the composed transformation matrix times the matrix form of the
 664    loop.
 665    5. Transform the newly created matrix (from step 4) back into a loop nest
 666    using fourier motzkin elimination to figure out the bounds.  */
 667
 668 static lambda_loopnest
 669 lambda_compute_auxillary_space (lambda_loopnest nest,
 670                                 lambda_trans_matrix trans)
 671 {
 672   lambda_matrix A, B, A1, B1;
 673   lambda_vector a, a1;
 674   lambda_matrix invertedtrans;
 675   int determinant, depth, invariants, size;
 676   int i, j;
 677   lambda_loop loop;
 678   lambda_linear_expression expression;
 679   lambda_lattice lattice;
 680
 681   depth = LN_DEPTH (nest);
 682   invariants = LN_INVARIANTS (nest);
 683
 684   /* Unfortunately, we can't know the number of constraints we'll have
 685      ahead of time, but this should be enough even in ridiculous loop nest
 686      cases. We abort if we go over this limit.  */
 687   A = lambda_matrix_new (128, depth);
 688   B = lambda_matrix_new (128, invariants);
 689   a = lambda_vector_new (128);
 690
 691   A1 = lambda_matrix_new (128, depth);
 692   B1 = lambda_matrix_new (128, invariants);
 693   a1 = lambda_vector_new (128);
 694
 695   /* Store the bounds in the equation matrix A, constant vector a, and
 696      invariant matrix B, so that we have Ax <= a + B.
 697      This requires a little equation rearranging so that everything is on the
 698      correct side of the inequality.  */
 699   size = 0;
 700   for (i = 0; i < depth; i++)
 701     {
 702       loop = LN_LOOPS (nest)[i];
 703
 704       /* First we do the lower bound.  */
 705       if (LL_STEP (loop) > 0)
 706         expression = LL_LOWER_BOUND (loop);
 707       else
 708         expression = LL_UPPER_BOUND (loop);
 709
 710       for (; expression != NULL; expression = LLE_NEXT (expression))
 711         {
 712           /* Fill in the coefficient.  */
 713           for (j = 0; j < i; j++)
 714             A[size][j] = LLE_COEFFICIENTS (expression)[j];
 715
 716           /* And the invariant coefficient.  */
 717           for (j = 0; j < invariants; j++)
 718             B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j];
 719
 720           /* And the constant.  */
 721           a[size] = LLE_CONSTANT (expression);
 722
 723           /* Convert (2x+3y+2+b)/4 <= z to 2x+3y-4z <= -2-b.  IE put all
 724              constants and single variables on   */
 725           A[size][i] = -1 * LLE_DENOMINATOR (expression);
 726           a[size] *= -1;
 727           for (j = 0; j < invariants; j++)
 728             B[size][j] *= -1;
 729
 730           size++;
 731           /* Need to increase matrix sizes above.  */
 732           gcc_assert (size <= 127);
 733
 734         }
 735
 736       /* Then do the exact same thing for the upper bounds.  */
 737       if (LL_STEP (loop) > 0)
 738         expression = LL_UPPER_BOUND (loop);
 739       else
 740         expression = LL_LOWER_BOUND (loop);
 741
 742       for (; expression != NULL; expression = LLE_NEXT (expression))
 743         {
 744           /* Fill in the coefficient.  */
 745           for (j = 0; j < i; j++)
 746             A[size][j] = LLE_COEFFICIENTS (expression)[j];
 747
 748           /* And the invariant coefficient.  */
 749           for (j = 0; j < invariants; j++)
 750             B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j];
 751
 752           /* And the constant.  */
 753           a[size] = LLE_CONSTANT (expression);
 754
 755           /* Convert z <= (2x+3y+2+b)/4 to -2x-3y+4z <= 2+b.  */
 756           for (j = 0; j < i; j++)
 757             A[size][j] *= -1;
 758           A[size][i] = LLE_DENOMINATOR (expression);
 759           size++;
 760           /* Need to increase matrix sizes above.  */
 761           gcc_assert (size <= 127);
 762
 763         }
 764     }
 765
 766   /* Compute the lattice base x = base * y + origin, where y is the
 767      base space.  */
 768   lattice = lambda_lattice_compute_base (nest);
 769
 770   /* Ax <= a + B then becomes ALy <= a+B - A*origin.  L is the lattice base  */
 771
 772   /* A1 = A * L */
 773   lambda_matrix_mult (A, LATTICE_BASE (lattice), A1, size, depth, depth);
 774
 775   /* a1 = a - A * origin constant.  */
 776   lambda_matrix_vector_mult (A, size, depth, LATTICE_ORIGIN (lattice), a1);
 777   lambda_vector_add_mc (a, 1, a1, -1, a1, size);
 778
 779   /* B1 = B - A * origin invariant.  */
 780   lambda_matrix_mult (A, LATTICE_ORIGIN_INVARIANTS (lattice), B1, size, depth,
 781                       invariants);
 782   lambda_matrix_add_mc (B, 1, B1, -1, B1, size, invariants);
 783
 784   /* Now compute the auxiliary space bounds by first inverting U, multiplying
 785      it by A1, then performing fourier motzkin.  */
 786
 787   invertedtrans = lambda_matrix_new (depth, depth);
 788
 789   /* Compute the inverse of U.  */
 790   determinant = lambda_matrix_inverse (LTM_MATRIX (trans),
 791                                        invertedtrans, depth);
 792
 793   /* A = A1 inv(U).  */
 794   lambda_matrix_mult (A1, invertedtrans, A, size, depth, depth);
 795
 796   return compute_nest_using_fourier_motzkin (size, depth, invariants,
 797                                              A, B1, a1);
 798 }
 799
 800 /* Compute the loop bounds for the target space, using the bounds of
 801    the auxiliary nest AUXILLARY_NEST, and the triangular matrix H.
 802    The target space loop bounds are computed by multiplying the triangular
 803    matrix H by the auxillary nest, to get the new loop bounds.  The sign of
 804    the loop steps (positive or negative) is then used to swap the bounds if
 805    the loop counts downwards.
 806    Return the target loopnest.  */
 807
 808 static lambda_loopnest
 809 lambda_compute_target_space (lambda_loopnest auxillary_nest,
 810                              lambda_trans_matrix H, lambda_vector stepsigns)
 811 {
 812   lambda_matrix inverse, H1;
 813   int determinant, i, j;
 814   int gcd1, gcd2;
 815   int factor;
 816
 817   lambda_loopnest target_nest;
 818   int depth, invariants;
 819   lambda_matrix target;
 820
 821   lambda_loop auxillary_loop, target_loop;
 822   lambda_linear_expression expression, auxillary_expr, target_expr, tmp_expr;
 823
 824   depth = LN_DEPTH (auxillary_nest);
 825   invariants = LN_INVARIANTS (auxillary_nest);
 826
 827   inverse = lambda_matrix_new (depth, depth);
 828   determinant = lambda_matrix_inverse (LTM_MATRIX (H), inverse, depth);
 829
 830   /* H1 is H excluding its diagonal.  */
 831   H1 = lambda_matrix_new (depth, depth);
 832   lambda_matrix_copy (LTM_MATRIX (H), H1, depth, depth);
 833
 834   for (i = 0; i < depth; i++)
 835     H1[i][i] = 0;
 836
 837   /* Computes the linear offsets of the loop bounds.  */
 838   target = lambda_matrix_new (depth, depth);
 839   lambda_matrix_mult (H1, inverse, target, depth, depth, depth);
 840
 841   target_nest = lambda_loopnest_new (depth, invariants);
 842
 843   for (i = 0; i < depth; i++)
 844     {
 845
 846       /* Get a new loop structure.  */
 847       target_loop = lambda_loop_new ();
 848       LN_LOOPS (target_nest)[i] = target_loop;
 849
 850       /* Computes the gcd of the coefficients of the linear part.  */
 851       gcd1 = gcd_vector (target[i], i);
 852
 853       /* Include the denominator in the GCD.  */
 854       gcd1 = gcd (gcd1, determinant);
 855
 856       /* Now divide through by the gcd.  */
 857       for (j = 0; j < i; j++)
 858         target[i][j] = target[i][j] / gcd1;
 859
 860       expression = lambda_linear_expression_new (depth, invariants);
 861       lambda_vector_copy (target[i], LLE_COEFFICIENTS (expression), depth);
 862       LLE_DENOMINATOR (expression) = determinant / gcd1;
 863       LLE_CONSTANT (expression) = 0;
 864       lambda_vector_clear (LLE_INVARIANT_COEFFICIENTS (expression),
 865                            invariants);
 866       LL_LINEAR_OFFSET (target_loop) = expression;
 867     }
 868
 869   /* For each loop, compute the new bounds from H.  */
 870   for (i = 0; i < depth; i++)
 871     {
 872       auxillary_loop = LN_LOOPS (auxillary_nest)[i];
 873       target_loop = LN_LOOPS (target_nest)[i];
 874       LL_STEP (target_loop) = LTM_MATRIX (H)[i][i];
 875       factor = LTM_MATRIX (H)[i][i];
 876
 877       /* First we do the lower bound.  */
 878       auxillary_expr = LL_LOWER_BOUND (auxillary_loop);
 879
 880       for (; auxillary_expr != NULL;
 881            auxillary_expr = LLE_NEXT (auxillary_expr))
 882         {
 883           target_expr = lambda_linear_expression_new (depth, invariants);
 884           lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
 885                                      depth, inverse, depth,
 886                                      LLE_COEFFICIENTS (target_expr));
 887           lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
 888                                     LLE_COEFFICIENTS (target_expr), depth,
 889                                     factor);
 890
 891           LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
 892           lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
 893                               LLE_INVARIANT_COEFFICIENTS (target_expr),
 894                               invariants);
 895           lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
 896                                     LLE_INVARIANT_COEFFICIENTS (target_expr),
 897                                     invariants, factor);
 898           LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);
 899
 900           if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
 901             {
 902               LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
 903                 * determinant;
 904               lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
 905                                         (target_expr),
 906                                         LLE_INVARIANT_COEFFICIENTS
 907                                         (target_expr), invariants,
 908                                         determinant);
 909               LLE_DENOMINATOR (target_expr) =
 910                 LLE_DENOMINATOR (target_expr) * determinant;
 911             }
 912           /* Find the gcd and divide by it here, rather than doing it
 913              at the tree level.  */
 914           gcd1 = gcd_vector (LLE_COEFFICIENTS (target_expr), depth);
 915           gcd2 = gcd_vector (LLE_INVARIANT_COEFFICIENTS (target_expr),
 916                              invariants);
 917           gcd1 = gcd (gcd1, gcd2);
 918           gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
 919           gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
 920           for (j = 0; j < depth; j++)
 921             LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
 922           for (j = 0; j < invariants; j++)
 923             LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
 924           LLE_CONSTANT (target_expr) /= gcd1;
 925           LLE_DENOMINATOR (target_expr) /= gcd1;
 926           /* Ignore if identical to existing bound.  */
 927           if (!lle_equal (LL_LOWER_BOUND (target_loop), target_expr, depth,
 928                           invariants))
 929             {
 930               LLE_NEXT (target_expr) = LL_LOWER_BOUND (target_loop);
 931               LL_LOWER_BOUND (target_loop) = target_expr;
 932             }
 933         }
 934       /* Now do the upper bound.  */
 935       auxillary_expr = LL_UPPER_BOUND (auxillary_loop);
 936
 937       for (; auxillary_expr != NULL;
 938            auxillary_expr = LLE_NEXT (auxillary_expr))
 939         {
 940           target_expr = lambda_linear_expression_new (depth, invariants);
 941           lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
 942                                      depth, inverse, depth,
 943                                      LLE_COEFFICIENTS (target_expr));
 944           lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
 945                                     LLE_COEFFICIENTS (target_expr), depth,
 946                                     factor);
 947           LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
 948           lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
 949                               LLE_INVARIANT_COEFFICIENTS (target_expr),
 950                               invariants);
 951           lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
 952                                     LLE_INVARIANT_COEFFICIENTS (target_expr),
 953                                     invariants, factor);
 954           LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);
 955
 956           if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
 957             {
 958               LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
 959                 * determinant;
 960               lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
 961                                         (target_expr),
 962                                         LLE_INVARIANT_COEFFICIENTS
 963                                         (target_expr), invariants,
 964                                         determinant);
 965               LLE_DENOMINATOR (target_expr) =
 966                 LLE_DENOMINATOR (target_expr) * determinant;
 967             }
 968           /* Find the gcd and divide by it here, instead of at the
 969              tree level.  */
 970           gcd1 = gcd_vector (LLE_COEFFICIENTS (target_expr), depth);
 971           gcd2 = gcd_vector (LLE_INVARIANT_COEFFICIENTS (target_expr),
 972                              invariants);
 973           gcd1 = gcd (gcd1, gcd2);
 974           gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
 975           gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
 976           for (j = 0; j < depth; j++)
 977             LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
 978           for (j = 0; j < invariants; j++)
 979             LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
 980           LLE_CONSTANT (target_expr) /= gcd1;
 981           LLE_DENOMINATOR (target_expr) /= gcd1;
 982           /* Ignore if equal to existing bound.  */
 983           if (!lle_equal (LL_UPPER_BOUND (target_loop), target_expr, depth,
 984                           invariants))
 985             {
 986               LLE_NEXT (target_expr) = LL_UPPER_BOUND (target_loop);
 987               LL_UPPER_BOUND (target_loop) = target_expr;
 988             }
 989         }
 990     }
 991   for (i = 0; i < depth; i++)
 992     {
 993       target_loop = LN_LOOPS (target_nest)[i];
 994       /* If necessary, exchange the upper and lower bounds and negate
 995          the step size.  */
 996       if (stepsigns[i] < 0)
 997         {
 998           LL_STEP (target_loop) *= -1;
 999           tmp_expr = LL_LOWER_BOUND (target_loop);
1000           LL_LOWER_BOUND (target_loop) = LL_UPPER_BOUND (target_loop);
1001           LL_UPPER_BOUND (target_loop) = tmp_expr;
1002         }
1003     }
1004   return target_nest;
1005 }
1006
1007 /* Compute the step signs of TRANS, using TRANS and stepsigns.  Return the new
1008    result.  */
1009
1010 static lambda_vector
1011 lambda_compute_step_signs (lambda_trans_matrix trans, lambda_vector stepsigns)
1012 {
1013   lambda_matrix matrix, H;
1014   int size;
1015   lambda_vector newsteps;
1016   int i, j, factor, minimum_column;
1017   int temp;
1018
1019   matrix = LTM_MATRIX (trans);
1020   size = LTM_ROWSIZE (trans);
1021   H = lambda_matrix_new (size, size);
1022
1023   newsteps = lambda_vector_new (size);
1024   lambda_vector_copy (stepsigns, newsteps, size);
1025
1026   lambda_matrix_copy (matrix, H, size, size);
1027
1028   for (j = 0; j < size; j++)
1029     {
1030       lambda_vector row;
1031       row = H[j];
1032       for (i = j; i < size; i++)
1033         if (row[i] < 0)
1034           lambda_matrix_col_negate (H, size, i);
1035       while (lambda_vector_first_nz (row, size, j + 1) < size)
1036         {
1037           minimum_column = lambda_vector_min_nz (row, size, j);
1038           lambda_matrix_col_exchange (H, size, j, minimum_column);
1039
1040           temp = newsteps[j];
1041           newsteps[j] = newsteps[minimum_column];
1042           newsteps[minimum_column] = temp;
1043
1044           for (i = j + 1; i < size; i++)
1045             {
1046               factor = row[i] / row[j];
1047               lambda_matrix_col_add (H, size, j, i, -1 * factor);
1048             }
1049         }
1050     }
1051   return newsteps;
1052 }
1053
1054 /* Transform NEST according to TRANS, and return the new loopnest.
1055    This involves
1056    1. Computing a lattice base for the transformation
1057    2. Composing the dense base with the specified transformation (TRANS)
1058    3. Decomposing the combined transformation into a lower triangular portion,
1059    and a unimodular portion.
1060    4. Computing the auxillary nest using the unimodular portion.
1061    5. Computing the target nest using the auxillary nest and the lower
1062    triangular portion.  */
1063
1064 lambda_loopnest
1065 lambda_loopnest_transform (lambda_loopnest nest, lambda_trans_matrix trans)
1066 {
1067   lambda_loopnest auxillary_nest, target_nest;
1068
1069   int depth, invariants;
1070   int i, j;
1071   lambda_lattice lattice;
1072   lambda_trans_matrix trans1, H, U;
1073   lambda_loop loop;
1074   lambda_linear_expression expression;
1075   lambda_vector origin;
1076   lambda_matrix origin_invariants;
1077   lambda_vector stepsigns;
1078   int f;
1079
1080   depth = LN_DEPTH (nest);
1081   invariants = LN_INVARIANTS (nest);
1082
1083   /* Keep track of the signs of the loop steps.  */
1084   stepsigns = lambda_vector_new (depth);
1085   for (i = 0; i < depth; i++)
1086     {
1087       if (LL_STEP (LN_LOOPS (nest)[i]) > 0)
1088         stepsigns[i] = 1;
1089       else
1090         stepsigns[i] = -1;
1091     }
1092
1093   /* Compute the lattice base.  */
1094   lattice = lambda_lattice_compute_base (nest);
1095   trans1 = lambda_trans_matrix_new (depth, depth);
1096
1097   /* Multiply the transformation matrix by the lattice base.  */
1098
1099   lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_BASE (lattice),
1100                       LTM_MATRIX (trans1), depth, depth, depth);
1101
1102   /* Compute the Hermite normal form for the new transformation matrix.  */
1103   H = lambda_trans_matrix_new (depth, depth);
1104   U = lambda_trans_matrix_new (depth, depth);
1105   lambda_matrix_hermite (LTM_MATRIX (trans1), depth, LTM_MATRIX (H),
1106                          LTM_MATRIX (U));
1107
1108   /* Compute the auxiliary loop nest's space from the unimodular
1109      portion.  */
1110   auxillary_nest = lambda_compute_auxillary_space (nest, U);
1111
1112   /* Compute the loop step signs from the old step signs and the
1113      transformation matrix.  */
1114   stepsigns = lambda_compute_step_signs (trans1, stepsigns);
1115
1116   /* Compute the target loop nest space from the auxiliary nest and
1117      the lower triangular matrix H.  */
1118   target_nest = lambda_compute_target_space (auxillary_nest, H, stepsigns);
1119   origin = lambda_vector_new (depth);
1120   origin_invariants = lambda_matrix_new (depth, invariants);
1121   lambda_matrix_vector_mult (LTM_MATRIX (trans), depth, depth,
1122                              LATTICE_ORIGIN (lattice), origin);
1123   lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_ORIGIN_INVARIANTS (lattice),
1124                       origin_invariants, depth, depth, invariants);
1125
1126   for (i = 0; i < depth; i++)
1127     {
1128       loop = LN_LOOPS (target_nest)[i];
1129       expression = LL_LINEAR_OFFSET (loop);
1130       if (lambda_vector_zerop (LLE_COEFFICIENTS (expression), depth))
1131         f = 1;
1132       else
1133         f = LLE_DENOMINATOR (expression);
1134
1135       LLE_CONSTANT (expression) += f * origin[i];
1136
1137       for (j = 0; j < invariants; j++)
1138         LLE_INVARIANT_COEFFICIENTS (expression)[j] +=
1139           f * origin_invariants[i][j];
1140     }
1141
1142   return target_nest;
1143
1144 }
1145
1146 /* Convert a gcc tree expression EXPR to a lambda linear expression, and
1147    return the new expression.  DEPTH is the depth of the loopnest.
1148    OUTERINDUCTIONVARS is an array of the induction variables for outer loops
1149    in this nest.  INVARIANTS is the array of invariants for the loop.  EXTRA
1150    is the amount we have to add/subtract from the expression because of the
1151    type of comparison it is used in.  */
1152
1153 static lambda_linear_expression
1154 gcc_tree_to_linear_expression (int depth, tree expr,
1155                                VEC(tree) *outerinductionvars,
1156                                VEC(tree) *invariants, int extra)
1157 {
1158   lambda_linear_expression lle = NULL;
1159   switch (TREE_CODE (expr))
1160     {
1161     case INTEGER_CST:
1162       {
1163         lle = lambda_linear_expression_new (depth, 2 * depth);
1164         LLE_CONSTANT (lle) = TREE_INT_CST_LOW (expr);
1165         if (extra != 0)
1166           LLE_CONSTANT (lle) += extra;
1167
1168         LLE_DENOMINATOR (lle) = 1;
1169       }
1170       break;
1171     case SSA_NAME:
1172       {
1173         tree iv, invar;
1174         size_t i;
1175         for (i = 0; VEC_iterate (tree, outerinductionvars, i, iv); i++)
1176           if (iv != NULL)
1177             {
1178               if (SSA_NAME_VAR (iv) == SSA_NAME_VAR (expr))
1179                 {
1180                   lle = lambda_linear_expression_new (depth, 2 * depth);
1181                   LLE_COEFFICIENTS (lle)[i] = 1;
1182                   if (extra != 0)
1183                     LLE_CONSTANT (lle) = extra;
1184
1185                   LLE_DENOMINATOR (lle) = 1;
1186                 }
1187             }
1188         for (i = 0; VEC_iterate (tree, invariants, i, invar); i++)
1189           if (invar != NULL)
1190             {
1191               if (SSA_NAME_VAR (invar) == SSA_NAME_VAR (expr))
1192                 {
1193                   lle = lambda_linear_expression_new (depth, 2 * depth);
1194                   LLE_INVARIANT_COEFFICIENTS (lle)[i] = 1;
1195                   if (extra != 0)
1196                     LLE_CONSTANT (lle) = extra;
1197                   LLE_DENOMINATOR (lle) = 1;
1198                 }
1199             }
1200       }
1201       break;
1202     default:
1203       return NULL;
1204     }
1205
1206   return lle;
1207 }
1208
1209 /* Return the depth of the loopnest NEST */
1210
1211 static int
1212 depth_of_nest (struct loop *nest)
1213 {
1214   size_t depth = 0;
1215   while (nest)
1216     {
1217       depth++;
1218       nest = nest->inner;
1219     }
1220   return depth;
1221 }
1222
1223
1224 /* Return true if OP is invariant in LOOP and all outer loops.  */
1225
1226 static bool
1227 invariant_in_loop_and_outer_loops (struct loop *loop, tree op)
1228 {
1229   if (is_gimple_min_invariant (op))
1230     return true;
1231   if (loop->depth == 0)
1232     return true;
1233   if (!expr_invariant_in_loop_p (loop, op))
1234     return false;
1235   if (loop->outer
1236       && !invariant_in_loop_and_outer_loops (loop->outer, op))
1237     return false;
1238   return true;
1239 }
1240
1241 /* Generate a lambda loop from a gcc loop LOOP.  Return the new lambda loop,
1242    or NULL if it could not be converted.
1243    DEPTH is the depth of the loop.
1244    INVARIANTS is a pointer to the array of loop invariants.
1245    The induction variable for this loop should be stored in the parameter
1246    OURINDUCTIONVAR.
1247    OUTERINDUCTIONVARS is an array of induction variables for outer loops.  */
1248
1249 static lambda_loop
1250 gcc_loop_to_lambda_loop (struct loop *loop, int depth,
1251                          VEC (tree) ** invariants,
1252                          tree * ourinductionvar,
1253                          VEC (tree) * outerinductionvars,
1254                          VEC (tree) ** lboundvars,
1255                          VEC (tree) ** uboundvars,
1256                          VEC (int) ** steps)
1257 {
1258   tree phi;
1259   tree exit_cond;
1260   tree access_fn, inductionvar;
1261   tree step;
1262   lambda_loop lloop = NULL;
1263   lambda_linear_expression lbound, ubound;
1264   tree test;
1265   int stepint;
1266   int extra = 0;
1267   tree lboundvar, uboundvar, uboundresult;
1268   use_optype uses;
1269
1270   /* Find out induction var and exit condition.  */
1271   inductionvar = find_induction_var_from_exit_cond (loop);
1272   exit_cond = get_loop_exit_condition (loop);
1273
1274   if (inductionvar == NULL || exit_cond == NULL)
1275     {
1276       if (dump_file && (dump_flags & TDF_DETAILS))
1277         fprintf (dump_file,
1278                  "Unable to convert loop: Cannot determine exit condition or induction variable for loop.\n");
1279       return NULL;
1280     }
1281
1282   test = TREE_OPERAND (exit_cond, 0);
1283
1284   if (SSA_NAME_DEF_STMT (inductionvar) == NULL_TREE)
1285     {
1286
1287       if (dump_file && (dump_flags & TDF_DETAILS))
1288         fprintf (dump_file,
1289                  "Unable to convert loop: Cannot find PHI node for induction variable\n");
1290
1291       return NULL;
1292     }
1293
1294   phi = SSA_NAME_DEF_STMT (inductionvar);
1295   if (TREE_CODE (phi) != PHI_NODE)
1296     {
1297       get_stmt_operands (phi);
1298       uses = STMT_USE_OPS (phi);
1299
1300       if (!uses)
1301         {
1302
1303           if (dump_file && (dump_flags & TDF_DETAILS))
1304             fprintf (dump_file,
1305                      "Unable to convert loop: Cannot find PHI node for induction variable\n");
1306
1307           return NULL;
1308         }
1309
1310       phi = USE_OP (uses, 0);
1311       phi = SSA_NAME_DEF_STMT (phi);
1312       if (TREE_CODE (phi) != PHI_NODE)
1313         {
1314
1315           if (dump_file && (dump_flags & TDF_DETAILS))
1316             fprintf (dump_file,
1317                      "Unable to convert loop: Cannot find PHI node for induction variable\n");
1318           return NULL;
1319         }
1320
1321     }
1322
1323   /* The induction variable name/version we want to put in the array is the
1324      result of the induction variable phi node.  */
1325   *ourinductionvar = PHI_RESULT (phi);
1326   access_fn = instantiate_parameters
1327     (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
1328   if (access_fn == chrec_dont_know)
1329     {
1330       if (dump_file && (dump_flags & TDF_DETAILS))
1331         fprintf (dump_file,
1332                  "Unable to convert loop: Access function for induction variable phi is unknown\n");
1333
1334       return NULL;
1335     }
1336
1337   step = evolution_part_in_loop_num (access_fn, loop->num);
1338   if (!step || step == chrec_dont_know)
1339     {
1340       if (dump_file && (dump_flags & TDF_DETAILS))
1341         fprintf (dump_file,
1342                  "Unable to convert loop: Cannot determine step of loop.\n");
1343
1344       return NULL;
1345     }
1346   if (TREE_CODE (step) != INTEGER_CST)
1347     {
1348
1349       if (dump_file && (dump_flags & TDF_DETAILS))
1350         fprintf (dump_file,
1351                  "Unable to convert loop: Step of loop is not integer.\n");
1352       return NULL;
1353     }
1354
1355   stepint = TREE_INT_CST_LOW (step);
1356
1357   /* Only want phis for induction vars, which will have two
1358      arguments.  */
1359   if (PHI_NUM_ARGS (phi) != 2)
1360     {
1361       if (dump_file && (dump_flags & TDF_DETAILS))
1362         fprintf (dump_file,
1363                  "Unable to convert loop: PHI node for induction variable has >2 arguments\n");
1364       return NULL;
1365     }
1366
1367   /* Another induction variable check. One argument's source should be
1368      in the loop, one outside the loop.  */
1369   if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 0)->src)
1370       && flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 1)->src))
1371     {
1372
1373       if (dump_file && (dump_flags & TDF_DETAILS))
1374         fprintf (dump_file,
1375                  "Unable to convert loop: PHI edges both inside loop, or both outside loop.\n");
1376
1377       return NULL;
1378     }
1379
1380   if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 0)->src))
1381     {
1382       lboundvar = PHI_ARG_DEF (phi, 1);
1383       lbound = gcc_tree_to_linear_expression (depth, lboundvar,
1384                                               outerinductionvars, *invariants,
1385                                               0);
1386     }
1387   else
1388     {
1389       lboundvar = PHI_ARG_DEF (phi, 0);
1390       lbound = gcc_tree_to_linear_expression (depth, lboundvar,
1391                                               outerinductionvars, *invariants,
1392                                               0);
1393     }
1394
1395   if (!lbound)
1396     {
1397
1398       if (dump_file && (dump_flags & TDF_DETAILS))
1399         fprintf (dump_file,
1400                  "Unable to convert loop: Cannot convert lower bound to linear expression\n");
1401
1402       return NULL;
1403     }
1404   /* One part of the test may be a loop invariant tree.  */
1405   if (TREE_CODE (TREE_OPERAND (test, 1)) == SSA_NAME
1406       && invariant_in_loop_and_outer_loops (loop, TREE_OPERAND (test, 1)))
1407     VEC_safe_push (tree, *invariants, TREE_OPERAND (test, 1));
1408   else if (TREE_CODE (TREE_OPERAND (test, 0)) == SSA_NAME
1409            && invariant_in_loop_and_outer_loops (loop, TREE_OPERAND (test, 0)))
1410     VEC_safe_push (tree, *invariants, TREE_OPERAND (test, 0));
1411
1412   /* The non-induction variable part of the test is the upper bound variable.
1413    */
1414   if (TREE_OPERAND (test, 0) == inductionvar)
1415     uboundvar = TREE_OPERAND (test, 1);
1416   else
1417     uboundvar = TREE_OPERAND (test, 0);
1418
1419
1420   /* We only size the vectors assuming we have, at max, 2 times as many
1421      invariants as we do loops (one for each bound).
1422      This is just an arbitrary number, but it has to be matched against the
1423      code below.  */
1424   gcc_assert (VEC_length (tree, *invariants) <= (unsigned int) (2 * depth));
1425
1426
1427   /* We might have some leftover.  */
1428   if (TREE_CODE (test) == LT_EXPR)
1429     extra = -1 * stepint;
1430   else if (TREE_CODE (test) == NE_EXPR)
1431     extra = -1 * stepint;
1432   else if (TREE_CODE (test) == GT_EXPR)
1433     extra = -1 * stepint;
1434   else if (TREE_CODE (test) == EQ_EXPR)
1435     extra = 1 * stepint;
1436
1437   ubound = gcc_tree_to_linear_expression (depth, uboundvar,
1438                                           outerinductionvars,
1439                                           *invariants, extra);
1440   uboundresult = build (PLUS_EXPR, TREE_TYPE (uboundvar), uboundvar,
1441                         build_int_cst (TREE_TYPE (uboundvar), extra));
1442   VEC_safe_push (tree, *uboundvars, uboundresult);
1443   VEC_safe_push (tree, *lboundvars, lboundvar);
1444   VEC_safe_push (int, *steps, stepint);
1445   if (!ubound)
1446     {
1447       if (dump_file && (dump_flags & TDF_DETAILS))
1448         fprintf (dump_file,
1449                  "Unable to convert loop: Cannot convert upper bound to linear expression\n");
1450       return NULL;
1451     }
1452
1453   lloop = lambda_loop_new ();
1454   LL_STEP (lloop) = stepint;
1455   LL_LOWER_BOUND (lloop) = lbound;
1456   LL_UPPER_BOUND (lloop) = ubound;
1457   return lloop;
1458 }
1459
1460 /* Given a LOOP, find the induction variable it is testing against in the exit
1461    condition.  Return the induction variable if found, NULL otherwise.  */
1462
1463 static tree
1464 find_induction_var_from_exit_cond (struct loop *loop)
1465 {
1466   tree expr = get_loop_exit_condition (loop);
1467   tree ivarop;
1468   tree test;
1469   if (expr == NULL_TREE)
1470     return NULL_TREE;
1471   if (TREE_CODE (expr) != COND_EXPR)
1472     return NULL_TREE;
1473   test = TREE_OPERAND (expr, 0);
1474   if (!COMPARISON_CLASS_P (test))
1475     return NULL_TREE;
1476
1477   /* Find the side that is invariant in this loop. The ivar must be the other
1478      side.  */
1479
1480   if (expr_invariant_in_loop_p (loop, TREE_OPERAND (test, 0)))
1481       ivarop = TREE_OPERAND (test, 1);
1482   else if (expr_invariant_in_loop_p (loop, TREE_OPERAND (test, 1)))
1483       ivarop = TREE_OPERAND (test, 0);
1484   else
1485     return NULL_TREE;
1486
1487   if (TREE_CODE (ivarop) != SSA_NAME)
1488     return NULL_TREE;
1489   return ivarop;
1490 }
1491
1492 DEF_VEC_GC_P(lambda_loop);
1493 /* Generate a lambda loopnest from a gcc loopnest LOOP_NEST.
1494    Return the new loop nest.
1495    INDUCTIONVARS is a pointer to an array of induction variables for the
1496    loopnest that will be filled in during this process.
1497    INVARIANTS is a pointer to an array of invariants that will be filled in
1498    during this process.  */
1499
1500 lambda_loopnest
1501 gcc_loopnest_to_lambda_loopnest (struct loops *currloops,
1502                                  struct loop * loop_nest,
1503                                  VEC (tree) **inductionvars,
1504                                  VEC (tree) **invariants,
1505                                  bool need_perfect_nest)
1506 {
1507   lambda_loopnest ret;
1508   struct loop *temp;
1509   int depth = 0;
1510   size_t i;
1511   VEC (lambda_loop) *loops = NULL;
1512   VEC (tree) *uboundvars = NULL;
1513   VEC (tree) *lboundvars  = NULL;
1514   VEC (int) *steps = NULL;
1515   lambda_loop newloop;
1516   tree inductionvar = NULL;
1517
1518   depth = depth_of_nest (loop_nest);
1519   temp = loop_nest;
1520   while (temp)
1521     {
1522       newloop = gcc_loop_to_lambda_loop (temp, depth, invariants,
1523                                          &inductionvar, *inductionvars,
1524                                          &lboundvars, &uboundvars,
1525                                          &steps);
1526       if (!newloop)
1527         return NULL;
1528       VEC_safe_push (tree, *inductionvars, inductionvar);
1529       VEC_safe_push (lambda_loop, loops, newloop);
1530       temp = temp->inner;
1531     }
1532   if (need_perfect_nest)
1533     {
1534       if (!perfect_nestify (currloops, loop_nest,
1535                             lboundvars, uboundvars, steps, *inductionvars))
1536         {
1537           if (dump_file)
1538             fprintf (dump_file, "Not a perfect loop nest and couldn't convert to one.\n");
1539           return NULL;
1540         }
1541       else if (dump_file)
1542         fprintf (dump_file, "Successfully converted loop nest to perfect loop nest.\n");
1543
1544
1545     }
1546   ret = lambda_loopnest_new (depth, 2 * depth);
1547   for (i = 0; VEC_iterate (lambda_loop, loops, i, newloop); i++)
1548     LN_LOOPS (ret)[i] = newloop;
1549
1550   return ret;
1551
1552 }
1553
1554
1555 /* Convert a lambda body vector LBV to a gcc tree, and return the new tree.
1556    STMTS_TO_INSERT is a pointer to a tree where the statements we need to be
1557    inserted for us are stored.  INDUCTION_VARS is the array of induction
1558    variables for the loop this LBV is from.  TYPE is the tree type to use for
1559    the variables and trees involved.  */
1560
1561 static tree
1562 lbv_to_gcc_expression (lambda_body_vector lbv,
1563                        tree type, VEC (tree) *induction_vars,
1564                        tree * stmts_to_insert)
1565 {
1566   tree stmts, stmt, resvar, name;
1567   tree iv;
1568   size_t i;
1569   tree_stmt_iterator tsi;
1570
1571   /* Create a statement list and a linear expression temporary.  */
1572   stmts = alloc_stmt_list ();
1573   resvar = create_tmp_var (type, "lbvtmp");
1574   add_referenced_tmp_var (resvar);
1575
1576   /* Start at 0.  */
1577   stmt = build (MODIFY_EXPR, void_type_node, resvar, integer_zero_node);
1578   name = make_ssa_name (resvar, stmt);
1579   TREE_OPERAND (stmt, 0) = name;
1580   tsi = tsi_last (stmts);
1581   tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1582
1583   for (i = 0; VEC_iterate (tree, induction_vars, i, iv); i++)
1584     {
1585       if (LBV_COEFFICIENTS (lbv)[i] != 0)
1586         {
1587           tree newname;
1588           tree coeffmult;
1589
1590           /* newname = coefficient * induction_variable */
1591           coeffmult = build_int_cst (type, LBV_COEFFICIENTS (lbv)[i]);
1592           stmt = build (MODIFY_EXPR, void_type_node, resvar,
1593                         fold (build (MULT_EXPR, type, iv, coeffmult)));
1594
1595           newname = make_ssa_name (resvar, stmt);
1596           TREE_OPERAND (stmt, 0) = newname;
1597           fold_stmt (&stmt);
1598           tsi = tsi_last (stmts);
1599           tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1600
1601           /* name = name + newname */
1602           stmt = build (MODIFY_EXPR, void_type_node, resvar,
1603                         build (PLUS_EXPR, type, name, newname));
1604           name = make_ssa_name (resvar, stmt);
1605           TREE_OPERAND (stmt, 0) = name;
1606           fold_stmt (&stmt);
1607           tsi = tsi_last (stmts);
1608           tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1609
1610         }
1611     }
1612
1613   /* Handle any denominator that occurs.  */
1614   if (LBV_DENOMINATOR (lbv) != 1)
1615     {
1616       tree denominator = build_int_cst (type, LBV_DENOMINATOR (lbv));
1617       stmt = build (MODIFY_EXPR, void_type_node, resvar,
1618                     build (CEIL_DIV_EXPR, type, name, denominator));
1619       name = make_ssa_name (resvar, stmt);
1620       TREE_OPERAND (stmt, 0) = name;
1621       fold_stmt (&stmt);
1622       tsi = tsi_last (stmts);
1623       tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1624     }
1625   *stmts_to_insert = stmts;
1626   return name;
1627 }
1628
1629 /* Convert a linear expression from coefficient and constant form to a
1630    gcc tree.
1631    Return the tree that represents the final value of the expression.
1632    LLE is the linear expression to convert.
1633    OFFSET is the linear offset to apply to the expression.
1634    TYPE is the tree type to use for the variables and math.
1635    INDUCTION_VARS is a vector of induction variables for the loops.
1636    INVARIANTS is a vector of the loop nest invariants.
1637    WRAP specifies what tree code to wrap the results in, if there is more than
1638    one (it is either MAX_EXPR, or MIN_EXPR).
1639    STMTS_TO_INSERT Is a pointer to the statement list we fill in with
1640    statements that need to be inserted for the linear expression.  */
1641
1642 static tree
1643 lle_to_gcc_expression (lambda_linear_expression lle,
1644                        lambda_linear_expression offset,
1645                        tree type,
1646                        VEC(tree) *induction_vars,
1647                        VEC(tree) *invariants,
1648                        enum tree_code wrap, tree * stmts_to_insert)
1649 {
1650   tree stmts, stmt, resvar, name;
1651   size_t i;
1652   tree_stmt_iterator tsi;
1653   tree iv, invar;
1654   VEC(tree) *results = NULL;
1655
1656   name = NULL_TREE;
1657   /* Create a statement list and a linear expression temporary.  */
1658   stmts = alloc_stmt_list ();
1659   resvar = create_tmp_var (type, "lletmp");
1660   add_referenced_tmp_var (resvar);
1661
1662   /* Build up the linear expressions, and put the variable representing the
1663      result in the results array.  */
1664   for (; lle != NULL; lle = LLE_NEXT (lle))
1665     {
1666       /* Start at name = 0.  */
1667       stmt = build (MODIFY_EXPR, void_type_node, resvar, integer_zero_node);
1668       name = make_ssa_name (resvar, stmt);
1669       TREE_OPERAND (stmt, 0) = name;
1670       fold_stmt (&stmt);
1671       tsi = tsi_last (stmts);
1672       tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1673
1674       /* First do the induction variables.
1675          at the end, name = name + all the induction variables added
1676          together.  */
1677       for (i = 0; VEC_iterate (tree, induction_vars, i, iv); i++)
1678         {
1679           if (LLE_COEFFICIENTS (lle)[i] != 0)
1680             {
1681               tree newname;
1682               tree mult;
1683               tree coeff;
1684
1685               /* mult = induction variable * coefficient.  */
1686               if (LLE_COEFFICIENTS (lle)[i] == 1)
1687                 {
1688                   mult = VEC_index (tree, induction_vars, i);
1689                 }
1690               else
1691                 {
1692                   coeff = build_int_cst (type,
1693                                          LLE_COEFFICIENTS (lle)[i]);
1694                   mult = fold (build (MULT_EXPR, type, iv, coeff));
1695                 }
1696
1697               /* newname = mult */
1698               stmt = build (MODIFY_EXPR, void_type_node, resvar, mult);
1699               newname = make_ssa_name (resvar, stmt);
1700               TREE_OPERAND (stmt, 0) = newname;
1701               fold_stmt (&stmt);
1702               tsi = tsi_last (stmts);
1703               tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1704
1705               /* name = name + newname */
1706               stmt = build (MODIFY_EXPR, void_type_node, resvar,
1707                             build (PLUS_EXPR, type, name, newname));
1708               name = make_ssa_name (resvar, stmt);
1709               TREE_OPERAND (stmt, 0) = name;
1710               fold_stmt (&stmt);
1711               tsi = tsi_last (stmts);
1712               tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1713             }
1714         }
1715
1716       /* Handle our invariants.
1717          At the end, we have name = name + result of adding all multiplied
1718          invariants.  */
1719       for (i = 0; VEC_iterate (tree, invariants, i, invar); i++)
1720         {
1721           if (LLE_INVARIANT_COEFFICIENTS (lle)[i] != 0)
1722             {
1723               tree newname;
1724               tree mult;
1725               tree coeff;
1726               int invcoeff = LLE_INVARIANT_COEFFICIENTS (lle)[i];
1727               /* mult = invariant * coefficient  */
1728               if (invcoeff == 1)
1729                 {
1730                   mult = invar;
1731                 }
1732               else
1733                 {
1734                   coeff = build_int_cst (type, invcoeff);
1735                   mult = fold (build (MULT_EXPR, type, invar, coeff));
1736                 }
1737
1738               /* newname = mult */
1739               stmt = build (MODIFY_EXPR, void_type_node, resvar, mult);
1740               newname = make_ssa_name (resvar, stmt);
1741               TREE_OPERAND (stmt, 0) = newname;
1742               fold_stmt (&stmt);
1743               tsi = tsi_last (stmts);
1744               tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1745
1746               /* name = name + newname */
1747               stmt = build (MODIFY_EXPR, void_type_node, resvar,
1748                             build (PLUS_EXPR, type, name, newname));
1749               name = make_ssa_name (resvar, stmt);
1750               TREE_OPERAND (stmt, 0) = name;
1751               fold_stmt (&stmt);
1752               tsi = tsi_last (stmts);
1753               tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1754             }
1755         }
1756
1757       /* Now handle the constant.
1758          name = name + constant.  */
1759       if (LLE_CONSTANT (lle) != 0)
1760         {
1761           stmt = build (MODIFY_EXPR, void_type_node, resvar,
1762                         build (PLUS_EXPR, type, name,
1763                                build_int_cst (type, LLE_CONSTANT (lle))));
1764           name = make_ssa_name (resvar, stmt);
1765           TREE_OPERAND (stmt, 0) = name;
1766           fold_stmt (&stmt);
1767           tsi = tsi_last (stmts);
1768           tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1769         }
1770
1771       /* Now handle the offset.
1772          name = name + linear offset.  */
1773       if (LLE_CONSTANT (offset) != 0)
1774         {
1775           stmt = build (MODIFY_EXPR, void_type_node, resvar,
1776                         build (PLUS_EXPR, type, name,
1777                                build_int_cst (type, LLE_CONSTANT (offset))));
1778           name = make_ssa_name (resvar, stmt);
1779           TREE_OPERAND (stmt, 0) = name;
1780           fold_stmt (&stmt);
1781           tsi = tsi_last (stmts);
1782           tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1783         }
1784
1785       /* Handle any denominator that occurs.  */
1786       if (LLE_DENOMINATOR (lle) != 1)
1787         {
1788           if (wrap == MAX_EXPR)
1789             stmt = build (MODIFY_EXPR, void_type_node, resvar,
1790                           build (CEIL_DIV_EXPR, type, name,
1791                                  build_int_cst (type, LLE_DENOMINATOR (lle))));
1792           else if (wrap == MIN_EXPR)
1793             stmt = build (MODIFY_EXPR, void_type_node, resvar,
1794                           build (FLOOR_DIV_EXPR, type, name,
1795                                  build_int_cst (type, LLE_DENOMINATOR (lle))));
1796           else
1797             gcc_unreachable();
1798
1799           /* name = {ceil, floor}(name/denominator) */
1800           name = make_ssa_name (resvar, stmt);
1801           TREE_OPERAND (stmt, 0) = name;
1802           tsi = tsi_last (stmts);
1803           tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1804         }
1805       VEC_safe_push (tree, results, name);
1806     }
1807
1808   /* Again, out of laziness, we don't handle this case yet.  It's not
1809      hard, it just hasn't occurred.  */
1810   gcc_assert (VEC_length (tree, results) <= 2);
1811
1812   /* We may need to wrap the results in a MAX_EXPR or MIN_EXPR.  */
1813   if (VEC_length (tree, results) > 1)
1814     {
1815       tree op1 = VEC_index (tree, results, 0);
1816       tree op2 = VEC_index (tree, results, 1);
1817       stmt = build (MODIFY_EXPR, void_type_node, resvar,
1818                     build (wrap, type, op1, op2));
1819       name = make_ssa_name (resvar, stmt);
1820       TREE_OPERAND (stmt, 0) = name;
1821       tsi = tsi_last (stmts);
1822       tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1823     }
1824
1825   *stmts_to_insert = stmts;
1826   return name;
1827 }
1828
1829 /* Transform a lambda loopnest NEW_LOOPNEST, which had TRANSFORM applied to
1830    it, back into gcc code.  This changes the
1831    loops, their induction variables, and their bodies, so that they
1832    match the transformed loopnest.
1833    OLD_LOOPNEST is the loopnest before we've replaced it with the new
1834    loopnest.
1835    OLD_IVS is a vector of induction variables from the old loopnest.
1836    INVARIANTS is a vector of loop invariants from the old loopnest.
1837    NEW_LOOPNEST is the new lambda loopnest to replace OLD_LOOPNEST with.
1838    TRANSFORM is the matrix transform that was applied to OLD_LOOPNEST to get
1839    NEW_LOOPNEST.  */
1840
1841 void
1842 lambda_loopnest_to_gcc_loopnest (struct loop *old_loopnest,
1843                                  VEC(tree) *old_ivs,
1844                                  VEC(tree) *invariants,
1845                                  lambda_loopnest new_loopnest,
1846                                  lambda_trans_matrix transform)
1847 {
1848
1849   struct loop *temp;
1850   size_t i = 0;
1851   size_t depth = 0;
1852   VEC(tree) *new_ivs = NULL;
1853   tree oldiv;
1854
1855   block_stmt_iterator bsi;
1856
1857   if (dump_file)
1858     {
1859       transform = lambda_trans_matrix_inverse (transform);
1860       fprintf (dump_file, "Inverse of transformation matrix:\n");
1861       print_lambda_trans_matrix (dump_file, transform);
1862     }
1863   depth = depth_of_nest (old_loopnest);
1864   temp = old_loopnest;
1865
1866   while (temp)
1867     {
1868       lambda_loop newloop;
1869       basic_block bb;
1870       edge exit;
1871       tree ivvar, ivvarinced, exitcond, stmts;
1872       enum tree_code testtype;
1873       tree newupperbound, newlowerbound;
1874       lambda_linear_expression offset;
1875       tree type;
1876       bool insert_after;
1877
1878       oldiv = VEC_index (tree, old_ivs, i);
1879       type = TREE_TYPE (oldiv);
1880
1881       /* First, build the new induction variable temporary  */
1882
1883       ivvar = create_tmp_var (type, "lnivtmp");
1884       add_referenced_tmp_var (ivvar);
1885
1886       VEC_safe_push (tree, new_ivs, ivvar);
1887
1888       newloop = LN_LOOPS (new_loopnest)[i];
1889
1890       /* Linear offset is a bit tricky to handle.  Punt on the unhandled
1891          cases for now.  */
1892       offset = LL_LINEAR_OFFSET (newloop);
1893
1894       gcc_assert (LLE_DENOMINATOR (offset) == 1 &&
1895                   lambda_vector_zerop (LLE_COEFFICIENTS (offset), depth));
1896
1897       /* Now build the  new lower bounds, and insert the statements
1898          necessary to generate it on the loop preheader.  */
1899       newlowerbound = lle_to_gcc_expression (LL_LOWER_BOUND (newloop),
1900                                              LL_LINEAR_OFFSET (newloop),
1901                                              type,
1902                                              new_ivs,
1903                                              invariants, MAX_EXPR, &stmts);
1904       bsi_insert_on_edge (loop_preheader_edge (temp), stmts);
1905       bsi_commit_edge_inserts ();
1906       /* Build the new upper bound and insert its statements in the
1907          basic block of the exit condition */
1908       newupperbound = lle_to_gcc_expression (LL_UPPER_BOUND (newloop),
1909                                              LL_LINEAR_OFFSET (newloop),
1910                                              type,
1911                                              new_ivs,
1912                                              invariants, MIN_EXPR, &stmts);
1913       exit = temp->single_exit;
1914       exitcond = get_loop_exit_condition (temp);
1915       bb = bb_for_stmt (exitcond);
1916       bsi = bsi_start (bb);
1917       bsi_insert_after (&bsi, stmts, BSI_NEW_STMT);
1918
1919       /* Create the new iv.  */
1920
1921       standard_iv_increment_position (temp, &bsi, &insert_after);
1922       create_iv (newlowerbound,
1923                  build_int_cst (type, LL_STEP (newloop)),
1924                  ivvar, temp, &bsi, insert_after, &ivvar,
1925                  &ivvarinced);
1926
1927       /* Replace the exit condition with the new upper bound
1928          comparison.  */
1929
1930       testtype = LL_STEP (newloop) >= 0 ? LE_EXPR : GE_EXPR;
1931
1932       /* We want to build a conditional where true means exit the loop, and
1933          false means continue the loop.
1934          So swap the testtype if this isn't the way things are.*/
1935
1936       if (exit->flags & EDGE_FALSE_VALUE)
1937         testtype = swap_tree_comparison (testtype);
1938
1939       COND_EXPR_COND (exitcond) = build (testtype,
1940                                          boolean_type_node,
1941                                          newupperbound, ivvarinced);
1942       modify_stmt (exitcond);
1943       VEC_replace (tree, new_ivs, i, ivvar);
1944
1945       i++;
1946       temp = temp->inner;
1947     }
1948
1949   /* Rewrite uses of the old ivs so that they are now specified in terms of
1950      the new ivs.  */
1951
1952   for (i = 0; VEC_iterate (tree, old_ivs, i, oldiv); i++)
1953     {
1954       int j;
1955       dataflow_t imm = get_immediate_uses (SSA_NAME_DEF_STMT (oldiv));
1956       for (j = 0; j < num_immediate_uses (imm); j++)
1957         {
1958           tree stmt = immediate_use (imm, j);
1959           use_operand_p use_p;
1960           ssa_op_iter iter;
1961           gcc_assert (TREE_CODE (stmt) != PHI_NODE);
1962           FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
1963             {
1964               if (USE_FROM_PTR (use_p) == oldiv)
1965                 {
1966                   tree newiv, stmts;
1967                   lambda_body_vector lbv, newlbv;
1968                   /* Compute the new expression for the induction
1969                      variable.  */
1970                   depth = VEC_length (tree, new_ivs);
1971                   lbv = lambda_body_vector_new (depth);
1972                   LBV_COEFFICIENTS (lbv)[i] = 1;
1973
1974                   newlbv = lambda_body_vector_compute_new (transform, lbv);
1975
1976                   newiv = lbv_to_gcc_expression (newlbv, TREE_TYPE (oldiv),
1977                                                  new_ivs, &stmts);
1978                   bsi = bsi_for_stmt (stmt);
1979                   /* Insert the statements to build that
1980                      expression.  */
1981                   bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
1982                   propagate_value (use_p, newiv);
1983                   modify_stmt (stmt);
1984
1985                 }
1986             }
1987         }
1988     }
1989 }
1990
1991
1992 /* Returns true when the vector V is lexicographically positive, in
1993    other words, when the first nonzero element is positive.  */
1994
1995 static bool
1996 lambda_vector_lexico_pos (lambda_vector v,
1997                           unsigned n)
1998 {
1999   unsigned i;
2000   for (i = 0; i < n; i++)
2001     {
2002       if (v[i] == 0)
2003         continue;
2004       if (v[i] < 0)
2005         return false;
2006       if (v[i] > 0)
2007         return true;
2008     }
2009   return true;
2010 }
2011
2012
2013 /* Return TRUE if this is not interesting statement from the perspective of
2014    determining if we have a perfect loop nest.  */
2015
2016 static bool
2017 not_interesting_stmt (tree stmt)
2018 {
2019   /* Note that COND_EXPR's aren't interesting because if they were exiting the
2020      loop, we would have already failed the number of exits tests.  */
2021   if (TREE_CODE (stmt) == LABEL_EXPR
2022       || TREE_CODE (stmt) == GOTO_EXPR
2023       || TREE_CODE (stmt) == COND_EXPR)
2024     return true;
2025   return false;
2026 }
2027
2028 /* Return TRUE if PHI uses DEF for it's in-the-loop edge for LOOP.  */
2029
2030 static bool
2031 phi_loop_edge_uses_def (struct loop *loop, tree phi, tree def)
2032 {
2033   int i;
2034   for (i = 0; i < PHI_NUM_ARGS (phi); i++)
2035     if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, i)->src))
2036       if (PHI_ARG_DEF (phi, i) == def)
2037         return true;
2038   return false;
2039 }
2040
2041 /* Return TRUE if STMT is a use of PHI_RESULT.  */
2042
2043 static bool
2044 stmt_uses_phi_result (tree stmt, tree phi_result)
2045 {
2046   use_optype uses = STMT_USE_OPS (stmt);
2047
2048   /* This is conservatively true, because we only want SIMPLE bumpers
2049      of the form x +- constant for our pass.  */
2050   if (NUM_USES (uses) != 1)
2051     return false;
2052   if (USE_OP (uses, 0) == phi_result)
2053     return true;
2054
2055   return false;
2056 }
2057
2058 /* STMT is a bumper stmt for LOOP if the version it defines is used in the
2059    in-loop-edge in a phi node, and the operand it uses is the result of that
2060    phi node.
2061    I.E. i_29 = i_3 + 1
2062         i_3 = PHI (0, i_29);  */
2063
2064 static bool
2065 stmt_is_bumper_for_loop (struct loop *loop, tree stmt)
2066 {
2067   tree use;
2068   tree def;
2069   def_optype defs = STMT_DEF_OPS (stmt);
2070   dataflow_t imm;
2071   int i;
2072
2073   if (NUM_DEFS (defs) != 1)
2074     return false;
2075   def = DEF_OP (defs, 0);
2076   imm = get_immediate_uses (stmt);
2077   for (i = 0; i < num_immediate_uses (imm); i++)
2078     {
2079       use = immediate_use (imm, i);
2080       if (TREE_CODE (use) == PHI_NODE)
2081         {
2082           if (phi_loop_edge_uses_def (loop, use, def))
2083             if (stmt_uses_phi_result (stmt, PHI_RESULT (use)))
2084               return true;
2085         }
2086     }
2087   return false;
2088 }
2089
2090
2091 /* Return true if LOOP is a perfect loop nest.
2092    Perfect loop nests are those loop nests where all code occurs in the
2093    innermost loop body.
2094    If S is a program statement, then
2095
2096    i.e.
2097    DO I = 1, 20
2098        S1
2099        DO J = 1, 20
2100        ...
2101        END DO
2102    END DO
2103    is not a perfect loop nest because of S1.
2104
2105    DO I = 1, 20
2106       DO J = 1, 20
2107         S1
2108         ...
2109       END DO
2110    END DO
2111    is a perfect loop nest.
2112
2113    Since we don't have high level loops anymore, we basically have to walk our
2114    statements and ignore those that are there because the loop needs them (IE
2115    the induction variable increment, and jump back to the top of the loop).  */
2116
2117 bool
2118 perfect_nest_p (struct loop *loop)
2119 {
2120   basic_block *bbs;
2121   size_t i;
2122   tree exit_cond;
2123
2124   if (!loop->inner)
2125     return true;
2126   bbs = get_loop_body (loop);
2127   exit_cond = get_loop_exit_condition (loop);
2128   for (i = 0; i < loop->num_nodes; i++)
2129     {
2130       if (bbs[i]->loop_father == loop)
2131         {
2132           block_stmt_iterator bsi;
2133           for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi))
2134             {
2135               tree stmt = bsi_stmt (bsi);
2136               if (stmt == exit_cond
2137                   || not_interesting_stmt (stmt)
2138                   || stmt_is_bumper_for_loop (loop, stmt))
2139                 continue;
2140               free (bbs);
2141               return false;
2142             }
2143         }
2144     }
2145   free (bbs);
2146   /* See if the inner loops are perfectly nested as well.  */
2147   if (loop->inner)
2148     return perfect_nest_p (loop->inner);
2149   return true;
2150 }
2151
2152 /* Replace the USES of tree X in STMT with tree Y */
2153
2154 static void
2155 replace_uses_of_x_with_y (tree stmt, tree x, tree y)
2156 {
2157   use_optype uses = STMT_USE_OPS (stmt);
2158   size_t i;
2159   for (i = 0; i < NUM_USES (uses); i++)
2160     {
2161       if (USE_OP (uses, i) == x)
2162         SET_USE_OP (uses, i, y);
2163     }
2164 }
2165
2166 /* Return TRUE if STMT uses tree OP in it's uses.  */
2167
2168 static bool
2169 stmt_uses_op (tree stmt, tree op)
2170 {
2171   use_optype uses = STMT_USE_OPS (stmt);
2172   size_t i;
2173   for (i = 0; i < NUM_USES (uses); i++)
2174     {
2175       if (USE_OP (uses, i) == op)
2176         return true;
2177     }
2178   return false;
2179 }
2180
2181 /* Return TRUE if LOOP is an imperfect nest that we can convert to a perfect
2182    one.  LOOPIVS is a vector of induction variables, one per loop.
2183    ATM, we only handle imperfect nests of depth 2, where all of the statements
2184    occur after the inner loop.  */
2185
2186 static bool
2187 can_convert_to_perfect_nest (struct loop *loop,
2188                              VEC (tree) *loopivs)
2189 {
2190   basic_block *bbs;
2191   tree exit_condition, phi;
2192   size_t i;
2193   block_stmt_iterator bsi;
2194   basic_block exitdest;
2195
2196   /* Can't handle triply nested+ loops yet.  */
2197   if (!loop->inner || loop->inner->inner)
2198     return false;
2199
2200   /* We only handle moving the after-inner-body statements right now, so make
2201      sure all the statements we need to move are located in that position.  */
2202   bbs = get_loop_body (loop);
2203   exit_condition = get_loop_exit_condition (loop);
2204   for (i = 0; i < loop->num_nodes; i++)
2205     {
2206       if (bbs[i]->loop_father == loop)
2207         {
2208           for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi))
2209             {
2210               size_t j;
2211               tree stmt = bsi_stmt (bsi);
2212               if (stmt == exit_condition
2213                   || not_interesting_stmt (stmt)
2214                   || stmt_is_bumper_for_loop (loop, stmt))
2215                 continue;
2216               /* If the statement uses inner loop ivs, we == screwed.  */
2217               for (j = 1; j < VEC_length (tree, loopivs); j++)
2218                 if (stmt_uses_op (stmt, VEC_index (tree, loopivs, j)))
2219                   {
2220                     free (bbs);
2221                     return false;
2222                   }
2223
2224               /* If the bb of a statement we care about isn't dominated by
2225                  the header of the inner loop, then we are also screwed.  */
2226               if (!dominated_by_p (CDI_DOMINATORS,
2227                                    bb_for_stmt (stmt),
2228                                    loop->inner->header))
2229                 {
2230                   free (bbs);
2231                   return false;
2232                 }
2233             }
2234         }
2235     }
2236
2237   /* We also need to make sure the loop exit only has simple copy phis in it,
2238      otherwise we don't know how to transform it into a perfect nest right
2239      now.  */
2240   exitdest = loop->single_exit->dest;
2241
2242   for (phi = phi_nodes (exitdest); phi; phi = PHI_CHAIN (phi))
2243     if (PHI_NUM_ARGS (phi) != 1)
2244       return false;
2245
2246   return true;
2247 }
2248
2249 /* Transform the loop nest into a perfect nest, if possible.
2250    LOOPS is the current struct loops *
2251    LOOP is the loop nest to transform into a perfect nest
2252    LBOUNDS are the lower bounds for the loops to transform
2253    UBOUNDS are the upper bounds for the loops to transform
2254    STEPS is the STEPS for the loops to transform.
2255    LOOPIVS is the induction variables for the loops to transform.
2256
2257    Basically, for the case of
2258
2259    FOR (i = 0; i < 50; i++)
2260     {
2261      FOR (j =0; j < 50; j++)
2262      {
2263         <whatever>
2264      }
2265      <some code>
2266     }
2267
2268    This function will transform it into a perfect loop nest by splitting the
2269    outer loop into two loops, like so:
2270
2271    FOR (i = 0; i < 50; i++)
2272    {
2273      FOR (j = 0; j < 50; j++)
2274      {
2275          <whatever>
2276      }
2277    }
2278
2279    FOR (i = 0; i < 50; i ++)
2280    {
2281     <some code>
2282    }
2283
2284    Return FALSE if we can't make this loop into a perfect nest.  */
2285 static bool
2286 perfect_nestify (struct loops *loops,
2287                  struct loop *loop,
2288                  VEC (tree) *lbounds,
2289                  VEC (tree) *ubounds,
2290                  VEC (int) *steps,
2291                  VEC (tree) *loopivs)
2292 {
2293   basic_block *bbs;
2294   tree exit_condition;
2295   tree then_label, else_label, cond_stmt;
2296   basic_block preheaderbb, headerbb, bodybb, latchbb, olddest;
2297   size_t i;
2298   block_stmt_iterator bsi;
2299   bool insert_after;
2300   edge e;
2301   struct loop *newloop;
2302   tree phi;
2303   tree uboundvar;
2304   tree stmt;
2305   tree oldivvar, ivvar, ivvarinced;
2306   VEC (tree) *phis = NULL;
2307
2308   if (!can_convert_to_perfect_nest (loop, loopivs))
2309     return false;
2310
2311   /* Create the new loop */
2312
2313   olddest = loop->single_exit->dest;
2314   preheaderbb =  loop_split_edge_with (loop->single_exit, NULL);
2315   headerbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2316
2317   /* Push the exit phi nodes that we are moving.  */
2318   for (phi = phi_nodes (olddest); phi; phi = PHI_CHAIN (phi))
2319     {
2320       VEC_safe_push (tree, phis, PHI_RESULT (phi));
2321       VEC_safe_push (tree, phis, PHI_ARG_DEF (phi, 0));
2322     }
2323   e = redirect_edge_and_branch (EDGE_SUCC (preheaderbb, 0), headerbb);
2324
2325   /* Remove the exit phis from the old basic block.  Make sure to set
2326      PHI_RESULT to null so it doesn't get released.  */
2327   while (phi_nodes (olddest) != NULL)
2328     {
2329       SET_PHI_RESULT (phi_nodes (olddest), NULL);
2330       remove_phi_node (phi_nodes (olddest), NULL, olddest);
2331     }
2332
2333   /* and add them back to the new basic block.  */
2334   while (VEC_length (tree, phis) != 0)
2335     {
2336       tree def;
2337       tree phiname;
2338       def = VEC_pop (tree, phis);
2339       phiname = VEC_pop (tree, phis);
2340       phi = create_phi_node (phiname, preheaderbb);
2341       add_phi_arg (phi, def, EDGE_PRED (preheaderbb, 0));
2342     }
2343   flush_pending_stmts (e);
2344
2345   bodybb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2346   latchbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2347   make_edge (headerbb, bodybb, EDGE_FALLTHRU);
2348   then_label = build1 (GOTO_EXPR, void_type_node, tree_block_label (latchbb));
2349   else_label = build1 (GOTO_EXPR, void_type_node, tree_block_label (olddest));
2350   cond_stmt = build (COND_EXPR, void_type_node,
2351                      build (NE_EXPR, boolean_type_node,
2352                             integer_one_node,
2353                             integer_zero_node),
2354                      then_label, else_label);
2355   bsi = bsi_start (bodybb);
2356   bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
2357   e = make_edge (bodybb, olddest, EDGE_FALSE_VALUE);
2358   make_edge (bodybb, latchbb, EDGE_TRUE_VALUE);
2359   make_edge (latchbb, headerbb, EDGE_FALLTHRU);
2360
2361   /* Update the loop structures.  */
2362   newloop = duplicate_loop (loops, loop, olddest->loop_father);
2363   newloop->header = headerbb;
2364   newloop->latch = latchbb;
2365   newloop->single_exit = e;
2366   add_bb_to_loop (latchbb, newloop);
2367   add_bb_to_loop (bodybb, newloop);
2368   add_bb_to_loop (headerbb, newloop);
2369   add_bb_to_loop (preheaderbb, olddest->loop_father);
2370   set_immediate_dominator (CDI_DOMINATORS, bodybb, headerbb);
2371   set_immediate_dominator (CDI_DOMINATORS, headerbb, preheaderbb);
2372   set_immediate_dominator (CDI_DOMINATORS, preheaderbb,
2373                            loop->single_exit->src);
2374   set_immediate_dominator (CDI_DOMINATORS, latchbb, bodybb);
2375   set_immediate_dominator (CDI_DOMINATORS, olddest, bodybb);
2376   /* Create the new iv.  */
2377   ivvar = create_tmp_var (integer_type_node, "perfectiv");
2378   add_referenced_tmp_var (ivvar);
2379   standard_iv_increment_position (newloop, &bsi, &insert_after);
2380   create_iv (VEC_index (tree, lbounds, 0),
2381              build_int_cst (integer_type_node, VEC_index (int, steps, 0)),
2382              ivvar, newloop, &bsi, insert_after, &ivvar, &ivvarinced);
2383
2384   /* Create the new upper bound.  This may be not just a variable, so we copy
2385      it to one just in case.  */
2386
2387   exit_condition = get_loop_exit_condition (newloop);
2388   uboundvar = create_tmp_var (integer_type_node, "uboundvar");
2389   add_referenced_tmp_var (uboundvar);
2390   stmt = build (MODIFY_EXPR, void_type_node, uboundvar,
2391                 VEC_index (tree, ubounds, 0));
2392   uboundvar = make_ssa_name (uboundvar, stmt);
2393   TREE_OPERAND (stmt, 0) = uboundvar;
2394
2395   if (insert_after)
2396     bsi_insert_after (&bsi, stmt, BSI_SAME_STMT);
2397   else
2398     bsi_insert_before (&bsi, stmt, BSI_SAME_STMT);
2399
2400   COND_EXPR_COND (exit_condition) = build (GE_EXPR,
2401                                            boolean_type_node,
2402                                            uboundvar,
2403                                            ivvarinced);
2404
2405   bbs = get_loop_body (loop);
2406   /* Now replace the induction variable in the moved statements with the
2407      correct loop induction variable.  */
2408   oldivvar = VEC_index (tree, loopivs, 0);
2409   for (i = 0; i < loop->num_nodes; i++)
2410     {
2411       block_stmt_iterator tobsi = bsi_last (bodybb);
2412       if (bbs[i]->loop_father == loop)
2413         {
2414           /* Note that the bsi only needs to be explicitly incremented
2415              when we don't move something, since it is automatically
2416              incremented when we do.  */
2417           for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi);)
2418             {
2419               tree stmt = bsi_stmt (bsi);
2420               if (stmt == exit_condition
2421                   || not_interesting_stmt (stmt)
2422                   || stmt_is_bumper_for_loop (loop, stmt))
2423                 {
2424                   bsi_next (&bsi);
2425                   continue;
2426                 }
2427               replace_uses_of_x_with_y (stmt, oldivvar, ivvar);
2428               bsi_move_before (&bsi, &tobsi);
2429             }
2430         }
2431     }
2432   free (bbs);
2433   flow_loops_find (loops, LOOP_ALL);
2434   return perfect_nest_p (loop);
2435 }
2436
2437 /* Return true if TRANS is a legal transformation matrix that respects
2438    the dependence vectors in DISTS and DIRS.  The conservative answer
2439    is false.
2440
2441    "Wolfe proves that a unimodular transformation represented by the
2442    matrix T is legal when applied to a loop nest with a set of
2443    lexicographically non-negative distance vectors RDG if and only if
2444    for each vector d in RDG, (T.d >= 0) is lexicographically positive.
2445    i.e.: if and only if it transforms the lexicographically positive
2446    distance vectors to lexicographically positive vectors.  Note that
2447    a unimodular matrix must transform the zero vector (and only it) to
2448    the zero vector." S.Muchnick.  */
2449
2450 bool
2451 lambda_transform_legal_p (lambda_trans_matrix trans,
2452                           int nb_loops,
2453                           varray_type dependence_relations)
2454 {
2455   unsigned int i;
2456   lambda_vector distres;
2457   struct data_dependence_relation *ddr;
2458
2459 #if defined ENABLE_CHECKING
2460   if (LTM_COLSIZE (trans) != nb_loops
2461       || LTM_ROWSIZE (trans) != nb_loops)
2462     abort ();
2463 #endif
2464
2465   /* When there is an unknown relation in the dependence_relations, we
2466      know that it is no worth looking at this loop nest: give up.  */
2467   ddr = (struct data_dependence_relation *)
2468     VARRAY_GENERIC_PTR (dependence_relations, 0);
2469   if (ddr == NULL)
2470     return true;
2471   if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2472     return false;
2473
2474   distres = lambda_vector_new (nb_loops);
2475
2476   /* For each distance vector in the dependence graph.  */
2477   for (i = 0; i < VARRAY_ACTIVE_SIZE (dependence_relations); i++)
2478     {
2479       ddr = (struct data_dependence_relation *)
2480         VARRAY_GENERIC_PTR (dependence_relations, i);
2481
2482       /* Don't care about relations for which we know that there is no
2483          dependence, nor about read-read (aka. output-dependences):
2484          these data accesses can happen in any order.  */
2485       if (DDR_ARE_DEPENDENT (ddr) == chrec_known
2486           || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
2487         continue;
2488
2489       /* Conservatively answer: "this transformation is not valid".  */
2490       if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2491         return false;
2492
2493       /* If the dependence could not be captured by a distance vector,
2494          conservatively answer that the transform is not valid.  */
2495       if (DDR_DIST_VECT (ddr) == NULL)
2496         return false;
2497
2498       /* Compute trans.dist_vect */
2499       lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
2500                                  DDR_DIST_VECT (ddr), distres);
2501
2502       if (!lambda_vector_lexico_pos (distres, nb_loops))
2503         return false;
2504     }
2505   return true;
2506 }