2004-09-23 H.J. Lu <hongjiu.lu@intel.com>
[official-gcc.git] / gcc / lambda-code.c
blob3faeee1bb9b7dcc87563f8630a36411500089ad5
1 /* Loop transformation code generation
2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dberlin@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "errors.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 #include "rtl.h"
31 #include "basic-block.h"
32 #include "diagnostic.h"
33 #include "tree-flow.h"
34 #include "tree-dump.h"
35 #include "timevar.h"
36 #include "cfgloop.h"
37 #include "expr.h"
38 #include "optabs.h"
39 #include "tree-chrec.h"
40 #include "tree-data-ref.h"
41 #include "tree-pass.h"
42 #include "tree-scalar-evolution.h"
43 #include "vec.h"
44 #include "lambda.h"
46 /* This loop nest code generation is based on non-singular matrix
47 math.
49 A little terminology and a general sketch of the algorithm. See "A singular
50 loop transformation framework based on non-singular matrices" by Wei Li and
51 Keshav Pingali for formal proofs that the various statements below are
52 correct.
54 A loop iteration space are the points traversed by the loop. A point in the
55 iteration space can be represented by a vector of size <loop depth>. You can
56 therefore represent the iteration space as a integral combinations of a set
57 of basis vectors.
59 A loop iteration space is dense if every integer point between the loop
60 bounds is a point in the iteration space. Every loop with a step of 1
61 therefore has a dense iteration space.
63 for i = 1 to 3, step 1 is a dense iteration space.
65 A loop iteration space is sparse if it is not dense. That is, the iteration
66 space skips integer points that are within the loop bounds.
68 for i = 1 to 3, step 2 is a sparse iteration space, because the integer point
69 2 is skipped.
71 Dense source spaces are easy to transform, because they don't skip any
72 points to begin with. Thus we can compute the exact bounds of the target
73 space using min/max and floor/ceil.
75 For a dense source space, we take the transformation matrix, decompose it
76 into a lower triangular part (H) and a unimodular part (U).
77 We then compute the auxiliary space from the unimodular part (source loop
78 nest . U = auxiliary space) , which has two important properties:
79 1. It traverses the iterations in the same lexicographic order as the source
80 space.
81 2. It is a dense space when the source is a dense space (even if the target
82 space is going to be sparse).
84 Given the auxiliary space, we use the lower triangular part to compute the
85 bounds in the target space by simple matrix multiplication.
86 The gaps in the target space (IE the new loop step sizes) will be the
87 diagonals of the H matrix.
89 Sparse source spaces require another step, because you can't directly compute
90 the exact bounds of the auxiliary and target space from the sparse space.
91 Rather than try to come up with a separate algorithm to handle sparse source
92 spaces directly, we just find a legal transformation matrix that gives you
93 the sparse source space, from a dense space, and then transform the dense
94 space.
96 For a regular sparse space, you can represent the source space as an integer
97 lattice, and the base space of that lattice will always be dense. Thus, we
98 effectively use the lattice to figure out the transformation from the lattice
99 base space, to the sparse iteration space (IE what transform was applied to
100 the dense space to make it sparse). We then compose this transform with the
101 transformation matrix specified by the user (since our matrix transformations
102 are closed under composition, this is okay). We can then use the base space
103 (which is dense) plus the composed transformation matrix, to compute the rest
104 of the transform using the dense space algorithm above.
106 In other words, our sparse source space (B) is decomposed into a dense base
107 space (A), and a matrix (L) that transforms A into B, such that A.L = B.
108 We then compute the composition of L and the user transformation matrix (T),
109 so that T is now a transform from A to the result, instead of from B to the
110 result.
111 IE A.(LT) = result instead of B.T = result
112 Since A is now a dense source space, we can use the dense source space
113 algorithm above to compute the result of applying transform (LT) to A.
115 Fourier-Motzkin elimination is used to compute the bounds of the base space
116 of the lattice. */
120 DEF_VEC_GC_P(int);
122 static bool perfect_nestify (struct loops *,
123 struct loop *, VEC (tree) *,
124 VEC (tree) *, VEC (int) *, VEC (tree) *);
125 /* Lattice stuff that is internal to the code generation algorithm. */
127 typedef struct
129 /* Lattice base matrix. */
130 lambda_matrix base;
131 /* Lattice dimension. */
132 int dimension;
133 /* Origin vector for the coefficients. */
134 lambda_vector origin;
135 /* Origin matrix for the invariants. */
136 lambda_matrix origin_invariants;
137 /* Number of invariants. */
138 int invariants;
139 } *lambda_lattice;
141 #define LATTICE_BASE(T) ((T)->base)
142 #define LATTICE_DIMENSION(T) ((T)->dimension)
143 #define LATTICE_ORIGIN(T) ((T)->origin)
144 #define LATTICE_ORIGIN_INVARIANTS(T) ((T)->origin_invariants)
145 #define LATTICE_INVARIANTS(T) ((T)->invariants)
147 static bool lle_equal (lambda_linear_expression, lambda_linear_expression,
148 int, int);
149 static lambda_lattice lambda_lattice_new (int, int);
150 static lambda_lattice lambda_lattice_compute_base (lambda_loopnest);
152 static tree find_induction_var_from_exit_cond (struct loop *);
154 /* Create a new lambda body vector. */
156 lambda_body_vector
157 lambda_body_vector_new (int size)
159 lambda_body_vector ret;
161 ret = ggc_alloc (sizeof (*ret));
162 LBV_COEFFICIENTS (ret) = lambda_vector_new (size);
163 LBV_SIZE (ret) = size;
164 LBV_DENOMINATOR (ret) = 1;
165 return ret;
168 /* Compute the new coefficients for the vector based on the
169 *inverse* of the transformation matrix. */
171 lambda_body_vector
172 lambda_body_vector_compute_new (lambda_trans_matrix transform,
173 lambda_body_vector vect)
175 lambda_body_vector temp;
176 int depth;
178 /* Make sure the matrix is square. */
179 gcc_assert (LTM_ROWSIZE (transform) == LTM_COLSIZE (transform));
181 depth = LTM_ROWSIZE (transform);
183 temp = lambda_body_vector_new (depth);
184 LBV_DENOMINATOR (temp) =
185 LBV_DENOMINATOR (vect) * LTM_DENOMINATOR (transform);
186 lambda_vector_matrix_mult (LBV_COEFFICIENTS (vect), depth,
187 LTM_MATRIX (transform), depth,
188 LBV_COEFFICIENTS (temp));
189 LBV_SIZE (temp) = LBV_SIZE (vect);
190 return temp;
193 /* Print out a lambda body vector. */
195 void
196 print_lambda_body_vector (FILE * outfile, lambda_body_vector body)
198 print_lambda_vector (outfile, LBV_COEFFICIENTS (body), LBV_SIZE (body));
201 /* Return TRUE if two linear expressions are equal. */
203 static bool
204 lle_equal (lambda_linear_expression lle1, lambda_linear_expression lle2,
205 int depth, int invariants)
207 int i;
209 if (lle1 == NULL || lle2 == NULL)
210 return false;
211 if (LLE_CONSTANT (lle1) != LLE_CONSTANT (lle2))
212 return false;
213 if (LLE_DENOMINATOR (lle1) != LLE_DENOMINATOR (lle2))
214 return false;
215 for (i = 0; i < depth; i++)
216 if (LLE_COEFFICIENTS (lle1)[i] != LLE_COEFFICIENTS (lle2)[i])
217 return false;
218 for (i = 0; i < invariants; i++)
219 if (LLE_INVARIANT_COEFFICIENTS (lle1)[i] !=
220 LLE_INVARIANT_COEFFICIENTS (lle2)[i])
221 return false;
222 return true;
225 /* Create a new linear expression with dimension DIM, and total number
226 of invariants INVARIANTS. */
228 lambda_linear_expression
229 lambda_linear_expression_new (int dim, int invariants)
231 lambda_linear_expression ret;
233 ret = ggc_alloc_cleared (sizeof (*ret));
235 LLE_COEFFICIENTS (ret) = lambda_vector_new (dim);
236 LLE_CONSTANT (ret) = 0;
237 LLE_INVARIANT_COEFFICIENTS (ret) = lambda_vector_new (invariants);
238 LLE_DENOMINATOR (ret) = 1;
239 LLE_NEXT (ret) = NULL;
241 return ret;
244 /* Print out a linear expression EXPR, with SIZE coefficients, to OUTFILE.
245 The starting letter used for variable names is START. */
247 static void
248 print_linear_expression (FILE * outfile, lambda_vector expr, int size,
249 char start)
251 int i;
252 bool first = true;
253 for (i = 0; i < size; i++)
255 if (expr[i] != 0)
257 if (first)
259 if (expr[i] < 0)
260 fprintf (outfile, "-");
261 first = false;
263 else if (expr[i] > 0)
264 fprintf (outfile, " + ");
265 else
266 fprintf (outfile, " - ");
267 if (abs (expr[i]) == 1)
268 fprintf (outfile, "%c", start + i);
269 else
270 fprintf (outfile, "%d%c", abs (expr[i]), start + i);
275 /* Print out a lambda linear expression structure, EXPR, to OUTFILE. The
276 depth/number of coefficients is given by DEPTH, the number of invariants is
277 given by INVARIANTS, and the character to start variable names with is given
278 by START. */
280 void
281 print_lambda_linear_expression (FILE * outfile,
282 lambda_linear_expression expr,
283 int depth, int invariants, char start)
285 fprintf (outfile, "\tLinear expression: ");
286 print_linear_expression (outfile, LLE_COEFFICIENTS (expr), depth, start);
287 fprintf (outfile, " constant: %d ", LLE_CONSTANT (expr));
288 fprintf (outfile, " invariants: ");
289 print_linear_expression (outfile, LLE_INVARIANT_COEFFICIENTS (expr),
290 invariants, 'A');
291 fprintf (outfile, " denominator: %d\n", LLE_DENOMINATOR (expr));
294 /* Print a lambda loop structure LOOP to OUTFILE. The depth/number of
295 coefficients is given by DEPTH, the number of invariants is
296 given by INVARIANTS, and the character to start variable names with is given
297 by START. */
299 void
300 print_lambda_loop (FILE * outfile, lambda_loop loop, int depth,
301 int invariants, char start)
303 int step;
304 lambda_linear_expression expr;
306 gcc_assert (loop);
308 expr = LL_LINEAR_OFFSET (loop);
309 step = LL_STEP (loop);
310 fprintf (outfile, " step size = %d \n", step);
312 if (expr)
314 fprintf (outfile, " linear offset: \n");
315 print_lambda_linear_expression (outfile, expr, depth, invariants,
316 start);
319 fprintf (outfile, " lower bound: \n");
320 for (expr = LL_LOWER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr))
321 print_lambda_linear_expression (outfile, expr, depth, invariants, start);
322 fprintf (outfile, " upper bound: \n");
323 for (expr = LL_UPPER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr))
324 print_lambda_linear_expression (outfile, expr, depth, invariants, start);
327 /* Create a new loop nest structure with DEPTH loops, and INVARIANTS as the
328 number of invariants. */
330 lambda_loopnest
331 lambda_loopnest_new (int depth, int invariants)
333 lambda_loopnest ret;
334 ret = ggc_alloc (sizeof (*ret));
336 LN_LOOPS (ret) = ggc_alloc_cleared (depth * sizeof (lambda_loop));
337 LN_DEPTH (ret) = depth;
338 LN_INVARIANTS (ret) = invariants;
340 return ret;
343 /* Print a lambda loopnest structure, NEST, to OUTFILE. The starting
344 character to use for loop names is given by START. */
346 void
347 print_lambda_loopnest (FILE * outfile, lambda_loopnest nest, char start)
349 int i;
350 for (i = 0; i < LN_DEPTH (nest); i++)
352 fprintf (outfile, "Loop %c\n", start + i);
353 print_lambda_loop (outfile, LN_LOOPS (nest)[i], LN_DEPTH (nest),
354 LN_INVARIANTS (nest), 'i');
355 fprintf (outfile, "\n");
359 /* Allocate a new lattice structure of DEPTH x DEPTH, with INVARIANTS number
360 of invariants. */
362 static lambda_lattice
363 lambda_lattice_new (int depth, int invariants)
365 lambda_lattice ret;
366 ret = ggc_alloc (sizeof (*ret));
367 LATTICE_BASE (ret) = lambda_matrix_new (depth, depth);
368 LATTICE_ORIGIN (ret) = lambda_vector_new (depth);
369 LATTICE_ORIGIN_INVARIANTS (ret) = lambda_matrix_new (depth, invariants);
370 LATTICE_DIMENSION (ret) = depth;
371 LATTICE_INVARIANTS (ret) = invariants;
372 return ret;
375 /* Compute the lattice base for NEST. The lattice base is essentially a
376 non-singular transform from a dense base space to a sparse iteration space.
377 We use it so that we don't have to specially handle the case of a sparse
378 iteration space in other parts of the algorithm. As a result, this routine
379 only does something interesting (IE produce a matrix that isn't the
380 identity matrix) if NEST is a sparse space. */
382 static lambda_lattice
383 lambda_lattice_compute_base (lambda_loopnest nest)
385 lambda_lattice ret;
386 int depth, invariants;
387 lambda_matrix base;
389 int i, j, step;
390 lambda_loop loop;
391 lambda_linear_expression expression;
393 depth = LN_DEPTH (nest);
394 invariants = LN_INVARIANTS (nest);
396 ret = lambda_lattice_new (depth, invariants);
397 base = LATTICE_BASE (ret);
398 for (i = 0; i < depth; i++)
400 loop = LN_LOOPS (nest)[i];
401 gcc_assert (loop);
402 step = LL_STEP (loop);
403 /* If we have a step of 1, then the base is one, and the
404 origin and invariant coefficients are 0. */
405 if (step == 1)
407 for (j = 0; j < depth; j++)
408 base[i][j] = 0;
409 base[i][i] = 1;
410 LATTICE_ORIGIN (ret)[i] = 0;
411 for (j = 0; j < invariants; j++)
412 LATTICE_ORIGIN_INVARIANTS (ret)[i][j] = 0;
414 else
416 /* Otherwise, we need the lower bound expression (which must
417 be an affine function) to determine the base. */
418 expression = LL_LOWER_BOUND (loop);
419 gcc_assert (expression && LLE_NEXT (expression)
420 && LLE_DENOMINATOR (expression) == 1);
422 /* The lower triangular portion of the base is going to be the
423 coefficient times the step */
424 for (j = 0; j < i; j++)
425 base[i][j] = LLE_COEFFICIENTS (expression)[j]
426 * LL_STEP (LN_LOOPS (nest)[j]);
427 base[i][i] = step;
428 for (j = i + 1; j < depth; j++)
429 base[i][j] = 0;
431 /* Origin for this loop is the constant of the lower bound
432 expression. */
433 LATTICE_ORIGIN (ret)[i] = LLE_CONSTANT (expression);
435 /* Coefficient for the invariants are equal to the invariant
436 coefficients in the expression. */
437 for (j = 0; j < invariants; j++)
438 LATTICE_ORIGIN_INVARIANTS (ret)[i][j] =
439 LLE_INVARIANT_COEFFICIENTS (expression)[j];
442 return ret;
445 /* Compute the greatest common denominator of two numbers (A and B) using
446 Euclid's algorithm. */
448 static int
449 gcd (int a, int b)
452 int x, y, z;
454 x = abs (a);
455 y = abs (b);
457 while (x > 0)
459 z = y % x;
460 y = x;
461 x = z;
464 return (y);
467 /* Compute the greatest common denominator of a VECTOR of SIZE numbers. */
469 static int
470 gcd_vector (lambda_vector vector, int size)
472 int i;
473 int gcd1 = 0;
475 if (size > 0)
477 gcd1 = vector[0];
478 for (i = 1; i < size; i++)
479 gcd1 = gcd (gcd1, vector[i]);
481 return gcd1;
484 /* Compute the least common multiple of two numbers A and B . */
486 static int
487 lcm (int a, int b)
489 return (abs (a) * abs (b) / gcd (a, b));
492 /* Compute the loop bounds for the auxiliary space NEST.
493 Input system used is Ax <= b. TRANS is the unimodular transformation. */
495 static lambda_loopnest
496 lambda_compute_auxillary_space (lambda_loopnest nest,
497 lambda_trans_matrix trans)
499 lambda_matrix A, B, A1, B1, temp0;
500 lambda_vector a, a1, temp1;
501 lambda_matrix invertedtrans;
502 int determinant, depth, invariants, size, newsize;
503 int i, j, k;
504 lambda_loopnest auxillary_nest;
505 lambda_loop loop;
506 lambda_linear_expression expression;
507 lambda_lattice lattice;
509 int multiple, f1, f2;
511 depth = LN_DEPTH (nest);
512 invariants = LN_INVARIANTS (nest);
514 /* Unfortunately, we can't know the number of constraints we'll have
515 ahead of time, but this should be enough even in ridiculous loop nest
516 cases. We abort if we go over this limit. */
517 A = lambda_matrix_new (128, depth);
518 B = lambda_matrix_new (128, invariants);
519 a = lambda_vector_new (128);
521 A1 = lambda_matrix_new (128, depth);
522 B1 = lambda_matrix_new (128, invariants);
523 a1 = lambda_vector_new (128);
525 /* Store the bounds in the equation matrix A, constant vector a, and
526 invariant matrix B, so that we have Ax <= a + B.
527 This requires a little equation rearranging so that everything is on the
528 correct side of the inequality. */
529 size = 0;
530 for (i = 0; i < depth; i++)
532 loop = LN_LOOPS (nest)[i];
534 /* First we do the lower bound. */
535 if (LL_STEP (loop) > 0)
536 expression = LL_LOWER_BOUND (loop);
537 else
538 expression = LL_UPPER_BOUND (loop);
540 for (; expression != NULL; expression = LLE_NEXT (expression))
542 /* Fill in the coefficient. */
543 for (j = 0; j < i; j++)
544 A[size][j] = LLE_COEFFICIENTS (expression)[j];
546 /* And the invariant coefficient. */
547 for (j = 0; j < invariants; j++)
548 B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j];
550 /* And the constant. */
551 a[size] = LLE_CONSTANT (expression);
553 /* Convert (2x+3y+2+b)/4 <= z to 2x+3y-4z <= -2-b. IE put all
554 constants and single variables on */
555 A[size][i] = -1 * LLE_DENOMINATOR (expression);
556 a[size] *= -1;
557 for (j = 0; j < invariants; j++)
558 B[size][j] *= -1;
560 size++;
561 /* Need to increase matrix sizes above. */
562 gcc_assert (size <= 127);
566 /* Then do the exact same thing for the upper bounds. */
567 if (LL_STEP (loop) > 0)
568 expression = LL_UPPER_BOUND (loop);
569 else
570 expression = LL_LOWER_BOUND (loop);
572 for (; expression != NULL; expression = LLE_NEXT (expression))
574 /* Fill in the coefficient. */
575 for (j = 0; j < i; j++)
576 A[size][j] = LLE_COEFFICIENTS (expression)[j];
578 /* And the invariant coefficient. */
579 for (j = 0; j < invariants; j++)
580 B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j];
582 /* And the constant. */
583 a[size] = LLE_CONSTANT (expression);
585 /* Convert z <= (2x+3y+2+b)/4 to -2x-3y+4z <= 2+b. */
586 for (j = 0; j < i; j++)
587 A[size][j] *= -1;
588 A[size][i] = LLE_DENOMINATOR (expression);
589 size++;
590 /* Need to increase matrix sizes above. */
591 gcc_assert (size <= 127);
596 /* Compute the lattice base x = base * y + origin, where y is the
597 base space. */
598 lattice = lambda_lattice_compute_base (nest);
600 /* Ax <= a + B then becomes ALy <= a+B - A*origin. L is the lattice base */
602 /* A1 = A * L */
603 lambda_matrix_mult (A, LATTICE_BASE (lattice), A1, size, depth, depth);
605 /* a1 = a - A * origin constant. */
606 lambda_matrix_vector_mult (A, size, depth, LATTICE_ORIGIN (lattice), a1);
607 lambda_vector_add_mc (a, 1, a1, -1, a1, size);
609 /* B1 = B - A * origin invariant. */
610 lambda_matrix_mult (A, LATTICE_ORIGIN_INVARIANTS (lattice), B1, size, depth,
611 invariants);
612 lambda_matrix_add_mc (B, 1, B1, -1, B1, size, invariants);
614 /* Now compute the auxiliary space bounds by first inverting U, multiplying
615 it by A1, then performing fourier motzkin. */
617 invertedtrans = lambda_matrix_new (depth, depth);
619 /* Compute the inverse of U. */
620 determinant = lambda_matrix_inverse (LTM_MATRIX (trans),
621 invertedtrans, depth);
623 /* A = A1 inv(U). */
624 lambda_matrix_mult (A1, invertedtrans, A, size, depth, depth);
626 /* Perform Fourier-Motzkin elimination to calculate the bounds of the
627 auxillary nest.
628 Fourier-Motzkin is a way of reducing systems of linear inequality so that
629 it is easy to calculate the answer and bounds.
630 A sketch of how it works:
631 Given a system of linear inequalities, ai * xj >= bk, you can always
632 rewrite the constraints so they are all of the form
633 a <= x, or x <= b, or x >= constant for some x in x1 ... xj (and some b
634 in b1 ... bk, and some a in a1...ai)
635 You can then eliminate this x from the non-constant inequalities by
636 rewriting these as a <= b, x >= constant, and delete the x variable.
637 You can then repeat this for any remaining x variables, and then we have
638 an easy to use variable <= constant (or no variables at all) form that we
639 can construct our bounds from.
641 In our case, each time we eliminate, we construct part of the bound from
642 the ith variable, then delete the ith variable.
644 Remember the constant are in our vector a, our coefficient matrix is A,
645 and our invariant coefficient matrix is B */
647 /* Swap B and B1, and a1 and a */
648 temp0 = B1;
649 B1 = B;
650 B = temp0;
652 temp1 = a1;
653 a1 = a;
654 a = temp1;
656 auxillary_nest = lambda_loopnest_new (depth, invariants);
658 for (i = depth - 1; i >= 0; i--)
660 loop = lambda_loop_new ();
661 LN_LOOPS (auxillary_nest)[i] = loop;
662 LL_STEP (loop) = 1;
664 for (j = 0; j < size; j++)
666 if (A[j][i] < 0)
668 /* Lower bound. */
669 expression = lambda_linear_expression_new (depth, invariants);
671 for (k = 0; k < i; k++)
672 LLE_COEFFICIENTS (expression)[k] = A[j][k];
673 for (k = 0; k < invariants; k++)
674 LLE_INVARIANT_COEFFICIENTS (expression)[k] = -1 * B[j][k];
675 LLE_DENOMINATOR (expression) = -1 * A[j][i];
676 LLE_CONSTANT (expression) = -1 * a[j];
677 /* Ignore if identical to the existing lower bound. */
678 if (!lle_equal (LL_LOWER_BOUND (loop),
679 expression, depth, invariants))
681 LLE_NEXT (expression) = LL_LOWER_BOUND (loop);
682 LL_LOWER_BOUND (loop) = expression;
686 else if (A[j][i] > 0)
688 /* Upper bound. */
689 expression = lambda_linear_expression_new (depth, invariants);
690 for (k = 0; k < i; k++)
691 LLE_COEFFICIENTS (expression)[k] = -1 * A[j][k];
692 LLE_CONSTANT (expression) = a[j];
694 for (k = 0; k < invariants; k++)
695 LLE_INVARIANT_COEFFICIENTS (expression)[k] = B[j][k];
697 LLE_DENOMINATOR (expression) = A[j][i];
698 /* Ignore if identical to the existing upper bound. */
699 if (!lle_equal (LL_UPPER_BOUND (loop),
700 expression, depth, invariants))
702 LLE_NEXT (expression) = LL_UPPER_BOUND (loop);
703 LL_UPPER_BOUND (loop) = expression;
708 /* creates a new system by deleting the i'th variable. */
709 newsize = 0;
710 for (j = 0; j < size; j++)
712 if (A[j][i] == 0)
714 lambda_vector_copy (A[j], A1[newsize], depth);
715 lambda_vector_copy (B[j], B1[newsize], invariants);
716 a1[newsize] = a[j];
717 newsize++;
719 else if (A[j][i] > 0)
721 for (k = 0; k < size; k++)
723 if (A[k][i] < 0)
725 multiple = lcm (A[j][i], A[k][i]);
726 f1 = multiple / A[j][i];
727 f2 = -1 * multiple / A[k][i];
729 lambda_vector_add_mc (A[j], f1, A[k], f2,
730 A1[newsize], depth);
731 lambda_vector_add_mc (B[j], f1, B[k], f2,
732 B1[newsize], invariants);
733 a1[newsize] = f1 * a[j] + f2 * a[k];
734 newsize++;
740 temp0 = A;
741 A = A1;
742 A1 = temp0;
744 temp0 = B;
745 B = B1;
746 B1 = temp0;
748 temp1 = a;
749 a = a1;
750 a1 = temp1;
752 size = newsize;
755 return auxillary_nest;
758 /* Compute the loop bounds for the target space, using the bounds of
759 the auxiliary nest AUXILLARY_NEST, and the triangular matrix H. This is
760 done by matrix multiplication and then transformation of the new matrix
761 back into linear expression form.
762 Return the target loopnest. */
764 static lambda_loopnest
765 lambda_compute_target_space (lambda_loopnest auxillary_nest,
766 lambda_trans_matrix H, lambda_vector stepsigns)
768 lambda_matrix inverse, H1;
769 int determinant, i, j;
770 int gcd1, gcd2;
771 int factor;
773 lambda_loopnest target_nest;
774 int depth, invariants;
775 lambda_matrix target;
777 lambda_loop auxillary_loop, target_loop;
778 lambda_linear_expression expression, auxillary_expr, target_expr, tmp_expr;
780 depth = LN_DEPTH (auxillary_nest);
781 invariants = LN_INVARIANTS (auxillary_nest);
783 inverse = lambda_matrix_new (depth, depth);
784 determinant = lambda_matrix_inverse (LTM_MATRIX (H), inverse, depth);
786 /* H1 is H excluding its diagonal. */
787 H1 = lambda_matrix_new (depth, depth);
788 lambda_matrix_copy (LTM_MATRIX (H), H1, depth, depth);
790 for (i = 0; i < depth; i++)
791 H1[i][i] = 0;
793 /* Computes the linear offsets of the loop bounds. */
794 target = lambda_matrix_new (depth, depth);
795 lambda_matrix_mult (H1, inverse, target, depth, depth, depth);
797 target_nest = lambda_loopnest_new (depth, invariants);
799 for (i = 0; i < depth; i++)
802 /* Get a new loop structure. */
803 target_loop = lambda_loop_new ();
804 LN_LOOPS (target_nest)[i] = target_loop;
806 /* Computes the gcd of the coefficients of the linear part. */
807 gcd1 = gcd_vector (target[i], i);
809 /* Include the denominator in the GCD */
810 gcd1 = gcd (gcd1, determinant);
812 /* Now divide through by the gcd */
813 for (j = 0; j < i; j++)
814 target[i][j] = target[i][j] / gcd1;
816 expression = lambda_linear_expression_new (depth, invariants);
817 lambda_vector_copy (target[i], LLE_COEFFICIENTS (expression), depth);
818 LLE_DENOMINATOR (expression) = determinant / gcd1;
819 LLE_CONSTANT (expression) = 0;
820 lambda_vector_clear (LLE_INVARIANT_COEFFICIENTS (expression),
821 invariants);
822 LL_LINEAR_OFFSET (target_loop) = expression;
825 /* For each loop, compute the new bounds from H */
826 for (i = 0; i < depth; i++)
828 auxillary_loop = LN_LOOPS (auxillary_nest)[i];
829 target_loop = LN_LOOPS (target_nest)[i];
830 LL_STEP (target_loop) = LTM_MATRIX (H)[i][i];
831 factor = LTM_MATRIX (H)[i][i];
833 /* First we do the lower bound. */
834 auxillary_expr = LL_LOWER_BOUND (auxillary_loop);
836 for (; auxillary_expr != NULL;
837 auxillary_expr = LLE_NEXT (auxillary_expr))
839 target_expr = lambda_linear_expression_new (depth, invariants);
840 lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
841 depth, inverse, depth,
842 LLE_COEFFICIENTS (target_expr));
843 lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
844 LLE_COEFFICIENTS (target_expr), depth,
845 factor);
847 LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
848 lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
849 LLE_INVARIANT_COEFFICIENTS (target_expr),
850 invariants);
851 lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
852 LLE_INVARIANT_COEFFICIENTS (target_expr),
853 invariants, factor);
854 LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);
856 if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
858 LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
859 * determinant;
860 lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
861 (target_expr),
862 LLE_INVARIANT_COEFFICIENTS
863 (target_expr), invariants,
864 determinant);
865 LLE_DENOMINATOR (target_expr) =
866 LLE_DENOMINATOR (target_expr) * determinant;
868 /* Find the gcd and divide by it here, rather than doing it
869 at the tree level. */
870 gcd1 = gcd_vector (LLE_COEFFICIENTS (target_expr), depth);
871 gcd2 = gcd_vector (LLE_INVARIANT_COEFFICIENTS (target_expr),
872 invariants);
873 gcd1 = gcd (gcd1, gcd2);
874 gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
875 gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
876 for (j = 0; j < depth; j++)
877 LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
878 for (j = 0; j < invariants; j++)
879 LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
880 LLE_CONSTANT (target_expr) /= gcd1;
881 LLE_DENOMINATOR (target_expr) /= gcd1;
882 /* Ignore if identical to existing bound. */
883 if (!lle_equal (LL_LOWER_BOUND (target_loop), target_expr, depth,
884 invariants))
886 LLE_NEXT (target_expr) = LL_LOWER_BOUND (target_loop);
887 LL_LOWER_BOUND (target_loop) = target_expr;
890 /* Now do the upper bound. */
891 auxillary_expr = LL_UPPER_BOUND (auxillary_loop);
893 for (; auxillary_expr != NULL;
894 auxillary_expr = LLE_NEXT (auxillary_expr))
896 target_expr = lambda_linear_expression_new (depth, invariants);
897 lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
898 depth, inverse, depth,
899 LLE_COEFFICIENTS (target_expr));
900 lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
901 LLE_COEFFICIENTS (target_expr), depth,
902 factor);
903 LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
904 lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
905 LLE_INVARIANT_COEFFICIENTS (target_expr),
906 invariants);
907 lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
908 LLE_INVARIANT_COEFFICIENTS (target_expr),
909 invariants, factor);
910 LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);
912 if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
914 LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
915 * determinant;
916 lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
917 (target_expr),
918 LLE_INVARIANT_COEFFICIENTS
919 (target_expr), invariants,
920 determinant);
921 LLE_DENOMINATOR (target_expr) =
922 LLE_DENOMINATOR (target_expr) * determinant;
924 /* Find the gcd and divide by it here, instead of at the
925 tree level. */
926 gcd1 = gcd_vector (LLE_COEFFICIENTS (target_expr), depth);
927 gcd2 = gcd_vector (LLE_INVARIANT_COEFFICIENTS (target_expr),
928 invariants);
929 gcd1 = gcd (gcd1, gcd2);
930 gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
931 gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
932 for (j = 0; j < depth; j++)
933 LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
934 for (j = 0; j < invariants; j++)
935 LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
936 LLE_CONSTANT (target_expr) /= gcd1;
937 LLE_DENOMINATOR (target_expr) /= gcd1;
938 /* Ignore if equal to existing bound. */
939 if (!lle_equal (LL_UPPER_BOUND (target_loop), target_expr, depth,
940 invariants))
942 LLE_NEXT (target_expr) = LL_UPPER_BOUND (target_loop);
943 LL_UPPER_BOUND (target_loop) = target_expr;
947 for (i = 0; i < depth; i++)
949 target_loop = LN_LOOPS (target_nest)[i];
950 /* If necessary, exchange the upper and lower bounds and negate
951 the step size. */
952 if (stepsigns[i] < 0)
954 LL_STEP (target_loop) *= -1;
955 tmp_expr = LL_LOWER_BOUND (target_loop);
956 LL_LOWER_BOUND (target_loop) = LL_UPPER_BOUND (target_loop);
957 LL_UPPER_BOUND (target_loop) = tmp_expr;
960 return target_nest;
963 /* Compute the step signs of TRANS, using TRANS and stepsigns. Return the new
964 result. */
966 static lambda_vector
967 lambda_compute_step_signs (lambda_trans_matrix trans, lambda_vector stepsigns)
969 lambda_matrix matrix, H;
970 int size;
971 lambda_vector newsteps;
972 int i, j, factor, minimum_column;
973 int temp;
975 matrix = LTM_MATRIX (trans);
976 size = LTM_ROWSIZE (trans);
977 H = lambda_matrix_new (size, size);
979 newsteps = lambda_vector_new (size);
980 lambda_vector_copy (stepsigns, newsteps, size);
982 lambda_matrix_copy (matrix, H, size, size);
984 for (j = 0; j < size; j++)
986 lambda_vector row;
987 row = H[j];
988 for (i = j; i < size; i++)
989 if (row[i] < 0)
990 lambda_matrix_col_negate (H, size, i);
991 while (lambda_vector_first_nz (row, size, j + 1) < size)
993 minimum_column = lambda_vector_min_nz (row, size, j);
994 lambda_matrix_col_exchange (H, size, j, minimum_column);
996 temp = newsteps[j];
997 newsteps[j] = newsteps[minimum_column];
998 newsteps[minimum_column] = temp;
1000 for (i = j + 1; i < size; i++)
1002 factor = row[i] / row[j];
1003 lambda_matrix_col_add (H, size, j, i, -1 * factor);
1007 return newsteps;
1010 /* Transform NEST according to TRANS, and return the new loopnest.
1011 This involves
1012 1. Computing a lattice base for the transformation
1013 2. Composing the dense base with the specified transformation (TRANS)
1014 3. Decomposing the combined transformation into a lower triangular portion,
1015 and a unimodular portion.
1016 4. Computing the auxillary nest using the unimodular portion.
1017 5. Computing the target nest using the auxillary nest and the lower
1018 triangular portion. */
1020 lambda_loopnest
1021 lambda_loopnest_transform (lambda_loopnest nest, lambda_trans_matrix trans)
1023 lambda_loopnest auxillary_nest, target_nest;
1025 int depth, invariants;
1026 int i, j;
1027 lambda_lattice lattice;
1028 lambda_trans_matrix trans1, H, U;
1029 lambda_loop loop;
1030 lambda_linear_expression expression;
1031 lambda_vector origin;
1032 lambda_matrix origin_invariants;
1033 lambda_vector stepsigns;
1034 int f;
1036 depth = LN_DEPTH (nest);
1037 invariants = LN_INVARIANTS (nest);
1039 /* Keep track of the signs of the loop steps. */
1040 stepsigns = lambda_vector_new (depth);
1041 for (i = 0; i < depth; i++)
1043 if (LL_STEP (LN_LOOPS (nest)[i]) > 0)
1044 stepsigns[i] = 1;
1045 else
1046 stepsigns[i] = -1;
1049 /* Compute the lattice base. */
1050 lattice = lambda_lattice_compute_base (nest);
1051 trans1 = lambda_trans_matrix_new (depth, depth);
1053 /* Multiply the transformation matrix by the lattice base. */
1055 lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_BASE (lattice),
1056 LTM_MATRIX (trans1), depth, depth, depth);
1058 /* Compute the Hermite normal form for the new transformation matrix. */
1059 H = lambda_trans_matrix_new (depth, depth);
1060 U = lambda_trans_matrix_new (depth, depth);
1061 lambda_matrix_hermite (LTM_MATRIX (trans1), depth, LTM_MATRIX (H),
1062 LTM_MATRIX (U));
1064 /* Compute the auxiliary loop nest's space from the unimodular
1065 portion. */
1066 auxillary_nest = lambda_compute_auxillary_space (nest, U);
1068 /* Compute the loop step signs from the old step signs and the
1069 transformation matrix. */
1070 stepsigns = lambda_compute_step_signs (trans1, stepsigns);
1072 /* Compute the target loop nest space from the auxiliary nest and
1073 the lower triangular matrix H. */
1074 target_nest = lambda_compute_target_space (auxillary_nest, H, stepsigns);
1075 origin = lambda_vector_new (depth);
1076 origin_invariants = lambda_matrix_new (depth, invariants);
1077 lambda_matrix_vector_mult (LTM_MATRIX (trans), depth, depth,
1078 LATTICE_ORIGIN (lattice), origin);
1079 lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_ORIGIN_INVARIANTS (lattice),
1080 origin_invariants, depth, depth, invariants);
1082 for (i = 0; i < depth; i++)
1084 loop = LN_LOOPS (target_nest)[i];
1085 expression = LL_LINEAR_OFFSET (loop);
1086 if (lambda_vector_zerop (LLE_COEFFICIENTS (expression), depth))
1087 f = 1;
1088 else
1089 f = LLE_DENOMINATOR (expression);
1091 LLE_CONSTANT (expression) += f * origin[i];
1093 for (j = 0; j < invariants; j++)
1094 LLE_INVARIANT_COEFFICIENTS (expression)[j] +=
1095 f * origin_invariants[i][j];
1098 return target_nest;
1102 /* Convert a gcc tree expression EXPR to a lambda linear expression, and
1103 return the new expression. DEPTH is the depth of the loopnest.
1104 OUTERINDUCTIONVARS is an array of the induction variables for outer loops
1105 in this nest. INVARIANTS is the array of invariants for the loop. EXTRA
1106 is the amount we have to add/subtract from the expression because of the
1107 type of comparison it is used in. */
1109 static lambda_linear_expression
1110 gcc_tree_to_linear_expression (int depth, tree expr,
1111 VEC(tree) *outerinductionvars,
1112 VEC(tree) *invariants, int extra)
1114 lambda_linear_expression lle = NULL;
1115 switch (TREE_CODE (expr))
1117 case INTEGER_CST:
1119 lle = lambda_linear_expression_new (depth, 2 * depth);
1120 LLE_CONSTANT (lle) = TREE_INT_CST_LOW (expr);
1121 if (extra != 0)
1122 LLE_CONSTANT (lle) = extra;
1124 LLE_DENOMINATOR (lle) = 1;
1126 break;
1127 case SSA_NAME:
1129 tree iv, invar;
1130 size_t i;
1131 for (i = 0; VEC_iterate (tree, outerinductionvars, i, iv); i++)
1132 if (iv != NULL)
1134 if (SSA_NAME_VAR (iv) == SSA_NAME_VAR (expr))
1136 lle = lambda_linear_expression_new (depth, 2 * depth);
1137 LLE_COEFFICIENTS (lle)[i] = 1;
1138 if (extra != 0)
1139 LLE_CONSTANT (lle) = extra;
1141 LLE_DENOMINATOR (lle) = 1;
1144 for (i = 0; VEC_iterate (tree, invariants, i, invar); i++)
1145 if (invar != NULL)
1147 if (SSA_NAME_VAR (invar) == SSA_NAME_VAR (expr))
1149 lle = lambda_linear_expression_new (depth, 2 * depth);
1150 LLE_INVARIANT_COEFFICIENTS (lle)[i] = 1;
1151 if (extra != 0)
1152 LLE_CONSTANT (lle) = extra;
1153 LLE_DENOMINATOR (lle) = 1;
1157 break;
1158 default:
1159 return NULL;
1162 return lle;
1165 /* Return true if OP is invariant in LOOP and all outer loops. */
1167 static bool
1168 invariant_in_loop (struct loop *loop, tree op)
1170 if (is_gimple_min_invariant (op))
1171 return true;
1172 if (loop->depth == 0)
1173 return true;
1174 if (TREE_CODE (op) == SSA_NAME)
1176 tree def;
1177 def = SSA_NAME_DEF_STMT (op);
1178 if (TREE_CODE (SSA_NAME_VAR (op)) == PARM_DECL
1179 && IS_EMPTY_STMT (def))
1180 return true;
1181 if (IS_EMPTY_STMT (def))
1182 return false;
1183 if (loop->outer
1184 && !invariant_in_loop (loop->outer, op))
1185 return false;
1186 return !flow_bb_inside_loop_p (loop, bb_for_stmt (def));
1188 return false;
1191 /* Generate a lambda loop from a gcc loop LOOP. Return the new lambda loop,
1192 or NULL if it could not be converted.
1193 DEPTH is the depth of the loop.
1194 INVARIANTS is a pointer to the array of loop invariants.
1195 The induction variable for this loop should be stored in the parameter
1196 OURINDUCTIONVAR.
1197 OUTERINDUCTIONVARS is an array of induction variables for outer loops. */
1199 static lambda_loop
1200 gcc_loop_to_lambda_loop (struct loop *loop, int depth,
1201 VEC (tree) ** invariants,
1202 tree * ourinductionvar,
1203 VEC (tree) * outerinductionvars,
1204 VEC (tree) ** lboundvars,
1205 VEC (tree) ** uboundvars,
1206 VEC (int) ** steps)
1208 tree phi;
1209 tree exit_cond;
1210 tree access_fn, inductionvar;
1211 tree step;
1212 lambda_loop lloop = NULL;
1213 lambda_linear_expression lbound, ubound;
1214 tree test;
1215 int stepint;
1216 int extra = 0;
1217 tree lboundvar, uboundvar;
1218 use_optype uses;
1220 /* Find out induction var and exit condition. */
1221 inductionvar = find_induction_var_from_exit_cond (loop);
1222 exit_cond = get_loop_exit_condition (loop);
1224 if (inductionvar == NULL || exit_cond == NULL)
1226 if (dump_file && (dump_flags & TDF_DETAILS))
1227 fprintf (dump_file,
1228 "Unable to convert loop: Cannot determine exit condition or induction variable for loop.\n");
1229 return NULL;
1232 test = TREE_OPERAND (exit_cond, 0);
1234 if (SSA_NAME_DEF_STMT (inductionvar) == NULL_TREE)
1237 if (dump_file && (dump_flags & TDF_DETAILS))
1238 fprintf (dump_file,
1239 "Unable to convert loop: Cannot find PHI node for induction variable\n");
1241 return NULL;
1244 phi = SSA_NAME_DEF_STMT (inductionvar);
1245 if (TREE_CODE (phi) != PHI_NODE)
1247 get_stmt_operands (phi);
1248 uses = STMT_USE_OPS (phi);
1250 if (!uses)
1253 if (dump_file && (dump_flags & TDF_DETAILS))
1254 fprintf (dump_file,
1255 "Unable to convert loop: Cannot find PHI node for induction variable\n");
1257 return NULL;
1260 phi = USE_OP (uses, 0);
1261 phi = SSA_NAME_DEF_STMT (phi);
1262 if (TREE_CODE (phi) != PHI_NODE)
1265 if (dump_file && (dump_flags & TDF_DETAILS))
1266 fprintf (dump_file,
1267 "Unable to convert loop: Cannot find PHI node for induction variable\n");
1268 return NULL;
1272 /* The induction variable name/version we want to put in the array is the
1273 result of the induction variable phi node. */
1274 *ourinductionvar = PHI_RESULT (phi);
1275 access_fn = instantiate_parameters
1276 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
1277 if (!access_fn)
1279 if (dump_file && (dump_flags & TDF_DETAILS))
1280 fprintf (dump_file,
1281 "Unable to convert loop: Access function for induction variable phi is NULL\n");
1283 return NULL;
1286 step = evolution_part_in_loop_num (access_fn, loop->num);
1287 if (!step || step == chrec_dont_know)
1289 if (dump_file && (dump_flags & TDF_DETAILS))
1290 fprintf (dump_file,
1291 "Unable to convert loop: Cannot determine step of loop.\n");
1293 return NULL;
1295 if (TREE_CODE (step) != INTEGER_CST)
1298 if (dump_file && (dump_flags & TDF_DETAILS))
1299 fprintf (dump_file,
1300 "Unable to convert loop: Step of loop is not integer.\n");
1301 return NULL;
1304 stepint = TREE_INT_CST_LOW (step);
1306 /* Only want phis for induction vars, which will have two
1307 arguments. */
1308 if (PHI_NUM_ARGS (phi) != 2)
1310 if (dump_file && (dump_flags & TDF_DETAILS))
1311 fprintf (dump_file,
1312 "Unable to convert loop: PHI node for induction variable has >2 arguments\n");
1313 return NULL;
1316 /* Another induction variable check. One argument's source should be
1317 in the loop, one outside the loop. */
1318 if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 0)->src)
1319 && flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 1)->src))
1322 if (dump_file && (dump_flags & TDF_DETAILS))
1323 fprintf (dump_file,
1324 "Unable to convert loop: PHI edges both inside loop, or both outside loop.\n");
1326 return NULL;
1329 if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 0)->src))
1331 lboundvar = PHI_ARG_DEF (phi, 1);
1332 lbound = gcc_tree_to_linear_expression (depth, lboundvar,
1333 outerinductionvars, *invariants,
1336 else
1338 lboundvar = PHI_ARG_DEF (phi, 0);
1339 lbound = gcc_tree_to_linear_expression (depth, lboundvar,
1340 outerinductionvars, *invariants,
1344 if (!lbound)
1347 if (dump_file && (dump_flags & TDF_DETAILS))
1348 fprintf (dump_file,
1349 "Unable to convert loop: Cannot convert lower bound to linear expression\n");
1351 return NULL;
1353 /* One part of the test may be a loop invariant tree. */
1354 if (TREE_CODE (TREE_OPERAND (test, 1)) == SSA_NAME
1355 && invariant_in_loop (loop, TREE_OPERAND (test, 1)))
1356 VEC_safe_push (tree, *invariants, TREE_OPERAND (test, 1));
1357 else if (TREE_CODE (TREE_OPERAND (test, 0)) == SSA_NAME
1358 && invariant_in_loop (loop, TREE_OPERAND (test, 0)))
1359 VEC_safe_push (tree, *invariants, TREE_OPERAND (test, 0));
1361 /* The non-induction variable part of the test is the upper bound variable.
1363 if (TREE_OPERAND (test, 0) == inductionvar)
1364 uboundvar = TREE_OPERAND (test, 1);
1365 else
1366 uboundvar = TREE_OPERAND (test, 0);
1369 /* We only size the vectors assuming we have, at max, 2 times as many
1370 invariants as we do loops (one for each bound).
1371 This is just an arbitrary number, but it has to be matched against the
1372 code below. */
1373 gcc_assert (VEC_length (tree, *invariants) <= (unsigned int) (2 * depth));
1376 /* We might have some leftover. */
1377 if (TREE_CODE (test) == LT_EXPR)
1378 extra = -1 * stepint;
1379 else if (TREE_CODE (test) == NE_EXPR)
1380 extra = -1 * stepint;
1381 else if (TREE_CODE (test) == GT_EXPR)
1382 extra = -1 * stepint;
1384 ubound = gcc_tree_to_linear_expression (depth,
1385 uboundvar,
1386 outerinductionvars,
1387 *invariants, extra);
1388 VEC_safe_push (tree, *uboundvars, build (PLUS_EXPR, integer_type_node,
1389 uboundvar,
1390 build_int_cst (integer_type_node, extra)));
1391 VEC_safe_push (tree, *lboundvars, lboundvar);
1392 VEC_safe_push (int, *steps, stepint);
1393 if (!ubound)
1396 if (dump_file && (dump_flags & TDF_DETAILS))
1397 fprintf (dump_file,
1398 "Unable to convert loop: Cannot convert upper bound to linear expression\n");
1399 return NULL;
1402 lloop = lambda_loop_new ();
1403 LL_STEP (lloop) = stepint;
1404 LL_LOWER_BOUND (lloop) = lbound;
1405 LL_UPPER_BOUND (lloop) = ubound;
1406 return lloop;
1409 /* Given a LOOP, find the induction variable it is testing against in the exit
1410 condition. Return the induction variable if found, NULL otherwise. */
1412 static tree
1413 find_induction_var_from_exit_cond (struct loop *loop)
1415 tree expr = get_loop_exit_condition (loop);
1416 tree ivarop;
1417 tree test;
1418 if (expr == NULL_TREE)
1419 return NULL_TREE;
1420 if (TREE_CODE (expr) != COND_EXPR)
1421 return NULL_TREE;
1422 test = TREE_OPERAND (expr, 0);
1423 if (!COMPARISON_CLASS_P (test))
1424 return NULL_TREE;
1425 /* This is a guess. We say that for a <,!=,<= b, a is the induction
1426 variable.
1427 For >, >=, we guess b is the induction variable.
1428 If we are wrong, it'll fail the rest of the induction variable tests, and
1429 everything will be fine anyway. */
1430 switch (TREE_CODE (test))
1432 case LT_EXPR:
1433 case LE_EXPR:
1434 case NE_EXPR:
1435 ivarop = TREE_OPERAND (test, 0);
1436 break;
1437 case GT_EXPR:
1438 case GE_EXPR:
1439 ivarop = TREE_OPERAND (test, 1);
1440 break;
1441 default:
1442 gcc_unreachable();
1444 if (TREE_CODE (ivarop) != SSA_NAME)
1445 return NULL_TREE;
1446 return ivarop;
1449 DEF_VEC_GC_P(lambda_loop);
1450 /* Generate a lambda loopnest from a gcc loopnest LOOP_NEST.
1451 Return the new loop nest.
1452 INDUCTIONVARS is a pointer to an array of induction variables for the
1453 loopnest that will be filled in during this process.
1454 INVARIANTS is a pointer to an array of invariants that will be filled in
1455 during this process. */
1457 lambda_loopnest
1458 gcc_loopnest_to_lambda_loopnest (struct loops *currloops,
1459 struct loop * loop_nest,
1460 VEC (tree) **inductionvars,
1461 VEC (tree) **invariants,
1462 bool need_perfect_nest)
1464 lambda_loopnest ret;
1465 struct loop *temp;
1466 int depth = 0;
1467 size_t i;
1468 VEC (lambda_loop) *loops;
1469 VEC (tree) *uboundvars;
1470 VEC (tree) *lboundvars;
1471 VEC (int) *steps;
1472 lambda_loop newloop;
1473 tree inductionvar = NULL;
1475 temp = loop_nest;
1476 while (temp)
1478 depth++;
1479 temp = temp->inner;
1481 loops = VEC_alloc (lambda_loop, 1);
1482 *inductionvars = VEC_alloc (tree, 1);
1483 *invariants = VEC_alloc (tree, 1);
1484 lboundvars = VEC_alloc (tree, 1);
1485 uboundvars = VEC_alloc (tree, 1);
1486 steps = VEC_alloc (int, 1);
1487 temp = loop_nest;
1488 while (temp)
1490 newloop = gcc_loop_to_lambda_loop (temp, depth, invariants,
1491 &inductionvar, *inductionvars,
1492 &lboundvars, &uboundvars,
1493 &steps);
1494 if (!newloop)
1495 return NULL;
1496 VEC_safe_push (tree, *inductionvars, inductionvar);
1497 VEC_safe_push (lambda_loop, loops, newloop);
1498 temp = temp->inner;
1500 if (need_perfect_nest
1501 && !perfect_nestify (currloops, loop_nest,
1502 lboundvars, uboundvars, steps, *inductionvars))
1504 if (dump_file)
1505 fprintf (dump_file, "Not a perfect nest and couldn't convert to one.\n");
1506 return NULL;
1508 ret = lambda_loopnest_new (depth, 2 * depth);
1509 for (i = 0; VEC_iterate (lambda_loop, loops, i, newloop); i++)
1510 LN_LOOPS (ret)[i] = newloop;
1512 return ret;
1516 /* Convert a lambda body vector LBV to a gcc tree, and return the new tree.
1517 STMTS_TO_INSERT is a pointer to a tree where the statements we need to be
1518 inserted for us are stored. INDUCTION_VARS is the array of induction
1519 variables for the loop this LBV is from. */
1521 static tree
1522 lbv_to_gcc_expression (lambda_body_vector lbv,
1523 VEC (tree) *induction_vars, tree * stmts_to_insert)
1525 tree stmts, stmt, resvar, name;
1526 size_t i;
1527 tree_stmt_iterator tsi;
1529 /* Create a statement list and a linear expression temporary. */
1530 stmts = alloc_stmt_list ();
1531 resvar = create_tmp_var (integer_type_node, "lbvtmp");
1532 add_referenced_tmp_var (resvar);
1534 /* Start at 0. */
1535 stmt = build (MODIFY_EXPR, void_type_node, resvar, integer_zero_node);
1536 name = make_ssa_name (resvar, stmt);
1537 TREE_OPERAND (stmt, 0) = name;
1538 tsi = tsi_last (stmts);
1539 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1541 for (i = 0; i < VEC_length (tree ,induction_vars) ; i++)
1543 if (LBV_COEFFICIENTS (lbv)[i] != 0)
1545 tree newname;
1547 /* newname = coefficient * induction_variable */
1548 stmt = build (MODIFY_EXPR, void_type_node, resvar,
1549 fold (build (MULT_EXPR, integer_type_node,
1550 VEC_index (tree, induction_vars, i),
1551 build_int_cst (integer_type_node,
1552 LBV_COEFFICIENTS (lbv)[i]))));
1553 newname = make_ssa_name (resvar, stmt);
1554 TREE_OPERAND (stmt, 0) = newname;
1555 tsi = tsi_last (stmts);
1556 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1557 /* name = name + newname */
1558 stmt = build (MODIFY_EXPR, void_type_node, resvar,
1559 build (PLUS_EXPR, integer_type_node, name, newname));
1560 name = make_ssa_name (resvar, stmt);
1561 TREE_OPERAND (stmt, 0) = name;
1562 tsi = tsi_last (stmts);
1563 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1567 /* Handle any denominator that occurs. */
1568 if (LBV_DENOMINATOR (lbv) != 1)
1570 stmt = build (MODIFY_EXPR, void_type_node, resvar,
1571 build (CEIL_DIV_EXPR, integer_type_node,
1572 name, build_int_cst (integer_type_node,
1573 LBV_DENOMINATOR (lbv))));
1574 name = make_ssa_name (resvar, stmt);
1575 TREE_OPERAND (stmt, 0) = name;
1576 tsi = tsi_last (stmts);
1577 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1579 *stmts_to_insert = stmts;
1580 return name;
1583 /* Convert a linear expression from coefficient and constant form to a
1584 gcc tree.
1585 Return the tree that represents the final value of the expression.
1586 LLE is the linear expression to convert.
1587 OFFSET is the linear offset to apply to the expression.
1588 INDUCTION_VARS is a vector of induction variables for the loops.
1589 INVARIANTS is a vector of the loop nest invariants.
1590 WRAP specifies what tree code to wrap the results in, if there is more than
1591 one (it is either MAX_EXPR, or MIN_EXPR).
1592 STMTS_TO_INSERT Is a pointer to the statement list we fill in with
1593 statements that need to be inserted for the linear expression. */
1595 static tree
1596 lle_to_gcc_expression (lambda_linear_expression lle,
1597 lambda_linear_expression offset,
1598 VEC(tree) *induction_vars,
1599 VEC(tree) *invariants,
1600 enum tree_code wrap, tree * stmts_to_insert)
1602 tree stmts, stmt, resvar, name;
1603 size_t i;
1604 tree_stmt_iterator tsi;
1605 VEC(tree) *results;
1607 name = NULL_TREE;
1608 /* Create a statement list and a linear expression temporary. */
1609 stmts = alloc_stmt_list ();
1610 resvar = create_tmp_var (integer_type_node, "lletmp");
1611 add_referenced_tmp_var (resvar);
1612 results = VEC_alloc (tree, 1);
1614 /* Build up the linear expressions, and put the variable representing the
1615 result in the results array. */
1616 for (; lle != NULL; lle = LLE_NEXT (lle))
1618 /* Start at name = 0. */
1619 stmt = build (MODIFY_EXPR, void_type_node, resvar, integer_zero_node);
1620 name = make_ssa_name (resvar, stmt);
1621 TREE_OPERAND (stmt, 0) = name;
1622 tsi = tsi_last (stmts);
1623 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1625 /* First do the induction variables.
1626 at the end, name = name + all the induction variables added
1627 together. */
1628 for (i = 0; i < VEC_length (tree ,induction_vars); i++)
1630 if (LLE_COEFFICIENTS (lle)[i] != 0)
1632 tree newname;
1633 tree mult;
1634 tree coeff;
1636 /* mult = induction variable * coefficient. */
1637 if (LLE_COEFFICIENTS (lle)[i] == 1)
1639 mult = VEC_index (tree, induction_vars, i);
1641 else
1643 coeff = build_int_cst (integer_type_node,
1644 LLE_COEFFICIENTS (lle)[i]);
1645 mult = fold (build (MULT_EXPR, integer_type_node,
1646 VEC_index (tree, induction_vars, i),
1647 coeff));
1650 /* newname = mult */
1651 stmt = build (MODIFY_EXPR, void_type_node, resvar, mult);
1652 newname = make_ssa_name (resvar, stmt);
1653 TREE_OPERAND (stmt, 0) = newname;
1654 tsi = tsi_last (stmts);
1655 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1657 /* name = name + newname */
1658 stmt = build (MODIFY_EXPR, void_type_node, resvar,
1659 build (PLUS_EXPR, integer_type_node,
1660 name, newname));
1661 name = make_ssa_name (resvar, stmt);
1662 TREE_OPERAND (stmt, 0) = name;
1663 tsi = tsi_last (stmts);
1664 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1668 /* Handle our invariants.
1669 At the end, we have name = name + result of adding all multiplied
1670 invariants. */
1671 for (i = 0; i < VEC_length (tree, invariants); i++)
1673 if (LLE_INVARIANT_COEFFICIENTS (lle)[i] != 0)
1675 tree newname;
1676 tree mult;
1677 tree coeff;
1679 /* mult = invariant * coefficient */
1680 if (LLE_INVARIANT_COEFFICIENTS (lle)[i] == 1)
1682 mult = VEC_index (tree, invariants, i);
1684 else
1686 coeff = build_int_cst (integer_type_node,
1687 LLE_INVARIANT_COEFFICIENTS (lle)[i]);
1688 mult = fold (build (MULT_EXPR, integer_type_node,
1689 VEC_index (tree, invariants, i),
1690 coeff));
1693 /* newname = mult */
1694 stmt = build (MODIFY_EXPR, void_type_node, resvar, mult);
1695 newname = make_ssa_name (resvar, stmt);
1696 TREE_OPERAND (stmt, 0) = newname;
1697 tsi = tsi_last (stmts);
1698 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1700 /* name = name + newname */
1701 stmt = build (MODIFY_EXPR, void_type_node, resvar,
1702 build (PLUS_EXPR, integer_type_node,
1703 name, newname));
1704 name = make_ssa_name (resvar, stmt);
1705 TREE_OPERAND (stmt, 0) = name;
1706 tsi = tsi_last (stmts);
1707 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1711 /* Now handle the constant.
1712 name = name + constant. */
1713 if (LLE_CONSTANT (lle) != 0)
1715 stmt = build (MODIFY_EXPR, void_type_node, resvar,
1716 build (PLUS_EXPR, integer_type_node,
1717 name, build_int_cst (integer_type_node,
1718 LLE_CONSTANT (lle))));
1719 name = make_ssa_name (resvar, stmt);
1720 TREE_OPERAND (stmt, 0) = name;
1721 tsi = tsi_last (stmts);
1722 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1725 /* Now handle the offset.
1726 name = name + linear offset. */
1727 if (LLE_CONSTANT (offset) != 0)
1729 stmt = build (MODIFY_EXPR, void_type_node, resvar,
1730 build (PLUS_EXPR, integer_type_node,
1731 name, build_int_cst (integer_type_node,
1732 LLE_CONSTANT (offset))));
1733 name = make_ssa_name (resvar, stmt);
1734 TREE_OPERAND (stmt, 0) = name;
1735 tsi = tsi_last (stmts);
1736 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1739 /* Handle any denominator that occurs. */
1740 if (LLE_DENOMINATOR (lle) != 1)
1742 if (wrap == MAX_EXPR)
1743 stmt = build (MODIFY_EXPR, void_type_node, resvar,
1744 build (CEIL_DIV_EXPR, integer_type_node,
1745 name, build_int_cst (integer_type_node,
1746 LLE_DENOMINATOR (lle))));
1747 else if (wrap == MIN_EXPR)
1748 stmt = build (MODIFY_EXPR, void_type_node, resvar,
1749 build (FLOOR_DIV_EXPR, integer_type_node,
1750 name, build_int_cst (integer_type_node,
1751 LLE_DENOMINATOR (lle))));
1752 else
1753 gcc_unreachable();
1755 /* name = {ceil, floor}(name/denominator) */
1756 name = make_ssa_name (resvar, stmt);
1757 TREE_OPERAND (stmt, 0) = name;
1758 tsi = tsi_last (stmts);
1759 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1761 VEC_safe_push (tree, results, name);
1764 /* Again, out of laziness, we don't handle this case yet. It's not
1765 hard, it just hasn't occurred. */
1766 gcc_assert (VEC_length (tree, results) <= 2);
1768 /* We may need to wrap the results in a MAX_EXPR or MIN_EXPR. */
1769 if (VEC_length (tree, results) > 1)
1771 tree op1 = VEC_index (tree, results, 0);
1772 tree op2 = VEC_index (tree, results, 1);
1773 stmt = build (MODIFY_EXPR, void_type_node, resvar,
1774 build (wrap, integer_type_node, op1, op2));
1775 name = make_ssa_name (resvar, stmt);
1776 TREE_OPERAND (stmt, 0) = name;
1777 tsi = tsi_last (stmts);
1778 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1781 *stmts_to_insert = stmts;
1782 return name;
1785 /* Transform a lambda loopnest NEW_LOOPNEST, which had TRANSFORM applied to
1786 it, back into gcc code. This changes the
1787 loops, their induction variables, and their bodies, so that they
1788 match the transformed loopnest.
1789 OLD_LOOPNEST is the loopnest before we've replaced it with the new
1790 loopnest.
1791 OLD_IVS is a vector of induction variables from the old loopnest.
1792 INVARIANTS is a vector of loop invariants from the old loopnest.
1793 NEW_LOOPNEST is the new lambda loopnest to replace OLD_LOOPNEST with.
1794 TRANSFORM is the matrix transform that was applied to OLD_LOOPNEST to get
1795 NEW_LOOPNEST. */
1796 void
1797 lambda_loopnest_to_gcc_loopnest (struct loop *old_loopnest,
1798 VEC(tree) *old_ivs,
1799 VEC(tree) *invariants,
1800 lambda_loopnest new_loopnest,
1801 lambda_trans_matrix transform)
1804 struct loop *temp;
1805 size_t i = 0;
1806 size_t depth = 0;
1807 VEC(tree) *new_ivs;
1808 block_stmt_iterator bsi;
1810 if (dump_file)
1812 transform = lambda_trans_matrix_inverse (transform);
1813 fprintf (dump_file, "Inverse of transformation matrix:\n");
1814 print_lambda_trans_matrix (dump_file, transform);
1816 temp = old_loopnest;
1817 new_ivs = VEC_alloc (tree, 1);
1818 while (temp)
1820 temp = temp->inner;
1821 depth++;
1823 temp = old_loopnest;
1825 while (temp)
1827 lambda_loop newloop;
1828 basic_block bb;
1829 tree ivvar, ivvarinced, exitcond, stmts;
1830 enum tree_code testtype;
1831 tree newupperbound, newlowerbound;
1832 lambda_linear_expression offset;
1833 /* First, build the new induction variable temporary */
1835 ivvar = create_tmp_var (integer_type_node, "lnivtmp");
1836 add_referenced_tmp_var (ivvar);
1838 VEC_safe_push (tree, new_ivs, ivvar);
1840 newloop = LN_LOOPS (new_loopnest)[i];
1842 /* Linear offset is a bit tricky to handle. Punt on the unhandled
1843 cases for now. */
1844 offset = LL_LINEAR_OFFSET (newloop);
1846 gcc_assert (LLE_DENOMINATOR (offset) == 1 &&
1847 lambda_vector_zerop (LLE_COEFFICIENTS (offset), depth));
1849 /* Now build the new lower bounds, and insert the statements
1850 necessary to generate it on the loop preheader. */
1851 newlowerbound = lle_to_gcc_expression (LL_LOWER_BOUND (newloop),
1852 LL_LINEAR_OFFSET (newloop),
1853 new_ivs,
1854 invariants, MAX_EXPR, &stmts);
1855 bsi_insert_on_edge (loop_preheader_edge (temp), stmts);
1856 bsi_commit_edge_inserts (NULL);
1857 /* Build the new upper bound and insert its statements in the
1858 basic block of the exit condition */
1859 newupperbound = lle_to_gcc_expression (LL_UPPER_BOUND (newloop),
1860 LL_LINEAR_OFFSET (newloop),
1861 new_ivs,
1862 invariants, MIN_EXPR, &stmts);
1863 exitcond = get_loop_exit_condition (temp);
1864 bb = bb_for_stmt (exitcond);
1865 bsi = bsi_start (bb);
1866 bsi_insert_after (&bsi, stmts, BSI_NEW_STMT);
1868 /* Create the new iv, and insert it's increment on the latch
1869 block. */
1871 bb = temp->latch->pred->src;
1872 bsi = bsi_last (bb);
1873 create_iv (newlowerbound,
1874 build_int_cst (integer_type_node, LL_STEP (newloop)),
1875 ivvar, temp, &bsi, false, &ivvar,
1876 &ivvarinced);
1878 /* Replace the exit condition with the new upper bound
1879 comparison. */
1880 testtype = LL_STEP (newloop) >= 0 ? LE_EXPR : GE_EXPR;
1881 COND_EXPR_COND (exitcond) = build (testtype,
1882 boolean_type_node,
1883 ivvarinced, newupperbound);
1884 modify_stmt (exitcond);
1885 VEC_replace (tree, new_ivs, i, ivvar);
1887 i++;
1888 temp = temp->inner;
1891 /* Rewrite uses of the old ivs so that they are now specified in terms of
1892 the new ivs. */
1893 temp = old_loopnest;
1894 for (i = 0; i < VEC_length (tree, old_ivs); i++)
1896 int j;
1897 tree oldiv = VEC_index (tree, old_ivs, i);
1898 dataflow_t imm = get_immediate_uses (SSA_NAME_DEF_STMT (oldiv));
1899 for (j = 0; j < num_immediate_uses (imm); j++)
1901 size_t k;
1902 tree stmt = immediate_use (imm, j);
1903 use_optype uses;
1904 get_stmt_operands (stmt);
1905 uses = STMT_USE_OPS (stmt);
1906 for (k = 0; k < NUM_USES (uses); k++)
1908 use_operand_p use = USE_OP_PTR (uses, k);
1909 if (USE_FROM_PTR (use) == oldiv)
1911 tree newiv, stmts;
1912 lambda_body_vector lbv;
1913 /* Compute the new expression for the induction
1914 variable. */
1915 depth = VEC_length (tree, new_ivs);
1916 lbv = lambda_body_vector_new (depth);
1917 LBV_COEFFICIENTS (lbv)[i] = 1;
1918 lbv = lambda_body_vector_compute_new (transform, lbv);
1919 newiv = lbv_to_gcc_expression (lbv, new_ivs, &stmts);
1920 bsi = stmt_for_bsi (stmt);
1921 /* Insert the statements to build that
1922 expression. */
1923 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
1924 SET_USE (use, newiv);
1925 modify_stmt (stmt);
1934 /* Returns true when the vector V is lexicographically positive, in
1935 other words, when the first nonzero element is positive. */
1937 static bool
1938 lambda_vector_lexico_pos (lambda_vector v,
1939 unsigned n)
1941 unsigned i;
1942 for (i = 0; i < n; i++)
1944 if (v[i] == 0)
1945 continue;
1946 if (v[i] < 0)
1947 return false;
1948 if (v[i] > 0)
1949 return true;
1951 return true;
1955 /* Return TRUE if this is not interesting statement from the perspective of
1956 determining if we have a perfect loop nest. */
1958 static bool
1959 not_interesting_stmt (tree stmt)
1961 /* Note that COND_EXPR's aren't interesting because if they were exiting the
1962 loop, we would have already failed the number of exits tests. */
1963 if (TREE_CODE (stmt) == LABEL_EXPR
1964 || TREE_CODE (stmt) == GOTO_EXPR
1965 || TREE_CODE (stmt) == COND_EXPR)
1966 return true;
1967 return false;
1970 /* Return TRUE if PHI uses DEF for it's in-the-loop edge for LOOP. */
1972 static bool
1973 phi_loop_edge_uses_def (struct loop *loop, tree phi, tree def)
1975 int i;
1976 for (i = 0; i < PHI_NUM_ARGS (phi); i++)
1977 if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, i)->src))
1978 if (PHI_ARG_DEF (phi, i) == def)
1979 return true;
1980 return false;
1983 /* Return TRUE if STMT is a use of PHI_RESULT. */
1985 static bool
1986 stmt_uses_phi_result (tree stmt, tree phi_result)
1988 use_optype uses = STMT_USE_OPS (stmt);
1990 /* This is conservatively true, because we only want SIMPLE bumpers
1991 of the form x +- constant for our pass. */
1992 if (NUM_USES (uses) != 1)
1993 return false;
1994 if (USE_OP (uses, 0) == phi_result)
1995 return true;
1997 return false;
2000 /* STMT is a bumper stmt for LOOP if the version it defines is used in the
2001 in-loop-edge in a phi node, and the operand it uses is the result of that
2002 phi node.
2003 I.E. i_29 = i_3 + 1
2004 i_3 = PHI (0, i_29); */
2006 static bool
2007 stmt_is_bumper_for_loop (struct loop *loop, tree stmt)
2009 tree use;
2010 tree def;
2011 def_optype defs = STMT_DEF_OPS (stmt);
2012 dataflow_t imm;
2013 int i;
2015 if (NUM_DEFS (defs) != 1)
2016 return false;
2017 def = DEF_OP (defs, 0);
2018 imm = get_immediate_uses (stmt);
2019 for (i = 0; i < num_immediate_uses (imm); i++)
2021 use = immediate_use (imm, i);
2022 if (TREE_CODE (use) == PHI_NODE)
2024 if (phi_loop_edge_uses_def (loop, use, def))
2025 if (stmt_uses_phi_result (stmt, PHI_RESULT (use)))
2026 return true;
2029 return false;
2031 /* Return true if LOOP is a perfect loop nest.
2032 Perfect loop nests are those loop nests where all code occurs in the
2033 innermost loop body.
2034 If S is a program statement, then
2036 i.e.
2037 DO I = 1, 20
2039 DO J = 1, 20
2041 END DO
2042 END DO
2043 is not a perfect loop nest because of S1.
2045 DO I = 1, 20
2046 DO J = 1, 20
2049 END DO
2050 END DO
2051 is a perfect loop nest.
2053 Since we don't have high level loops anymore, we basically have to walk our
2054 statements and ignore those that are there because the loop needs them (IE
2055 the induction variable increment, and jump back to the top of the loop). */
2057 bool
2058 perfect_nest_p (struct loop *loop)
2060 basic_block *bbs;
2061 size_t i;
2062 tree exit_cond;
2064 if (!loop->inner)
2065 return true;
2066 bbs = get_loop_body (loop);
2067 exit_cond = get_loop_exit_condition (loop);
2068 for (i = 0; i < loop->num_nodes; i++)
2070 if (bbs[i]->loop_father == loop)
2072 block_stmt_iterator bsi;
2073 for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi))
2075 tree stmt = bsi_stmt (bsi);
2076 if (stmt == exit_cond
2077 || not_interesting_stmt (stmt)
2078 || stmt_is_bumper_for_loop (loop, stmt))
2079 continue;
2080 free (bbs);
2081 return false;
2085 free (bbs);
2086 /* See if the inner loops are perfectly nested as well. */
2087 if (loop->inner)
2088 return perfect_nest_p (loop->inner);
2089 return true;
2093 /* Add phi args using PENDINT_STMT list. */
2095 static void
2096 nestify_update_pending_stmts (edge e)
2098 basic_block dest;
2099 tree phi, arg, def;
2101 if (!PENDING_STMT (e))
2102 return;
2104 dest = e->dest;
2106 for (phi = phi_nodes (dest), arg = PENDING_STMT (e);
2107 phi;
2108 phi = TREE_CHAIN (phi), arg = TREE_CHAIN (arg))
2110 def = TREE_VALUE (arg);
2111 add_phi_arg (&phi, def, e);
2114 PENDING_STMT (e) = NULL;
2117 /* Replace the USES of tree X in STMT with tree Y */
2119 static void
2120 replace_uses_of_x_with_y (tree stmt, tree x, tree y)
2122 use_optype uses = STMT_USE_OPS (stmt);
2123 size_t i;
2124 for (i = 0; i < NUM_USES (uses); i++)
2126 if (USE_OP (uses, i) == x)
2127 SET_USE_OP (uses, i, y);
2131 /* Return TRUE if STMT uses tree OP in it's uses. */
2133 static bool
2134 stmt_uses_op (tree stmt, tree op)
2136 use_optype uses = STMT_USE_OPS (stmt);
2137 size_t i;
2138 for (i = 0; i < NUM_USES (uses); i++)
2140 if (USE_OP (uses, i) == op)
2141 return true;
2143 return false;
2146 /* Return TRUE if LOOP is an imperfect nest that we can convert to a perfect
2147 one. LOOPIVS is a vector of induction variables, one per loop.
2148 ATM, we only handle imperfect nests of depth 2, where all of the statements
2149 occur after the inner loop. */
2151 static bool
2152 can_convert_to_perfect_nest (struct loop *loop,
2153 VEC (tree) *loopivs)
2155 basic_block *bbs;
2156 tree exit_condition;
2157 size_t i;
2158 block_stmt_iterator bsi;
2160 /* Can't handle triply nested+ loops yet. */
2161 if (!loop->inner || loop->inner->inner)
2162 return false;
2164 /* We only handle moving the after-inner-body statements right now, so make
2165 sure all the statements we need to move are located in that position. */
2166 bbs = get_loop_body (loop);
2167 exit_condition = get_loop_exit_condition (loop);
2168 for (i = 0; i < loop->num_nodes; i++)
2170 if (bbs[i]->loop_father == loop)
2172 for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi))
2174 size_t j;
2175 tree stmt = bsi_stmt (bsi);
2176 if (stmt == exit_condition
2177 || not_interesting_stmt (stmt)
2178 || stmt_is_bumper_for_loop (loop, stmt))
2179 continue;
2180 /* If the statement uses inner loop ivs, we == screwed. */
2181 for (j = 1; j < VEC_length (tree, loopivs); j++)
2182 if (stmt_uses_op (stmt, VEC_index (tree, loopivs, j)))
2184 free (bbs);
2185 return false;
2188 /* If the bb of a statement we care about isn't dominated by
2189 the header of the inner loop, then we are also screwed. */
2190 if (!dominated_by_p (CDI_DOMINATORS,
2191 bb_for_stmt (stmt),
2192 loop->inner->header))
2194 free (bbs);
2195 return false;
2200 return true;
2203 /* Transform the loop nest into a perfect nest, if possible.
2204 LOOPS is the current struct loops *
2205 LOOP is the loop nest to transform into a perfect nest
2206 LBOUNDS are the lower bounds for the loops to transform
2207 UBOUNDS are the upper bounds for the loops to transform
2208 STEPS is the STEPS for the loops to transform.
2209 LOOPIVS is the induction variables for the loops to transform.
2211 Basically, for the case of
2213 FOR (i = 0; i < 50; i++)
2215 FOR (j =0; j < 50; j++)
2217 <whatever>
2219 <some code>
2222 This function will transform it into a perfect loop nest by splitting the
2223 outer loop into two loops, like so:
2225 FOR (i = 0; i < 50; i++)
2227 FOR (j = 0; j < 50; j++)
2229 <whatever>
2233 FOR (i = 0; i < 50; i ++)
2235 <some code>
2238 Return FALSE if we can't make this loop into a perfect nest. */
2239 static bool
2240 perfect_nestify (struct loops *loops,
2241 struct loop *loop,
2242 VEC (tree) *lbounds,
2243 VEC (tree) *ubounds,
2244 VEC (int) *steps,
2245 VEC (tree) *loopivs)
2247 basic_block *bbs;
2248 tree exit_condition;
2249 tree then_label, else_label, cond_stmt;
2250 basic_block preheaderbb, headerbb, bodybb, latchbb, olddest;
2251 size_t i;
2252 block_stmt_iterator bsi;
2253 edge e;
2254 struct loop *newloop;
2255 tree phi;
2256 tree uboundvar;
2257 tree stmt;
2258 tree ivvar, ivvarinced;
2259 VEC (tree) *phis;
2261 if (!can_convert_to_perfect_nest (loop, loopivs))
2262 return false;
2264 phis = VEC_alloc (tree, 1);
2266 /* Create the new loop */
2268 olddest = loop->single_exit->dest;
2269 preheaderbb = loop_split_edge_with (loop->single_exit, NULL);
2270 headerbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2272 /* This is done because otherwise, it will release the ssa_name too early
2273 when the edge gets redirected and it will get reused, causing the use of
2274 the phi node to get rewritten. */
2276 for (phi = phi_nodes (olddest); phi; phi = PHI_CHAIN (phi))
2278 /* These should be simple exit phi copies. */
2279 if (PHI_NUM_ARGS (phi) != 1)
2280 return false;
2281 VEC_safe_push (tree, phis, PHI_RESULT (phi));
2282 VEC_safe_push (tree, phis, PHI_ARG_DEF (phi, 0));
2283 mark_for_rewrite (PHI_RESULT (phi));
2285 e = redirect_edge_and_branch (preheaderbb->succ, headerbb);
2286 unmark_all_for_rewrite ();
2287 bb_ann (olddest)->phi_nodes = NULL;
2288 /* Add back the old exit phis. */
2289 while (VEC_length (tree, phis) != 0)
2291 tree def;
2292 tree phiname;
2293 def = VEC_pop (tree, phis);
2294 phiname = VEC_pop (tree, phis);
2296 phi = create_phi_node (phiname, preheaderbb);
2297 add_phi_arg (&phi, def, preheaderbb->pred);
2300 nestify_update_pending_stmts (e);
2301 bodybb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2302 latchbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2303 make_edge (headerbb, bodybb, EDGE_FALLTHRU);
2304 then_label = build1 (GOTO_EXPR, void_type_node, tree_block_label (latchbb));
2305 else_label = build1 (GOTO_EXPR, void_type_node, tree_block_label (olddest));
2306 cond_stmt = build (COND_EXPR, void_type_node,
2307 build (NE_EXPR, boolean_type_node,
2308 integer_one_node,
2309 integer_zero_node),
2310 then_label, else_label);
2311 bsi = bsi_start (bodybb);
2312 bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
2313 e = make_edge (bodybb, olddest, EDGE_FALSE_VALUE);
2314 make_edge (bodybb, latchbb, EDGE_TRUE_VALUE);
2315 make_edge (latchbb, headerbb, EDGE_FALLTHRU);
2317 /* Update the loop structures. */
2318 newloop = duplicate_loop (loops, loop, olddest->loop_father);
2319 newloop->header = headerbb;
2320 newloop->latch = latchbb;
2321 newloop->single_exit = e;
2322 add_bb_to_loop (latchbb, newloop);
2323 add_bb_to_loop (bodybb, newloop);
2324 add_bb_to_loop (headerbb, newloop);
2325 add_bb_to_loop (preheaderbb, olddest->loop_father);
2326 set_immediate_dominator (CDI_DOMINATORS, bodybb, headerbb);
2327 set_immediate_dominator (CDI_DOMINATORS, headerbb, preheaderbb);
2328 set_immediate_dominator (CDI_DOMINATORS, preheaderbb,
2329 loop->single_exit->src);
2330 set_immediate_dominator (CDI_DOMINATORS, latchbb, bodybb);
2331 set_immediate_dominator (CDI_DOMINATORS, olddest, bodybb);
2332 /* Create the new iv. */
2333 ivvar = create_tmp_var (integer_type_node, "perfectiv");
2334 add_referenced_tmp_var (ivvar);
2335 bsi = bsi_last (newloop->latch->pred->src);
2336 create_iv (VEC_index (tree, lbounds, 0),
2337 build_int_cst (integer_type_node,
2338 VEC_index (int, steps, 0)),
2339 ivvar, newloop, &bsi, false, &ivvar, &ivvarinced);
2341 /* Create the new upper bound. This may be not just a variable, so we copy
2342 it to one just in case. */
2344 exit_condition = get_loop_exit_condition (newloop);
2345 uboundvar = create_tmp_var (integer_type_node, "uboundvar");
2346 add_referenced_tmp_var (uboundvar);
2347 stmt = build (MODIFY_EXPR, void_type_node, uboundvar,
2348 VEC_index (tree, ubounds, 0));
2349 uboundvar = make_ssa_name (uboundvar, stmt);
2350 TREE_OPERAND (stmt, 0) = uboundvar;
2351 bsi_insert_before (&bsi, stmt, BSI_SAME_STMT);
2352 COND_EXPR_COND (exit_condition) = build (LE_EXPR,
2353 boolean_type_node,
2354 ivvarinced,
2355 uboundvar);
2357 bbs = get_loop_body (loop);
2358 /* Now replace the induction variable in the moved statements with the
2359 correct loop induction variable. */
2360 for (i = 0; i < loop->num_nodes; i++)
2362 block_stmt_iterator tobsi = bsi_last (bodybb);
2363 if (bbs[i]->loop_father == loop)
2365 /* Note that the bsi only needs to be explicitly incremented
2366 when we don't move something, since it is automatically
2367 incremented when we do. */
2368 for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi);)
2370 tree stmt = bsi_stmt (bsi);
2371 if (stmt == exit_condition
2372 || not_interesting_stmt (stmt)
2373 || stmt_is_bumper_for_loop (loop, stmt))
2375 bsi_next (&bsi);
2376 continue;
2378 replace_uses_of_x_with_y (stmt,
2379 VEC_index (tree, loopivs, 0),
2380 ivvar);
2381 bsi_move_before (&bsi, &tobsi);
2385 free (bbs);
2386 flow_loops_find (loops, LOOP_ALL);
2387 return perfect_nest_p (loop);
2390 /* Return true if TRANS is a legal transformation matrix that respects
2391 the dependence vectors in DISTS and DIRS. The conservative answer
2392 is false.
2394 "Wolfe proves that a unimodular transformation represented by the
2395 matrix T is legal when applied to a loop nest with a set of
2396 lexicographically non-negative distance vectors RDG if and only if
2397 for each vector d in RDG, (T.d >= 0) is lexicographically positive.
2398 i.e.: if and only if it transforms the lexicographically positive
2399 distance vectors to lexicographically positive vectors. Note that
2400 a unimodular matrix must transform the zero vector (and only it) to
2401 the zero vector." S.Muchnick. */
2403 bool
2404 lambda_transform_legal_p (lambda_trans_matrix trans,
2405 int nb_loops,
2406 varray_type dependence_relations)
2408 unsigned int i;
2409 lambda_vector distres;
2410 struct data_dependence_relation *ddr;
2412 #if defined ENABLE_CHECKING
2413 if (LTM_COLSIZE (trans) != nb_loops
2414 || LTM_ROWSIZE (trans) != nb_loops)
2415 abort ();
2416 #endif
2418 /* When there is an unknown relation in the dependence_relations, we
2419 know that it is no worth looking at this loop nest: give up. */
2420 ddr = (struct data_dependence_relation *)
2421 VARRAY_GENERIC_PTR (dependence_relations, 0);
2422 if (ddr == NULL)
2423 return true;
2424 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2425 return false;
2427 distres = lambda_vector_new (nb_loops);
2429 /* For each distance vector in the dependence graph. */
2430 for (i = 0; i < VARRAY_ACTIVE_SIZE (dependence_relations); i++)
2432 ddr = (struct data_dependence_relation *)
2433 VARRAY_GENERIC_PTR (dependence_relations, i);
2437 /* Don't care about relations for which we know that there is no
2438 dependence, nor about read-read (aka. output-dependences):
2439 these data accesses can happen in any order. */
2440 if (DDR_ARE_DEPENDENT (ddr) == chrec_known
2441 || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
2442 continue;
2443 /* Conservatively answer: "this transformation is not valid". */
2444 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2445 return false;
2447 /* Compute trans.dist_vect */
2448 lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
2449 DDR_DIST_VECT (ddr), distres);
2451 if (!lambda_vector_lexico_pos (distres, nb_loops))
2452 return false;
2454 return true;