* configure.ac: (target_alias): Default to $host_alias, not
[official-gcc.git] / gcc / lambda-code.c
blobd564f431ea4c8371bbd330f0f47a163835d9deb8
1 /* Loop transformation code generation
2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dberlin@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "errors.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 #include "rtl.h"
31 #include "basic-block.h"
32 #include "diagnostic.h"
33 #include "tree-flow.h"
34 #include "tree-dump.h"
35 #include "timevar.h"
36 #include "cfgloop.h"
37 #include "expr.h"
38 #include "optabs.h"
39 #include "tree-chrec.h"
40 #include "tree-data-ref.h"
41 #include "tree-pass.h"
42 #include "tree-scalar-evolution.h"
43 #include "vec.h"
44 #include "lambda.h"
46 /* This loop nest code generation is based on non-singular matrix
47 math.
49 A little terminology and a general sketch of the algorithm. See "A singular
50 loop transformation framework based on non-singular matrices" by Wei Li and
51 Keshav Pingali for formal proofs that the various statements below are
52 correct.
54 A loop iteration space represents the points traversed by the loop. A point in the
55 iteration space can be represented by a vector of size <loop depth>. You can
56 therefore represent the iteration space as a integral combinations of a set
57 of basis vectors.
59 A loop iteration space is dense if every integer point between the loop
60 bounds is a point in the iteration space. Every loop with a step of 1
61 therefore has a dense iteration space.
63 for i = 1 to 3, step 1 is a dense iteration space.
65 A loop iteration space is sparse if it is not dense. That is, the iteration
66 space skips integer points that are within the loop bounds.
68 for i = 1 to 3, step 2 is a sparse iteration space, because the integer point
69 2 is skipped.
71 Dense source spaces are easy to transform, because they don't skip any
72 points to begin with. Thus we can compute the exact bounds of the target
73 space using min/max and floor/ceil.
75 For a dense source space, we take the transformation matrix, decompose it
76 into a lower triangular part (H) and a unimodular part (U).
77 We then compute the auxiliary space from the unimodular part (source loop
78 nest . U = auxiliary space) , which has two important properties:
79 1. It traverses the iterations in the same lexicographic order as the source
80 space.
81 2. It is a dense space when the source is a dense space (even if the target
82 space is going to be sparse).
84 Given the auxiliary space, we use the lower triangular part to compute the
85 bounds in the target space by simple matrix multiplication.
86 The gaps in the target space (IE the new loop step sizes) will be the
87 diagonals of the H matrix.
89 Sparse source spaces require another step, because you can't directly compute
90 the exact bounds of the auxiliary and target space from the sparse space.
91 Rather than try to come up with a separate algorithm to handle sparse source
92 spaces directly, we just find a legal transformation matrix that gives you
93 the sparse source space, from a dense space, and then transform the dense
94 space.
96 For a regular sparse space, you can represent the source space as an integer
97 lattice, and the base space of that lattice will always be dense. Thus, we
98 effectively use the lattice to figure out the transformation from the lattice
99 base space, to the sparse iteration space (IE what transform was applied to
100 the dense space to make it sparse). We then compose this transform with the
101 transformation matrix specified by the user (since our matrix transformations
102 are closed under composition, this is okay). We can then use the base space
103 (which is dense) plus the composed transformation matrix, to compute the rest
104 of the transform using the dense space algorithm above.
106 In other words, our sparse source space (B) is decomposed into a dense base
107 space (A), and a matrix (L) that transforms A into B, such that A.L = B.
108 We then compute the composition of L and the user transformation matrix (T),
109 so that T is now a transform from A to the result, instead of from B to the
110 result.
111 IE A.(LT) = result instead of B.T = result
112 Since A is now a dense source space, we can use the dense source space
113 algorithm above to compute the result of applying transform (LT) to A.
115 Fourier-Motzkin elimination is used to compute the bounds of the base space
116 of the lattice. */
119 DEF_VEC_GC_P(int);
121 static bool perfect_nestify (struct loops *,
122 struct loop *, VEC (tree) *,
123 VEC (tree) *, VEC (int) *, VEC (tree) *);
124 /* Lattice stuff that is internal to the code generation algorithm. */
126 typedef struct
128 /* Lattice base matrix. */
129 lambda_matrix base;
130 /* Lattice dimension. */
131 int dimension;
132 /* Origin vector for the coefficients. */
133 lambda_vector origin;
134 /* Origin matrix for the invariants. */
135 lambda_matrix origin_invariants;
136 /* Number of invariants. */
137 int invariants;
138 } *lambda_lattice;
140 #define LATTICE_BASE(T) ((T)->base)
141 #define LATTICE_DIMENSION(T) ((T)->dimension)
142 #define LATTICE_ORIGIN(T) ((T)->origin)
143 #define LATTICE_ORIGIN_INVARIANTS(T) ((T)->origin_invariants)
144 #define LATTICE_INVARIANTS(T) ((T)->invariants)
146 static bool lle_equal (lambda_linear_expression, lambda_linear_expression,
147 int, int);
148 static lambda_lattice lambda_lattice_new (int, int);
149 static lambda_lattice lambda_lattice_compute_base (lambda_loopnest);
151 static tree find_induction_var_from_exit_cond (struct loop *);
153 /* Create a new lambda body vector. */
155 lambda_body_vector
156 lambda_body_vector_new (int size)
158 lambda_body_vector ret;
160 ret = ggc_alloc (sizeof (*ret));
161 LBV_COEFFICIENTS (ret) = lambda_vector_new (size);
162 LBV_SIZE (ret) = size;
163 LBV_DENOMINATOR (ret) = 1;
164 return ret;
167 /* Compute the new coefficients for the vector based on the
168 *inverse* of the transformation matrix. */
170 lambda_body_vector
171 lambda_body_vector_compute_new (lambda_trans_matrix transform,
172 lambda_body_vector vect)
174 lambda_body_vector temp;
175 int depth;
177 /* Make sure the matrix is square. */
178 gcc_assert (LTM_ROWSIZE (transform) == LTM_COLSIZE (transform));
180 depth = LTM_ROWSIZE (transform);
182 temp = lambda_body_vector_new (depth);
183 LBV_DENOMINATOR (temp) =
184 LBV_DENOMINATOR (vect) * LTM_DENOMINATOR (transform);
185 lambda_vector_matrix_mult (LBV_COEFFICIENTS (vect), depth,
186 LTM_MATRIX (transform), depth,
187 LBV_COEFFICIENTS (temp));
188 LBV_SIZE (temp) = LBV_SIZE (vect);
189 return temp;
192 /* Print out a lambda body vector. */
194 void
195 print_lambda_body_vector (FILE * outfile, lambda_body_vector body)
197 print_lambda_vector (outfile, LBV_COEFFICIENTS (body), LBV_SIZE (body));
200 /* Return TRUE if two linear expressions are equal. */
202 static bool
203 lle_equal (lambda_linear_expression lle1, lambda_linear_expression lle2,
204 int depth, int invariants)
206 int i;
208 if (lle1 == NULL || lle2 == NULL)
209 return false;
210 if (LLE_CONSTANT (lle1) != LLE_CONSTANT (lle2))
211 return false;
212 if (LLE_DENOMINATOR (lle1) != LLE_DENOMINATOR (lle2))
213 return false;
214 for (i = 0; i < depth; i++)
215 if (LLE_COEFFICIENTS (lle1)[i] != LLE_COEFFICIENTS (lle2)[i])
216 return false;
217 for (i = 0; i < invariants; i++)
218 if (LLE_INVARIANT_COEFFICIENTS (lle1)[i] !=
219 LLE_INVARIANT_COEFFICIENTS (lle2)[i])
220 return false;
221 return true;
224 /* Create a new linear expression with dimension DIM, and total number
225 of invariants INVARIANTS. */
227 lambda_linear_expression
228 lambda_linear_expression_new (int dim, int invariants)
230 lambda_linear_expression ret;
232 ret = ggc_alloc_cleared (sizeof (*ret));
234 LLE_COEFFICIENTS (ret) = lambda_vector_new (dim);
235 LLE_CONSTANT (ret) = 0;
236 LLE_INVARIANT_COEFFICIENTS (ret) = lambda_vector_new (invariants);
237 LLE_DENOMINATOR (ret) = 1;
238 LLE_NEXT (ret) = NULL;
240 return ret;
243 /* Print out a linear expression EXPR, with SIZE coefficients, to OUTFILE.
244 The starting letter used for variable names is START. */
246 static void
247 print_linear_expression (FILE * outfile, lambda_vector expr, int size,
248 char start)
250 int i;
251 bool first = true;
252 for (i = 0; i < size; i++)
254 if (expr[i] != 0)
256 if (first)
258 if (expr[i] < 0)
259 fprintf (outfile, "-");
260 first = false;
262 else if (expr[i] > 0)
263 fprintf (outfile, " + ");
264 else
265 fprintf (outfile, " - ");
266 if (abs (expr[i]) == 1)
267 fprintf (outfile, "%c", start + i);
268 else
269 fprintf (outfile, "%d%c", abs (expr[i]), start + i);
274 /* Print out a lambda linear expression structure, EXPR, to OUTFILE. The
275 depth/number of coefficients is given by DEPTH, the number of invariants is
276 given by INVARIANTS, and the character to start variable names with is given
277 by START. */
279 void
280 print_lambda_linear_expression (FILE * outfile,
281 lambda_linear_expression expr,
282 int depth, int invariants, char start)
284 fprintf (outfile, "\tLinear expression: ");
285 print_linear_expression (outfile, LLE_COEFFICIENTS (expr), depth, start);
286 fprintf (outfile, " constant: %d ", LLE_CONSTANT (expr));
287 fprintf (outfile, " invariants: ");
288 print_linear_expression (outfile, LLE_INVARIANT_COEFFICIENTS (expr),
289 invariants, 'A');
290 fprintf (outfile, " denominator: %d\n", LLE_DENOMINATOR (expr));
293 /* Print a lambda loop structure LOOP to OUTFILE. The depth/number of
294 coefficients is given by DEPTH, the number of invariants is
295 given by INVARIANTS, and the character to start variable names with is given
296 by START. */
298 void
299 print_lambda_loop (FILE * outfile, lambda_loop loop, int depth,
300 int invariants, char start)
302 int step;
303 lambda_linear_expression expr;
305 gcc_assert (loop);
307 expr = LL_LINEAR_OFFSET (loop);
308 step = LL_STEP (loop);
309 fprintf (outfile, " step size = %d \n", step);
311 if (expr)
313 fprintf (outfile, " linear offset: \n");
314 print_lambda_linear_expression (outfile, expr, depth, invariants,
315 start);
318 fprintf (outfile, " lower bound: \n");
319 for (expr = LL_LOWER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr))
320 print_lambda_linear_expression (outfile, expr, depth, invariants, start);
321 fprintf (outfile, " upper bound: \n");
322 for (expr = LL_UPPER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr))
323 print_lambda_linear_expression (outfile, expr, depth, invariants, start);
326 /* Create a new loop nest structure with DEPTH loops, and INVARIANTS as the
327 number of invariants. */
329 lambda_loopnest
330 lambda_loopnest_new (int depth, int invariants)
332 lambda_loopnest ret;
333 ret = ggc_alloc (sizeof (*ret));
335 LN_LOOPS (ret) = ggc_alloc_cleared (depth * sizeof (lambda_loop));
336 LN_DEPTH (ret) = depth;
337 LN_INVARIANTS (ret) = invariants;
339 return ret;
342 /* Print a lambda loopnest structure, NEST, to OUTFILE. The starting
343 character to use for loop names is given by START. */
345 void
346 print_lambda_loopnest (FILE * outfile, lambda_loopnest nest, char start)
348 int i;
349 for (i = 0; i < LN_DEPTH (nest); i++)
351 fprintf (outfile, "Loop %c\n", start + i);
352 print_lambda_loop (outfile, LN_LOOPS (nest)[i], LN_DEPTH (nest),
353 LN_INVARIANTS (nest), 'i');
354 fprintf (outfile, "\n");
358 /* Allocate a new lattice structure of DEPTH x DEPTH, with INVARIANTS number
359 of invariants. */
361 static lambda_lattice
362 lambda_lattice_new (int depth, int invariants)
364 lambda_lattice ret;
365 ret = ggc_alloc (sizeof (*ret));
366 LATTICE_BASE (ret) = lambda_matrix_new (depth, depth);
367 LATTICE_ORIGIN (ret) = lambda_vector_new (depth);
368 LATTICE_ORIGIN_INVARIANTS (ret) = lambda_matrix_new (depth, invariants);
369 LATTICE_DIMENSION (ret) = depth;
370 LATTICE_INVARIANTS (ret) = invariants;
371 return ret;
374 /* Compute the lattice base for NEST. The lattice base is essentially a
375 non-singular transform from a dense base space to a sparse iteration space.
376 We use it so that we don't have to specially handle the case of a sparse
377 iteration space in other parts of the algorithm. As a result, this routine
378 only does something interesting (IE produce a matrix that isn't the
379 identity matrix) if NEST is a sparse space. */
381 static lambda_lattice
382 lambda_lattice_compute_base (lambda_loopnest nest)
384 lambda_lattice ret;
385 int depth, invariants;
386 lambda_matrix base;
388 int i, j, step;
389 lambda_loop loop;
390 lambda_linear_expression expression;
392 depth = LN_DEPTH (nest);
393 invariants = LN_INVARIANTS (nest);
395 ret = lambda_lattice_new (depth, invariants);
396 base = LATTICE_BASE (ret);
397 for (i = 0; i < depth; i++)
399 loop = LN_LOOPS (nest)[i];
400 gcc_assert (loop);
401 step = LL_STEP (loop);
402 /* If we have a step of 1, then the base is one, and the
403 origin and invariant coefficients are 0. */
404 if (step == 1)
406 for (j = 0; j < depth; j++)
407 base[i][j] = 0;
408 base[i][i] = 1;
409 LATTICE_ORIGIN (ret)[i] = 0;
410 for (j = 0; j < invariants; j++)
411 LATTICE_ORIGIN_INVARIANTS (ret)[i][j] = 0;
413 else
415 /* Otherwise, we need the lower bound expression (which must
416 be an affine function) to determine the base. */
417 expression = LL_LOWER_BOUND (loop);
418 gcc_assert (expression && !LLE_NEXT (expression)
419 && LLE_DENOMINATOR (expression) == 1);
421 /* The lower triangular portion of the base is going to be the
422 coefficient times the step */
423 for (j = 0; j < i; j++)
424 base[i][j] = LLE_COEFFICIENTS (expression)[j]
425 * LL_STEP (LN_LOOPS (nest)[j]);
426 base[i][i] = step;
427 for (j = i + 1; j < depth; j++)
428 base[i][j] = 0;
430 /* Origin for this loop is the constant of the lower bound
431 expression. */
432 LATTICE_ORIGIN (ret)[i] = LLE_CONSTANT (expression);
434 /* Coefficient for the invariants are equal to the invariant
435 coefficients in the expression. */
436 for (j = 0; j < invariants; j++)
437 LATTICE_ORIGIN_INVARIANTS (ret)[i][j] =
438 LLE_INVARIANT_COEFFICIENTS (expression)[j];
441 return ret;
444 /* Compute the greatest common denominator of two numbers (A and B) using
445 Euclid's algorithm. */
447 static int
448 gcd (int a, int b)
451 int x, y, z;
453 x = abs (a);
454 y = abs (b);
456 while (x > 0)
458 z = y % x;
459 y = x;
460 x = z;
463 return (y);
466 /* Compute the greatest common denominator of a VECTOR of SIZE numbers. */
468 static int
469 gcd_vector (lambda_vector vector, int size)
471 int i;
472 int gcd1 = 0;
474 if (size > 0)
476 gcd1 = vector[0];
477 for (i = 1; i < size; i++)
478 gcd1 = gcd (gcd1, vector[i]);
480 return gcd1;
483 /* Compute the least common multiple of two numbers A and B . */
485 static int
486 lcm (int a, int b)
488 return (abs (a) * abs (b) / gcd (a, b));
491 /* Perform Fourier-Motzkin elimination to calculate the bounds of the
492 auxillary nest.
493 Fourier-Motzkin is a way of reducing systems of linear inequalities so that
494 it is easy to calculate the answer and bounds.
495 A sketch of how it works:
496 Given a system of linear inequalities, ai * xj >= bk, you can always
497 rewrite the constraints so they are all of the form
498 a <= x, or x <= b, or x >= constant for some x in x1 ... xj (and some b
499 in b1 ... bk, and some a in a1...ai)
500 You can then eliminate this x from the non-constant inequalities by
501 rewriting these as a <= b, x >= constant, and delete the x variable.
502 You can then repeat this for any remaining x variables, and then we have
503 an easy to use variable <= constant (or no variables at all) form that we
504 can construct our bounds from.
506 In our case, each time we eliminate, we construct part of the bound from
507 the ith variable, then delete the ith variable.
509 Remember the constant are in our vector a, our coefficient matrix is A,
510 and our invariant coefficient matrix is B.
512 SIZE is the size of the matrices being passed.
513 DEPTH is the loop nest depth.
514 INVARIANTS is the number of loop invariants.
515 A, B, and a are the coefficient matrix, invariant coefficient, and a
516 vector of constants, respectively. */
518 static lambda_loopnest
519 compute_nest_using_fourier_motzkin (int size,
520 int depth,
521 int invariants,
522 lambda_matrix A,
523 lambda_matrix B,
524 lambda_vector a)
527 int multiple, f1, f2;
528 int i, j, k;
529 lambda_linear_expression expression;
530 lambda_loop loop;
531 lambda_loopnest auxillary_nest;
532 lambda_matrix swapmatrix, A1, B1;
533 lambda_vector swapvector, a1;
534 int newsize;
536 A1 = lambda_matrix_new (128, depth);
537 B1 = lambda_matrix_new (128, invariants);
538 a1 = lambda_vector_new (128);
540 auxillary_nest = lambda_loopnest_new (depth, invariants);
542 for (i = depth - 1; i >= 0; i--)
544 loop = lambda_loop_new ();
545 LN_LOOPS (auxillary_nest)[i] = loop;
546 LL_STEP (loop) = 1;
548 for (j = 0; j < size; j++)
550 if (A[j][i] < 0)
552 /* Any linear expression in the matrix with a coefficient less
553 than 0 becomes part of the new lower bound. */
554 expression = lambda_linear_expression_new (depth, invariants);
556 for (k = 0; k < i; k++)
557 LLE_COEFFICIENTS (expression)[k] = A[j][k];
559 for (k = 0; k < invariants; k++)
560 LLE_INVARIANT_COEFFICIENTS (expression)[k] = -1 * B[j][k];
562 LLE_DENOMINATOR (expression) = -1 * A[j][i];
563 LLE_CONSTANT (expression) = -1 * a[j];
565 /* Ignore if identical to the existing lower bound. */
566 if (!lle_equal (LL_LOWER_BOUND (loop),
567 expression, depth, invariants))
569 LLE_NEXT (expression) = LL_LOWER_BOUND (loop);
570 LL_LOWER_BOUND (loop) = expression;
574 else if (A[j][i] > 0)
576 /* Any linear expression with a coefficient greater than 0
577 becomes part of the new upper bound. */
578 expression = lambda_linear_expression_new (depth, invariants);
579 for (k = 0; k < i; k++)
580 LLE_COEFFICIENTS (expression)[k] = -1 * A[j][k];
582 for (k = 0; k < invariants; k++)
583 LLE_INVARIANT_COEFFICIENTS (expression)[k] = B[j][k];
585 LLE_DENOMINATOR (expression) = A[j][i];
586 LLE_CONSTANT (expression) = a[j];
588 /* Ignore if identical to the existing upper bound. */
589 if (!lle_equal (LL_UPPER_BOUND (loop),
590 expression, depth, invariants))
592 LLE_NEXT (expression) = LL_UPPER_BOUND (loop);
593 LL_UPPER_BOUND (loop) = expression;
599 /* This portion creates a new system of linear inequalities by deleting
600 the i'th variable, reducing the system by one variable. */
601 newsize = 0;
602 for (j = 0; j < size; j++)
604 /* If the coefficient for the i'th variable is 0, then we can just
605 eliminate the variable straightaway. Otherwise, we have to
606 multiply through by the coefficients we are eliminating. */
607 if (A[j][i] == 0)
609 lambda_vector_copy (A[j], A1[newsize], depth);
610 lambda_vector_copy (B[j], B1[newsize], invariants);
611 a1[newsize] = a[j];
612 newsize++;
614 else if (A[j][i] > 0)
616 for (k = 0; k < size; k++)
618 if (A[k][i] < 0)
620 multiple = lcm (A[j][i], A[k][i]);
621 f1 = multiple / A[j][i];
622 f2 = -1 * multiple / A[k][i];
624 lambda_vector_add_mc (A[j], f1, A[k], f2,
625 A1[newsize], depth);
626 lambda_vector_add_mc (B[j], f1, B[k], f2,
627 B1[newsize], invariants);
628 a1[newsize] = f1 * a[j] + f2 * a[k];
629 newsize++;
635 swapmatrix = A;
636 A = A1;
637 A1 = swapmatrix;
639 swapmatrix = B;
640 B = B1;
641 B1 = swapmatrix;
643 swapvector = a;
644 a = a1;
645 a1 = swapvector;
647 size = newsize;
650 return auxillary_nest;
653 /* Compute the loop bounds for the auxiliary space NEST.
654 Input system used is Ax <= b. TRANS is the unimodular transformation. */
656 static lambda_loopnest
657 lambda_compute_auxillary_space (lambda_loopnest nest,
658 lambda_trans_matrix trans)
660 lambda_matrix A, B, A1, B1;
661 lambda_vector a, a1;
662 lambda_matrix invertedtrans;
663 int determinant, depth, invariants, size;
664 int i, j;
665 lambda_loop loop;
666 lambda_linear_expression expression;
667 lambda_lattice lattice;
669 depth = LN_DEPTH (nest);
670 invariants = LN_INVARIANTS (nest);
672 /* Unfortunately, we can't know the number of constraints we'll have
673 ahead of time, but this should be enough even in ridiculous loop nest
674 cases. We abort if we go over this limit. */
675 A = lambda_matrix_new (128, depth);
676 B = lambda_matrix_new (128, invariants);
677 a = lambda_vector_new (128);
679 A1 = lambda_matrix_new (128, depth);
680 B1 = lambda_matrix_new (128, invariants);
681 a1 = lambda_vector_new (128);
683 /* Store the bounds in the equation matrix A, constant vector a, and
684 invariant matrix B, so that we have Ax <= a + B.
685 This requires a little equation rearranging so that everything is on the
686 correct side of the inequality. */
687 size = 0;
688 for (i = 0; i < depth; i++)
690 loop = LN_LOOPS (nest)[i];
692 /* First we do the lower bound. */
693 if (LL_STEP (loop) > 0)
694 expression = LL_LOWER_BOUND (loop);
695 else
696 expression = LL_UPPER_BOUND (loop);
698 for (; expression != NULL; expression = LLE_NEXT (expression))
700 /* Fill in the coefficient. */
701 for (j = 0; j < i; j++)
702 A[size][j] = LLE_COEFFICIENTS (expression)[j];
704 /* And the invariant coefficient. */
705 for (j = 0; j < invariants; j++)
706 B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j];
708 /* And the constant. */
709 a[size] = LLE_CONSTANT (expression);
711 /* Convert (2x+3y+2+b)/4 <= z to 2x+3y-4z <= -2-b. IE put all
712 constants and single variables on */
713 A[size][i] = -1 * LLE_DENOMINATOR (expression);
714 a[size] *= -1;
715 for (j = 0; j < invariants; j++)
716 B[size][j] *= -1;
718 size++;
719 /* Need to increase matrix sizes above. */
720 gcc_assert (size <= 127);
724 /* Then do the exact same thing for the upper bounds. */
725 if (LL_STEP (loop) > 0)
726 expression = LL_UPPER_BOUND (loop);
727 else
728 expression = LL_LOWER_BOUND (loop);
730 for (; expression != NULL; expression = LLE_NEXT (expression))
732 /* Fill in the coefficient. */
733 for (j = 0; j < i; j++)
734 A[size][j] = LLE_COEFFICIENTS (expression)[j];
736 /* And the invariant coefficient. */
737 for (j = 0; j < invariants; j++)
738 B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j];
740 /* And the constant. */
741 a[size] = LLE_CONSTANT (expression);
743 /* Convert z <= (2x+3y+2+b)/4 to -2x-3y+4z <= 2+b. */
744 for (j = 0; j < i; j++)
745 A[size][j] *= -1;
746 A[size][i] = LLE_DENOMINATOR (expression);
747 size++;
748 /* Need to increase matrix sizes above. */
749 gcc_assert (size <= 127);
754 /* Compute the lattice base x = base * y + origin, where y is the
755 base space. */
756 lattice = lambda_lattice_compute_base (nest);
758 /* Ax <= a + B then becomes ALy <= a+B - A*origin. L is the lattice base */
760 /* A1 = A * L */
761 lambda_matrix_mult (A, LATTICE_BASE (lattice), A1, size, depth, depth);
763 /* a1 = a - A * origin constant. */
764 lambda_matrix_vector_mult (A, size, depth, LATTICE_ORIGIN (lattice), a1);
765 lambda_vector_add_mc (a, 1, a1, -1, a1, size);
767 /* B1 = B - A * origin invariant. */
768 lambda_matrix_mult (A, LATTICE_ORIGIN_INVARIANTS (lattice), B1, size, depth,
769 invariants);
770 lambda_matrix_add_mc (B, 1, B1, -1, B1, size, invariants);
772 /* Now compute the auxiliary space bounds by first inverting U, multiplying
773 it by A1, then performing fourier motzkin. */
775 invertedtrans = lambda_matrix_new (depth, depth);
777 /* Compute the inverse of U. */
778 determinant = lambda_matrix_inverse (LTM_MATRIX (trans),
779 invertedtrans, depth);
781 /* A = A1 inv(U). */
782 lambda_matrix_mult (A1, invertedtrans, A, size, depth, depth);
784 return compute_nest_using_fourier_motzkin (size, depth, invariants,
785 A, B1, a1);
788 /* Compute the loop bounds for the target space, using the bounds of
789 the auxiliary nest AUXILLARY_NEST, and the triangular matrix H. This is
790 done by matrix multiplication and then transformation of the new matrix
791 back into linear expression form.
792 Return the target loopnest. */
794 static lambda_loopnest
795 lambda_compute_target_space (lambda_loopnest auxillary_nest,
796 lambda_trans_matrix H, lambda_vector stepsigns)
798 lambda_matrix inverse, H1;
799 int determinant, i, j;
800 int gcd1, gcd2;
801 int factor;
803 lambda_loopnest target_nest;
804 int depth, invariants;
805 lambda_matrix target;
807 lambda_loop auxillary_loop, target_loop;
808 lambda_linear_expression expression, auxillary_expr, target_expr, tmp_expr;
810 depth = LN_DEPTH (auxillary_nest);
811 invariants = LN_INVARIANTS (auxillary_nest);
813 inverse = lambda_matrix_new (depth, depth);
814 determinant = lambda_matrix_inverse (LTM_MATRIX (H), inverse, depth);
816 /* H1 is H excluding its diagonal. */
817 H1 = lambda_matrix_new (depth, depth);
818 lambda_matrix_copy (LTM_MATRIX (H), H1, depth, depth);
820 for (i = 0; i < depth; i++)
821 H1[i][i] = 0;
823 /* Computes the linear offsets of the loop bounds. */
824 target = lambda_matrix_new (depth, depth);
825 lambda_matrix_mult (H1, inverse, target, depth, depth, depth);
827 target_nest = lambda_loopnest_new (depth, invariants);
829 for (i = 0; i < depth; i++)
832 /* Get a new loop structure. */
833 target_loop = lambda_loop_new ();
834 LN_LOOPS (target_nest)[i] = target_loop;
836 /* Computes the gcd of the coefficients of the linear part. */
837 gcd1 = gcd_vector (target[i], i);
839 /* Include the denominator in the GCD. */
840 gcd1 = gcd (gcd1, determinant);
842 /* Now divide through by the gcd. */
843 for (j = 0; j < i; j++)
844 target[i][j] = target[i][j] / gcd1;
846 expression = lambda_linear_expression_new (depth, invariants);
847 lambda_vector_copy (target[i], LLE_COEFFICIENTS (expression), depth);
848 LLE_DENOMINATOR (expression) = determinant / gcd1;
849 LLE_CONSTANT (expression) = 0;
850 lambda_vector_clear (LLE_INVARIANT_COEFFICIENTS (expression),
851 invariants);
852 LL_LINEAR_OFFSET (target_loop) = expression;
855 /* For each loop, compute the new bounds from H. */
856 for (i = 0; i < depth; i++)
858 auxillary_loop = LN_LOOPS (auxillary_nest)[i];
859 target_loop = LN_LOOPS (target_nest)[i];
860 LL_STEP (target_loop) = LTM_MATRIX (H)[i][i];
861 factor = LTM_MATRIX (H)[i][i];
863 /* First we do the lower bound. */
864 auxillary_expr = LL_LOWER_BOUND (auxillary_loop);
866 for (; auxillary_expr != NULL;
867 auxillary_expr = LLE_NEXT (auxillary_expr))
869 target_expr = lambda_linear_expression_new (depth, invariants);
870 lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
871 depth, inverse, depth,
872 LLE_COEFFICIENTS (target_expr));
873 lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
874 LLE_COEFFICIENTS (target_expr), depth,
875 factor);
877 LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
878 lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
879 LLE_INVARIANT_COEFFICIENTS (target_expr),
880 invariants);
881 lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
882 LLE_INVARIANT_COEFFICIENTS (target_expr),
883 invariants, factor);
884 LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);
886 if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
888 LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
889 * determinant;
890 lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
891 (target_expr),
892 LLE_INVARIANT_COEFFICIENTS
893 (target_expr), invariants,
894 determinant);
895 LLE_DENOMINATOR (target_expr) =
896 LLE_DENOMINATOR (target_expr) * determinant;
898 /* Find the gcd and divide by it here, rather than doing it
899 at the tree level. */
900 gcd1 = gcd_vector (LLE_COEFFICIENTS (target_expr), depth);
901 gcd2 = gcd_vector (LLE_INVARIANT_COEFFICIENTS (target_expr),
902 invariants);
903 gcd1 = gcd (gcd1, gcd2);
904 gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
905 gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
906 for (j = 0; j < depth; j++)
907 LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
908 for (j = 0; j < invariants; j++)
909 LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
910 LLE_CONSTANT (target_expr) /= gcd1;
911 LLE_DENOMINATOR (target_expr) /= gcd1;
912 /* Ignore if identical to existing bound. */
913 if (!lle_equal (LL_LOWER_BOUND (target_loop), target_expr, depth,
914 invariants))
916 LLE_NEXT (target_expr) = LL_LOWER_BOUND (target_loop);
917 LL_LOWER_BOUND (target_loop) = target_expr;
920 /* Now do the upper bound. */
921 auxillary_expr = LL_UPPER_BOUND (auxillary_loop);
923 for (; auxillary_expr != NULL;
924 auxillary_expr = LLE_NEXT (auxillary_expr))
926 target_expr = lambda_linear_expression_new (depth, invariants);
927 lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
928 depth, inverse, depth,
929 LLE_COEFFICIENTS (target_expr));
930 lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
931 LLE_COEFFICIENTS (target_expr), depth,
932 factor);
933 LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
934 lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
935 LLE_INVARIANT_COEFFICIENTS (target_expr),
936 invariants);
937 lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
938 LLE_INVARIANT_COEFFICIENTS (target_expr),
939 invariants, factor);
940 LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);
942 if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
944 LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
945 * determinant;
946 lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
947 (target_expr),
948 LLE_INVARIANT_COEFFICIENTS
949 (target_expr), invariants,
950 determinant);
951 LLE_DENOMINATOR (target_expr) =
952 LLE_DENOMINATOR (target_expr) * determinant;
954 /* Find the gcd and divide by it here, instead of at the
955 tree level. */
956 gcd1 = gcd_vector (LLE_COEFFICIENTS (target_expr), depth);
957 gcd2 = gcd_vector (LLE_INVARIANT_COEFFICIENTS (target_expr),
958 invariants);
959 gcd1 = gcd (gcd1, gcd2);
960 gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
961 gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
962 for (j = 0; j < depth; j++)
963 LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
964 for (j = 0; j < invariants; j++)
965 LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
966 LLE_CONSTANT (target_expr) /= gcd1;
967 LLE_DENOMINATOR (target_expr) /= gcd1;
968 /* Ignore if equal to existing bound. */
969 if (!lle_equal (LL_UPPER_BOUND (target_loop), target_expr, depth,
970 invariants))
972 LLE_NEXT (target_expr) = LL_UPPER_BOUND (target_loop);
973 LL_UPPER_BOUND (target_loop) = target_expr;
977 for (i = 0; i < depth; i++)
979 target_loop = LN_LOOPS (target_nest)[i];
980 /* If necessary, exchange the upper and lower bounds and negate
981 the step size. */
982 if (stepsigns[i] < 0)
984 LL_STEP (target_loop) *= -1;
985 tmp_expr = LL_LOWER_BOUND (target_loop);
986 LL_LOWER_BOUND (target_loop) = LL_UPPER_BOUND (target_loop);
987 LL_UPPER_BOUND (target_loop) = tmp_expr;
990 return target_nest;
993 /* Compute the step signs of TRANS, using TRANS and stepsigns. Return the new
994 result. */
996 static lambda_vector
997 lambda_compute_step_signs (lambda_trans_matrix trans, lambda_vector stepsigns)
999 lambda_matrix matrix, H;
1000 int size;
1001 lambda_vector newsteps;
1002 int i, j, factor, minimum_column;
1003 int temp;
1005 matrix = LTM_MATRIX (trans);
1006 size = LTM_ROWSIZE (trans);
1007 H = lambda_matrix_new (size, size);
1009 newsteps = lambda_vector_new (size);
1010 lambda_vector_copy (stepsigns, newsteps, size);
1012 lambda_matrix_copy (matrix, H, size, size);
1014 for (j = 0; j < size; j++)
1016 lambda_vector row;
1017 row = H[j];
1018 for (i = j; i < size; i++)
1019 if (row[i] < 0)
1020 lambda_matrix_col_negate (H, size, i);
1021 while (lambda_vector_first_nz (row, size, j + 1) < size)
1023 minimum_column = lambda_vector_min_nz (row, size, j);
1024 lambda_matrix_col_exchange (H, size, j, minimum_column);
1026 temp = newsteps[j];
1027 newsteps[j] = newsteps[minimum_column];
1028 newsteps[minimum_column] = temp;
1030 for (i = j + 1; i < size; i++)
1032 factor = row[i] / row[j];
1033 lambda_matrix_col_add (H, size, j, i, -1 * factor);
1037 return newsteps;
1040 /* Transform NEST according to TRANS, and return the new loopnest.
1041 This involves
1042 1. Computing a lattice base for the transformation
1043 2. Composing the dense base with the specified transformation (TRANS)
1044 3. Decomposing the combined transformation into a lower triangular portion,
1045 and a unimodular portion.
1046 4. Computing the auxillary nest using the unimodular portion.
1047 5. Computing the target nest using the auxillary nest and the lower
1048 triangular portion. */
1050 lambda_loopnest
1051 lambda_loopnest_transform (lambda_loopnest nest, lambda_trans_matrix trans)
1053 lambda_loopnest auxillary_nest, target_nest;
1055 int depth, invariants;
1056 int i, j;
1057 lambda_lattice lattice;
1058 lambda_trans_matrix trans1, H, U;
1059 lambda_loop loop;
1060 lambda_linear_expression expression;
1061 lambda_vector origin;
1062 lambda_matrix origin_invariants;
1063 lambda_vector stepsigns;
1064 int f;
1066 depth = LN_DEPTH (nest);
1067 invariants = LN_INVARIANTS (nest);
1069 /* Keep track of the signs of the loop steps. */
1070 stepsigns = lambda_vector_new (depth);
1071 for (i = 0; i < depth; i++)
1073 if (LL_STEP (LN_LOOPS (nest)[i]) > 0)
1074 stepsigns[i] = 1;
1075 else
1076 stepsigns[i] = -1;
1079 /* Compute the lattice base. */
1080 lattice = lambda_lattice_compute_base (nest);
1081 trans1 = lambda_trans_matrix_new (depth, depth);
1083 /* Multiply the transformation matrix by the lattice base. */
1085 lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_BASE (lattice),
1086 LTM_MATRIX (trans1), depth, depth, depth);
1088 /* Compute the Hermite normal form for the new transformation matrix. */
1089 H = lambda_trans_matrix_new (depth, depth);
1090 U = lambda_trans_matrix_new (depth, depth);
1091 lambda_matrix_hermite (LTM_MATRIX (trans1), depth, LTM_MATRIX (H),
1092 LTM_MATRIX (U));
1094 /* Compute the auxiliary loop nest's space from the unimodular
1095 portion. */
1096 auxillary_nest = lambda_compute_auxillary_space (nest, U);
1098 /* Compute the loop step signs from the old step signs and the
1099 transformation matrix. */
1100 stepsigns = lambda_compute_step_signs (trans1, stepsigns);
1102 /* Compute the target loop nest space from the auxiliary nest and
1103 the lower triangular matrix H. */
1104 target_nest = lambda_compute_target_space (auxillary_nest, H, stepsigns);
1105 origin = lambda_vector_new (depth);
1106 origin_invariants = lambda_matrix_new (depth, invariants);
1107 lambda_matrix_vector_mult (LTM_MATRIX (trans), depth, depth,
1108 LATTICE_ORIGIN (lattice), origin);
1109 lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_ORIGIN_INVARIANTS (lattice),
1110 origin_invariants, depth, depth, invariants);
1112 for (i = 0; i < depth; i++)
1114 loop = LN_LOOPS (target_nest)[i];
1115 expression = LL_LINEAR_OFFSET (loop);
1116 if (lambda_vector_zerop (LLE_COEFFICIENTS (expression), depth))
1117 f = 1;
1118 else
1119 f = LLE_DENOMINATOR (expression);
1121 LLE_CONSTANT (expression) += f * origin[i];
1123 for (j = 0; j < invariants; j++)
1124 LLE_INVARIANT_COEFFICIENTS (expression)[j] +=
1125 f * origin_invariants[i][j];
1128 return target_nest;
1132 /* Convert a gcc tree expression EXPR to a lambda linear expression, and
1133 return the new expression. DEPTH is the depth of the loopnest.
1134 OUTERINDUCTIONVARS is an array of the induction variables for outer loops
1135 in this nest. INVARIANTS is the array of invariants for the loop. EXTRA
1136 is the amount we have to add/subtract from the expression because of the
1137 type of comparison it is used in. */
1139 static lambda_linear_expression
1140 gcc_tree_to_linear_expression (int depth, tree expr,
1141 VEC(tree) *outerinductionvars,
1142 VEC(tree) *invariants, int extra)
1144 lambda_linear_expression lle = NULL;
1145 switch (TREE_CODE (expr))
1147 case INTEGER_CST:
1149 lle = lambda_linear_expression_new (depth, 2 * depth);
1150 LLE_CONSTANT (lle) = TREE_INT_CST_LOW (expr);
1151 if (extra != 0)
1152 LLE_CONSTANT (lle) += extra;
1154 LLE_DENOMINATOR (lle) = 1;
1156 break;
1157 case SSA_NAME:
1159 tree iv, invar;
1160 size_t i;
1161 for (i = 0; VEC_iterate (tree, outerinductionvars, i, iv); i++)
1162 if (iv != NULL)
1164 if (SSA_NAME_VAR (iv) == SSA_NAME_VAR (expr))
1166 lle = lambda_linear_expression_new (depth, 2 * depth);
1167 LLE_COEFFICIENTS (lle)[i] = 1;
1168 if (extra != 0)
1169 LLE_CONSTANT (lle) = extra;
1171 LLE_DENOMINATOR (lle) = 1;
1174 for (i = 0; VEC_iterate (tree, invariants, i, invar); i++)
1175 if (invar != NULL)
1177 if (SSA_NAME_VAR (invar) == SSA_NAME_VAR (expr))
1179 lle = lambda_linear_expression_new (depth, 2 * depth);
1180 LLE_INVARIANT_COEFFICIENTS (lle)[i] = 1;
1181 if (extra != 0)
1182 LLE_CONSTANT (lle) = extra;
1183 LLE_DENOMINATOR (lle) = 1;
1187 break;
1188 default:
1189 return NULL;
1192 return lle;
1195 /* Return the depth of the loopnest NEST */
1197 static int
1198 depth_of_nest (struct loop *nest)
1200 size_t depth = 0;
1201 while (nest)
1203 depth++;
1204 nest = nest->inner;
1206 return depth;
1210 /* Return true if OP is invariant in LOOP and all outer loops. */
1212 static bool
1213 invariant_in_loop_and_outer_loops (struct loop *loop, tree op)
1215 if (is_gimple_min_invariant (op))
1216 return true;
1217 if (loop->depth == 0)
1218 return true;
1219 if (!expr_invariant_in_loop_p (loop, op))
1220 return false;
1221 if (loop->outer
1222 && !invariant_in_loop_and_outer_loops (loop->outer, op))
1223 return false;
1224 return true;
1227 /* Generate a lambda loop from a gcc loop LOOP. Return the new lambda loop,
1228 or NULL if it could not be converted.
1229 DEPTH is the depth of the loop.
1230 INVARIANTS is a pointer to the array of loop invariants.
1231 The induction variable for this loop should be stored in the parameter
1232 OURINDUCTIONVAR.
1233 OUTERINDUCTIONVARS is an array of induction variables for outer loops. */
1235 static lambda_loop
1236 gcc_loop_to_lambda_loop (struct loop *loop, int depth,
1237 VEC (tree) ** invariants,
1238 tree * ourinductionvar,
1239 VEC (tree) * outerinductionvars,
1240 VEC (tree) ** lboundvars,
1241 VEC (tree) ** uboundvars,
1242 VEC (int) ** steps)
1244 tree phi;
1245 tree exit_cond;
1246 tree access_fn, inductionvar;
1247 tree step;
1248 lambda_loop lloop = NULL;
1249 lambda_linear_expression lbound, ubound;
1250 tree test;
1251 int stepint;
1252 int extra = 0;
1253 tree lboundvar, uboundvar, uboundresult;
1254 use_optype uses;
1256 /* Find out induction var and exit condition. */
1257 inductionvar = find_induction_var_from_exit_cond (loop);
1258 exit_cond = get_loop_exit_condition (loop);
1260 if (inductionvar == NULL || exit_cond == NULL)
1262 if (dump_file && (dump_flags & TDF_DETAILS))
1263 fprintf (dump_file,
1264 "Unable to convert loop: Cannot determine exit condition or induction variable for loop.\n");
1265 return NULL;
1268 test = TREE_OPERAND (exit_cond, 0);
1270 if (SSA_NAME_DEF_STMT (inductionvar) == NULL_TREE)
1273 if (dump_file && (dump_flags & TDF_DETAILS))
1274 fprintf (dump_file,
1275 "Unable to convert loop: Cannot find PHI node for induction variable\n");
1277 return NULL;
1280 phi = SSA_NAME_DEF_STMT (inductionvar);
1281 if (TREE_CODE (phi) != PHI_NODE)
1283 get_stmt_operands (phi);
1284 uses = STMT_USE_OPS (phi);
1286 if (!uses)
1289 if (dump_file && (dump_flags & TDF_DETAILS))
1290 fprintf (dump_file,
1291 "Unable to convert loop: Cannot find PHI node for induction variable\n");
1293 return NULL;
1296 phi = USE_OP (uses, 0);
1297 phi = SSA_NAME_DEF_STMT (phi);
1298 if (TREE_CODE (phi) != PHI_NODE)
1301 if (dump_file && (dump_flags & TDF_DETAILS))
1302 fprintf (dump_file,
1303 "Unable to convert loop: Cannot find PHI node for induction variable\n");
1304 return NULL;
1309 /* The induction variable name/version we want to put in the array is the
1310 result of the induction variable phi node. */
1311 *ourinductionvar = PHI_RESULT (phi);
1312 access_fn = instantiate_parameters
1313 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
1314 if (access_fn == chrec_dont_know)
1316 if (dump_file && (dump_flags & TDF_DETAILS))
1317 fprintf (dump_file,
1318 "Unable to convert loop: Access function for induction variable phi is unknown\n");
1320 return NULL;
1323 step = evolution_part_in_loop_num (access_fn, loop->num);
1324 if (!step || step == chrec_dont_know)
1326 if (dump_file && (dump_flags & TDF_DETAILS))
1327 fprintf (dump_file,
1328 "Unable to convert loop: Cannot determine step of loop.\n");
1330 return NULL;
1332 if (TREE_CODE (step) != INTEGER_CST)
1335 if (dump_file && (dump_flags & TDF_DETAILS))
1336 fprintf (dump_file,
1337 "Unable to convert loop: Step of loop is not integer.\n");
1338 return NULL;
1341 stepint = TREE_INT_CST_LOW (step);
1343 /* Only want phis for induction vars, which will have two
1344 arguments. */
1345 if (PHI_NUM_ARGS (phi) != 2)
1347 if (dump_file && (dump_flags & TDF_DETAILS))
1348 fprintf (dump_file,
1349 "Unable to convert loop: PHI node for induction variable has >2 arguments\n");
1350 return NULL;
1353 /* Another induction variable check. One argument's source should be
1354 in the loop, one outside the loop. */
1355 if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 0)->src)
1356 && flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 1)->src))
1359 if (dump_file && (dump_flags & TDF_DETAILS))
1360 fprintf (dump_file,
1361 "Unable to convert loop: PHI edges both inside loop, or both outside loop.\n");
1363 return NULL;
1366 if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 0)->src))
1368 lboundvar = PHI_ARG_DEF (phi, 1);
1369 lbound = gcc_tree_to_linear_expression (depth, lboundvar,
1370 outerinductionvars, *invariants,
1373 else
1375 lboundvar = PHI_ARG_DEF (phi, 0);
1376 lbound = gcc_tree_to_linear_expression (depth, lboundvar,
1377 outerinductionvars, *invariants,
1381 if (!lbound)
1384 if (dump_file && (dump_flags & TDF_DETAILS))
1385 fprintf (dump_file,
1386 "Unable to convert loop: Cannot convert lower bound to linear expression\n");
1388 return NULL;
1390 /* One part of the test may be a loop invariant tree. */
1391 if (TREE_CODE (TREE_OPERAND (test, 1)) == SSA_NAME
1392 && invariant_in_loop_and_outer_loops (loop, TREE_OPERAND (test, 1)))
1393 VEC_safe_push (tree, *invariants, TREE_OPERAND (test, 1));
1394 else if (TREE_CODE (TREE_OPERAND (test, 0)) == SSA_NAME
1395 && invariant_in_loop_and_outer_loops (loop, TREE_OPERAND (test, 0)))
1396 VEC_safe_push (tree, *invariants, TREE_OPERAND (test, 0));
1398 /* The non-induction variable part of the test is the upper bound variable.
1400 if (TREE_OPERAND (test, 0) == inductionvar)
1401 uboundvar = TREE_OPERAND (test, 1);
1402 else
1403 uboundvar = TREE_OPERAND (test, 0);
1406 /* We only size the vectors assuming we have, at max, 2 times as many
1407 invariants as we do loops (one for each bound).
1408 This is just an arbitrary number, but it has to be matched against the
1409 code below. */
1410 gcc_assert (VEC_length (tree, *invariants) <= (unsigned int) (2 * depth));
1413 /* We might have some leftover. */
1414 if (TREE_CODE (test) == LT_EXPR)
1415 extra = -1 * stepint;
1416 else if (TREE_CODE (test) == NE_EXPR)
1417 extra = -1 * stepint;
1418 else if (TREE_CODE (test) == GT_EXPR)
1419 extra = -1 * stepint;
1420 else if (TREE_CODE (test) == EQ_EXPR)
1421 extra = 1 * stepint;
1423 ubound = gcc_tree_to_linear_expression (depth, uboundvar,
1424 outerinductionvars,
1425 *invariants, extra);
1426 uboundresult = build (PLUS_EXPR, TREE_TYPE (uboundvar), uboundvar,
1427 build_int_cst (TREE_TYPE (uboundvar), extra));
1428 VEC_safe_push (tree, *uboundvars, uboundresult);
1429 VEC_safe_push (tree, *lboundvars, lboundvar);
1430 VEC_safe_push (int, *steps, stepint);
1431 if (!ubound)
1433 if (dump_file && (dump_flags & TDF_DETAILS))
1434 fprintf (dump_file,
1435 "Unable to convert loop: Cannot convert upper bound to linear expression\n");
1436 return NULL;
1439 lloop = lambda_loop_new ();
1440 LL_STEP (lloop) = stepint;
1441 LL_LOWER_BOUND (lloop) = lbound;
1442 LL_UPPER_BOUND (lloop) = ubound;
1443 return lloop;
1446 /* Given a LOOP, find the induction variable it is testing against in the exit
1447 condition. Return the induction variable if found, NULL otherwise. */
1449 static tree
1450 find_induction_var_from_exit_cond (struct loop *loop)
1452 tree expr = get_loop_exit_condition (loop);
1453 tree ivarop;
1454 tree test;
1455 if (expr == NULL_TREE)
1456 return NULL_TREE;
1457 if (TREE_CODE (expr) != COND_EXPR)
1458 return NULL_TREE;
1459 test = TREE_OPERAND (expr, 0);
1460 if (!COMPARISON_CLASS_P (test))
1461 return NULL_TREE;
1463 /* Find the side that is invariant in this loop. The ivar must be the other
1464 side. */
1466 if (expr_invariant_in_loop_p (loop, TREE_OPERAND (test, 0)))
1467 ivarop = TREE_OPERAND (test, 1);
1468 else if (expr_invariant_in_loop_p (loop, TREE_OPERAND (test, 1)))
1469 ivarop = TREE_OPERAND (test, 0);
1470 else
1471 return NULL_TREE;
1473 if (TREE_CODE (ivarop) != SSA_NAME)
1474 return NULL_TREE;
1475 return ivarop;
1478 DEF_VEC_GC_P(lambda_loop);
1479 /* Generate a lambda loopnest from a gcc loopnest LOOP_NEST.
1480 Return the new loop nest.
1481 INDUCTIONVARS is a pointer to an array of induction variables for the
1482 loopnest that will be filled in during this process.
1483 INVARIANTS is a pointer to an array of invariants that will be filled in
1484 during this process. */
1486 lambda_loopnest
1487 gcc_loopnest_to_lambda_loopnest (struct loops *currloops,
1488 struct loop * loop_nest,
1489 VEC (tree) **inductionvars,
1490 VEC (tree) **invariants,
1491 bool need_perfect_nest)
1493 lambda_loopnest ret;
1494 struct loop *temp;
1495 int depth = 0;
1496 size_t i;
1497 VEC (lambda_loop) *loops = NULL;
1498 VEC (tree) *uboundvars = NULL;
1499 VEC (tree) *lboundvars = NULL;
1500 VEC (int) *steps = NULL;
1501 lambda_loop newloop;
1502 tree inductionvar = NULL;
1504 depth = depth_of_nest (loop_nest);
1505 temp = loop_nest;
1506 while (temp)
1508 newloop = gcc_loop_to_lambda_loop (temp, depth, invariants,
1509 &inductionvar, *inductionvars,
1510 &lboundvars, &uboundvars,
1511 &steps);
1512 if (!newloop)
1513 return NULL;
1514 VEC_safe_push (tree, *inductionvars, inductionvar);
1515 VEC_safe_push (lambda_loop, loops, newloop);
1516 temp = temp->inner;
1518 if (need_perfect_nest)
1520 if (!perfect_nestify (currloops, loop_nest,
1521 lboundvars, uboundvars, steps, *inductionvars))
1523 if (dump_file)
1524 fprintf (dump_file, "Not a perfect loop nest and couldn't convert to one.\n");
1525 return NULL;
1527 else if (dump_file)
1528 fprintf (dump_file, "Successfully converted loop nest to perfect loop nest.\n");
1532 ret = lambda_loopnest_new (depth, 2 * depth);
1533 for (i = 0; VEC_iterate (lambda_loop, loops, i, newloop); i++)
1534 LN_LOOPS (ret)[i] = newloop;
1536 return ret;
1541 /* Convert a lambda body vector LBV to a gcc tree, and return the new tree.
1542 STMTS_TO_INSERT is a pointer to a tree where the statements we need to be
1543 inserted for us are stored. INDUCTION_VARS is the array of induction
1544 variables for the loop this LBV is from. TYPE is the tree type to use for
1545 the variables and trees involved. */
1547 static tree
1548 lbv_to_gcc_expression (lambda_body_vector lbv,
1549 tree type, VEC (tree) *induction_vars,
1550 tree * stmts_to_insert)
1552 tree stmts, stmt, resvar, name;
1553 tree iv;
1554 size_t i;
1555 tree_stmt_iterator tsi;
1557 /* Create a statement list and a linear expression temporary. */
1558 stmts = alloc_stmt_list ();
1559 resvar = create_tmp_var (type, "lbvtmp");
1560 add_referenced_tmp_var (resvar);
1562 /* Start at 0. */
1563 stmt = build (MODIFY_EXPR, void_type_node, resvar, integer_zero_node);
1564 name = make_ssa_name (resvar, stmt);
1565 TREE_OPERAND (stmt, 0) = name;
1566 tsi = tsi_last (stmts);
1567 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1569 for (i = 0; VEC_iterate (tree, induction_vars, i, iv); i++)
1571 if (LBV_COEFFICIENTS (lbv)[i] != 0)
1573 tree newname;
1574 tree coeffmult;
1576 /* newname = coefficient * induction_variable */
1577 coeffmult = build_int_cst (type, LBV_COEFFICIENTS (lbv)[i]);
1578 stmt = build (MODIFY_EXPR, void_type_node, resvar,
1579 fold (build (MULT_EXPR, type, iv, coeffmult)));
1581 newname = make_ssa_name (resvar, stmt);
1582 TREE_OPERAND (stmt, 0) = newname;
1583 fold_stmt (&stmt);
1584 tsi = tsi_last (stmts);
1585 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1587 /* name = name + newname */
1588 stmt = build (MODIFY_EXPR, void_type_node, resvar,
1589 build (PLUS_EXPR, type, name, newname));
1590 name = make_ssa_name (resvar, stmt);
1591 TREE_OPERAND (stmt, 0) = name;
1592 fold_stmt (&stmt);
1593 tsi = tsi_last (stmts);
1594 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1599 /* Handle any denominator that occurs. */
1600 if (LBV_DENOMINATOR (lbv) != 1)
1602 tree denominator = build_int_cst (type, LBV_DENOMINATOR (lbv));
1603 stmt = build (MODIFY_EXPR, void_type_node, resvar,
1604 build (CEIL_DIV_EXPR, type, name, denominator));
1605 name = make_ssa_name (resvar, stmt);
1606 TREE_OPERAND (stmt, 0) = name;
1607 fold_stmt (&stmt);
1608 tsi = tsi_last (stmts);
1609 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1611 *stmts_to_insert = stmts;
1612 return name;
1615 /* Convert a linear expression from coefficient and constant form to a
1616 gcc tree.
1617 Return the tree that represents the final value of the expression.
1618 LLE is the linear expression to convert.
1619 OFFSET is the linear offset to apply to the expression.
1620 TYPE is the tree type to use for the variables and math.
1621 INDUCTION_VARS is a vector of induction variables for the loops.
1622 INVARIANTS is a vector of the loop nest invariants.
1623 WRAP specifies what tree code to wrap the results in, if there is more than
1624 one (it is either MAX_EXPR, or MIN_EXPR).
1625 STMTS_TO_INSERT Is a pointer to the statement list we fill in with
1626 statements that need to be inserted for the linear expression. */
1628 static tree
1629 lle_to_gcc_expression (lambda_linear_expression lle,
1630 lambda_linear_expression offset,
1631 tree type,
1632 VEC(tree) *induction_vars,
1633 VEC(tree) *invariants,
1634 enum tree_code wrap, tree * stmts_to_insert)
1636 tree stmts, stmt, resvar, name;
1637 size_t i;
1638 tree_stmt_iterator tsi;
1639 tree iv, invar;
1640 VEC(tree) *results = NULL;
1642 name = NULL_TREE;
1643 /* Create a statement list and a linear expression temporary. */
1644 stmts = alloc_stmt_list ();
1645 resvar = create_tmp_var (type, "lletmp");
1646 add_referenced_tmp_var (resvar);
1648 /* Build up the linear expressions, and put the variable representing the
1649 result in the results array. */
1650 for (; lle != NULL; lle = LLE_NEXT (lle))
1652 /* Start at name = 0. */
1653 stmt = build (MODIFY_EXPR, void_type_node, resvar, integer_zero_node);
1654 name = make_ssa_name (resvar, stmt);
1655 TREE_OPERAND (stmt, 0) = name;
1656 fold_stmt (&stmt);
1657 tsi = tsi_last (stmts);
1658 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1660 /* First do the induction variables.
1661 at the end, name = name + all the induction variables added
1662 together. */
1663 for (i = 0; VEC_iterate (tree, induction_vars, i, iv); i++)
1665 if (LLE_COEFFICIENTS (lle)[i] != 0)
1667 tree newname;
1668 tree mult;
1669 tree coeff;
1671 /* mult = induction variable * coefficient. */
1672 if (LLE_COEFFICIENTS (lle)[i] == 1)
1674 mult = VEC_index (tree, induction_vars, i);
1676 else
1678 coeff = build_int_cst (type,
1679 LLE_COEFFICIENTS (lle)[i]);
1680 mult = fold (build (MULT_EXPR, type, iv, coeff));
1683 /* newname = mult */
1684 stmt = build (MODIFY_EXPR, void_type_node, resvar, mult);
1685 newname = make_ssa_name (resvar, stmt);
1686 TREE_OPERAND (stmt, 0) = newname;
1687 fold_stmt (&stmt);
1688 tsi = tsi_last (stmts);
1689 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1691 /* name = name + newname */
1692 stmt = build (MODIFY_EXPR, void_type_node, resvar,
1693 build (PLUS_EXPR, type, name, newname));
1694 name = make_ssa_name (resvar, stmt);
1695 TREE_OPERAND (stmt, 0) = name;
1696 fold_stmt (&stmt);
1697 tsi = tsi_last (stmts);
1698 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1702 /* Handle our invariants.
1703 At the end, we have name = name + result of adding all multiplied
1704 invariants. */
1705 for (i = 0; VEC_iterate (tree, invariants, i, invar); i++)
1707 if (LLE_INVARIANT_COEFFICIENTS (lle)[i] != 0)
1709 tree newname;
1710 tree mult;
1711 tree coeff;
1712 int invcoeff = LLE_INVARIANT_COEFFICIENTS (lle)[i];
1713 /* mult = invariant * coefficient */
1714 if (invcoeff == 1)
1716 mult = invar;
1718 else
1720 coeff = build_int_cst (type, invcoeff);
1721 mult = fold (build (MULT_EXPR, type, invar, coeff));
1724 /* newname = mult */
1725 stmt = build (MODIFY_EXPR, void_type_node, resvar, mult);
1726 newname = make_ssa_name (resvar, stmt);
1727 TREE_OPERAND (stmt, 0) = newname;
1728 fold_stmt (&stmt);
1729 tsi = tsi_last (stmts);
1730 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1732 /* name = name + newname */
1733 stmt = build (MODIFY_EXPR, void_type_node, resvar,
1734 build (PLUS_EXPR, type, name, newname));
1735 name = make_ssa_name (resvar, stmt);
1736 TREE_OPERAND (stmt, 0) = name;
1737 fold_stmt (&stmt);
1738 tsi = tsi_last (stmts);
1739 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1743 /* Now handle the constant.
1744 name = name + constant. */
1745 if (LLE_CONSTANT (lle) != 0)
1747 stmt = build (MODIFY_EXPR, void_type_node, resvar,
1748 build (PLUS_EXPR, type, name,
1749 build_int_cst (type, LLE_CONSTANT (lle))));
1750 name = make_ssa_name (resvar, stmt);
1751 TREE_OPERAND (stmt, 0) = name;
1752 fold_stmt (&stmt);
1753 tsi = tsi_last (stmts);
1754 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1757 /* Now handle the offset.
1758 name = name + linear offset. */
1759 if (LLE_CONSTANT (offset) != 0)
1761 stmt = build (MODIFY_EXPR, void_type_node, resvar,
1762 build (PLUS_EXPR, type, name,
1763 build_int_cst (type, LLE_CONSTANT (offset))));
1764 name = make_ssa_name (resvar, stmt);
1765 TREE_OPERAND (stmt, 0) = name;
1766 fold_stmt (&stmt);
1767 tsi = tsi_last (stmts);
1768 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1771 /* Handle any denominator that occurs. */
1772 if (LLE_DENOMINATOR (lle) != 1)
1774 if (wrap == MAX_EXPR)
1775 stmt = build (MODIFY_EXPR, void_type_node, resvar,
1776 build (CEIL_DIV_EXPR, type, name,
1777 build_int_cst (type, LLE_DENOMINATOR (lle))));
1778 else if (wrap == MIN_EXPR)
1779 stmt = build (MODIFY_EXPR, void_type_node, resvar,
1780 build (FLOOR_DIV_EXPR, type, name,
1781 build_int_cst (type, LLE_DENOMINATOR (lle))));
1782 else
1783 gcc_unreachable();
1785 /* name = {ceil, floor}(name/denominator) */
1786 name = make_ssa_name (resvar, stmt);
1787 TREE_OPERAND (stmt, 0) = name;
1788 tsi = tsi_last (stmts);
1789 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1791 VEC_safe_push (tree, results, name);
1794 /* Again, out of laziness, we don't handle this case yet. It's not
1795 hard, it just hasn't occurred. */
1796 gcc_assert (VEC_length (tree, results) <= 2);
1798 /* We may need to wrap the results in a MAX_EXPR or MIN_EXPR. */
1799 if (VEC_length (tree, results) > 1)
1801 tree op1 = VEC_index (tree, results, 0);
1802 tree op2 = VEC_index (tree, results, 1);
1803 stmt = build (MODIFY_EXPR, void_type_node, resvar,
1804 build (wrap, type, op1, op2));
1805 name = make_ssa_name (resvar, stmt);
1806 TREE_OPERAND (stmt, 0) = name;
1807 tsi = tsi_last (stmts);
1808 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1811 *stmts_to_insert = stmts;
1812 return name;
1815 /* Transform a lambda loopnest NEW_LOOPNEST, which had TRANSFORM applied to
1816 it, back into gcc code. This changes the
1817 loops, their induction variables, and their bodies, so that they
1818 match the transformed loopnest.
1819 OLD_LOOPNEST is the loopnest before we've replaced it with the new
1820 loopnest.
1821 OLD_IVS is a vector of induction variables from the old loopnest.
1822 INVARIANTS is a vector of loop invariants from the old loopnest.
1823 NEW_LOOPNEST is the new lambda loopnest to replace OLD_LOOPNEST with.
1824 TRANSFORM is the matrix transform that was applied to OLD_LOOPNEST to get
1825 NEW_LOOPNEST. */
1827 void
1828 lambda_loopnest_to_gcc_loopnest (struct loop *old_loopnest,
1829 VEC(tree) *old_ivs,
1830 VEC(tree) *invariants,
1831 lambda_loopnest new_loopnest,
1832 lambda_trans_matrix transform)
1835 struct loop *temp;
1836 size_t i = 0;
1837 size_t depth = 0;
1838 VEC(tree) *new_ivs = NULL;
1839 tree oldiv;
1841 block_stmt_iterator bsi;
1843 if (dump_file)
1845 transform = lambda_trans_matrix_inverse (transform);
1846 fprintf (dump_file, "Inverse of transformation matrix:\n");
1847 print_lambda_trans_matrix (dump_file, transform);
1849 depth = depth_of_nest (old_loopnest);
1850 temp = old_loopnest;
1852 while (temp)
1854 lambda_loop newloop;
1855 basic_block bb;
1856 tree ivvar, ivvarinced, exitcond, stmts;
1857 enum tree_code testtype;
1858 tree newupperbound, newlowerbound;
1859 lambda_linear_expression offset;
1860 tree type;
1862 oldiv = VEC_index (tree, old_ivs, i);
1863 type = TREE_TYPE (oldiv);
1865 /* First, build the new induction variable temporary */
1867 ivvar = create_tmp_var (type, "lnivtmp");
1868 add_referenced_tmp_var (ivvar);
1870 VEC_safe_push (tree, new_ivs, ivvar);
1872 newloop = LN_LOOPS (new_loopnest)[i];
1874 /* Linear offset is a bit tricky to handle. Punt on the unhandled
1875 cases for now. */
1876 offset = LL_LINEAR_OFFSET (newloop);
1878 gcc_assert (LLE_DENOMINATOR (offset) == 1 &&
1879 lambda_vector_zerop (LLE_COEFFICIENTS (offset), depth));
1881 /* Now build the new lower bounds, and insert the statements
1882 necessary to generate it on the loop preheader. */
1883 newlowerbound = lle_to_gcc_expression (LL_LOWER_BOUND (newloop),
1884 LL_LINEAR_OFFSET (newloop),
1885 type,
1886 new_ivs,
1887 invariants, MAX_EXPR, &stmts);
1888 bsi_insert_on_edge (loop_preheader_edge (temp), stmts);
1889 bsi_commit_edge_inserts (NULL);
1890 /* Build the new upper bound and insert its statements in the
1891 basic block of the exit condition */
1892 newupperbound = lle_to_gcc_expression (LL_UPPER_BOUND (newloop),
1893 LL_LINEAR_OFFSET (newloop),
1894 type,
1895 new_ivs,
1896 invariants, MIN_EXPR, &stmts);
1897 exitcond = get_loop_exit_condition (temp);
1898 bb = bb_for_stmt (exitcond);
1899 bsi = bsi_start (bb);
1900 bsi_insert_after (&bsi, stmts, BSI_NEW_STMT);
1902 /* Create the new iv, and insert it's increment on the latch
1903 block. */
1905 bb = EDGE_PRED (temp->latch, 0)->src;
1906 bsi = bsi_last (bb);
1907 create_iv (newlowerbound,
1908 build_int_cst (type, LL_STEP (newloop)),
1909 ivvar, temp, &bsi, false, &ivvar,
1910 &ivvarinced);
1912 /* Replace the exit condition with the new upper bound
1913 comparison. */
1915 testtype = LL_STEP (newloop) >= 0 ? LE_EXPR : GE_EXPR;
1917 /* Since we don't know which cond_expr part currently points to each
1918 edge, check which one is invariant and make sure we reverse the
1919 comparison if we are trying to replace a <= 50 with 50 >= newiv.
1920 This ensures that we still canonicalize to <invariant> <test>
1921 <induction variable>. */
1922 if (!expr_invariant_in_loop_p (temp, TREE_OPERAND (exitcond, 0)))
1923 testtype = swap_tree_comparison (testtype);
1925 COND_EXPR_COND (exitcond) = build (testtype,
1926 boolean_type_node,
1927 newupperbound, ivvarinced);
1928 modify_stmt (exitcond);
1929 VEC_replace (tree, new_ivs, i, ivvar);
1931 i++;
1932 temp = temp->inner;
1935 /* Rewrite uses of the old ivs so that they are now specified in terms of
1936 the new ivs. */
1938 for (i = 0; VEC_iterate (tree, old_ivs, i, oldiv); i++)
1940 int j;
1941 dataflow_t imm = get_immediate_uses (SSA_NAME_DEF_STMT (oldiv));
1942 for (j = 0; j < num_immediate_uses (imm); j++)
1944 tree stmt = immediate_use (imm, j);
1945 use_operand_p use_p;
1946 ssa_op_iter iter;
1947 gcc_assert (TREE_CODE (stmt) != PHI_NODE);
1948 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
1950 if (USE_FROM_PTR (use_p) == oldiv)
1952 tree newiv, stmts;
1953 lambda_body_vector lbv, newlbv;
1954 /* Compute the new expression for the induction
1955 variable. */
1956 depth = VEC_length (tree, new_ivs);
1957 lbv = lambda_body_vector_new (depth);
1958 LBV_COEFFICIENTS (lbv)[i] = 1;
1960 newlbv = lambda_body_vector_compute_new (transform, lbv);
1962 newiv = lbv_to_gcc_expression (newlbv, TREE_TYPE (oldiv),
1963 new_ivs, &stmts);
1964 bsi = bsi_for_stmt (stmt);
1965 /* Insert the statements to build that
1966 expression. */
1967 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
1968 propagate_value (use_p, newiv);
1969 modify_stmt (stmt);
1978 /* Returns true when the vector V is lexicographically positive, in
1979 other words, when the first nonzero element is positive. */
1981 static bool
1982 lambda_vector_lexico_pos (lambda_vector v,
1983 unsigned n)
1985 unsigned i;
1986 for (i = 0; i < n; i++)
1988 if (v[i] == 0)
1989 continue;
1990 if (v[i] < 0)
1991 return false;
1992 if (v[i] > 0)
1993 return true;
1995 return true;
1999 /* Return TRUE if this is not interesting statement from the perspective of
2000 determining if we have a perfect loop nest. */
2002 static bool
2003 not_interesting_stmt (tree stmt)
2005 /* Note that COND_EXPR's aren't interesting because if they were exiting the
2006 loop, we would have already failed the number of exits tests. */
2007 if (TREE_CODE (stmt) == LABEL_EXPR
2008 || TREE_CODE (stmt) == GOTO_EXPR
2009 || TREE_CODE (stmt) == COND_EXPR)
2010 return true;
2011 return false;
2014 /* Return TRUE if PHI uses DEF for it's in-the-loop edge for LOOP. */
2016 static bool
2017 phi_loop_edge_uses_def (struct loop *loop, tree phi, tree def)
2019 int i;
2020 for (i = 0; i < PHI_NUM_ARGS (phi); i++)
2021 if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, i)->src))
2022 if (PHI_ARG_DEF (phi, i) == def)
2023 return true;
2024 return false;
2027 /* Return TRUE if STMT is a use of PHI_RESULT. */
2029 static bool
2030 stmt_uses_phi_result (tree stmt, tree phi_result)
2032 use_optype uses = STMT_USE_OPS (stmt);
2034 /* This is conservatively true, because we only want SIMPLE bumpers
2035 of the form x +- constant for our pass. */
2036 if (NUM_USES (uses) != 1)
2037 return false;
2038 if (USE_OP (uses, 0) == phi_result)
2039 return true;
2041 return false;
2044 /* STMT is a bumper stmt for LOOP if the version it defines is used in the
2045 in-loop-edge in a phi node, and the operand it uses is the result of that
2046 phi node.
2047 I.E. i_29 = i_3 + 1
2048 i_3 = PHI (0, i_29); */
2050 static bool
2051 stmt_is_bumper_for_loop (struct loop *loop, tree stmt)
2053 tree use;
2054 tree def;
2055 def_optype defs = STMT_DEF_OPS (stmt);
2056 dataflow_t imm;
2057 int i;
2059 if (NUM_DEFS (defs) != 1)
2060 return false;
2061 def = DEF_OP (defs, 0);
2062 imm = get_immediate_uses (stmt);
2063 for (i = 0; i < num_immediate_uses (imm); i++)
2065 use = immediate_use (imm, i);
2066 if (TREE_CODE (use) == PHI_NODE)
2068 if (phi_loop_edge_uses_def (loop, use, def))
2069 if (stmt_uses_phi_result (stmt, PHI_RESULT (use)))
2070 return true;
2073 return false;
2077 /* Return true if LOOP is a perfect loop nest.
2078 Perfect loop nests are those loop nests where all code occurs in the
2079 innermost loop body.
2080 If S is a program statement, then
2082 i.e.
2083 DO I = 1, 20
2085 DO J = 1, 20
2087 END DO
2088 END DO
2089 is not a perfect loop nest because of S1.
2091 DO I = 1, 20
2092 DO J = 1, 20
2095 END DO
2096 END DO
2097 is a perfect loop nest.
2099 Since we don't have high level loops anymore, we basically have to walk our
2100 statements and ignore those that are there because the loop needs them (IE
2101 the induction variable increment, and jump back to the top of the loop). */
2103 bool
2104 perfect_nest_p (struct loop *loop)
2106 basic_block *bbs;
2107 size_t i;
2108 tree exit_cond;
2110 if (!loop->inner)
2111 return true;
2112 bbs = get_loop_body (loop);
2113 exit_cond = get_loop_exit_condition (loop);
2114 for (i = 0; i < loop->num_nodes; i++)
2116 if (bbs[i]->loop_father == loop)
2118 block_stmt_iterator bsi;
2119 for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi))
2121 tree stmt = bsi_stmt (bsi);
2122 if (stmt == exit_cond
2123 || not_interesting_stmt (stmt)
2124 || stmt_is_bumper_for_loop (loop, stmt))
2125 continue;
2126 free (bbs);
2127 return false;
2131 free (bbs);
2132 /* See if the inner loops are perfectly nested as well. */
2133 if (loop->inner)
2134 return perfect_nest_p (loop->inner);
2135 return true;
2138 /* Replace the USES of tree X in STMT with tree Y */
2140 static void
2141 replace_uses_of_x_with_y (tree stmt, tree x, tree y)
2143 use_optype uses = STMT_USE_OPS (stmt);
2144 size_t i;
2145 for (i = 0; i < NUM_USES (uses); i++)
2147 if (USE_OP (uses, i) == x)
2148 SET_USE_OP (uses, i, y);
2152 /* Return TRUE if STMT uses tree OP in it's uses. */
2154 static bool
2155 stmt_uses_op (tree stmt, tree op)
2157 use_optype uses = STMT_USE_OPS (stmt);
2158 size_t i;
2159 for (i = 0; i < NUM_USES (uses); i++)
2161 if (USE_OP (uses, i) == op)
2162 return true;
2164 return false;
2167 /* Return TRUE if LOOP is an imperfect nest that we can convert to a perfect
2168 one. LOOPIVS is a vector of induction variables, one per loop.
2169 ATM, we only handle imperfect nests of depth 2, where all of the statements
2170 occur after the inner loop. */
2172 static bool
2173 can_convert_to_perfect_nest (struct loop *loop,
2174 VEC (tree) *loopivs)
2176 basic_block *bbs;
2177 tree exit_condition;
2178 size_t i;
2179 block_stmt_iterator bsi;
2181 /* Can't handle triply nested+ loops yet. */
2182 if (!loop->inner || loop->inner->inner)
2183 return false;
2185 /* We only handle moving the after-inner-body statements right now, so make
2186 sure all the statements we need to move are located in that position. */
2187 bbs = get_loop_body (loop);
2188 exit_condition = get_loop_exit_condition (loop);
2189 for (i = 0; i < loop->num_nodes; i++)
2191 if (bbs[i]->loop_father == loop)
2193 for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi))
2195 size_t j;
2196 tree stmt = bsi_stmt (bsi);
2197 if (stmt == exit_condition
2198 || not_interesting_stmt (stmt)
2199 || stmt_is_bumper_for_loop (loop, stmt))
2200 continue;
2201 /* If the statement uses inner loop ivs, we == screwed. */
2202 for (j = 1; j < VEC_length (tree, loopivs); j++)
2203 if (stmt_uses_op (stmt, VEC_index (tree, loopivs, j)))
2205 free (bbs);
2206 return false;
2209 /* If the bb of a statement we care about isn't dominated by
2210 the header of the inner loop, then we are also screwed. */
2211 if (!dominated_by_p (CDI_DOMINATORS,
2212 bb_for_stmt (stmt),
2213 loop->inner->header))
2215 free (bbs);
2216 return false;
2221 return true;
2224 /* Transform the loop nest into a perfect nest, if possible.
2225 LOOPS is the current struct loops *
2226 LOOP is the loop nest to transform into a perfect nest
2227 LBOUNDS are the lower bounds for the loops to transform
2228 UBOUNDS are the upper bounds for the loops to transform
2229 STEPS is the STEPS for the loops to transform.
2230 LOOPIVS is the induction variables for the loops to transform.
2232 Basically, for the case of
2234 FOR (i = 0; i < 50; i++)
2236 FOR (j =0; j < 50; j++)
2238 <whatever>
2240 <some code>
2243 This function will transform it into a perfect loop nest by splitting the
2244 outer loop into two loops, like so:
2246 FOR (i = 0; i < 50; i++)
2248 FOR (j = 0; j < 50; j++)
2250 <whatever>
2254 FOR (i = 0; i < 50; i ++)
2256 <some code>
2259 Return FALSE if we can't make this loop into a perfect nest. */
2260 static bool
2261 perfect_nestify (struct loops *loops,
2262 struct loop *loop,
2263 VEC (tree) *lbounds,
2264 VEC (tree) *ubounds,
2265 VEC (int) *steps,
2266 VEC (tree) *loopivs)
2268 basic_block *bbs;
2269 tree exit_condition;
2270 tree then_label, else_label, cond_stmt;
2271 basic_block preheaderbb, headerbb, bodybb, latchbb, olddest;
2272 size_t i;
2273 block_stmt_iterator bsi;
2274 edge e;
2275 struct loop *newloop;
2276 tree phi;
2277 tree uboundvar;
2278 tree stmt;
2279 tree oldivvar, ivvar, ivvarinced;
2280 VEC (tree) *phis = NULL;
2282 if (!can_convert_to_perfect_nest (loop, loopivs))
2283 return false;
2285 /* Create the new loop */
2287 olddest = loop->single_exit->dest;
2288 preheaderbb = loop_split_edge_with (loop->single_exit, NULL);
2289 headerbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2291 /* This is done because otherwise, it will release the ssa_name too early
2292 when the edge gets redirected and it will get reused, causing the use of
2293 the phi node to get rewritten. */
2295 for (phi = phi_nodes (olddest); phi; phi = PHI_CHAIN (phi))
2297 /* These should be simple exit phi copies. */
2298 if (PHI_NUM_ARGS (phi) != 1)
2299 return false;
2300 VEC_safe_push (tree, phis, PHI_RESULT (phi));
2301 VEC_safe_push (tree, phis, PHI_ARG_DEF (phi, 0));
2302 mark_for_rewrite (PHI_RESULT (phi));
2304 e = redirect_edge_and_branch (EDGE_SUCC (preheaderbb, 0), headerbb);
2306 /* Remove the exit phis from the old basic block. */
2307 while (phi_nodes (olddest) != NULL)
2308 remove_phi_node (phi_nodes (olddest), NULL, olddest);
2310 /* and add them to the new basic block. */
2311 while (VEC_length (tree, phis) != 0)
2313 tree def;
2314 tree phiname;
2315 def = VEC_pop (tree, phis);
2316 phiname = VEC_pop (tree, phis);
2317 phi = create_phi_node (phiname, preheaderbb);
2318 add_phi_arg (&phi, def, EDGE_PRED (preheaderbb, 0));
2320 flush_pending_stmts (e);
2321 unmark_all_for_rewrite ();
2323 bodybb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2324 latchbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2325 make_edge (headerbb, bodybb, EDGE_FALLTHRU);
2326 then_label = build1 (GOTO_EXPR, void_type_node, tree_block_label (latchbb));
2327 else_label = build1 (GOTO_EXPR, void_type_node, tree_block_label (olddest));
2328 cond_stmt = build (COND_EXPR, void_type_node,
2329 build (NE_EXPR, boolean_type_node,
2330 integer_one_node,
2331 integer_zero_node),
2332 then_label, else_label);
2333 bsi = bsi_start (bodybb);
2334 bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
2335 e = make_edge (bodybb, olddest, EDGE_FALSE_VALUE);
2336 make_edge (bodybb, latchbb, EDGE_TRUE_VALUE);
2337 make_edge (latchbb, headerbb, EDGE_FALLTHRU);
2339 /* Update the loop structures. */
2340 newloop = duplicate_loop (loops, loop, olddest->loop_father);
2341 newloop->header = headerbb;
2342 newloop->latch = latchbb;
2343 newloop->single_exit = e;
2344 add_bb_to_loop (latchbb, newloop);
2345 add_bb_to_loop (bodybb, newloop);
2346 add_bb_to_loop (headerbb, newloop);
2347 add_bb_to_loop (preheaderbb, olddest->loop_father);
2348 set_immediate_dominator (CDI_DOMINATORS, bodybb, headerbb);
2349 set_immediate_dominator (CDI_DOMINATORS, headerbb, preheaderbb);
2350 set_immediate_dominator (CDI_DOMINATORS, preheaderbb,
2351 loop->single_exit->src);
2352 set_immediate_dominator (CDI_DOMINATORS, latchbb, bodybb);
2353 set_immediate_dominator (CDI_DOMINATORS, olddest, bodybb);
2354 /* Create the new iv. */
2355 ivvar = create_tmp_var (integer_type_node, "perfectiv");
2356 add_referenced_tmp_var (ivvar);
2357 bsi = bsi_last (EDGE_PRED (newloop->latch, 0)->src);
2358 create_iv (VEC_index (tree, lbounds, 0),
2359 build_int_cst (integer_type_node, VEC_index (int, steps, 0)),
2360 ivvar, newloop, &bsi, false, &ivvar, &ivvarinced);
2362 /* Create the new upper bound. This may be not just a variable, so we copy
2363 it to one just in case. */
2365 exit_condition = get_loop_exit_condition (newloop);
2366 uboundvar = create_tmp_var (integer_type_node, "uboundvar");
2367 add_referenced_tmp_var (uboundvar);
2368 stmt = build (MODIFY_EXPR, void_type_node, uboundvar,
2369 VEC_index (tree, ubounds, 0));
2370 uboundvar = make_ssa_name (uboundvar, stmt);
2371 TREE_OPERAND (stmt, 0) = uboundvar;
2372 bsi_insert_before (&bsi, stmt, BSI_SAME_STMT);
2373 COND_EXPR_COND (exit_condition) = build (GE_EXPR,
2374 boolean_type_node,
2375 uboundvar,
2376 ivvarinced);
2378 bbs = get_loop_body (loop);
2379 /* Now replace the induction variable in the moved statements with the
2380 correct loop induction variable. */
2381 oldivvar = VEC_index (tree, loopivs, 0);
2382 for (i = 0; i < loop->num_nodes; i++)
2384 block_stmt_iterator tobsi = bsi_last (bodybb);
2385 if (bbs[i]->loop_father == loop)
2387 /* Note that the bsi only needs to be explicitly incremented
2388 when we don't move something, since it is automatically
2389 incremented when we do. */
2390 for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi);)
2392 tree stmt = bsi_stmt (bsi);
2393 if (stmt == exit_condition
2394 || not_interesting_stmt (stmt)
2395 || stmt_is_bumper_for_loop (loop, stmt))
2397 bsi_next (&bsi);
2398 continue;
2400 replace_uses_of_x_with_y (stmt, oldivvar, ivvar);
2401 bsi_move_before (&bsi, &tobsi);
2405 free (bbs);
2406 flow_loops_find (loops, LOOP_ALL);
2407 return perfect_nest_p (loop);
2410 /* Return true if TRANS is a legal transformation matrix that respects
2411 the dependence vectors in DISTS and DIRS. The conservative answer
2412 is false.
2414 "Wolfe proves that a unimodular transformation represented by the
2415 matrix T is legal when applied to a loop nest with a set of
2416 lexicographically non-negative distance vectors RDG if and only if
2417 for each vector d in RDG, (T.d >= 0) is lexicographically positive.
2418 i.e.: if and only if it transforms the lexicographically positive
2419 distance vectors to lexicographically positive vectors. Note that
2420 a unimodular matrix must transform the zero vector (and only it) to
2421 the zero vector." S.Muchnick. */
2423 bool
2424 lambda_transform_legal_p (lambda_trans_matrix trans,
2425 int nb_loops,
2426 varray_type dependence_relations)
2428 unsigned int i;
2429 lambda_vector distres;
2430 struct data_dependence_relation *ddr;
2432 #if defined ENABLE_CHECKING
2433 if (LTM_COLSIZE (trans) != nb_loops
2434 || LTM_ROWSIZE (trans) != nb_loops)
2435 abort ();
2436 #endif
2438 /* When there is an unknown relation in the dependence_relations, we
2439 know that it is no worth looking at this loop nest: give up. */
2440 ddr = (struct data_dependence_relation *)
2441 VARRAY_GENERIC_PTR (dependence_relations, 0);
2442 if (ddr == NULL)
2443 return true;
2444 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2445 return false;
2447 distres = lambda_vector_new (nb_loops);
2449 /* For each distance vector in the dependence graph. */
2450 for (i = 0; i < VARRAY_ACTIVE_SIZE (dependence_relations); i++)
2452 ddr = (struct data_dependence_relation *)
2453 VARRAY_GENERIC_PTR (dependence_relations, i);
2455 /* Don't care about relations for which we know that there is no
2456 dependence, nor about read-read (aka. output-dependences):
2457 these data accesses can happen in any order. */
2458 if (DDR_ARE_DEPENDENT (ddr) == chrec_known
2459 || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
2460 continue;
2462 /* Conservatively answer: "this transformation is not valid". */
2463 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2464 return false;
2466 /* If the dependence could not be captured by a distance vector,
2467 conservatively answer that the transform is not valid. */
2468 if (DDR_DIST_VECT (ddr) == NULL)
2469 return false;
2471 /* Compute trans.dist_vect */
2472 lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
2473 DDR_DIST_VECT (ddr), distres);
2475 if (!lambda_vector_lexico_pos (distres, nb_loops))
2476 return false;
2478 return true;