gcc/lambda-code.c
1 /* Loop transformation code generation
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dberlin@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "errors.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 #include "rtl.h"
31 #include "basic-block.h"
32 #include "diagnostic.h"
33 #include "tree-flow.h"
34 #include "tree-dump.h"
35 #include "timevar.h"
36 #include "cfgloop.h"
37 #include "expr.h"
38 #include "optabs.h"
39 #include "tree-chrec.h"
40 #include "tree-data-ref.h"
41 #include "tree-pass.h"
42 #include "tree-scalar-evolution.h"
43 #include "vec.h"
44 #include "lambda.h"
46 /* This loop nest code generation is based on non-singular matrix
47 math.
49 A little terminology and a general sketch of the algorithm. See "A singular
50 loop transformation framework based on non-singular matrices" by Wei Li and
51 Keshav Pingali for formal proofs that the various statements below are
52 correct.
54 A loop iteration space represents the points traversed by the loop. A point in the
55 iteration space can be represented by a vector of size <loop depth>. You can
56 therefore represent the iteration space as integral combinations of a set
57 of basis vectors.
59 A loop iteration space is dense if every integer point between the loop
60 bounds is a point in the iteration space. Every loop with a step of 1
61 therefore has a dense iteration space.
63 for i = 1 to 3, step 1 is a dense iteration space.
65 A loop iteration space is sparse if it is not dense. That is, the iteration
66 space skips integer points that are within the loop bounds.
68 for i = 1 to 3, step 2 is a sparse iteration space, because the integer point
69 2 is skipped.
71 Dense source spaces are easy to transform, because they don't skip any
72 points to begin with. Thus we can compute the exact bounds of the target
73 space using min/max and floor/ceil.
75 For a dense source space, we take the transformation matrix, decompose it
76 into a lower triangular part (H) and a unimodular part (U).
77 We then compute the auxiliary space from the unimodular part (source loop
78 nest . U = auxiliary space), which has two important properties:
79 1. It traverses the iterations in the same lexicographic order as the source
80 space.
81 2. It is a dense space when the source is a dense space (even if the target
82 space is going to be sparse).
84 Given the auxiliary space, we use the lower triangular part to compute the
85 bounds in the target space by simple matrix multiplication.
86 The gaps in the target space (IE the new loop step sizes) will be the
87 diagonals of the H matrix.
89 Sparse source spaces require another step, because you can't directly compute
90 the exact bounds of the auxiliary and target space from the sparse space.
91 Rather than try to come up with a separate algorithm to handle sparse source
92 spaces directly, we just find a legal transformation matrix that gives you
93 the sparse source space, from a dense space, and then transform the dense
94 space.
96 For a regular sparse space, you can represent the source space as an integer
97 lattice, and the base space of that lattice will always be dense. Thus, we
98 effectively use the lattice to figure out the transformation from the lattice
99 base space, to the sparse iteration space (IE what transform was applied to
100 the dense space to make it sparse). We then compose this transform with the
101 transformation matrix specified by the user (since our matrix transformations
102 are closed under composition, this is okay). We can then use the base space
103 (which is dense) plus the composed transformation matrix, to compute the rest
104 of the transform using the dense space algorithm above.
106 In other words, our sparse source space (B) is decomposed into a dense base
107 space (A), and a matrix (L) that transforms A into B, such that A.L = B.
108 We then compute the composition of L and the user transformation matrix (T),
109 so that T is now a transform from A to the result, instead of from B to the
110 result.
111 IE A.(LT) = result instead of B.T = result
112 Since A is now a dense source space, we can use the dense source space
113 algorithm above to compute the result of applying transform (LT) to A.
115 Fourier-Motzkin elimination is used to compute the bounds of the base space
116 of the lattice. */
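/* As a small worked illustration of the sparse-space handling above: consider
   the single loop "for i = 4 to 10, step 2".  Its iteration points
   {4, 6, 8, 10} are the image of the dense base space i' in {0, 1, 2, 3}
   under i = 2 * i' + 4, so the lattice base L is the 1x1 matrix (2) with
   origin 4.  A user transformation T is then not applied to the sparse space
   B directly; instead the composed transform L.T is applied to the dense base
   space A, exactly as in A.(LT) = result above, and the dense-space algorithm
   computes the resulting bounds and steps.  */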
118 DEF_VEC_P(int);
119 DEF_VEC_ALLOC_P(int,heap);
121 static bool perfect_nestify (struct loops *,
122 struct loop *, VEC(tree,heap) *,
123 VEC(tree,heap) *, VEC(int,heap) *,
124 VEC(tree,heap) *);
125 /* Lattice stuff that is internal to the code generation algorithm. */
127 typedef struct
129 /* Lattice base matrix. */
130 lambda_matrix base;
131 /* Lattice dimension. */
132 int dimension;
133 /* Origin vector for the coefficients. */
134 lambda_vector origin;
135 /* Origin matrix for the invariants. */
136 lambda_matrix origin_invariants;
137 /* Number of invariants. */
138 int invariants;
139 } *lambda_lattice;
141 #define LATTICE_BASE(T) ((T)->base)
142 #define LATTICE_DIMENSION(T) ((T)->dimension)
143 #define LATTICE_ORIGIN(T) ((T)->origin)
144 #define LATTICE_ORIGIN_INVARIANTS(T) ((T)->origin_invariants)
145 #define LATTICE_INVARIANTS(T) ((T)->invariants)
147 static bool lle_equal (lambda_linear_expression, lambda_linear_expression,
148 int, int);
149 static lambda_lattice lambda_lattice_new (int, int);
150 static lambda_lattice lambda_lattice_compute_base (lambda_loopnest);
152 static tree find_induction_var_from_exit_cond (struct loop *);
154 /* Create a new lambda body vector. */
156 lambda_body_vector
157 lambda_body_vector_new (int size)
159 lambda_body_vector ret;
161 ret = ggc_alloc (sizeof (*ret));
162 LBV_COEFFICIENTS (ret) = lambda_vector_new (size);
163 LBV_SIZE (ret) = size;
164 LBV_DENOMINATOR (ret) = 1;
165 return ret;
168 /* Compute the new coefficients for the vector based on the
169 *inverse* of the transformation matrix. */
171 lambda_body_vector
172 lambda_body_vector_compute_new (lambda_trans_matrix transform,
173 lambda_body_vector vect)
175 lambda_body_vector temp;
176 int depth;
178 /* Make sure the matrix is square. */
179 gcc_assert (LTM_ROWSIZE (transform) == LTM_COLSIZE (transform));
181 depth = LTM_ROWSIZE (transform);
183 temp = lambda_body_vector_new (depth);
184 LBV_DENOMINATOR (temp) =
185 LBV_DENOMINATOR (vect) * LTM_DENOMINATOR (transform);
186 lambda_vector_matrix_mult (LBV_COEFFICIENTS (vect), depth,
187 LTM_MATRIX (transform), depth,
188 LBV_COEFFICIENTS (temp));
189 LBV_SIZE (temp) = LBV_SIZE (vect);
190 return temp;
193 /* Print out a lambda body vector. */
195 void
196 print_lambda_body_vector (FILE * outfile, lambda_body_vector body)
198 print_lambda_vector (outfile, LBV_COEFFICIENTS (body), LBV_SIZE (body));
201 /* Return TRUE if two linear expressions are equal. */
203 static bool
204 lle_equal (lambda_linear_expression lle1, lambda_linear_expression lle2,
205 int depth, int invariants)
207 int i;
209 if (lle1 == NULL || lle2 == NULL)
210 return false;
211 if (LLE_CONSTANT (lle1) != LLE_CONSTANT (lle2))
212 return false;
213 if (LLE_DENOMINATOR (lle1) != LLE_DENOMINATOR (lle2))
214 return false;
215 for (i = 0; i < depth; i++)
216 if (LLE_COEFFICIENTS (lle1)[i] != LLE_COEFFICIENTS (lle2)[i])
217 return false;
218 for (i = 0; i < invariants; i++)
219 if (LLE_INVARIANT_COEFFICIENTS (lle1)[i] !=
220 LLE_INVARIANT_COEFFICIENTS (lle2)[i])
221 return false;
222 return true;
225 /* Create a new linear expression with dimension DIM, and total number
226 of invariants INVARIANTS. */
228 lambda_linear_expression
229 lambda_linear_expression_new (int dim, int invariants)
231 lambda_linear_expression ret;
233 ret = ggc_alloc_cleared (sizeof (*ret));
235 LLE_COEFFICIENTS (ret) = lambda_vector_new (dim);
236 LLE_CONSTANT (ret) = 0;
237 LLE_INVARIANT_COEFFICIENTS (ret) = lambda_vector_new (invariants);
238 LLE_DENOMINATOR (ret) = 1;
239 LLE_NEXT (ret) = NULL;
241 return ret;
244 /* Print out a linear expression EXPR, with SIZE coefficients, to OUTFILE.
245 The starting letter used for variable names is START. */
247 static void
248 print_linear_expression (FILE * outfile, lambda_vector expr, int size,
249 char start)
251 int i;
252 bool first = true;
253 for (i = 0; i < size; i++)
255 if (expr[i] != 0)
257 if (first)
259 if (expr[i] < 0)
260 fprintf (outfile, "-");
261 first = false;
263 else if (expr[i] > 0)
264 fprintf (outfile, " + ");
265 else
266 fprintf (outfile, " - ");
267 if (abs (expr[i]) == 1)
268 fprintf (outfile, "%c", start + i);
269 else
270 fprintf (outfile, "%d%c", abs (expr[i]), start + i);
275 /* Print out a lambda linear expression structure, EXPR, to OUTFILE. The
276 depth/number of coefficients is given by DEPTH, the number of invariants is
277 given by INVARIANTS, and the character to start variable names with is given
278 by START. */
280 void
281 print_lambda_linear_expression (FILE * outfile,
282 lambda_linear_expression expr,
283 int depth, int invariants, char start)
285 fprintf (outfile, "\tLinear expression: ");
286 print_linear_expression (outfile, LLE_COEFFICIENTS (expr), depth, start);
287 fprintf (outfile, " constant: %d ", LLE_CONSTANT (expr));
288 fprintf (outfile, " invariants: ");
289 print_linear_expression (outfile, LLE_INVARIANT_COEFFICIENTS (expr),
290 invariants, 'A');
291 fprintf (outfile, " denominator: %d\n", LLE_DENOMINATOR (expr));
294 /* Print a lambda loop structure LOOP to OUTFILE. The depth/number of
295 coefficients is given by DEPTH, the number of invariants is
296 given by INVARIANTS, and the character to start variable names with is given
297 by START. */
299 void
300 print_lambda_loop (FILE * outfile, lambda_loop loop, int depth,
301 int invariants, char start)
303 int step;
304 lambda_linear_expression expr;
306 gcc_assert (loop);
308 expr = LL_LINEAR_OFFSET (loop);
309 step = LL_STEP (loop);
310 fprintf (outfile, " step size = %d \n", step);
312 if (expr)
314 fprintf (outfile, " linear offset: \n");
315 print_lambda_linear_expression (outfile, expr, depth, invariants,
316 start);
319 fprintf (outfile, " lower bound: \n");
320 for (expr = LL_LOWER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr))
321 print_lambda_linear_expression (outfile, expr, depth, invariants, start);
322 fprintf (outfile, " upper bound: \n");
323 for (expr = LL_UPPER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr))
324 print_lambda_linear_expression (outfile, expr, depth, invariants, start);
327 /* Create a new loop nest structure with DEPTH loops, and INVARIANTS as the
328 number of invariants. */
330 lambda_loopnest
331 lambda_loopnest_new (int depth, int invariants)
333 lambda_loopnest ret;
334 ret = ggc_alloc (sizeof (*ret));
336 LN_LOOPS (ret) = ggc_alloc_cleared (depth * sizeof (lambda_loop));
337 LN_DEPTH (ret) = depth;
338 LN_INVARIANTS (ret) = invariants;
340 return ret;
343 /* Print a lambda loopnest structure, NEST, to OUTFILE. The starting
344 character to use for loop names is given by START. */
346 void
347 print_lambda_loopnest (FILE * outfile, lambda_loopnest nest, char start)
349 int i;
350 for (i = 0; i < LN_DEPTH (nest); i++)
352 fprintf (outfile, "Loop %c\n", start + i);
353 print_lambda_loop (outfile, LN_LOOPS (nest)[i], LN_DEPTH (nest),
354 LN_INVARIANTS (nest), 'i');
355 fprintf (outfile, "\n");
359 /* Allocate a new lattice structure of DEPTH x DEPTH, with INVARIANTS number
360 of invariants. */
362 static lambda_lattice
363 lambda_lattice_new (int depth, int invariants)
365 lambda_lattice ret;
366 ret = ggc_alloc (sizeof (*ret));
367 LATTICE_BASE (ret) = lambda_matrix_new (depth, depth);
368 LATTICE_ORIGIN (ret) = lambda_vector_new (depth);
369 LATTICE_ORIGIN_INVARIANTS (ret) = lambda_matrix_new (depth, invariants);
370 LATTICE_DIMENSION (ret) = depth;
371 LATTICE_INVARIANTS (ret) = invariants;
372 return ret;
375 /* Compute the lattice base for NEST. The lattice base is essentially a
376 non-singular transform from a dense base space to a sparse iteration space.
377 We use it so that we don't have to specially handle the case of a sparse
378 iteration space in other parts of the algorithm. As a result, this routine
379 only does something interesting (IE produce a matrix that isn't the
380 identity matrix) if NEST is a sparse space. */
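/* For example, a loop "for i = 3 to n, step 2" (affine lower bound 3)
   contributes a diagonal base entry of 2 and an origin entry of 3, while a
   step-1 loop contributes an identity row and a zero origin, so a fully dense
   nest yields the identity matrix.  */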
382 static lambda_lattice
383 lambda_lattice_compute_base (lambda_loopnest nest)
385 lambda_lattice ret;
386 int depth, invariants;
387 lambda_matrix base;
389 int i, j, step;
390 lambda_loop loop;
391 lambda_linear_expression expression;
393 depth = LN_DEPTH (nest);
394 invariants = LN_INVARIANTS (nest);
396 ret = lambda_lattice_new (depth, invariants);
397 base = LATTICE_BASE (ret);
398 for (i = 0; i < depth; i++)
400 loop = LN_LOOPS (nest)[i];
401 gcc_assert (loop);
402 step = LL_STEP (loop);
403 /* If we have a step of 1, then the base is one, and the
404 origin and invariant coefficients are 0. */
405 if (step == 1)
407 for (j = 0; j < depth; j++)
408 base[i][j] = 0;
409 base[i][i] = 1;
410 LATTICE_ORIGIN (ret)[i] = 0;
411 for (j = 0; j < invariants; j++)
412 LATTICE_ORIGIN_INVARIANTS (ret)[i][j] = 0;
414 else
416 /* Otherwise, we need the lower bound expression (which must
417 be an affine function) to determine the base. */
418 expression = LL_LOWER_BOUND (loop);
419 gcc_assert (expression && !LLE_NEXT (expression)
420 && LLE_DENOMINATOR (expression) == 1);
422 /* The lower triangular portion of the base is going to be the
423 coefficient times the step */
424 for (j = 0; j < i; j++)
425 base[i][j] = LLE_COEFFICIENTS (expression)[j]
426 * LL_STEP (LN_LOOPS (nest)[j]);
427 base[i][i] = step;
428 for (j = i + 1; j < depth; j++)
429 base[i][j] = 0;
431 /* Origin for this loop is the constant of the lower bound
432 expression. */
433 LATTICE_ORIGIN (ret)[i] = LLE_CONSTANT (expression);
435 /* Coefficients for the invariants are equal to the invariant
436 coefficients in the expression. */
437 for (j = 0; j < invariants; j++)
438 LATTICE_ORIGIN_INVARIANTS (ret)[i][j] =
439 LLE_INVARIANT_COEFFICIENTS (expression)[j];
442 return ret;
445 /* Compute the greatest common divisor of two numbers (A and B) using
446 Euclid's algorithm. */
448 static int
449 gcd (int a, int b)
452 int x, y, z;
454 x = abs (a);
455 y = abs (b);
457 while (x > 0)
459 z = y % x;
460 y = x;
461 x = z;
464 return (y);
467 /* Compute the greatest common divisor of a VECTOR of SIZE numbers. */
469 static int
470 gcd_vector (lambda_vector vector, int size)
472 int i;
473 int gcd1 = 0;
475 if (size > 0)
477 gcd1 = vector[0];
478 for (i = 1; i < size; i++)
479 gcd1 = gcd (gcd1, vector[i]);
481 return gcd1;
484 /* Compute the least common multiple of two numbers A and B. */
486 static int
487 lcm (int a, int b)
489 return (abs (a) * abs (b) / gcd (a, b));
492 /* Perform Fourier-Motzkin elimination to calculate the bounds of the
493 auxiliary nest.
494 Fourier-Motzkin is a way of reducing systems of linear inequalities so that
495 it is easy to calculate the answer and bounds.
496 A sketch of how it works:
497 Given a system of linear inequalities, ai * xj >= bk, you can always
498 rewrite the constraints so they are all of the form
499 a <= x, or x <= b, or x >= constant for some x in x1 ... xj (and some b
500 in b1 ... bk, and some a in a1...ai)
501 You can then eliminate this x from the non-constant inequalities by
502 rewriting these as a <= b, x >= constant, and delete the x variable.
503 You can then repeat this for any remaining x variables, and then we have
504 an easy to use variable <= constant (or no variables at all) form that we
505 can construct our bounds from.
507 In our case, each time we eliminate, we construct part of the bound from
508 the ith variable, then delete the ith variable.
510 Remember the constants are in our vector a, our coefficient matrix is A,
511 and our invariant coefficient matrix is B.
513 SIZE is the size of the matrices being passed.
514 DEPTH is the loop nest depth.
515 INVARIANTS is the number of loop invariants.
516 A, B, and a are the coefficient matrix, invariant coefficient, and a
517 vector of constants, respectively. */
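/* For instance, given the system { -x <= -2, x - y <= 0, y <= 7 },
   eliminating x pairs its lower bound (2 <= x, from the negative coefficient)
   with its upper bound (x <= y, from the positive coefficient) to give
   2 <= y, leaving the reduced system { -y <= -2, y <= 7 } plus the recorded
   bounds 2 <= x <= y for the eliminated variable.  */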
519 static lambda_loopnest
520 compute_nest_using_fourier_motzkin (int size,
521 int depth,
522 int invariants,
523 lambda_matrix A,
524 lambda_matrix B,
525 lambda_vector a)
528 int multiple, f1, f2;
529 int i, j, k;
530 lambda_linear_expression expression;
531 lambda_loop loop;
532 lambda_loopnest auxillary_nest;
533 lambda_matrix swapmatrix, A1, B1;
534 lambda_vector swapvector, a1;
535 int newsize;
537 A1 = lambda_matrix_new (128, depth);
538 B1 = lambda_matrix_new (128, invariants);
539 a1 = lambda_vector_new (128);
541 auxillary_nest = lambda_loopnest_new (depth, invariants);
543 for (i = depth - 1; i >= 0; i--)
545 loop = lambda_loop_new ();
546 LN_LOOPS (auxillary_nest)[i] = loop;
547 LL_STEP (loop) = 1;
549 for (j = 0; j < size; j++)
551 if (A[j][i] < 0)
553 /* Any linear expression in the matrix with a coefficient less
554 than 0 becomes part of the new lower bound. */
555 expression = lambda_linear_expression_new (depth, invariants);
557 for (k = 0; k < i; k++)
558 LLE_COEFFICIENTS (expression)[k] = A[j][k];
560 for (k = 0; k < invariants; k++)
561 LLE_INVARIANT_COEFFICIENTS (expression)[k] = -1 * B[j][k];
563 LLE_DENOMINATOR (expression) = -1 * A[j][i];
564 LLE_CONSTANT (expression) = -1 * a[j];
566 /* Ignore if identical to the existing lower bound. */
567 if (!lle_equal (LL_LOWER_BOUND (loop),
568 expression, depth, invariants))
570 LLE_NEXT (expression) = LL_LOWER_BOUND (loop);
571 LL_LOWER_BOUND (loop) = expression;
575 else if (A[j][i] > 0)
577 /* Any linear expression with a coefficient greater than 0
578 becomes part of the new upper bound. */
579 expression = lambda_linear_expression_new (depth, invariants);
580 for (k = 0; k < i; k++)
581 LLE_COEFFICIENTS (expression)[k] = -1 * A[j][k];
583 for (k = 0; k < invariants; k++)
584 LLE_INVARIANT_COEFFICIENTS (expression)[k] = B[j][k];
586 LLE_DENOMINATOR (expression) = A[j][i];
587 LLE_CONSTANT (expression) = a[j];
589 /* Ignore if identical to the existing upper bound. */
590 if (!lle_equal (LL_UPPER_BOUND (loop),
591 expression, depth, invariants))
593 LLE_NEXT (expression) = LL_UPPER_BOUND (loop);
594 LL_UPPER_BOUND (loop) = expression;
600 /* This portion creates a new system of linear inequalities by deleting
601 the i'th variable, reducing the system by one variable. */
602 newsize = 0;
603 for (j = 0; j < size; j++)
605 /* If the coefficient for the i'th variable is 0, then we can just
606 eliminate the variable straightaway. Otherwise, we have to
607 multiply through by the coefficients we are eliminating. */
608 if (A[j][i] == 0)
610 lambda_vector_copy (A[j], A1[newsize], depth);
611 lambda_vector_copy (B[j], B1[newsize], invariants);
612 a1[newsize] = a[j];
613 newsize++;
615 else if (A[j][i] > 0)
617 for (k = 0; k < size; k++)
619 if (A[k][i] < 0)
621 multiple = lcm (A[j][i], A[k][i]);
622 f1 = multiple / A[j][i];
623 f2 = -1 * multiple / A[k][i];
625 lambda_vector_add_mc (A[j], f1, A[k], f2,
626 A1[newsize], depth);
627 lambda_vector_add_mc (B[j], f1, B[k], f2,
628 B1[newsize], invariants);
629 a1[newsize] = f1 * a[j] + f2 * a[k];
630 newsize++;
636 swapmatrix = A;
637 A = A1;
638 A1 = swapmatrix;
640 swapmatrix = B;
641 B = B1;
642 B1 = swapmatrix;
644 swapvector = a;
645 a = a1;
646 a1 = swapvector;
648 size = newsize;
651 return auxillary_nest;
654 /* Compute the loop bounds for the auxiliary space NEST.
655 Input system used is Ax <= b. TRANS is the unimodular transformation.
656 Given the original nest, this function will
657 1. Convert the nest into matrix form, which consists of a matrix for the
658 coefficients, a matrix for the
659 invariant coefficients, and a vector for the constants.
660 2. Use the matrix form to calculate the lattice base for the nest (which is
661 a dense space)
662 3. Compose the dense space transform with the user specified transform, to
663 get a transform we can easily calculate transformed bounds for.
664 4. Multiply the composed transformation matrix times the matrix form of the
665 loop.
666 5. Transform the newly created matrix (from step 4) back into a loop nest
667 using fourier motzkin elimination to figure out the bounds. */
669 static lambda_loopnest
670 lambda_compute_auxillary_space (lambda_loopnest nest,
671 lambda_trans_matrix trans)
673 lambda_matrix A, B, A1, B1;
674 lambda_vector a, a1;
675 lambda_matrix invertedtrans;
676 int depth, invariants, size;
677 int i, j;
678 lambda_loop loop;
679 lambda_linear_expression expression;
680 lambda_lattice lattice;
682 depth = LN_DEPTH (nest);
683 invariants = LN_INVARIANTS (nest);
685 /* Unfortunately, we can't know the number of constraints we'll have
686 ahead of time, but this should be enough even in ridiculous loop nest
687 cases. We must not go over this limit. */
688 A = lambda_matrix_new (128, depth);
689 B = lambda_matrix_new (128, invariants);
690 a = lambda_vector_new (128);
692 A1 = lambda_matrix_new (128, depth);
693 B1 = lambda_matrix_new (128, invariants);
694 a1 = lambda_vector_new (128);
696 /* Store the bounds in the equation matrix A, constant vector a, and
697 invariant matrix B, so that we have Ax <= a + B.
698 This requires a little equation rearranging so that everything is on the
699 correct side of the inequality. */
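  /* For example, a single loop with bounds 0 <= i <= n (n invariant, step 1)
     produces the two rows  -i <= 0  and  i <= 0 + n,  i.e. A = {{-1},{1}},
     a = {0,0} and B = {{0},{1}}.  */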
700 size = 0;
701 for (i = 0; i < depth; i++)
703 loop = LN_LOOPS (nest)[i];
705 /* First we do the lower bound. */
706 if (LL_STEP (loop) > 0)
707 expression = LL_LOWER_BOUND (loop);
708 else
709 expression = LL_UPPER_BOUND (loop);
711 for (; expression != NULL; expression = LLE_NEXT (expression))
713 /* Fill in the coefficient. */
714 for (j = 0; j < i; j++)
715 A[size][j] = LLE_COEFFICIENTS (expression)[j];
717 /* And the invariant coefficient. */
718 for (j = 0; j < invariants; j++)
719 B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j];
721 /* And the constant. */
722 a[size] = LLE_CONSTANT (expression);
724 /* Convert (2x+3y+2+b)/4 <= z to 2x+3y-4z <= -2-b. IE put all
725 constants and invariants on the right-hand side and the variables on the left.  */
726 A[size][i] = -1 * LLE_DENOMINATOR (expression);
727 a[size] *= -1;
728 for (j = 0; j < invariants; j++)
729 B[size][j] *= -1;
731 size++;
732 /* Need to increase matrix sizes above. */
733 gcc_assert (size <= 127);
737 /* Then do the exact same thing for the upper bounds. */
738 if (LL_STEP (loop) > 0)
739 expression = LL_UPPER_BOUND (loop);
740 else
741 expression = LL_LOWER_BOUND (loop);
743 for (; expression != NULL; expression = LLE_NEXT (expression))
745 /* Fill in the coefficient. */
746 for (j = 0; j < i; j++)
747 A[size][j] = LLE_COEFFICIENTS (expression)[j];
749 /* And the invariant coefficient. */
750 for (j = 0; j < invariants; j++)
751 B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j];
753 /* And the constant. */
754 a[size] = LLE_CONSTANT (expression);
756 /* Convert z <= (2x+3y+2+b)/4 to -2x-3y+4z <= 2+b. */
757 for (j = 0; j < i; j++)
758 A[size][j] *= -1;
759 A[size][i] = LLE_DENOMINATOR (expression);
760 size++;
761 /* Need to increase matrix sizes above. */
762 gcc_assert (size <= 127);
767 /* Compute the lattice base x = base * y + origin, where y is the
768 base space. */
769 lattice = lambda_lattice_compute_base (nest);
771 /* Ax <= a + B then becomes ALy <= a+B - A*origin. L is the lattice base */
773 /* A1 = A * L */
774 lambda_matrix_mult (A, LATTICE_BASE (lattice), A1, size, depth, depth);
776 /* a1 = a - A * origin constant. */
777 lambda_matrix_vector_mult (A, size, depth, LATTICE_ORIGIN (lattice), a1);
778 lambda_vector_add_mc (a, 1, a1, -1, a1, size);
780 /* B1 = B - A * origin invariant. */
781 lambda_matrix_mult (A, LATTICE_ORIGIN_INVARIANTS (lattice), B1, size, depth,
782 invariants);
783 lambda_matrix_add_mc (B, 1, B1, -1, B1, size, invariants);
785 /* Now compute the auxiliary space bounds by first inverting U, multiplying
786 it by A1, then performing fourier motzkin. */
788 invertedtrans = lambda_matrix_new (depth, depth);
790 /* Compute the inverse of U. */
791 lambda_matrix_inverse (LTM_MATRIX (trans),
792 invertedtrans, depth);
794 /* A = A1 inv(U). */
795 lambda_matrix_mult (A1, invertedtrans, A, size, depth, depth);
797 return compute_nest_using_fourier_motzkin (size, depth, invariants,
798 A, B1, a1);
801 /* Compute the loop bounds for the target space, using the bounds of
802 the auxiliary nest AUXILLARY_NEST, and the triangular matrix H.
803 The target space loop bounds are computed by multiplying the triangular
804 matrix H by the auxiliary nest, to get the new loop bounds. The sign of
805 the loop steps (positive or negative) is then used to swap the bounds if
806 the loop counts downwards.
807 Return the target loopnest. */
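/* For instance, with H = {{2, 0}, {1, 3}} the outer target loop gets step 2
   and the inner loop step 3 (the diagonal of H), while the off-diagonal
   entry 1 shows up as a linear offset in the inner loop's bounds.  */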
809 static lambda_loopnest
810 lambda_compute_target_space (lambda_loopnest auxillary_nest,
811 lambda_trans_matrix H, lambda_vector stepsigns)
813 lambda_matrix inverse, H1;
814 int determinant, i, j;
815 int gcd1, gcd2;
816 int factor;
818 lambda_loopnest target_nest;
819 int depth, invariants;
820 lambda_matrix target;
822 lambda_loop auxillary_loop, target_loop;
823 lambda_linear_expression expression, auxillary_expr, target_expr, tmp_expr;
825 depth = LN_DEPTH (auxillary_nest);
826 invariants = LN_INVARIANTS (auxillary_nest);
828 inverse = lambda_matrix_new (depth, depth);
829 determinant = lambda_matrix_inverse (LTM_MATRIX (H), inverse, depth);
831 /* H1 is H excluding its diagonal. */
832 H1 = lambda_matrix_new (depth, depth);
833 lambda_matrix_copy (LTM_MATRIX (H), H1, depth, depth);
835 for (i = 0; i < depth; i++)
836 H1[i][i] = 0;
838 /* Computes the linear offsets of the loop bounds. */
839 target = lambda_matrix_new (depth, depth);
840 lambda_matrix_mult (H1, inverse, target, depth, depth, depth);
842 target_nest = lambda_loopnest_new (depth, invariants);
844 for (i = 0; i < depth; i++)
847 /* Get a new loop structure. */
848 target_loop = lambda_loop_new ();
849 LN_LOOPS (target_nest)[i] = target_loop;
851 /* Computes the gcd of the coefficients of the linear part. */
852 gcd1 = gcd_vector (target[i], i);
854 /* Include the denominator in the GCD. */
855 gcd1 = gcd (gcd1, determinant);
857 /* Now divide through by the gcd. */
858 for (j = 0; j < i; j++)
859 target[i][j] = target[i][j] / gcd1;
861 expression = lambda_linear_expression_new (depth, invariants);
862 lambda_vector_copy (target[i], LLE_COEFFICIENTS (expression), depth);
863 LLE_DENOMINATOR (expression) = determinant / gcd1;
864 LLE_CONSTANT (expression) = 0;
865 lambda_vector_clear (LLE_INVARIANT_COEFFICIENTS (expression),
866 invariants);
867 LL_LINEAR_OFFSET (target_loop) = expression;
870 /* For each loop, compute the new bounds from H. */
871 for (i = 0; i < depth; i++)
873 auxillary_loop = LN_LOOPS (auxillary_nest)[i];
874 target_loop = LN_LOOPS (target_nest)[i];
875 LL_STEP (target_loop) = LTM_MATRIX (H)[i][i];
876 factor = LTM_MATRIX (H)[i][i];
878 /* First we do the lower bound. */
879 auxillary_expr = LL_LOWER_BOUND (auxillary_loop);
881 for (; auxillary_expr != NULL;
882 auxillary_expr = LLE_NEXT (auxillary_expr))
884 target_expr = lambda_linear_expression_new (depth, invariants);
885 lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
886 depth, inverse, depth,
887 LLE_COEFFICIENTS (target_expr));
888 lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
889 LLE_COEFFICIENTS (target_expr), depth,
890 factor);
892 LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
893 lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
894 LLE_INVARIANT_COEFFICIENTS (target_expr),
895 invariants);
896 lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
897 LLE_INVARIANT_COEFFICIENTS (target_expr),
898 invariants, factor);
899 LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);
901 if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
903 LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
904 * determinant;
905 lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
906 (target_expr),
907 LLE_INVARIANT_COEFFICIENTS
908 (target_expr), invariants,
909 determinant);
910 LLE_DENOMINATOR (target_expr) =
911 LLE_DENOMINATOR (target_expr) * determinant;
913 /* Find the gcd and divide by it here, rather than doing it
914 at the tree level. */
915 gcd1 = gcd_vector (LLE_COEFFICIENTS (target_expr), depth);
916 gcd2 = gcd_vector (LLE_INVARIANT_COEFFICIENTS (target_expr),
917 invariants);
918 gcd1 = gcd (gcd1, gcd2);
919 gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
920 gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
921 for (j = 0; j < depth; j++)
922 LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
923 for (j = 0; j < invariants; j++)
924 LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
925 LLE_CONSTANT (target_expr) /= gcd1;
926 LLE_DENOMINATOR (target_expr) /= gcd1;
927 /* Ignore if identical to existing bound. */
928 if (!lle_equal (LL_LOWER_BOUND (target_loop), target_expr, depth,
929 invariants))
931 LLE_NEXT (target_expr) = LL_LOWER_BOUND (target_loop);
932 LL_LOWER_BOUND (target_loop) = target_expr;
935 /* Now do the upper bound. */
936 auxillary_expr = LL_UPPER_BOUND (auxillary_loop);
938 for (; auxillary_expr != NULL;
939 auxillary_expr = LLE_NEXT (auxillary_expr))
941 target_expr = lambda_linear_expression_new (depth, invariants);
942 lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
943 depth, inverse, depth,
944 LLE_COEFFICIENTS (target_expr));
945 lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
946 LLE_COEFFICIENTS (target_expr), depth,
947 factor);
948 LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
949 lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
950 LLE_INVARIANT_COEFFICIENTS (target_expr),
951 invariants);
952 lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
953 LLE_INVARIANT_COEFFICIENTS (target_expr),
954 invariants, factor);
955 LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);
957 if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
959 LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
960 * determinant;
961 lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
962 (target_expr),
963 LLE_INVARIANT_COEFFICIENTS
964 (target_expr), invariants,
965 determinant);
966 LLE_DENOMINATOR (target_expr) =
967 LLE_DENOMINATOR (target_expr) * determinant;
969 /* Find the gcd and divide by it here, instead of at the
970 tree level. */
971 gcd1 = gcd_vector (LLE_COEFFICIENTS (target_expr), depth);
972 gcd2 = gcd_vector (LLE_INVARIANT_COEFFICIENTS (target_expr),
973 invariants);
974 gcd1 = gcd (gcd1, gcd2);
975 gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
976 gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
977 for (j = 0; j < depth; j++)
978 LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
979 for (j = 0; j < invariants; j++)
980 LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
981 LLE_CONSTANT (target_expr) /= gcd1;
982 LLE_DENOMINATOR (target_expr) /= gcd1;
983 /* Ignore if equal to existing bound. */
984 if (!lle_equal (LL_UPPER_BOUND (target_loop), target_expr, depth,
985 invariants))
987 LLE_NEXT (target_expr) = LL_UPPER_BOUND (target_loop);
988 LL_UPPER_BOUND (target_loop) = target_expr;
992 for (i = 0; i < depth; i++)
994 target_loop = LN_LOOPS (target_nest)[i];
995 /* If necessary, exchange the upper and lower bounds and negate
996 the step size. */
997 if (stepsigns[i] < 0)
999 LL_STEP (target_loop) *= -1;
1000 tmp_expr = LL_LOWER_BOUND (target_loop);
1001 LL_LOWER_BOUND (target_loop) = LL_UPPER_BOUND (target_loop);
1002 LL_UPPER_BOUND (target_loop) = tmp_expr;
1005 return target_nest;
1008 /* Compute the step signs of the transformed loops from the transformation
1009 matrix TRANS and the original step signs STEPSIGNS.  Return the new sign vector. */
1011 static lambda_vector
1012 lambda_compute_step_signs (lambda_trans_matrix trans, lambda_vector stepsigns)
1014 lambda_matrix matrix, H;
1015 int size;
1016 lambda_vector newsteps;
1017 int i, j, factor, minimum_column;
1018 int temp;
1020 matrix = LTM_MATRIX (trans);
1021 size = LTM_ROWSIZE (trans);
1022 H = lambda_matrix_new (size, size);
1024 newsteps = lambda_vector_new (size);
1025 lambda_vector_copy (stepsigns, newsteps, size);
1027 lambda_matrix_copy (matrix, H, size, size);
1029 for (j = 0; j < size; j++)
1031 lambda_vector row;
1032 row = H[j];
1033 for (i = j; i < size; i++)
1034 if (row[i] < 0)
1035 lambda_matrix_col_negate (H, size, i);
1036 while (lambda_vector_first_nz (row, size, j + 1) < size)
1038 minimum_column = lambda_vector_min_nz (row, size, j);
1039 lambda_matrix_col_exchange (H, size, j, minimum_column);
1041 temp = newsteps[j];
1042 newsteps[j] = newsteps[minimum_column];
1043 newsteps[minimum_column] = temp;
1045 for (i = j + 1; i < size; i++)
1047 factor = row[i] / row[j];
1048 lambda_matrix_col_add (H, size, j, i, -1 * factor);
1052 return newsteps;
1055 /* Transform NEST according to TRANS, and return the new loopnest.
1056 This involves
1057 1. Computing a lattice base for the transformation
1058 2. Composing the dense base with the specified transformation (TRANS)
1059 3. Decomposing the combined transformation into a lower triangular portion,
1060 and a unimodular portion.
1061 4. Computing the auxiliary nest using the unimodular portion.
1062 5. Computing the target nest using the auxiliary nest and the lower
1063 triangular portion. */
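/* For example, interchanging the two loops of a doubly-nested loop
   corresponds to TRANS being the permutation matrix {{0, 1}, {1, 0}}, which
   maps iteration point (i, j) to (j, i); the steps above then produce the
   bounds and steps of the interchanged nest.  */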
1065 lambda_loopnest
1066 lambda_loopnest_transform (lambda_loopnest nest, lambda_trans_matrix trans)
1068 lambda_loopnest auxillary_nest, target_nest;
1070 int depth, invariants;
1071 int i, j;
1072 lambda_lattice lattice;
1073 lambda_trans_matrix trans1, H, U;
1074 lambda_loop loop;
1075 lambda_linear_expression expression;
1076 lambda_vector origin;
1077 lambda_matrix origin_invariants;
1078 lambda_vector stepsigns;
1079 int f;
1081 depth = LN_DEPTH (nest);
1082 invariants = LN_INVARIANTS (nest);
1084 /* Keep track of the signs of the loop steps. */
1085 stepsigns = lambda_vector_new (depth);
1086 for (i = 0; i < depth; i++)
1088 if (LL_STEP (LN_LOOPS (nest)[i]) > 0)
1089 stepsigns[i] = 1;
1090 else
1091 stepsigns[i] = -1;
1094 /* Compute the lattice base. */
1095 lattice = lambda_lattice_compute_base (nest);
1096 trans1 = lambda_trans_matrix_new (depth, depth);
1098 /* Multiply the transformation matrix by the lattice base. */
1100 lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_BASE (lattice),
1101 LTM_MATRIX (trans1), depth, depth, depth);
1103 /* Compute the Hermite normal form for the new transformation matrix. */
1104 H = lambda_trans_matrix_new (depth, depth);
1105 U = lambda_trans_matrix_new (depth, depth);
1106 lambda_matrix_hermite (LTM_MATRIX (trans1), depth, LTM_MATRIX (H),
1107 LTM_MATRIX (U));
1109 /* Compute the auxiliary loop nest's space from the unimodular
1110 portion. */
1111 auxillary_nest = lambda_compute_auxillary_space (nest, U);
1113 /* Compute the loop step signs from the old step signs and the
1114 transformation matrix. */
1115 stepsigns = lambda_compute_step_signs (trans1, stepsigns);
1117 /* Compute the target loop nest space from the auxiliary nest and
1118 the lower triangular matrix H. */
1119 target_nest = lambda_compute_target_space (auxillary_nest, H, stepsigns);
1120 origin = lambda_vector_new (depth);
1121 origin_invariants = lambda_matrix_new (depth, invariants);
1122 lambda_matrix_vector_mult (LTM_MATRIX (trans), depth, depth,
1123 LATTICE_ORIGIN (lattice), origin);
1124 lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_ORIGIN_INVARIANTS (lattice),
1125 origin_invariants, depth, depth, invariants);
1127 for (i = 0; i < depth; i++)
1129 loop = LN_LOOPS (target_nest)[i];
1130 expression = LL_LINEAR_OFFSET (loop);
1131 if (lambda_vector_zerop (LLE_COEFFICIENTS (expression), depth))
1132 f = 1;
1133 else
1134 f = LLE_DENOMINATOR (expression);
1136 LLE_CONSTANT (expression) += f * origin[i];
1138 for (j = 0; j < invariants; j++)
1139 LLE_INVARIANT_COEFFICIENTS (expression)[j] +=
1140 f * origin_invariants[i][j];
1143 return target_nest;
1147 /* Convert a gcc tree expression EXPR to a lambda linear expression, and
1148 return the new expression. DEPTH is the depth of the loopnest.
1149 OUTERINDUCTIONVARS is an array of the induction variables for outer loops
1150 in this nest. INVARIANTS is the array of invariants for the loop. EXTRA
1151 is the amount we have to add/subtract from the expression because of the
1152 type of comparison it is used in. */
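/* For example, an INTEGER_CST of 10 with EXTRA = -1 becomes a constant
   expression with LLE_CONSTANT = 9, while an SSA_NAME matching the k-th
   outer induction variable becomes an expression whose k-th coefficient
   is 1.  */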
1154 static lambda_linear_expression
1155 gcc_tree_to_linear_expression (int depth, tree expr,
1156 VEC(tree,heap) *outerinductionvars,
1157 VEC(tree,heap) *invariants, int extra)
1159 lambda_linear_expression lle = NULL;
1160 switch (TREE_CODE (expr))
1162 case INTEGER_CST:
1164 lle = lambda_linear_expression_new (depth, 2 * depth);
1165 LLE_CONSTANT (lle) = TREE_INT_CST_LOW (expr);
1166 if (extra != 0)
1167 LLE_CONSTANT (lle) += extra;
1169 LLE_DENOMINATOR (lle) = 1;
1171 break;
1172 case SSA_NAME:
1174 tree iv, invar;
1175 size_t i;
1176 for (i = 0; VEC_iterate (tree, outerinductionvars, i, iv); i++)
1177 if (iv != NULL)
1179 if (SSA_NAME_VAR (iv) == SSA_NAME_VAR (expr))
1181 lle = lambda_linear_expression_new (depth, 2 * depth);
1182 LLE_COEFFICIENTS (lle)[i] = 1;
1183 if (extra != 0)
1184 LLE_CONSTANT (lle) = extra;
1186 LLE_DENOMINATOR (lle) = 1;
1189 for (i = 0; VEC_iterate (tree, invariants, i, invar); i++)
1190 if (invar != NULL)
1192 if (SSA_NAME_VAR (invar) == SSA_NAME_VAR (expr))
1194 lle = lambda_linear_expression_new (depth, 2 * depth);
1195 LLE_INVARIANT_COEFFICIENTS (lle)[i] = 1;
1196 if (extra != 0)
1197 LLE_CONSTANT (lle) = extra;
1198 LLE_DENOMINATOR (lle) = 1;
1202 break;
1203 default:
1204 return NULL;
1207 return lle;
1210 /* Return the depth of the loopnest NEST */
1212 static int
1213 depth_of_nest (struct loop *nest)
1215 size_t depth = 0;
1216 while (nest)
1218 depth++;
1219 nest = nest->inner;
1221 return depth;
1225 /* Return true if OP is invariant in LOOP and all outer loops. */
1227 static bool
1228 invariant_in_loop_and_outer_loops (struct loop *loop, tree op)
1230 if (is_gimple_min_invariant (op))
1231 return true;
1232 if (loop->depth == 0)
1233 return true;
1234 if (!expr_invariant_in_loop_p (loop, op))
1235 return false;
1236 if (loop->outer
1237 && !invariant_in_loop_and_outer_loops (loop->outer, op))
1238 return false;
1239 return true;
1242 /* Generate a lambda loop from a gcc loop LOOP. Return the new lambda loop,
1243 or NULL if it could not be converted.
1244 DEPTH is the depth of the loop.
1245 INVARIANTS is a pointer to the array of loop invariants.
1246 The induction variable for this loop should be stored in the parameter
1247 OURINDUCTIONVAR.
1248 OUTERINDUCTIONVARS is an array of induction variables for outer loops.
LBOUNDVARS, UBOUNDVARS and STEPS are vectors onto which the lower bound,
adjusted upper bound, and step of this loop are pushed. */
1250 static lambda_loop
1251 gcc_loop_to_lambda_loop (struct loop *loop, int depth,
1252 VEC(tree,heap) ** invariants,
1253 tree * ourinductionvar,
1254 VEC(tree,heap) * outerinductionvars,
1255 VEC(tree,heap) ** lboundvars,
1256 VEC(tree,heap) ** uboundvars,
1257 VEC(int,heap) ** steps)
1259 tree phi;
1260 tree exit_cond;
1261 tree access_fn, inductionvar;
1262 tree step;
1263 lambda_loop lloop = NULL;
1264 lambda_linear_expression lbound, ubound;
1265 tree test;
1266 int stepint;
1267 int extra = 0;
1268 tree lboundvar, uboundvar, uboundresult;
1269 use_optype uses;
1271 /* Find out induction var and exit condition. */
1272 inductionvar = find_induction_var_from_exit_cond (loop);
1273 exit_cond = get_loop_exit_condition (loop);
1275 if (inductionvar == NULL || exit_cond == NULL)
1277 if (dump_file && (dump_flags & TDF_DETAILS))
1278 fprintf (dump_file,
1279 "Unable to convert loop: Cannot determine exit condition or induction variable for loop.\n");
1280 return NULL;
1283 test = TREE_OPERAND (exit_cond, 0);
1285 if (SSA_NAME_DEF_STMT (inductionvar) == NULL_TREE)
1288 if (dump_file && (dump_flags & TDF_DETAILS))
1289 fprintf (dump_file,
1290 "Unable to convert loop: Cannot find PHI node for induction variable\n");
1292 return NULL;
1295 phi = SSA_NAME_DEF_STMT (inductionvar);
1296 if (TREE_CODE (phi) != PHI_NODE)
1298 uses = STMT_USE_OPS (phi);
1300 if (!uses)
1303 if (dump_file && (dump_flags & TDF_DETAILS))
1304 fprintf (dump_file,
1305 "Unable to convert loop: Cannot find PHI node for induction variable\n");
1307 return NULL;
1310 phi = USE_OP (uses, 0);
1311 phi = SSA_NAME_DEF_STMT (phi);
1312 if (TREE_CODE (phi) != PHI_NODE)
1315 if (dump_file && (dump_flags & TDF_DETAILS))
1316 fprintf (dump_file,
1317 "Unable to convert loop: Cannot find PHI node for induction variable\n");
1318 return NULL;
1323 /* The induction variable name/version we want to put in the array is the
1324 result of the induction variable phi node. */
1325 *ourinductionvar = PHI_RESULT (phi);
1326 access_fn = instantiate_parameters
1327 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
1328 if (access_fn == chrec_dont_know)
1330 if (dump_file && (dump_flags & TDF_DETAILS))
1331 fprintf (dump_file,
1332 "Unable to convert loop: Access function for induction variable phi is unknown\n");
1334 return NULL;
1337 step = evolution_part_in_loop_num (access_fn, loop->num);
1338 if (!step || step == chrec_dont_know)
1340 if (dump_file && (dump_flags & TDF_DETAILS))
1341 fprintf (dump_file,
1342 "Unable to convert loop: Cannot determine step of loop.\n");
1344 return NULL;
1346 if (TREE_CODE (step) != INTEGER_CST)
1349 if (dump_file && (dump_flags & TDF_DETAILS))
1350 fprintf (dump_file,
1351 "Unable to convert loop: Step of loop is not integer.\n");
1352 return NULL;
1355 stepint = TREE_INT_CST_LOW (step);
1357 /* Only want phis for induction vars, which will have two
1358 arguments. */
1359 if (PHI_NUM_ARGS (phi) != 2)
1361 if (dump_file && (dump_flags & TDF_DETAILS))
1362 fprintf (dump_file,
1363 "Unable to convert loop: PHI node for induction variable has >2 arguments\n");
1364 return NULL;
1367 /* Another induction variable check. One argument's source should be
1368 in the loop, one outside the loop. */
1369 if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 0)->src)
1370 && flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 1)->src))
1373 if (dump_file && (dump_flags & TDF_DETAILS))
1374 fprintf (dump_file,
1375 "Unable to convert loop: PHI edges both inside loop, or both outside loop.\n");
1377 return NULL;
1380 if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 0)->src))
1382 lboundvar = PHI_ARG_DEF (phi, 1);
1383 lbound = gcc_tree_to_linear_expression (depth, lboundvar,
1384 outerinductionvars, *invariants,
1387 else
1389 lboundvar = PHI_ARG_DEF (phi, 0);
1390 lbound = gcc_tree_to_linear_expression (depth, lboundvar,
1391 outerinductionvars, *invariants,
1395 if (!lbound)
1398 if (dump_file && (dump_flags & TDF_DETAILS))
1399 fprintf (dump_file,
1400 "Unable to convert loop: Cannot convert lower bound to linear expression\n");
1402 return NULL;
1404 /* One part of the test may be a loop invariant tree. */
1405 VEC_reserve (tree, heap, *invariants, 1);
1406 if (TREE_CODE (TREE_OPERAND (test, 1)) == SSA_NAME
1407 && invariant_in_loop_and_outer_loops (loop, TREE_OPERAND (test, 1)))
1408 VEC_quick_push (tree, *invariants, TREE_OPERAND (test, 1));
1409 else if (TREE_CODE (TREE_OPERAND (test, 0)) == SSA_NAME
1410 && invariant_in_loop_and_outer_loops (loop, TREE_OPERAND (test, 0)))
1411 VEC_quick_push (tree, *invariants, TREE_OPERAND (test, 0));
1413 /* The non-induction variable part of the test is the upper bound variable.  */
1415 if (TREE_OPERAND (test, 0) == inductionvar)
1416 uboundvar = TREE_OPERAND (test, 1);
1417 else
1418 uboundvar = TREE_OPERAND (test, 0);
1421 /* We only size the vectors assuming we have, at max, 2 times as many
1422 invariants as we do loops (one for each bound).
1423 This is just an arbitrary number, but it has to be matched against the
1424 code below. */
1425 gcc_assert (VEC_length (tree, *invariants) <= (unsigned int) (2 * depth));
1428 /* The exit test may be exclusive or an equality test, so the upper bound may need adjusting by the step. */
1429 if (TREE_CODE (test) == LT_EXPR)
1430 extra = -1 * stepint;
1431 else if (TREE_CODE (test) == NE_EXPR)
1432 extra = -1 * stepint;
1433 else if (TREE_CODE (test) == GT_EXPR)
1434 extra = -1 * stepint;
1435 else if (TREE_CODE (test) == EQ_EXPR)
1436 extra = 1 * stepint;
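  /* For example, an exit test "i < N" with step 1 gives extra = -1, so the
     upper bound actually used below is N - 1.  */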
1438 ubound = gcc_tree_to_linear_expression (depth, uboundvar,
1439 outerinductionvars,
1440 *invariants, extra);
1441 uboundresult = build (PLUS_EXPR, TREE_TYPE (uboundvar), uboundvar,
1442 build_int_cst (TREE_TYPE (uboundvar), extra));
1443 VEC_safe_push (tree, heap, *uboundvars, uboundresult);
1444 VEC_safe_push (tree, heap, *lboundvars, lboundvar);
1445 VEC_safe_push (int, heap, *steps, stepint);
1446 if (!ubound)
1448 if (dump_file && (dump_flags & TDF_DETAILS))
1449 fprintf (dump_file,
1450 "Unable to convert loop: Cannot convert upper bound to linear expression\n");
1451 return NULL;
1454 lloop = lambda_loop_new ();
1455 LL_STEP (lloop) = stepint;
1456 LL_LOWER_BOUND (lloop) = lbound;
1457 LL_UPPER_BOUND (lloop) = ubound;
1458 return lloop;
1461 /* Given a LOOP, find the induction variable it is testing against in the exit
1462 condition. Return the induction variable if found, NULL otherwise. */
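/* For example, if the exit condition tests "i_7 < n_3" and n_3 is invariant
   in LOOP, i_7 is returned.  */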
1464 static tree
1465 find_induction_var_from_exit_cond (struct loop *loop)
1467 tree expr = get_loop_exit_condition (loop);
1468 tree ivarop;
1469 tree test;
1470 if (expr == NULL_TREE)
1471 return NULL_TREE;
1472 if (TREE_CODE (expr) != COND_EXPR)
1473 return NULL_TREE;
1474 test = TREE_OPERAND (expr, 0);
1475 if (!COMPARISON_CLASS_P (test))
1476 return NULL_TREE;
1478 /* Find the side that is invariant in this loop. The ivar must be the other
1479 side. */
1481 if (expr_invariant_in_loop_p (loop, TREE_OPERAND (test, 0)))
1482 ivarop = TREE_OPERAND (test, 1);
1483 else if (expr_invariant_in_loop_p (loop, TREE_OPERAND (test, 1)))
1484 ivarop = TREE_OPERAND (test, 0);
1485 else
1486 return NULL_TREE;
1488 if (TREE_CODE (ivarop) != SSA_NAME)
1489 return NULL_TREE;
1490 return ivarop;
1493 DEF_VEC_P(lambda_loop);
1494 DEF_VEC_ALLOC_P(lambda_loop,heap);
1496 /* Generate a lambda loopnest from a gcc loopnest LOOP_NEST.
1497 Return the new loop nest.
1498 INDUCTIONVARS is a pointer to an array of induction variables for the
1499 loopnest that will be filled in during this process.
1500 INVARIANTS is a pointer to an array of invariants that will be filled in
1501 during this process. */
1503 lambda_loopnest
1504 gcc_loopnest_to_lambda_loopnest (struct loops *currloops,
1505 struct loop * loop_nest,
1506 VEC(tree,heap) **inductionvars,
1507 VEC(tree,heap) **invariants,
1508 bool need_perfect_nest)
1510 lambda_loopnest ret = NULL;
1511 struct loop *temp;
1512 int depth = 0;
1513 size_t i;
1514 VEC(lambda_loop,heap) *loops = NULL;
1515 VEC(tree,heap) *uboundvars = NULL;
1516 VEC(tree,heap) *lboundvars = NULL;
1517 VEC(int,heap) *steps = NULL;
1518 lambda_loop newloop;
1519 tree inductionvar = NULL;
1521 depth = depth_of_nest (loop_nest);
1522 temp = loop_nest;
1523 while (temp)
1525 newloop = gcc_loop_to_lambda_loop (temp, depth, invariants,
1526 &inductionvar, *inductionvars,
1527 &lboundvars, &uboundvars,
1528 &steps);
1529 if (!newloop)
1530 return NULL;
1531 VEC_safe_push (tree, heap, *inductionvars, inductionvar);
1532 VEC_safe_push (lambda_loop, heap, loops, newloop);
1533 temp = temp->inner;
1535 if (need_perfect_nest)
1537 if (!perfect_nestify (currloops, loop_nest,
1538 lboundvars, uboundvars, steps, *inductionvars))
1540 if (dump_file)
1541 fprintf (dump_file,
1542 "Not a perfect loop nest and couldn't convert to one.\n");
1543 goto fail;
1545 else if (dump_file)
1546 fprintf (dump_file,
1547 "Successfully converted loop nest to perfect loop nest.\n");
1549 ret = lambda_loopnest_new (depth, 2 * depth);
1550 for (i = 0; VEC_iterate (lambda_loop, loops, i, newloop); i++)
1551 LN_LOOPS (ret)[i] = newloop;
1552 fail:
1553 VEC_free (lambda_loop, heap, loops);
1554 VEC_free (tree, heap, uboundvars);
1555 VEC_free (tree, heap, lboundvars);
1556 VEC_free (int, heap, steps);
1558 return ret;
1561 /* Convert a lambda body vector LBV to a gcc tree, and return the new tree.
1562 STMTS_TO_INSERT is a pointer to a tree where the statements that need to be
1563 inserted on our behalf are stored.
1564 variables for the loop this LBV is from. TYPE is the tree type to use for
1565 the variables and trees involved. */
1567 static tree
1568 lbv_to_gcc_expression (lambda_body_vector lbv,
1569 tree type, VEC(tree,heap) *induction_vars,
1570 tree *stmts_to_insert)
1572 tree stmts, stmt, resvar, name;
1573 tree iv;
1574 size_t i;
1575 tree_stmt_iterator tsi;
1577 /* Create a statement list and a linear expression temporary. */
1578 stmts = alloc_stmt_list ();
1579 resvar = create_tmp_var (type, "lbvtmp");
1580 add_referenced_tmp_var (resvar);
1582 /* Start at 0. */
1583 stmt = build (MODIFY_EXPR, void_type_node, resvar, integer_zero_node);
1584 name = make_ssa_name (resvar, stmt);
1585 TREE_OPERAND (stmt, 0) = name;
1586 tsi = tsi_last (stmts);
1587 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1589 for (i = 0; VEC_iterate (tree, induction_vars, i, iv); i++)
1591 if (LBV_COEFFICIENTS (lbv)[i] != 0)
1593 tree newname;
1594 tree coeffmult;
1596 /* newname = coefficient * induction_variable */
1597 coeffmult = build_int_cst (type, LBV_COEFFICIENTS (lbv)[i]);
1598 stmt = build (MODIFY_EXPR, void_type_node, resvar,
1599 fold (build (MULT_EXPR, type, iv, coeffmult)));
1601 newname = make_ssa_name (resvar, stmt);
1602 TREE_OPERAND (stmt, 0) = newname;
1603 fold_stmt (&stmt);
1604 tsi = tsi_last (stmts);
1605 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1607 /* name = name + newname */
1608 stmt = build (MODIFY_EXPR, void_type_node, resvar,
1609 build (PLUS_EXPR, type, name, newname));
1610 name = make_ssa_name (resvar, stmt);
1611 TREE_OPERAND (stmt, 0) = name;
1612 fold_stmt (&stmt);
1613 tsi = tsi_last (stmts);
1614 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1619 /* Handle any denominator that occurs. */
1620 if (LBV_DENOMINATOR (lbv) != 1)
1622 tree denominator = build_int_cst (type, LBV_DENOMINATOR (lbv));
1623 stmt = build (MODIFY_EXPR, void_type_node, resvar,
1624 build (CEIL_DIV_EXPR, type, name, denominator));
1625 name = make_ssa_name (resvar, stmt);
1626 TREE_OPERAND (stmt, 0) = name;
1627 fold_stmt (&stmt);
1628 tsi = tsi_last (stmts);
1629 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1631 *stmts_to_insert = stmts;
1632 return name;
1635 /* Convert a linear expression from coefficient and constant form to a
1636 gcc tree.
1637 Return the tree that represents the final value of the expression.
1638 LLE is the linear expression to convert.
1639 OFFSET is the linear offset to apply to the expression.
1640 TYPE is the tree type to use for the variables and math.
1641 INDUCTION_VARS is a vector of induction variables for the loops.
1642 INVARIANTS is a vector of the loop nest invariants.
1643 WRAP specifies what tree code to wrap the results in, if there is more than
1644 one (it is either MAX_EXPR, or MIN_EXPR).
1645 STMTS_TO_INSERT Is a pointer to the statement list we fill in with
1646 statements that need to be inserted for the linear expression. */
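/* For example, a lower-bound expression (i + 3) / 2 is emitted as a short
   chain of GIMPLE statements accumulating into a fresh temporary, followed by
   a CEIL_DIV_EXPR by 2 (upper bounds use FLOOR_DIV_EXPR instead), and when
   LLE is a chain of several expressions the individual results are combined
   with WRAP (MAX_EXPR for lower bounds, MIN_EXPR for upper bounds).  */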
1648 static tree
1649 lle_to_gcc_expression (lambda_linear_expression lle,
1650 lambda_linear_expression offset,
1651 tree type,
1652 VEC(tree,heap) *induction_vars,
1653 VEC(tree,heap) *invariants,
1654 enum tree_code wrap, tree *stmts_to_insert)
1656 tree stmts, stmt, resvar, name;
1657 size_t i;
1658 tree_stmt_iterator tsi;
1659 tree iv, invar;
1660 VEC(tree,heap) *results = NULL;
1662 gcc_assert (wrap == MAX_EXPR || wrap == MIN_EXPR);
1663 name = NULL_TREE;
1664 /* Create a statement list and a linear expression temporary. */
1665 stmts = alloc_stmt_list ();
1666 resvar = create_tmp_var (type, "lletmp");
1667 add_referenced_tmp_var (resvar);
1669 /* Build up the linear expressions, and put the variable representing the
1670 result in the results array. */
1671 for (; lle != NULL; lle = LLE_NEXT (lle))
1673 /* Start at name = 0. */
1674 stmt = build (MODIFY_EXPR, void_type_node, resvar, integer_zero_node);
1675 name = make_ssa_name (resvar, stmt);
1676 TREE_OPERAND (stmt, 0) = name;
1677 fold_stmt (&stmt);
1678 tsi = tsi_last (stmts);
1679 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1681 /* First do the induction variables.
1682 at the end, name = name + all the induction variables added
1683 together. */
1684 for (i = 0; VEC_iterate (tree, induction_vars, i, iv); i++)
1686 if (LLE_COEFFICIENTS (lle)[i] != 0)
1688 tree newname;
1689 tree mult;
1690 tree coeff;
1692 /* mult = induction variable * coefficient. */
1693 if (LLE_COEFFICIENTS (lle)[i] == 1)
1695 mult = VEC_index (tree, induction_vars, i);
1697 else
1699 coeff = build_int_cst (type,
1700 LLE_COEFFICIENTS (lle)[i]);
1701 mult = fold (build (MULT_EXPR, type, iv, coeff));
1704 /* newname = mult */
1705 stmt = build (MODIFY_EXPR, void_type_node, resvar, mult);
1706 newname = make_ssa_name (resvar, stmt);
1707 TREE_OPERAND (stmt, 0) = newname;
1708 fold_stmt (&stmt);
1709 tsi = tsi_last (stmts);
1710 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1712 /* name = name + newname */
1713 stmt = build (MODIFY_EXPR, void_type_node, resvar,
1714 build (PLUS_EXPR, type, name, newname));
1715 name = make_ssa_name (resvar, stmt);
1716 TREE_OPERAND (stmt, 0) = name;
1717 fold_stmt (&stmt);
1718 tsi = tsi_last (stmts);
1719 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1723 /* Handle our invariants.
1724 At the end, we have name = name + result of adding all multiplied
1725 invariants. */
1726 for (i = 0; VEC_iterate (tree, invariants, i, invar); i++)
1728 if (LLE_INVARIANT_COEFFICIENTS (lle)[i] != 0)
1730 tree newname;
1731 tree mult;
1732 tree coeff;
1733 int invcoeff = LLE_INVARIANT_COEFFICIENTS (lle)[i];
1734 /* mult = invariant * coefficient */
1735 if (invcoeff == 1)
1737 mult = invar;
1739 else
1741 coeff = build_int_cst (type, invcoeff);
1742 mult = fold (build (MULT_EXPR, type, invar, coeff));
1745 /* newname = mult */
1746 stmt = build (MODIFY_EXPR, void_type_node, resvar, mult);
1747 newname = make_ssa_name (resvar, stmt);
1748 TREE_OPERAND (stmt, 0) = newname;
1749 fold_stmt (&stmt);
1750 tsi = tsi_last (stmts);
1751 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1753 /* name = name + newname */
1754 stmt = build (MODIFY_EXPR, void_type_node, resvar,
1755 build (PLUS_EXPR, type, name, newname));
1756 name = make_ssa_name (resvar, stmt);
1757 TREE_OPERAND (stmt, 0) = name;
1758 fold_stmt (&stmt);
1759 tsi = tsi_last (stmts);
1760 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1764 /* Now handle the constant.
1765 name = name + constant. */
1766 if (LLE_CONSTANT (lle) != 0)
1768 stmt = build (MODIFY_EXPR, void_type_node, resvar,
1769 build (PLUS_EXPR, type, name,
1770 build_int_cst (type, LLE_CONSTANT (lle))));
1771 name = make_ssa_name (resvar, stmt);
1772 TREE_OPERAND (stmt, 0) = name;
1773 fold_stmt (&stmt);
1774 tsi = tsi_last (stmts);
1775 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1778 /* Now handle the offset.
1779 name = name + linear offset. */
1780 if (LLE_CONSTANT (offset) != 0)
1782 stmt = build (MODIFY_EXPR, void_type_node, resvar,
1783 build (PLUS_EXPR, type, name,
1784 build_int_cst (type, LLE_CONSTANT (offset))));
1785 name = make_ssa_name (resvar, stmt);
1786 TREE_OPERAND (stmt, 0) = name;
1787 fold_stmt (&stmt);
1788 tsi = tsi_last (stmts);
1789 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1792 /* Handle any denominator that occurs. */
1793 if (LLE_DENOMINATOR (lle) != 1)
1795 stmt = build_int_cst (type, LLE_DENOMINATOR (lle));
1796 stmt = build (wrap == MAX_EXPR ? CEIL_DIV_EXPR : FLOOR_DIV_EXPR,
1797 type, name, stmt);
1798 stmt = build (MODIFY_EXPR, void_type_node, resvar, stmt);
1800 /* name = {ceil, floor}(name/denominator) */
1801 name = make_ssa_name (resvar, stmt);
1802 TREE_OPERAND (stmt, 0) = name;
1803 tsi = tsi_last (stmts);
1804 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1806 VEC_safe_push (tree, heap, results, name);
1809 /* Again, out of laziness, we don't handle this case yet. It's not
1810 hard, it just hasn't occurred. */
1811 gcc_assert (VEC_length (tree, results) <= 2);
1813 /* We may need to wrap the results in a MAX_EXPR or MIN_EXPR. */
1814 if (VEC_length (tree, results) > 1)
1816 tree op1 = VEC_index (tree, results, 0);
1817 tree op2 = VEC_index (tree, results, 1);
1818 stmt = build (MODIFY_EXPR, void_type_node, resvar,
1819 build (wrap, type, op1, op2));
1820 name = make_ssa_name (resvar, stmt);
1821 TREE_OPERAND (stmt, 0) = name;
1822 tsi = tsi_last (stmts);
1823 tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1826 VEC_free (tree, heap, results);
1828 *stmts_to_insert = stmts;
1829 return name;
1832 /* Transform a lambda loopnest NEW_LOOPNEST, which had TRANSFORM applied to
1833 it, back into gcc code. This changes the
1834 loops, their induction variables, and their bodies, so that they
1835 match the transformed loopnest.
1836 OLD_LOOPNEST is the loopnest before we've replaced it with the new
1837 loopnest.
1838 OLD_IVS is a vector of induction variables from the old loopnest.
1839 INVARIANTS is a vector of loop invariants from the old loopnest.
1840 NEW_LOOPNEST is the new lambda loopnest to replace OLD_LOOPNEST with.
1841 TRANSFORM is the matrix transform that was applied to OLD_LOOPNEST to get
1842 NEW_LOOPNEST. */
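/* For example, for an interchange of a depth-2 nest, TRANSFORM is
   {{0, 1}, {1, 0}}: the new outer loop runs over the old inner loop's bounds
   and vice versa, and since that matrix is its own inverse, uses of the old
   outer iv in the body are rewritten below in terms of the new inner iv
   (and vice versa).  */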
1844 void
1845 lambda_loopnest_to_gcc_loopnest (struct loop *old_loopnest,
1846 VEC(tree,heap) *old_ivs,
1847 VEC(tree,heap) *invariants,
1848 lambda_loopnest new_loopnest,
1849 lambda_trans_matrix transform)
1851 struct loop *temp;
1852 size_t i = 0;
1853 size_t depth = 0;
1854 VEC(tree,heap) *new_ivs = NULL;
1855 tree oldiv;
1857 block_stmt_iterator bsi;
1859 if (dump_file)
1861 transform = lambda_trans_matrix_inverse (transform);
1862 fprintf (dump_file, "Inverse of transformation matrix:\n");
1863 print_lambda_trans_matrix (dump_file, transform);
1865 depth = depth_of_nest (old_loopnest);
1866 temp = old_loopnest;
1868 while (temp)
1870 lambda_loop newloop;
1871 basic_block bb;
1872 edge exit;
1873 tree ivvar, ivvarinced, exitcond, stmts;
1874 enum tree_code testtype;
1875 tree newupperbound, newlowerbound;
1876 lambda_linear_expression offset;
1877 tree type;
1878 bool insert_after;
1879 tree inc_stmt;
1881 oldiv = VEC_index (tree, old_ivs, i);
1882 type = TREE_TYPE (oldiv);
1884 /* First, build the new induction variable temporary */
1886 ivvar = create_tmp_var (type, "lnivtmp");
1887 add_referenced_tmp_var (ivvar);
1889 VEC_safe_push (tree, heap, new_ivs, ivvar);
1891 newloop = LN_LOOPS (new_loopnest)[i];
1893 /* Linear offset is a bit tricky to handle. Punt on the unhandled
1894 cases for now. */
1895 offset = LL_LINEAR_OFFSET (newloop);
1897 gcc_assert (LLE_DENOMINATOR (offset) == 1 &&
1898 lambda_vector_zerop (LLE_COEFFICIENTS (offset), depth));
1900 /* Now build the new lower bound, and insert the statements
1901 necessary to generate it on the loop preheader edge. */
1902 newlowerbound = lle_to_gcc_expression (LL_LOWER_BOUND (newloop),
1903 LL_LINEAR_OFFSET (newloop),
1904 type,
1905 new_ivs,
1906 invariants, MAX_EXPR, &stmts);
1907 bsi_insert_on_edge (loop_preheader_edge (temp), stmts);
1908 bsi_commit_edge_inserts ();
1909 /* Build the new upper bound and insert its statements in the
1910 basic block of the exit condition. */
1911 newupperbound = lle_to_gcc_expression (LL_UPPER_BOUND (newloop),
1912 LL_LINEAR_OFFSET (newloop),
1913 type,
1914 new_ivs,
1915 invariants, MIN_EXPR, &stmts);
1916 exit = temp->single_exit;
1917 exitcond = get_loop_exit_condition (temp);
1918 bb = bb_for_stmt (exitcond);
1919 bsi = bsi_start (bb);
1920 bsi_insert_after (&bsi, stmts, BSI_NEW_STMT);
1922 /* Create the new iv. */
1924 standard_iv_increment_position (temp, &bsi, &insert_after);
1925 create_iv (newlowerbound,
1926 build_int_cst (type, LL_STEP (newloop)),
1927 ivvar, temp, &bsi, insert_after, &ivvar,
1928 NULL);
1930 /* Unfortunately, the incremented ivvar that create_iv inserted may not
1931 dominate the block containing the exit condition.
1932 So we simply create our own incremented iv to use in the new exit
1933 test, and let redundancy elimination sort it out. */
1934 inc_stmt = build (PLUS_EXPR, type,
1935 ivvar, build_int_cst (type, LL_STEP (newloop)));
1936 inc_stmt = build (MODIFY_EXPR, void_type_node, SSA_NAME_VAR (ivvar),
1937 inc_stmt);
1938 ivvarinced = make_ssa_name (SSA_NAME_VAR (ivvar), inc_stmt);
1939 TREE_OPERAND (inc_stmt, 0) = ivvarinced;
1940 bsi = bsi_for_stmt (exitcond);
1941 bsi_insert_before (&bsi, inc_stmt, BSI_SAME_STMT);
1943 /* Replace the exit condition with the new upper bound
1944 comparison. */
1946 testtype = LL_STEP (newloop) >= 0 ? LE_EXPR : GE_EXPR;
1948 /* We want to build a conditional where true means exit the loop, and
1949 false means continue the loop.
1950 So swap the testtype if this isn't the way things are. */
1952 if (exit->flags & EDGE_FALSE_VALUE)
1953 testtype = swap_tree_comparison (testtype);
1955 COND_EXPR_COND (exitcond) = build (testtype,
1956 boolean_type_node,
1957 newupperbound, ivvarinced);
1958 update_stmt (exitcond);
1959 VEC_replace (tree, new_ivs, i, ivvar);
1961 i++;
1962 temp = temp->inner;
1965 /* Rewrite uses of the old ivs so that they are now specified in terms of
1966 the new ivs. */
1968 for (i = 0; VEC_iterate (tree, old_ivs, i, oldiv); i++)
1970 imm_use_iterator imm_iter;
1971 use_operand_p imm_use;
1972 tree oldiv_def;
1973 tree oldiv_stmt = SSA_NAME_DEF_STMT (oldiv);
1975 gcc_assert (TREE_CODE (oldiv_stmt) == PHI_NODE
1976 || NUM_DEFS (STMT_DEF_OPS (oldiv_stmt)) == 1);
1977 if (TREE_CODE (oldiv_stmt) == PHI_NODE)
1978 oldiv_def = PHI_RESULT (oldiv_stmt);
1979 else
1980 oldiv_def = DEF_OP (STMT_DEF_OPS (oldiv_stmt), 0);
1982 FOR_EACH_IMM_USE_SAFE (imm_use, imm_iter, oldiv_def)
1984 tree stmt = USE_STMT (imm_use);
1985 use_operand_p use_p;
1986 ssa_op_iter iter;
1987 gcc_assert (TREE_CODE (stmt) != PHI_NODE);
1988 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
1990 if (USE_FROM_PTR (use_p) == oldiv)
1992 tree newiv, stmts;
1993 lambda_body_vector lbv, newlbv;
1994 /* Compute the new expression for the induction
1995 variable. */
1996 depth = VEC_length (tree, new_ivs);
1997 lbv = lambda_body_vector_new (depth);
1998 LBV_COEFFICIENTS (lbv)[i] = 1;
2000 newlbv = lambda_body_vector_compute_new (transform, lbv);
2002 newiv = lbv_to_gcc_expression (newlbv, TREE_TYPE (oldiv),
2003 new_ivs, &stmts);
2004 bsi = bsi_for_stmt (stmt);
2005 /* Insert the statements to build that
2006 expression. */
2007 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
2008 propagate_value (use_p, newiv);
2009 update_stmt (stmt);
2015 VEC_free (tree, heap, new_ivs);
2018 /* Returns true when the vector V is lexicographically positive, in
2019 other words, when the first nonzero element is positive. */
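/* For example, {0, 2, -1} is lexicographically positive because its first
   nonzero element, 2, is positive; {0, -3, 7} is not; and the all-zero
   vector falls through the loop below, so it is reported as positive.  */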
2021 static bool
2022 lambda_vector_lexico_pos (lambda_vector v,
2023 unsigned n)
2025 unsigned i;
2026 for (i = 0; i < n; i++)
2028 if (v[i] == 0)
2029 continue;
2030 if (v[i] < 0)
2031 return false;
2032 if (v[i] > 0)
2033 return true;
2035 return true;
2039 /* Return TRUE if this is not an interesting statement from the perspective of
2040 determining whether we have a perfect loop nest. */
2042 static bool
2043 not_interesting_stmt (tree stmt)
2045 /* Note that COND_EXPRs aren't interesting, because if they were exiting the
2046 loop, we would have already failed the number-of-exits tests. */
2047 if (TREE_CODE (stmt) == LABEL_EXPR
2048 || TREE_CODE (stmt) == GOTO_EXPR
2049 || TREE_CODE (stmt) == COND_EXPR)
2050 return true;
2051 return false;
2054 /* Return TRUE if PHI uses DEF for its in-the-loop edge for LOOP. */
2056 static bool
2057 phi_loop_edge_uses_def (struct loop *loop, tree phi, tree def)
2059 int i;
2060 for (i = 0; i < PHI_NUM_ARGS (phi); i++)
2061 if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, i)->src))
2062 if (PHI_ARG_DEF (phi, i) == def)
2063 return true;
2064 return false;
2067 /* Return TRUE if STMT is a use of PHI_RESULT. */
2069 static bool
2070 stmt_uses_phi_result (tree stmt, tree phi_result)
2072 use_optype uses = STMT_USE_OPS (stmt);
2074 /* This is conservatively true, because we only want SIMPLE bumpers
2075 of the form x +- constant for our pass. */
2076 if (NUM_USES (uses) != 1)
2077 return false;
2078 if (USE_OP (uses, 0) == phi_result)
2079 return true;
2081 return false;
2084 /* STMT is a bumper statement for LOOP if the SSA version it defines is used
2085 on the in-loop edge of a phi node, and the operand it uses is the result of
2086 that phi node,
2087 i.e. i_29 = i_3 + 1
2088 i_3 = PHI (0, i_29); */
2090 static bool
2091 stmt_is_bumper_for_loop (struct loop *loop, tree stmt)
2093 tree use;
2094 tree def;
2095 def_optype defs = STMT_DEF_OPS (stmt);
2096 imm_use_iterator iter;
2097 use_operand_p use_p;
2099 if (NUM_DEFS (defs) != 1)
2100 return false;
2101 def = DEF_OP (defs, 0);
2102 FOR_EACH_IMM_USE_FAST (use_p, iter, def)
2104 use = USE_STMT (use_p);
2105 if (TREE_CODE (use) == PHI_NODE)
2107 if (phi_loop_edge_uses_def (loop, use, def))
2108 if (stmt_uses_phi_result (stmt, PHI_RESULT (use)))
2109 return true;
2112 return false;
2116 /* Return true if LOOP is a perfect loop nest.
2117 Perfect loop nests are those loop nests where all code occurs in the
2118 innermost loop body.
2119 If S1 is a program statement, then, for example,
2122 DO I = 1, 20
2123 S1
2124 DO J = 1, 20
2125 ...
2126 END DO
2127 END DO
2128 is not a perfect loop nest because of S1.
2130 DO I = 1, 20
2131 DO J = 1, 20
2132 S1
2133 ...
2134 END DO
2135 END DO
2136 is a perfect loop nest.
2138 Since we don't have high-level loops anymore, we basically have to walk our
2139 statements and ignore those that are there because the loop needs them (i.e.
2140 the induction variable increment, and the jump back to the top of the loop). */
2142 bool
2143 perfect_nest_p (struct loop *loop)
2145 basic_block *bbs;
2146 size_t i;
2147 tree exit_cond;
2149 if (!loop->inner)
2150 return true;
2151 bbs = get_loop_body (loop);
2152 exit_cond = get_loop_exit_condition (loop);
2153 for (i = 0; i < loop->num_nodes; i++)
2155 if (bbs[i]->loop_father == loop)
2157 block_stmt_iterator bsi;
2158 for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi))
2160 tree stmt = bsi_stmt (bsi);
2161 if (stmt == exit_cond
2162 || not_interesting_stmt (stmt)
2163 || stmt_is_bumper_for_loop (loop, stmt))
2164 continue;
2165 free (bbs);
2166 return false;
2170 free (bbs);
2171 /* See if the inner loops are perfectly nested as well. */
2172 if (loop->inner)
2173 return perfect_nest_p (loop->inner);
2174 return true;
2177 /* Replace the USES of tree X in STMT with tree Y. */
2179 static void
2180 replace_uses_of_x_with_y (tree stmt, tree x, tree y)
2182 use_optype uses = STMT_USE_OPS (stmt);
2183 size_t i;
2184 for (i = 0; i < NUM_USES (uses); i++)
2186 if (USE_OP (uses, i) == x)
2187 SET_USE_OP (uses, i, y);
2191 /* Return TRUE if tree OP appears among the USE operands of STMT. */
2193 static bool
2194 stmt_uses_op (tree stmt, tree op)
2196 use_optype uses = STMT_USE_OPS (stmt);
2197 size_t i;
2198 for (i = 0; i < NUM_USES (uses); i++)
2200 if (USE_OP (uses, i) == op)
2201 return true;
2203 return false;
2206 /* Return TRUE if LOOP is an imperfect nest that we can convert to a perfect
2207 one. LOOPIVS is a vector of induction variables, one per loop.
2208 At the moment, we only handle imperfect nests of depth 2, where all of the
2209 statements occur after the inner loop. */
2211 static bool
2212 can_convert_to_perfect_nest (struct loop *loop,
2213 VEC(tree,heap) *loopivs)
2215 basic_block *bbs;
2216 tree exit_condition, phi;
2217 size_t i;
2218 block_stmt_iterator bsi;
2219 basic_block exitdest;
2221 /* Can't handle triply nested+ loops yet. */
2222 if (!loop->inner || loop->inner->inner)
2223 return false;
2225 /* We only handle moving the after-inner-body statements right now, so make
2226 sure all the statements we need to move are located in that position. */
2227 bbs = get_loop_body (loop);
2228 exit_condition = get_loop_exit_condition (loop);
2229 for (i = 0; i < loop->num_nodes; i++)
2231 if (bbs[i]->loop_father == loop)
2233 for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi))
2235 size_t j;
2236 tree stmt = bsi_stmt (bsi);
2237 tree iv;
2239 if (stmt == exit_condition
2240 || not_interesting_stmt (stmt)
2241 || stmt_is_bumper_for_loop (loop, stmt))
2242 continue;
2243 /* If the statement uses inner loop ivs, we are screwed. */
2244 for (j = 1; VEC_iterate (tree, loopivs, j, iv); j++)
2245 if (stmt_uses_op (stmt, iv))
2246 goto fail;
2248 /* If the bb of a statement we care about isn't dominated by
2249 the header of the inner loop, then we are also screwed. */
2250 if (!dominated_by_p (CDI_DOMINATORS,
2251 bb_for_stmt (stmt),
2252 loop->inner->header))
2253 goto fail;
2258 /* We also need to make sure the loop exit only has simple copy phis in it,
2259 otherwise we don't know how to transform it into a perfect nest right
2260 now. */
2261 exitdest = loop->single_exit->dest;
2263 for (phi = phi_nodes (exitdest); phi; phi = PHI_CHAIN (phi))
2264 if (PHI_NUM_ARGS (phi) != 1)
2265 goto fail;
2267 free (bbs);
2268 return true;
2270 fail:
2271 free (bbs);
2272 return false;
2275 /* Transform the loop nest into a perfect nest, if possible.
2276 LOOPS is the current struct loops *.
2277 LOOP is the loop nest to transform into a perfect nest.
2278 LBOUNDS are the lower bounds for the loops to transform.
2279 UBOUNDS are the upper bounds for the loops to transform.
2280 STEPS is the vector of steps for the loops to transform.
2281 LOOPIVS is the vector of induction variables for the loops to transform.
2283 Basically, for the case of
2285 FOR (i = 0; i < 50; i++)
2286 {
2287 FOR (j = 0; j < 50; j++)
2288 {
2289 <whatever>
2290 }
2291 <some code>
2292 }
2294 This function will transform it into a perfect loop nest by splitting the
2295 outer loop into two loops, like so:
2297 FOR (i = 0; i < 50; i++)
2298 {
2299 FOR (j = 0; j < 50; j++)
2300 {
2301 <whatever>
2302 }
2303 }
2305 FOR (i = 0; i < 50; i++)
2306 {
2307 <some code>
2308 }
2310 Return FALSE if we can't make this loop into a perfect nest. */
2311 static bool
2312 perfect_nestify (struct loops *loops,
2313 struct loop *loop,
2314 VEC(tree,heap) *lbounds,
2315 VEC(tree,heap) *ubounds,
2316 VEC(int,heap) *steps,
2317 VEC(tree,heap) *loopivs)
2319 basic_block *bbs;
2320 tree exit_condition;
2321 tree then_label, else_label, cond_stmt;
2322 basic_block preheaderbb, headerbb, bodybb, latchbb, olddest;
2323 size_t i;
2324 block_stmt_iterator bsi;
2325 bool insert_after;
2326 edge e;
2327 struct loop *newloop;
2328 tree phi;
2329 tree uboundvar;
2330 tree stmt;
2331 tree oldivvar, ivvar, ivvarinced;
2332 VEC(tree,heap) *phis = NULL;
2334 if (!can_convert_to_perfect_nest (loop, loopivs))
2335 return false;
2337 /* Create the new loop */
2339 olddest = loop->single_exit->dest;
2340 preheaderbb = loop_split_edge_with (loop->single_exit, NULL);
2341 headerbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2343 /* Push the exit phi nodes that we are moving. */
2344 for (phi = phi_nodes (olddest); phi; phi = PHI_CHAIN (phi))
2346 VEC_reserve (tree, heap, phis, 2);
2347 VEC_quick_push (tree, phis, PHI_RESULT (phi));
2348 VEC_quick_push (tree, phis, PHI_ARG_DEF (phi, 0));
2350 e = redirect_edge_and_branch (single_succ_edge (preheaderbb), headerbb);
2352 /* Remove the exit phis from the old basic block. Make sure to set
2353 PHI_RESULT to null so it doesn't get released. */
2354 while (phi_nodes (olddest) != NULL)
2356 SET_PHI_RESULT (phi_nodes (olddest), NULL);
2357 remove_phi_node (phi_nodes (olddest), NULL);
2360 /* and add them back to the new basic block. */
2361 while (VEC_length (tree, phis) != 0)
2363 tree def;
2364 tree phiname;
2365 def = VEC_pop (tree, phis);
2366 phiname = VEC_pop (tree, phis);
2367 phi = create_phi_node (phiname, preheaderbb);
2368 add_phi_arg (phi, def, single_pred_edge (preheaderbb));
2370 flush_pending_stmts (e);
2371 VEC_free (tree, heap, phis);
2373 bodybb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2374 latchbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2375 make_edge (headerbb, bodybb, EDGE_FALLTHRU);
2376 then_label = build1 (GOTO_EXPR, void_type_node, tree_block_label (latchbb));
2377 else_label = build1 (GOTO_EXPR, void_type_node, tree_block_label (olddest));
2378 cond_stmt = build (COND_EXPR, void_type_node,
2379 build (NE_EXPR, boolean_type_node,
2380 integer_one_node,
2381 integer_zero_node),
2382 then_label, else_label);
2383 bsi = bsi_start (bodybb);
2384 bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
2385 e = make_edge (bodybb, olddest, EDGE_FALSE_VALUE);
2386 make_edge (bodybb, latchbb, EDGE_TRUE_VALUE);
2387 make_edge (latchbb, headerbb, EDGE_FALLTHRU);
2389 /* Update the loop structures. */
2390 newloop = duplicate_loop (loops, loop, olddest->loop_father);
2391 newloop->header = headerbb;
2392 newloop->latch = latchbb;
2393 newloop->single_exit = e;
2394 add_bb_to_loop (latchbb, newloop);
2395 add_bb_to_loop (bodybb, newloop);
2396 add_bb_to_loop (headerbb, newloop);
2397 set_immediate_dominator (CDI_DOMINATORS, bodybb, headerbb);
2398 set_immediate_dominator (CDI_DOMINATORS, headerbb, preheaderbb);
2399 set_immediate_dominator (CDI_DOMINATORS, preheaderbb,
2400 loop->single_exit->src);
2401 set_immediate_dominator (CDI_DOMINATORS, latchbb, bodybb);
2402 set_immediate_dominator (CDI_DOMINATORS, olddest, bodybb);
2403 /* Create the new iv. */
2404 ivvar = create_tmp_var (integer_type_node, "perfectiv");
2405 add_referenced_tmp_var (ivvar);
2406 standard_iv_increment_position (newloop, &bsi, &insert_after);
2407 create_iv (VEC_index (tree, lbounds, 0),
2408 build_int_cst (integer_type_node, VEC_index (int, steps, 0)),
2409 ivvar, newloop, &bsi, insert_after, &ivvar, &ivvarinced);
2411 /* Create the new upper bound. This may not be just a variable, so we copy
2412 it to one just in case. */
2414 exit_condition = get_loop_exit_condition (newloop);
2415 uboundvar = create_tmp_var (integer_type_node, "uboundvar");
2416 add_referenced_tmp_var (uboundvar);
2417 stmt = build (MODIFY_EXPR, void_type_node, uboundvar,
2418 VEC_index (tree, ubounds, 0));
2419 uboundvar = make_ssa_name (uboundvar, stmt);
2420 TREE_OPERAND (stmt, 0) = uboundvar;
2422 if (insert_after)
2423 bsi_insert_after (&bsi, stmt, BSI_SAME_STMT);
2424 else
2425 bsi_insert_before (&bsi, stmt, BSI_SAME_STMT);
2427 COND_EXPR_COND (exit_condition) = build (GE_EXPR,
2428 boolean_type_node,
2429 uboundvar,
2430 ivvarinced);
2432 bbs = get_loop_body (loop);
2433 /* Now replace the induction variable in the moved statements with the
2434 correct loop induction variable. */
2435 oldivvar = VEC_index (tree, loopivs, 0);
2436 for (i = 0; i < loop->num_nodes; i++)
2438 block_stmt_iterator tobsi = bsi_last (bodybb);
2439 if (bbs[i]->loop_father == loop)
2441 /* Note that the bsi only needs to be explicitly incremented
2442 when we don't move something, since it is automatically
2443 incremented when we do. */
2444 for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi);)
2446 ssa_op_iter i;
2447 tree n, stmt = bsi_stmt (bsi);
2449 if (stmt == exit_condition
2450 || not_interesting_stmt (stmt)
2451 || stmt_is_bumper_for_loop (loop, stmt))
2453 bsi_next (&bsi);
2454 continue;
2457 replace_uses_of_x_with_y (stmt, oldivvar, ivvar);
2458 bsi_move_before (&bsi, &tobsi);
2460 /* If the statement has any virtual operands, they may
2461 need to be rewired because the original loop may
2462 still reference them. */
2463 FOR_EACH_SSA_TREE_OPERAND (n, stmt, i, SSA_OP_ALL_VIRTUALS)
2464 mark_sym_for_renaming (SSA_NAME_VAR (n));
2469 free (bbs);
2470 return perfect_nest_p (loop);
2473 /* Return true if TRANS is a legal transformation matrix that respects
2474 the dependence vectors in DISTS and DIRS. The conservative answer
2475 is false.
2477 "Wolfe proves that a unimodular transformation represented by the
2478 matrix T is legal when applied to a loop nest with a set of
2479 lexicographically non-negative distance vectors RDG if and only if
2480 for each vector d in RDG, (T.d >= 0) is lexicographically positive.
2481 i.e.: if and only if it transforms the lexicographically positive
2482 distance vectors to lexicographically positive vectors. Note that
2483 a unimodular matrix must transform the zero vector (and only it) to
2484 the zero vector." S.Muchnick. */
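/* For example, applying the interchange matrix {{0, 1}, {1, 0}} to the
   distance vector (1, -1) gives (-1, 1), which is not lexicographically
   positive, so that interchange would be rejected; applying it to (1, 1)
   gives (1, 1), which is positive, so the interchange would be accepted.  */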
2486 bool
2487 lambda_transform_legal_p (lambda_trans_matrix trans,
2488 int nb_loops,
2489 varray_type dependence_relations)
2491 unsigned int i;
2492 lambda_vector distres;
2493 struct data_dependence_relation *ddr;
2495 gcc_assert (LTM_COLSIZE (trans) == nb_loops
2496 && LTM_ROWSIZE (trans) == nb_loops);
2498 /* When there is an unknown relation in the dependence_relations, we
2499 know that it is not worth looking at this loop nest: give up. */
2500 ddr = (struct data_dependence_relation *)
2501 VARRAY_GENERIC_PTR (dependence_relations, 0);
2502 if (ddr == NULL)
2503 return true;
2504 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2505 return false;
2507 distres = lambda_vector_new (nb_loops);
2509 /* For each distance vector in the dependence graph. */
2510 for (i = 0; i < VARRAY_ACTIVE_SIZE (dependence_relations); i++)
2512 ddr = (struct data_dependence_relation *)
2513 VARRAY_GENERIC_PTR (dependence_relations, i);
2515 /* Don't care about relations for which we know that there is no
2516 dependence, nor about read-read (aka. input dependences):
2517 these data accesses can happen in any order. */
2518 if (DDR_ARE_DEPENDENT (ddr) == chrec_known
2519 || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
2520 continue;
2522 /* Conservatively answer: "this transformation is not valid". */
2523 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2524 return false;
2526 /* If the dependence could not be captured by a distance vector,
2527 conservatively answer that the transform is not valid. */
2528 if (DDR_DIST_VECT (ddr) == NULL)
2529 return false;
2531 /* Compute trans.dist_vect */
2532 lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
2533 DDR_DIST_VECT (ddr), distres);
2535 if (!lambda_vector_lexico_pos (distres, nb_loops))
2536 return false;
2538 return true;