1 /* Loop transformation code generation
2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dberlin@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.  */
24 #include "coretypes.h"
31 #include "basic-block.h"
32 #include "diagnostic.h"
33 #include "tree-flow.h"
34 #include "tree-dump.h"
39 #include "tree-chrec.h"
40 #include "tree-data-ref.h"
41 #include "tree-pass.h"
42 #include "tree-scalar-evolution.h"
46 /* This loop nest code generation is based on non-singular matrix
49 A little terminology and a general sketch of the algorithm. See "A singular
50 loop transformation framework based on non-singular matrices" by Wei Li and
51 Keshav Pingali for formal proofs that the various statements below are
A loop iteration space is the set of points traversed by the loop.  A point in
the iteration space can be represented by a vector of size <loop depth>.  You
can therefore represent the iteration space as integral combinations of a set
of basis vectors.
59 A loop iteration space is dense if every integer point between the loop
60 bounds is a point in the iteration space. Every loop with a step of 1
61 therefore has a dense iteration space.
63 for i = 1 to 3, step 1 is a dense iteration space.
65 A loop iteration space is sparse if it is not dense. That is, the iteration
66 space skips integer points that are within the loop bounds.
68 for i = 1 to 3, step 2 is a sparse iteration space, because the integer point
71 Dense source spaces are easy to transform, because they don't skip any
72 points to begin with. Thus we can compute the exact bounds of the target
73 space using min/max and floor/ceil.
75 For a dense source space, we take the transformation matrix, decompose it
76 into a lower triangular part (H) and a unimodular part (U).
77 We then compute the auxiliary space from the unimodular part (source loop
78 nest . U = auxiliary space) , which has two important properties:
79 1. It traverses the iterations in the same lexicographic order as the source
81 2. It is a dense space when the source is a dense space (even if the target
82 space is going to be sparse).
84 Given the auxiliary space, we use the lower triangular part to compute the
85 bounds in the target space by simple matrix multiplication.
86 The gaps in the target space (IE the new loop step sizes) will be the
87 diagonals of the H matrix.
89 Sparse source spaces require another step, because you can't directly compute
90 the exact bounds of the auxiliary and target space from the sparse space.
91 Rather than try to come up with a separate algorithm to handle sparse source
92 spaces directly, we just find a legal transformation matrix that gives you
93 the sparse source space, from a dense space, and then transform the dense
96 For a regular sparse space, you can represent the source space as an integer
97 lattice, and the base space of that lattice will always be dense. Thus, we
98 effectively use the lattice to figure out the transformation from the lattice
99 base space, to the sparse iteration space (IE what transform was applied to
100 the dense space to make it sparse). We then compose this transform with the
101 transformation matrix specified by the user (since our matrix transformations
102 are closed under composition, this is okay). We can then use the base space
103 (which is dense) plus the composed transformation matrix, to compute the rest
104 of the transform using the dense space algorithm above.
106 In other words, our sparse source space (B) is decomposed into a dense base
107 space (A), and a matrix (L) that transforms A into B, such that A.L = B.
108 We then compute the composition of L and the user transformation matrix (T),
109 so that T is now a transform from A to the result, instead of from B to the
111 IE A.(LT) = result instead of B.T = result
112 Since A is now a dense source space, we can use the dense source space
113 algorithm above to compute the result of applying transform (LT) to A.
115 Fourier-Motzkin elimination is used to compute the bounds of the base space
122 static bool perfect_nestify (struct loops
*,
123 struct loop
*, VEC (tree
) *,
124 VEC (tree
) *, VEC (int) *, VEC (tree
) *);
125 /* Lattice stuff that is internal to the code generation algorithm. */
129 /* Lattice base matrix. */
131 /* Lattice dimension. */
133 /* Origin vector for the coefficients. */
134 lambda_vector origin
;
135 /* Origin matrix for the invariants. */
136 lambda_matrix origin_invariants
;
137 /* Number of invariants. */
141 #define LATTICE_BASE(T) ((T)->base)
142 #define LATTICE_DIMENSION(T) ((T)->dimension)
143 #define LATTICE_ORIGIN(T) ((T)->origin)
144 #define LATTICE_ORIGIN_INVARIANTS(T) ((T)->origin_invariants)
145 #define LATTICE_INVARIANTS(T) ((T)->invariants)
147 static bool lle_equal (lambda_linear_expression
, lambda_linear_expression
,
149 static lambda_lattice
lambda_lattice_new (int, int);
150 static lambda_lattice
lambda_lattice_compute_base (lambda_loopnest
);
152 static tree
find_induction_var_from_exit_cond (struct loop
*);
154 /* Create a new lambda body vector. */
157 lambda_body_vector_new (int size
)
159 lambda_body_vector ret
;
161 ret
= ggc_alloc (sizeof (*ret
));
162 LBV_COEFFICIENTS (ret
) = lambda_vector_new (size
);
163 LBV_SIZE (ret
) = size
;
164 LBV_DENOMINATOR (ret
) = 1;
168 /* Compute the new coefficients for the vector based on the
169 *inverse* of the transformation matrix. */
172 lambda_body_vector_compute_new (lambda_trans_matrix transform
,
173 lambda_body_vector vect
)
175 lambda_body_vector temp
;
178 /* Make sure the matrix is square. */
179 gcc_assert (LTM_ROWSIZE (transform
) == LTM_COLSIZE (transform
));
181 depth
= LTM_ROWSIZE (transform
);
183 temp
= lambda_body_vector_new (depth
);
184 LBV_DENOMINATOR (temp
) =
185 LBV_DENOMINATOR (vect
) * LTM_DENOMINATOR (transform
);
186 lambda_vector_matrix_mult (LBV_COEFFICIENTS (vect
), depth
,
187 LTM_MATRIX (transform
), depth
,
188 LBV_COEFFICIENTS (temp
));
189 LBV_SIZE (temp
) = LBV_SIZE (vect
);
193 /* Print out a lambda body vector. */
196 print_lambda_body_vector (FILE * outfile
, lambda_body_vector body
)
198 print_lambda_vector (outfile
, LBV_COEFFICIENTS (body
), LBV_SIZE (body
));
201 /* Return TRUE if two linear expressions are equal. */
204 lle_equal (lambda_linear_expression lle1
, lambda_linear_expression lle2
,
205 int depth
, int invariants
)
209 if (lle1
== NULL
|| lle2
== NULL
)
211 if (LLE_CONSTANT (lle1
) != LLE_CONSTANT (lle2
))
213 if (LLE_DENOMINATOR (lle1
) != LLE_DENOMINATOR (lle2
))
215 for (i
= 0; i
< depth
; i
++)
216 if (LLE_COEFFICIENTS (lle1
)[i
] != LLE_COEFFICIENTS (lle2
)[i
])
218 for (i
= 0; i
< invariants
; i
++)
219 if (LLE_INVARIANT_COEFFICIENTS (lle1
)[i
] !=
220 LLE_INVARIANT_COEFFICIENTS (lle2
)[i
])
225 /* Create a new linear expression with dimension DIM, and total number
226 of invariants INVARIANTS. */
228 lambda_linear_expression
229 lambda_linear_expression_new (int dim
, int invariants
)
231 lambda_linear_expression ret
;
233 ret
= ggc_alloc_cleared (sizeof (*ret
));
235 LLE_COEFFICIENTS (ret
) = lambda_vector_new (dim
);
236 LLE_CONSTANT (ret
) = 0;
237 LLE_INVARIANT_COEFFICIENTS (ret
) = lambda_vector_new (invariants
);
238 LLE_DENOMINATOR (ret
) = 1;
239 LLE_NEXT (ret
) = NULL
;
244 /* Print out a linear expression EXPR, with SIZE coefficients, to OUTFILE.
245 The starting letter used for variable names is START. */
248 print_linear_expression (FILE * outfile
, lambda_vector expr
, int size
,
253 for (i
= 0; i
< size
; i
++)
260 fprintf (outfile
, "-");
263 else if (expr
[i
] > 0)
264 fprintf (outfile
, " + ");
266 fprintf (outfile
, " - ");
267 if (abs (expr
[i
]) == 1)
268 fprintf (outfile
, "%c", start
+ i
);
270 fprintf (outfile
, "%d%c", abs (expr
[i
]), start
+ i
);
275 /* Print out a lambda linear expression structure, EXPR, to OUTFILE. The
276 depth/number of coefficients is given by DEPTH, the number of invariants is
277 given by INVARIANTS, and the character to start variable names with is given
281 print_lambda_linear_expression (FILE * outfile
,
282 lambda_linear_expression expr
,
283 int depth
, int invariants
, char start
)
285 fprintf (outfile
, "\tLinear expression: ");
286 print_linear_expression (outfile
, LLE_COEFFICIENTS (expr
), depth
, start
);
287 fprintf (outfile
, " constant: %d ", LLE_CONSTANT (expr
));
288 fprintf (outfile
, " invariants: ");
289 print_linear_expression (outfile
, LLE_INVARIANT_COEFFICIENTS (expr
),
291 fprintf (outfile
, " denominator: %d\n", LLE_DENOMINATOR (expr
));
294 /* Print a lambda loop structure LOOP to OUTFILE. The depth/number of
295 coefficients is given by DEPTH, the number of invariants is
296 given by INVARIANTS, and the character to start variable names with is given
300 print_lambda_loop (FILE * outfile
, lambda_loop loop
, int depth
,
301 int invariants
, char start
)
304 lambda_linear_expression expr
;
308 expr
= LL_LINEAR_OFFSET (loop
);
309 step
= LL_STEP (loop
);
310 fprintf (outfile
, " step size = %d \n", step
);
314 fprintf (outfile
, " linear offset: \n");
315 print_lambda_linear_expression (outfile
, expr
, depth
, invariants
,
319 fprintf (outfile
, " lower bound: \n");
320 for (expr
= LL_LOWER_BOUND (loop
); expr
!= NULL
; expr
= LLE_NEXT (expr
))
321 print_lambda_linear_expression (outfile
, expr
, depth
, invariants
, start
);
322 fprintf (outfile
, " upper bound: \n");
323 for (expr
= LL_UPPER_BOUND (loop
); expr
!= NULL
; expr
= LLE_NEXT (expr
))
324 print_lambda_linear_expression (outfile
, expr
, depth
, invariants
, start
);
327 /* Create a new loop nest structure with DEPTH loops, and INVARIANTS as the
328 number of invariants. */
331 lambda_loopnest_new (int depth
, int invariants
)
334 ret
= ggc_alloc (sizeof (*ret
));
336 LN_LOOPS (ret
) = ggc_alloc_cleared (depth
* sizeof (lambda_loop
));
337 LN_DEPTH (ret
) = depth
;
338 LN_INVARIANTS (ret
) = invariants
;
343 /* Print a lambda loopnest structure, NEST, to OUTFILE. The starting
344 character to use for loop names is given by START. */
347 print_lambda_loopnest (FILE * outfile
, lambda_loopnest nest
, char start
)
350 for (i
= 0; i
< LN_DEPTH (nest
); i
++)
352 fprintf (outfile
, "Loop %c\n", start
+ i
);
353 print_lambda_loop (outfile
, LN_LOOPS (nest
)[i
], LN_DEPTH (nest
),
354 LN_INVARIANTS (nest
), 'i');
355 fprintf (outfile
, "\n");
359 /* Allocate a new lattice structure of DEPTH x DEPTH, with INVARIANTS number
362 static lambda_lattice
363 lambda_lattice_new (int depth
, int invariants
)
366 ret
= ggc_alloc (sizeof (*ret
));
367 LATTICE_BASE (ret
) = lambda_matrix_new (depth
, depth
);
368 LATTICE_ORIGIN (ret
) = lambda_vector_new (depth
);
369 LATTICE_ORIGIN_INVARIANTS (ret
) = lambda_matrix_new (depth
, invariants
);
370 LATTICE_DIMENSION (ret
) = depth
;
371 LATTICE_INVARIANTS (ret
) = invariants
;
375 /* Compute the lattice base for NEST. The lattice base is essentially a
376 non-singular transform from a dense base space to a sparse iteration space.
377 We use it so that we don't have to specially handle the case of a sparse
378 iteration space in other parts of the algorithm. As a result, this routine
379 only does something interesting (IE produce a matrix that isn't the
380 identity matrix) if NEST is a sparse space. */
382 static lambda_lattice
383 lambda_lattice_compute_base (lambda_loopnest nest
)
386 int depth
, invariants
;
391 lambda_linear_expression expression
;
393 depth
= LN_DEPTH (nest
);
394 invariants
= LN_INVARIANTS (nest
);
396 ret
= lambda_lattice_new (depth
, invariants
);
397 base
= LATTICE_BASE (ret
);
398 for (i
= 0; i
< depth
; i
++)
400 loop
= LN_LOOPS (nest
)[i
];
402 step
= LL_STEP (loop
);
403 /* If we have a step of 1, then the base is one, and the
404 origin and invariant coefficients are 0. */
407 for (j
= 0; j
< depth
; j
++)
410 LATTICE_ORIGIN (ret
)[i
] = 0;
411 for (j
= 0; j
< invariants
; j
++)
412 LATTICE_ORIGIN_INVARIANTS (ret
)[i
][j
] = 0;
416 /* Otherwise, we need the lower bound expression (which must
417 be an affine function) to determine the base. */
418 expression
= LL_LOWER_BOUND (loop
);
419 gcc_assert (expression
&& LLE_NEXT (expression
)
420 && LLE_DENOMINATOR (expression
) == 1);
422 /* The lower triangular portion of the base is going to be the
423 coefficient times the step */
424 for (j
= 0; j
< i
; j
++)
425 base
[i
][j
] = LLE_COEFFICIENTS (expression
)[j
]
426 * LL_STEP (LN_LOOPS (nest
)[j
]);
428 for (j
= i
+ 1; j
< depth
; j
++)
431 /* Origin for this loop is the constant of the lower bound
433 LATTICE_ORIGIN (ret
)[i
] = LLE_CONSTANT (expression
);
435 /* Coefficient for the invariants are equal to the invariant
436 coefficients in the expression. */
437 for (j
= 0; j
< invariants
; j
++)
438 LATTICE_ORIGIN_INVARIANTS (ret
)[i
][j
] =
439 LLE_INVARIANT_COEFFICIENTS (expression
)[j
];
445 /* Compute the greatest common denominator of two numbers (A and B) using
446 Euclid's algorithm. */
467 /* Compute the greatest common denominator of a VECTOR of SIZE numbers. */
470 gcd_vector (lambda_vector vector
, int size
)
478 for (i
= 1; i
< size
; i
++)
479 gcd1
= gcd (gcd1
, vector
[i
]);
/* Compute the least common multiple of two numbers A and B.  The abs ()
   calls keep the result nonnegative regardless of input signs.  */

static int
lcm (int a, int b)
{
  return (abs (a) * abs (b) / gcd (a, b));
}
492 /* Compute the loop bounds for the auxiliary space NEST.
493 Input system used is Ax <= b. TRANS is the unimodular transformation. */
495 static lambda_loopnest
496 lambda_compute_auxillary_space (lambda_loopnest nest
,
497 lambda_trans_matrix trans
)
499 lambda_matrix A
, B
, A1
, B1
, temp0
;
500 lambda_vector a
, a1
, temp1
;
501 lambda_matrix invertedtrans
;
502 int determinant
, depth
, invariants
, size
, newsize
;
504 lambda_loopnest auxillary_nest
;
506 lambda_linear_expression expression
;
507 lambda_lattice lattice
;
509 int multiple
, f1
, f2
;
511 depth
= LN_DEPTH (nest
);
512 invariants
= LN_INVARIANTS (nest
);
514 /* Unfortunately, we can't know the number of constraints we'll have
515 ahead of time, but this should be enough even in ridiculous loop nest
516 cases. We abort if we go over this limit. */
517 A
= lambda_matrix_new (128, depth
);
518 B
= lambda_matrix_new (128, invariants
);
519 a
= lambda_vector_new (128);
521 A1
= lambda_matrix_new (128, depth
);
522 B1
= lambda_matrix_new (128, invariants
);
523 a1
= lambda_vector_new (128);
525 /* Store the bounds in the equation matrix A, constant vector a, and
526 invariant matrix B, so that we have Ax <= a + B.
527 This requires a little equation rearranging so that everything is on the
528 correct side of the inequality. */
530 for (i
= 0; i
< depth
; i
++)
532 loop
= LN_LOOPS (nest
)[i
];
534 /* First we do the lower bound. */
535 if (LL_STEP (loop
) > 0)
536 expression
= LL_LOWER_BOUND (loop
);
538 expression
= LL_UPPER_BOUND (loop
);
540 for (; expression
!= NULL
; expression
= LLE_NEXT (expression
))
542 /* Fill in the coefficient. */
543 for (j
= 0; j
< i
; j
++)
544 A
[size
][j
] = LLE_COEFFICIENTS (expression
)[j
];
546 /* And the invariant coefficient. */
547 for (j
= 0; j
< invariants
; j
++)
548 B
[size
][j
] = LLE_INVARIANT_COEFFICIENTS (expression
)[j
];
550 /* And the constant. */
551 a
[size
] = LLE_CONSTANT (expression
);
553 /* Convert (2x+3y+2+b)/4 <= z to 2x+3y-4z <= -2-b. IE put all
554 constants and single variables on */
555 A
[size
][i
] = -1 * LLE_DENOMINATOR (expression
);
557 for (j
= 0; j
< invariants
; j
++)
561 /* Need to increase matrix sizes above. */
562 gcc_assert (size
<= 127);
566 /* Then do the exact same thing for the upper bounds. */
567 if (LL_STEP (loop
) > 0)
568 expression
= LL_UPPER_BOUND (loop
);
570 expression
= LL_LOWER_BOUND (loop
);
572 for (; expression
!= NULL
; expression
= LLE_NEXT (expression
))
574 /* Fill in the coefficient. */
575 for (j
= 0; j
< i
; j
++)
576 A
[size
][j
] = LLE_COEFFICIENTS (expression
)[j
];
578 /* And the invariant coefficient. */
579 for (j
= 0; j
< invariants
; j
++)
580 B
[size
][j
] = LLE_INVARIANT_COEFFICIENTS (expression
)[j
];
582 /* And the constant. */
583 a
[size
] = LLE_CONSTANT (expression
);
585 /* Convert z <= (2x+3y+2+b)/4 to -2x-3y+4z <= 2+b. */
586 for (j
= 0; j
< i
; j
++)
588 A
[size
][i
] = LLE_DENOMINATOR (expression
);
590 /* Need to increase matrix sizes above. */
591 gcc_assert (size
<= 127);
596 /* Compute the lattice base x = base * y + origin, where y is the
598 lattice
= lambda_lattice_compute_base (nest
);
600 /* Ax <= a + B then becomes ALy <= a+B - A*origin. L is the lattice base */
603 lambda_matrix_mult (A
, LATTICE_BASE (lattice
), A1
, size
, depth
, depth
);
605 /* a1 = a - A * origin constant. */
606 lambda_matrix_vector_mult (A
, size
, depth
, LATTICE_ORIGIN (lattice
), a1
);
607 lambda_vector_add_mc (a
, 1, a1
, -1, a1
, size
);
609 /* B1 = B - A * origin invariant. */
610 lambda_matrix_mult (A
, LATTICE_ORIGIN_INVARIANTS (lattice
), B1
, size
, depth
,
612 lambda_matrix_add_mc (B
, 1, B1
, -1, B1
, size
, invariants
);
614 /* Now compute the auxiliary space bounds by first inverting U, multiplying
615 it by A1, then performing fourier motzkin. */
617 invertedtrans
= lambda_matrix_new (depth
, depth
);
619 /* Compute the inverse of U. */
620 determinant
= lambda_matrix_inverse (LTM_MATRIX (trans
),
621 invertedtrans
, depth
);
624 lambda_matrix_mult (A1
, invertedtrans
, A
, size
, depth
, depth
);
626 /* Perform Fourier-Motzkin elimination to calculate the bounds of the
628 Fourier-Motzkin is a way of reducing systems of linear inequality so that
629 it is easy to calculate the answer and bounds.
630 A sketch of how it works:
631 Given a system of linear inequalities, ai * xj >= bk, you can always
632 rewrite the constraints so they are all of the form
633 a <= x, or x <= b, or x >= constant for some x in x1 ... xj (and some b
634 in b1 ... bk, and some a in a1...ai)
635 You can then eliminate this x from the non-constant inequalities by
636 rewriting these as a <= b, x >= constant, and delete the x variable.
637 You can then repeat this for any remaining x variables, and then we have
638 an easy to use variable <= constant (or no variables at all) form that we
639 can construct our bounds from.
641 In our case, each time we eliminate, we construct part of the bound from
642 the ith variable, then delete the ith variable.
644 Remember the constant are in our vector a, our coefficient matrix is A,
645 and our invariant coefficient matrix is B */
647 /* Swap B and B1, and a1 and a. */
656 auxillary_nest
= lambda_loopnest_new (depth
, invariants
);
658 for (i
= depth
- 1; i
>= 0; i
--)
660 loop
= lambda_loop_new ();
661 LN_LOOPS (auxillary_nest
)[i
] = loop
;
664 for (j
= 0; j
< size
; j
++)
669 expression
= lambda_linear_expression_new (depth
, invariants
);
671 for (k
= 0; k
< i
; k
++)
672 LLE_COEFFICIENTS (expression
)[k
] = A
[j
][k
];
673 for (k
= 0; k
< invariants
; k
++)
674 LLE_INVARIANT_COEFFICIENTS (expression
)[k
] = -1 * B
[j
][k
];
675 LLE_DENOMINATOR (expression
) = -1 * A
[j
][i
];
676 LLE_CONSTANT (expression
) = -1 * a
[j
];
677 /* Ignore if identical to the existing lower bound. */
678 if (!lle_equal (LL_LOWER_BOUND (loop
),
679 expression
, depth
, invariants
))
681 LLE_NEXT (expression
) = LL_LOWER_BOUND (loop
);
682 LL_LOWER_BOUND (loop
) = expression
;
686 else if (A
[j
][i
] > 0)
689 expression
= lambda_linear_expression_new (depth
, invariants
);
690 for (k
= 0; k
< i
; k
++)
691 LLE_COEFFICIENTS (expression
)[k
] = -1 * A
[j
][k
];
692 LLE_CONSTANT (expression
) = a
[j
];
694 for (k
= 0; k
< invariants
; k
++)
695 LLE_INVARIANT_COEFFICIENTS (expression
)[k
] = B
[j
][k
];
697 LLE_DENOMINATOR (expression
) = A
[j
][i
];
698 /* Ignore if identical to the existing upper bound. */
699 if (!lle_equal (LL_UPPER_BOUND (loop
),
700 expression
, depth
, invariants
))
702 LLE_NEXT (expression
) = LL_UPPER_BOUND (loop
);
703 LL_UPPER_BOUND (loop
) = expression
;
708 /* creates a new system by deleting the i'th variable. */
710 for (j
= 0; j
< size
; j
++)
714 lambda_vector_copy (A
[j
], A1
[newsize
], depth
);
715 lambda_vector_copy (B
[j
], B1
[newsize
], invariants
);
719 else if (A
[j
][i
] > 0)
721 for (k
= 0; k
< size
; k
++)
725 multiple
= lcm (A
[j
][i
], A
[k
][i
]);
726 f1
= multiple
/ A
[j
][i
];
727 f2
= -1 * multiple
/ A
[k
][i
];
729 lambda_vector_add_mc (A
[j
], f1
, A
[k
], f2
,
731 lambda_vector_add_mc (B
[j
], f1
, B
[k
], f2
,
732 B1
[newsize
], invariants
);
733 a1
[newsize
] = f1
* a
[j
] + f2
* a
[k
];
755 return auxillary_nest
;
758 /* Compute the loop bounds for the target space, using the bounds of
759 the auxiliary nest AUXILLARY_NEST, and the triangular matrix H. This is
760 done by matrix multiplication and then transformation of the new matrix
761 back into linear expression form.
762 Return the target loopnest. */
764 static lambda_loopnest
765 lambda_compute_target_space (lambda_loopnest auxillary_nest
,
766 lambda_trans_matrix H
, lambda_vector stepsigns
)
768 lambda_matrix inverse
, H1
;
769 int determinant
, i
, j
;
773 lambda_loopnest target_nest
;
774 int depth
, invariants
;
775 lambda_matrix target
;
777 lambda_loop auxillary_loop
, target_loop
;
778 lambda_linear_expression expression
, auxillary_expr
, target_expr
, tmp_expr
;
780 depth
= LN_DEPTH (auxillary_nest
);
781 invariants
= LN_INVARIANTS (auxillary_nest
);
783 inverse
= lambda_matrix_new (depth
, depth
);
784 determinant
= lambda_matrix_inverse (LTM_MATRIX (H
), inverse
, depth
);
786 /* H1 is H excluding its diagonal. */
787 H1
= lambda_matrix_new (depth
, depth
);
788 lambda_matrix_copy (LTM_MATRIX (H
), H1
, depth
, depth
);
790 for (i
= 0; i
< depth
; i
++)
793 /* Computes the linear offsets of the loop bounds. */
794 target
= lambda_matrix_new (depth
, depth
);
795 lambda_matrix_mult (H1
, inverse
, target
, depth
, depth
, depth
);
797 target_nest
= lambda_loopnest_new (depth
, invariants
);
799 for (i
= 0; i
< depth
; i
++)
802 /* Get a new loop structure. */
803 target_loop
= lambda_loop_new ();
804 LN_LOOPS (target_nest
)[i
] = target_loop
;
806 /* Computes the gcd of the coefficients of the linear part. */
807 gcd1
= gcd_vector (target
[i
], i
);
809 /* Include the denominator in the GCD. */
810 gcd1
= gcd (gcd1
, determinant
);
812 /* Now divide through by the gcd. */
813 for (j
= 0; j
< i
; j
++)
814 target
[i
][j
] = target
[i
][j
] / gcd1
;
816 expression
= lambda_linear_expression_new (depth
, invariants
);
817 lambda_vector_copy (target
[i
], LLE_COEFFICIENTS (expression
), depth
);
818 LLE_DENOMINATOR (expression
) = determinant
/ gcd1
;
819 LLE_CONSTANT (expression
) = 0;
820 lambda_vector_clear (LLE_INVARIANT_COEFFICIENTS (expression
),
822 LL_LINEAR_OFFSET (target_loop
) = expression
;
825 /* For each loop, compute the new bounds from H. */
826 for (i
= 0; i
< depth
; i
++)
828 auxillary_loop
= LN_LOOPS (auxillary_nest
)[i
];
829 target_loop
= LN_LOOPS (target_nest
)[i
];
830 LL_STEP (target_loop
) = LTM_MATRIX (H
)[i
][i
];
831 factor
= LTM_MATRIX (H
)[i
][i
];
833 /* First we do the lower bound. */
834 auxillary_expr
= LL_LOWER_BOUND (auxillary_loop
);
836 for (; auxillary_expr
!= NULL
;
837 auxillary_expr
= LLE_NEXT (auxillary_expr
))
839 target_expr
= lambda_linear_expression_new (depth
, invariants
);
840 lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr
),
841 depth
, inverse
, depth
,
842 LLE_COEFFICIENTS (target_expr
));
843 lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr
),
844 LLE_COEFFICIENTS (target_expr
), depth
,
847 LLE_CONSTANT (target_expr
) = LLE_CONSTANT (auxillary_expr
) * factor
;
848 lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr
),
849 LLE_INVARIANT_COEFFICIENTS (target_expr
),
851 lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr
),
852 LLE_INVARIANT_COEFFICIENTS (target_expr
),
854 LLE_DENOMINATOR (target_expr
) = LLE_DENOMINATOR (auxillary_expr
);
856 if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr
), depth
))
858 LLE_CONSTANT (target_expr
) = LLE_CONSTANT (target_expr
)
860 lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
862 LLE_INVARIANT_COEFFICIENTS
863 (target_expr
), invariants
,
865 LLE_DENOMINATOR (target_expr
) =
866 LLE_DENOMINATOR (target_expr
) * determinant
;
868 /* Find the gcd and divide by it here, rather than doing it
869 at the tree level. */
870 gcd1
= gcd_vector (LLE_COEFFICIENTS (target_expr
), depth
);
871 gcd2
= gcd_vector (LLE_INVARIANT_COEFFICIENTS (target_expr
),
873 gcd1
= gcd (gcd1
, gcd2
);
874 gcd1
= gcd (gcd1
, LLE_CONSTANT (target_expr
));
875 gcd1
= gcd (gcd1
, LLE_DENOMINATOR (target_expr
));
876 for (j
= 0; j
< depth
; j
++)
877 LLE_COEFFICIENTS (target_expr
)[j
] /= gcd1
;
878 for (j
= 0; j
< invariants
; j
++)
879 LLE_INVARIANT_COEFFICIENTS (target_expr
)[j
] /= gcd1
;
880 LLE_CONSTANT (target_expr
) /= gcd1
;
881 LLE_DENOMINATOR (target_expr
) /= gcd1
;
882 /* Ignore if identical to existing bound. */
883 if (!lle_equal (LL_LOWER_BOUND (target_loop
), target_expr
, depth
,
886 LLE_NEXT (target_expr
) = LL_LOWER_BOUND (target_loop
);
887 LL_LOWER_BOUND (target_loop
) = target_expr
;
890 /* Now do the upper bound. */
891 auxillary_expr
= LL_UPPER_BOUND (auxillary_loop
);
893 for (; auxillary_expr
!= NULL
;
894 auxillary_expr
= LLE_NEXT (auxillary_expr
))
896 target_expr
= lambda_linear_expression_new (depth
, invariants
);
897 lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr
),
898 depth
, inverse
, depth
,
899 LLE_COEFFICIENTS (target_expr
));
900 lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr
),
901 LLE_COEFFICIENTS (target_expr
), depth
,
903 LLE_CONSTANT (target_expr
) = LLE_CONSTANT (auxillary_expr
) * factor
;
904 lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr
),
905 LLE_INVARIANT_COEFFICIENTS (target_expr
),
907 lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr
),
908 LLE_INVARIANT_COEFFICIENTS (target_expr
),
910 LLE_DENOMINATOR (target_expr
) = LLE_DENOMINATOR (auxillary_expr
);
912 if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr
), depth
))
914 LLE_CONSTANT (target_expr
) = LLE_CONSTANT (target_expr
)
916 lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
918 LLE_INVARIANT_COEFFICIENTS
919 (target_expr
), invariants
,
921 LLE_DENOMINATOR (target_expr
) =
922 LLE_DENOMINATOR (target_expr
) * determinant
;
924 /* Find the gcd and divide by it here, instead of at the
926 gcd1
= gcd_vector (LLE_COEFFICIENTS (target_expr
), depth
);
927 gcd2
= gcd_vector (LLE_INVARIANT_COEFFICIENTS (target_expr
),
929 gcd1
= gcd (gcd1
, gcd2
);
930 gcd1
= gcd (gcd1
, LLE_CONSTANT (target_expr
));
931 gcd1
= gcd (gcd1
, LLE_DENOMINATOR (target_expr
));
932 for (j
= 0; j
< depth
; j
++)
933 LLE_COEFFICIENTS (target_expr
)[j
] /= gcd1
;
934 for (j
= 0; j
< invariants
; j
++)
935 LLE_INVARIANT_COEFFICIENTS (target_expr
)[j
] /= gcd1
;
936 LLE_CONSTANT (target_expr
) /= gcd1
;
937 LLE_DENOMINATOR (target_expr
) /= gcd1
;
938 /* Ignore if equal to existing bound. */
939 if (!lle_equal (LL_UPPER_BOUND (target_loop
), target_expr
, depth
,
942 LLE_NEXT (target_expr
) = LL_UPPER_BOUND (target_loop
);
943 LL_UPPER_BOUND (target_loop
) = target_expr
;
947 for (i
= 0; i
< depth
; i
++)
949 target_loop
= LN_LOOPS (target_nest
)[i
];
950 /* If necessary, exchange the upper and lower bounds and negate
952 if (stepsigns
[i
] < 0)
954 LL_STEP (target_loop
) *= -1;
955 tmp_expr
= LL_LOWER_BOUND (target_loop
);
956 LL_LOWER_BOUND (target_loop
) = LL_UPPER_BOUND (target_loop
);
957 LL_UPPER_BOUND (target_loop
) = tmp_expr
;
963 /* Compute the step signs of TRANS, using TRANS and stepsigns. Return the new
967 lambda_compute_step_signs (lambda_trans_matrix trans
, lambda_vector stepsigns
)
969 lambda_matrix matrix
, H
;
971 lambda_vector newsteps
;
972 int i
, j
, factor
, minimum_column
;
975 matrix
= LTM_MATRIX (trans
);
976 size
= LTM_ROWSIZE (trans
);
977 H
= lambda_matrix_new (size
, size
);
979 newsteps
= lambda_vector_new (size
);
980 lambda_vector_copy (stepsigns
, newsteps
, size
);
982 lambda_matrix_copy (matrix
, H
, size
, size
);
984 for (j
= 0; j
< size
; j
++)
988 for (i
= j
; i
< size
; i
++)
990 lambda_matrix_col_negate (H
, size
, i
);
991 while (lambda_vector_first_nz (row
, size
, j
+ 1) < size
)
993 minimum_column
= lambda_vector_min_nz (row
, size
, j
);
994 lambda_matrix_col_exchange (H
, size
, j
, minimum_column
);
997 newsteps
[j
] = newsteps
[minimum_column
];
998 newsteps
[minimum_column
] = temp
;
1000 for (i
= j
+ 1; i
< size
; i
++)
1002 factor
= row
[i
] / row
[j
];
1003 lambda_matrix_col_add (H
, size
, j
, i
, -1 * factor
);
1010 /* Transform NEST according to TRANS, and return the new loopnest.
1012 1. Computing a lattice base for the transformation
1013 2. Composing the dense base with the specified transformation (TRANS)
1014 3. Decomposing the combined transformation into a lower triangular portion,
1015 and a unimodular portion.
1016 4. Computing the auxillary nest using the unimodular portion.
1017 5. Computing the target nest using the auxillary nest and the lower
1018 triangular portion. */
1021 lambda_loopnest_transform (lambda_loopnest nest
, lambda_trans_matrix trans
)
1023 lambda_loopnest auxillary_nest
, target_nest
;
1025 int depth
, invariants
;
1027 lambda_lattice lattice
;
1028 lambda_trans_matrix trans1
, H
, U
;
1030 lambda_linear_expression expression
;
1031 lambda_vector origin
;
1032 lambda_matrix origin_invariants
;
1033 lambda_vector stepsigns
;
1036 depth
= LN_DEPTH (nest
);
1037 invariants
= LN_INVARIANTS (nest
);
1039 /* Keep track of the signs of the loop steps. */
1040 stepsigns
= lambda_vector_new (depth
);
1041 for (i
= 0; i
< depth
; i
++)
1043 if (LL_STEP (LN_LOOPS (nest
)[i
]) > 0)
1049 /* Compute the lattice base. */
1050 lattice
= lambda_lattice_compute_base (nest
);
1051 trans1
= lambda_trans_matrix_new (depth
, depth
);
1053 /* Multiply the transformation matrix by the lattice base. */
1055 lambda_matrix_mult (LTM_MATRIX (trans
), LATTICE_BASE (lattice
),
1056 LTM_MATRIX (trans1
), depth
, depth
, depth
);
1058 /* Compute the Hermite normal form for the new transformation matrix. */
1059 H
= lambda_trans_matrix_new (depth
, depth
);
1060 U
= lambda_trans_matrix_new (depth
, depth
);
1061 lambda_matrix_hermite (LTM_MATRIX (trans1
), depth
, LTM_MATRIX (H
),
1064 /* Compute the auxiliary loop nest's space from the unimodular
1066 auxillary_nest
= lambda_compute_auxillary_space (nest
, U
);
1068 /* Compute the loop step signs from the old step signs and the
1069 transformation matrix. */
1070 stepsigns
= lambda_compute_step_signs (trans1
, stepsigns
);
1072 /* Compute the target loop nest space from the auxiliary nest and
1073 the lower triangular matrix H. */
1074 target_nest
= lambda_compute_target_space (auxillary_nest
, H
, stepsigns
);
1075 origin
= lambda_vector_new (depth
);
1076 origin_invariants
= lambda_matrix_new (depth
, invariants
);
1077 lambda_matrix_vector_mult (LTM_MATRIX (trans
), depth
, depth
,
1078 LATTICE_ORIGIN (lattice
), origin
);
1079 lambda_matrix_mult (LTM_MATRIX (trans
), LATTICE_ORIGIN_INVARIANTS (lattice
),
1080 origin_invariants
, depth
, depth
, invariants
);
1082 for (i
= 0; i
< depth
; i
++)
1084 loop
= LN_LOOPS (target_nest
)[i
];
1085 expression
= LL_LINEAR_OFFSET (loop
);
1086 if (lambda_vector_zerop (LLE_COEFFICIENTS (expression
), depth
))
1089 f
= LLE_DENOMINATOR (expression
);
1091 LLE_CONSTANT (expression
) += f
* origin
[i
];
1093 for (j
= 0; j
< invariants
; j
++)
1094 LLE_INVARIANT_COEFFICIENTS (expression
)[j
] +=
1095 f
* origin_invariants
[i
][j
];
1102 /* Convert a gcc tree expression EXPR to a lambda linear expression, and
1103 return the new expression. DEPTH is the depth of the loopnest.
1104 OUTERINDUCTIONVARS is an array of the induction variables for outer loops
1105 in this nest. INVARIANTS is the array of invariants for the loop. EXTRA
1106 is the amount we have to add/subtract from the expression because of the
1107 type of comparison it is used in. */
1109 static lambda_linear_expression
1110 gcc_tree_to_linear_expression (int depth
, tree expr
,
1111 VEC(tree
) *outerinductionvars
,
1112 VEC(tree
) *invariants
, int extra
)
1114 lambda_linear_expression lle
= NULL
;
1115 switch (TREE_CODE (expr
))
1119 lle
= lambda_linear_expression_new (depth
, 2 * depth
);
1120 LLE_CONSTANT (lle
) = TREE_INT_CST_LOW (expr
);
1122 LLE_CONSTANT (lle
) = extra
;
1124 LLE_DENOMINATOR (lle
) = 1;
1131 for (i
= 0; VEC_iterate (tree
, outerinductionvars
, i
, iv
); i
++)
1134 if (SSA_NAME_VAR (iv
) == SSA_NAME_VAR (expr
))
1136 lle
= lambda_linear_expression_new (depth
, 2 * depth
);
1137 LLE_COEFFICIENTS (lle
)[i
] = 1;
1139 LLE_CONSTANT (lle
) = extra
;
1141 LLE_DENOMINATOR (lle
) = 1;
1144 for (i
= 0; VEC_iterate (tree
, invariants
, i
, invar
); i
++)
1147 if (SSA_NAME_VAR (invar
) == SSA_NAME_VAR (expr
))
1149 lle
= lambda_linear_expression_new (depth
, 2 * depth
);
1150 LLE_INVARIANT_COEFFICIENTS (lle
)[i
] = 1;
1152 LLE_CONSTANT (lle
) = extra
;
1153 LLE_DENOMINATOR (lle
) = 1;
1165 /* Return true if OP is invariant in LOOP and all outer loops. */
1168 invariant_in_loop (struct loop
*loop
, tree op
)
1170 if (is_gimple_min_invariant (op
))
1172 if (loop
->depth
== 0)
1174 if (TREE_CODE (op
) == SSA_NAME
)
1177 def
= SSA_NAME_DEF_STMT (op
);
1178 if (TREE_CODE (SSA_NAME_VAR (op
)) == PARM_DECL
1179 && IS_EMPTY_STMT (def
))
1181 if (IS_EMPTY_STMT (def
))
1184 && !invariant_in_loop (loop
->outer
, op
))
1186 return !flow_bb_inside_loop_p (loop
, bb_for_stmt (def
));
1191 /* Generate a lambda loop from a gcc loop LOOP. Return the new lambda loop,
1192 or NULL if it could not be converted.
1193 DEPTH is the depth of the loop.
1194 INVARIANTS is a pointer to the array of loop invariants.
1195 The induction variable for this loop should be stored in the parameter
1197 OUTERINDUCTIONVARS is an array of induction variables for outer loops. */
1200 gcc_loop_to_lambda_loop (struct loop
*loop
, int depth
,
1201 VEC (tree
) ** invariants
,
1202 tree
* ourinductionvar
,
1203 VEC (tree
) * outerinductionvars
,
1204 VEC (tree
) ** lboundvars
,
1205 VEC (tree
) ** uboundvars
,
1210 tree access_fn
, inductionvar
;
1212 lambda_loop lloop
= NULL
;
1213 lambda_linear_expression lbound
, ubound
;
1217 tree lboundvar
, uboundvar
;
1220 /* Find out induction var and exit condition. */
1221 inductionvar
= find_induction_var_from_exit_cond (loop
);
1222 exit_cond
= get_loop_exit_condition (loop
);
1224 if (inductionvar
== NULL
|| exit_cond
== NULL
)
1226 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1228 "Unable to convert loop: Cannot determine exit condition or induction variable for loop.\n");
1232 test
= TREE_OPERAND (exit_cond
, 0);
1234 if (SSA_NAME_DEF_STMT (inductionvar
) == NULL_TREE
)
1237 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1239 "Unable to convert loop: Cannot find PHI node for induction variable\n");
1244 phi
= SSA_NAME_DEF_STMT (inductionvar
);
1245 if (TREE_CODE (phi
) != PHI_NODE
)
1247 get_stmt_operands (phi
);
1248 uses
= STMT_USE_OPS (phi
);
1253 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1255 "Unable to convert loop: Cannot find PHI node for induction variable\n");
1260 phi
= USE_OP (uses
, 0);
1261 phi
= SSA_NAME_DEF_STMT (phi
);
1262 if (TREE_CODE (phi
) != PHI_NODE
)
1265 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1267 "Unable to convert loop: Cannot find PHI node for induction variable\n");
1272 /* The induction variable name/version we want to put in the array is the
1273 result of the induction variable phi node. */
1274 *ourinductionvar
= PHI_RESULT (phi
);
1275 access_fn
= instantiate_parameters
1276 (loop
, analyze_scalar_evolution (loop
, PHI_RESULT (phi
)));
1279 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1281 "Unable to convert loop: Access function for induction variable phi is NULL\n");
1286 step
= evolution_part_in_loop_num (access_fn
, loop
->num
);
1287 if (!step
|| step
== chrec_dont_know
)
1289 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1291 "Unable to convert loop: Cannot determine step of loop.\n");
1295 if (TREE_CODE (step
) != INTEGER_CST
)
1298 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1300 "Unable to convert loop: Step of loop is not integer.\n");
1304 stepint
= TREE_INT_CST_LOW (step
);
1306 /* Only want phis for induction vars, which will have two
1308 if (PHI_NUM_ARGS (phi
) != 2)
1310 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1312 "Unable to convert loop: PHI node for induction variable has >2 arguments\n");
1316 /* Another induction variable check. One argument's source should be
1317 in the loop, one outside the loop. */
1318 if (flow_bb_inside_loop_p (loop
, PHI_ARG_EDGE (phi
, 0)->src
)
1319 && flow_bb_inside_loop_p (loop
, PHI_ARG_EDGE (phi
, 1)->src
))
1322 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1324 "Unable to convert loop: PHI edges both inside loop, or both outside loop.\n");
1329 if (flow_bb_inside_loop_p (loop
, PHI_ARG_EDGE (phi
, 0)->src
))
1331 lboundvar
= PHI_ARG_DEF (phi
, 1);
1332 lbound
= gcc_tree_to_linear_expression (depth
, lboundvar
,
1333 outerinductionvars
, *invariants
,
1338 lboundvar
= PHI_ARG_DEF (phi
, 0);
1339 lbound
= gcc_tree_to_linear_expression (depth
, lboundvar
,
1340 outerinductionvars
, *invariants
,
1347 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1349 "Unable to convert loop: Cannot convert lower bound to linear expression\n");
1353 /* One part of the test may be a loop invariant tree. */
1354 if (TREE_CODE (TREE_OPERAND (test
, 1)) == SSA_NAME
1355 && invariant_in_loop (loop
, TREE_OPERAND (test
, 1)))
1356 VEC_safe_push (tree
, *invariants
, TREE_OPERAND (test
, 1));
1357 else if (TREE_CODE (TREE_OPERAND (test
, 0)) == SSA_NAME
1358 && invariant_in_loop (loop
, TREE_OPERAND (test
, 0)))
1359 VEC_safe_push (tree
, *invariants
, TREE_OPERAND (test
, 0));
1361 /* The non-induction variable part of the test is the upper bound variable.
1363 if (TREE_OPERAND (test
, 0) == inductionvar
)
1364 uboundvar
= TREE_OPERAND (test
, 1);
1366 uboundvar
= TREE_OPERAND (test
, 0);
1369 /* We only size the vectors assuming we have, at max, 2 times as many
1370 invariants as we do loops (one for each bound).
1371 This is just an arbitrary number, but it has to be matched against the
1373 gcc_assert (VEC_length (tree
, *invariants
) <= (unsigned int) (2 * depth
));
1376 /* We might have some leftover. */
1377 if (TREE_CODE (test
) == LT_EXPR
)
1378 extra
= -1 * stepint
;
1379 else if (TREE_CODE (test
) == NE_EXPR
)
1380 extra
= -1 * stepint
;
1381 else if (TREE_CODE (test
) == GT_EXPR
)
1382 extra
= -1 * stepint
;
1384 ubound
= gcc_tree_to_linear_expression (depth
,
1387 *invariants
, extra
);
1388 VEC_safe_push (tree
, *uboundvars
, build (PLUS_EXPR
, integer_type_node
,
1390 build_int_cst (integer_type_node
, extra
)));
1391 VEC_safe_push (tree
, *lboundvars
, lboundvar
);
1392 VEC_safe_push (int, *steps
, stepint
);
1396 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1398 "Unable to convert loop: Cannot convert upper bound to linear expression\n");
1402 lloop
= lambda_loop_new ();
1403 LL_STEP (lloop
) = stepint
;
1404 LL_LOWER_BOUND (lloop
) = lbound
;
1405 LL_UPPER_BOUND (lloop
) = ubound
;
1409 /* Given a LOOP, find the induction variable it is testing against in the exit
1410 condition. Return the induction variable if found, NULL otherwise. */
1413 find_induction_var_from_exit_cond (struct loop
*loop
)
1415 tree expr
= get_loop_exit_condition (loop
);
1418 if (expr
== NULL_TREE
)
1420 if (TREE_CODE (expr
) != COND_EXPR
)
1422 test
= TREE_OPERAND (expr
, 0);
1423 if (!COMPARISON_CLASS_P (test
))
1425 /* This is a guess. We say that for a <,!=,<= b, a is the induction
1427 For >, >=, we guess b is the induction variable.
1428 If we are wrong, it'll fail the rest of the induction variable tests, and
1429 everything will be fine anyway. */
1430 switch (TREE_CODE (test
))
1435 ivarop
= TREE_OPERAND (test
, 0);
1439 ivarop
= TREE_OPERAND (test
, 1);
1444 if (TREE_CODE (ivarop
) != SSA_NAME
)
1449 DEF_VEC_GC_P(lambda_loop
);
1450 /* Generate a lambda loopnest from a gcc loopnest LOOP_NEST.
1451 Return the new loop nest.
1452 INDUCTIONVARS is a pointer to an array of induction variables for the
1453 loopnest that will be filled in during this process.
1454 INVARIANTS is a pointer to an array of invariants that will be filled in
1455 during this process. */
1458 gcc_loopnest_to_lambda_loopnest (struct loops
*currloops
,
1459 struct loop
* loop_nest
,
1460 VEC (tree
) **inductionvars
,
1461 VEC (tree
) **invariants
,
1462 bool need_perfect_nest
)
1464 lambda_loopnest ret
;
1468 VEC (lambda_loop
) *loops
;
1469 VEC (tree
) *uboundvars
;
1470 VEC (tree
) *lboundvars
;
1472 lambda_loop newloop
;
1473 tree inductionvar
= NULL
;
1481 loops
= VEC_alloc (lambda_loop
, 1);
1482 *inductionvars
= VEC_alloc (tree
, 1);
1483 *invariants
= VEC_alloc (tree
, 1);
1484 lboundvars
= VEC_alloc (tree
, 1);
1485 uboundvars
= VEC_alloc (tree
, 1);
1486 steps
= VEC_alloc (int, 1);
1490 newloop
= gcc_loop_to_lambda_loop (temp
, depth
, invariants
,
1491 &inductionvar
, *inductionvars
,
1492 &lboundvars
, &uboundvars
,
1496 VEC_safe_push (tree
, *inductionvars
, inductionvar
);
1497 VEC_safe_push (lambda_loop
, loops
, newloop
);
1500 if (need_perfect_nest
1501 && !perfect_nestify (currloops
, loop_nest
,
1502 lboundvars
, uboundvars
, steps
, *inductionvars
))
1505 fprintf (dump_file
, "Not a perfect nest and couldn't convert to one.\n");
1508 ret
= lambda_loopnest_new (depth
, 2 * depth
);
1509 for (i
= 0; VEC_iterate (lambda_loop
, loops
, i
, newloop
); i
++)
1510 LN_LOOPS (ret
)[i
] = newloop
;
1516 /* Convert a lambda body vector LBV to a gcc tree, and return the new tree.
1517 STMTS_TO_INSERT is a pointer to a tree where the statements we need to be
1518 inserted for us are stored. INDUCTION_VARS is the array of induction
1519 variables for the loop this LBV is from. */
1522 lbv_to_gcc_expression (lambda_body_vector lbv
,
1523 VEC (tree
) *induction_vars
, tree
* stmts_to_insert
)
1525 tree stmts
, stmt
, resvar
, name
;
1527 tree_stmt_iterator tsi
;
1529 /* Create a statement list and a linear expression temporary. */
1530 stmts
= alloc_stmt_list ();
1531 resvar
= create_tmp_var (integer_type_node
, "lbvtmp");
1532 add_referenced_tmp_var (resvar
);
1535 stmt
= build (MODIFY_EXPR
, void_type_node
, resvar
, integer_zero_node
);
1536 name
= make_ssa_name (resvar
, stmt
);
1537 TREE_OPERAND (stmt
, 0) = name
;
1538 tsi
= tsi_last (stmts
);
1539 tsi_link_after (&tsi
, stmt
, TSI_CONTINUE_LINKING
);
1541 for (i
= 0; i
< VEC_length (tree
,induction_vars
) ; i
++)
1543 if (LBV_COEFFICIENTS (lbv
)[i
] != 0)
1547 /* newname = coefficient * induction_variable */
1548 stmt
= build (MODIFY_EXPR
, void_type_node
, resvar
,
1549 fold (build (MULT_EXPR
, integer_type_node
,
1550 VEC_index (tree
, induction_vars
, i
),
1551 build_int_cst (integer_type_node
,
1552 LBV_COEFFICIENTS (lbv
)[i
]))));
1553 newname
= make_ssa_name (resvar
, stmt
);
1554 TREE_OPERAND (stmt
, 0) = newname
;
1555 tsi
= tsi_last (stmts
);
1556 tsi_link_after (&tsi
, stmt
, TSI_CONTINUE_LINKING
);
1557 /* name = name + newname */
1558 stmt
= build (MODIFY_EXPR
, void_type_node
, resvar
,
1559 build (PLUS_EXPR
, integer_type_node
, name
, newname
));
1560 name
= make_ssa_name (resvar
, stmt
);
1561 TREE_OPERAND (stmt
, 0) = name
;
1562 tsi
= tsi_last (stmts
);
1563 tsi_link_after (&tsi
, stmt
, TSI_CONTINUE_LINKING
);
1567 /* Handle any denominator that occurs. */
1568 if (LBV_DENOMINATOR (lbv
) != 1)
1570 stmt
= build (MODIFY_EXPR
, void_type_node
, resvar
,
1571 build (CEIL_DIV_EXPR
, integer_type_node
,
1572 name
, build_int_cst (integer_type_node
,
1573 LBV_DENOMINATOR (lbv
))));
1574 name
= make_ssa_name (resvar
, stmt
);
1575 TREE_OPERAND (stmt
, 0) = name
;
1576 tsi
= tsi_last (stmts
);
1577 tsi_link_after (&tsi
, stmt
, TSI_CONTINUE_LINKING
);
1579 *stmts_to_insert
= stmts
;
1583 /* Convert a linear expression from coefficient and constant form to a
1585 Return the tree that represents the final value of the expression.
1586 LLE is the linear expression to convert.
1587 OFFSET is the linear offset to apply to the expression.
1588 INDUCTION_VARS is a vector of induction variables for the loops.
1589 INVARIANTS is a vector of the loop nest invariants.
1590 WRAP specifies what tree code to wrap the results in, if there is more than
1591 one (it is either MAX_EXPR, or MIN_EXPR).
1592 STMTS_TO_INSERT Is a pointer to the statement list we fill in with
1593 statements that need to be inserted for the linear expression. */
1596 lle_to_gcc_expression (lambda_linear_expression lle
,
1597 lambda_linear_expression offset
,
1598 VEC(tree
) *induction_vars
,
1599 VEC(tree
) *invariants
,
1600 enum tree_code wrap
, tree
* stmts_to_insert
)
1602 tree stmts
, stmt
, resvar
, name
;
1604 tree_stmt_iterator tsi
;
1608 /* Create a statement list and a linear expression temporary. */
1609 stmts
= alloc_stmt_list ();
1610 resvar
= create_tmp_var (integer_type_node
, "lletmp");
1611 add_referenced_tmp_var (resvar
);
1612 results
= VEC_alloc (tree
, 1);
1614 /* Build up the linear expressions, and put the variable representing the
1615 result in the results array. */
1616 for (; lle
!= NULL
; lle
= LLE_NEXT (lle
))
1618 /* Start at name = 0. */
1619 stmt
= build (MODIFY_EXPR
, void_type_node
, resvar
, integer_zero_node
);
1620 name
= make_ssa_name (resvar
, stmt
);
1621 TREE_OPERAND (stmt
, 0) = name
;
1622 tsi
= tsi_last (stmts
);
1623 tsi_link_after (&tsi
, stmt
, TSI_CONTINUE_LINKING
);
1625 /* First do the induction variables.
1626 at the end, name = name + all the induction variables added
1628 for (i
= 0; i
< VEC_length (tree
,induction_vars
); i
++)
1630 if (LLE_COEFFICIENTS (lle
)[i
] != 0)
1636 /* mult = induction variable * coefficient. */
1637 if (LLE_COEFFICIENTS (lle
)[i
] == 1)
1639 mult
= VEC_index (tree
, induction_vars
, i
);
1643 coeff
= build_int_cst (integer_type_node
,
1644 LLE_COEFFICIENTS (lle
)[i
]);
1645 mult
= fold (build (MULT_EXPR
, integer_type_node
,
1646 VEC_index (tree
, induction_vars
, i
),
1650 /* newname = mult */
1651 stmt
= build (MODIFY_EXPR
, void_type_node
, resvar
, mult
);
1652 newname
= make_ssa_name (resvar
, stmt
);
1653 TREE_OPERAND (stmt
, 0) = newname
;
1654 tsi
= tsi_last (stmts
);
1655 tsi_link_after (&tsi
, stmt
, TSI_CONTINUE_LINKING
);
1657 /* name = name + newname */
1658 stmt
= build (MODIFY_EXPR
, void_type_node
, resvar
,
1659 build (PLUS_EXPR
, integer_type_node
,
1661 name
= make_ssa_name (resvar
, stmt
);
1662 TREE_OPERAND (stmt
, 0) = name
;
1663 tsi
= tsi_last (stmts
);
1664 tsi_link_after (&tsi
, stmt
, TSI_CONTINUE_LINKING
);
1668 /* Handle our invariants.
1669 At the end, we have name = name + result of adding all multiplied
1671 for (i
= 0; i
< VEC_length (tree
, invariants
); i
++)
1673 if (LLE_INVARIANT_COEFFICIENTS (lle
)[i
] != 0)
1679 /* mult = invariant * coefficient */
1680 if (LLE_INVARIANT_COEFFICIENTS (lle
)[i
] == 1)
1682 mult
= VEC_index (tree
, invariants
, i
);
1686 coeff
= build_int_cst (integer_type_node
,
1687 LLE_INVARIANT_COEFFICIENTS (lle
)[i
]);
1688 mult
= fold (build (MULT_EXPR
, integer_type_node
,
1689 VEC_index (tree
, invariants
, i
),
1693 /* newname = mult */
1694 stmt
= build (MODIFY_EXPR
, void_type_node
, resvar
, mult
);
1695 newname
= make_ssa_name (resvar
, stmt
);
1696 TREE_OPERAND (stmt
, 0) = newname
;
1697 tsi
= tsi_last (stmts
);
1698 tsi_link_after (&tsi
, stmt
, TSI_CONTINUE_LINKING
);
1700 /* name = name + newname */
1701 stmt
= build (MODIFY_EXPR
, void_type_node
, resvar
,
1702 build (PLUS_EXPR
, integer_type_node
,
1704 name
= make_ssa_name (resvar
, stmt
);
1705 TREE_OPERAND (stmt
, 0) = name
;
1706 tsi
= tsi_last (stmts
);
1707 tsi_link_after (&tsi
, stmt
, TSI_CONTINUE_LINKING
);
1711 /* Now handle the constant.
1712 name = name + constant. */
1713 if (LLE_CONSTANT (lle
) != 0)
1715 stmt
= build (MODIFY_EXPR
, void_type_node
, resvar
,
1716 build (PLUS_EXPR
, integer_type_node
,
1717 name
, build_int_cst (integer_type_node
,
1718 LLE_CONSTANT (lle
))));
1719 name
= make_ssa_name (resvar
, stmt
);
1720 TREE_OPERAND (stmt
, 0) = name
;
1721 tsi
= tsi_last (stmts
);
1722 tsi_link_after (&tsi
, stmt
, TSI_CONTINUE_LINKING
);
1725 /* Now handle the offset.
1726 name = name + linear offset. */
1727 if (LLE_CONSTANT (offset
) != 0)
1729 stmt
= build (MODIFY_EXPR
, void_type_node
, resvar
,
1730 build (PLUS_EXPR
, integer_type_node
,
1731 name
, build_int_cst (integer_type_node
,
1732 LLE_CONSTANT (offset
))));
1733 name
= make_ssa_name (resvar
, stmt
);
1734 TREE_OPERAND (stmt
, 0) = name
;
1735 tsi
= tsi_last (stmts
);
1736 tsi_link_after (&tsi
, stmt
, TSI_CONTINUE_LINKING
);
1739 /* Handle any denominator that occurs. */
1740 if (LLE_DENOMINATOR (lle
) != 1)
1742 if (wrap
== MAX_EXPR
)
1743 stmt
= build (MODIFY_EXPR
, void_type_node
, resvar
,
1744 build (CEIL_DIV_EXPR
, integer_type_node
,
1745 name
, build_int_cst (integer_type_node
,
1746 LLE_DENOMINATOR (lle
))));
1747 else if (wrap
== MIN_EXPR
)
1748 stmt
= build (MODIFY_EXPR
, void_type_node
, resvar
,
1749 build (FLOOR_DIV_EXPR
, integer_type_node
,
1750 name
, build_int_cst (integer_type_node
,
1751 LLE_DENOMINATOR (lle
))));
1755 /* name = {ceil, floor}(name/denominator) */
1756 name
= make_ssa_name (resvar
, stmt
);
1757 TREE_OPERAND (stmt
, 0) = name
;
1758 tsi
= tsi_last (stmts
);
1759 tsi_link_after (&tsi
, stmt
, TSI_CONTINUE_LINKING
);
1761 VEC_safe_push (tree
, results
, name
);
1764 /* Again, out of laziness, we don't handle this case yet. It's not
1765 hard, it just hasn't occurred. */
1766 gcc_assert (VEC_length (tree
, results
) <= 2);
1768 /* We may need to wrap the results in a MAX_EXPR or MIN_EXPR. */
1769 if (VEC_length (tree
, results
) > 1)
1771 tree op1
= VEC_index (tree
, results
, 0);
1772 tree op2
= VEC_index (tree
, results
, 1);
1773 stmt
= build (MODIFY_EXPR
, void_type_node
, resvar
,
1774 build (wrap
, integer_type_node
, op1
, op2
));
1775 name
= make_ssa_name (resvar
, stmt
);
1776 TREE_OPERAND (stmt
, 0) = name
;
1777 tsi
= tsi_last (stmts
);
1778 tsi_link_after (&tsi
, stmt
, TSI_CONTINUE_LINKING
);
1781 *stmts_to_insert
= stmts
;
1785 /* Transform a lambda loopnest NEW_LOOPNEST, which had TRANSFORM applied to
1786 it, back into gcc code. This changes the
1787 loops, their induction variables, and their bodies, so that they
1788 match the transformed loopnest.
1789 OLD_LOOPNEST is the loopnest before we've replaced it with the new
1791 OLD_IVS is a vector of induction variables from the old loopnest.
1792 INVARIANTS is a vector of loop invariants from the old loopnest.
1793 NEW_LOOPNEST is the new lambda loopnest to replace OLD_LOOPNEST with.
1794 TRANSFORM is the matrix transform that was applied to OLD_LOOPNEST to get
1797 lambda_loopnest_to_gcc_loopnest (struct loop
*old_loopnest
,
1799 VEC(tree
) *invariants
,
1800 lambda_loopnest new_loopnest
,
1801 lambda_trans_matrix transform
)
1808 block_stmt_iterator bsi
;
1812 transform
= lambda_trans_matrix_inverse (transform
);
1813 fprintf (dump_file
, "Inverse of transformation matrix:\n");
1814 print_lambda_trans_matrix (dump_file
, transform
);
1816 temp
= old_loopnest
;
1817 new_ivs
= VEC_alloc (tree
, 1);
1823 temp
= old_loopnest
;
1827 lambda_loop newloop
;
1829 tree ivvar
, ivvarinced
, exitcond
, stmts
;
1830 enum tree_code testtype
;
1831 tree newupperbound
, newlowerbound
;
1832 lambda_linear_expression offset
;
1833 /* First, build the new induction variable temporary */
1835 ivvar
= create_tmp_var (integer_type_node
, "lnivtmp");
1836 add_referenced_tmp_var (ivvar
);
1838 VEC_safe_push (tree
, new_ivs
, ivvar
);
1840 newloop
= LN_LOOPS (new_loopnest
)[i
];
1842 /* Linear offset is a bit tricky to handle. Punt on the unhandled
1844 offset
= LL_LINEAR_OFFSET (newloop
);
1846 gcc_assert (LLE_DENOMINATOR (offset
) == 1 &&
1847 lambda_vector_zerop (LLE_COEFFICIENTS (offset
), depth
));
1849 /* Now build the new lower bounds, and insert the statements
1850 necessary to generate it on the loop preheader. */
1851 newlowerbound
= lle_to_gcc_expression (LL_LOWER_BOUND (newloop
),
1852 LL_LINEAR_OFFSET (newloop
),
1854 invariants
, MAX_EXPR
, &stmts
);
1855 bsi_insert_on_edge (loop_preheader_edge (temp
), stmts
);
1856 bsi_commit_edge_inserts (NULL
);
1857 /* Build the new upper bound and insert its statements in the
1858 basic block of the exit condition */
1859 newupperbound
= lle_to_gcc_expression (LL_UPPER_BOUND (newloop
),
1860 LL_LINEAR_OFFSET (newloop
),
1862 invariants
, MIN_EXPR
, &stmts
);
1863 exitcond
= get_loop_exit_condition (temp
);
1864 bb
= bb_for_stmt (exitcond
);
1865 bsi
= bsi_start (bb
);
1866 bsi_insert_after (&bsi
, stmts
, BSI_NEW_STMT
);
1868 /* Create the new iv, and insert it's increment on the latch
1871 bb
= EDGE_PRED (temp
->latch
, 0)->src
;
1872 bsi
= bsi_last (bb
);
1873 create_iv (newlowerbound
,
1874 build_int_cst (integer_type_node
, LL_STEP (newloop
)),
1875 ivvar
, temp
, &bsi
, false, &ivvar
,
1878 /* Replace the exit condition with the new upper bound
1880 testtype
= LL_STEP (newloop
) >= 0 ? LE_EXPR
: GE_EXPR
;
1881 COND_EXPR_COND (exitcond
) = build (testtype
,
1883 ivvarinced
, newupperbound
);
1884 modify_stmt (exitcond
);
1885 VEC_replace (tree
, new_ivs
, i
, ivvar
);
1891 /* Rewrite uses of the old ivs so that they are now specified in terms of
1893 temp
= old_loopnest
;
1894 for (i
= 0; i
< VEC_length (tree
, old_ivs
); i
++)
1897 tree oldiv
= VEC_index (tree
, old_ivs
, i
);
1898 dataflow_t imm
= get_immediate_uses (SSA_NAME_DEF_STMT (oldiv
));
1899 for (j
= 0; j
< num_immediate_uses (imm
); j
++)
1902 tree stmt
= immediate_use (imm
, j
);
1904 get_stmt_operands (stmt
);
1905 uses
= STMT_USE_OPS (stmt
);
1906 for (k
= 0; k
< NUM_USES (uses
); k
++)
1908 use_operand_p use
= USE_OP_PTR (uses
, k
);
1909 if (USE_FROM_PTR (use
) == oldiv
)
1912 lambda_body_vector lbv
;
1913 /* Compute the new expression for the induction
1915 depth
= VEC_length (tree
, new_ivs
);
1916 lbv
= lambda_body_vector_new (depth
);
1917 LBV_COEFFICIENTS (lbv
)[i
] = 1;
1918 lbv
= lambda_body_vector_compute_new (transform
, lbv
);
1919 newiv
= lbv_to_gcc_expression (lbv
, new_ivs
, &stmts
);
1920 bsi
= stmt_for_bsi (stmt
);
1921 /* Insert the statements to build that
1923 bsi_insert_before (&bsi
, stmts
, BSI_SAME_STMT
);
1924 SET_USE (use
, newiv
);
1934 /* Returns true when the vector V is lexicographically positive, in
1935 other words, when the first nonzero element is positive. */
1938 lambda_vector_lexico_pos (lambda_vector v
,
1942 for (i
= 0; i
< n
; i
++)
1955 /* Return TRUE if this is not interesting statement from the perspective of
1956 determining if we have a perfect loop nest. */
1959 not_interesting_stmt (tree stmt
)
1961 /* Note that COND_EXPR's aren't interesting because if they were exiting the
1962 loop, we would have already failed the number of exits tests. */
1963 if (TREE_CODE (stmt
) == LABEL_EXPR
1964 || TREE_CODE (stmt
) == GOTO_EXPR
1965 || TREE_CODE (stmt
) == COND_EXPR
)
1970 /* Return TRUE if PHI uses DEF for it's in-the-loop edge for LOOP. */
1973 phi_loop_edge_uses_def (struct loop
*loop
, tree phi
, tree def
)
1976 for (i
= 0; i
< PHI_NUM_ARGS (phi
); i
++)
1977 if (flow_bb_inside_loop_p (loop
, PHI_ARG_EDGE (phi
, i
)->src
))
1978 if (PHI_ARG_DEF (phi
, i
) == def
)
1983 /* Return TRUE if STMT is a use of PHI_RESULT. */
1986 stmt_uses_phi_result (tree stmt
, tree phi_result
)
1988 use_optype uses
= STMT_USE_OPS (stmt
);
1990 /* This is conservatively true, because we only want SIMPLE bumpers
1991 of the form x +- constant for our pass. */
1992 if (NUM_USES (uses
) != 1)
1994 if (USE_OP (uses
, 0) == phi_result
)
2000 /* STMT is a bumper stmt for LOOP if the version it defines is used in the
2001 in-loop-edge in a phi node, and the operand it uses is the result of that
2004 i_3 = PHI (0, i_29); */
2007 stmt_is_bumper_for_loop (struct loop
*loop
, tree stmt
)
2011 def_optype defs
= STMT_DEF_OPS (stmt
);
2015 if (NUM_DEFS (defs
) != 1)
2017 def
= DEF_OP (defs
, 0);
2018 imm
= get_immediate_uses (stmt
);
2019 for (i
= 0; i
< num_immediate_uses (imm
); i
++)
2021 use
= immediate_use (imm
, i
);
2022 if (TREE_CODE (use
) == PHI_NODE
)
2024 if (phi_loop_edge_uses_def (loop
, use
, def
))
2025 if (stmt_uses_phi_result (stmt
, PHI_RESULT (use
)))
2031 /* Return true if LOOP is a perfect loop nest.
2032 Perfect loop nests are those loop nests where all code occurs in the
2033 innermost loop body.
2034 If S is a program statement, then
2043 is not a perfect loop nest because of S1.
2051 is a perfect loop nest.
2053 Since we don't have high level loops anymore, we basically have to walk our
2054 statements and ignore those that are there because the loop needs them (IE
2055 the induction variable increment, and jump back to the top of the loop). */
2058 perfect_nest_p (struct loop
*loop
)
2066 bbs
= get_loop_body (loop
);
2067 exit_cond
= get_loop_exit_condition (loop
);
2068 for (i
= 0; i
< loop
->num_nodes
; i
++)
2070 if (bbs
[i
]->loop_father
== loop
)
2072 block_stmt_iterator bsi
;
2073 for (bsi
= bsi_start (bbs
[i
]); !bsi_end_p (bsi
); bsi_next (&bsi
))
2075 tree stmt
= bsi_stmt (bsi
);
2076 if (stmt
== exit_cond
2077 || not_interesting_stmt (stmt
)
2078 || stmt_is_bumper_for_loop (loop
, stmt
))
2086 /* See if the inner loops are perfectly nested as well. */
2088 return perfect_nest_p (loop
->inner
);
2093 /* Add phi args using PENDINT_STMT list. */
2096 nestify_update_pending_stmts (edge e
)
2101 if (!PENDING_STMT (e
))
2106 for (phi
= phi_nodes (dest
), arg
= PENDING_STMT (e
);
2108 phi
= TREE_CHAIN (phi
), arg
= TREE_CHAIN (arg
))
2110 def
= TREE_VALUE (arg
);
2111 add_phi_arg (&phi
, def
, e
);
2114 PENDING_STMT (e
) = NULL
;
2117 /* Replace the USES of tree X in STMT with tree Y */
2120 replace_uses_of_x_with_y (tree stmt
, tree x
, tree y
)
2122 use_optype uses
= STMT_USE_OPS (stmt
);
2124 for (i
= 0; i
< NUM_USES (uses
); i
++)
2126 if (USE_OP (uses
, i
) == x
)
2127 SET_USE_OP (uses
, i
, y
);
2131 /* Return TRUE if STMT uses tree OP in it's uses. */
2134 stmt_uses_op (tree stmt
, tree op
)
2136 use_optype uses
= STMT_USE_OPS (stmt
);
2138 for (i
= 0; i
< NUM_USES (uses
); i
++)
2140 if (USE_OP (uses
, i
) == op
)
2146 /* Return TRUE if LOOP is an imperfect nest that we can convert to a perfect
2147 one. LOOPIVS is a vector of induction variables, one per loop.
2148 ATM, we only handle imperfect nests of depth 2, where all of the statements
2149 occur after the inner loop. */
2152 can_convert_to_perfect_nest (struct loop
*loop
,
2153 VEC (tree
) *loopivs
)
2156 tree exit_condition
;
2158 block_stmt_iterator bsi
;
2160 /* Can't handle triply nested+ loops yet. */
2161 if (!loop
->inner
|| loop
->inner
->inner
)
2164 /* We only handle moving the after-inner-body statements right now, so make
2165 sure all the statements we need to move are located in that position. */
2166 bbs
= get_loop_body (loop
);
2167 exit_condition
= get_loop_exit_condition (loop
);
2168 for (i
= 0; i
< loop
->num_nodes
; i
++)
2170 if (bbs
[i
]->loop_father
== loop
)
2172 for (bsi
= bsi_start (bbs
[i
]); !bsi_end_p (bsi
); bsi_next (&bsi
))
2175 tree stmt
= bsi_stmt (bsi
);
2176 if (stmt
== exit_condition
2177 || not_interesting_stmt (stmt
)
2178 || stmt_is_bumper_for_loop (loop
, stmt
))
2180 /* If the statement uses inner loop ivs, we == screwed. */
2181 for (j
= 1; j
< VEC_length (tree
, loopivs
); j
++)
2182 if (stmt_uses_op (stmt
, VEC_index (tree
, loopivs
, j
)))
2188 /* If the bb of a statement we care about isn't dominated by
2189 the header of the inner loop, then we are also screwed. */
2190 if (!dominated_by_p (CDI_DOMINATORS
,
2192 loop
->inner
->header
))
2203 /* Transform the loop nest into a perfect nest, if possible.
2204 LOOPS is the current struct loops *
2205 LOOP is the loop nest to transform into a perfect nest
2206 LBOUNDS are the lower bounds for the loops to transform
2207 UBOUNDS are the upper bounds for the loops to transform
2208 STEPS is the STEPS for the loops to transform.
2209 LOOPIVS is the induction variables for the loops to transform.
2211 Basically, for the case of
2213 FOR (i = 0; i < 50; i++)
2215 FOR (j =0; j < 50; j++)
2222 This function will transform it into a perfect loop nest by splitting the
2223 outer loop into two loops, like so:
2225 FOR (i = 0; i < 50; i++)
2227 FOR (j = 0; j < 50; j++)
2233 FOR (i = 0; i < 50; i ++)
2238 Return FALSE if we can't make this loop into a perfect nest. */
2240 perfect_nestify (struct loops
*loops
,
2242 VEC (tree
) *lbounds
,
2243 VEC (tree
) *ubounds
,
2245 VEC (tree
) *loopivs
)
2248 tree exit_condition
;
2249 tree then_label
, else_label
, cond_stmt
;
2250 basic_block preheaderbb
, headerbb
, bodybb
, latchbb
, olddest
;
2252 block_stmt_iterator bsi
;
2254 struct loop
*newloop
;
2258 tree ivvar
, ivvarinced
;
2261 if (!can_convert_to_perfect_nest (loop
, loopivs
))
2264 phis
= VEC_alloc (tree
, 1);
2266 /* Create the new loop */
2268 olddest
= loop
->single_exit
->dest
;
2269 preheaderbb
= loop_split_edge_with (loop
->single_exit
, NULL
);
2270 headerbb
= create_empty_bb (EXIT_BLOCK_PTR
->prev_bb
);
2272 /* This is done because otherwise, it will release the ssa_name too early
2273 when the edge gets redirected and it will get reused, causing the use of
2274 the phi node to get rewritten. */
2276 for (phi
= phi_nodes (olddest
); phi
; phi
= PHI_CHAIN (phi
))
2278 /* These should be simple exit phi copies. */
2279 if (PHI_NUM_ARGS (phi
) != 1)
2281 VEC_safe_push (tree
, phis
, PHI_RESULT (phi
));
2282 VEC_safe_push (tree
, phis
, PHI_ARG_DEF (phi
, 0));
2283 mark_for_rewrite (PHI_RESULT (phi
));
2285 e
= redirect_edge_and_branch (EDGE_SUCC (preheaderbb
, 0), headerbb
);
2286 unmark_all_for_rewrite ();
2287 bb_ann (olddest
)->phi_nodes
= NULL
;
2288 /* Add back the old exit phis. */
2289 while (VEC_length (tree
, phis
) != 0)
2293 def
= VEC_pop (tree
, phis
);
2294 phiname
= VEC_pop (tree
, phis
);
2296 phi
= create_phi_node (phiname
, preheaderbb
);
2297 add_phi_arg (&phi
, def
, EDGE_PRED (preheaderbb
, 0));
2300 nestify_update_pending_stmts (e
);
2301 bodybb
= create_empty_bb (EXIT_BLOCK_PTR
->prev_bb
);
2302 latchbb
= create_empty_bb (EXIT_BLOCK_PTR
->prev_bb
);
2303 make_edge (headerbb
, bodybb
, EDGE_FALLTHRU
);
2304 then_label
= build1 (GOTO_EXPR
, void_type_node
, tree_block_label (latchbb
));
2305 else_label
= build1 (GOTO_EXPR
, void_type_node
, tree_block_label (olddest
));
2306 cond_stmt
= build (COND_EXPR
, void_type_node
,
2307 build (NE_EXPR
, boolean_type_node
,
2310 then_label
, else_label
);
2311 bsi
= bsi_start (bodybb
);
2312 bsi_insert_after (&bsi
, cond_stmt
, BSI_NEW_STMT
);
2313 e
= make_edge (bodybb
, olddest
, EDGE_FALSE_VALUE
);
2314 make_edge (bodybb
, latchbb
, EDGE_TRUE_VALUE
);
2315 make_edge (latchbb
, headerbb
, EDGE_FALLTHRU
);
2317 /* Update the loop structures. */
2318 newloop
= duplicate_loop (loops
, loop
, olddest
->loop_father
);
2319 newloop
->header
= headerbb
;
2320 newloop
->latch
= latchbb
;
2321 newloop
->single_exit
= e
;
2322 add_bb_to_loop (latchbb
, newloop
);
2323 add_bb_to_loop (bodybb
, newloop
);
2324 add_bb_to_loop (headerbb
, newloop
);
2325 add_bb_to_loop (preheaderbb
, olddest
->loop_father
);
2326 set_immediate_dominator (CDI_DOMINATORS
, bodybb
, headerbb
);
2327 set_immediate_dominator (CDI_DOMINATORS
, headerbb
, preheaderbb
);
2328 set_immediate_dominator (CDI_DOMINATORS
, preheaderbb
,
2329 loop
->single_exit
->src
);
2330 set_immediate_dominator (CDI_DOMINATORS
, latchbb
, bodybb
);
2331 set_immediate_dominator (CDI_DOMINATORS
, olddest
, bodybb
);
2332 /* Create the new iv. */
2333 ivvar
= create_tmp_var (integer_type_node
, "perfectiv");
2334 add_referenced_tmp_var (ivvar
);
2335 bsi
= bsi_last (EDGE_PRED (newloop
->latch
, 0)->src
);
2336 create_iv (VEC_index (tree
, lbounds
, 0),
2337 build_int_cst (integer_type_node
,
2338 VEC_index (int, steps
, 0)),
2339 ivvar
, newloop
, &bsi
, false, &ivvar
, &ivvarinced
);
2341 /* Create the new upper bound. This may not be just a variable, so we copy
2342 it to one just in case. */
2344 exit_condition
= get_loop_exit_condition (newloop
);
2345 uboundvar
= create_tmp_var (integer_type_node
, "uboundvar");
2346 add_referenced_tmp_var (uboundvar
);
2347 stmt
= build (MODIFY_EXPR
, void_type_node
, uboundvar
,
2348 VEC_index (tree
, ubounds
, 0));
2349 uboundvar
= make_ssa_name (uboundvar
, stmt
);
2350 TREE_OPERAND (stmt
, 0) = uboundvar
;
2351 bsi_insert_before (&bsi
, stmt
, BSI_SAME_STMT
);
2352 COND_EXPR_COND (exit_condition
) = build (LE_EXPR
,
2357 bbs
= get_loop_body (loop
);
2358 /* Now replace the induction variable in the moved statements with the
2359 correct loop induction variable. */
2360 for (i
= 0; i
< loop
->num_nodes
; i
++)
2362 block_stmt_iterator tobsi
= bsi_last (bodybb
);
2363 if (bbs
[i
]->loop_father
== loop
)
2365 /* Note that the bsi only needs to be explicitly incremented
2366 when we don't move something, since it is automatically
2367 incremented when we do. */
2368 for (bsi
= bsi_start (bbs
[i
]); !bsi_end_p (bsi
);)
2370 tree stmt
= bsi_stmt (bsi
);
2371 if (stmt
== exit_condition
2372 || not_interesting_stmt (stmt
)
2373 || stmt_is_bumper_for_loop (loop
, stmt
))
2378 replace_uses_of_x_with_y (stmt
,
2379 VEC_index (tree
, loopivs
, 0),
2381 bsi_move_before (&bsi
, &tobsi
);
2386 flow_loops_find (loops
, LOOP_ALL
);
2387 return perfect_nest_p (loop
);
2390 /* Return true if TRANS is a legal transformation matrix that respects
2391 the dependence vectors in DEPENDENCE_RELATIONS. The conservative answer
2394 "Wolfe proves that a unimodular transformation represented by the
2395 matrix T is legal when applied to a loop nest with a set of
2396 lexicographically non-negative distance vectors RDG if and only if
2397 for each vector d in RDG, (T.d >= 0) is lexicographically positive.
2398 i.e.: if and only if it transforms the lexicographically positive
2399 distance vectors to lexicographically positive vectors. Note that
2400 a unimodular matrix must transform the zero vector (and only it) to
2401 the zero vector." S.Muchnick. */
2404 lambda_transform_legal_p (lambda_trans_matrix trans
,
2406 varray_type dependence_relations
)
2409 lambda_vector distres
;
2410 struct data_dependence_relation
*ddr
;
2412 #if defined ENABLE_CHECKING
2413 if (LTM_COLSIZE (trans
) != nb_loops
2414 || LTM_ROWSIZE (trans
) != nb_loops
)
2418 /* When there is an unknown relation in the dependence_relations, we
2419 know that it is not worth looking at this loop nest: give up. */
2420 ddr
= (struct data_dependence_relation
*)
2421 VARRAY_GENERIC_PTR (dependence_relations
, 0);
2424 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
2427 distres
= lambda_vector_new (nb_loops
);
2429 /* For each distance vector in the dependence graph. */
2430 for (i
= 0; i
< VARRAY_ACTIVE_SIZE (dependence_relations
); i
++)
2432 ddr
= (struct data_dependence_relation
*)
2433 VARRAY_GENERIC_PTR (dependence_relations
, i
);
2437 /* Don't care about relations for which we know that there is no
2438 dependence, nor about read-read (aka. input-dependences):
2439 these data accesses can happen in any order. */
2440 if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
2441 || (DR_IS_READ (DDR_A (ddr
)) && DR_IS_READ (DDR_B (ddr
))))
2443 /* Conservatively answer: "this transformation is not valid". */
2444 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
2447 /* Compute trans.dist_vect */
2448 lambda_matrix_vector_mult (LTM_MATRIX (trans
), nb_loops
, nb_loops
,
2449 DDR_DIST_VECT (ddr
), distres
);
2451 if (!lambda_vector_lexico_pos (distres
, nb_loops
))